summaryrefslogtreecommitdiff
path: root/textwolf/include/textwolf/codepages.hpp
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2014-06-14 20:15:59 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2014-06-14 20:15:59 +0200
commit 913e4215f22e16ad90a30b7e68e8cd2165c6812d (patch)
tree d7aef8f6e7b29895f1b0160cb647e5427181198e /textwolf/include/textwolf/codepages.hpp
parent 4f6d08ce39cc430ed7ba90d143bf7af3fc8ca6d5 (diff)
download crawler-913e4215f22e16ad90a30b7e68e8cd2165c6812d.tar.gz
crawler-913e4215f22e16ad90a30b7e68e8cd2165c6812d.tar.bz2
added textwolf and a test for it
Diffstat (limited to 'textwolf/include/textwolf/codepages.hpp')
-rw-r--r--textwolf/include/textwolf/codepages.hpp182
1 files changed, 182 insertions, 0 deletions
diff --git a/textwolf/include/textwolf/codepages.hpp b/textwolf/include/textwolf/codepages.hpp
new file mode 100644
index 0000000..4e8e7cf
--- /dev/null
+++ b/textwolf/include/textwolf/codepages.hpp
@@ -0,0 +1,182 @@
+/*
+---------------------------------------------------------------------
+ The template library textwolf implements an input iterator on
+ a set of XML path expressions without backward references on an
+ STL conforming input iterator as source. It does no buffering
+ or read ahead and is dedicated for stream processing of XML
+ for a small set of XML queries.
+ Stream processing in this context refers to processing the
+ document without buffering anything but the current result token
+ processed with its tag hierarchy information.
+
+ Copyright (C) 2010,2011,2012,2013,2014 Patrick Frey
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3.0 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+--------------------------------------------------------------------
+
+ The latest version of textwolf can be found at 'http://github.com/patrickfrey/textwolf'
+ For documentation see 'http://patrickfrey.github.com/textwolf'
+
+--------------------------------------------------------------------
+*/
+/// \file textwolf/codepages.hpp
+/// \brief Definition of IsoLatin code pages
+
+#ifndef __TEXTWOLF_CODE_PAGES_HPP__
+#define __TEXTWOLF_CODE_PAGES_HPP__
+#include "textwolf/char.hpp"
+#include <map>
+
+namespace textwolf {
+namespace charset {
+
+/// \class IsoLatinCodePage
+/// \brief IsoLatin code page
+class IsoLatinCodePage
+{
+private:
+ struct InvOvlCodeMap // Overlay lookup tables, one std::map per code page row; invcode() searches them with a unicode value to get the code page character
+ {
+ InvOvlCodeMap() // Fills all 9 maps from the static pair table below
+ {
+ struct Element
+ {
+ unsigned short first; // map key (the value invcode() searches for)
+ unsigned char second; // mapped value (the character returned by invcode())
+ };
+ struct ElementAr
+ {
+ Element ar[ 64]; // fixed capacity per row; slots without initializer are zero-filled
+ };
+ static const ElementAr ovlar[9] = // row r holds the pairs for m_map[r]; every row ends with the terminator {0,0}
+ {
+ {{{0,0}}}, // row 0 has no pairs
+ {{{260,161}, {728,162}, {321,163}, {317,165}, {346,166}, {352,169}, {350,170}, {356,171}, {377,172}, {381,174}, {379,175}, {261,177}, {731,178}, {322,179}, {318,181}, {347,182}, {711,183}, {353,185}, {351,186}, {357,187}, {378,188}, {733,189}, {382,190}, {380,191}, {340,192}, {258,195}, {313,197}, {262,198}, {268,200}, {280,202}, {282,204}, {270,207}, {272,208}, {323,209}, {327,210}, {336,213}, {344,216}, {366,217}, {368,219}, {354,222}, {341,224}, {259,227}, {314,229}, {263,230}, {269,232}, {281,234}, {283,236}, {271,239}, {273,240}, {324,241}, {328,242}, {337,245}, {345,248}, {367,249}, {369,251}, {355,254}, {729,255}, {0,0}}},
+ {{{294,161}, {728,162}, {292,165}, {304,168}, {350,169}, {286,170}, {308,171}, {379,173}, {295,175}, {293,180}, {305,183}, {351,184}, {287,185}, {309,186}, {380,188}, {266,193}, {264,194}, {288,208}, {284,211}, {364,216}, {348,217}, {267,223}, {265,224}, {289,238}, {285,241}, {365,246}, {349,247}, {729,248}, {0,0}}},
+ {{{260,161}, {312,162}, {342,163}, {296,165}, {315,166}, {352,169}, {274,170}, {290,171}, {358,172}, {381,174}, {261,177}, {731,178}, {343,179}, {297,181}, {316,182}, {711,183}, {353,185}, {275,186}, {291,187}, {359,188}, {330,189}, {382,190}, {331,191}, {256,192}, {302,199}, {268,200}, {280,202}, {278,204}, {298,207}, {272,208}, {325,209}, {332,210}, {310,211}, {370,217}, {360,221}, {362,222}, {257,224}, {303,231}, {269,232}, {281,234}, {279,236}, {299,239}, {273,240}, {326,241}, {333,242}, {311,243}, {371,249}, {361,253}, {363,254}, {729,255}, {0,0}}},
+ {{{286,208}, {304,221}, {350,222}, {287,240}, {305,253}, {351,254}, {0,0}}},
+ {{{260,161}, {274,162}, {290,163}, {298,164}, {296,165}, {310,166}, {315,168}, {272,169}, {352,170}, {358,171}, {381,172}, {362,174}, {330,175}, {261,177}, {275,178}, {291,179}, {299,180}, {297,181}, {311,182}, {316,184}, {273,185}, {353,186}, {359,187}, {382,188}, {8213,189}, {363,190}, {331,191}, {256,192}, {302,199}, {268,200}, {280,202}, {278,204}, {325,209}, {332,210}, {360,215}, {370,217}, {257,224}, {303,231}, {269,232}, {281,234}, {279,236}, {326,241}, {333,242}, {361,247}, {371,249}, {312,255}, {0,0}}},
+ {{{8221,161}, {8222,165}, {342,170}, {8220,180}, {343,186}, {260,192}, {302,193}, {256,194}, {262,195}, {280,198}, {274,199}, {268,200}, {377,202}, {278,203}, {290,204}, {310,205}, {298,206}, {315,207}, {352,208}, {323,209}, {325,210}, {332,212}, {370,216}, {321,217}, {346,218}, {362,219}, {379,221}, {381,222}, {261,224}, {303,225}, {257,226}, {263,227}, {281,230}, {275,231}, {269,232}, {378,234}, {279,235}, {291,236}, {311,237}, {299,238}, {316,239}, {353,240}, {324,241}, {326,242}, {333,244}, {371,248}, {322,249}, {347,250}, {363,251}, {380,253}, {382,254}, {8217,255}, {0,0}}},
+ {{{7682,161}, {7683,162}, {266,164}, {267,165}, {7690,166}, {7808,168}, {7810,170}, {7691,171}, {7922,172}, {376,175}, {7710,176}, {7711,177}, {288,178}, {289,179}, {7744,180}, {7745,181}, {7766,183}, {7809,184}, {7767,185}, {7811,186}, {7776,187}, {7923,188}, {7812,189}, {7813,190}, {7777,191}, {372,208}, {7786,215}, {374,222}, {373,240}, {7787,247}, {375,254}, {0,0}}},
+ {{{8364,164}, {352,166}, {353,168}, {381,180}, {382,184}, {338,188}, {339,189}, {376,190}, {0,0}}}
+ };
+ unsigned int idx = 0;
+ for (; idx < 9; ++idx)
+ {
+ unsigned int ii = 0;
+ for (; ovlar[idx].ar[ii].first; ++ii) // runs until the first element whose key is 0 (the row terminator); ii itself is not bounded by the array size
+ {
+ m_map[idx][ ovlar[idx].ar[ii].first] = ovlar[idx].ar[ii].second;
+ }
+ }
+ }
+
+ inline const std::map<unsigned short, unsigned char>* get( unsigned int idx) const // returns the address of map row idx; idx is not range checked here, the caller has to pass a value below 9
+ {
+ return &m_map[ idx];
+ }
+ private:
+ std::map<unsigned short, unsigned char> m_map[9]; // one map per row of ovlar, filled by the constructor
+ };
+
+public:
+ /// \brief Copy constructor (takes over the three table pointers of o)
+ IsoLatinCodePage( const IsoLatinCodePage& o)
+ :m_cd(o.m_cd), m_invcd(o.m_invcd), m_invovlcd(o.m_invovlcd)
+ {
+ }
+
+ /// \brief Constructor selecting one of the 9 built-in code page tables; throws std::logic_error for any other index
+ /// \param[in] idx IsoLatin code page index in the range 1..9, 1 for "IsoLatin-1"
+ IsoLatinCodePage( unsigned int idx)
+ {
+ enum {NofCodePages=9};
+ struct CodePage
+ {
+ unsigned short ar[128]; // one slot per character value 128..255 (accessed with value-128)
+ };
+ static const CodePage codePage[ NofCodePages] = { // codePage[idx-1].ar[c-128] is what ucharcode() returns for character c
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 728, 321, 164, 317, 346, 167, 168, 352, 350, 356, 377, 173, 381, 379, 176, 261, 731, 322, 180, 318, 347, 711, 184, 353, 351, 357, 378, 733, 382, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 323, 327, 211, 212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 231, 269, 233, 281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 253, 355, 729}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 294, 728, 163, 164, 292, 167, 168, 304, 350, 286, 308, 173, 379, 176, 295, 178, 179, 180, 181, 293, 183, 184, 305, 351, 287, 309, 189, 380, 192, 193, 194, 196, 266, 264, 199, 200, 201, 202, 203, 204, 205, 206, 207, 209, 210, 211, 212, 288, 214, 215, 284, 217, 218, 219, 220, 364, 348, 223, 224, 225, 226, 228, 267, 265, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 289, 246, 247, 285, 249, 250, 251, 252, 365, 349, 729}}, // NOTE(review): this row appears to list only 121 of the 128 values (the rest is zero-filled); presumably the code points ISO-8859-3 leaves undefined were dropped instead of being given a placeholder, which would shift every later entry - verify against the ISO-8859-3 mapping
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 312, 342, 164, 296, 315, 167, 168, 352, 274, 290, 358, 173, 381, 175, 176, 261, 731, 343, 180, 297, 316, 711, 184, 353, 275, 291, 359, 330, 382, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 298, 272, 325, 332, 310, 212, 213, 214, 215, 216, 370, 218, 219, 220, 360, 362, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233, 281, 235, 279, 237, 238, 299, 273, 326, 333, 311, 244, 245, 246, 247, 248, 371, 250, 251, 252, 361, 363, 729}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 286, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 304, 350, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 287, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 305, 351, 255}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 274, 290, 298, 296, 310, 167, 315, 272, 352, 358, 381, 173, 362, 330, 176, 261, 275, 291, 299, 297, 311, 183, 316, 273, 353, 359, 382, 8213, 363, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 207, 208, 325, 332, 211, 212, 213, 214, 360, 216, 370, 218, 219, 220, 221, 222, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233, 281, 235, 279, 237, 238, 239, 240, 326, 333, 243, 244, 245, 246, 361, 248, 371, 250, 251, 252, 253, 254, 312}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8221, 162, 163, 164, 8222, 166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 8220, 181, 182, 183, 248, 185, 343, 187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 352, 323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 281, 275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 252, 380, 382, 8217}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 7682, 7683, 163, 266, 267, 7690, 167, 7808, 169, 7810, 7691, 7922, 173, 174, 376, 7710, 7711, 288, 289, 7744, 7745, 182, 7766, 7809, 7767, 7811, 7776, 7923, 7812, 7813, 7777, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 372, 209, 210, 211, 212, 213, 214, 7786, 216, 217, 218, 219, 220, 221, 374, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 373, 241, 242, 243, 244, 245, 246, 7787, 248, 249, 250, 251, 252, 253, 375, 255}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 8364, 165, 352, 167, 353, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 381, 181, 182, 183, 382, 185, 186, 187, 338, 339, 376, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}}
+ };
+ static const CodePage invcodePage[ NofCodePages] = { // invcodePage[idx-1].ar[u-128] is the character invcode() returns for a unicode value u in 128..255; 0 makes invcode() fall back to the overlay map
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 0, 164, 0, 0, 167, 168, 0, 0, 0, 0, 173, 0, 0, 176, 0, 0, 0, 180, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 193, 194, 0, 196, 0, 0, 199, 0, 201, 0, 203, 0, 205, 206, 0, 0, 0, 0, 211, 212, 0, 214, 215, 0, 0, 218, 0, 220, 221, 0, 223, 0, 225, 226, 0, 228, 0, 0, 231, 0, 233, 0, 235, 0, 237, 238, 0, 0, 0, 0, 243, 244, 0, 246, 247, 0, 0, 250, 0, 252, 253, 0, 0}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 163, 164, 0, 0, 166, 167, 0, 0, 0, 0, 172, 0, 0, 174, 0, 176, 177, 178, 179, 0, 181, 182, 0, 0, 0, 0, 187, 0, 0, 189, 190, 191, 0, 192, 0, 0, 195, 196, 197, 198, 199, 200, 201, 202, 203, 0, 204, 205, 206, 207, 0, 209, 210, 0, 212, 213, 214, 215, 0, 0, 218, 219, 220, 221, 0, 222, 0, 0, 225, 226, 227, 228, 229, 230, 231, 232, 233, 0, 234, 235, 236, 237, 0, 239, 240, 0, 242, 243, 244, 245, 0, 0, 0}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 0, 164, 0, 0, 167, 168, 0, 0, 0, 0, 173, 0, 175, 176, 0, 0, 0, 180, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 193, 194, 195, 196, 197, 198, 0, 0, 201, 0, 203, 0, 205, 206, 0, 0, 0, 0, 0, 212, 213, 214, 215, 216, 0, 218, 219, 220, 0, 0, 223, 0, 225, 226, 227, 228, 229, 230, 0, 0, 233, 0, 235, 0, 237, 238, 0, 0, 0, 0, 0, 244, 245, 246, 247, 248, 0, 250, 251, 252, 0, 0, 0}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 0, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 0, 0, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 0, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 0, 0, 255}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 0, 0, 0, 0, 167, 0, 0, 0, 0, 0, 173, 0, 0, 176, 0, 0, 0, 0, 0, 0, 183, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193, 194, 195, 196, 197, 198, 0, 0, 201, 0, 203, 0, 205, 206, 207, 208, 0, 0, 211, 212, 213, 214, 0, 216, 0, 218, 219, 220, 221, 222, 223, 0, 225, 226, 227, 228, 229, 230, 0, 0, 233, 0, 235, 0, 237, 238, 239, 240, 0, 0, 243, 244, 245, 246, 0, 248, 0, 250, 251, 252, 253, 254, 0}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 162, 163, 164, 0, 166, 167, 0, 169, 0, 171, 172, 173, 174, 0, 176, 177, 178, 179, 0, 181, 182, 183, 0, 185, 0, 187, 188, 189, 190, 0, 0, 0, 0, 0, 196, 197, 175, 0, 0, 201, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 0, 213, 214, 215, 168, 0, 0, 0, 220, 0, 0, 223, 0, 0, 0, 0, 228, 229, 191, 0, 0, 233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 0, 245, 246, 247, 184, 0, 0, 0, 252, 0, 0, 0}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 163, 0, 0, 0, 167, 0, 169, 0, 0, 0, 173, 174, 0, 0, 0, 0, 0, 0, 0, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 0, 209, 210, 211, 212, 213, 214, 0, 216, 217, 218, 219, 220, 221, 0, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 0, 241, 242, 243, 244, 245, 246, 0, 248, 249, 250, 251, 252, 253, 0, 255}},
+ {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 0, 165, 0, 167, 0, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 0, 181, 182, 183, 0, 185, 186, 187, 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}}
+ };
+ static const InvOvlCodeMap invOvlCodeMap; // function-local static: the overlay maps are built once and shared by all instances
+
+ if (idx > NofCodePages || idx == 0) throw std::logic_error( "code page index not supported"); // NOTE(review): std::logic_error is declared in <stdexcept>, which this header does not include itself - presumably it arrives via textwolf/char.hpp; confirm
+ m_cd = &codePage[ idx-1].ar[0];
+ m_invcd = &invcodePage[ idx-1].ar[0];
+ m_invovlcd = invOvlCodeMap.get( idx-1);
+ }
+
+ /// \brief Map a character of this codepage to its unicode character code
+ /// \param[in] ch character in this codepage
+ /// \return ch itself for values below 128, else the entry of the code page table selected by ch
+ inline UChar ucharcode( char ch) const
+ {
+ const unsigned int code = (unsigned char)ch;
+ return (code < 128) ? (UChar)code : (UChar)m_cd[ code - 128];
+ }
+
+ /// \brief Get the character representation of a unicode character in this codepage
+ /// \param[in] ch unicode character
+ /// \return the character representing ch in this codepage, or 0 if ch cannot be represented in it
+ inline char invcode( UChar ch) const
+ {
+ if (ch < 128) return (char)ch; // values below 128 are returned unchanged
+ if (ch <= 255)
+ {
+ const char rt = (char)m_invcd[ ch - 128]; // direct table for 128..255; 0 means "not listed here, try the overlay map"
+ if (rt != 0) return rt;
+ }
+ // The overlay map is keyed by unsigned short: reject wider values here, otherwise find() would silently truncate them to an unrelated key
+ if (ch > 0xFFFF) return 0;
+ std::map<unsigned short, unsigned char>::const_iterator fi = m_invovlcd->find( (unsigned short)ch);
+ return (fi == m_invovlcd->end()) ? 0 : (char)fi->second;
+ }
+
+ /// \brief Tell whether two code page objects use the same code page table (compares the m_cd pointers)
+ static inline bool is_equal( const IsoLatinCodePage& a, const IsoLatinCodePage& b)
+ {
+ return (b.m_cd == a.m_cd);
+ }
+
+private:
+ const unsigned short* m_cd;
+ const unsigned short* m_invcd;
+ const std::map<unsigned short, unsigned char>* m_invovlcd;
+};
+
+}}
+#endif
+
+