diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2014-06-14 20:15:59 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2014-06-14 20:15:59 +0200 |
commit | 913e4215f22e16ad90a30b7e68e8cd2165c6812d (patch) | |
tree | d7aef8f6e7b29895f1b0160cb647e5427181198e /textwolf/include/textwolf/codepages.hpp | |
parent | 4f6d08ce39cc430ed7ba90d143bf7af3fc8ca6d5 (diff) | |
download | crawler-913e4215f22e16ad90a30b7e68e8cd2165c6812d.tar.gz crawler-913e4215f22e16ad90a30b7e68e8cd2165c6812d.tar.bz2 |
added textwolf and a test for it
Diffstat (limited to 'textwolf/include/textwolf/codepages.hpp')
-rw-r--r-- | textwolf/include/textwolf/codepages.hpp | 182 |
1 files changed, 182 insertions, 0 deletions
diff --git a/textwolf/include/textwolf/codepages.hpp b/textwolf/include/textwolf/codepages.hpp new file mode 100644 index 0000000..4e8e7cf --- /dev/null +++ b/textwolf/include/textwolf/codepages.hpp @@ -0,0 +1,182 @@ +/* +--------------------------------------------------------------------- + The template library textwolf implements an input iterator on + a set of XML path expressions without backward references on an + STL conforming input iterator as source. It does no buffering + or read ahead and is dedicated for stream processing of XML + for a small set of XML queries. + Stream processing in this context refers to processing the + document without buffering anything but the current result token + processed with its tag hierarchy information. + + Copyright (C) 2010,2011,2012,2013,2014 Patrick Frey + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +-------------------------------------------------------------------- + + The latest version of textwolf can be found at 'http://github.com/patrickfrey/textwolf' + For documentation see 'http://patrickfrey.github.com/textwolf' + +-------------------------------------------------------------------- +*/ +/// \file textwolf/codepages.hpp +/// \brief Definition of IsoLatin code pages + +#ifndef __TEXTWOLF_CODE_PAGES_HPP__ +#define __TEXTWOLF_CODE_PAGES_HPP__ +#include "textwolf/char.hpp" +#include <map> + +namespace textwolf { +namespace charset { + +/// \class IsoLatinCodePage +/// \brief IsoLatin code page +class IsoLatinCodePage +{ +private: + struct InvOvlCodeMap + { + InvOvlCodeMap() + { + struct Element + { + unsigned short first; + unsigned char second; + }; + struct ElementAr + { + Element ar[ 64]; + }; + static const ElementAr ovlar[9] = + { + {{{0,0}}}, + {{{260,161}, {728,162}, {321,163}, {317,165}, {346,166}, {352,169}, {350,170}, {356,171}, {377,172}, {381,174}, {379,175}, {261,177}, {731,178}, {322,179}, {318,181}, {347,182}, {711,183}, {353,185}, {351,186}, {357,187}, {378,188}, {733,189}, {382,190}, {380,191}, {340,192}, {258,195}, {313,197}, {262,198}, {268,200}, {280,202}, {282,204}, {270,207}, {272,208}, {323,209}, {327,210}, {336,213}, {344,216}, {366,217}, {368,219}, {354,222}, {341,224}, {259,227}, {314,229}, {263,230}, {269,232}, {281,234}, {283,236}, {271,239}, {273,240}, {324,241}, {328,242}, {337,245}, {345,248}, {367,249}, {369,251}, {355,254}, {729,255}, {0,0}}}, + {{{294,161}, {728,162}, {292,165}, {304,168}, {350,169}, {286,170}, {308,171}, {379,173}, {295,175}, {293,180}, {305,183}, {351,184}, {287,185}, {309,186}, {380,188}, {266,193}, {264,194}, {288,208}, {284,211}, {364,216}, {348,217}, {267,223}, {265,224}, {289,238}, {285,241}, {365,246}, {349,247}, {729,248}, {0,0}}}, + {{{260,161}, {312,162}, {342,163}, {296,165}, {315,166}, {352,169}, {274,170}, {290,171}, {358,172}, {381,174}, {261,177}, {731,178}, {343,179}, {297,181}, {316,182}, {711,183}, {353,185}, {275,186}, {291,187}, {359,188}, {330,189}, {382,190}, {331,191}, {256,192}, {302,199}, {268,200}, {280,202}, {278,204}, {298,207}, {272,208}, {325,209}, {332,210}, {310,211}, {370,217}, {360,221}, {362,222}, {257,224}, {303,231}, {269,232}, {281,234}, {279,236}, {299,239}, {273,240}, {326,241}, {333,242}, {311,243}, {371,249}, {361,253}, {363,254}, {729,255}, {0,0}}}, + {{{286,208}, {304,221}, {350,222}, {287,240}, {305,253}, {351,254}, {0,0}}}, + {{{260,161}, {274,162}, {290,163}, {298,164}, {296,165}, {310,166}, {315,168}, {272,169}, {352,170}, {358,171}, {381,172}, {362,174}, {330,175}, {261,177}, {275,178}, {291,179}, {299,180}, {297,181}, {311,182}, {316,184}, {273,185}, {353,186}, {359,187}, {382,188}, {8213,189}, {363,190}, {331,191}, {256,192}, {302,199}, {268,200}, {280,202}, {278,204}, {325,209}, {332,210}, {360,215}, {370,217}, {257,224}, {303,231}, {269,232}, {281,234}, {279,236}, {326,241}, {333,242}, {361,247}, {371,249}, {312,255}, {0,0}}}, + {{{8221,161}, {8222,165}, {342,170}, {8220,180}, {343,186}, {260,192}, {302,193}, {256,194}, {262,195}, {280,198}, {274,199}, {268,200}, {377,202}, {278,203}, {290,204}, {310,205}, {298,206}, {315,207}, {352,208}, {323,209}, {325,210}, {332,212}, {370,216}, {321,217}, {346,218}, {362,219}, {379,221}, {381,222}, {261,224}, {303,225}, {257,226}, {263,227}, {281,230}, {275,231}, {269,232}, {378,234}, {279,235}, {291,236}, {311,237}, {299,238}, {316,239}, {353,240}, {324,241}, {326,242}, {333,244}, {371,248}, {322,249}, {347,250}, {363,251}, {380,253}, {382,254}, {8217,255}, {0,0}}}, + {{{7682,161}, {7683,162}, {266,164}, {267,165}, {7690,166}, {7808,168}, {7810,170}, {7691,171}, {7922,172}, {376,175}, {7710,176}, {7711,177}, {288,178}, {289,179}, {7744,180}, {7745,181}, {7766,183}, {7809,184}, {7767,185}, {7811,186}, {7776,187}, {7923,188}, {7812,189}, {7813,190}, {7777,191}, {372,208}, {7786,215}, {374,222}, {373,240}, {7787,247}, {375,254}, {0,0}}}, + {{{8364,164}, {352,166}, {353,168}, {381,180}, {382,184}, {338,188}, {339,189}, {376,190}, {0,0}}} + }; + unsigned int idx = 0; + for (; idx < 9; ++idx) + { + unsigned int ii = 0; + for (; ovlar[idx].ar[ii].first; ++ii) + { + m_map[idx][ ovlar[idx].ar[ii].first] = ovlar[idx].ar[ii].second; + } + } + } + + inline const std::map<unsigned short, unsigned char>* get( unsigned int idx) const + { + return &m_map[ idx]; + } + private: + std::map<unsigned short, unsigned char> m_map[9]; + }; + +public: + /// \brief Copy constructor + IsoLatinCodePage( const IsoLatinCodePage& o) + :m_cd(o.m_cd) + ,m_invcd(o.m_invcd) + ,m_invovlcd(o.m_invovlcd){} + + /// \brief Constructor + /// \param[in] idx IsoLatin code page index, 1 for "IsoLatin-1" + IsoLatinCodePage( unsigned int idx) + { + enum {NofCodePages=9}; + struct CodePage + { + unsigned short ar[128]; + }; + static const CodePage codePage[ NofCodePages] = { + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 728, 321, 164, 317, 346, 167, 168, 352, 350, 356, 377, 173, 381, 379, 176, 261, 731, 322, 180, 318, 347, 711, 184, 353, 351, 357, 378, 733, 382, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 323, 327, 211, 212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 231, 269, 233, 281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 253, 355, 729}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 294, 728, 163, 164, 292, 167, 168, 304, 350, 286, 308, 173, 379, 176, 295, 178, 179, 180, 181, 293, 183, 184, 305, 351, 287, 309, 189, 380, 192, 193, 194, 196, 266, 264, 199, 200, 201, 202, 203, 204, 205, 206, 207, 209, 210, 211, 212, 288, 214, 215, 284, 217, 218, 219, 220, 364, 348, 223, 224, 225, 226, 228, 267, 265, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 289, 246, 247, 285, 249, 250, 251, 252, 365, 349, 729}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 312, 342, 164, 296, 315, 167, 168, 352, 274, 290, 358, 173, 381, 175, 176, 261, 731, 343, 180, 297, 316, 711, 184, 353, 275, 291, 359, 330, 382, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 298, 272, 325, 332, 310, 212, 213, 214, 215, 216, 370, 218, 219, 220, 360, 362, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233, 281, 235, 279, 237, 238, 299, 273, 326, 333, 311, 244, 245, 246, 247, 248, 371, 250, 251, 252, 361, 363, 729}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 286, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 304, 350, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 287, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 305, 351, 255}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 274, 290, 298, 296, 310, 167, 315, 272, 352, 358, 381, 173, 362, 330, 176, 261, 275, 291, 299, 297, 311, 183, 316, 273, 353, 359, 382, 8213, 363, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 207, 208, 325, 332, 211, 212, 213, 214, 360, 216, 370, 218, 219, 220, 221, 222, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233, 281, 235, 279, 237, 238, 239, 240, 326, 333, 243, 244, 245, 246, 361, 248, 371, 250, 251, 252, 253, 254, 312}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8221, 162, 163, 164, 8222, 166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 8220, 181, 182, 183, 248, 185, 343, 187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 352, 323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 281, 275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 252, 380, 382, 8217}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 7682, 7683, 163, 266, 267, 7690, 167, 7808, 169, 7810, 7691, 7922, 173, 174, 376, 7710, 7711, 288, 289, 7744, 7745, 182, 7766, 7809, 7767, 7811, 7776, 7923, 7812, 7813, 7777, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 372, 209, 210, 211, 212, 213, 214, 7786, 216, 217, 218, 219, 220, 221, 374, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 373, 241, 242, 243, 244, 245, 246, 7787, 248, 249, 250, 251, 252, 253, 375, 255}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 8364, 165, 352, 167, 353, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 381, 181, 182, 183, 382, 185, 186, 187, 338, 339, 376, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}} + }; + static const CodePage invcodePage[ NofCodePages] = { + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 0, 164, 0, 0, 167, 168, 0, 0, 0, 0, 173, 0, 0, 176, 0, 0, 0, 180, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 193, 194, 0, 196, 0, 0, 199, 0, 201, 0, 203, 0, 205, 206, 0, 0, 0, 0, 211, 212, 0, 214, 215, 0, 0, 218, 0, 220, 221, 0, 223, 0, 225, 226, 0, 228, 0, 0, 231, 0, 233, 0, 235, 0, 237, 238, 0, 0, 0, 0, 243, 244, 0, 246, 247, 0, 0, 250, 0, 252, 253, 0, 0}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 163, 164, 0, 0, 166, 167, 0, 0, 0, 0, 172, 0, 0, 174, 0, 176, 177, 178, 179, 0, 181, 182, 0, 0, 0, 0, 187, 0, 0, 189, 190, 191, 0, 192, 0, 0, 195, 196, 197, 198, 199, 200, 201, 202, 203, 0, 204, 205, 206, 207, 0, 209, 210, 0, 212, 213, 214, 215, 0, 0, 218, 219, 220, 221, 0, 222, 0, 0, 225, 226, 227, 228, 229, 230, 231, 232, 233, 0, 234, 235, 236, 237, 0, 239, 240, 0, 242, 243, 244, 245, 0, 0, 0}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 0, 164, 0, 0, 167, 168, 0, 0, 0, 0, 173, 0, 175, 176, 0, 0, 0, 180, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 193, 194, 195, 196, 197, 198, 0, 0, 201, 0, 203, 0, 205, 206, 0, 0, 0, 0, 0, 212, 213, 214, 215, 216, 0, 218, 219, 220, 0, 0, 223, 0, 225, 226, 227, 228, 229, 230, 0, 0, 233, 0, 235, 0, 237, 238, 0, 0, 0, 0, 0, 244, 245, 246, 247, 248, 0, 250, 251, 252, 0, 0, 0}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 0, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 0, 0, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 0, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 0, 0, 255}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 0, 0, 0, 0, 167, 0, 0, 0, 0, 0, 173, 0, 0, 176, 0, 0, 0, 0, 0, 0, 183, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193, 194, 195, 196, 197, 198, 0, 0, 201, 0, 203, 0, 205, 206, 207, 208, 0, 0, 211, 212, 213, 214, 0, 216, 0, 218, 219, 220, 221, 222, 223, 0, 225, 226, 227, 228, 229, 230, 0, 0, 233, 0, 235, 0, 237, 238, 239, 240, 0, 0, 243, 244, 245, 246, 0, 248, 0, 250, 251, 252, 253, 254, 0}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 162, 163, 164, 0, 166, 167, 0, 169, 0, 171, 172, 173, 174, 0, 176, 177, 178, 179, 0, 181, 182, 183, 0, 185, 0, 187, 188, 189, 190, 0, 0, 0, 0, 0, 196, 197, 175, 0, 0, 201, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 0, 213, 214, 215, 168, 0, 0, 0, 220, 0, 0, 223, 0, 0, 0, 0, 228, 229, 191, 0, 0, 233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 0, 245, 246, 247, 184, 0, 0, 0, 252, 0, 0, 0}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 0, 0, 163, 0, 0, 0, 167, 0, 169, 0, 0, 0, 173, 174, 0, 0, 0, 0, 0, 0, 0, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 0, 209, 210, 211, 212, 213, 214, 0, 216, 217, 218, 219, 220, 221, 0, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 0, 241, 242, 243, 244, 245, 246, 0, 248, 249, 250, 251, 252, 253, 0, 255}}, + {{128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 0, 165, 0, 167, 0, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 0, 181, 182, 183, 0, 185, 186, 187, 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}} + }; + static const InvOvlCodeMap invOvlCodeMap; + + if (idx > NofCodePages || idx == 0) throw std::logic_error( "code page index not supported"); + m_cd = &codePage[ idx-1].ar[0]; + m_invcd = &invcodePage[ idx-1].ar[0]; + m_invovlcd = invOvlCodeMap.get( idx-1); + } + + /// \brief Get the unicode character representation of the character ch in this codepage + /// \param[in] ch character in this codepage + /// \return the unicode representation of the passed character + inline UChar ucharcode( char ch) const + { + if ((signed char)ch >= 0) return ch; + return m_cd[ (unsigned int)(unsigned char)ch - 128]; + } + + /// \brief Get the character representation of a unicode character in this codepage + /// \param[in] ch unicode character + /// \return the representation of the passed unicode character in this codepage + inline char invcode( UChar ch) const + { + char rt = 0; + if (ch <= 128) return ch; + if (ch <= 255) rt = m_invcd[ ch - 128]; + if (rt == 0) + { + std::map<unsigned short, unsigned char>::const_iterator fi = m_invovlcd->find( ch); + if (fi == m_invovlcd->end()) return 0; + rt = fi->second; + } + return rt; + } + + /// \brief Evaluate if two code pages are equal + static inline bool is_equal( const IsoLatinCodePage& a, const IsoLatinCodePage& b) + { + return a.m_cd == b.m_cd; + } + +private: + const unsigned short* m_cd; + const unsigned short* m_invcd; + const std::map<unsigned short, unsigned char>* m_invovlcd; +}; + +}} +#endif + + |