summaryrefslogtreecommitdiff
path: root/src/libutil/win32/tcharutils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/libutil/win32/tcharutils.cpp')
-rw-r--r--src/libutil/win32/tcharutils.cpp99
1 files changed, 99 insertions, 0 deletions
diff --git a/src/libutil/win32/tcharutils.cpp b/src/libutil/win32/tcharutils.cpp
new file mode 100644
index 0000000..a757aa2
--- /dev/null
+++ b/src/libutil/win32/tcharutils.cpp
@@ -0,0 +1,99 @@
+/* This file is part of Strigi Desktop Search
+ *
+ * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "tcharutils.h"
+
+#include <string.h>
+
+using namespace std;
+
+/**
+ * Decode the UTF-8 bytes in the range [p, e) into a wide string.
+ *
+ * Code points up to U+FFFF become one wchar_t each. Code points above
+ * U+FFFF are stored as a single wchar_t when wchar_t is at least 32 bits
+ * wide and as a UTF-16 surrogate pair otherwise. Malformed input (a stray
+ * continuation byte, an invalid lead byte, a sequence cut short by the end
+ * of the range or by a non-continuation byte, or a value above U+10FFFF)
+ * produces U+FFFD instead of a bogus character. Overlong encodings are
+ * not rejected.
+ *
+ * @param p pointer to the first byte to decode
+ * @param e pointer one past the last byte to decode
+ * @return the decoded string; empty when e <= p
+ */
+std::wstring
+utf8toucs2(const char* p, const char* e) {
+    std::wstring ucs2;
+    if (e <= p) return ucs2;
+    // A UTF-8 sequence never decodes to more wide characters than it has
+    // bytes, so one slot per input byte is always enough.
+    ucs2.reserve(static_cast<std::wstring::size_type>(e - p));
+    const wchar_t replacement = static_cast<wchar_t>(0xFFFD);
+    while (p < e) {
+        // Work on unsigned values so the bit tests do not depend on
+        // whether plain char is signed.
+        const unsigned char lead = static_cast<unsigned char>(*p);
+        ++p;
+        unsigned long cp = 0;
+        int trailing = 0;
+        if (lead < 0x80) {
+            cp = lead;
+        } else if ((lead & 0xE0) == 0xC0) {
+            cp = lead & 0x1F;
+            trailing = 1;
+        } else if ((lead & 0xF0) == 0xE0) {
+            cp = lead & 0x0F;
+            trailing = 2;
+        } else if ((lead & 0xF8) == 0xF0) {
+            cp = lead & 0x07;
+            trailing = 3;
+        } else {
+            // Stray continuation byte or a byte that can never start a
+            // sequence.
+            ucs2 += replacement;
+            continue;
+        }
+        bool valid = true;
+        for (; trailing > 0; --trailing) {
+            // The offending byte is left unconsumed so that it is examined
+            // again as the start of the next sequence.
+            if (p == e || (static_cast<unsigned char>(*p) & 0xC0) != 0x80) {
+                valid = false;
+                break;
+            }
+            cp = (cp << 6) | (static_cast<unsigned char>(*p) & 0x3F);
+            ++p;
+        }
+        if (!valid || cp > 0x10FFFF) {
+            ucs2 += replacement;
+        } else if (cp > 0xFFFF && sizeof(wchar_t) < 4) {
+            // 16-bit wchar_t cannot hold the value: split it into a
+            // UTF-16 surrogate pair.
+            cp -= 0x10000;
+            ucs2 += static_cast<wchar_t>(0xD800 + (cp >> 10));
+            ucs2 += static_cast<wchar_t>(0xDC00 + (cp & 0x3FF));
+        } else {
+            ucs2 += static_cast<wchar_t>(cp);
+        }
+    }
+    return ucs2;
+}
+/**
+ * Decode a NUL-terminated UTF-8 string into a wide string.
+ *
+ * @param p pointer to a NUL-terminated byte string; it is passed straight
+ *          to strlen(), so it must not be null
+ * @return the result of the range overload applied to [p, p + strlen(p))
+ */
+std::wstring
+utf8toucs2(const char* p) {
+    const char* const end = p + strlen(p);
+    return utf8toucs2(p, end);
+}
+/**
+ * Decode a UTF-8 std::string into a wide string.
+ *
+ * The whole string is converted, using its stored length rather than
+ * stopping at the first NUL byte.
+ *
+ * @param utf8 the bytes to decode
+ * @return the result of the range overload applied to the string's bytes
+ */
+std::wstring
+utf8toucs2(const std::string& utf8) {
+    const char* const begin = utf8.c_str();
+    return utf8toucs2(begin, begin + utf8.length());
+}
+std::string
+wchartoutf8(const wchar_t* p, const wchar_t* e) {
+ string utf8;
+ utf8.reserve((int)(1.5*(e-p)));
+ while (p < e) {
+ wchar_t c = *p;
+ if (c < 0x80) {
+ utf8 += (char)c;
+ } else if (c < 0x800) {
+ char c2 = (char)((c & 0x3f) | 0x80);
+ utf8 += (c>>6) | 0xc0;
+ utf8 += c2;
+ } else if (c < 0x10000) {
+ char c3 = (char)((c & 0x3f) | 0x80);
+ char c2 = (char)(((c>>6) & 0x3f) | 0x80);
+ utf8 += (c>>12) | 0xe0;
+ utf8 += c2;
+ utf8 += c3;
+ }
+ p++;
+ }
+ return utf8;
+}
+/**
+ * Encode a NUL-terminated wide string as UTF-8.
+ *
+ * @param p pointer to a NUL-terminated wide string; it is passed straight
+ *          to wcslen(), so it must not be null
+ * @return the result of the range overload applied to [p, p + wcslen(p))
+ */
+std::string
+wchartoutf8(const wchar_t* p) {
+    const wchar_t* const end = p + wcslen(p);
+    return wchartoutf8(p, end);
+}
+/**
+ * Encode a std::wstring as UTF-8.
+ *
+ * The whole string is converted, using its stored length rather than
+ * stopping at the first NUL character.
+ *
+ * @param wchar the wide characters to encode
+ * @return the result of the range overload applied to the string's contents
+ */
+std::string
+wchartoutf8(const std::wstring& wchar) {
+    const wchar_t* const begin = wchar.c_str();
+    return wchartoutf8(begin, begin + wchar.length());
+}