diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2014-10-15 15:50:00 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2014-10-15 15:50:00 +0200 |
commit | b37950fecfb3afb53942fba28e4e36d0b8753351 (patch) | |
tree | 52a51bd7b6a808ee43d4422d79d9d0aee5dde4fd /src | |
parent | 1127e07a2a6d6b3c0cbd342396e6c8af7ee54040 (diff) | |
download | crawler-b37950fecfb3afb53942fba28e4e36d0b8753351.tar.gz crawler-b37950fecfb3afb53942fba28e4e36d0b8753351.tar.bz2 |
added file iteration also on Windows
Diffstat (limited to 'src')
-rw-r--r-- | src/libutil/FileUtils.cpp | 59 | ||||
-rwxr-xr-x | src/libutil/Makefile.W32 | 2 | ||||
-rw-r--r-- | src/libutil/win32/tcharutils.cpp | 99 | ||||
-rw-r--r-- | src/libutil/win32/tcharutils.h | 32 |
4 files changed, 191 insertions, 1 deletions
diff --git a/src/libutil/FileUtils.cpp b/src/libutil/FileUtils.cpp index 64b1bb1..975a78c 100644 --- a/src/libutil/FileUtils.cpp +++ b/src/libutil/FileUtils.cpp @@ -7,6 +7,8 @@ #else #define WIN32_MEAN_AND_LEAN #include <windows.h> +#include "util/win32/errormsg.hpp" +#include "win32/tcharutils.h" #endif #include <sstream> @@ -61,10 +63,65 @@ static vector<string> directory_entries_unix( const string &dir, bool absolute, #endif #ifdef _WIN32 -static vector<string> directory_entries_win32( const string &/*dir*/, bool /*absolute*/, bool /* recursive */ ) +static vector<string> directory_entries_win32( const string &dir, bool absolute, bool recursive ) { vector<string> files; + wstringstream wpath; + wpath << utf8toucs2( dir ) << L"\\*."; + + WIN32_FIND_DATAW fd; + HANDLE h = FindFirstFileW( wpath.str( ).c_str( ), &fd ); + if( h == INVALID_HANDLE_VALUE ) { + DWORD err = GetLastError( ); + if( err == ERROR_NO_MORE_FILES ) { + FindClose( h ); + return files; + } + ostringstream ss; + ss << "FindFirstFileW failed with '" << dir << "': " << getLastError( ); + throw runtime_error( ss.str( ) ); + } + + while( h != INVALID_HANDLE_VALUE ) { + if( ( fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) != 0 ) { + if( wcscmp( fd.cFileName, L"." ) == 0 || + wcscmp( fd.cFileName, L".." ) == 0 ) { + goto NEXT; + } + if( recursive ) { + ostringstream ss; + ss << dir << "\\" << wchartoutf8( fd.cFileName ); + vector<string> subfiles = directory_entries_win32( ss.str( ), absolute, recursive ); + files.insert( files.end( ), subfiles.begin( ), subfiles.end( ) ); + } + } else if( ( fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) == 0 ) { + if( absolute ) { + ostringstream ss; + ss << dir << "\\" << wchartoutf8( fd.cFileName ); + files.push_back( ss.str( ) ); + } else { + files.push_back( string( wchartoutf8( fd.cFileName ) ) ); + } + } +NEXT: + if( !FindNextFileW( h, &fd ) ) { + DWORD err = GetLastError( ); + if( err == ERROR_NO_MORE_FILES ) { + // finish iterating + FindClose( h ); + return files; + } else { + FindClose( h ); + ostringstream ss; + ss << "FindNextFileW failed with '" << dir << "': " << getLastError( ); + throw runtime_error( ss.str( ) ); + } + } + } + + FindClose( h ); + return files; } #endif diff --git a/src/libutil/Makefile.W32 b/src/libutil/Makefile.W32 index ec6dcdb..6d81814 100755 --- a/src/libutil/Makefile.W32 +++ b/src/libutil/Makefile.W32 @@ -20,12 +20,14 @@ CPP_OBJS = \ StringUtils.obj \ win32\errormsg.obj \ win32\stringutils.obj \ + win32\tcharutils.obj \ FileUtils.obj DYNAMIC_CPP_OBJS = \ StringUtils.dllobj \ win32\errormsg.dllobj \ win32\stringutils.dllobj \ + win32\tcharutils.dllobj \ FileUtils.dllobj STATIC_LIB = \ diff --git a/src/libutil/win32/tcharutils.cpp b/src/libutil/win32/tcharutils.cpp new file mode 100644 index 0000000..a757aa2 --- /dev/null +++ b/src/libutil/win32/tcharutils.cpp @@ -0,0 +1,99 @@ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "tcharutils.h" + +#include <string.h> + +using namespace std; + +std::wstring +utf8toucs2(const char*p, const char*e) { + wstring ucs2; + if (e <= p) return ucs2; + ucs2.reserve(3*(e-p)); + wchar_t w = 0; + char nb = 0; + while (p < e) { + char c = *p; + if (nb--) { + w = (w<<6) + (c & 0x3f); + } else if ((0xE0 & c) == 0xC0) { + w = c & 0x1F; + nb = 0; + } else if ((0xF0 & c) == 0xE0) { + w = c & 0x0F; + nb = 1; + } else if ((0xF8 & c) == 0xF0) { + w = c & 0x07; + nb = 2; + } else { + w = (w<<6) + (c&0x7F); + ucs2 += w; + w = 0; + nb = 0; + } + p++; + } + return ucs2; +} +std::wstring +utf8toucs2(const char* p) { + return utf8toucs2(p, p+strlen(p)); +} +std::wstring +utf8toucs2(const std::string& utf8) { + const char* p = utf8.c_str(); + const char* e = p + utf8.length(); + return utf8toucs2(p, e); +} +std::string +wchartoutf8(const wchar_t* p, const wchar_t* e) { + string utf8; + utf8.reserve((int)(1.5*(e-p))); + while (p < e) { + wchar_t c = *p; + if (c < 0x80) { + utf8 += (char)c; + } else if (c < 0x800) { + char c2 = (char)((c & 0x3f) | 0x80); + utf8 += (c>>6) | 0xc0; + utf8 += c2; + } else if (c < 0x10000) { + char c3 = (char)((c & 0x3f) | 0x80); + char c2 = (char)(((c>>6) & 0x3f) | 0x80); + utf8 += (c>>12) | 0xe0; + utf8 += c2; + utf8 += c3; + } + p++; + } + return utf8; +} +std::string +wchartoutf8(const wchar_t* p) { + return wchartoutf8(p, p+wcslen(p)); +} +std::string +wchartoutf8(const std::wstring& wchar) { + const wchar_t *p = wchar.c_str(); + const wchar_t *e = p+wchar.length(); + return wchartoutf8(p, e); +} diff --git a/src/libutil/win32/tcharutils.h b/src/libutil/win32/tcharutils.h new file mode 100644 index 0000000..40022fd --- /dev/null +++ b/src/libutil/win32/tcharutils.h @@ -0,0 +1,32 @@ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef _TCHAR_UTILS_H_ +#define _TCHAR_UTILS_H_ + +#include <wchar.h> +#include <string> + +std::string wchartoutf8(const wchar_t*); +std::wstring utf8toucs2(const char*); +std::string wchartoutf8(const std::wstring&); +std::wstring utf8toucs2(const std::string&); + +#endif |