summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <abaumann@yahoo.com>2014-10-15 15:50:00 +0200
committerAndreas Baumann <abaumann@yahoo.com>2014-10-15 15:50:00 +0200
commitb37950fecfb3afb53942fba28e4e36d0b8753351 (patch)
tree52a51bd7b6a808ee43d4422d79d9d0aee5dde4fd
parent1127e07a2a6d6b3c0cbd342396e6c8af7ee54040 (diff)
downloadcrawler-b37950fecfb3afb53942fba28e4e36d0b8753351.tar.gz
crawler-b37950fecfb3afb53942fba28e4e36d0b8753351.tar.bz2
added file iteration also on Windows
-rwxr-xr-xREADME.3rdPARTY45
-rw-r--r--src/libutil/FileUtils.cpp59
-rwxr-xr-xsrc/libutil/Makefile.W322
-rw-r--r--src/libutil/win32/tcharutils.cpp99
-rw-r--r--src/libutil/win32/tcharutils.h32
-rw-r--r--tests/utils/test6.MUST.WIN325
-rwxr-xr-xtests/utils/test6.cpp24
7 files changed, 255 insertions, 11 deletions
diff --git a/README.3rdPARTY b/README.3rdPARTY
index 31c7c05..215b9da 100755
--- a/README.3rdPARTY
+++ b/README.3rdPARTY
@@ -338,7 +338,26 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
tolua
------
+-----/* This file is part of Strigi Desktop Search
+ *
+ * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
http://www.tecgraf.puc-rio.br/~celes/tolua/
@@ -386,3 +405,27 @@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+tcharutils
+----------
+
+http://www.vandenoever.info/software/strigi/
+
+This file is part of Strigi Desktop Search
+
+Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with this library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.
diff --git a/src/libutil/FileUtils.cpp b/src/libutil/FileUtils.cpp
index 64b1bb1..975a78c 100644
--- a/src/libutil/FileUtils.cpp
+++ b/src/libutil/FileUtils.cpp
@@ -7,6 +7,8 @@
#else
#define WIN32_MEAN_AND_LEAN
#include <windows.h>
+#include "util/win32/errormsg.hpp"
+#include "win32/tcharutils.h"
#endif
#include <sstream>
@@ -61,10 +63,65 @@ static vector<string> directory_entries_unix( const string &dir, bool absolute,
#endif
#ifdef _WIN32
-static vector<string> directory_entries_win32( const string &/*dir*/, bool /*absolute*/, bool /* recursive */ )
+// Lists the files found in 'dir' using the wide-character Win32 find API.
+//
+// dir        UTF-8 encoded name of the directory to scan
+// absolute   if true every entry is returned as "<dir>\<name>", otherwise
+//            only the bare file name is returned
+// recursive  if true subdirectories are scanned too and their files are
+//            appended to the result
+//
+// Directories themselves are never part of the result. Throws
+// runtime_error if the directory cannot be enumerated.
+static vector<string> directory_entries_win32( const string &dir, bool absolute, bool recursive )
{
vector<string> files;
+
+    // "\*" matches every entry of the directory. The pattern "\*." would
+    // only match names without an extension and skip files like "a.txt".
+    const wstring pattern = utf8toucs2( dir ) + L"\\*";
+
+    WIN32_FIND_DATAW fd;
+    HANDLE h = FindFirstFileW( pattern.c_str( ), &fd );
+    if( h == INVALID_HANDLE_VALUE ) {
+        DWORD err = GetLastError( );
+        if( err == ERROR_FILE_NOT_FOUND || err == ERROR_NO_MORE_FILES ) {
+            // nothing matched; there is no valid handle to close here
+            return files;
+        }
+        ostringstream ss;
+        ss << "FindFirstFileW failed with '" << dir << "': " << getLastError( );
+        throw runtime_error( ss.str( ) );
+    }
+
+    try {
+        do {
+            if( ( fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) != 0 ) {
+                // skip the self and parent links, they would recurse forever
+                if( wcscmp( fd.cFileName, L"." ) == 0 ||
+                    wcscmp( fd.cFileName, L".." ) == 0 ) {
+                    continue;
+                }
+                if( recursive ) {
+                    const string subdir = dir + "\\" + wchartoutf8( fd.cFileName );
+                    const vector<string> subfiles = directory_entries_win32( subdir, absolute, recursive );
+                    files.insert( files.end( ), subfiles.begin( ), subfiles.end( ) );
+                }
+            } else if( absolute ) {
+                files.push_back( dir + "\\" + wchartoutf8( fd.cFileName ) );
+            } else {
+                files.push_back( wchartoutf8( fd.cFileName ) );
+            }
+        } while( FindNextFileW( h, &fd ) );
+    } catch( ... ) {
+        // do not leak the find handle when the recursion or a string
+        // operation throws
+        FindClose( h );
+        throw;
+    }
+
+    // FindNextFileW returned FALSE: anything but ERROR_NO_MORE_FILES is a
+    // real failure
+    if( GetLastError( ) != ERROR_NO_MORE_FILES ) {
+        // build the message first, FindClose may overwrite the last error
+        ostringstream ss;
+        ss << "FindNextFileW failed with '" << dir << "': " << getLastError( );
+        FindClose( h );
+        throw runtime_error( ss.str( ) );
+    }
+
+    FindClose( h );
+
return files;
}
#endif
diff --git a/src/libutil/Makefile.W32 b/src/libutil/Makefile.W32
index ec6dcdb..6d81814 100755
--- a/src/libutil/Makefile.W32
+++ b/src/libutil/Makefile.W32
@@ -20,12 +20,14 @@ CPP_OBJS = \
StringUtils.obj \
win32\errormsg.obj \
win32\stringutils.obj \
+ win32\tcharutils.obj \
FileUtils.obj
DYNAMIC_CPP_OBJS = \
StringUtils.dllobj \
win32\errormsg.dllobj \
win32\stringutils.dllobj \
+ win32\tcharutils.dllobj \
FileUtils.dllobj
STATIC_LIB = \
diff --git a/src/libutil/win32/tcharutils.cpp b/src/libutil/win32/tcharutils.cpp
new file mode 100644
index 0000000..a757aa2
--- /dev/null
+++ b/src/libutil/win32/tcharutils.cpp
@@ -0,0 +1,99 @@
+/* This file is part of Strigi Desktop Search
+ *
+ * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "tcharutils.h"
+
+#include <string.h>
+
+using namespace std;
+
+// Appends one decoded code point to 'out'. Where wchar_t has only 16 bits
+// (as on Windows) a code point above U+FFFF is stored as a UTF-16 surrogate
+// pair instead of being truncated; values beyond U+10FFFF cannot be
+// represented that way and become U+FFFD.
+static void
+appendWide(std::wstring& out, unsigned long cp) {
+    if (sizeof(wchar_t) > 2 || cp < 0x10000UL) {
+        out += static_cast<wchar_t>(cp);
+    } else if (cp <= 0x10FFFFUL) {
+        cp -= 0x10000UL;
+        out += static_cast<wchar_t>(0xD800UL + (cp >> 10));
+        out += static_cast<wchar_t>(0xDC00UL + (cp & 0x3FFUL));
+    } else {
+        out += static_cast<wchar_t>(0xFFFD);
+    }
+}
+
+// Decodes the UTF-8 bytes in the range [p, e) into a wide string.
+// Returns an empty string if the range is empty or reversed. The input is
+// not validated: malformed sequences yield unspecified characters but never
+// read outside of the range.
+std::wstring
+utf8toucs2(const char*p, const char*e) {
+    std::wstring ucs2;
+    if (e <= p) return ucs2;
+    // every output unit consumes at least one input byte
+    ucs2.reserve(e-p);
+    unsigned long cp = 0;
+    // number of continuation bytes still expected before the final byte of
+    // the current sequence (the final byte is handled by the last branch)
+    int pending = 0;
+    for (; p < e; ++p) {
+        const unsigned char c = static_cast<unsigned char>(*p);
+        if (pending > 0) {
+            --pending;
+            cp = (cp << 6) + (c & 0x3F);
+        } else if ((c & 0xE0) == 0xC0) {
+            // lead byte of a 2 byte sequence
+            cp = c & 0x1F;
+        } else if ((c & 0xF0) == 0xE0) {
+            // lead byte of a 3 byte sequence
+            cp = c & 0x0F;
+            pending = 1;
+        } else if ((c & 0xF8) == 0xF0) {
+            // lead byte of a 4 byte sequence
+            cp = c & 0x07;
+            pending = 2;
+        } else {
+            // ASCII byte or last byte of a multi byte sequence
+            cp = (cp << 6) + (c & 0x7F);
+            appendWide(ucs2, cp);
+            cp = 0;
+        }
+    }
+    return ucs2;
+}
+
+// Decodes a NUL-terminated UTF-8 string; a NULL pointer gives an empty
+// result.
+std::wstring
+utf8toucs2(const char* p) {
+    if (!p) return std::wstring();
+    return utf8toucs2(p, p+strlen(p));
+}
+
+// Decodes a UTF-8 encoded std::string (embedded NUL bytes are converted
+// as well).
+std::wstring
+utf8toucs2(const std::string& utf8) {
+    const char* p = utf8.c_str();
+    const char* e = p + utf8.length();
+    return utf8toucs2(p, e);
+}
+// Appends the UTF-8 encoding (1 to 4 bytes) of the code point 'cp' to
+// 'out'. Values beyond U+10FFFF are not Unicode and are written as U+FFFD.
+static void
+appendUtf8(std::string& out, unsigned long cp) {
+    if (cp > 0x10FFFFUL) cp = 0xFFFDUL;
+    if (cp < 0x80UL) {
+        out += static_cast<char>(cp);
+    } else if (cp < 0x800UL) {
+        out += static_cast<char>(0xC0 | (cp >> 6));
+        out += static_cast<char>(0x80 | (cp & 0x3F));
+    } else if (cp < 0x10000UL) {
+        out += static_cast<char>(0xE0 | (cp >> 12));
+        out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
+        out += static_cast<char>(0x80 | (cp & 0x3F));
+    } else {
+        out += static_cast<char>(0xF0 | (cp >> 18));
+        out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
+        out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
+        out += static_cast<char>(0x80 | (cp & 0x3F));
+    }
+}
+
+// Encodes the wide characters in the range [p, e) as UTF-8.
+// Returns an empty string if the range is empty or reversed. A UTF-16
+// surrogate pair is combined into one 4 byte sequence; an unpaired
+// surrogate keeps its 3 byte form so that names containing one still
+// survive a round trip.
+std::string
+wchartoutf8(const wchar_t* p, const wchar_t* e) {
+    std::string utf8;
+    if (e <= p) return utf8;
+    utf8.reserve(static_cast<std::string::size_type>(e-p) * 3 / 2);
+    while (p < e) {
+        // the mask drops the sign extension of a signed 32 bit wchar_t
+        unsigned long c = static_cast<unsigned long>(*p++) & 0xFFFFFFFFUL;
+        if (c >= 0xD800UL && c <= 0xDBFFUL && p < e) {
+            const unsigned long low = static_cast<unsigned long>(*p) & 0xFFFFFFFFUL;
+            if (low >= 0xDC00UL && low <= 0xDFFFUL) {
+                c = 0x10000UL + ((c - 0xD800UL) << 10) + (low - 0xDC00UL);
+                ++p;
+            }
+        }
+        appendUtf8(utf8, c);
+    }
+    return utf8;
+}
+
+// Encodes a NUL-terminated wide string; a NULL pointer gives an empty
+// result.
+std::string
+wchartoutf8(const wchar_t* p) {
+    if (!p) return std::string();
+    return wchartoutf8(p, p+wcslen(p));
+}
+
+// Encodes a std::wstring (embedded NUL characters are converted as well).
+std::string
+wchartoutf8(const std::wstring& wchar) {
+    const wchar_t *p = wchar.c_str();
+    const wchar_t *e = p+wchar.length();
+    return wchartoutf8(p, e);
+}
diff --git a/src/libutil/win32/tcharutils.h b/src/libutil/win32/tcharutils.h
new file mode 100644
index 0000000..40022fd
--- /dev/null
+++ b/src/libutil/win32/tcharutils.h
@@ -0,0 +1,32 @@
+/* This file is part of Strigi Desktop Search
+ *
+ * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _TCHAR_UTILS_H_
+#define _TCHAR_UTILS_H_
+
+#include <wchar.h>
+#include <string>
+
+// Conversions between UTF-8 encoded narrow strings and wide-character
+// strings.
+
+// Converts a NUL-terminated wide-character string to UTF-8.
+std::string wchartoutf8(const wchar_t*);
+// Converts a NUL-terminated UTF-8 string to a wide-character string.
+std::wstring utf8toucs2(const char*);
+// Converts a std::wstring to UTF-8, using its full length.
+std::string wchartoutf8(const std::wstring&);
+// Converts a UTF-8 encoded std::string to a wide-character string, using
+// its full length.
+std::wstring utf8toucs2(const std::string&);
+
+#endif
diff --git a/tests/utils/test6.MUST.WIN32 b/tests/utils/test6.MUST.WIN32
new file mode 100644
index 0000000..df3933e
--- /dev/null
+++ b/tests/utils/test6.MUST.WIN32
@@ -0,0 +1,5 @@
+test6.DATA\adir2\adir3\afile4
+test6.DATA\adir2\adir3\afile5
+test6.DATA\adir\afile2
+test6.DATA\adir\afile3
+test6.DATA\afile
diff --git a/tests/utils/test6.cpp b/tests/utils/test6.cpp
index 839f69b..6ad9e10 100755
--- a/tests/utils/test6.cpp
+++ b/tests/utils/test6.cpp
@@ -2,19 +2,25 @@
#include <iostream>
#include <algorithm>
+#include <stdexcept>
using namespace std;
int main( void )
{
- vector<string> entries = directory_entries( "test6.DATA", true, true );
-
- sort( entries.begin( ), entries.end( ) );
-
- vector<string>::const_iterator it, end = entries.end( );
- for( it = entries.begin( ); it != end; it++ ) {
- cout << (*it) << endl;
+    // Prints the recursively collected entries of test6.DATA, one per line.
+    // Any exception raised while listing is reported on stderr and turned
+    // into exit code 1 instead of terminating the test abnormally.
+    try {
+        vector<string> entries = directory_entries( "test6.DATA", true, true );
+
+        // sort the entries so the printed order is always the same
+        sort( entries.begin( ), entries.end( ) );
+
+        vector<string>::const_iterator it, end = entries.end( );
+        for( it = entries.begin( ); it != end; ++it ) {
+            cout << (*it) << endl;
+        }
+
+        return 0;
+    } catch( const exception &e ) {
+        cerr << "ERROR: " << e.what( ) << endl;
+        return 1;
 }
-
- return 0;
}