diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-11 18:42:26 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-11 18:42:26 +0200 |
commit | 8264f32b2233c1f46446c51ea5a2983fd9f76497 (patch) | |
tree | d3f5751d94b54b2effdb2485b180e806f1fe0625 /tests/url | |
parent | 16795b49385577c92dc891dd4788728cc541bdc9 (diff) | |
download | crawler-8264f32b2233c1f46446c51ea5a2983fd9f76497.tar.gz crawler-8264f32b2233c1f46446c51ea5a2983fd9f76497.tar.bz2 |
Google URL normalization works on Windows; test1 must be improved:
there are linking problems (/DSHARED in *.lib normalization libraries
produce clashing registry structures)
Diffstat (limited to 'tests/url')
-rwxr-xr-x | tests/url/Makefile.W32 | 10 | ||||
-rwxr-xr-x | tests/url/test1.cpp | 16 |
2 files changed, 18 insertions, 8 deletions
diff --git a/tests/url/Makefile.W32 b/tests/url/Makefile.W32 index c65000f..cd545dd 100755 --- a/tests/url/Makefile.W32 +++ b/tests/url/Makefile.W32 @@ -11,13 +11,17 @@ INCLUDE_CXXFLAGS = \ INCLUDE_DIRS = \ /I. \ /I$(TOPDIR)\src \ - /I$(TOPDIR)\src\modules\urlnormalizer\simpleurl + /I$(TOPDIR)\src\modules\urlnormalizer\simpleurl \ + /I$(TOPDIR)\src\modules\urlnormalizer\googleurl INCLUDE_LDFLAGS = \ INCLUDE_LIBS = \ $(TOPDIR)\src\crawlingwolf.lib \ - $(TOPDIR)\src\modules\urlnormalizer\simpleurl\simpleurlnormalizer.lib + $(TOPDIR)\src\modules\urlnormalizer\googleurl\googleurlnormalizer.lib \ + $(TOPDIR)\googleurl\googleurl.lib \ + "$(ICU_DIR)\lib\icuuc.lib" +# $(TOPDIR)\src\modules\urlnormalizer\simpleurl\simpleurlnormalizer.lib \ TEST_CPP_BINS = \ test1.exe @@ -36,6 +40,8 @@ local_clean: local_distclean: local_test: + @-copy "$(ICU_DIR)\bin\icuuc49.dll" . + @-copy "$(ICU_DIR)\bin\icudt49.dll" . @-exec_test test1 test1 "parse illegal protocol" simple parse www.andreasbaumann.cc @-exec_test test1 test2 "parse normal start URL without slash" simple parse http://www.andreasbaumann.cc @-exec_test test1 test3 "parse normal start URL with slash" simple parse http://www.andreasbaumann.cc/ diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp index 2512e79..b46e690 100755 --- a/tests/url/test1.cpp +++ b/tests/url/test1.cpp @@ -4,7 +4,7 @@ #include "ModuleLoader.hpp" #else #include "SimpleURLNormalizer.hpp" -//#include "GoogleURLNormalizer.hpp" +#include "GoogleURLNormalizer.hpp" #endif #include <vector> @@ -29,18 +29,22 @@ int main( int argc, char *argv[] ) #ifdef USE_MODULELOADER vector<string> modules; - // modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" ); +#ifndef _WIN32 + modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" ); + modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" ); +#else modules.push_back( 
"..\\..\\src\\modules\\urlnormalizer\\simpleurl\\mod_urlnormalizer_simple.dll" ); - // modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" ); + modules.push_back( "..\\..\\src\\modules\\urlnormalizer\\googleurl\\mod_urlnormalizer_googleurl.dll" ); +#endif ModuleLoader<URLNormalizer> urlNormalizers( modules ); URLNormalizer *normalizer = urlNormalizers.create( method ); #else URLNormalizer *normalizer; if( strcmp( method, "simple" ) == 0 ) { - normalizer = new SimpleURLNormalizer( ); - // } else if( strcmp( method, "google" ) == 0 ) { - // normalizer = new GoogleURLNormalizer( ); +// normalizer = new SimpleURLNormalizer( ); + } else if( strcmp( method, "google" ) == 0 ) { + normalizer = new GoogleURLNormalizer( ); } else { cerr << "Unknown normalization method '" << method << "'" << endl; return 1; |