summary refs log tree commit diff
path: root/tests/url
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-08-11 18:42:26 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-08-11 18:42:26 +0200
commit 8264f32b2233c1f46446c51ea5a2983fd9f76497 (patch)
tree d3f5751d94b54b2effdb2485b180e806f1fe0625 /tests/url
parent 16795b49385577c92dc891dd4788728cc541bdc9 (diff)
download crawler-8264f32b2233c1f46446c51ea5a2983fd9f76497.tar.gz
crawler-8264f32b2233c1f46446c51ea5a2983fd9f76497.tar.bz2
google url normalization works on Windows, test1 must be improved:
there are linking problems (/DSHARED in *.lib normalization libraries produce clashing registry structures)
Diffstat (limited to 'tests/url')
-rwxr-xr-x tests/url/Makefile.W32 10
-rwxr-xr-x tests/url/test1.cpp 16
2 files changed, 18 insertions, 8 deletions
diff --git a/tests/url/Makefile.W32 b/tests/url/Makefile.W32
index c65000f..cd545dd 100755
--- a/tests/url/Makefile.W32
+++ b/tests/url/Makefile.W32
@@ -11,13 +11,17 @@ INCLUDE_CXXFLAGS = \
INCLUDE_DIRS = \
/I. \
/I$(TOPDIR)\src \
- /I$(TOPDIR)\src\modules\urlnormalizer\simpleurl
+ /I$(TOPDIR)\src\modules\urlnormalizer\simpleurl \
+ /I$(TOPDIR)\src\modules\urlnormalizer\googleurl
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
$(TOPDIR)\src\crawlingwolf.lib \
- $(TOPDIR)\src\modules\urlnormalizer\simpleurl\simpleurlnormalizer.lib
+ $(TOPDIR)\src\modules\urlnormalizer\googleurl\googleurlnormalizer.lib \
+ $(TOPDIR)\googleurl\googleurl.lib \
+ "$(ICU_DIR)\lib\icuuc.lib"
+# $(TOPDIR)\src\modules\urlnormalizer\simpleurl\simpleurlnormalizer.lib \
TEST_CPP_BINS = \
test1.exe
@@ -36,6 +40,8 @@ local_clean:
local_distclean:
local_test:
+ @-copy "$(ICU_DIR)\bin\icuuc49.dll" .
+ @-copy "$(ICU_DIR)\bin\icudt49.dll" .
@-exec_test test1 test1 "parse illegal protocol" simple parse www.andreasbaumann.cc
@-exec_test test1 test2 "parse normal start URL without slash" simple parse http://www.andreasbaumann.cc
@-exec_test test1 test3 "parse normal start URL with slash" simple parse http://www.andreasbaumann.cc/
diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp
index 2512e79..b46e690 100755
--- a/tests/url/test1.cpp
+++ b/tests/url/test1.cpp
@@ -4,7 +4,7 @@
#include "ModuleLoader.hpp"
#else
#include "SimpleURLNormalizer.hpp"
-//#include "GoogleURLNormalizer.hpp"
+#include "GoogleURLNormalizer.hpp"
#endif
#include <vector>
@@ -29,18 +29,22 @@ int main( int argc, char *argv[] )
#ifdef USE_MODULELOADER
vector<string> modules;
- // modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" );
+#ifndef _WIN32
+ modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" );
+ modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" );
+#else
modules.push_back( "..\\..\\src\\modules\\urlnormalizer\\simpleurl\\mod_urlnormalizer_simple.dll" );
- // modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" );
+ modules.push_back( "..\\..\\src\\modules\\urlnormalizer\\googleurl\\mod_urlnormalizer_googleurl.dll" );
+#endif
ModuleLoader<URLNormalizer> urlNormalizers( modules );
URLNormalizer *normalizer = urlNormalizers.create( method );
#else
URLNormalizer *normalizer;
if( strcmp( method, "simple" ) == 0 ) {
- normalizer = new SimpleURLNormalizer( );
- // } else if( strcmp( method, "google" ) == 0 ) {
- // normalizer = new GoogleURLNormalizer( );
+// normalizer = new SimpleURLNormalizer( );
+ } else if( strcmp( method, "google" ) == 0 ) {
+ normalizer = new GoogleURLNormalizer( );
} else {
cerr << "Unknown normalization method '" << method << "'" << endl;
return 1;