diff options
-rw-r--r-- | src/GNUmakefile | 1 | ||||
-rw-r--r-- | src/modules/urlnormalizer/googleurl/GNUmakefile | 1 | ||||
-rw-r--r-- | src/modules/urlnormalizer/simpleurl/GNUmakefile | 1 | ||||
-rw-r--r-- | tests/googleurl/GNUmakefile | 43 | ||||
-rwxr-xr-x | tests/googleurl/exec_test | 12 | ||||
-rw-r--r-- | tests/googleurl/test1.MUST | 1 | ||||
-rw-r--r-- | tests/googleurl/test1.cpp | 37 | ||||
-rw-r--r-- | tests/googleurl/test100.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test101.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test102.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test103.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test104.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test105.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test2.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test2.cpp | 37 | ||||
-rw-r--r-- | tests/googleurl/test3.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test4.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test5.MUST | 7 | ||||
-rw-r--r-- | tests/googleurl/test6.MUST | 7 | ||||
-rw-r--r-- | tests/url/GNUmakefile | 8 | ||||
-rw-r--r-- | tests/url/test1.cpp | 21 | ||||
-rw-r--r-- | tests/url/test2.cpp | 20 |
22 files changed, 24 insertions, 235 deletions
diff --git a/src/GNUmakefile b/src/GNUmakefile index 5a25794..9351c13 100644 --- a/src/GNUmakefile +++ b/src/GNUmakefile @@ -53,6 +53,7 @@ $(LOCAL_STATIC_LIB): $(LOCAL_STATIC_LIB_OBJS) ar rcs $(LOCAL_STATIC_LIB) $(LOCAL_STATIC_LIB_OBJS) local_clean: + @-rm -f $(LOCAL_STATIC_LIB) local_distclean: diff --git a/src/modules/urlnormalizer/googleurl/GNUmakefile b/src/modules/urlnormalizer/googleurl/GNUmakefile index cd52be9..6c5283f 100644 --- a/src/modules/urlnormalizer/googleurl/GNUmakefile +++ b/src/modules/urlnormalizer/googleurl/GNUmakefile @@ -14,6 +14,7 @@ INCLUDE_LDFLAGS = \ INCLUDE_LIBS = \ $(TOPDIR)/googleurl/libgoogleurl.a \ + $(TOPDIR)/src/libcrawlingwolf.a \ -licui18n -licuuc DYNAMIC_MODULE = \ diff --git a/src/modules/urlnormalizer/simpleurl/GNUmakefile b/src/modules/urlnormalizer/simpleurl/GNUmakefile index b6fc0a0..46f0e27 100644 --- a/src/modules/urlnormalizer/simpleurl/GNUmakefile +++ b/src/modules/urlnormalizer/simpleurl/GNUmakefile @@ -12,6 +12,7 @@ INCLUDE_CXXFLAGS = \ INCLUDE_LDFLAGS = \ INCLUDE_LIBS = \ + $(TOPDIR)/src/libcrawlingwolf.a DYNAMIC_MODULE = \ mod_urlnormalizer_simple.so diff --git a/tests/googleurl/GNUmakefile b/tests/googleurl/GNUmakefile deleted file mode 100644 index be8c4f0..0000000 --- a/tests/googleurl/GNUmakefile +++ /dev/null @@ -1,43 +0,0 @@ -TOPDIR = ../.. - -SUBDIRS = - -INCLUDE_DIRS = \ - -I$(TOPDIR)/src - -INCLUDE_LDFLAGS = - -INCLUDE_LIBS = \ - $(TOPDIR)/src/libcrawlingwolf.a \ - $(TOPDIR)/googleurl/libgoogleurl.a \ - -licui18n -licuuc - -TEST_CPP_BINS = \ - test1$(EXE) \ - test2$(EXE) - -OBJS = - --include $(TOPDIR)/makefiles/gmake/sub.mk - -local_all: - -local_clean: - -@rm -f *.db *.db-journal 2>/dev/null - -@rm -f *.RES *.DIFF - -local_distclean: - -local_test: - @-./exec_test test1 test1 "parse illegal protocol" www.andreasbaumann.cc - @-./exec_test test1 test2 "parse normal start URL without slash" http://www.andreasbaumann.cc - @-./exec_test test1 test3 "parse normal start URL with slash" http://www.andreasbaumann.cc/ - @-./exec_test test1 test4 "parse normal URL" http://www.andreasbaumann.cc/index.html - @-./exec_test test1 test5 "parse normal URL with default port" http://www.andreasbaumann.cc:80/index.html - @-./exec_test test1 test6 "parse normal URL with non-standard port" http://www.andreasbaumann.cc:8080/index.html - @-./exec_test test2 test100 "normalize a relative URL" http://www.andreasbaumann.cc/index.html /software.html - @-./exec_test test2 test101 "absolute URL in HTML content" http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html - @-./exec_test test2 test102 "path normalization, relative path" http://www.andreasbaumann.cc/adir/index.html bdir/page.html - @-./exec_test test2 test103 "path normalization, absolute path" http://www.andreasbaumann.cc/adir/index.html /bdir/page.html - @-./exec_test test2 test104 "path normalization, current dir" http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html - @-./exec_test test2 test105 "path normalization, previous dir" http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html diff --git a/tests/googleurl/exec_test b/tests/googleurl/exec_test deleted file mode 100755 index 92b656f..0000000 --- a/tests/googleurl/exec_test +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -BINARY=$1 -shift -ID=$1 -shift -TITLE=$1 -shift - -printf "$ID: $TITLE .. " -./$BINARY $* >$ID.RES 2>&1 -diff $ID.MUST $ID.RES > $ID.DIFF && printf "OK\n" || printf "ERROR\n" diff --git a/tests/googleurl/test1.MUST b/tests/googleurl/test1.MUST deleted file mode 100644 index 1b6af48..0000000 --- a/tests/googleurl/test1.MUST +++ /dev/null @@ -1 +0,0 @@ -Illegal URL! diff --git a/tests/googleurl/test1.cpp b/tests/googleurl/test1.cpp deleted file mode 100644 index 278be5e..0000000 --- a/tests/googleurl/test1.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "URL.hpp" -#include "GoogleURLNormalizer.hpp" - -#include <iostream> -#include <string> - -using namespace std; - -int main( int argc, char *argv[] ) -{ - if( argc != 2 ) { - cerr << "usage: test1 <url>\n" << endl; - return 1; - } - - char *urlstring = argv[1]; - - URLNormalizer *normalizer = new GoogleURLNormalizer( ); - URL url = normalizer->parseUrl( urlstring ); - delete normalizer; - - if( url == URL::Null ) { - cerr << "Illegal URL!" << endl; - return 1; - } - - cout << "protocol: " << url.protocol( ) << endl - << "host: " << url.host( ) << endl - << "port: " << url.port( ) << endl - << "path: " << url.path( ) << endl - << "query: " << url.query( ) << endl - << "fragment: " << url.fragment( ) << endl; - - cout << "URL: " << url << endl; - - return 0; -} diff --git a/tests/googleurl/test100.MUST b/tests/googleurl/test100.MUST deleted file mode 100644 index 40fb968..0000000 --- a/tests/googleurl/test100.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /software.html -query: -fragment: -URL: http://www.andreasbaumann.cc/software.html diff --git a/tests/googleurl/test101.MUST b/tests/googleurl/test101.MUST deleted file mode 100644 index b4c5eca..0000000 --- a/tests/googleurl/test101.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.yahoo.com -port: 80 -path: /page.html -query: -fragment: -URL: http://www.yahoo.com/page.html diff --git a/tests/googleurl/test102.MUST b/tests/googleurl/test102.MUST deleted file mode 100644 index 7482d26..0000000 --- a/tests/googleurl/test102.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /adir/bdir/page.html -query: -fragment: -URL: http://www.andreasbaumann.cc/adir/bdir/page.html diff --git a/tests/googleurl/test103.MUST b/tests/googleurl/test103.MUST deleted file mode 100644 index 085a06c..0000000 --- a/tests/googleurl/test103.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /bdir/page.html -query: -fragment: -URL: http://www.andreasbaumann.cc/bdir/page.html diff --git a/tests/googleurl/test104.MUST b/tests/googleurl/test104.MUST deleted file mode 100644 index 7482d26..0000000 --- a/tests/googleurl/test104.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /adir/bdir/page.html -query: -fragment: -URL: http://www.andreasbaumann.cc/adir/bdir/page.html diff --git a/tests/googleurl/test105.MUST b/tests/googleurl/test105.MUST deleted file mode 100644 index 085a06c..0000000 --- a/tests/googleurl/test105.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /bdir/page.html -query: -fragment: -URL: http://www.andreasbaumann.cc/bdir/page.html diff --git a/tests/googleurl/test2.MUST b/tests/googleurl/test2.MUST deleted file mode 100644 index 92158a6..0000000 --- a/tests/googleurl/test2.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: / -query: -fragment: -URL: http://www.andreasbaumann.cc/ diff --git a/tests/googleurl/test2.cpp b/tests/googleurl/test2.cpp deleted file mode 100644 index 8dee689..0000000 --- a/tests/googleurl/test2.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "URL.hpp" -#include "GoogleURLNormalizer.hpp" - -#include <iostream> -#include <string> - -using namespace std; - -int main( int argc, char *argv[] ) -{ - if( argc != 3 ) { - cerr << "usage: test2 <base url> <partial url>\n" << endl; - return 1; - } - - char *baseUrlString = argv[1]; - char *partialUrlString = argv[2]; - - URLNormalizer *normalizer = new GoogleURLNormalizer( ); - - URL baseUrl = normalizer->parseUrl( baseUrlString ); - - URL url = normalizer->normalize( baseUrl, partialUrlString ); - - cout << "protocol: " << url.protocol( ) << endl - << "host: " << url.host( ) << endl - << "port: " << url.port( ) << endl - << "path: " << url.path( ) << endl - << "query: " << url.query( ) << endl - << "fragment: " << url.fragment( ) << endl; - - cout << "URL: " << url << endl; - - delete normalizer; - - return 0; -} diff --git a/tests/googleurl/test3.MUST b/tests/googleurl/test3.MUST deleted file mode 100644 index 92158a6..0000000 --- a/tests/googleurl/test3.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: / -query: -fragment: -URL: http://www.andreasbaumann.cc/ diff --git a/tests/googleurl/test4.MUST b/tests/googleurl/test4.MUST deleted file mode 100644 index 0649e10..0000000 --- a/tests/googleurl/test4.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /index.html -query: -fragment: -URL: http://www.andreasbaumann.cc/index.html diff --git a/tests/googleurl/test5.MUST b/tests/googleurl/test5.MUST deleted file mode 100644 index 0649e10..0000000 --- a/tests/googleurl/test5.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 80 -path: /index.html -query: -fragment: -URL: http://www.andreasbaumann.cc/index.html diff --git a/tests/googleurl/test6.MUST b/tests/googleurl/test6.MUST deleted file mode 100644 index de9b556..0000000 --- a/tests/googleurl/test6.MUST +++ /dev/null @@ -1,7 +0,0 @@ -protocol: http -host: www.andreasbaumann.cc -port: 8080 -path: /index.html -query: -fragment: -URL: http://www.andreasbaumann.cc:8080/index.html diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile index 62ca063..5a3ca08 100644 --- a/tests/url/GNUmakefile +++ b/tests/url/GNUmakefile @@ -3,16 +3,12 @@ TOPDIR = ../.. SUBDIRS = INCLUDE_DIRS = \ - -I$(TOPDIR)/src \ - -I$(TOPDIR)/src/modules/urlnormalizer/simpleurl \ - -I$(TOPDIR)/src/modules/urlnormalizer/googleurl + -I$(TOPDIR)/src INCLUDE_LDFLAGS = INCLUDE_LIBS = \ - $(TOPDIR)/src/libcrawlingwolf.a \ - $(TOPDIR)/googleurl/libgoogleurl.a \ - -licui18n -licuuc + $(TOPDIR)/src/libcrawlingwolf.a TEST_CPP_BINS = \ test1$(EXE) \ diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp index 5fd3e90..732d52e 100644 --- a/tests/url/test1.cpp +++ b/tests/url/test1.cpp @@ -1,7 +1,8 @@ #include "URL.hpp" -#include "SimpleURLNormalizer.hpp" -#include "GoogleURLNormalizer.hpp" +#include "URLNormalizer.hpp" +#include "ModuleLoader.hpp" +#include <vector> #include <iostream> #include <string> #include <cstring> @@ -17,18 +18,16 @@ int main( int argc, char *argv[] ) char *method = argv[1]; char *urlstring = argv[2]; + + vector<string> modules; + modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" ); + modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" ); + ModuleLoader<URLNormalizer> urlNormalizers( modules ); - URLNormalizer *normalizer; - if( strcmp( method, "simple" ) == 0 ) { - normalizer = new SimpleURLNormalizer( ); - } else if( strcmp( method, "google" ) == 0 ) { - normalizer = new GoogleURLNormalizer( ); - } else { - cerr << "illegal method '" << method << "'" << endl; - } + URLNormalizer *normalizer = urlNormalizers.create( method ); URL url = normalizer->parseUrl( urlstring ); - delete normalizer; + urlNormalizers.destroy( normalizer ); if( url == URL::Null ) { cerr << "Illegal URL!" << endl; diff --git a/tests/url/test2.cpp b/tests/url/test2.cpp index fb660a3..1d57629 100644 --- a/tests/url/test2.cpp +++ b/tests/url/test2.cpp @@ -1,6 +1,6 @@ #include "URL.hpp" -#include "SimpleURLNormalizer.hpp" -#include "GoogleURLNormalizer.hpp" +#include "URLNormalizer.hpp" +#include "ModuleLoader.hpp" #include <iostream> #include <string> @@ -19,14 +19,12 @@ int main( int argc, char *argv[] ) char *baseUrlString = argv[2]; char *partialUrlString = argv[3]; - URLNormalizer *normalizer; - if( strcmp( method, "simple" ) == 0 ) { - normalizer = new SimpleURLNormalizer( ); - } else if( strcmp( method, "google" ) == 0 ) { - normalizer = new GoogleURLNormalizer( ); - } else { - cerr << "illegal method '" << method << "'" << endl; - } + vector<string> modules; + modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" ); + modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" ); + ModuleLoader<URLNormalizer> urlNormalizers( modules ); + + URLNormalizer *normalizer = urlNormalizers.create( method ); URL baseUrl = normalizer->parseUrl( baseUrlString ); @@ -41,7 +39,7 @@ int main( int argc, char *argv[] ) cout << "URL: " << url << endl; - delete normalizer; + urlNormalizers.destroy( normalizer ); return 0; } |