summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/GNUmakefile 1
-rw-r--r-- src/modules/urlnormalizer/googleurl/GNUmakefile 1
-rw-r--r-- src/modules/urlnormalizer/simpleurl/GNUmakefile 1
-rw-r--r-- tests/googleurl/GNUmakefile 43
-rwxr-xr-x tests/googleurl/exec_test 12
-rw-r--r-- tests/googleurl/test1.MUST 1
-rw-r--r-- tests/googleurl/test1.cpp 37
-rw-r--r-- tests/googleurl/test100.MUST 7
-rw-r--r-- tests/googleurl/test101.MUST 7
-rw-r--r-- tests/googleurl/test102.MUST 7
-rw-r--r-- tests/googleurl/test103.MUST 7
-rw-r--r-- tests/googleurl/test104.MUST 7
-rw-r--r-- tests/googleurl/test105.MUST 7
-rw-r--r-- tests/googleurl/test2.MUST 7
-rw-r--r-- tests/googleurl/test2.cpp 37
-rw-r--r-- tests/googleurl/test3.MUST 7
-rw-r--r-- tests/googleurl/test4.MUST 7
-rw-r--r-- tests/googleurl/test5.MUST 7
-rw-r--r-- tests/googleurl/test6.MUST 7
-rw-r--r-- tests/url/GNUmakefile 8
-rw-r--r-- tests/url/test1.cpp 21
-rw-r--r-- tests/url/test2.cpp 20
22 files changed, 24 insertions, 235 deletions
diff --git a/src/GNUmakefile b/src/GNUmakefile
index 5a25794..9351c13 100644
--- a/src/GNUmakefile
+++ b/src/GNUmakefile
@@ -53,6 +53,7 @@ $(LOCAL_STATIC_LIB): $(LOCAL_STATIC_LIB_OBJS)
ar rcs $(LOCAL_STATIC_LIB) $(LOCAL_STATIC_LIB_OBJS)
local_clean:
+ @-rm -f $(LOCAL_STATIC_LIB)
local_distclean:
diff --git a/src/modules/urlnormalizer/googleurl/GNUmakefile b/src/modules/urlnormalizer/googleurl/GNUmakefile
index cd52be9..6c5283f 100644
--- a/src/modules/urlnormalizer/googleurl/GNUmakefile
+++ b/src/modules/urlnormalizer/googleurl/GNUmakefile
@@ -14,6 +14,7 @@ INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
$(TOPDIR)/googleurl/libgoogleurl.a \
+ $(TOPDIR)/src/libcrawlingwolf.a \
-licui18n -licuuc
DYNAMIC_MODULE = \
diff --git a/src/modules/urlnormalizer/simpleurl/GNUmakefile b/src/modules/urlnormalizer/simpleurl/GNUmakefile
index b6fc0a0..46f0e27 100644
--- a/src/modules/urlnormalizer/simpleurl/GNUmakefile
+++ b/src/modules/urlnormalizer/simpleurl/GNUmakefile
@@ -12,6 +12,7 @@ INCLUDE_CXXFLAGS = \
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
+ $(TOPDIR)/src/libcrawlingwolf.a
DYNAMIC_MODULE = \
mod_urlnormalizer_simple.so
diff --git a/tests/googleurl/GNUmakefile b/tests/googleurl/GNUmakefile
deleted file mode 100644
index be8c4f0..0000000
--- a/tests/googleurl/GNUmakefile
+++ /dev/null
@@ -1,43 +0,0 @@
-TOPDIR = ../..
-
-SUBDIRS =
-
-INCLUDE_DIRS = \
- -I$(TOPDIR)/src
-
-INCLUDE_LDFLAGS =
-
-INCLUDE_LIBS = \
- $(TOPDIR)/src/libcrawlingwolf.a \
- $(TOPDIR)/googleurl/libgoogleurl.a \
- -licui18n -licuuc
-
-TEST_CPP_BINS = \
- test1$(EXE) \
- test2$(EXE)
-
-OBJS =
-
--include $(TOPDIR)/makefiles/gmake/sub.mk
-
-local_all:
-
-local_clean:
- -@rm -f *.db *.db-journal 2>/dev/null
- -@rm -f *.RES *.DIFF
-
-local_distclean:
-
-local_test:
- @-./exec_test test1 test1 "parse illegal protocol" www.andreasbaumann.cc
- @-./exec_test test1 test2 "parse normal start URL without slash" http://www.andreasbaumann.cc
- @-./exec_test test1 test3 "parse normal start URL with slash" http://www.andreasbaumann.cc/
- @-./exec_test test1 test4 "parse normal URL" http://www.andreasbaumann.cc/index.html
- @-./exec_test test1 test5 "parse normal URL with default port" http://www.andreasbaumann.cc:80/index.html
- @-./exec_test test1 test6 "parse normal URL with non-standard port" http://www.andreasbaumann.cc:8080/index.html
- @-./exec_test test2 test100 "normalize a relative URL" http://www.andreasbaumann.cc/index.html /software.html
- @-./exec_test test2 test101 "absolute URL in HTML content" http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html
- @-./exec_test test2 test102 "path normalization, relative path" http://www.andreasbaumann.cc/adir/index.html bdir/page.html
- @-./exec_test test2 test103 "path normalization, absolute path" http://www.andreasbaumann.cc/adir/index.html /bdir/page.html
- @-./exec_test test2 test104 "path normalization, current dir" http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html
- @-./exec_test test2 test105 "path normalization, previous dir" http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html
diff --git a/tests/googleurl/exec_test b/tests/googleurl/exec_test
deleted file mode 100755
index 92b656f..0000000
--- a/tests/googleurl/exec_test
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-BINARY=$1
-shift
-ID=$1
-shift
-TITLE=$1
-shift
-
-printf "$ID: $TITLE .. "
-./$BINARY $* >$ID.RES 2>&1
-diff $ID.MUST $ID.RES > $ID.DIFF && printf "OK\n" || printf "ERROR\n"
diff --git a/tests/googleurl/test1.MUST b/tests/googleurl/test1.MUST
deleted file mode 100644
index 1b6af48..0000000
--- a/tests/googleurl/test1.MUST
+++ /dev/null
@@ -1 +0,0 @@
-Illegal URL!
diff --git a/tests/googleurl/test1.cpp b/tests/googleurl/test1.cpp
deleted file mode 100644
index 278be5e..0000000
--- a/tests/googleurl/test1.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "URL.hpp"
-#include "GoogleURLNormalizer.hpp"
-
-#include <iostream>
-#include <string>
-
-using namespace std;
-
-int main( int argc, char *argv[] )
-{
- if( argc != 2 ) {
- cerr << "usage: test1 <url>\n" << endl;
- return 1;
- }
-
- char *urlstring = argv[1];
-
- URLNormalizer *normalizer = new GoogleURLNormalizer( );
- URL url = normalizer->parseUrl( urlstring );
- delete normalizer;
-
- if( url == URL::Null ) {
- cerr << "Illegal URL!" << endl;
- return 1;
- }
-
- cout << "protocol: " << url.protocol( ) << endl
- << "host: " << url.host( ) << endl
- << "port: " << url.port( ) << endl
- << "path: " << url.path( ) << endl
- << "query: " << url.query( ) << endl
- << "fragment: " << url.fragment( ) << endl;
-
- cout << "URL: " << url << endl;
-
- return 0;
-}
diff --git a/tests/googleurl/test100.MUST b/tests/googleurl/test100.MUST
deleted file mode 100644
index 40fb968..0000000
--- a/tests/googleurl/test100.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /software.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/software.html
diff --git a/tests/googleurl/test101.MUST b/tests/googleurl/test101.MUST
deleted file mode 100644
index b4c5eca..0000000
--- a/tests/googleurl/test101.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.yahoo.com
-port: 80
-path: /page.html
-query:
-fragment:
-URL: http://www.yahoo.com/page.html
diff --git a/tests/googleurl/test102.MUST b/tests/googleurl/test102.MUST
deleted file mode 100644
index 7482d26..0000000
--- a/tests/googleurl/test102.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /adir/bdir/page.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/adir/bdir/page.html
diff --git a/tests/googleurl/test103.MUST b/tests/googleurl/test103.MUST
deleted file mode 100644
index 085a06c..0000000
--- a/tests/googleurl/test103.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /bdir/page.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/bdir/page.html
diff --git a/tests/googleurl/test104.MUST b/tests/googleurl/test104.MUST
deleted file mode 100644
index 7482d26..0000000
--- a/tests/googleurl/test104.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /adir/bdir/page.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/adir/bdir/page.html
diff --git a/tests/googleurl/test105.MUST b/tests/googleurl/test105.MUST
deleted file mode 100644
index 085a06c..0000000
--- a/tests/googleurl/test105.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /bdir/page.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/bdir/page.html
diff --git a/tests/googleurl/test2.MUST b/tests/googleurl/test2.MUST
deleted file mode 100644
index 92158a6..0000000
--- a/tests/googleurl/test2.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/
diff --git a/tests/googleurl/test2.cpp b/tests/googleurl/test2.cpp
deleted file mode 100644
index 8dee689..0000000
--- a/tests/googleurl/test2.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "URL.hpp"
-#include "GoogleURLNormalizer.hpp"
-
-#include <iostream>
-#include <string>
-
-using namespace std;
-
-int main( int argc, char *argv[] )
-{
- if( argc != 3 ) {
- cerr << "usage: test2 <base url> <partial url>\n" << endl;
- return 1;
- }
-
- char *baseUrlString = argv[1];
- char *partialUrlString = argv[2];
-
- URLNormalizer *normalizer = new GoogleURLNormalizer( );
-
- URL baseUrl = normalizer->parseUrl( baseUrlString );
-
- URL url = normalizer->normalize( baseUrl, partialUrlString );
-
- cout << "protocol: " << url.protocol( ) << endl
- << "host: " << url.host( ) << endl
- << "port: " << url.port( ) << endl
- << "path: " << url.path( ) << endl
- << "query: " << url.query( ) << endl
- << "fragment: " << url.fragment( ) << endl;
-
- cout << "URL: " << url << endl;
-
- delete normalizer;
-
- return 0;
-}
diff --git a/tests/googleurl/test3.MUST b/tests/googleurl/test3.MUST
deleted file mode 100644
index 92158a6..0000000
--- a/tests/googleurl/test3.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/
diff --git a/tests/googleurl/test4.MUST b/tests/googleurl/test4.MUST
deleted file mode 100644
index 0649e10..0000000
--- a/tests/googleurl/test4.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /index.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/index.html
diff --git a/tests/googleurl/test5.MUST b/tests/googleurl/test5.MUST
deleted file mode 100644
index 0649e10..0000000
--- a/tests/googleurl/test5.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 80
-path: /index.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc/index.html
diff --git a/tests/googleurl/test6.MUST b/tests/googleurl/test6.MUST
deleted file mode 100644
index de9b556..0000000
--- a/tests/googleurl/test6.MUST
+++ /dev/null
@@ -1,7 +0,0 @@
-protocol: http
-host: www.andreasbaumann.cc
-port: 8080
-path: /index.html
-query:
-fragment:
-URL: http://www.andreasbaumann.cc:8080/index.html
diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile
index 62ca063..5a3ca08 100644
--- a/tests/url/GNUmakefile
+++ b/tests/url/GNUmakefile
@@ -3,16 +3,12 @@ TOPDIR = ../..
SUBDIRS =
INCLUDE_DIRS = \
- -I$(TOPDIR)/src \
- -I$(TOPDIR)/src/modules/urlnormalizer/simpleurl \
- -I$(TOPDIR)/src/modules/urlnormalizer/googleurl
+ -I$(TOPDIR)/src
INCLUDE_LDFLAGS =
INCLUDE_LIBS = \
- $(TOPDIR)/src/libcrawlingwolf.a \
- $(TOPDIR)/googleurl/libgoogleurl.a \
- -licui18n -licuuc
+ $(TOPDIR)/src/libcrawlingwolf.a
TEST_CPP_BINS = \
test1$(EXE) \
diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp
index 5fd3e90..732d52e 100644
--- a/tests/url/test1.cpp
+++ b/tests/url/test1.cpp
@@ -1,7 +1,8 @@
#include "URL.hpp"
-#include "SimpleURLNormalizer.hpp"
-#include "GoogleURLNormalizer.hpp"
+#include "URLNormalizer.hpp"
+#include "ModuleLoader.hpp"
+#include <vector>
#include <iostream>
#include <string>
#include <cstring>
@@ -17,18 +18,16 @@ int main( int argc, char *argv[] )
char *method = argv[1];
char *urlstring = argv[2];
+
+ vector<string> modules;
+ modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" );
+ modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" );
+ ModuleLoader<URLNormalizer> urlNormalizers( modules );
- URLNormalizer *normalizer;
- if( strcmp( method, "simple" ) == 0 ) {
- normalizer = new SimpleURLNormalizer( );
- } else if( strcmp( method, "google" ) == 0 ) {
- normalizer = new GoogleURLNormalizer( );
- } else {
- cerr << "illegal method '" << method << "'" << endl;
- }
+ URLNormalizer *normalizer = urlNormalizers.create( method );
URL url = normalizer->parseUrl( urlstring );
- delete normalizer;
+ urlNormalizers.destroy( normalizer );
if( url == URL::Null ) {
cerr << "Illegal URL!" << endl;
diff --git a/tests/url/test2.cpp b/tests/url/test2.cpp
index fb660a3..1d57629 100644
--- a/tests/url/test2.cpp
+++ b/tests/url/test2.cpp
@@ -1,6 +1,6 @@
#include "URL.hpp"
-#include "SimpleURLNormalizer.hpp"
-#include "GoogleURLNormalizer.hpp"
+#include "URLNormalizer.hpp"
+#include "ModuleLoader.hpp"
#include <iostream>
#include <string>
@@ -19,14 +19,12 @@ int main( int argc, char *argv[] )
char *baseUrlString = argv[2];
char *partialUrlString = argv[3];
- URLNormalizer *normalizer;
- if( strcmp( method, "simple" ) == 0 ) {
- normalizer = new SimpleURLNormalizer( );
- } else if( strcmp( method, "google" ) == 0 ) {
- normalizer = new GoogleURLNormalizer( );
- } else {
- cerr << "illegal method '" << method << "'" << endl;
- }
+ vector<string> modules;
+ modules.push_back( "../../src/modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" );
+ modules.push_back( "../../src/modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" );
+ ModuleLoader<URLNormalizer> urlNormalizers( modules );
+
+ URLNormalizer *normalizer = urlNormalizers.create( method );
URL baseUrl = normalizer->parseUrl( baseUrlString );
@@ -41,7 +39,7 @@ int main( int argc, char *argv[] )
cout << "URL: " << url << endl;
- delete normalizer;
+ urlNormalizers.destroy( normalizer );
return 0;
}