diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-07-28 18:07:26 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-07-28 18:07:26 +0200 |
commit | 3856d7214b3b3eb3e5b8c3ac025b7aeeb93cd044 (patch) | |
tree | 456ce7af97f65c94bcf30319c218b45ddab2632f /tests/url | |
parent | cbec8f229bb4d995c9fb05babf176e82a6f6db7c (diff) | |
download | crawler-3856d7214b3b3eb3e5b8c3ac025b7aeeb93cd044.tar.gz crawler-3856d7214b3b3eb3e5b8c3ac025b7aeeb93cd044.tar.bz2 |
started to add URL normalizers and testing environment for URLs
Diffstat (limited to 'tests/url')
-rw-r--r-- | tests/url/GNUmakefile | 10 | ||||
-rw-r--r-- | tests/url/test1.MUST | 6 | ||||
-rw-r--r-- | tests/url/test1.cpp | 3 | ||||
-rw-r--r-- | tests/url/test2.MUST | 6 | ||||
-rw-r--r-- | tests/url/test2.cpp | 40 |
5 files changed, 63 insertions, 2 deletions
diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile index e3913bd..81d7a0e 100644 --- a/tests/url/GNUmakefile +++ b/tests/url/GNUmakefile @@ -7,10 +7,12 @@ INCLUDE_DIRS = \ INCLUDE_LDFLAGS = -INCLUDE_LIBS = +INCLUDE_LIBS = \ + $(TOPDIR)/src/libcrawlingwolf.a TEST_CPP_BINS = \ - test1$(EXE) + test1$(EXE) \ + test2$(EXE) OBJS = @@ -19,7 +21,11 @@ OBJS = local_all: local_clean: + -@rm -f *.db *.db-journal 2>/dev/null + -@rm -f *.RES *.DIFF local_distclean: local_test: + @-./exec_test test1 "output normal URL" http://www.andreasbaumann.cc/index.html + @-./exec_test test2 "normalize a relative URL" http://www.andreasbaumann.cc/index.html /software.html diff --git a/tests/url/test1.MUST b/tests/url/test1.MUST new file mode 100644 index 0000000..38a3a27 --- /dev/null +++ b/tests/url/test1.MUST @@ -0,0 +1,6 @@ +protocol: http +port: 80 +domain: www.andreasbaumann.cc +path: /index.html +fragment: + diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp index 264bef0..64ae223 100644 --- a/tests/url/test1.cpp +++ b/tests/url/test1.cpp @@ -17,7 +17,10 @@ int main( int argc, char *argv[] ) URL url( urlstring ); cout << "protocol: " << url.protocol( ) << endl + << "port: " << url.port( ) << endl << "domain: " << url.domain( ) << endl + << "path: " << url.path( ) << endl + << "fragment: " << url.fragment( ) << endl << endl; return 0; diff --git a/tests/url/test2.MUST b/tests/url/test2.MUST new file mode 100644 index 0000000..4718213 --- /dev/null +++ b/tests/url/test2.MUST @@ -0,0 +1,6 @@ +protocol: http +port: 80 +domain: www.andreasbaumann.cc +path: /software.html +fragment: + diff --git a/tests/url/test2.cpp b/tests/url/test2.cpp new file mode 100644 index 0000000..b9a3270 --- /dev/null +++ b/tests/url/test2.cpp @@ -0,0 +1,40 @@ +#include "URL.hpp" +#include "SimpleURLNormalizer.hpp" + +#include <iostream> +#include <string> + +using namespace std; + +int main( int argc, char *argv[] ) +{ + if( argc != 3 ) { + cerr << "usage: test2 <base url> <partial url>\n" << endl; + return 1; + } + + char *baseUrlString = argv[1]; + char *partialUrlString = argv[2]; + + URL baseUrl( baseUrlString ); + URL partialUrl( partialUrlString ); + + URL url = baseUrl; + URLNormalizer *normalizer = new SimpleURLNormalizer( ); + if( !normalizer->normalize( url, partialUrl ) ) { + delete normalizer; + cerr << "Normalization error!\n" << endl; + return 1; + } + + cout << "protocol: " << url.protocol( ) << endl + << "port: " << url.port( ) << endl + << "domain: " << url.domain( ) << endl + << "path: " << url.path( ) << endl + << "fragment: " << url.fragment( ) << endl + << endl; + + delete normalizer; + + return 0; +} |