summaryrefslogtreecommitdiff
path: root/tests/url
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-07-28 18:07:26 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-07-28 18:07:26 +0200
commit 3856d7214b3b3eb3e5b8c3ac025b7aeeb93cd044 (patch)
tree 456ce7af97f65c94bcf30319c218b45ddab2632f /tests/url
parent cbec8f229bb4d995c9fb05babf176e82a6f6db7c (diff)
download crawler-3856d7214b3b3eb3e5b8c3ac025b7aeeb93cd044.tar.gz
crawler-3856d7214b3b3eb3e5b8c3ac025b7aeeb93cd044.tar.bz2
started to add URL normalizers and testing environment for URLs
Diffstat (limited to 'tests/url')
-rw-r--r-- tests/url/GNUmakefile 10
-rw-r--r-- tests/url/test1.MUST 6
-rw-r--r-- tests/url/test1.cpp 3
-rw-r--r-- tests/url/test2.MUST 6
-rw-r--r-- tests/url/test2.cpp 40
5 files changed, 63 insertions, 2 deletions
diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile
index e3913bd..81d7a0e 100644
--- a/tests/url/GNUmakefile
+++ b/tests/url/GNUmakefile
@@ -7,10 +7,12 @@ INCLUDE_DIRS = \
INCLUDE_LDFLAGS =
-INCLUDE_LIBS =
+INCLUDE_LIBS = \
+ $(TOPDIR)/src/libcrawlingwolf.a
TEST_CPP_BINS = \
- test1$(EXE)
+ test1$(EXE) \
+ test2$(EXE)
OBJS =
@@ -19,7 +21,11 @@ OBJS =
local_all:
local_clean:
+ -@rm -f *.db *.db-journal 2>/dev/null
+ -@rm -f *.RES *.DIFF
local_distclean:
local_test:
+ @-./exec_test test1 "output normal URL" http://www.andreasbaumann.cc/index.html
+ @-./exec_test test2 "normalize a relative URL" http://www.andreasbaumann.cc/index.html /software.html
diff --git a/tests/url/test1.MUST b/tests/url/test1.MUST
new file mode 100644
index 0000000..38a3a27
--- /dev/null
+++ b/tests/url/test1.MUST
@@ -0,0 +1,6 @@
+protocol: http
+port: 80
+domain: www.andreasbaumann.cc
+path: /index.html
+fragment:
+
diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp
index 264bef0..64ae223 100644
--- a/tests/url/test1.cpp
+++ b/tests/url/test1.cpp
@@ -17,7 +17,10 @@ int main( int argc, char *argv[] )
URL url( urlstring );
cout << "protocol: " << url.protocol( ) << endl
+ << "port: " << url.port( ) << endl
<< "domain: " << url.domain( ) << endl
+ << "path: " << url.path( ) << endl
+ << "fragment: " << url.fragment( ) << endl
<< endl;
return 0;
diff --git a/tests/url/test2.MUST b/tests/url/test2.MUST
new file mode 100644
index 0000000..4718213
--- /dev/null
+++ b/tests/url/test2.MUST
@@ -0,0 +1,6 @@
+protocol: http
+port: 80
+domain: www.andreasbaumann.cc
+path: /software.html
+fragment:
+
diff --git a/tests/url/test2.cpp b/tests/url/test2.cpp
new file mode 100644
index 0000000..b9a3270
--- /dev/null
+++ b/tests/url/test2.cpp
@@ -0,0 +1,40 @@
+#include "URL.hpp"
+#include "SimpleURLNormalizer.hpp"
+
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+// Test driver: normalizes <partial url> against <base url>, prints the URL parts.
+// Exits with 0 on success, 1 on wrong usage or when normalization fails.
+int main( int argc, char *argv[] )
+{
+	if( argc != 3 ) {
+		cerr << "usage: test2 <base url> <partial url>\n" << endl;
+		return 1;
+	}
+
+	URL baseUrl( argv[1] );
+	URL partialUrl( argv[2] );
+
+	// url starts as a copy of the base URL and is what gets printed afterwards
+	URL url = baseUrl;
+	// automatic storage: nothing to delete by hand on any exit path;
+	// the reference keeps the call going through the URLNormalizer interface
+	SimpleURLNormalizer simpleNormalizer;
+	URLNormalizer &normalizer = simpleNormalizer;
+	if( !normalizer.normalize( url, partialUrl ) ) {
+		cerr << "Normalization error!\n" << endl;
+		return 1;
+	}
+
+	cout << "protocol: " << url.protocol( ) << endl
+		<< "port: " << url.port( ) << endl
+		<< "domain: " << url.domain( ) << endl
+		<< "path: " << url.path( ) << endl
+		<< "fragment: " << url.fragment( ) << endl
+		<< endl;
+
+	return 0;
+}