summary refs log tree commit diff
path: root/tests/fetcher
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-08-12 19:31:04 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-08-12 19:31:04 +0200
commit 799ac1861171cd58fc7036b447b931eac8722561 (patch)
tree 0ef25e9a40dd5502ed933cebe8d8a49934a054c1 /tests/fetcher
parent 5fe4ec20a6aa83dc8728ff83766efc35c7818ab6 (diff)
download crawler-799ac1861171cd58fc7036b447b931eac8722561.tar.gz
crawler-799ac1861171cd58fc7036b447b931eac8722561.tar.bz2
fixed fetcher test on linux
Diffstat (limited to 'tests/fetcher')
-rw-r--r-- tests/fetcher/GNUmakefile 35
-rwxr-xr-x tests/fetcher/test1.cpp 23
2 files changed, 37 insertions, 21 deletions
diff --git a/tests/fetcher/GNUmakefile b/tests/fetcher/GNUmakefile
index 69d882d..f6e0d25 100644
--- a/tests/fetcher/GNUmakefile
+++ b/tests/fetcher/GNUmakefile
@@ -8,16 +8,25 @@ SUBDIRS =
INCLUDE_DIRS = \
-I$(TOPDIR)/src \
-I$(TOPDIR)/src/modules/urlnormalizer/simpleurl \
- -I$(TOPDIR)/src/modules/urlnormalizer/googleurl
+ -I$(TOPDIR)/src/modules/fetcher/libfetch
INCLUDE_LDFLAGS =
INCLUDE_LIBS = \
- $(TOPDIR)/src/libcrawlingwolf.a \
$(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \
- $(TOPDIR)/src/modules/urlnormalizer/googleurl/libgoogleurlnormalizer.a \
- $(TOPDIR)/googleurl/libgoogleurl.a \
- -licui18n -licuuc
+ $(TOPDIR)/src/modules/fetcher/libfetch/liblibfetchfetcher.a \
+ $(TOPDIR)/src/libcrawlingwolf.a \
+ $(TOPDIR)/libfetch/libfetch.a
+
+# openssl
+ifeq ($(WITH_SSL),1)
+
+INCLUDE_CFLAGS += \
+ -DWITH_SSL
+
+INCLUDE_LIBS += \
+ $(OPENSSL_LIBS)
+endif
TEST_CPP_BINS = \
test1$(EXE)
@@ -29,24 +38,8 @@ OBJS =
local_all:
local_clean:
- -@rm -f *.db *.db-journal 2>/dev/null
-@rm -f *.RES *.DIFF
local_distclean:
local_test:
- @-for METHOD in simple_urlnormalizer google_urlnormalizer; do \
- echo "Using URL normalizer '$$METHOD'.." ; \
- ./exec_test test1 test1 "parse illegal protocol" $$METHOD parse www.andreasbaumann.cc ; \
- ./exec_test test1 test2 "parse normal start URL without slash" $$METHOD parse http://www.andreasbaumann.cc ; \
- ./exec_test test1 test3 "parse normal start URL with slash" $$METHOD parse http://www.andreasbaumann.cc/ ; \
- ./exec_test test1 test4 "parse normal URL" $$METHOD parse http://www.andreasbaumann.cc/index.html ; \
- ./exec_test test1 test5 "parse normal URL with default port" $$METHOD parse http://www.andreasbaumann.cc:80/index.html ; \
- ./exec_test test1 test6 "parse normal URL with non-standard port" $$METHOD parse http://www.andreasbaumann.cc:8080/index.html ; \
- ./exec_test test1 test100 "normalize a relative URL" $$METHOD normalize http://www.andreasbaumann.cc/index.html /software.html ; \
- ./exec_test test1 test101 "absolute URL in HTML content" $$METHOD normalize http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html ; \
- ./exec_test test1 test102 "path normalization, relative path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html bdir/page.html ; \
- ./exec_test test1 test103 "path normalization, absolute path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html /bdir/page.html ; \
- ./exec_test test1 test104 "path normalization, current dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html ; \
- ./exec_test test1 test105 "path normalization, previous dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html ; \
- done
diff --git a/tests/fetcher/test1.cpp b/tests/fetcher/test1.cpp
index 7777ba7..9e5502d 100755
--- a/tests/fetcher/test1.cpp
+++ b/tests/fetcher/test1.cpp
@@ -1,5 +1,6 @@
#include "URL.hpp"
#include "SimpleURLNormalizer.hpp"
+#include "RewindInputStream.hpp"
#ifdef USE_MODULELOADER
#include "Fetcher.hpp"
#include "ModuleLoader.hpp"
@@ -18,6 +19,17 @@
using namespace std;
+static void copy_stream( istream& in, ostream &out )
+{
+ enum { BUFSIZE = 4096 };
+ char buf[BUFSIZE];
+
+ while( in.good( ) && !in.eof( ) ) {
+ in.read( buf, BUFSIZE );
+ out.write( buf, in.gcount( ) );
+ }
+}
+
int main( int argc, char *argv[] )
{
try {
@@ -41,9 +53,13 @@ int main( int argc, char *argv[] )
#else
Fetcher *fetcher;
if( strcmp( method, "libfetch_fetcher" ) == 0 ) {
+#ifndef _WIN32
fetcher = new LibFetchFetcher( );
+#endif
} else if( strcmp( method, "winhttp_fetcher" ) == 0 ) {
+#ifdef _WIN32
fetcher = new WinHttpFetcher( );
+#endif
} else {
cerr << "Unknown fetcher method '" << method << "'" << endl;
return 1;
@@ -52,6 +68,13 @@ int main( int argc, char *argv[] )
SimpleURLNormalizer normalizer;
URL url = normalizer.parseUrl( urlString );
+
+ RewindInputStream *s = fetcher->fetch( url );
+ copy_stream( *s, cout );
+// s->rewind( );
+// copy_stream( *s, cout );
+
+ delete s;
#ifdef USE_MODULELOADER
fetchers.destroy( fetcher );