diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 19:31:04 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 19:31:04 +0200 |
commit | 799ac1861171cd58fc7036b447b931eac8722561 (patch) | |
tree | 0ef25e9a40dd5502ed933cebe8d8a49934a054c1 /tests/fetcher | |
parent | 5fe4ec20a6aa83dc8728ff83766efc35c7818ab6 (diff) | |
download | crawler-799ac1861171cd58fc7036b447b931eac8722561.tar.gz crawler-799ac1861171cd58fc7036b447b931eac8722561.tar.bz2 |
fixed fetcher test on linux
Diffstat (limited to 'tests/fetcher')
-rw-r--r-- | tests/fetcher/GNUmakefile | 35 | ||||
-rwxr-xr-x | tests/fetcher/test1.cpp | 23 |
2 files changed, 37 insertions, 21 deletions
diff --git a/tests/fetcher/GNUmakefile b/tests/fetcher/GNUmakefile index 69d882d..f6e0d25 100644 --- a/tests/fetcher/GNUmakefile +++ b/tests/fetcher/GNUmakefile @@ -8,16 +8,25 @@ SUBDIRS = INCLUDE_DIRS = \ -I$(TOPDIR)/src \ -I$(TOPDIR)/src/modules/urlnormalizer/simpleurl \ - -I$(TOPDIR)/src/modules/urlnormalizer/googleurl + -I$(TOPDIR)/src/modules/fetcher/libfetch INCLUDE_LDFLAGS = INCLUDE_LIBS = \ - $(TOPDIR)/src/libcrawlingwolf.a \ $(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \ - $(TOPDIR)/src/modules/urlnormalizer/googleurl/libgoogleurlnormalizer.a \ - $(TOPDIR)/googleurl/libgoogleurl.a \ - -licui18n -licuuc + $(TOPDIR)/src/modules/fetcher/libfetch/liblibfetchfetcher.a \ + $(TOPDIR)/src/libcrawlingwolf.a \ + $(TOPDIR)/libfetch/libfetch.a + +# openssl +ifeq ($(WITH_SSL),1) + +INCLUDE_CFLAGS += \ + -DWITH_SSL + +INCLUDE_LIBS += \ + $(OPENSSL_LIBS) +endif TEST_CPP_BINS = \ test1$(EXE) @@ -29,24 +38,8 @@ OBJS = local_all: local_clean: - -@rm -f *.db *.db-journal 2>/dev/null -@rm -f *.RES *.DIFF local_distclean: local_test: - @-for METHOD in simple_urlnormalizer google_urlnormalizer; do \ - echo "Using URL normalizer '$$METHOD'.." ; \ - ./exec_test test1 test1 "parse illegal protocol" $$METHOD parse www.andreasbaumann.cc ; \ - ./exec_test test1 test2 "parse normal start URL without slash" $$METHOD parse http://www.andreasbaumann.cc ; \ - ./exec_test test1 test3 "parse normal start URL with slash" $$METHOD parse http://www.andreasbaumann.cc/ ; \ - ./exec_test test1 test4 "parse normal URL" $$METHOD parse http://www.andreasbaumann.cc/index.html ; \ - ./exec_test test1 test5 "parse normal URL with default port" $$METHOD parse http://www.andreasbaumann.cc:80/index.html ; \ - ./exec_test test1 test6 "parse normal URL with non-standard port" $$METHOD parse http://www.andreasbaumann.cc:8080/index.html ; \ - ./exec_test test1 test100 "normalize a relative URL" $$METHOD normalize http://www.andreasbaumann.cc/index.html /software.html ; \ - ./exec_test test1 test101 "absolute URL in HTML content" $$METHOD normalize http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html ; \ - ./exec_test test1 test102 "path normalization, relative path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html bdir/page.html ; \ - ./exec_test test1 test103 "path normalization, absolute path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html /bdir/page.html ; \ - ./exec_test test1 test104 "path normalization, current dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html ; \ - ./exec_test test1 test105 "path normalization, previous dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html ; \ - done diff --git a/tests/fetcher/test1.cpp b/tests/fetcher/test1.cpp index 7777ba7..9e5502d 100755 --- a/tests/fetcher/test1.cpp +++ b/tests/fetcher/test1.cpp @@ -1,5 +1,6 @@ #include "URL.hpp" #include "SimpleURLNormalizer.hpp" +#include "RewindInputStream.hpp" #ifdef USE_MODULELOADER #include "Fetcher.hpp" #include "ModuleLoader.hpp" @@ -18,6 +19,17 @@ using namespace std; +static void copy_stream( istream& in, ostream &out ) +{ + enum { BUFSIZE = 4096 }; + char buf[BUFSIZE]; + + while( in.good( ) && !in.eof( ) ) { + in.read( buf, BUFSIZE ); + out.write( buf, in.gcount( ) ); + } +} + int main( int argc, char *argv[] ) { try { @@ -41,9 +53,13 @@ int main( int argc, char *argv[] ) #else Fetcher *fetcher; if( strcmp( method, "libfetch_fetcher" ) == 0 ) { +#ifndef _WIN32 fetcher = new LibFetchFetcher( ); +#endif } else if( strcmp( method, "winhttp_fetcher" ) == 0 ) { +#ifdef _WIN32 fetcher = new WinHttpFetcher( ); +#endif } else { cerr << "Unknown fetcher method '" << method << "'" << endl; return 1; @@ -52,6 +68,13 @@ int main( int argc, char *argv[] ) SimpleURLNormalizer normalizer; URL url = normalizer.parseUrl( urlString ); + + RewindInputStream *s = fetcher->fetch( url ); + copy_stream( *s, cout ); +// s->rewind( ); +// copy_stream( *s, cout ); + + delete s; #ifdef USE_MODULELOADER fetchers.destroy( fetcher ); |