diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 19:13:52 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 19:13:52 +0200 |
commit | 5fe4ec20a6aa83dc8728ff83766efc35c7818ab6 (patch) | |
tree | d81a1be98664433080e69521982d6d3092d1a9f9 | |
parent | 63929b266e3000374c5e5161e4495d64142b907e (diff) | |
download | crawler-5fe4ec20a6aa83dc8728ff83766efc35c7818ab6.tar.gz crawler-5fe4ec20a6aa83dc8728ff83766efc35c7818ab6.tar.bz2 |
added a fetcher module test
-rwxr-xr-x | src/modules/fetcher/winhttp/WinHttpFetcher.cpp | 11 | ||||
-rw-r--r-- | tests/GNUmakefile | 4 | ||||
-rw-r--r-- | tests/Makefile.W32 | 4 | ||||
-rw-r--r-- | tests/fetcher/GNUmakefile | 52 | ||||
-rwxr-xr-x | tests/fetcher/Makefile.W32 | 39 | ||||
-rwxr-xr-x | tests/fetcher/test1.cpp | 67 |
6 files changed, 175 insertions, 2 deletions
diff --git a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
index a22ab1a..1adc7a0 100755
--- a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
+++ b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
@@ -1,6 +1,11 @@
 #include "WinHttpFetcher.hpp"
 #include "WinHttpRewindInputStream.hpp"
 
+#include "win32/errormsg.hpp"
+
+#include <sstream>
+#include <stdexcept>
+
 WinHttpFetcher::WinHttpFetcher( )
 	: m_session( 0 )
 {
@@ -8,6 +13,12 @@ WinHttpFetcher::WinHttpFetcher( )
 		WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
 		WINHTTP_NO_PROXY_NAME,
 		WINHTTP_NO_PROXY_BYPASS, 0 );
+
+	if( !m_session ) {
+		std::ostringstream ss;
+		ss << "Error creating WinHttp session: " << getLastError( );
+		throw std::runtime_error( ss.str( ) ); // by value: a thrown pointer would leak and bypass catch( std::exception & )
+	}
 }
 
 WinHttpFetcher::~WinHttpFetcher( )
diff --git a/tests/GNUmakefile b/tests/GNUmakefile
index 09bc024..4b3f5dc 100644
--- a/tests/GNUmakefile
+++ b/tests/GNUmakefile
@@ -1,6 +1,8 @@
 TOPDIR = ..
 
-SUBDIRS = utils url streamhtmlparser libfetch curl psql sqlite typedetect
+SUBDIRS = \
+	utils url streamhtmlparser libfetch curl psql sqlite typedetect \
+	fetcher
 
 -include $(TOPDIR)/makefiles/gmake/sub.mk
 
diff --git a/tests/Makefile.W32 b/tests/Makefile.W32
index 654bc93..748a309 100644
--- a/tests/Makefile.W32
+++ b/tests/Makefile.W32
@@ -1,6 +1,8 @@
 TOPDIR = ..
 
-SUBDIRS = utils winhttp url streamhtmlparser
+SUBDIRS = \
+	utils winhttp url streamhtmlparser \
+	fetcher
 
 !INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
 
diff --git a/tests/fetcher/GNUmakefile b/tests/fetcher/GNUmakefile
new file mode 100644
index 0000000..69d882d
--- /dev/null
+++ b/tests/fetcher/GNUmakefile
@@ -0,0 +1,52 @@
+TOPDIR = ../..
+
+SUBDIRS =
+
+#INCLUDE_CXXFLAGS = \
+#	-DUSE_MODULELOADER
+
+INCLUDE_DIRS = \
+	-I$(TOPDIR)/src \
+	-I$(TOPDIR)/src/modules/urlnormalizer/simpleurl \
+	-I$(TOPDIR)/src/modules/urlnormalizer/googleurl
+
+INCLUDE_LDFLAGS =
+
+INCLUDE_LIBS = \
+	$(TOPDIR)/src/libcrawlingwolf.a \
+	$(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \
+	$(TOPDIR)/src/modules/urlnormalizer/googleurl/libgoogleurlnormalizer.a \
+	$(TOPDIR)/googleurl/libgoogleurl.a \
+	-licui18n -licuuc
+
+TEST_CPP_BINS = \
+	test1$(EXE)
+
+OBJS =
+
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+local_all:
+
+local_clean:
+	-@rm -f *.db *.db-journal 2>/dev/null
+	-@rm -f *.RES *.DIFF
+
+local_distclean:
+
+local_test:
+	@-for METHOD in simple_urlnormalizer google_urlnormalizer; do \
+		echo "Using URL normalizer '$$METHOD'.." ; \
+		./exec_test test1 test1 "parse illegal protocol" $$METHOD parse www.andreasbaumann.cc ; \
+		./exec_test test1 test2 "parse normal start URL without slash" $$METHOD parse http://www.andreasbaumann.cc ; \
+		./exec_test test1 test3 "parse normal start URL with slash" $$METHOD parse http://www.andreasbaumann.cc/ ; \
+		./exec_test test1 test4 "parse normal URL" $$METHOD parse http://www.andreasbaumann.cc/index.html ; \
+		./exec_test test1 test5 "parse normal URL with default port" $$METHOD parse http://www.andreasbaumann.cc:80/index.html ; \
+		./exec_test test1 test6 "parse normal URL with non-standard port" $$METHOD parse http://www.andreasbaumann.cc:8080/index.html ; \
+		./exec_test test1 test100 "normalize a relative URL" $$METHOD normalize http://www.andreasbaumann.cc/index.html /software.html ; \
+		./exec_test test1 test101 "absolute URL in HTML content" $$METHOD normalize http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html ; \
+		./exec_test test1 test102 "path normalization, relative path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html bdir/page.html ; \
+		./exec_test test1 test103 "path normalization, absolute path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html /bdir/page.html ; \
+		./exec_test test1 test104 "path normalization, current dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html ; \
+		./exec_test test1 test105 "path normalization, previous dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html ; \
+	done
diff --git a/tests/fetcher/Makefile.W32 b/tests/fetcher/Makefile.W32
new file mode 100755
index 0000000..c472a8a
--- /dev/null
+++ b/tests/fetcher/Makefile.W32
@@ -0,0 +1,39 @@
+TOPDIR = ..\..
+
+SUBDIRS =
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk
+
+INCLUDE_CXXFLAGS = \
+	/D_WIN32_WINNT=0x504 \
+	/DUSE_MODULELOADER
+
+INCLUDE_DIRS = \
+	/I. \
+	/I$(TOPDIR)\src \
+	/I$(TOPDIR)\src\modules\fetcher\winhttp \
+	/I$(TOPDIR)\src\modules\urlnormalizer\simpleurl
+
+INCLUDE_LDFLAGS = \
+
+INCLUDE_LIBS = \
+	$(TOPDIR)\src\crawlingwolf.lib \
+	$(TOPDIR)\src\modules\fetcher\winhttp\winhttpfetcher.lib \
+	$(TOPDIR)\src\modules\urlnormalizer\simpleurl\simpleurlnormalizer.lib
+
+TEST_CPP_BINS = \
+	test1.exe
+
+OBJS =
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
+
+test1.exe: test1.obj
+
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_test:
diff --git a/tests/fetcher/test1.cpp b/tests/fetcher/test1.cpp
new file mode 100755
index 0000000..7777ba7
--- /dev/null
+++ b/tests/fetcher/test1.cpp
@@ -0,0 +1,67 @@
+#include "URL.hpp"
+#include "SimpleURLNormalizer.hpp"
+#ifdef USE_MODULELOADER
+#include "Fetcher.hpp"
+#include "ModuleLoader.hpp"
+#else
+#ifndef _WIN32
+#include "LibFetchFetcher.hpp"
+#else
+#include "WinHttpFetcher.hpp"
+#endif
+#endif
+
+#include <vector>
+#include <iostream>
+#include <string>
+#include <cstring>
+
+using namespace std;
+
+int main( int argc, char *argv[] )
+{
+	try {
+		if( argc < 3 ) {
+			cerr << "usage: test1 <method> <url>\n" << endl;
+			return 1;
+		}
+
+		char *method = argv[1];
+		char *urlString = argv[2];
+
+#ifdef USE_MODULELOADER
+		vector<string> modules;
+#ifndef _WIN32
+		modules.push_back( "../../src/modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
+#else
+		modules.push_back( "..\\..\\src\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" );
+#endif
+		ModuleLoader<Fetcher> fetchers( modules );
+		Fetcher *fetcher = fetchers.create( method );
+#else
+		Fetcher *fetcher;
+		if( strcmp( method, "libfetch_fetcher" ) == 0 ) {
+			fetcher = new LibFetchFetcher( );
+		} else if( strcmp( method, "winhttp_fetcher" ) == 0 ) {
+			fetcher = new WinHttpFetcher( );
+		} else {
+			cerr << "Unknown fetcher method '" << method << "'" << endl;
+			return 1;
+		}
+#endif
+
+		SimpleURLNormalizer normalizer;
+		URL url = normalizer.parseUrl( urlString );
+
+#ifdef USE_MODULELOADER
+		fetchers.destroy( fetcher );
+#else
+		delete fetcher;
+#endif
+
+		return 0;
+	} catch( const exception &e ) {
+		cerr << e.what( ) << endl;
+		return 1;
+	}
+}