summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <abaumann@yahoo.com>2012-08-12 19:13:52 +0200
committerAndreas Baumann <abaumann@yahoo.com>2012-08-12 19:13:52 +0200
commit5fe4ec20a6aa83dc8728ff83766efc35c7818ab6 (patch)
treed81a1be98664433080e69521982d6d3092d1a9f9
parent63929b266e3000374c5e5161e4495d64142b907e (diff)
downloadcrawler-5fe4ec20a6aa83dc8728ff83766efc35c7818ab6.tar.gz
crawler-5fe4ec20a6aa83dc8728ff83766efc35c7818ab6.tar.bz2
added a fetcher module test
-rwxr-xr-xsrc/modules/fetcher/winhttp/WinHttpFetcher.cpp11
-rw-r--r--tests/GNUmakefile4
-rw-r--r--tests/Makefile.W324
-rw-r--r--tests/fetcher/GNUmakefile52
-rwxr-xr-xtests/fetcher/Makefile.W3239
-rwxr-xr-xtests/fetcher/test1.cpp67
6 files changed, 175 insertions, 2 deletions
diff --git a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
index a22ab1a..1adc7a0 100755
--- a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
+++ b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
@@ -1,6 +1,11 @@
#include "WinHttpFetcher.hpp"
#include "WinHttpRewindInputStream.hpp"
+#include "win32/errormsg.hpp"
+
+#include <sstream>
+#include <stdexcept>
+
WinHttpFetcher::WinHttpFetcher( )
: m_session( 0 )
{
@@ -8,6 +13,12 @@ WinHttpFetcher::WinHttpFetcher( )
WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
WINHTTP_NO_PROXY_NAME,
WINHTTP_NO_PROXY_BYPASS, 0 );
+
+ // m_session is still null here when the session could not be created;
+ // report that as std::runtime_error with the text from getLastError( )
+ if( !m_session ) {
+ std::ostringstream ss;
+ ss << "Error creating WinHttp session: " << getLastError( );
+ // throw by value: an exception thrown as a pointer ('throw new ...')
+ // is not matched by 'catch( std::exception & )' handlers and is never freed
+ throw std::runtime_error( ss.str( ) );
+ }
}
WinHttpFetcher::~WinHttpFetcher( )
diff --git a/tests/GNUmakefile b/tests/GNUmakefile
index 09bc024..4b3f5dc 100644
--- a/tests/GNUmakefile
+++ b/tests/GNUmakefile
@@ -1,6 +1,8 @@
TOPDIR = ..
-SUBDIRS = utils url streamhtmlparser libfetch curl psql sqlite typedetect
+SUBDIRS = \
+ utils url streamhtmlparser libfetch curl psql sqlite typedetect \
+ fetcher
-include $(TOPDIR)/makefiles/gmake/sub.mk
diff --git a/tests/Makefile.W32 b/tests/Makefile.W32
index 654bc93..748a309 100644
--- a/tests/Makefile.W32
+++ b/tests/Makefile.W32
@@ -1,6 +1,8 @@
TOPDIR = ..
-SUBDIRS = utils winhttp url streamhtmlparser
+SUBDIRS = \
+ utils winhttp url streamhtmlparser \
+ fetcher
!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
diff --git a/tests/fetcher/GNUmakefile b/tests/fetcher/GNUmakefile
new file mode 100644
index 0000000..69d882d
--- /dev/null
+++ b/tests/fetcher/GNUmakefile
@@ -0,0 +1,52 @@
+TOPDIR = ../..
+
+SUBDIRS =
+
+#INCLUDE_CXXFLAGS = \
+# -DUSE_MODULELOADER
+
+INCLUDE_DIRS = \
+ -I$(TOPDIR)/src \
+ -I$(TOPDIR)/src/modules/urlnormalizer/simpleurl \
+ -I$(TOPDIR)/src/modules/urlnormalizer/googleurl
+
+INCLUDE_LDFLAGS =
+
+INCLUDE_LIBS = \
+ $(TOPDIR)/src/libcrawlingwolf.a \
+ $(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \
+ $(TOPDIR)/src/modules/urlnormalizer/googleurl/libgoogleurlnormalizer.a \
+ $(TOPDIR)/googleurl/libgoogleurl.a \
+ -licui18n -licuuc
+
+TEST_CPP_BINS = \
+ test1$(EXE)
+
+OBJS =
+
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+local_all:
+
+local_clean:
+ -@rm -f *.db *.db-journal 2>/dev/null
+ -@rm -f *.RES *.DIFF
+
+local_distclean:
+
+# NOTE(review): the invocations below pass URL normalizer names
+# (simple_urlnormalizer, google_urlnormalizer) followed by a parse/normalize
+# sub-command. test1.cpp in this directory prints the usage
+# "test1 <method> <url>" and, when built without USE_MODULELOADER, compares
+# <method> against fetcher names such as libfetch_fetcher. Presumably
+# exec_test forwards these arguments to test1 - TODO confirm and adapt the
+# argument lists to the fetcher test.
+local_test:
+ @-for METHOD in simple_urlnormalizer google_urlnormalizer; do \
+ echo "Using URL normalizer '$$METHOD'.." ; \
+ ./exec_test test1 test1 "parse illegal protocol" $$METHOD parse www.andreasbaumann.cc ; \
+ ./exec_test test1 test2 "parse normal start URL without slash" $$METHOD parse http://www.andreasbaumann.cc ; \
+ ./exec_test test1 test3 "parse normal start URL with slash" $$METHOD parse http://www.andreasbaumann.cc/ ; \
+ ./exec_test test1 test4 "parse normal URL" $$METHOD parse http://www.andreasbaumann.cc/index.html ; \
+ ./exec_test test1 test5 "parse normal URL with default port" $$METHOD parse http://www.andreasbaumann.cc:80/index.html ; \
+ ./exec_test test1 test6 "parse normal URL with non-standard port" $$METHOD parse http://www.andreasbaumann.cc:8080/index.html ; \
+ ./exec_test test1 test100 "normalize a relative URL" $$METHOD normalize http://www.andreasbaumann.cc/index.html /software.html ; \
+ ./exec_test test1 test101 "absolute URL in HTML content" $$METHOD normalize http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html ; \
+ ./exec_test test1 test102 "path normalization, relative path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html bdir/page.html ; \
+ ./exec_test test1 test103 "path normalization, absolute path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html /bdir/page.html ; \
+ ./exec_test test1 test104 "path normalization, current dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html ; \
+ ./exec_test test1 test105 "path normalization, previous dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html ; \
+ done
diff --git a/tests/fetcher/Makefile.W32 b/tests/fetcher/Makefile.W32
new file mode 100755
index 0000000..c472a8a
--- /dev/null
+++ b/tests/fetcher/Makefile.W32
@@ -0,0 +1,39 @@
+TOPDIR = ..\..
+
+SUBDIRS =
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk
+
+INCLUDE_CXXFLAGS = \
+ /D_WIN32_WINNT=0x504 \
+ /DUSE_MODULELOADER
+
+INCLUDE_DIRS = \
+ /I. \
+ /I$(TOPDIR)\src \
+ /I$(TOPDIR)\src\modules\fetcher\winhttp \
+ /I$(TOPDIR)\src\modules\urlnormalizer\simpleurl
+
+INCLUDE_LDFLAGS = \
+
+INCLUDE_LIBS = \
+ $(TOPDIR)\src\crawlingwolf.lib \
+ $(TOPDIR)\src\modules\fetcher\winhttp\winhttpfetcher.lib \
+ $(TOPDIR)\src\modules\urlnormalizer\simpleurl\simpleurlnormalizer.lib
+
+TEST_CPP_BINS = \
+ test1.exe
+
+OBJS =
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
+
+test1.exe: test1.obj
+
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_test:
diff --git a/tests/fetcher/test1.cpp b/tests/fetcher/test1.cpp
new file mode 100755
index 0000000..7777ba7
--- /dev/null
+++ b/tests/fetcher/test1.cpp
@@ -0,0 +1,67 @@
+#include "URL.hpp"
+#include "SimpleURLNormalizer.hpp"
+#ifdef USE_MODULELOADER
+#include "Fetcher.hpp"
+#include "ModuleLoader.hpp"
+#else
+#ifndef _WIN32
+#include "LibFetchFetcher.hpp"
+#else
+#include "WinHttpFetcher.hpp"
+#endif
+#endif
+
+#include <vector>
+#include <iostream>
+#include <string>
+#include <cstring>
+
+using namespace std;
+
+// Test driver for the fetcher modules.
+//
+// Usage: test1 <method> <url>
+//
+// Creates the fetcher named by <method>, parses <url> with the
+// SimpleURLNormalizer and destroys the fetcher again.
+//
+// Returns 0 on success; 1 on wrong usage, on an unknown fetcher method
+// (build without USE_MODULELOADER) or when an exception derived from
+// std::exception is caught.
+int main( int argc, char *argv[] )
+{
+	try {
+		if( argc < 3 ) {
+			cerr << "usage: test1 <method> <url>\n" << endl;
+			return 1;
+		}
+
+		char *method = argv[1];
+		char *urlString = argv[2];
+
+#ifdef USE_MODULELOADER
+		// the fetcher implementation is created through the module loader
+		vector<string> modules;
+#ifndef _WIN32
+		modules.push_back( "../../src/modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
+#else
+		modules.push_back( "..\\..\\src\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" );
+#endif
+		ModuleLoader<Fetcher> fetchers( modules );
+		Fetcher *fetcher = fetchers.create( method );
+#else
+		// Without the module loader only the header of the platform's own
+		// fetcher is included at the top of this file, so each concrete
+		// class may only be named under the same _WIN32 guard; otherwise
+		// the other platform's class is undeclared and the build fails.
+		Fetcher *fetcher = 0;
+#ifndef _WIN32
+		if( strcmp( method, "libfetch_fetcher" ) == 0 ) {
+			fetcher = new LibFetchFetcher( );
+		}
+#else
+		if( strcmp( method, "winhttp_fetcher" ) == 0 ) {
+			fetcher = new WinHttpFetcher( );
+		}
+#endif
+		// still null: <method> names no fetcher available in this build
+		if( !fetcher ) {
+			cerr << "Unknown fetcher method '" << method << "'" << endl;
+			return 1;
+		}
+#endif
+
+		// NOTE(review): the URL is parsed but not yet handed to the fetcher;
+		// the test currently only exercises fetcher creation and destruction
+		SimpleURLNormalizer normalizer;
+		URL url = normalizer.parseUrl( urlString );
+
+#ifdef USE_MODULELOADER
+		fetchers.destroy( fetcher );
+#else
+		delete fetcher;
+#endif
+
+		return 0;
+	} catch( const exception &e ) {
+		cerr << e.what( ) << endl;
+		return 1;
+	}
+}