diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-11 14:20:00 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-11 14:20:00 +0200 |
commit | 1648a5d7feee25ca5bd0497bfb950db3d6544c16 (patch) | |
tree | cb3670f3ff3463c7699e5d2ef8866844f067d0a4 /tests | |
parent | 9cc10524b118f61b28763e81fa7310ae53023552 (diff) | |
download | crawler-1648a5d7feee25ca5bd0497bfb950db3d6544c16.tar.gz crawler-1648a5d7feee25ca5bd0497bfb950db3d6544c16.tar.bz2 |
added fetchers for type detection tests
Diffstat (limited to 'tests')
-rw-r--r-- | tests/typedetect/GNUmakefile | 26 | ||||
-rw-r--r-- | tests/typedetect/test1.cpp | 42 | ||||
-rw-r--r-- | tests/typedetect/test100.MUST | 1 |
3 files changed, 47 insertions, 22 deletions
diff --git a/tests/typedetect/GNUmakefile b/tests/typedetect/GNUmakefile index 785038c..1e3db6e 100644 --- a/tests/typedetect/GNUmakefile +++ b/tests/typedetect/GNUmakefile @@ -9,17 +9,19 @@ INCLUDE_DIRS = \ -I$(TOPDIR)/src \ -I$(TOPDIR)/src/modules/typedetect/libmagic \ -I$(TOPDIR)/src/modules/fetcher/file \ - -I$(TOPDIR)/src/modules/fetcher/libfetch + -I$(TOPDIR)/src/modules/fetcher/libfetch \ + -I$(TOPDIR)/src/modules/urlnormalizer/simpleurl INCLUDE_LDFLAGS = INCLUDE_LIBS = \ - $(TOPDIR)/src/libcrawlingwolf.a \ $(TOPDIR)/src/modules/typedetect/libmagic/liblibmagictypedetect.a \ -lmagic \ $(TOPDIR)/src/modules/fetcher/file/libfilefetcher.a \ $(TOPDIR)/src/modules/fetcher/libfetch/liblibfetchfetcher.a \ - $(TOPDIR)/libfetch/libfetch.a + $(TOPDIR)/libfetch/libfetch.a \ + $(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \ + $(TOPDIR)/src/libcrawlingwolf.a # openssl ifeq ($(WITH_SSL),1) @@ -46,12 +48,18 @@ local_clean: local_distclean: local_test: + @-echo "Using fetcher 'file'.." @-for METHOD in libmagic; do \ echo "Using MIME type detector '$$METHOD'.." ; \ - ./exec_test test1 test1 "detect a simple C++ file" $$METHOD `pwd`/test1.cpp ; \ - ./exec_test test1 test2 "detect a M$ word file" $$METHOD `pwd`/test2.doc ; \ - ./exec_test test1 test3 "detect a Excel file" $$METHOD `pwd`/test3.xls ; \ - ./exec_test test1 test4 "detect a HTML file" $$METHOD `pwd`/test4.html ; \ - ./exec_test test1 test5 "detect a CSS file" $$METHOD `pwd`/test5.css ; \ - ./exec_test test1 test6 "detect a CSS file" $$METHOD `pwd`/test6.js ; \ + ./exec_test test1 test1 "detect a simple C++ file" $$METHOD file file://localhost/`pwd`/test1.cpp ; \ + ./exec_test test1 test2 "detect a M$ word file" $$METHOD file file://localhost/`pwd`/test2.doc ; \ + ./exec_test test1 test3 "detect a Excel file" $$METHOD file file://localhost/`pwd`/test3.xls ; \ + ./exec_test test1 test4 "detect a HTML file" $$METHOD file file://localhost/`pwd`/test4.html ; \ + ./exec_test test1 test5 "detect a CSS file" $$METHOD file file://localhost/`pwd`/test5.css ; \ + ./exec_test test1 test6 "detect a Javascript file" $$METHOD file file://localhost/`pwd`/test6.js ; \ + done + @-echo "Using fetcher 'libfetch'" + @-for METHOD in libmagic; do \ + echo "Using MIME type detector '$$METHOD'.." ; \ + ./exec_test test1 test100 "detect a HTML file" $$METHOD libfetch http://www.andreasbaumann.cc ; \ done diff --git a/tests/typedetect/test1.cpp b/tests/typedetect/test1.cpp index 3269e54..96bed30 100644 --- a/tests/typedetect/test1.cpp +++ b/tests/typedetect/test1.cpp @@ -1,12 +1,14 @@ #ifdef USE_MODULELOADER #include "TypeDetect.hpp" #include "ModuleLoader.hpp" +#include "Fetcher.hpp" #else #include "LibMagicTypeDetect.hpp" -#endif - #include "FileFetcher.hpp" #include "LibFetchFetcher.hpp" +#endif + +#include "SimpleURLNormalizer.hpp" #include <vector> #include <iostream> @@ -17,13 +19,14 @@ using namespace std; int main( int argc, char *argv[] ) { - if( argc < 3 ) { - cerr << "usage: test1 <method> <file>\n" << endl; + if( argc < 4 ) { + cerr << "usage: test1 <method> <fetcher> <URL>\n" << endl; return 1; } char *method = argv[1]; - char *file = argv[2]; + char *fetcherMethod = argv[2]; + char *urlStr = argv[3]; #ifdef USE_MODULELOADER vector<string> modules; @@ -41,15 +44,27 @@ int main( int argc, char *argv[] ) } #endif - URL url( "file", "localhost", 0, file, "", "" ); - FileFetcher fetcher; - - /* - URL url( "http", "www.andreasbaumann.cc", 80, "/index.shtml", "", "" ); - LibFetchFetcher fetcher; */ - - RewindInputStream *s = fetcher.fetch( url ); +#ifdef USE_MODULELOADER +#error TODO +#else + Fetcher *fetcher; + if( strcmp( fetcherMethod, "file" ) == 0 ) { + fetcher = new FileFetcher( ); + } else if( strcmp( fetcherMethod, "libfetch" ) == 0 ) { + fetcher = new LibFetchFetcher( ); + } else { + cerr << "Unknown fetcher method '" << fetcherMethod << "'" << endl; + return 1; + } +#endif + + SimpleURLNormalizer normalizer; + URL url = normalizer.parseUrl( urlStr ); + + RewindInputStream *s = fetcher->fetch( url ); +// (void)typeDetect->detect( s ); +// s->rewind( ); MIMEType type = typeDetect->detect( s ); delete s; @@ -58,6 +73,7 @@ int main( int argc, char *argv[] ) typeDetects.destroy( typeDetect ); #else delete typeDetect; + delete fetcher; #endif if( type == MIMEType::Null ) { diff --git a/tests/typedetect/test100.MUST b/tests/typedetect/test100.MUST new file mode 100644 index 0000000..e47c367 --- /dev/null +++ b/tests/typedetect/test100.MUST @@ -0,0 +1 @@ +MIME type: text/html |