summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Andreas Baumann <abaumann@yahoo.com> 2012-08-11 14:20:00 +0200
committer: Andreas Baumann <abaumann@yahoo.com> 2012-08-11 14:20:00 +0200
commit: 1648a5d7feee25ca5bd0497bfb950db3d6544c16 (patch)
tree: cb3670f3ff3463c7699e5d2ef8866844f067d0a4 /tests
parent: 9cc10524b118f61b28763e81fa7310ae53023552 (diff)
download: crawler-1648a5d7feee25ca5bd0497bfb950db3d6544c16.tar.gz
download: crawler-1648a5d7feee25ca5bd0497bfb950db3d6544c16.tar.bz2
added fetchers for type detection tests
Diffstat (limited to 'tests')
-rw-r--r-- tests/typedetect/GNUmakefile 26
-rw-r--r-- tests/typedetect/test1.cpp 42
-rw-r--r-- tests/typedetect/test100.MUST 1
3 files changed, 47 insertions, 22 deletions
diff --git a/tests/typedetect/GNUmakefile b/tests/typedetect/GNUmakefile
index 785038c..1e3db6e 100644
--- a/tests/typedetect/GNUmakefile
+++ b/tests/typedetect/GNUmakefile
@@ -9,17 +9,19 @@ INCLUDE_DIRS = \
-I$(TOPDIR)/src \
-I$(TOPDIR)/src/modules/typedetect/libmagic \
-I$(TOPDIR)/src/modules/fetcher/file \
- -I$(TOPDIR)/src/modules/fetcher/libfetch
+ -I$(TOPDIR)/src/modules/fetcher/libfetch \
+ -I$(TOPDIR)/src/modules/urlnormalizer/simpleurl
INCLUDE_LDFLAGS =
INCLUDE_LIBS = \
- $(TOPDIR)/src/libcrawlingwolf.a \
$(TOPDIR)/src/modules/typedetect/libmagic/liblibmagictypedetect.a \
-lmagic \
$(TOPDIR)/src/modules/fetcher/file/libfilefetcher.a \
$(TOPDIR)/src/modules/fetcher/libfetch/liblibfetchfetcher.a \
- $(TOPDIR)/libfetch/libfetch.a
+ $(TOPDIR)/libfetch/libfetch.a \
+ $(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \
+ $(TOPDIR)/src/libcrawlingwolf.a
# openssl
ifeq ($(WITH_SSL),1)
@@ -46,12 +48,18 @@ local_clean:
local_distclean:
local_test:
+ @-echo "Using fetcher 'file'.."
@-for METHOD in libmagic; do \
echo "Using MIME type detector '$$METHOD'.." ; \
- ./exec_test test1 test1 "detect a simple C++ file" $$METHOD `pwd`/test1.cpp ; \
- ./exec_test test1 test2 "detect a M$ word file" $$METHOD `pwd`/test2.doc ; \
- ./exec_test test1 test3 "detect a Excel file" $$METHOD `pwd`/test3.xls ; \
- ./exec_test test1 test4 "detect a HTML file" $$METHOD `pwd`/test4.html ; \
- ./exec_test test1 test5 "detect a CSS file" $$METHOD `pwd`/test5.css ; \
- ./exec_test test1 test6 "detect a CSS file" $$METHOD `pwd`/test6.js ; \
+ ./exec_test test1 test1 "detect a simple C++ file" $$METHOD file file://localhost/`pwd`/test1.cpp ; \
+ ./exec_test test1 test2 "detect a M$ word file" $$METHOD file file://localhost/`pwd`/test2.doc ; \
+ ./exec_test test1 test3 "detect a Excel file" $$METHOD file file://localhost/`pwd`/test3.xls ; \
+ ./exec_test test1 test4 "detect a HTML file" $$METHOD file file://localhost/`pwd`/test4.html ; \
+ ./exec_test test1 test5 "detect a CSS file" $$METHOD file file://localhost/`pwd`/test5.css ; \
+ ./exec_test test1 test6 "detect a Javascript file" $$METHOD file file://localhost/`pwd`/test6.js ; \
+ done
+ @-echo "Using fetcher 'libfetch'"
+ @-for METHOD in libmagic; do \
+ echo "Using MIME type detector '$$METHOD'.." ; \
+ ./exec_test test1 test100 "detect a HTML file" $$METHOD libfetch http://www.andreasbaumann.cc ; \
done
diff --git a/tests/typedetect/test1.cpp b/tests/typedetect/test1.cpp
index 3269e54..96bed30 100644
--- a/tests/typedetect/test1.cpp
+++ b/tests/typedetect/test1.cpp
@@ -1,12 +1,14 @@
#ifdef USE_MODULELOADER
#include "TypeDetect.hpp"
#include "ModuleLoader.hpp"
+#include "Fetcher.hpp"
#else
#include "LibMagicTypeDetect.hpp"
-#endif
-
#include "FileFetcher.hpp"
#include "LibFetchFetcher.hpp"
+#endif
+
+#include "SimpleURLNormalizer.hpp"
#include <vector>
#include <iostream>
@@ -17,13 +19,14 @@ using namespace std;
int main( int argc, char *argv[] )
{
- if( argc < 3 ) {
- cerr << "usage: test1 <method> <file>\n" << endl;
+ if( argc < 4 ) {
+ cerr << "usage: test1 <method> <fetcher> <URL>\n" << endl;
return 1;
}
char *method = argv[1];
- char *file = argv[2];
+ char *fetcherMethod = argv[2];
+ char *urlStr = argv[3];
#ifdef USE_MODULELOADER
vector<string> modules;
@@ -41,15 +44,27 @@ int main( int argc, char *argv[] )
}
#endif
- URL url( "file", "localhost", 0, file, "", "" );
- FileFetcher fetcher;
-
- /*
- URL url( "http", "www.andreasbaumann.cc", 80, "/index.shtml", "", "" );
- LibFetchFetcher fetcher; */
-
- RewindInputStream *s = fetcher.fetch( url );
+#ifdef USE_MODULELOADER
+#error TODO
+#else
+ Fetcher *fetcher;
+ if( strcmp( fetcherMethod, "file" ) == 0 ) {
+ fetcher = new FileFetcher( );
+ } else if( strcmp( fetcherMethod, "libfetch" ) == 0 ) {
+ fetcher = new LibFetchFetcher( );
+ } else {
+ cerr << "Unknown fetcher method '" << fetcherMethod << "'" << endl;
+ return 1;
+ }
+#endif
+
+ SimpleURLNormalizer normalizer;
+ URL url = normalizer.parseUrl( urlStr );
+
+ RewindInputStream *s = fetcher->fetch( url );
+// (void)typeDetect->detect( s );
+// s->rewind( );
MIMEType type = typeDetect->detect( s );
delete s;
@@ -58,6 +73,7 @@ int main( int argc, char *argv[] )
typeDetects.destroy( typeDetect );
#else
delete typeDetect;
+ delete fetcher;
#endif
if( type == MIMEType::Null ) {
diff --git a/tests/typedetect/test100.MUST b/tests/typedetect/test100.MUST
new file mode 100644
index 0000000..e47c367
--- /dev/null
+++ b/tests/typedetect/test100.MUST
@@ -0,0 +1 @@
+MIME type: text/html