summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-08-12 17:43:31 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-08-12 17:43:31 +0200
commit 63929b266e3000374c5e5161e4495d64142b907e (patch)
tree 322651caeb4bbd50d02363fe488ea6bf127e09d9
parent 33aedc4334997239a14d1cf287031a9a9c3a3a13 (diff)
download crawler-63929b266e3000374c5e5161e4495d64142b907e.tar.gz
crawler-63929b266e3000374c5e5161e4495d64142b907e.tar.bz2
improved error handling in module loader
crawlingwolf.exe starts on Windows, fetcher still missing
-rwxr-xr-x src/Makefile.W32 3
-rwxr-xr-x src/ModuleLoader.hpp 6
-rwxr-xr-x src/crawlingwolf.cpp 28
-rw-r--r-- src/modules/urlfilter/chain/ChainURLFilter.cpp 2
-rwxr-xr-x src/win32/errormsg.cpp 27
-rwxr-xr-x src/win32/errormsg.hpp 8
-rwxr-xr-x utils/win32/Makefile.W32 2
7 files changed, 68 insertions, 8 deletions
diff --git a/src/Makefile.W32 b/src/Makefile.W32
index b1fd26f..c44711d 100755
--- a/src/Makefile.W32
+++ b/src/Makefile.W32
@@ -15,6 +15,7 @@ INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
LOCAL_STATIC_LIB_OBJS = \
+ win32\errormsg.obj \
URL.obj \
MIMEType.obj
@@ -40,7 +41,7 @@ local_all: $(LOCAL_STATIC_LIB)
local_clean:
@-erase $(LOCAL_STATIC_LIB) 2>NUL
- @-erase $(CPP_OBJS) 2>NUL
+ @-erase $(CPP_OBJS) win32\*.obj 2>NUL
@-erase test.bat 2>NUL
local_distclean:
diff --git a/src/ModuleLoader.hpp b/src/ModuleLoader.hpp
index 07328e8..a7ecd34 100755
--- a/src/ModuleLoader.hpp
+++ b/src/ModuleLoader.hpp
@@ -13,6 +13,7 @@
#else
#define WIN32_MEAN_AND_LEAN
#include <windows.h>
+#include "win32/errormsg.hpp"
#endif
#include "ModuleRegistry.hpp"
@@ -57,8 +58,9 @@ class BaseModuleLoader {
#ifndef _WIN32
throw std::runtime_error( dlerror( ) );
#else
- // TODO: error message here
- throw std::runtime_error( "Module load error" );
+ std::ostringstream ss;
+ ss << "Module '" << *it << "' loading error: " << getLastError( );
+ throw std::runtime_error( ss.str( ) );
#endif
}
diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp
index f96ecbb..08f3eec 100755
--- a/src/crawlingwolf.cpp
+++ b/src/crawlingwolf.cpp
@@ -86,8 +86,8 @@ int main( void )
filterModules.push_back( "./modules/urlfilter/protocol/mod_urlfilter_protocol.so" );
filterModules.push_back( "./modules/urlfilter/host/mod_urlfilter_host.so" );
#else
- normalizerModules.push_back( ".\\modules\\urlfilter\\protocol\\mod_urlfilter_protocol.dll" );
- normalizerModules.push_back( ".\\modules\\urlfilter\\host\\mod_urlfilter_host.dll" );
+ filterModules.push_back( ".\\modules\\urlfilter\\protocol\\mod_urlfilter_protocol.dll" );
+ filterModules.push_back( ".\\modules\\urlfilter\\host\\mod_urlfilter_host.dll" );
#endif
ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules );
@@ -95,37 +95,47 @@ int main( void )
#ifndef _WIN32
filterChainModules.push_back( "./modules/urlfilter/chain/mod_urlfilter_chain.so" );
#else
- normalizerModules.push_back( ".\\modules\\urlfilter\\chain\\mod_urlfilter_chain.dll" );
+ filterChainModules.push_back( ".\\modules\\urlfilter\\chain\\mod_urlfilter_chain.dll" );
#endif
ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules );
vector<string> frontierModules;
#ifndef _WIN32
frontierModules.push_back( "./modules/frontier/memory/mod_frontier_memory.so" );
+#else
+ frontierModules.push_back( ".\\modules\\frontier\\memory\\mod_frontier_memory.dll" );
#endif
ModuleLoader<Frontier> frontiers( frontierModules );
vector<string> fetcherModules;
#ifndef _WIN32
fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
+#else
+ fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" );
#endif
ModuleLoader<Fetcher> fetchers( fetcherModules );
vector<string> urlseenModules;
#ifndef _WIN32
urlseenModules.push_back( "./modules/urlseen/memory/mod_urlseen_memory.so" );
+#else
+ urlseenModules.push_back( ".\\modules\\urlseen\\memory\\mod_urlseen_memory.dll" );
#endif
ModuleLoader<URLSeen> urlSeens( urlseenModules );
vector<string> deduperModules;
#ifndef _WIN32
deduperModules.push_back( "./modules/deduper/null/mod_deduper_null.so" );
+#else
+ deduperModules.push_back( ".\\modules\\deduper\\null\\mod_deduper_null.dll" );
#endif
ModuleLoader<Deduper> dedupers( deduperModules );
vector<string> processorModules;
#ifndef _WIN32
processorModules.push_back( "./modules/processor/htmllinkextract/mod_processor_htmllinkextract.so" );
+#else
+ processorModules.push_back( ".\\modules\\processor\\htmllinkextract\\mod_processor_htmllinkextract.dll" );
#endif
ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules );
@@ -136,10 +146,16 @@ int main( void )
ModuleLoader<TypeDetect> typeDetectors( typeDetectModules );
Frontier *frontier = frontiers.create( "memory_frontier" );
+#ifndef _WIN32
Fetcher *fetcher = fetchers.create( "libfetch_fetcher" );
+#else
+ Fetcher *fetcher = fetchers.create( "winhttp_fetcher" );
+#endif
Deduper *deduper = dedupers.create( "null_deduper" );
URLSeen *urlSeen = urlSeens.create( "memory_urlseen" );
+#ifndef _WIN32
TypeDetect *typeDetect = typeDetectors.create( "libmagic_typedetect" );
+#endif
set<string> protocols;
protocols.insert( "http" );
@@ -180,6 +196,7 @@ int main( void )
continue;
}
+#ifndef _WIN32
MIMEType mimeType = typeDetect->detect( s );
if( mimeType != MIMEType::Null ) {
@@ -191,6 +208,9 @@ int main( void )
LOG( logINFO ) << "Storing archive " << url;
}
}
+#else
+ htmlParser->process( s );
+#endif
delete s;
}
@@ -200,7 +220,9 @@ int main( void )
urlChainFilter.destroy( chainFilter );
urlFilters.destroy( protocolFilter );
urlFilters.destroy( hostFilter );
+#ifndef _WIN32
typeDetectors.destroy( typeDetect );
+#endif
urlSeens.destroy( urlSeen );
dedupers.destroy( deduper );
fetchers.destroy( fetcher );
diff --git a/src/modules/urlfilter/chain/ChainURLFilter.cpp b/src/modules/urlfilter/chain/ChainURLFilter.cpp
index 4dcf493..b4a8cf3 100644
--- a/src/modules/urlfilter/chain/ChainURLFilter.cpp
+++ b/src/modules/urlfilter/chain/ChainURLFilter.cpp
@@ -16,4 +16,4 @@ bool ChainURLFilter::filter( const URL url )
return true;
}
-REGISTER_MODULE_1( "chain_urlfiler", URLFilter, ChainURLFilter, const std::list<URLFilter *> )
+REGISTER_MODULE_1( "chain_urlfilter", URLFilter, ChainURLFilter, const std::list<URLFilter *> )
diff --git a/src/win32/errormsg.cpp b/src/win32/errormsg.cpp
new file mode 100755
index 0000000..1b58ea3
--- /dev/null
+++ b/src/win32/errormsg.cpp
@@ -0,0 +1,27 @@
+#include "errormsg.hpp"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+// Returns the system message text for the calling thread's last Win32 error,
+// or "<no message available>" when FormatMessage cannot produce one.
+std::string getLastError( )
+{
+	LPSTR buf = NULL;
+	const DWORD lastErr = GetLastError( );
+
+	// ANSI variant: the text goes into a std::string, LPTSTR breaks UNICODE builds.
+	if( !FormatMessageA(
+		FORMAT_MESSAGE_ALLOCATE_BUFFER |
+		FORMAT_MESSAGE_FROM_SYSTEM |
+		FORMAT_MESSAGE_IGNORE_INSERTS |
+		FORMAT_MESSAGE_MAX_WIDTH_MASK,
+		NULL, lastErr, 0, reinterpret_cast<LPSTR>( &buf ),
+		0, NULL ) ) {
+		return "<no message available>";
+	}
+
+	std::string msg( buf );
+	LocalFree( buf ); // the ALLOCATE_BUFFER result must be released by the caller
+	return msg;
+}
diff --git a/src/win32/errormsg.hpp b/src/win32/errormsg.hpp
new file mode 100755
index 0000000..443e27d
--- /dev/null
+++ b/src/win32/errormsg.hpp
@@ -0,0 +1,8 @@
+#ifndef ERRORMSG_HPP
+#define ERRORMSG_HPP
+
+#include <string>
+
+// Message text for the calling thread's last Win32 error code.
+std::string getLastError( );
+#endif
diff --git a/utils/win32/Makefile.W32 b/utils/win32/Makefile.W32
index 2edb2d1..c8acd52 100755
--- a/utils/win32/Makefile.W32
+++ b/utils/win32/Makefile.W32
@@ -18,7 +18,7 @@ INCLUDE_LIBS = \
CPP_BINS = \
dos2unix.exe
-OBJS =
+OBJS = \
!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk