diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 17:43:31 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 17:43:31 +0200 |
commit | 63929b266e3000374c5e5161e4495d64142b907e (patch) | |
tree | 322651caeb4bbd50d02363fe488ea6bf127e09d9 | |
parent | 33aedc4334997239a14d1cf287031a9a9c3a3a13 (diff) | |
download | crawler-63929b266e3000374c5e5161e4495d64142b907e.tar.gz crawler-63929b266e3000374c5e5161e4495d64142b907e.tar.bz2 |
Improved error handling in the module loader.
crawlingwolf.exe now starts on Windows; the fetcher module is still missing.
-rwxr-xr-x | src/Makefile.W32 | 3 | ||||
-rwxr-xr-x | src/ModuleLoader.hpp | 6 | ||||
-rwxr-xr-x | src/crawlingwolf.cpp | 28 | ||||
-rw-r--r-- | src/modules/urlfilter/chain/ChainURLFilter.cpp | 2 | ||||
-rwxr-xr-x | src/win32/errormsg.cpp | 27 | ||||
-rwxr-xr-x | src/win32/errormsg.hpp | 8 | ||||
-rwxr-xr-x | utils/win32/Makefile.W32 | 2 |
7 files changed, 68 insertions, 8 deletions
diff --git a/src/Makefile.W32 b/src/Makefile.W32 index b1fd26f..c44711d 100755 --- a/src/Makefile.W32 +++ b/src/Makefile.W32 @@ -15,6 +15,7 @@ INCLUDE_LDFLAGS = \ INCLUDE_LIBS = \ LOCAL_STATIC_LIB_OBJS = \ + win32\errormsg.obj \ URL.obj \ MIMEType.obj @@ -40,7 +41,7 @@ local_all: $(LOCAL_STATIC_LIB) local_clean: @-erase $(LOCAL_STATIC_LIB) 2>NUL - @-erase $(CPP_OBJS) 2>NUL + @-erase $(CPP_OBJS) win32\*.obj 2>NUL @-erase test.bat 2>NUL local_distclean: diff --git a/src/ModuleLoader.hpp b/src/ModuleLoader.hpp index 07328e8..a7ecd34 100755 --- a/src/ModuleLoader.hpp +++ b/src/ModuleLoader.hpp @@ -13,6 +13,7 @@ #else #define WIN32_MEAN_AND_LEAN #include <windows.h> +#include "win32/errormsg.hpp" #endif #include "ModuleRegistry.hpp" @@ -57,8 +58,9 @@ class BaseModuleLoader { #ifndef _WIN32 throw std::runtime_error( dlerror( ) ); #else - // TODO: error message here - throw std::runtime_error( "Module load error" ); + std::ostringstream ss; + ss << "Module '" << *it << "' loading error: " << getLastError( ); + throw std::runtime_error( ss.str( ) ); #endif } diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp index f96ecbb..08f3eec 100755 --- a/src/crawlingwolf.cpp +++ b/src/crawlingwolf.cpp @@ -86,8 +86,8 @@ int main( void ) filterModules.push_back( "./modules/urlfilter/protocol/mod_urlfilter_protocol.so" ); filterModules.push_back( "./modules/urlfilter/host/mod_urlfilter_host.so" ); #else - normalizerModules.push_back( ".\\modules\\urlfilter\\protocol\\mod_urlfilter_protocol.dll" ); - normalizerModules.push_back( ".\\modules\\urlfilter\\host\\mod_urlfilter_host.dll" ); + filterModules.push_back( ".\\modules\\urlfilter\\protocol\\mod_urlfilter_protocol.dll" ); + filterModules.push_back( ".\\modules\\urlfilter\\host\\mod_urlfilter_host.dll" ); #endif ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules ); @@ -95,37 +95,47 @@ int main( void ) #ifndef _WIN32 filterChainModules.push_back( "./modules/urlfilter/chain/mod_urlfilter_chain.so" 
); #else - normalizerModules.push_back( ".\\modules\\urlfilter\\chain\\mod_urlfilter_chain.dll" ); + filterChainModules.push_back( ".\\modules\\urlfilter\\chain\\mod_urlfilter_chain.dll" ); #endif ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules ); vector<string> frontierModules; #ifndef _WIN32 frontierModules.push_back( "./modules/frontier/memory/mod_frontier_memory.so" ); +#else + frontierModules.push_back( ".\\modules\\frontier\\memory\\mod_frontier_memory.dll" ); #endif ModuleLoader<Frontier> frontiers( frontierModules ); vector<string> fetcherModules; #ifndef _WIN32 fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" ); +#else + fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" ); #endif ModuleLoader<Fetcher> fetchers( fetcherModules ); vector<string> urlseenModules; #ifndef _WIN32 urlseenModules.push_back( "./modules/urlseen/memory/mod_urlseen_memory.so" ); +#else + urlseenModules.push_back( ".\\modules\\urlseen\\memory\\mod_urlseen_memory.dll" ); #endif ModuleLoader<URLSeen> urlSeens( urlseenModules ); vector<string> deduperModules; #ifndef _WIN32 deduperModules.push_back( "./modules/deduper/null/mod_deduper_null.so" ); +#else + deduperModules.push_back( ".\\modules\\deduper\\null\\mod_deduper_null.dll" ); #endif ModuleLoader<Deduper> dedupers( deduperModules ); vector<string> processorModules; #ifndef _WIN32 processorModules.push_back( "./modules/processor/htmllinkextract/mod_processor_htmllinkextract.so" ); +#else + processorModules.push_back( ".\\modules\\processor\\htmllinkextract\\mod_processor_htmllinkextract.dll" ); #endif ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules ); @@ -136,10 +146,16 @@ int main( void ) ModuleLoader<TypeDetect> typeDetectors( typeDetectModules ); Frontier *frontier = frontiers.create( "memory_frontier" ); +#ifndef _WIN32 Fetcher *fetcher = 
fetchers.create( "libfetch_fetcher" ); +#else + Fetcher *fetcher = fetchers.create( "winhttp_fetcher" ); +#endif Deduper *deduper = dedupers.create( "null_deduper" ); URLSeen *urlSeen = urlSeens.create( "memory_urlseen" ); +#ifndef _WIN32 TypeDetect *typeDetect = typeDetectors.create( "libmagic_typedetect" ); +#endif set<string> protocols; protocols.insert( "http" ); @@ -180,6 +196,7 @@ int main( void ) continue; } +#ifndef _WIN32 MIMEType mimeType = typeDetect->detect( s ); if( mimeType != MIMEType::Null ) { @@ -191,6 +208,9 @@ int main( void ) LOG( logINFO ) << "Storing archive " << url; } } +#else + htmlParser->process( s ); +#endif delete s; } @@ -200,7 +220,9 @@ int main( void ) urlChainFilter.destroy( chainFilter ); urlFilters.destroy( protocolFilter ); urlFilters.destroy( hostFilter ); +#ifndef _WIN32 typeDetectors.destroy( typeDetect ); +#endif urlSeens.destroy( urlSeen ); dedupers.destroy( deduper ); fetchers.destroy( fetcher ); diff --git a/src/modules/urlfilter/chain/ChainURLFilter.cpp b/src/modules/urlfilter/chain/ChainURLFilter.cpp index 4dcf493..b4a8cf3 100644 --- a/src/modules/urlfilter/chain/ChainURLFilter.cpp +++ b/src/modules/urlfilter/chain/ChainURLFilter.cpp @@ -16,4 +16,4 @@ bool ChainURLFilter::filter( const URL url ) return true; } -REGISTER_MODULE_1( "chain_urlfiler", URLFilter, ChainURLFilter, const std::list<URLFilter *> ) +REGISTER_MODULE_1( "chain_urlfilter", URLFilter, ChainURLFilter, const std::list<URLFilter *> ) diff --git a/src/win32/errormsg.cpp b/src/win32/errormsg.cpp new file mode 100755 index 0000000..1b58ea3 --- /dev/null +++ b/src/win32/errormsg.cpp @@ -0,0 +1,27 @@ +#include "errormsg.hpp" + +using namespace std; + +#define WIN32_MEAN_AND_LEAN +#include <windows.h> + +string getLastError( ) +{ + LPTSTR buf; + DWORD size; + + DWORD lastErr = GetLastError( ); + + if( !FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, 
lastErr, 0, (LPTSTR)&buf, + 0, NULL ) ) { + return "<no message available>"; + } + + return string( buf ); +} + diff --git a/src/win32/errormsg.hpp b/src/win32/errormsg.hpp new file mode 100755 index 0000000..443e27d --- /dev/null +++ b/src/win32/errormsg.hpp @@ -0,0 +1,8 @@ +#ifndef __ERRORMSG_H +#define __ERRORMSG_H + +#include <string> + +std::string getLastError( ); + +#endif diff --git a/utils/win32/Makefile.W32 b/utils/win32/Makefile.W32 index 2edb2d1..c8acd52 100755 --- a/utils/win32/Makefile.W32 +++ b/utils/win32/Makefile.W32 @@ -18,7 +18,7 @@ INCLUDE_LIBS = \ CPP_BINS = \ dos2unix.exe -OBJS = +OBJS = \ !INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk |