diff options
Diffstat (limited to 'src/crawlingwolf.cpp')
-rwxr-xr-x | src/crawlingwolf.cpp | 28 |
1 file changed, 25 insertions, 3 deletions
diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp index f96ecbb..08f3eec 100755 --- a/src/crawlingwolf.cpp +++ b/src/crawlingwolf.cpp @@ -86,8 +86,8 @@ int main( void ) filterModules.push_back( "./modules/urlfilter/protocol/mod_urlfilter_protocol.so" ); filterModules.push_back( "./modules/urlfilter/host/mod_urlfilter_host.so" ); #else - normalizerModules.push_back( ".\\modules\\urlfilter\\protocol\\mod_urlfilter_protocol.dll" ); - normalizerModules.push_back( ".\\modules\\urlfilter\\host\\mod_urlfilter_host.dll" ); + filterModules.push_back( ".\\modules\\urlfilter\\protocol\\mod_urlfilter_protocol.dll" ); + filterModules.push_back( ".\\modules\\urlfilter\\host\\mod_urlfilter_host.dll" ); #endif ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules ); @@ -95,37 +95,47 @@ int main( void ) #ifndef _WIN32 filterChainModules.push_back( "./modules/urlfilter/chain/mod_urlfilter_chain.so" ); #else - normalizerModules.push_back( ".\\modules\\urlfilter\\chain\\mod_urlfilter_chain.dll" ); + filterChainModules.push_back( ".\\modules\\urlfilter\\chain\\mod_urlfilter_chain.dll" ); #endif ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules ); vector<string> frontierModules; #ifndef _WIN32 frontierModules.push_back( "./modules/frontier/memory/mod_frontier_memory.so" ); +#else + frontierModules.push_back( ".\\modules\\frontier\\memory\\mod_frontier_memory.dll" ); #endif ModuleLoader<Frontier> frontiers( frontierModules ); vector<string> fetcherModules; #ifndef _WIN32 fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" ); +#else + fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" ); #endif ModuleLoader<Fetcher> fetchers( fetcherModules ); vector<string> urlseenModules; #ifndef _WIN32 urlseenModules.push_back( "./modules/urlseen/memory/mod_urlseen_memory.so" ); +#else + urlseenModules.push_back( 
".\\modules\\urlseen\\memory\\mod_urlseen_memory.dll" ); #endif ModuleLoader<URLSeen> urlSeens( urlseenModules ); vector<string> deduperModules; #ifndef _WIN32 deduperModules.push_back( "./modules/deduper/null/mod_deduper_null.so" ); +#else + deduperModules.push_back( ".\\modules\\deduper\\null\\mod_deduper_null.dll" ); #endif ModuleLoader<Deduper> dedupers( deduperModules ); vector<string> processorModules; #ifndef _WIN32 processorModules.push_back( "./modules/processor/htmllinkextract/mod_processor_htmllinkextract.so" ); +#else + processorModules.push_back( ".\\modules\\processor\\htmllinkextract\\mod_processor_htmllinkextract.dll" ); #endif ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules ); @@ -136,10 +146,16 @@ int main( void ) ModuleLoader<TypeDetect> typeDetectors( typeDetectModules ); Frontier *frontier = frontiers.create( "memory_frontier" ); +#ifndef _WIN32 Fetcher *fetcher = fetchers.create( "libfetch_fetcher" ); +#else + Fetcher *fetcher = fetchers.create( "winhttp_fetcher" ); +#endif Deduper *deduper = dedupers.create( "null_deduper" ); URLSeen *urlSeen = urlSeens.create( "memory_urlseen" ); +#ifndef _WIN32 TypeDetect *typeDetect = typeDetectors.create( "libmagic_typedetect" ); +#endif set<string> protocols; protocols.insert( "http" ); @@ -180,6 +196,7 @@ int main( void ) continue; } +#ifndef _WIN32 MIMEType mimeType = typeDetect->detect( s ); if( mimeType != MIMEType::Null ) { @@ -191,6 +208,9 @@ int main( void ) LOG( logINFO ) << "Storing archive " << url; } } +#else + htmlParser->process( s ); +#endif delete s; } @@ -200,7 +220,9 @@ int main( void ) urlChainFilter.destroy( chainFilter ); urlFilters.destroy( protocolFilter ); urlFilters.destroy( hostFilter ); +#ifndef _WIN32 typeDetectors.destroy( typeDetect ); +#endif urlSeens.destroy( urlSeen ); dedupers.destroy( deduper ); fetchers.destroy( fetcher ); |