diff options
Diffstat (limited to 'src/crawlingwolf.cpp')
-rw-r--r-- | src/crawlingwolf.cpp | 9 |
1 files changed, 4 insertions, 5 deletions
diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp index 328cc80..e924b16 100644 --- a/src/crawlingwolf.cpp +++ b/src/crawlingwolf.cpp @@ -2,9 +2,6 @@ #include "MemoryFrontier.hpp" #include "MD5Deduper.hpp" #include "HTMLLinkExtractProcessor.hpp" -#include "ChainURLFilter.hpp" -#include "ProtocolURLFilter.hpp" -#include "HostURLFilter.hpp" #include "MemoryURLSeen.hpp" #include "URLNormalizer.hpp" #include "ModuleLoader.hpp" @@ -28,6 +25,7 @@ int main( void ) Deduper *deduper = new MD5Deduper( ); URLSeen *urlSeen = new MemoryURLSeen( ); +/* set<string> protocols; protocols.insert( "http" ); protocols.insert( "https" ); @@ -38,10 +36,11 @@ int main( void ) HostURLFilter hostFilter( hosts ); ChainURLFilter filters( &protocolFilter, &hostFilter ); - +*/ URLNormalizer *normalizer = urlNormalizers.create( "google" ); - Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen ); + //Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen ); + Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, std::list( ), urlSeen ); LOG( logNOTICE ) << "Crawler started.."; |