summary refs log tree commit diff
path: root/src/crawlingwolf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/crawlingwolf.cpp')
-rw-r--r-- src/crawlingwolf.cpp 9
1 files changed, 4 insertions, 5 deletions
diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp
index 328cc80..e924b16 100644
--- a/src/crawlingwolf.cpp
+++ b/src/crawlingwolf.cpp
@@ -2,9 +2,6 @@
#include "MemoryFrontier.hpp"
#include "MD5Deduper.hpp"
#include "HTMLLinkExtractProcessor.hpp"
-#include "ChainURLFilter.hpp"
-#include "ProtocolURLFilter.hpp"
-#include "HostURLFilter.hpp"
#include "MemoryURLSeen.hpp"
#include "URLNormalizer.hpp"
#include "ModuleLoader.hpp"
@@ -28,6 +25,7 @@ int main( void )
Deduper *deduper = new MD5Deduper( );
URLSeen *urlSeen = new MemoryURLSeen( );
+/*
set<string> protocols;
protocols.insert( "http" );
protocols.insert( "https" );
@@ -38,10 +36,11 @@ int main( void )
HostURLFilter hostFilter( hosts );
ChainURLFilter filters( &protocolFilter, &hostFilter );
-
+*/
URLNormalizer *normalizer = urlNormalizers.create( "google" );
- Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen );
+ //Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen );
+ Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, std::list( ), urlSeen );
LOG( logNOTICE ) << "Crawler started..";