summary | refs | log | tree | commit | diff
path: root/src/crawlingwolf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/crawlingwolf.cpp')
-rw-r--r-- src/crawlingwolf.cpp 5
1 file changed, 4 insertions, 1 deletion
diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp
index 2f4e067..ee002f4 100644
--- a/src/crawlingwolf.cpp
+++ b/src/crawlingwolf.cpp
@@ -5,6 +5,7 @@
#include "ChainURLFilter.hpp"
#include "ProtocolURLFilter.hpp"
#include "DomainURLFilter.hpp"
+#include "MemoryURLSeen.hpp"
#include <set>
@@ -15,6 +16,7 @@ int main( void )
Frontier *frontier = new MemoryFrontier( );
Fetcher *fetcher = new LibFetchFetcher( );
Deduper *deduper = new MD5Deduper( );
+ URLSeen *urlSeen = new MemoryURLSeen( );
set<string> protocols;
protocols.insert( "http" );
@@ -27,7 +29,7 @@ int main( void )
ChainURLFilter filters( &protocolFilter, &domainFilter );
- Processor *processor = new HTMLLinkExtractProcessor( frontier, &filters );
+ Processor *processor = new HTMLLinkExtractProcessor( frontier, &filters, urlSeen );
LOG( logNOTICE ) << "Crawler started..";
@@ -50,6 +52,7 @@ int main( void )
}
delete processor;
+ delete urlSeen;
delete deduper;
delete fetcher;
delete frontier;