diff options
Diffstat (limited to 'src/crawlingwolf.cpp')
-rw-r--r-- | src/crawlingwolf.cpp | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp
index 2f4e067..ee002f4 100644
--- a/src/crawlingwolf.cpp
+++ b/src/crawlingwolf.cpp
@@ -5,6 +5,7 @@
 #include "ChainURLFilter.hpp"
 #include "ProtocolURLFilter.hpp"
 #include "DomainURLFilter.hpp"
+#include "MemoryURLSeen.hpp"
 
 #include <set>
 
@@ -15,6 +16,7 @@ int main( void )
     Frontier *frontier = new MemoryFrontier( );
     Fetcher *fetcher = new LibFetchFetcher( );
     Deduper *deduper = new MD5Deduper( );
+    URLSeen *urlSeen = new MemoryURLSeen( );
 
     set<string> protocols;
     protocols.insert( "http" );
@@ -27,7 +29,7 @@ int main( void )
 
     ChainURLFilter filters( &protocolFilter, &domainFilter );
 
-    Processor *processor = new HTMLLinkExtractProcessor( frontier, &filters );
+    Processor *processor = new HTMLLinkExtractProcessor( frontier, &filters, urlSeen );
 
     LOG( logNOTICE ) << "Crawler started..";
 
@@ -50,6 +52,7 @@ int main( void )
     }
 
     delete processor;
+    delete urlSeen;
     delete deduper;
     delete fetcher;
     delete frontier;