diff options
Diffstat (limited to 'src/crawl/crawl.cpp')
-rwxr-xr-x | src/crawl/crawl.cpp | 23 |
1 file changed, 17 insertions, 6 deletions
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp index 947976f..f10075f 100755 --- a/src/crawl/crawl.cpp +++ b/src/crawl/crawl.cpp @@ -55,14 +55,18 @@ BOOL WINAPI termHandler( DWORD ctrlType ) #endif -int main( void ) +static int counter = 0; + +int main( int /* argc */, char *argv[] ) { try { LuaVM luaVm; - + // Logger::instance( ).openConsoleLog( logINFO ); Logger::instance( ).openConsoleLog( logDEBUG ); + luaVm.loadSource( argv[1] ); + #ifndef _WIN32 struct sigaction sa; memset( &sa, 0, sizeof( struct sigaction ) ); @@ -173,10 +177,10 @@ int main( void ) URLFilter *protocolFilter = urlFilters.create( "protocol_urlfilter", protocols ); set<string> hosts; - hosts.insert( "www.andreasbaumann.cc" ); + hosts.insert( "andreasbaumann.dyndns.org" ); +// hosts.insert( "www.andreasbaumann.cc" ); // hosts.insert( "relevancy.bger.ch" ); // hosts.insert( "wolframe.net" ); -// hosts.insert( "andreasbaumann.dyndns.org" ); URLFilter *hostFilter = urlFilters.create( "host_urlfilter", hosts ); @@ -199,10 +203,10 @@ int main( void ) LOG( logNOTICE ) << "Crawler started.."; - frontier->addUrl( normalizer->parseUrl( "http://www.andreasbaumann.cc" ) ); +// frontier->addUrl( normalizer->parseUrl( "http://www.andreasbaumann.cc" ) ); // frontier->addUrl( normalizer->parseUrl( "http://relevancy.bger.ch/robots.txt" ) ); // frontier->addUrl( normalizer->parseUrl( "http://wolframe.net" ) ); -// frontier->addUrl( normalizer->parseUrl( "http://andreasbaumann.dyndns.org/nzz/" ) ); + frontier->addUrl( normalizer->parseUrl( "http://andreasbaumann.dyndns.org/test/" ) ); URL url; while( !term && ( url = frontier->getNextUrl( ) ) != URL::Null ) { @@ -246,6 +250,10 @@ int main( void ) } //~ sleep( 2 ); + counter++; + if( counter > 10 ) { + term = true; + } #else htmlParser->process( s ); #endif @@ -274,5 +282,8 @@ int main( void ) } catch( exception &e ) { LOG( logFATAL ) << "Crawler stopped: " << e.what( ); return 1; + } catch( ... 
) { + LOG( logFATAL ) << "Crawler stopped due to unknown exception!"; + return 1; } } |