summary | refs | log | tree | commit | diff
path: root/src/crawl/crawl.cpp
diff options
context:
space:
mode:
author: Andreas Baumann <mail@andreasbaumann.cc> 2014-09-28 21:29:03 +0200
committer: Andreas Baumann <mail@andreasbaumann.cc> 2014-09-28 21:29:03 +0200
commit: b80687f17644766eb890598297c0f37bb898d76d (patch)
tree: 44e6a15cc058087a19dd44d44c2d1d52194a5876 /src/crawl/crawl.cpp
parent: c82a15eb0ffe61c1d2d2630981777f72013e833a (diff)
download: crawler-b80687f17644766eb890598297c0f37bb898d76d.tar.gz
crawler-b80687f17644766eb890598297c0f37bb898d76d.tar.bz2
first Lua config of crawler
Diffstat (limited to 'src/crawl/crawl.cpp')
-rwxr-xr-x src/crawl/crawl.cpp 23
1 files changed, 17 insertions, 6 deletions
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 947976f..f10075f 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -55,14 +55,18 @@ BOOL WINAPI termHandler( DWORD ctrlType )
#endif
-int main( void )
+static int counter = 0;
+
+int main( int /* argc */, char *argv[] )
{
try {
LuaVM luaVm;
-
+
// Logger::instance( ).openConsoleLog( logINFO );
Logger::instance( ).openConsoleLog( logDEBUG );
+ luaVm.loadSource( argv[1] );
+
#ifndef _WIN32
struct sigaction sa;
memset( &sa, 0, sizeof( struct sigaction ) );
@@ -173,10 +177,10 @@ int main( void )
URLFilter *protocolFilter = urlFilters.create( "protocol_urlfilter", protocols );
set<string> hosts;
- hosts.insert( "www.andreasbaumann.cc" );
+ hosts.insert( "andreasbaumann.dyndns.org" );
+// hosts.insert( "www.andreasbaumann.cc" );
// hosts.insert( "relevancy.bger.ch" );
// hosts.insert( "wolframe.net" );
-// hosts.insert( "andreasbaumann.dyndns.org" );
URLFilter *hostFilter = urlFilters.create( "host_urlfilter", hosts );
@@ -199,10 +203,10 @@ int main( void )
LOG( logNOTICE ) << "Crawler started..";
- frontier->addUrl( normalizer->parseUrl( "http://www.andreasbaumann.cc" ) );
+// frontier->addUrl( normalizer->parseUrl( "http://www.andreasbaumann.cc" ) );
// frontier->addUrl( normalizer->parseUrl( "http://relevancy.bger.ch/robots.txt" ) );
// frontier->addUrl( normalizer->parseUrl( "http://wolframe.net" ) );
-// frontier->addUrl( normalizer->parseUrl( "http://andreasbaumann.dyndns.org/nzz/" ) );
+ frontier->addUrl( normalizer->parseUrl( "http://andreasbaumann.dyndns.org/test/" ) );
URL url;
while( !term && ( url = frontier->getNextUrl( ) ) != URL::Null ) {
@@ -246,6 +250,10 @@ int main( void )
}
//~ sleep( 2 );
+ counter++;
+ if( counter > 10 ) {
+ term = true;
+ }
#else
htmlParser->process( s );
#endif
@@ -274,5 +282,8 @@ int main( void )
} catch( exception &e ) {
LOG( logFATAL ) << "Crawler stopped: " << e.what( );
return 1;
+ } catch( ... ) {
+ LOG( logFATAL ) << "Crawler stopped due to unknown exception!";
+ return 1;
}
}