diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/crawl/crawl.conf | 12 | ||||
-rwxr-xr-x | src/crawl/crawl.cpp | 72 |
2 files changed, 65 insertions, 19 deletions
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 154d90a..29c3620 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -81,24 +81,24 @@ filters = {
 
 
 function init( )
-	io.write( "Init..\n" )
+	log( "NOTICE", "Init.." )
 	-- normalizer = urlnormalizers.create( "google_urlnormalizer" );
 	normalizer = GoogleURLNormalizer:new( )
 	-- normalizer2 = urlnormalizers.create( "simple_urlnormalizer" );
 	normalizer2 = SimpleURLNormalizer:new( )
 	base = tolua.cast( normalizer, "URLNormalizer" )
-	io.write( "type: " .. tolua.type( base ) .. "\n" )
+	log( "DEBUG", "type: " .. tolua.type( base ) )
 end
 
 function destroy( )
-	io.write( "Destroy..\n" )
+	log( "NOTICE", "Destroy.." )
 	normalizer:delete( )
 end
 
 function crawl( )
-	io.write( "Crawling..\n" )
+	log( "NOTICE", "Crawling.." )
 	local baseUrl = base:parseUrl( "http://www.base.com" )
-	io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
+	log( "DEBUG", "base URL is: " .. baseUrl:str( ) )
 	local url = base:normalize( baseUrl, "/relativedir/relativefile.html" )
-	io.write( "URL is: " .. url:str( ) .. "\n" )
+	log( "DEBUG", "URL is: " .. url:str( ) )
 end
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 77b6876..46b4f16 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -90,6 +90,47 @@ static vector<string> searchModuleFiles( const vector<string> &modules, const ve
 	return moduleFiles;
 }
 
+static int lua_log( lua_State *l )
+{
+	size_t nofParams = lua_gettop( l );
+
+	if( nofParams == 0 ) return 0;
+
+	const char *logLevel = luaL_checkstring( l, 1 );
+	ostringstream ss;
+
+	for( size_t i = 2; i <= nofParams; i++ ) {
+		int type = lua_type( l, i );
+
+		switch( type ) {
+			case LUA_TNIL:
+				ss << "<nil>";
+				break;
+
+			case LUA_TSTRING:
+				ss << lua_tostring( l, i );
+				break;
+
+			case LUA_TNUMBER:
+				ss << lua_tonumber( l, i );
+				break;
+
+			default:
+				ss << "<unknown type " << lua_typename( l, lua_type( l, i ) ) << ">";
+				break;
+		}
+		if( i != nofParams ) {
+			ss << " ";
+		}
+	}
+
+	LOG( Logger::fromString( logLevel ) ) << ss.str( );
+
+	lua_pop( l, nofParams );
+
+	return 0;
+}
+
 int main( int /* argc */, char *argv[] )
 {
 	try {
@@ -98,13 +139,18 @@ int main( int /* argc */, char *argv[] )
 		initialize_libcrawler( (void *)&luaVm );
 
 		//Logger::instance( ).openConsoleLog( logDEBUG );
-		
-		// load configuration (Lua) and execute main (to
-		// get basic configuration in form of global
-		// variables)
+
+		// load configuration (Lua)
 		luaVm.loadSource( argv[1] );
+
+		// register logging function
+		lua_pushcclosure( luaVm.handle( ), &lua_log, 0 );
+		lua_setglobal( luaVm.handle( ), "log" );
+
+		// execute main (to get basic configuration in form
+		// of global variables)
 		luaVm.executeMain( );
-		
+
 		std::string logLevel = luaVm.getString( "logger.level" );
 		Logger::instance( ).openConsoleLog( Logger::fromString( logLevel ) );
 
@@ -138,35 +184,35 @@ int main( int /* argc */, char *argv[] )
 
 		modules = luaVm.getStringArray( "modules.urlfilters" );
 		vector<string> filterModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules );
+		ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.urlchainfilters" );
 		vector<string> filterChainModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules );
+		ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.urlfrontiers" );
 		vector<string> frontierModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<Frontier> frontiers( frontierModules );
+		ModuleLoader<Frontier> frontiers( frontierModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.fetchers" );
 		vector<string> fetcherModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<Fetcher> fetchers( fetcherModules );
+		ModuleLoader<Fetcher> fetchers( fetcherModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.urlseens" );
 		vector<string> urlseenModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<URLSeen> urlSeens( urlseenModules );
+		ModuleLoader<URLSeen> urlSeens( urlseenModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.dedupers" );
 		vector<string> deduperModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<Deduper> dedupers( deduperModules );
+		ModuleLoader<Deduper> dedupers( deduperModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.processors" );
 		vector<string> processorModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules );
+		ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		modules = luaVm.getStringArray( "modules.typedetects" );
 		vector<string> typeDetectModules = searchModuleFiles( modules, allModuleFiles );
-		ModuleLoader<TypeDetect> typeDetectors( typeDetectModules );
+		ModuleLoader<TypeDetect> typeDetectors( typeDetectModules, CLOSE_DEFERRED, (void *)&luaVm );
 
 		// initialize crawler function
 		luaVm.executeFunction( "init" );