From c5351f58bcf494a56ecfd17fe5e68eb3b17dac7d Mon Sep 17 00:00:00 2001
From: Andreas Baumann
Date: Fri, 10 Oct 2014 11:26:05 +0200
Subject: added execution function to LuaVm reorganized main lua.conf and calling several functions

---
 include/luaglue/LuaVM.hpp |  1 +
 src/crawl/crawl.conf      | 33 ++++++++++++++++++++++++---------
 src/crawl/crawl.cpp       | 31 +++++++++++++++++++++----------
 src/libluaglue/LuaVM.cpp  | 31 ++++++++++++++++++++++++++++---
 4 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/include/luaglue/LuaVM.hpp b/include/luaglue/LuaVM.hpp
index bd2fd21..8fe781b 100755
--- a/include/luaglue/LuaVM.hpp
+++ b/include/luaglue/LuaVM.hpp
@@ -14,7 +14,8 @@ class LuaVM
 		LUAGLUE_DLL_VISIBLE ~LuaVM( );
 
 		LUAGLUE_DLL_VISIBLE void loadSource( const char *sourceFilename );
 		LUAGLUE_DLL_VISIBLE void executeMain( );
+		LUAGLUE_DLL_VISIBLE void executeFunction( const std::string &f );
 		LUAGLUE_DLL_VISIBLE void dumpState( );
 		LUAGLUE_DLL_VISIBLE void fullGarbageCollect( );
 
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 7c3fb80..816f23f 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -1,17 +1,12 @@
-local normalizer = GoogleURLNormalizer:new( )
-local baseUrl = normalizer:parseUrl( "http://www.base.com" )
-io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
-local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
-io.write( "URL is: " .. url:str( ) .. "\n" )
-normalizer:delete( )
-
 -- global setting
 
 
 crawler = {
 
 	-- stop after N documents
-	stop_after_N_operations = 10
+	stop_after_N_operations = 10,
+	module_path = modules,
+	modules_search_recursive = true
 }
 
 logger = {
@@ -25,6 +20,11 @@ seeds = {
 	"http://wolframe.net"
 }
 
+urlnormalizers = {
+	"mod_normalizer_simple",
+	"mod_normalizer_google"
+}
+
 filters = {
 	-- allowed protocols to be fetched
 	protocols = {
@@ -39,6 +39,21 @@ filters = {
 	}
 }
 
+
+function init( )
+	io.write( "Init..\n" )
+	normalizer = GoogleURLNormalizer:new( )
+end
+
+function destroy( )
+	io.write( "Destroy..\n" )
+	normalizer:delete( )
+end
+
 function crawl( )
-	io.write( "Crawling." )
+	io.write( "Crawling..\n" )
+	local baseUrl = normalizer:parseUrl( "http://www.base.com" )
+	io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
+	local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
+	io.write( "URL is: " .. url:str( ) .. "\n" )
 end
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 3a8fdff..e26de0a 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -71,8 +71,11 @@ int main( int /* argc */, char *argv[] )
 //		Logger::instance( ).openConsoleLog( logINFO );
 		Logger::instance( ).openConsoleLog( logDEBUG );
 
+		// load configuration (Lua) and execute main (to
+		// get basic configuration in form of global
+		// variables)
 		luaVm.loadSource( argv[1] );
-		//luaVm.executeMain( );
+		luaVm.executeMain( );
 
 #ifndef _WIN32
 		struct sigaction sa;
@@ -86,6 +89,7 @@ int main( int /* argc */, char *argv[] )
 		SetConsoleCtrlHandler( termHandler, TRUE );
 #endif
 
+		// go through all types of modules and load them with the proper loader
 		LOG( logNOTICE ) << "Loading modules";
 
 		vector normalizerModules;
@@ -98,6 +102,22 @@ int main( int /* argc */, char *argv[] )
 #endif
 		ModuleLoader urlNormalizers( normalizerModules, CLOSE_DEFERRED, (void *)&luaVm );
 
+#ifdef WITH_LUA
+		// TODO: should be in the loading function of libcrawl
+		tolua_URL_open( luaVm.handle( ) );
+#endif
+
+		// initialize crawler function
+		luaVm.executeFunction( "init" );
+
+		// perform a crawl step
+		luaVm.executeFunction( "crawl" );
+
+		// cleaning up
+		luaVm.executeFunction( "destroy" );
+
+		return 0;
+
 		vector filterModules;
 #ifndef _WIN32
 		filterModules.push_back( "./modules/urlfilter/protocol/mod_urlfilter_protocol.so" );
@@ -287,15 +307,6 @@ int main( int /* argc */, char *argv[] )
 
 		LOG( logNOTICE ) << "Crawler stopped.. normal shutdown..";
 
-#ifdef WITH_LUA
-		// TODO: should be in the laoding function of libcrawl
-		tolua_URL_open( luaVm.handle( ) );
-#endif
-
-		luaVm.executeMain( );
-		//luaVm.dumpState( );
-
-		return 0;
 	} catch( exception &e ) {
 		LOG( logFATAL ) << "Crawler stopped: " << e.what( );
 		return 1;
diff --git a/src/libluaglue/LuaVM.cpp b/src/libluaglue/LuaVM.cpp
index d587897..7b165f1 100644
--- a/src/libluaglue/LuaVM.cpp
+++ b/src/libluaglue/LuaVM.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 
 using namespace std;
 
@@ -50,9 +51,7 @@ void LuaVM::loadSource( const char *sourceFilename )
 
 void LuaVM::executeMain( )
 {
-	int res;
-
-	res = lua_pcall( m_lua, 0, LUA_MULTRET, 0 );
+	int res = lua_pcall( m_lua, 0, 0, 0 );
 	if( res != 0 ) {
 		ostringstream ss;
 		ss << "Can't execute main body of Lua source file '" << m_sourceFilename << "': " << lua_tostring( m_lua, -1 );
@@ -61,6 +60,32 @@ void LuaVM::executeMain( )
 	}
 }
 
+/**
+ * Calls the global Lua function named 'f' without arguments.
+ *
+ * Values returned by the Lua function are currently discarded; the Lua
+ * stack is restored to the height it had before the call.
+ *
+ * Throws std::runtime_error (by value) if the call fails, e.g. because
+ * 'f' is not a callable global or the function raises a Lua error.
+ */
+void LuaVM::executeFunction( const string &f )
+{
+	int top = lua_gettop( m_lua );
+	lua_getglobal( m_lua, f.c_str( ) );
+	int res = lua_pcall( m_lua, 0, LUA_MULTRET, 0 );
+	if( res != 0 ) {
+		ostringstream ss;
+		ss << "Unable to call Lua function '" << f << "': " << lua_tostring( m_lua, -1 );
+		lua_pop( m_lua, 1 );
+		// throw by value so that 'catch( exception &e )' handlers see it
+		throw std::runtime_error( ss.str( ) );
+	}
+
+	// TODO: return results; until then drop them to keep the stack balanced
+	lua_settop( m_lua, top );
+}
+
 void LuaVM::dumpState( )
 {
 	lua_rawgeti( m_lua, LUA_REGISTRYINDEX, LUA_RIDX_GLOBALS );
-- 
cgit v1.2.3-54-g00ecf