summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2014-10-10 11:26:05 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2014-10-10 11:26:05 +0200
commitc5351f58bcf494a56ecfd17fe5e68eb3b17dac7d (patch)
treebca845c55a2a7f407a856950a6293f0759d2e391 /src
parent5382e843f651f834c1df31bd494ed0638be07960 (diff)
downloadcrawler-c5351f58bcf494a56ecfd17fe5e68eb3b17dac7d.tar.gz
crawler-c5351f58bcf494a56ecfd17fe5e68eb3b17dac7d.tar.bz2
added execution function to LuaVm
reorganized main lua.conf and calling several functions
Diffstat (limited to 'src')
-rw-r--r--src/crawl/crawl.conf33
-rwxr-xr-xsrc/crawl/crawl.cpp31
-rw-r--r--src/libluaglue/LuaVM.cpp21
3 files changed, 63 insertions, 22 deletions
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 7c3fb80..816f23f 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -1,17 +1,12 @@
-local normalizer = GoogleURLNormalizer:new( )
-local baseUrl = normalizer:parseUrl( "http://www.base.com" )
-io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
-local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
-io.write( "URL is: " .. url:str( ) .. "\n" )
-normalizer:delete( )
-
-- global setting
crawler = {
-- stop after N documents
- stop_after_N_operations = 10
+ stop_after_N_operations = 10,
+ -- NOTE(review): `modules` is a bare global name here, not a string literal.
+ -- Unless the host defines a global `modules` before this chunk runs, it
+ -- evaluates to nil and the `module_path` key is simply absent from the
+ -- table - confirm whether the string "modules" was intended.
+ module_path = modules,
+ modules_search_recursive = true
}
logger = {
@@ -25,6 +20,11 @@ seeds = {
"http://wolframe.net"
}
+-- URL normalizer modules, listed by name.
+-- NOTE(review): the visible part of crawl.cpp in this change does not read
+-- this table yet - confirm how these names are mapped to module files.
+urlnormalizers = {
+ "mod_normalizer_simple",
+ "mod_normalizer_google"
+}
+
filters = {
-- allowed protocols to be fetched
protocols = {
@@ -39,6 +39,21 @@ filters = {
}
}
+
+-- Set-up hook, invoked by name from crawl.cpp before the crawl step.
+-- Creates a GoogleURLNormalizer and stores it in the global `normalizer`,
+-- which crawl( ) and destroy( ) use afterwards.
+function init( )
+ io.write( "Init..\n" )
+ -- deliberately global: the object has to outlive this function
+ normalizer = GoogleURLNormalizer:new( )
+end
+
+-- Tear-down hook, invoked by name from crawl.cpp after the crawl step.
+-- Releases the normalizer stored in the global `normalizer` by init( ).
+-- Does nothing beyond logging when no normalizer is set (init( ) never ran
+-- or destroy( ) already ran), and clears the global afterwards so that no
+-- stale reference to the deleted object is left behind.
+function destroy( )
+    io.write( "Destroy..\n" )
+    if normalizer ~= nil then
+        normalizer:delete( )
+        normalizer = nil
+    end
+end
+
+-- Crawl-step hook, invoked by name from crawl.cpp.
+-- Demonstrates URL normalization with fixed inputs and prints the results.
+-- Uses the global `normalizer` assigned in init( ), so init( ) has to run first.
function crawl( )
- io.write( "Crawling." )
+ io.write( "Crawling..\n" )
+ -- parse a fixed absolute URL that serves as the base for resolution
+ local baseUrl = normalizer:parseUrl( "http://www.base.com" )
+ io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
+ -- normalize a relative path against that base URL
+ local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
+ io.write( "URL is: " .. url:str( ) .. "\n" )
end
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 3a8fdff..e26de0a 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -71,8 +71,11 @@ int main( int /* argc */, char *argv[] )
// Logger::instance( ).openConsoleLog( logINFO );
Logger::instance( ).openConsoleLog( logDEBUG );
+ // load configuration (Lua) and execute main (to
+ // get basic configuration in form of global
+ // variables)
luaVm.loadSource( argv[1] );
- //luaVm.executeMain( );
+ luaVm.executeMain( );
#ifndef _WIN32
struct sigaction sa;
@@ -86,6 +89,7 @@ int main( int /* argc */, char *argv[] )
SetConsoleCtrlHandler( termHandler, TRUE );
#endif
+ // go through all types of modules and load them with the proper loader
LOG( logNOTICE ) << "Loading modules";
vector<string> normalizerModules;
@@ -98,6 +102,22 @@ int main( int /* argc */, char *argv[] )
#endif
ModuleLoader<URLNormalizer> urlNormalizers( normalizerModules, CLOSE_DEFERRED, (void *)&luaVm );
+#ifdef WITH_LUA
+ // TODO: should be in the loading function of libcrawl
+ tolua_URL_open( luaVm.handle( ) );
+#endif
+
+ // initialize crawler function
+ luaVm.executeFunction( "init" );
+
+ // perform a crawl step
+ luaVm.executeFunction( "crawl" );
+
+ // cleaning up
+ luaVm.executeFunction( "destroy" );
+
+ return 0;
+
vector<string> filterModules;
#ifndef _WIN32
filterModules.push_back( "./modules/urlfilter/protocol/mod_urlfilter_protocol.so" );
@@ -287,15 +307,6 @@ int main( int /* argc */, char *argv[] )
LOG( logNOTICE ) << "Crawler stopped.. normal shutdown..";
-#ifdef WITH_LUA
- // TODO: should be in the laoding function of libcrawl
- tolua_URL_open( luaVm.handle( ) );
-#endif
-
- luaVm.executeMain( );
- //luaVm.dumpState( );
-
- return 0;
} catch( exception &e ) {
LOG( logFATAL ) << "Crawler stopped: " << e.what( );
return 1;
diff --git a/src/libluaglue/LuaVM.cpp b/src/libluaglue/LuaVM.cpp
index d587897..7b165f1 100644
--- a/src/libluaglue/LuaVM.cpp
+++ b/src/libluaglue/LuaVM.cpp
@@ -3,6 +3,7 @@
#include <stdexcept>
#include <sstream>
#include <iostream>
+#include <string>
using namespace std;
@@ -50,9 +51,7 @@ void LuaVM::loadSource( const char *sourceFilename )
void LuaVM::executeMain( )
{
- int res;
-
- res = lua_pcall( m_lua, 0, LUA_MULTRET, 0 );
+ int res = lua_pcall( m_lua, 0, 0, 0 );
if( res != 0 ) {
ostringstream ss;
ss << "Can't execute main body of Lua source file '" << m_sourceFilename << "': " << lua_tostring( m_lua, -1 );
@@ -61,6 +60,22 @@ void LuaVM::executeMain( )
}
}
+// Calls the global Lua function named 'f' without arguments.
+//
+// Values returned by the Lua function are currently discarded; the Lua stack
+// is restored to its height on entry on every exit path.
+//
+// Throws std::runtime_error when the call fails (for instance because the
+// global is not a callable value or the function raises an error). The
+// exception is thrown by value so that handlers of the form
+// 'catch( exception &e )' actually receive it.
+void LuaVM::executeFunction( const string &f )
+{
+    // remember the stack height so it can be restored before leaving
+    const int top = lua_gettop( m_lua );
+
+    lua_getglobal( m_lua, f.c_str( ) );
+    if( lua_pcall( m_lua, 0, LUA_MULTRET, 0 ) != 0 ) {
+        // the error object is not guaranteed to be convertible to a string,
+        // in which case lua_tostring returns NULL
+        const char *msg = lua_tostring( m_lua, -1 );
+        ostringstream ss;
+        ss << "Unable to call Lua function '" << f << "': " << ( msg ? msg : "(error object is not a string)" );
+        lua_settop( m_lua, top );
+        throw std::runtime_error( ss.str( ) );
+    }
+
+    // TODO: return results; until then drop them so that repeated calls
+    // do not grow the Lua stack
+    lua_settop( m_lua, top );
+}
+
void LuaVM::dumpState( )
{
lua_rawgeti( m_lua, LUA_REGISTRYINDEX, LUA_RIDX_GLOBALS );