summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2014-10-16 22:06:49 +0200
committer Andreas Baumann <mail@andreasbaumann.cc> 2014-10-16 22:06:49 +0200
commit d6340a5ec44dcdc871294672f0ac21e4ab3a324c (patch)
tree 29db205406723a8d9886bf35b06a18fac05df3d6
parent 3ded6a7bc07975cb5708e0d4aced335c5722d974 (diff)
download crawler-d6340a5ec44dcdc871294672f0ac21e4ab3a324c.tar.gz
crawler-d6340a5ec44dcdc871294672f0ac21e4ab3a324c.tar.bz2
added first simple Lua logger
-rwxr-xr-x TODOS 2
-rw-r--r-- src/crawl/crawl.conf 12
-rwxr-xr-x src/crawl/crawl.cpp 72
3 files changed, 66 insertions, 20 deletions
diff --git a/TODOS b/TODOS
index 76c42c1..1065211 100755
--- a/TODOS
+++ b/TODOS
@@ -77,4 +77,4 @@
URLs to other processes.
- random distribute
- keep local links of host local
-
+- log.trace
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 154d90a..29c3620 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -81,24 +81,24 @@ filters = {
function init( )
- io.write( "Init..\n" )
+ log( "NOTICE", "Init.." )
-- normalizer = urlnormalizers.create( "google_urlnormalizer" );
normalizer = GoogleURLNormalizer:new( )
-- normalizer2 = urlnormalizers.create( "simple_urlnormalizer" );
normalizer2 = SimpleURLNormalizer:new( )
base = tolua.cast( normalizer, "URLNormalizer" )
- io.write( "type: " .. tolua.type( base ) .. "\n" )
+ log( "DEBUG", "type: " .. tolua.type( base ) )
end
function destroy( )
- io.write( "Destroy..\n" )
+ log( "NOTICE", "Destroy.." )
normalizer:delete( )
end
function crawl( )
- io.write( "Crawling..\n" )
+ log( "NOTICE", "Crawling.." )
local baseUrl = base:parseUrl( "http://www.base.com" )
- io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
+ log( "DEBUG", "base URL is: " .. baseUrl:str( ) )
local url = base:normalize( baseUrl, "/relativedir/relativefile.html" )
- io.write( "URL is: " .. url:str( ) .. "\n" )
+ log( "DEBUG", "URL is: " .. url:str( ) )
end
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 77b6876..46b4f16 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -90,6 +90,47 @@ static vector<string> searchModuleFiles( const vector<string> &modules, const ve
return moduleFiles;
}
+// Lua C function bound to the global `log( level, ... )`. Arguments 2..n are
+// joined with single spaces and written to the Logger at the level named by
+// argument 1: strings as-is, numbers via ostream formatting, nil as "<nil>",
+// any other type as "<unknown type NAME>". Returns nothing to Lua; no-op without arguments.
+static int lua_log( lua_State *l )
+{
+	// lua_gettop yields an int; keep it signed so no index math goes unsigned
+	const int nofParams = lua_gettop( l );
+	if( nofParams == 0 ) return 0;
+
+	// raises a Lua error when argument 1 is neither a string nor a number
+	const char *logLevel = luaL_checkstring( l, 1 );
+	ostringstream ss;
+
+	for( int i = 2; i <= nofParams; i++ ) {
+		const int type = lua_type( l, i );
+		switch( type ) {
+			case LUA_TNIL:
+				ss << "<nil>";
+				break;
+			case LUA_TSTRING:
+				ss << lua_tostring( l, i );
+				break;
+			case LUA_TNUMBER:
+				ss << lua_tonumber( l, i );
+				break;
+			default:
+				ss << "<unknown type " << lua_typename( l, type ) << ">";
+				break;
+		}
+		if( i != nofParams ) {
+			ss << " ";
+		}
+	}
+
+	LOG( Logger::fromString( logLevel ) ) << ss.str( );
+
+	// arguments stay on the stack: Lua discards them once a C function returns
+	return 0;
+}
+
int main( int /* argc */, char *argv[] )
{
try {
@@ -98,13 +139,18 @@ int main( int /* argc */, char *argv[] )
initialize_libcrawler( (void *)&luaVm );
//Logger::instance( ).openConsoleLog( logDEBUG );
-
- // load configuration (Lua) and execute main (to
- // get basic configuration in form of global
- // variables)
+
+ // load configuration (Lua)
luaVm.loadSource( argv[1] );
+
+ // register logging function
+ lua_pushcclosure( luaVm.handle( ), &lua_log, 0 );
+ lua_setglobal( luaVm.handle( ), "log" );
+
+ // execute main (to get basic configuration in form
+ // of global variables)
luaVm.executeMain( );
-
+
std::string logLevel = luaVm.getString( "logger.level" );
Logger::instance( ).openConsoleLog( Logger::fromString( logLevel ) );
@@ -138,35 +184,35 @@ int main( int /* argc */, char *argv[] )
modules = luaVm.getStringArray( "modules.urlfilters" );
vector<string> filterModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules );
+ ModuleLoader<URLFilter, TYPELIST_1( const set<string> ) > urlFilters( filterModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.urlchainfilters" );
vector<string> filterChainModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules );
+ ModuleLoader<URLFilter, TYPELIST_1( const list<URLFilter *> ) > urlChainFilter( filterChainModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.urlfrontiers" );
vector<string> frontierModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<Frontier> frontiers( frontierModules );
+ ModuleLoader<Frontier> frontiers( frontierModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.fetchers" );
vector<string> fetcherModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<Fetcher> fetchers( fetcherModules );
+ ModuleLoader<Fetcher> fetchers( fetcherModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.urlseens" );
vector<string> urlseenModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<URLSeen> urlSeens( urlseenModules );
+ ModuleLoader<URLSeen> urlSeens( urlseenModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.dedupers" );
vector<string> deduperModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<Deduper> dedupers( deduperModules );
+ ModuleLoader<Deduper> dedupers( deduperModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.processors" );
vector<string> processorModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules );
+ ModuleLoader<Processor, TYPELIST_4( URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) > processors( processorModules, CLOSE_DEFERRED, (void *)&luaVm );
modules = luaVm.getStringArray( "modules.typedetects" );
vector<string> typeDetectModules = searchModuleFiles( modules, allModuleFiles );
- ModuleLoader<TypeDetect> typeDetectors( typeDetectModules );
+ ModuleLoader<TypeDetect> typeDetectors( typeDetectModules, CLOSE_DEFERRED, (void *)&luaVm );
// initialize crawler function
luaVm.executeFunction( "init" );