From 4ff9da462748b8811cdbe27c7ef3babaa5c119c3 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Thu, 16 Oct 2014 15:50:30 +0200 Subject: testing with two urlnormalizer modules (google and simple) in crawl.conf --- include/module/ModuleLoader.hpp | 8 +++--- src/crawl/crawl.conf | 11 ++++++-- src/modules/urlnormalizer/simpleurl/GNUmakefile | 33 ++++++++++++++++++++++ .../simpleurl/SimpleURLNormalizer.cpp | 21 +++++++++++++- 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/include/module/ModuleLoader.hpp b/include/module/ModuleLoader.hpp index b0cddd6..0acc445 100755 --- a/include/module/ModuleLoader.hpp +++ b/include/module/ModuleLoader.hpp @@ -232,7 +232,7 @@ class ModuleLoader< Interface, TYPELIST_1( T1 ) > : public BaseModuleLoader< Int std::string clazz = demangle( typeid( *obj ) ); - BaseModuleLoader< Interface, TYPELIST_1( T1 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); + (void)BaseModuleLoader< Interface, TYPELIST_1( T1 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); return obj; } @@ -261,7 +261,7 @@ class ModuleLoader< Interface, TYPELIST_2( T1, T2 ) > : public BaseModuleLoader< std::string clazz = demangle( typeid( *obj ) ); - BaseModuleLoader< Interface, TYPELIST_2( T1, T2 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); + (void)BaseModuleLoader< Interface, TYPELIST_2( T1, T2 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); return obj; } @@ -290,7 +290,7 @@ class ModuleLoader< Interface, TYPELIST_3( T1, T2, T3 ) > : public BaseModuleLoa std::string clazz = demangle( typeid( *obj ) ); - BaseModuleLoader< Interface, TYPELIST_3( T1, T2, T3 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); + (void)BaseModuleLoader< Interface, TYPELIST_3( T1, T2, T3 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); return obj; } @@ -319,7 +319,7 @@ class ModuleLoader< Interface, TYPELIST_4( T1, T2, T3, T4 ) > : public BaseModul std::string clazz = demangle( typeid( *obj ) ); - BaseModuleLoader< Interface, TYPELIST_4( T1, T2, T3, T4 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); + (void)BaseModuleLoader< Interface, TYPELIST_4( T1, T2, T3, T4 ) >::m_modules.insert( std::make_pair( clazz, (*it).second ) ); return obj; } diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf index fd9776f..154d90a 100644 --- a/src/crawl/crawl.conf +++ b/src/crawl/crawl.conf @@ -2,7 +2,7 @@ crawler = { -- stop after N documents - stop_after_N_operations = 10, + stop_after_N_operations = 0, module_path = "modules", @@ -82,7 +82,12 @@ filters = { function init( ) io.write( "Init..\n" ) + -- normalizer = urlnormalizers.create( "google_urlnormalizer" ); normalizer = GoogleURLNormalizer:new( ) + -- normalizer2 = urlnormalizers.create( "simple_urlnormalizer" ); + normalizer2 = SimpleURLNormalizer:new( ) + base = tolua.cast( normalizer, "URLNormalizer" ) + io.write( "type: " .. tolua.type( base ) .. "\n" ) end function destroy( ) @@ -92,8 +97,8 @@ end function crawl( ) io.write( "Crawling..\n" ) - local baseUrl = normalizer:parseUrl( "http://www.base.com" ) + local baseUrl = base:parseUrl( "http://www.base.com" ) io.write( "base URL is: " .. baseUrl:str( ) .. "\n" ) - local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" ) + local url = base:normalize( baseUrl, "/relativedir/relativefile.html" ) io.write( "URL is: " .. url:str( ) .. "\n" ) end diff --git a/src/modules/urlnormalizer/simpleurl/GNUmakefile b/src/modules/urlnormalizer/simpleurl/GNUmakefile index d7e6378..64053a9 100644 --- a/src/modules/urlnormalizer/simpleurl/GNUmakefile +++ b/src/modules/urlnormalizer/simpleurl/GNUmakefile @@ -10,14 +10,36 @@ INCLUDE_DIRS = \ -I$(TOPDIR)/include/util \ -I$(TOPDIR)/include/crawler +ifeq ($(WITH_LUA),1) +INCLUDE_DIRS += \ + -I$(TOPDIR)/include/luaglue \ + $(TOLUA_INCLUDES) +endif + INCLUDE_CXXFLAGS = \ +ifeq ($(WITH_LUA),1) +INCLUDE_CXXFLAGS += \ + -DWITH_LUA +endif + INCLUDE_LDFLAGS = \ -L$(TOPDIR)/src/libcrawler +ifeq ($(WITH_LUA),1) +INCLUDE_LDFLAGS += \ + $(TOLUA_LDFLAGS) +endif + INCLUDE_LIBS = \ -lcrawler +ifeq ($(WITH_LUA),1) +INCLUDE_LIBS += \ + -llua \ + $(TOLUA_LIBS) +endif + DYNAMIC_MODULE = \ mod_urlnormalizer_simple.so @@ -27,11 +49,22 @@ STATIC_LIB = \ CPP_OBJS = \ SimpleURLNormalizer.o +ifeq ($(WITH_LUA),1) +CPP_OBJS += \ + SimpleURLNormalizerLua.o +endif + -include $(TOPDIR)/makefiles/gmake/sub.mk +SimpleURLNormalizerLua.cpp: SimpleURLNormalizer.pkg + $(TOLUA) -H SimpleURLNormalizerLua.hpp -o SimpleURLNormalizerLua.cpp SimpleURLNormalizer.pkg + local_all: local_clean: +ifeq ($(WITH_LUA),1) + @-rm SimpleURLNormalizerLua.cpp SimpleURLNormalizerLua.hpp +endif local_distclean: diff --git a/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp index 8a7efd7..0ab1d05 100755 --- a/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp +++ b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp @@ -3,6 +3,12 @@ #include "SimpleURLNormalizer.hpp" +#ifdef WITH_LUA +#include "tolua.h" +#include "SimpleURLNormalizerLua.hpp" +#include "LuaVM.hpp" +#endif + using namespace std; SimpleURLNormalizer::SimpleURLNormalizer( ) @@ -150,4 +156,17 @@ void SimpleURLNormalizer::normalizePath( string &path ) } } -REGISTER_MODULE( "simple_urlnormalizer", 0, 0, URLNormalizer, SimpleURLNormalizer ) +static void initModule( void *user_data ) +{ +#ifdef WITH_LUA + LuaVM *luaVm = (LuaVM *)user_data; + + tolua_SimpleURLNormalizer_open( luaVm->handle( ) ); +#endif +} + +static void destroyModule( void * /* user_data */ ) +{ +} + +REGISTER_MODULE( "simple_urlnormalizer", &initModule, &destroyModule, URLNormalizer, SimpleURLNormalizer ) -- cgit v1.2.3-54-g00ecf