summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2014-10-09 08:59:02 +0200
committer Andreas Baumann <mail@andreasbaumann.cc> 2014-10-09 08:59:02 +0200
commit 7d8b1ff684b412da292e0fc734748975188a0f10 (patch)
tree 2673e3da51cc80bfc38a426048b30a4d71c31d4c /src
parent 62c5bb90525baf0d82c23892c2666f611750d63c (diff)
download crawler-7d8b1ff684b412da292e0fc734748975188a0f10.tar.gz
crawler-7d8b1ff684b412da292e0fc734748975188a0f10.tar.bz2
First trials with a Google normalizer called from Lua; std::string is currently the problem,
as is the missing wrapper for the URL class. Also added a local 'tolua'; we will have to hack it.
Diffstat (limited to 'src')
-rw-r--r-- src/crawl/crawl.conf 4
-rwxr-xr-x src/crawl/crawl.cpp 7
-rw-r--r-- src/modules/urlnormalizer/googleurl/GNUmakefile 26
-rwxr-xr-x src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp 21
-rw-r--r-- src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg 14
5 files changed, 68 insertions, 4 deletions
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index ddc1da6..a524eaf 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -1,3 +1,7 @@
+local normalizer = GoogleURLNormalizer:new( )
+local baseUrl = normalizer:parseUrl( "http://www.base.com" )
+-- normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
+
-- global setting
crawler = {
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 14a02dd..08d3dbf 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -66,7 +66,7 @@ int main( int /* argc */, char *argv[] )
Logger::instance( ).openConsoleLog( logDEBUG );
luaVm.loadSource( argv[1] );
- luaVm.executeMain( );
+ //luaVm.executeMain( );
#ifndef _WIN32
struct sigaction sa;
@@ -254,7 +254,7 @@ int main( int /* argc */, char *argv[] )
//~ sleep( 2 );
counter++;
- if( counter > 10 ) {
+ if( counter > 0 ) {
term = true;
}
#else
@@ -281,7 +281,8 @@ int main( int /* argc */, char *argv[] )
LOG( logNOTICE ) << "Crawler stopped.. normal shutdown..";
- luaVm.dumpState( );
+ luaVm.executeMain( );
+ //luaVm.dumpState( );
return 0;
} catch( exception &e ) {
diff --git a/src/modules/urlnormalizer/googleurl/GNUmakefile b/src/modules/urlnormalizer/googleurl/GNUmakefile
index ddd9e73..da181aa 100644
--- a/src/modules/urlnormalizer/googleurl/GNUmakefile
+++ b/src/modules/urlnormalizer/googleurl/GNUmakefile
@@ -11,8 +11,18 @@ INCLUDE_DIRS = \
-I$(TOPDIR)/include/crawler \
-I$(TOPDIR)/googleurl
+ifeq ($(WITH_LUA),1)
+INCLUDE_DIRS += \
+ -I$(TOPDIR)/include/luaglue
+endif
+
INCLUDE_CXXFLAGS = \
+ifeq ($(WITH_LUA),1)
+INCLUDE_CXXFLAGS += \
+ -DWITH_LUA
+endif
+
INCLUDE_LDFLAGS = \
-L$(TOPDIR)/googleurl \
-L$(TOPDIR)/src/libcrawler
@@ -22,6 +32,11 @@ INCLUDE_LIBS = \
-lcrawler \
-licui18n -licuuc
+ifeq ($(WITH_LUA),1)
+INCLUDE_LIBS += \
+ -ltolua -llua
+endif
+
DYNAMIC_MODULE = \
mod_urlnormalizer_googleurl.so
@@ -31,11 +46,22 @@ STATIC_LIB = \
CPP_OBJS = \
GoogleURLNormalizer.o
+ifeq ($(WITH_LUA),1)
+CPP_OBJS += \
+ GoogleURLNormalizerLua.o
+endif
+
-include $(TOPDIR)/makefiles/gmake/sub.mk
+GoogleURLNormalizerLua.cpp: GoogleURLNormalizer.pkg
+ tolua -H GoogleURLNormalizerLua.hpp -o GoogleURLNormalizerLua.cpp GoogleURLNormalizer.pkg
+
local_all:
local_clean:
+ifeq ($(WITH_LUA),1)
+ @-rm GoogleURLNormalizerLua.cpp GoogleURLNormalizerLua.hpp
+endif
local_distclean:
diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
index ea04980..734afea 100755
--- a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
+++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
@@ -6,6 +6,12 @@
#include "url_canon_stdstring.h"
#include "url_parse.h"
+#ifdef WITH_LUA
+#include "tolua.h"
+#include "GoogleURLNormalizerLua.hpp"
+#include "LuaVM.hpp"
+#endif
+
using namespace std;
using namespace url_util;
using namespace url_canon;
@@ -106,4 +112,17 @@ URL GoogleURLNormalizer::normalize( const URL url, const string s )
"" );
}
-REGISTER_MODULE( "google_urlnormalizer", 0, 0, URLNormalizer, GoogleURLNormalizer )
+static void initModule( void *user_data )
+{
+#ifdef WITH_LUA
+ LuaVM *luaVm = (LuaVM *)user_data;
+
+ tolua_GoogleURLNormalizer_open( luaVm->handle( ) );
+#endif
+}
+
+static void destroyModule( void * /* user_data */ )
+{
+}
+
+REGISTER_MODULE( "google_urlnormalizer", &initModule, &destroyModule, URLNormalizer, GoogleURLNormalizer )
diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
new file mode 100644
index 0000000..ca62fe3
--- /dev/null
+++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
@@ -0,0 +1,14 @@
+$#include "GoogleURLNormalizer.hpp"
+
+$using std::string;
+
+class GoogleURLNormalizer : public URLNormalizer
+{
+ GoogleURLNormalizer( ) { }
+
+ virtual ~GoogleURLNormalizer( ) { }
+
+ virtual URL parseUrl( string s );
+
+ virtual URL normalize( const URL url, const string s );
+};