summaryrefslogtreecommitdiff
path: root/src/modules
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2014-10-09 08:59:02 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2014-10-09 08:59:02 +0200
commit7d8b1ff684b412da292e0fc734748975188a0f10 (patch)
tree2673e3da51cc80bfc38a426048b30a4d71c31d4c /src/modules
parent62c5bb90525baf0d82c23892c2666f611750d63c (diff)
downloadcrawler-7d8b1ff684b412da292e0fc734748975188a0f10.tar.gz
crawler-7d8b1ff684b412da292e0fc734748975188a0f10.tar.bz2
First trials with a Google URL normalizer called from Lua; std::string is currently the problem,
as is the missing wrapper for the URL class. Also added a local 'tolua', which we will have to hack.
Diffstat (limited to 'src/modules')
-rw-r--r--src/modules/urlnormalizer/googleurl/GNUmakefile26
-rwxr-xr-xsrc/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp21
-rw-r--r--src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg14
3 files changed, 60 insertions, 1 deletions
diff --git a/src/modules/urlnormalizer/googleurl/GNUmakefile b/src/modules/urlnormalizer/googleurl/GNUmakefile
index ddd9e73..da181aa 100644
--- a/src/modules/urlnormalizer/googleurl/GNUmakefile
+++ b/src/modules/urlnormalizer/googleurl/GNUmakefile
@@ -11,8 +11,18 @@ INCLUDE_DIRS = \
-I$(TOPDIR)/include/crawler \
-I$(TOPDIR)/googleurl
+ifeq ($(WITH_LUA),1)
+# Lua-enabled builds (WITH_LUA=1) additionally search the luaglue header directory
+INCLUDE_DIRS += \
+ -I$(TOPDIR)/include/luaglue
+endif
+
INCLUDE_CXXFLAGS = \
+ifeq ($(WITH_LUA),1)
+# pass WITH_LUA to the compiler; GoogleURLNormalizer.cpp guards its Lua code with #ifdef WITH_LUA
+INCLUDE_CXXFLAGS += \
+ -DWITH_LUA
+endif
+
INCLUDE_LDFLAGS = \
-L$(TOPDIR)/googleurl \
-L$(TOPDIR)/src/libcrawler
@@ -22,6 +32,11 @@ INCLUDE_LIBS = \
-lcrawler \
-licui18n -licuuc
+ifeq ($(WITH_LUA),1)
+# link against the tolua and lua libraries when Lua support is enabled
+INCLUDE_LIBS += \
+ -ltolua -llua
+endif
+
DYNAMIC_MODULE = \
mod_urlnormalizer_googleurl.so
@@ -31,11 +46,22 @@ STATIC_LIB = \
CPP_OBJS = \
GoogleURLNormalizer.o
+ifeq ($(WITH_LUA),1)
+# also build the object for GoogleURLNormalizerLua.cpp, the source tolua generates from GoogleURLNormalizer.pkg
+CPP_OBJS += \
+ GoogleURLNormalizerLua.o
+endif
+
-include $(TOPDIR)/makefiles/gmake/sub.mk
+# Generate the Lua binding source from the tolua package description;
+# -H additionally writes the header GoogleURLNormalizerLua.hpp, -o names the generated source.
+GoogleURLNormalizerLua.cpp: GoogleURLNormalizer.pkg
+ tolua -H GoogleURLNormalizerLua.hpp -o GoogleURLNormalizerLua.cpp GoogleURLNormalizer.pkg
+
local_all:
local_clean:
+ifeq ($(WITH_LUA),1)
+# remove the tolua-generated binding files; -f keeps cleaning a tree where they were never generated silent
+	@-rm -f GoogleURLNormalizerLua.cpp GoogleURLNormalizerLua.hpp
+endif
local_distclean:
diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
index ea04980..734afea 100755
--- a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
+++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
@@ -6,6 +6,12 @@
#include "url_canon_stdstring.h"
#include "url_parse.h"
+#ifdef WITH_LUA
+#include "tolua.h"
+#include "GoogleURLNormalizerLua.hpp"
+#include "LuaVM.hpp"
+#endif
+
using namespace std;
using namespace url_util;
using namespace url_canon;
@@ -106,4 +112,17 @@ URL GoogleURLNormalizer::normalize( const URL url, const string s )
"" );
}
-REGISTER_MODULE( "google_urlnormalizer", 0, 0, URLNormalizer, GoogleURLNormalizer )
+/**
+ * Module initialization hook handed to REGISTER_MODULE.
+ *
+ * When built with WITH_LUA, user_data is interpreted as a LuaVM * and the
+ * tolua-generated GoogleURLNormalizer bindings are opened on the handle
+ * returned by LuaVM::handle( ). Without WITH_LUA this is a no-op.
+ *
+ * @param user_data opaque pointer supplied by the module loader; treated as
+ *                  a LuaVM * (NOTE(review): confirm the loader never passes
+ *                  a different type here)
+ */
+static void initModule( void *user_data )
+{
+#ifdef WITH_LUA
+	LuaVM *luaVm = static_cast<LuaVM *>( user_data );
+
+	// no VM handed in means there is nothing to bind to; avoids a null dereference
+	if( !luaVm ) return;
+
+	tolua_GoogleURLNormalizer_open( luaVm->handle( ) );
+#else
+	// keep builds without Lua free of unused-parameter warnings
+	(void)user_data;
+#endif
+}
+
+// Module teardown hook handed to REGISTER_MODULE; it currently does nothing
+// and ignores its user_data argument.
+static void destroyModule( void * /* user_data */ )
+{
+}
+
+REGISTER_MODULE( "google_urlnormalizer", &initModule, &destroyModule, URLNormalizer, GoogleURLNormalizer )
diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
new file mode 100644
index 0000000..ca62fe3
--- /dev/null
+++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
@@ -0,0 +1,14 @@
+$#include "GoogleURLNormalizer.hpp"
+
+$using std::string;
+
+// tolua package description of GoogleURLNormalizer: the members declared
+// here are the ones handed to tolua for Lua binding generation.
+class GoogleURLNormalizer : public URLNormalizer
+{
+ GoogleURLNormalizer( ) { }
+
+ virtual ~GoogleURLNormalizer( ) { }
+
+ // NOTE(review): takes a plain 'string' by value and returns a URL; the commit
+ // message flags std::string and the missing URL wrapper as open problems.
+ virtual URL parseUrl( string s );
+
+ virtual URL normalize( const URL url, const string s );
+};