summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2014-10-09 15:49:49 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2014-10-09 15:49:49 +0200
commit3fc6e3cf5b586640a057e3f8335605c2bf5784ec (patch)
tree5d8029190edfd1c1fd6d93359c517bea32a2a08e /src
parent5c37d14e53c729c10d87849408058031d96d51f1 (diff)
downloadcrawler-3fc6e3cf5b586640a057e3f8335605c2bf5784ec.tar.gz
crawler-3fc6e3cf5b586640a057e3f8335605c2bf5784ec.tar.bz2
first running lua code with URL normalization, cleanup needed..
Diffstat (limited to 'src')
-rwxr-xr-xsrc/crawl/GNUmakefile11
-rw-r--r--src/crawl/crawl.conf4
-rwxr-xr-xsrc/crawl/crawl.cpp11
-rwxr-xr-xsrc/libcrawler/GNUmakefile25
-rw-r--r--src/libcrawler/URL.pkg32
-rw-r--r--src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg2
6 files changed, 81 insertions, 4 deletions
diff --git a/src/crawl/GNUmakefile b/src/crawl/GNUmakefile
index f664f0d..5a9051a 100755
--- a/src/crawl/GNUmakefile
+++ b/src/crawl/GNUmakefile
@@ -26,13 +26,22 @@ INCLUDE_LIBS = \
# openssl
ifeq ($(WITH_SSL),1)
-INCLUDE_CFLAGS += \
+INCLUDE_CXXFLAGS += \
-DWITH_SSL
INCLUDE_LIBS += \
$(OPENSSL_LIBS)
endif
+ifeq ($(WITH_LUA),1)
+INCLUDE_CXXFLAGS += \
+ -DWITH_LUA
+
+INCLUDE_DIRS += \
+ -I$(TOPDIR)/src/libcrawler \
+ $(TOLUA_INCLUDES)
+endif
+
CPP_OBJS = \
CPP_BINS = \
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 922ae42..5d1c380 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -1,8 +1,8 @@
local normalizer = GoogleURLNormalizer:new( )
local baseUrl = normalizer:parseUrl( "http://www.base.com" )
---io.write( "base URL is: " .. baseUrl.str( ) )
+io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
---io.write( "URL is: " .. url.str( ) )
+io.write( "URL is: " .. url:str( ) .. "\n" )
-- global setting
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 08d3dbf..3a8fdff 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -55,6 +55,12 @@ BOOL WINAPI termHandler( DWORD ctrlType )
#endif
+#ifdef WITH_LUA
+// TODO: should be in the loading function of libcrawl
+#include "tolua.h"
+#include "URLLua.hpp"
+#endif
+
static int counter = 0;
int main( int /* argc */, char *argv[] )
@@ -281,6 +287,11 @@ int main( int /* argc */, char *argv[] )
LOG( logNOTICE ) << "Crawler stopped.. normal shutdown..";
+#ifdef WITH_LUA
+ // TODO: should be in the loading function of libcrawl
+ tolua_URL_open( luaVm.handle( ) );
+#endif
+
luaVm.executeMain( );
//luaVm.dumpState( );
diff --git a/src/libcrawler/GNUmakefile b/src/libcrawler/GNUmakefile
index e28b916..d546058 100755
--- a/src/libcrawler/GNUmakefile
+++ b/src/libcrawler/GNUmakefile
@@ -8,14 +8,31 @@ INCLUDE_CPPFLAGS = \
INCLUDE_LDFLAGS = \
+ifeq ($(WITH_LUA),1)
+INCLUDE_LDFLAGS += \
+ $(TOLUA_LDFLAGS)
+endif
+
INCLUDE_DIRS = \
-I. \
-I$(TOPDIR)/include/logger \
-I$(TOPDIR)/include/util \
-I$(TOPDIR)/include/crawler
+ifeq ($(WITH_LUA),1)
+INCLUDE_DIRS += \
+ -I$(TOPDIR)/include/luaglue \
+ $(TOLUA_INCLUDES)
+endif
+
INCLUDE_LIBS = \
+ifeq ($(WITH_LUA),1)
+INCLUDE_LIBS += \
+ -llua \
+ $(TOLUA_LIBS)
+endif
+
STATIC_LIB = libcrawler.a
DYNAMIC_LIB = libcrawler.so
@@ -28,8 +45,16 @@ CPP_OBJS = \
MIMEType.o \
SpoolRewindInputStream.o
+ifeq ($(WITH_LUA),1)
+CPP_OBJS += \
+ URLLua.o
+endif
+
-include $(TOPDIR)/makefiles/gmake/sub.mk
+URLLua.cpp: URL.pkg
+ $(TOLUA) -H URLLua.hpp -o URLLua.cpp URL.pkg
+
local_all:
local_clean:
diff --git a/src/libcrawler/URL.pkg b/src/libcrawler/URL.pkg
new file mode 100644
index 0000000..d27288d
--- /dev/null
+++ b/src/libcrawler/URL.pkg
@@ -0,0 +1,32 @@
+$#include "URL.hpp"
+
+$using namespace std;
+
+class URL
+{
+ URL( ) {}
+
+ URL( const URL& url ) {}
+
+ URL( const std::string _protocol, const std::string _host, const unsigned short _port, const std::string _path, const std::string _query, const std::string _fragment ) {}
+
+ const string protocol( ) const {}
+
+ const string host( ) const {}
+
+ unsigned short port( ) const {}
+
+ const string path( ) const {}
+
+ const string query( ) const {}
+
+ std::string fragment( ) const {}
+
+ std::string str( ) const {}
+
+ static URL Null;
+
+ bool operator==( const URL &other ) const {}
+
+ bool operator<( const URL &other ) const {}
+};
diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
index ca62fe3..3af1f5f 100644
--- a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
+++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg
@@ -8,7 +8,7 @@ class GoogleURLNormalizer : public URLNormalizer
virtual ~GoogleURLNormalizer( ) { }
- virtual URL parseUrl( string s );
+ virtual URL parseUrl( const string s );
virtual URL normalize( const URL url, const string s );
};