From 3fc6e3cf5b586640a057e3f8335605c2bf5784ec Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Thu, 9 Oct 2014 15:49:49 +0200 Subject: first running lua code with URL normalization, cleanup needed.. --- src/crawl/GNUmakefile | 11 +++++++- src/crawl/crawl.conf | 4 +-- src/crawl/crawl.cpp | 11 ++++++++ src/libcrawler/GNUmakefile | 25 +++++++++++++++++ src/libcrawler/URL.pkg | 32 ++++++++++++++++++++++ .../googleurl/GoogleURLNormalizer.pkg | 2 +- 6 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 src/libcrawler/URL.pkg (limited to 'src') diff --git a/src/crawl/GNUmakefile b/src/crawl/GNUmakefile index f664f0d..5a9051a 100755 --- a/src/crawl/GNUmakefile +++ b/src/crawl/GNUmakefile @@ -26,13 +26,22 @@ INCLUDE_LIBS = \ # openssl ifeq ($(WITH_SSL),1) -INCLUDE_CFLAGS += \ +INCLUDE_CXXFLAGS += \ -DWITH_SSL INCLUDE_LIBS += \ $(OPENSSL_LIBS) endif +ifeq ($(WITH_LUA),1) +INCLUDE_CXXFLAGS += \ + -DWITH_LUA + +INCLUDE_DIRS += \ + -I$(TOPDIR)/src/libcrawler \ + $(TOLUA_INCLUDES) +endif + CPP_OBJS = \ CPP_BINS = \ diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf index 922ae42..5d1c380 100644 --- a/src/crawl/crawl.conf +++ b/src/crawl/crawl.conf @@ -1,8 +1,8 @@ local normalizer = GoogleURLNormalizer:new( ) local baseUrl = normalizer:parseUrl( "http://www.base.com" ) ---io.write( "base URL is: " .. baseUrl.str( ) ) +io.write( "base URL is: " .. baseUrl:str( ) .. "\n" ) local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" ) ---io.write( "URL is: " .. url.str( ) ) +io.write( "URL is: " .. url:str( ) .. "\n" ) -- global setting diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp index 08d3dbf..3a8fdff 100755 --- a/src/crawl/crawl.cpp +++ b/src/crawl/crawl.cpp @@ -55,6 +55,12 @@ BOOL WINAPI termHandler( DWORD ctrlType ) #endif +#ifdef WITH_LUA +// TODO: should be in the laoding function of libcrawl +#include "tolua.h" +#include "URLLua.hpp" +#endif + static int counter = 0; int main( int /* argc */, char *argv[] ) @@ -281,6 +287,11 @@ int main( int /* argc */, char *argv[] ) LOG( logNOTICE ) << "Crawler stopped.. normal shutdown.."; +#ifdef WITH_LUA + // TODO: should be in the laoding function of libcrawl + tolua_URL_open( luaVm.handle( ) ); +#endif + luaVm.executeMain( ); //luaVm.dumpState( ); diff --git a/src/libcrawler/GNUmakefile b/src/libcrawler/GNUmakefile index e28b916..d546058 100755 --- a/src/libcrawler/GNUmakefile +++ b/src/libcrawler/GNUmakefile @@ -8,14 +8,31 @@ INCLUDE_CPPFLAGS = \ INCLUDE_LDFLAGS = \ +ifeq ($(WITH_LUA),1) +INCLUDE_LDFLAGS += \ + $(TOLUA_LDFLAGS) +endif + INCLUDE_DIRS = \ -I. \ -I$(TOPDIR)/include/logger \ -I$(TOPDIR)/include/util \ -I$(TOPDIR)/include/crawler +ifeq ($(WITH_LUA),1) +INCLUDE_DIRS += \ + -I$(TOPDIR)/include/luaglue \ + $(TOLUA_INCLUDES) +endif + INCLUDE_LIBS = \ +ifeq ($(WITH_LUA),1) +INCLUDE_LIBS += \ + -llua \ + $(TOLUA_LIBS) +endif + STATIC_LIB = libcrawler.a DYNAMIC_LIB = libcrawler.so @@ -28,8 +45,16 @@ CPP_OBJS = \ MIMEType.o \ SpoolRewindInputStream.o +ifeq ($(WITH_LUA),1) +CPP_OBJS += \ + URLLua.o +endif + -include $(TOPDIR)/makefiles/gmake/sub.mk +URLLua.cpp: URL.pkg + $(TOLUA) -H URLLua.hpp -o URLLua.cpp URL.pkg + local_all: local_clean: diff --git a/src/libcrawler/URL.pkg b/src/libcrawler/URL.pkg new file mode 100644 index 0000000..d27288d --- /dev/null +++ b/src/libcrawler/URL.pkg @@ -0,0 +1,32 @@ +$#include "URL.hpp" + +$using namespace std; + +class URL +{ + URL( ) {} + + URL( const URL& url ) {} + + URL( const std::string _protocol, const std::string _host, const unsigned short _port, const std::string _path, const std::string _query, const std::string _fragment ) {} + + const string protocol( ) const {} + + const string host( ) const {} + + unsigned short port( ) const {} + + const string path( ) const {} + + const string query( ) const {} + + std::string fragment( ) const {} + + std::string str( ) const {} + + static URL Null; + + bool operator==( const URL &other ) const {} + + bool operator<( const URL &other ) const {} +}; diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg index ca62fe3..3af1f5f 100644 --- a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg +++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.pkg @@ -8,7 +8,7 @@ class GoogleURLNormalizer : public URLNormalizer virtual ~GoogleURLNormalizer( ) { } - virtual URL parseUrl( string s ); + virtual URL parseUrl( const string s ); virtual URL normalize( const URL url, const string s ); }; -- cgit v1.2.3-54-g00ecf