diff options
-rw-r--r-- | docs/LINKS | 2 | ||||
-rw-r--r-- | src/Deduper.cpp | 5 | ||||
-rw-r--r-- | src/Deduper.hpp | 2 | ||||
-rw-r--r-- | src/GNUmakefile | 14 | ||||
-rw-r--r-- | src/ModuleLoader.hpp | 81 | ||||
-rw-r--r-- | src/ModuleRegistry.hpp | 21 | ||||
-rw-r--r-- | src/crawlingwolf.cpp | 15 | ||||
-rw-r--r-- | src/modules/GNUmakefile | 18 | ||||
-rw-r--r-- | src/modules/urlnormalizer/GNUmakefile | 18 | ||||
-rw-r--r-- | src/modules/urlnormalizer/googleurl/GNUmakefile | 38 | ||||
-rw-r--r-- | src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp (renamed from src/GoogleURLNormalizer.cpp) | 12 | ||||
-rw-r--r-- | src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.hpp (renamed from src/GoogleURLNormalizer.hpp) | 3 | ||||
-rw-r--r-- | src/modules/urlnormalizer/simpleurl/GNUmakefile | 35 | ||||
-rw-r--r-- | src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp (renamed from src/SimpleURLNormalizer.cpp) | 11 | ||||
-rw-r--r-- | src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.hpp (renamed from src/SimpleURLNormalizer.hpp) | 3 |
15 files changed, 257 insertions, 21 deletions
@@ -39,3 +39,5 @@ http://www.ibm.com/developerworks/linux/library/l-embed-lua/ Loadable modules in C++ http://www.isotton.com/devel/docs/C++-dlopen-mini-HOWTO/C++-dlopen-mini-HOWTO.html +http://www.linuxjournal.com/article/3687?page=0,1 +http://www.artima.com/cppsource/subscription_problem.html diff --git a/src/Deduper.cpp b/src/Deduper.cpp deleted file mode 100644 index 451b8ab..0000000 --- a/src/Deduper.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "Deduper.hpp" - -Deduper::~Deduper( ) -{ -} diff --git a/src/Deduper.hpp b/src/Deduper.hpp index 36421fa..3cb33c1 100644 --- a/src/Deduper.hpp +++ b/src/Deduper.hpp @@ -7,7 +7,7 @@ class Deduper { public: - virtual ~Deduper( ) = 0; + virtual ~Deduper( ) { }; virtual bool contentSeen( const URL url, RewindInputStream *s ) = 0; }; diff --git a/src/GNUmakefile b/src/GNUmakefile index eaf57c8..5a25794 100644 --- a/src/GNUmakefile +++ b/src/GNUmakefile @@ -1,6 +1,6 @@ TOPDIR = .. -SUBDIRS = +SUBDIRS = modules -include $(TOPDIR)/makefiles/gmake/platform.mk @@ -10,14 +10,11 @@ INCLUDE_LDFLAGS = \ INCLUDE_DIRS = \ -I$(TOPDIR)/libfetch \ - -I$(TOPDIR)/streamhtmlparser \ - -I$(TOPDIR)/googleurl + -I$(TOPDIR)/streamhtmlparser INCLUDE_LIBS = \ $(TOPDIR)/libfetch/libfetch.a \ - $(TOPDIR)/streamhtmlparser/libstreamhtmlparser.a \ - $(TOPDIR)/googleurl/libgoogleurl.a \ - -licui18n -licuuc + $(TOPDIR)/streamhtmlparser/libstreamhtmlparser.a # openssl ifeq ($(WITH_SSL),1) @@ -33,14 +30,11 @@ LOCAL_STATIC_LIB_OBJS = \ URL.o \ LibFetchFetcher.o \ LibFetchRewindInputStream.o \ - Deduper.o \ HTMLLinkExtractProcessor.o \ ProtocolURLFilter.o \ HostURLFilter.o \ ChainURLFilter.o \ - MemoryURLSeen.o \ - SimpleURLNormalizer.o \ - GoogleURLNormalizer.o + MemoryURLSeen.o CPP_OBJS = \ $(LOCAL_STATIC_LIB_OBJS) diff --git a/src/ModuleLoader.hpp b/src/ModuleLoader.hpp new file mode 100644 index 0000000..c4b73dc --- /dev/null +++ b/src/ModuleLoader.hpp @@ -0,0 +1,81 @@ +#ifndef __MODULELOADER_H +#define __MODULELOADER_H + +#include <vector> +#include <map> +#include <string> +#include <stdexcept> + +#include <dlfcn.h> + +#include "ModuleRegistry.hpp" + +template< typename Interface > +struct Module { + void *handle; + ModuleRegistry<Interface> *registry; +}; + +template< typename Interface > +class ModuleLoader { + + typedef typename std::map<std::string, Module< Interface > > mapType; + + protected: + mapType m_modules; + + public: + + ModuleLoader<Interface>( ) + { + } + + ModuleLoader<Interface>( const std::vector<std::string> files ) + { + Module<Interface> m; + + for( std::vector<string>::const_iterator it = files.begin( ); it != files.end( ); it++ ) { + m.handle = dlopen( it->c_str( ), RTLD_NOW ); + if( !m.handle ) { + throw std::runtime_error( dlerror( ) ); + } + + m.registry = static_cast<ModuleRegistry<Interface> *>( dlsym( m.handle, "registry" ) ); + if( !m.registry ) { + dlclose( m.handle ); + throw std::runtime_error( "missing module registry" ); + } + + m_modules[m.registry->name] = m; + } + } + + ~ModuleLoader<Interface>( ) + { + for( typename mapType::const_iterator it = m_modules.begin( ); it != m_modules.end( ); it++ ) { + dlclose( (*it).second.handle ); + } + } + + Interface *create( std::string subclass ) const + { + typename mapType::const_iterator it = m_modules.find( subclass ); + if( it == m_modules.end( ) ) { + throw std::runtime_error( "calling unknown constructor" ); + } + + return (*it).second.registry->create( ); + } + + void destroy( std::string subclass, Interface *obj ) const + { + typename mapType::const_iterator it = m_modules.find( subclass ); + if( it == m_modules.end( ) ) { + throw std::runtime_error( "calling unknown destructor" ); + } + + (*it).second.registry->destroy( obj ); + } +}; + +#endif diff --git a/src/ModuleRegistry.hpp b/src/ModuleRegistry.hpp new file mode 100644 index 0000000..fbbdd40 --- /dev/null +++ b/src/ModuleRegistry.hpp @@ -0,0 +1,21 @@ +#ifndef __MODULEINTERFACE_H +#define __MODULEINTERFACE_H + +#include <string> + +template< typename Interface > +struct ModuleRegistry { + std::string name; + Interface *(*create)( ); + void (*destroy)( Interface *obj ); + + ModuleRegistry( ) { } + + ModuleRegistry<Interface>( std::string _name, Interface *(*_create)( ), + void (*_destroy)( Interface *obj ) ) + : name( _name ), create( _create ), destroy( _destroy ) + { + } +}; + +#endif diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp index 080423d..213f9a5 100644 --- a/src/crawlingwolf.cpp +++ b/src/crawlingwolf.cpp @@ -6,10 +6,11 @@ #include "ProtocolURLFilter.hpp" #include "HostURLFilter.hpp" #include "MemoryURLSeen.hpp" -#include "SimpleURLNormalizer.hpp" -#include "GoogleURLNormalizer.hpp" +#include "URLNormalizer.hpp" +#include "ModuleLoader.hpp" #include <set> +#include <vector> using namespace std; @@ -33,8 +34,12 @@ int main( void ) ChainURLFilter filters( &protocolFilter, &hostFilter ); - //URLNormalizer *normalizer = new SimpleURLNormalizer( ); - URLNormalizer *normalizer = new GoogleURLNormalizer( ); + vector<string> modules; + modules.push_back( "./modules/urlnormalizer/simpleurl/mod_urlnormalizer_simple.so" ); + modules.push_back( "./modules/urlnormalizer/googleurl/mod_urlnormalizer_googleurl.so" ); + ModuleLoader<URLNormalizer> urlNormalizers( modules ); + //URLNormalizer *normalizer = urlNormalizers.create( "simple" ); + URLNormalizer *normalizer = urlNormalizers.create( "google" ); Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen ); @@ -59,7 +64,7 @@ int main( void ) } delete processor; - delete normalizer; + urlNormalizers.destroy( "google", normalizer ); delete urlSeen; delete deduper; delete fetcher; diff --git a/src/modules/GNUmakefile b/src/modules/GNUmakefile new file mode 100644 index 0000000..ddf5ee4 --- /dev/null +++ b/src/modules/GNUmakefile @@ -0,0 +1,18 @@ +TOPDIR = ../.. + +SUBDIRS = urlnormalizer + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/modules/urlnormalizer/GNUmakefile b/src/modules/urlnormalizer/GNUmakefile new file mode 100644 index 0000000..83e369c --- /dev/null +++ b/src/modules/urlnormalizer/GNUmakefile @@ -0,0 +1,18 @@ +TOPDIR = ../../.. + +SUBDIRS = simpleurl googleurl + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/modules/urlnormalizer/googleurl/GNUmakefile b/src/modules/urlnormalizer/googleurl/GNUmakefile new file mode 100644 index 0000000..cd52be9 --- /dev/null +++ b/src/modules/urlnormalizer/googleurl/GNUmakefile @@ -0,0 +1,38 @@ +TOPDIR = ../../../.. + +SUBDIRS = + +-include $(TOPDIR)/makefiles/gmake/platform.mk + +INCLUDE_DIRS = \ + -I. -I$(TOPDIR)/src \ + -I$(TOPDIR)/googleurl + +INCLUDE_CXXFLAGS = \ + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + $(TOPDIR)/googleurl/libgoogleurl.a \ + -licui18n -licuuc + +DYNAMIC_MODULE = \ + mod_urlnormalizer_googleurl.so + +CPP_OBJS = \ + GoogleURLNormalizer.o + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/GoogleURLNormalizer.cpp b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp index 023a9e4..e5810d6 100644 --- a/src/GoogleURLNormalizer.cpp +++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp @@ -103,3 +103,15 @@ URL GoogleURLNormalizer::normalize( const URL url, const string s ) componentString( canonical, parsed.path ), "", "" ); } + +static URLNormalizer *create( ) +{ + return new GoogleURLNormalizer( ); +} + +static void destroy( URLNormalizer *obj ) +{ + delete obj; +} + +ModuleRegistry<URLNormalizer> registry( "google", &create, &destroy ); diff --git a/src/GoogleURLNormalizer.hpp b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.hpp index 1aa33bf..7fd3cfb 100644 --- a/src/GoogleURLNormalizer.hpp +++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.hpp @@ -2,6 +2,7 @@ #define __GOOGLEURLNORMALIZER_H #include "URLNormalizer.hpp" +#include "ModuleRegistry.hpp" class GoogleURLNormalizer : public URLNormalizer { public: @@ -14,4 +15,6 @@ class GoogleURLNormalizer : public URLNormalizer { virtual URL normalize( const URL url, const std::string s ); }; +extern "C" ModuleRegistry<URLNormalizer> registry; + #endif diff --git a/src/modules/urlnormalizer/simpleurl/GNUmakefile b/src/modules/urlnormalizer/simpleurl/GNUmakefile new file mode 100644 index 0000000..b6fc0a0 --- /dev/null +++ b/src/modules/urlnormalizer/simpleurl/GNUmakefile @@ -0,0 +1,35 @@ +TOPDIR = ../../../.. + +SUBDIRS = + +-include $(TOPDIR)/makefiles/gmake/platform.mk + +INCLUDE_DIRS = \ + -I. -I$(TOPDIR)/src + +INCLUDE_CXXFLAGS = \ + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + +DYNAMIC_MODULE = \ + mod_urlnormalizer_simple.so + +CPP_OBJS = \ + SimpleURLNormalizer.o + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/SimpleURLNormalizer.cpp b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp index 058dd6e..328a82b 100644 --- a/src/SimpleURLNormalizer.cpp +++ b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp @@ -146,3 +146,14 @@ void SimpleURLNormalizer::normalizePath( string &path ) } } +static URLNormalizer *create( ) +{ + return new SimpleURLNormalizer( ); +} + +static void destroy( URLNormalizer *obj ) +{ + delete obj; +} + +ModuleRegistry<URLNormalizer> registry( "simple", &create, &destroy ); diff --git a/src/SimpleURLNormalizer.hpp b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.hpp index de478a4..1badaef 100644 --- a/src/SimpleURLNormalizer.hpp +++ b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.hpp @@ -2,6 +2,7 @@ #define __SIMPLEURLNORMALIZER_H #include "URLNormalizer.hpp" +#include "ModuleRegistry.hpp" class SimpleURLNormalizer : public URLNormalizer { public: @@ -15,4 +16,6 @@ class SimpleURLNormalizer : public URLNormalizer { void normalizePath( std::string &path ); }; +extern "C" ModuleRegistry<URLNormalizer> registry; + #endif |