diff options
23 files changed, 340 insertions, 31 deletions
@@ -42,3 +42,13 @@ http://www.isotton.com/devel/docs/C++-dlopen-mini-HOWTO/C++-dlopen-mini-HOWTO.ht http://www.linuxjournal.com/article/3687?page=0,1 http://www.artima.com/cppsource/subscription_problem.html http://kristiannielsen.livejournal.com/11783.html + +Meta Programming in C++ + +Model C++ Design (Alexandrescu) +The Loki Template library +http://www.codeproject.com/Articles/5629/Tiny-Template-Library-implementing-typelist +http://www.drdobbs.com/cpp/extracting-function-parameter-and-return/240000586?pgno=2 +http://sourceforge.net/projects/toast/: portable type_info.name() +http://gcc.gnu.org/onlinedocs/libstdc++/manual/ext_demangling.html +?? name of module or typeid of derived class in module? diff --git a/src/GNUmakefile b/src/GNUmakefile index 4948b49..906f3ea 100644 --- a/src/GNUmakefile +++ b/src/GNUmakefile @@ -31,9 +31,6 @@ LOCAL_STATIC_LIB_OBJS = \ LibFetchFetcher.o \ LibFetchRewindInputStream.o \ HTMLLinkExtractProcessor.o \ - ProtocolURLFilter.o \ - HostURLFilter.o \ - ChainURLFilter.o \ MemoryURLSeen.o CPP_OBJS = \ @@ -42,8 +39,8 @@ CPP_OBJS = \ LOCAL_STATIC_LIB = \ libcrawlingwolf.a -CPP_BINS = \ - crawlingwolf$(EXE) +#CPP_BINS = \ +# crawlingwolf$(EXE) -include $(TOPDIR)/makefiles/gmake/sub.mk diff --git a/src/ModuleLoader.hpp b/src/ModuleLoader.hpp index 2c88ed6..1a89d38 100644 --- a/src/ModuleLoader.hpp +++ b/src/ModuleLoader.hpp @@ -11,6 +11,9 @@ #include "ModuleRegistry.hpp" +#include "TypeList.hpp" +#include "TypeInfo.hpp" + template< typename Interface > struct Module { void *handle; @@ -41,7 +44,9 @@ class ModuleLoader { throw std::runtime_error( dlerror( ) ); } - m.registry = static_cast<ModuleRegistry<Interface> *>( dlsym( m.handle, "registry" ) ); + std::string registryName = "registry_" + demangle( typeid( Interface ) ); + + m.registry = static_cast<ModuleRegistry<Interface> *>( dlsym( m.handle, registryName.c_str( ) ) ); if( !m.registry ) { dlclose( m.handle ); throw std::runtime_error( "missing module registry" ); @@ -70,7 +75,7 @@ class ModuleLoader { Interface *obj = (*it).second.registry->create( ); - std::string clazz = typeid( *obj ).name( ); + std::string clazz = demangle( typeid( *obj ) ); m_modules.insert( std::make_pair( clazz, (*it).second ) ); @@ -79,7 +84,7 @@ class ModuleLoader { void destroy( Interface *obj ) { - std::string clazz = typeid( *obj ).name( ); + std::string clazz = demangle( typeid( *obj ) ); typename mapType::const_iterator it = m_modules.find( clazz ); if( it == m_modules.end( ) ) { diff --git a/src/ModuleRegistry.hpp b/src/ModuleRegistry.hpp index 403bb22..3b7e5d9 100644 --- a/src/ModuleRegistry.hpp +++ b/src/ModuleRegistry.hpp @@ -3,29 +3,42 @@ #include <string> +#include "TypeList.hpp" + +template< typename Interface, typename CtorParams = NullType > +struct ModuleRegistry; + template< typename Interface > -struct ModuleRegistry { +struct ModuleRegistry< Interface > { std::string name; Interface *(*create)( ); void (*destroy)( Interface *obj ); - ModuleRegistry( ) { } - - ModuleRegistry<Interface>( std::string _name, Interface *(*_create)( ), + ModuleRegistry( std::string _name, Interface *(*_create)( ), void (*_destroy)( Interface *obj ) ) : name( _name ), create( _create ), destroy( _destroy ) { } }; +template< typename Interface, typename P1 > +struct ModuleRegistry< Interface, TYPELIST_1( P1 ) > { + std::string name; + Interface *(*create)( P1 ); + void (*destroy)( Interface *obj ); + + ModuleRegistry( std::string _name, Interface *(*_create)( P1 ), + void (*_destroy)( Interface *obj ) ) + : name( _name ), create( _create ), destroy( _destroy ) + { + } +}; + #ifdef SHARED + #define DECLARE_MODULE( baseClass ) \ -extern "C" ModuleRegistry<baseClass> registry; -#else -#define DECLARE_MODULE( baseClass ) -#endif + extern ModuleRegistry<baseClass> registry ## _ ## baseClass; -#ifdef SHARED #define REGISTER_MODULE( name, baseClass, subClass ) \ static baseClass *create( ) \ { \ @@ -37,9 +50,32 @@ static void destroy( baseClass *obj ) \ delete obj; \ } \ \ -ModuleRegistry<baseClass> registry( name, &create, &destroy ); -#else +ModuleRegistry<baseClass> registry ## _ ## baseClass( name, &create, &destroy ); + +#define DECLARE_MODULE_1( baseClass, T1 ) \ + extern ModuleRegistry<baseClass, TYPELIST_1( T1 ) > registry ## _ ## baseClass; + +#define REGISTER_MODULE_1( name, baseClass, subClass, T1 ) \ +static baseClass *create( T1 t ) \ +{ \ + return new subClass( t ); \ +} \ + \ +static void destroy( baseClass *obj ) \ +{ \ + delete obj; \ +} \ + \ +ModuleRegistry<baseClass, TYPELIST_1( T1 )> registry ## _ ## baseClass( name, &create, &destroy ); + +#else // SHARED + +#define DECLARE_MODULE( baseClass ) +#define DECLARE_MODULE_1( baseClass, T ) + #define REGISTER_MODULE( name, baseClass, subClass ) -#endif - -#endif +#define REGISTER_MODULE_1( name, baseClass, subClass, T ) + +#endif // SHARED + +#endif // __MODULEINTERFACE_H diff --git a/src/TypeInfo.hpp b/src/TypeInfo.hpp new file mode 100644 index 0000000..4133ec3 --- /dev/null +++ b/src/TypeInfo.hpp @@ -0,0 +1,33 @@ +#ifndef __TYPEINFO_H +#define __TYPEINFO_H + +#include <typeinfo> +#include <string> +#include <stdexcept> + +#ifdef __GNUG__ + +#include <cxxabi.h> + +std::string demangle( const std::type_info &info ) +{ + enum { BUFLEN = 200 }; + char buf[BUFLEN]; + std::size_t buflen = BUFLEN; + int status; + + __cxxabiv1::__cxa_demangle( info.name( ), buf, &buflen, &status ); + if( status != 0 ) { + throw std::runtime_error( "__cxa_demangle failed!" ); + } + + return buf; +} + +#else + +#error "C++ demangling not ported!" + +#endif + +#endif diff --git a/src/TypeList.hpp b/src/TypeList.hpp new file mode 100644 index 0000000..bc8c49b --- /dev/null +++ b/src/TypeList.hpp @@ -0,0 +1,28 @@ +#ifndef __TYPELIST_H +#define __TYPELIST_H + +class NullType {}; + +template< class T, class U > +struct TypeList { + typedef T Head; + typedef U Tail; +}; + +#define TYPELIST_1( T1 ) TypeList< T1, NullType > +#define TYPELIST_2( T1, T2 ) TypeList< T1, TYPELIST_1( T2 ) > +#define TYPELIST_3( T1, T2, T3 ) TypeList< T1, TYPELIST_2( T2, T3 ) > +#define TYPELIST_4( T1, T2, T3, T4 ) TypeList< T1, TYPELIST_3( T2, T3, T4 ) > + +template< class T> struct Length; +template< > struct Length< NullType > +{ + enum { value = 0 }; +}; +template< class T, class U > +struct Length< TypeList< T, U > > +{ + enum { value = 1 + Length< U >::value }; +}; + +#endif diff --git a/src/TypeTraits.hpp b/src/TypeTraits.hpp new file mode 100644 index 0000000..b01051e --- /dev/null +++ b/src/TypeTraits.hpp @@ -0,0 +1,9 @@ +#ifndef __TYPETRAITS_H +#define __TYPETRAITS_H + +template< typename T > +class TypeTraits { + typedef typename +}; + +#endif diff --git a/src/crawlingwolf.cpp b/src/crawlingwolf.cpp index 328cc80..e924b16 100644 --- a/src/crawlingwolf.cpp +++ b/src/crawlingwolf.cpp @@ -2,9 +2,6 @@ #include "MemoryFrontier.hpp" #include "MD5Deduper.hpp" #include "HTMLLinkExtractProcessor.hpp" -#include "ChainURLFilter.hpp" -#include "ProtocolURLFilter.hpp" -#include "HostURLFilter.hpp" #include "MemoryURLSeen.hpp" #include "URLNormalizer.hpp" #include "ModuleLoader.hpp" @@ -28,6 +25,7 @@ int main( void ) Deduper *deduper = new MD5Deduper( ); URLSeen *urlSeen = new MemoryURLSeen( ); +/* set<string> protocols; protocols.insert( "http" ); protocols.insert( "https" ); @@ -38,10 +36,11 @@ int main( void ) HostURLFilter hostFilter( hosts ); ChainURLFilter filters( &protocolFilter, &hostFilter ); - +*/ URLNormalizer *normalizer = urlNormalizers.create( "google" ); - Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen ); + //Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, &filters, urlSeen ); + Processor *processor = new HTMLLinkExtractProcessor( normalizer, frontier, std::list( ), urlSeen ); LOG( logNOTICE ) << "Crawler started.."; diff --git a/src/modules/GNUmakefile b/src/modules/GNUmakefile index ddf5ee4..cd45705 100644 --- a/src/modules/GNUmakefile +++ b/src/modules/GNUmakefile @@ -1,6 +1,6 @@ TOPDIR = ../.. -SUBDIRS = urlnormalizer +SUBDIRS = urlnormalizer urlfilter -include $(TOPDIR)/makefiles/gmake/sub.mk diff --git a/src/modules/urlfilter/GNUmakefile b/src/modules/urlfilter/GNUmakefile new file mode 100644 index 0000000..ea5262d --- /dev/null +++ b/src/modules/urlfilter/GNUmakefile @@ -0,0 +1,18 @@ +TOPDIR = ../../.. + +SUBDIRS = protocol host +#chain + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: diff --git a/src/ChainURLFilter.cpp b/src/modules/urlfilter/chain/ChainURLFilter.cpp index a356299..a356299 100644 --- a/src/ChainURLFilter.cpp +++ b/src/modules/urlfilter/chain/ChainURLFilter.cpp diff --git a/src/ChainURLFilter.hpp b/src/modules/urlfilter/chain/ChainURLFilter.hpp index 216af6b..8c6d165 100644 --- a/src/ChainURLFilter.hpp +++ b/src/modules/urlfilter/chain/ChainURLFilter.hpp @@ -2,6 +2,7 @@ #define __CHAIN_URLFILTER_H #include "URLFilter.hpp" +#include "ModuleRegistry.hpp" #include <list> @@ -19,4 +20,7 @@ class ChainURLFilter : public URLFilter std::list<URLFilter *> m_filters; }; +DECLARE_MODULE( URLFilter ) +DECLARE_MODULE_1( URLFilter, URLFilter * ) + #endif diff --git a/src/modules/urlfilter/chain/GNUmakefile b/src/modules/urlfilter/chain/GNUmakefile new file mode 100644 index 0000000..5b7c827 --- /dev/null +++ b/src/modules/urlfilter/chain/GNUmakefile @@ -0,0 +1,39 @@ +TOPDIR = ../../../.. + +SUBDIRS = + +-include $(TOPDIR)/makefiles/gmake/platform.mk + +INCLUDE_DIRS = \ + -I. -I$(TOPDIR)/src + +INCLUDE_CXXFLAGS = \ + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + $(TOPDIR)/src/libcrawlingwolf.a + +DYNAMIC_MODULE = \ + mod_urlfilter_chain.so + +STATIC_LIB = \ + libchainurlfilter.a + +CPP_OBJS = \ + ChainURLFilter.o + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/modules/urlfilter/host/GNUmakefile b/src/modules/urlfilter/host/GNUmakefile new file mode 100644 index 0000000..beff685 --- /dev/null +++ b/src/modules/urlfilter/host/GNUmakefile @@ -0,0 +1,39 @@ +TOPDIR = ../../../.. + +SUBDIRS = + +-include $(TOPDIR)/makefiles/gmake/platform.mk + +INCLUDE_DIRS = \ + -I. -I$(TOPDIR)/src + +INCLUDE_CXXFLAGS = \ + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + $(TOPDIR)/src/libcrawlingwolf.a + +DYNAMIC_MODULE = \ + mod_urlfilter_host.so + +STATIC_LIB = \ + libhosturlfilter.a + +CPP_OBJS = \ + HostURLFilter.o + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/HostURLFilter.cpp b/src/modules/urlfilter/host/HostURLFilter.cpp index 3c3686f..6981a36 100644 --- a/src/HostURLFilter.cpp +++ b/src/modules/urlfilter/host/HostURLFilter.cpp @@ -17,3 +17,5 @@ bool HostURLFilter::filter( const URL url ) return res; } + +REGISTER_MODULE_1( "host", URLFilter, HostURLFilter, const std::set<std::string> ) diff --git a/src/HostURLFilter.hpp b/src/modules/urlfilter/host/HostURLFilter.hpp index aa91e09..6d1349e 100644 --- a/src/HostURLFilter.hpp +++ b/src/modules/urlfilter/host/HostURLFilter.hpp @@ -2,8 +2,10 @@ #define __HOST_URLFILTER_H #include "URLFilter.hpp" +#include "ModuleRegistry.hpp" #include <set> +#include <string> class HostURLFilter : public URLFilter { @@ -16,4 +18,6 @@ class HostURLFilter : public URLFilter std::set<std::string> m_hosts; }; +DECLARE_MODULE_1( URLFilter, const std::set<std::string> ) + #endif diff --git a/src/modules/urlfilter/protocol/GNUmakefile b/src/modules/urlfilter/protocol/GNUmakefile new file mode 100644 index 0000000..52027bc --- /dev/null +++ b/src/modules/urlfilter/protocol/GNUmakefile @@ -0,0 +1,39 @@ +TOPDIR = ../../../.. + +SUBDIRS = + +-include $(TOPDIR)/makefiles/gmake/platform.mk + +INCLUDE_DIRS = \ + -I. -I$(TOPDIR)/src + +INCLUDE_CXXFLAGS = \ + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + $(TOPDIR)/src/libcrawlingwolf.a + +DYNAMIC_MODULE = \ + mod_urlfilter_protocol.so + +STATIC_LIB = \ + libprotocolurlfilter.a + +CPP_OBJS = \ + ProtocolURLFilter.o + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: + diff --git a/src/ProtocolURLFilter.cpp b/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp index 3f495ed..e50dcc1 100644 --- a/src/ProtocolURLFilter.cpp +++ b/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp @@ -17,3 +17,5 @@ bool ProtocolURLFilter::filter( const URL url ) return res; } + +REGISTER_MODULE_1( "protocol", URLFilter, ProtocolURLFilter, const std::set<std::string> ) diff --git a/src/ProtocolURLFilter.hpp b/src/modules/urlfilter/protocol/ProtocolURLFilter.hpp index 3fe18f8..b829e61 100644 --- a/src/ProtocolURLFilter.hpp +++ b/src/modules/urlfilter/protocol/ProtocolURLFilter.hpp @@ -2,8 +2,10 @@ #define __PROTOCOL_URLFILTER_H #include "URLFilter.hpp" +#include "ModuleRegistry.hpp" #include <set> +#include <string> class ProtocolURLFilter : public URLFilter { @@ -16,4 +18,6 @@ class ProtocolURLFilter : public URLFilter std::set<std::string> m_protocols; }; +DECLARE_MODULE_1( URLFilter, const std::set<std::string> ) + #endif diff --git a/tests/GNUmakefile b/tests/GNUmakefile index f582bbb..e2b08bb 100644 --- a/tests/GNUmakefile +++ b/tests/GNUmakefile @@ -1,6 +1,6 @@ TOPDIR = .. -SUBDIRS = url streamhtmlparser libfetch curl psql sqlite +SUBDIRS = utils url streamhtmlparser libfetch curl psql sqlite -include $(TOPDIR)/makefiles/gmake/sub.mk diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile index 6a9104a..6ca1f96 100644 --- a/tests/url/GNUmakefile +++ b/tests/url/GNUmakefile @@ -2,8 +2,8 @@ TOPDIR = ../.. SUBDIRS = -#INCLUDE_CXXFLAGS = \ -# -DUSE_MODULELOADER +INCLUDE_CXXFLAGS = \ + -DUSE_MODULELOADER INCLUDE_DIRS = \ -I$(TOPDIR)/src \ diff --git a/tests/utils/GNUmakefile b/tests/utils/GNUmakefile new file mode 100644 index 0000000..e3913bd --- /dev/null +++ b/tests/utils/GNUmakefile @@ -0,0 +1,25 @@ +TOPDIR = ../.. + +SUBDIRS = + +INCLUDE_DIRS = \ + -I$(TOPDIR)/src + +INCLUDE_LDFLAGS = + +INCLUDE_LIBS = + +TEST_CPP_BINS = \ + test1$(EXE) + +OBJS = + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_test: diff --git a/tests/utils/test1.cpp b/tests/utils/test1.cpp new file mode 100644 index 0000000..987149b --- /dev/null +++ b/tests/utils/test1.cpp @@ -0,0 +1,16 @@ +#include "TypeList.hpp" + +#include <iostream> +using namespace std; + +typedef TypeList< int, TypeList< char *, TypeList< int, NullType > > > TestType; + +typedef TYPELIST_3( int, char *, int ) TestType2; + +int len = Length<TestType2>::value; + +int main( void ) +{ + cout << "len: " << len << endl; + return 0; +} |