diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 16:30:33 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 16:30:33 +0200 |
commit | 561646e33cee7d2437b2732c484d2ea22312e925 (patch) | |
tree | 11e4899f6b413b179a48d57ef0576b25514463de /src/modules | |
parent | bb12c70e96e7fa3b13335dbfe877dfc68861237f (diff) | |
download | crawler-561646e33cee7d2437b2732c484d2ea22312e925.tar.gz crawler-561646e33cee7d2437b2732c484d2ea22312e925.tar.bz2 |
better naming of modules
Diffstat (limited to 'src/modules')
15 files changed, 41 insertions, 18 deletions
diff --git a/src/modules/deduper/null/NullDeduper.cpp b/src/modules/deduper/null/NullDeduper.cpp index 9eca5c4..6d56cb0 100644 --- a/src/modules/deduper/null/NullDeduper.cpp +++ b/src/modules/deduper/null/NullDeduper.cpp @@ -1,3 +1,3 @@ #include "NullDeduper.hpp" -REGISTER_MODULE( "null", Deduper, NullDeduper ) +REGISTER_MODULE( "null_deduper", Deduper, NullDeduper ) diff --git a/src/modules/fetcher/file/FileFetcher.cpp b/src/modules/fetcher/file/FileFetcher.cpp index 8d66e14..b344b3f 100644 --- a/src/modules/fetcher/file/FileFetcher.cpp +++ b/src/modules/fetcher/file/FileFetcher.cpp @@ -9,4 +9,4 @@ RewindInputStream *FileFetcher::fetch( const URL url ) return s; } -REGISTER_MODULE( "file", Fetcher, FileFetcher ) +REGISTER_MODULE( "file_fetcher", Fetcher, FileFetcher ) diff --git a/src/modules/fetcher/libfetch/LibFetchFetcher.cpp b/src/modules/fetcher/libfetch/LibFetchFetcher.cpp index 5b770a7..9cbc926 100644 --- a/src/modules/fetcher/libfetch/LibFetchFetcher.cpp +++ b/src/modules/fetcher/libfetch/LibFetchFetcher.cpp @@ -9,4 +9,4 @@ RewindInputStream *LibFetchFetcher::fetch( const URL url ) return s; } -REGISTER_MODULE( "libfetch", Fetcher, LibFetchFetcher ) +REGISTER_MODULE( "libfetch_fetcher", Fetcher, LibFetchFetcher ) diff --git a/src/modules/fetcher/winhttp/Makefile.W32 b/src/modules/fetcher/winhttp/Makefile.W32 index 49874e0..ddf751a 100755 --- a/src/modules/fetcher/winhttp/Makefile.W32 +++ b/src/modules/fetcher/winhttp/Makefile.W32 @@ -14,7 +14,9 @@ INCLUDE_DIRS = \ INCLUDE_LDFLAGS = \ INCLUDE_LIBS = \ - $(TOPDIR)\src\crawlingwolf.lib + $(TOPDIR)\src\crawlingwolf.lib \ + WinHttp.lib + DYNAMIC_MODULE = \ mod_fetcher_winhttp.dll diff --git a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp index 06ab550..a22ab1a 100755 --- a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp +++ b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp @@ -1,10 +1,24 @@ #include "WinHttpFetcher.hpp" #include "WinHttpRewindInputStream.hpp" +WinHttpFetcher::WinHttpFetcher( ) + : m_session( 0 ) +{ + m_session = WinHttpOpen( L"WinHTTP CrawlingWolf/0.0.1", + WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, + WINHTTP_NO_PROXY_NAME, + WINHTTP_NO_PROXY_BYPASS, 0 ); +} + +WinHttpFetcher::~WinHttpFetcher( ) +{ + WinHttpCloseHandle( m_session ); +} + RewindInputStream *WinHttpFetcher::fetch( const URL url ) { WinHttpRewindInputStream *s = new WinHttpRewindInputStream( url ); return s; } -REGISTER_MODULE( "winhttp", Fetcher, WinHttpFetcher ) +REGISTER_MODULE( "winhttp_fetcher", Fetcher, WinHttpFetcher ) diff --git a/src/modules/fetcher/winhttp/WinHttpFetcher.hpp b/src/modules/fetcher/winhttp/WinHttpFetcher.hpp index a731da6..5854738 100755 --- a/src/modules/fetcher/winhttp/WinHttpFetcher.hpp +++ b/src/modules/fetcher/winhttp/WinHttpFetcher.hpp @@ -4,16 +4,23 @@ #include "Fetcher.hpp" #include "ModuleRegistry.hpp" +#define WIN32_MEAN_AND_LEAN +#include <windows.h> +#include <winhttp.h> + class WinHttpFetcher : public Fetcher { public: - WinHttpFetcher( ) { - } + WinHttpFetcher( ); - virtual ~WinHttpFetcher( ) { - } + virtual ~WinHttpFetcher( ); virtual RewindInputStream *fetch( const URL url ); + + HINTERNET &session( ) { return m_session; } + + private: + HINTERNET m_session; }; DECLARE_MODULE( Fetcher ) diff --git a/src/modules/frontier/memory/MemoryFrontier.cpp b/src/modules/frontier/memory/MemoryFrontier.cpp index ada78dd..2311353 100644 --- a/src/modules/frontier/memory/MemoryFrontier.cpp +++ b/src/modules/frontier/memory/MemoryFrontier.cpp @@ -1,3 +1,3 @@ #include "MemoryFrontier.hpp" -REGISTER_MODULE( "memory", Frontier, MemoryFrontier ) +REGISTER_MODULE( "memory_frontier", Frontier, MemoryFrontier ) diff --git a/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp b/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp index 78e7b31..f575a0c 100644 --- a/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp +++ b/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp @@ -68,4 +68,4 @@ void HTMLLinkExtractProcessor::process( RewindInputStream *s ) m_parser.Reset( ); } -REGISTER_MODULE_4( "htmllinkextract", Processor, HTMLLinkExtractProcessor, URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) +REGISTER_MODULE_4( "htmllinkextract_processor", Processor, HTMLLinkExtractProcessor, URLNormalizer *, Frontier *, URLFilter *, URLSeen * ) diff --git a/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp b/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp index b65d6eb..cdc8926 100644 --- a/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp +++ b/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp @@ -61,4 +61,4 @@ MIMEType LibMagicTypeDetect::detect( RewindInputStream *s ) return MIMEType( res ); } -REGISTER_MODULE( "libmagic", TypeDetect, LibMagicTypeDetect ) +REGISTER_MODULE( "libmagic_typedetect", TypeDetect, LibMagicTypeDetect ) diff --git a/src/modules/urlfilter/chain/ChainURLFilter.cpp b/src/modules/urlfilter/chain/ChainURLFilter.cpp index fc2de93..4dcf493 100644 --- a/src/modules/urlfilter/chain/ChainURLFilter.cpp +++ b/src/modules/urlfilter/chain/ChainURLFilter.cpp @@ -16,4 +16,4 @@ bool ChainURLFilter::filter( const URL url ) return true; } -REGISTER_MODULE_1( "chain", URLFilter, ChainURLFilter, const std::list<URLFilter *> ) +REGISTER_MODULE_1( "chain_urlfiler", URLFilter, ChainURLFilter, const std::list<URLFilter *> ) diff --git a/src/modules/urlfilter/host/HostURLFilter.cpp b/src/modules/urlfilter/host/HostURLFilter.cpp index 6981a36..dd03910 100644 --- a/src/modules/urlfilter/host/HostURLFilter.cpp +++ b/src/modules/urlfilter/host/HostURLFilter.cpp @@ -18,4 +18,4 @@ bool HostURLFilter::filter( const URL url ) return res; } -REGISTER_MODULE_1( "host", URLFilter, HostURLFilter, const std::set<std::string> ) +REGISTER_MODULE_1( "host_urlfilter", URLFilter, HostURLFilter, const std::set<std::string> ) diff --git a/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp b/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp index e50dcc1..96168b7 100644 --- a/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp +++ b/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp @@ -18,4 +18,4 @@ bool ProtocolURLFilter::filter( const URL url ) return res; } -REGISTER_MODULE_1( "protocol", URLFilter, ProtocolURLFilter, const std::set<std::string> ) +REGISTER_MODULE_1( "protocol_urlfilter", URLFilter, ProtocolURLFilter, const std::set<std::string> ) diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp index 7e5edde..3db781c 100755 --- a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp +++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp @@ -104,4 +104,4 @@ URL GoogleURLNormalizer::normalize( const URL url, const string s ) "", "" ); } -REGISTER_MODULE( "google", URLNormalizer, GoogleURLNormalizer ) +REGISTER_MODULE( "google_urlnormalizer", URLNormalizer, GoogleURLNormalizer ) diff --git a/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp index 87b3794..b2dd34f 100755 --- a/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp +++ b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp @@ -150,4 +150,4 @@ void SimpleURLNormalizer::normalizePath( string &path ) } } -REGISTER_MODULE( "simple", URLNormalizer, SimpleURLNormalizer ) +REGISTER_MODULE( "simple_urlnormalizer", URLNormalizer, SimpleURLNormalizer ) diff --git a/src/modules/urlseen/memory/MemoryURLSeen.cpp b/src/modules/urlseen/memory/MemoryURLSeen.cpp index 15149e9..e7bc9e6 100644 --- a/src/modules/urlseen/memory/MemoryURLSeen.cpp +++ b/src/modules/urlseen/memory/MemoryURLSeen.cpp @@ -21,4 +21,4 @@ bool MemoryURLSeen::seen( const URL url ) return hasSeen; } -REGISTER_MODULE( "memory", URLSeen, MemoryURLSeen ) +REGISTER_MODULE( "memory_urlseen", URLSeen, MemoryURLSeen ) |