summaryrefslogtreecommitdiff
path: root/src/modules
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-08-12 16:30:33 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-08-12 16:30:33 +0200
commit 561646e33cee7d2437b2732c484d2ea22312e925 (patch)
tree 11e4899f6b413b179a48d57ef0576b25514463de /src/modules
parent bb12c70e96e7fa3b13335dbfe877dfc68861237f (diff)
download crawler-561646e33cee7d2437b2732c484d2ea22312e925.tar.gz
crawler-561646e33cee7d2437b2732c484d2ea22312e925.tar.bz2
better naming of modules
Diffstat (limited to 'src/modules')
-rw-r--r-- src/modules/deduper/null/NullDeduper.cpp 2
-rw-r--r-- src/modules/fetcher/file/FileFetcher.cpp 2
-rw-r--r-- src/modules/fetcher/libfetch/LibFetchFetcher.cpp 2
-rwxr-xr-x src/modules/fetcher/winhttp/Makefile.W32 4
-rwxr-xr-x src/modules/fetcher/winhttp/WinHttpFetcher.cpp 16
-rwxr-xr-x src/modules/fetcher/winhttp/WinHttpFetcher.hpp 15
-rw-r--r-- src/modules/frontier/memory/MemoryFrontier.cpp 2
-rw-r--r-- src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp 2
-rw-r--r-- src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp 2
-rw-r--r-- src/modules/urlfilter/chain/ChainURLFilter.cpp 2
-rw-r--r-- src/modules/urlfilter/host/HostURLFilter.cpp 2
-rw-r--r-- src/modules/urlfilter/protocol/ProtocolURLFilter.cpp 2
-rwxr-xr-x src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp 2
-rwxr-xr-x src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp 2
-rw-r--r-- src/modules/urlseen/memory/MemoryURLSeen.cpp 2
15 files changed, 41 insertions, 18 deletions
diff --git a/src/modules/deduper/null/NullDeduper.cpp b/src/modules/deduper/null/NullDeduper.cpp
index 9eca5c4..6d56cb0 100644
--- a/src/modules/deduper/null/NullDeduper.cpp
+++ b/src/modules/deduper/null/NullDeduper.cpp
@@ -1,3 +1,3 @@
#include "NullDeduper.hpp"
-REGISTER_MODULE( "null", Deduper, NullDeduper )
+REGISTER_MODULE( "null_deduper", Deduper, NullDeduper )
diff --git a/src/modules/fetcher/file/FileFetcher.cpp b/src/modules/fetcher/file/FileFetcher.cpp
index 8d66e14..b344b3f 100644
--- a/src/modules/fetcher/file/FileFetcher.cpp
+++ b/src/modules/fetcher/file/FileFetcher.cpp
@@ -9,4 +9,4 @@ RewindInputStream *FileFetcher::fetch( const URL url )
return s;
}
-REGISTER_MODULE( "file", Fetcher, FileFetcher )
+REGISTER_MODULE( "file_fetcher", Fetcher, FileFetcher )
diff --git a/src/modules/fetcher/libfetch/LibFetchFetcher.cpp b/src/modules/fetcher/libfetch/LibFetchFetcher.cpp
index 5b770a7..9cbc926 100644
--- a/src/modules/fetcher/libfetch/LibFetchFetcher.cpp
+++ b/src/modules/fetcher/libfetch/LibFetchFetcher.cpp
@@ -9,4 +9,4 @@ RewindInputStream *LibFetchFetcher::fetch( const URL url )
return s;
}
-REGISTER_MODULE( "libfetch", Fetcher, LibFetchFetcher )
+REGISTER_MODULE( "libfetch_fetcher", Fetcher, LibFetchFetcher )
diff --git a/src/modules/fetcher/winhttp/Makefile.W32 b/src/modules/fetcher/winhttp/Makefile.W32
index 49874e0..ddf751a 100755
--- a/src/modules/fetcher/winhttp/Makefile.W32
+++ b/src/modules/fetcher/winhttp/Makefile.W32
@@ -14,7 +14,9 @@ INCLUDE_DIRS = \
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawlingwolf.lib
+ $(TOPDIR)\src\crawlingwolf.lib \
+ WinHttp.lib
+
DYNAMIC_MODULE = \
mod_fetcher_winhttp.dll
diff --git a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
index 06ab550..a22ab1a 100755
--- a/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
+++ b/src/modules/fetcher/winhttp/WinHttpFetcher.cpp
@@ -1,10 +1,24 @@
#include "WinHttpFetcher.hpp"
#include "WinHttpRewindInputStream.hpp"
+WinHttpFetcher::WinHttpFetcher( ) // opens the WinHTTP session handle kept in m_session
+ : m_session( 0 )
+{
+ m_session = WinHttpOpen( L"WinHTTP CrawlingWolf/0.0.1", // wide-string agent name passed as the first argument
+ WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
+ WINHTTP_NO_PROXY_NAME,
+ WINHTTP_NO_PROXY_BYPASS, 0 ); // NOTE(review): the returned handle is stored unchecked - confirm how an open failure should be reported
+}
+
+WinHttpFetcher::~WinHttpFetcher( ) // releases the session handle opened by the constructor
+{
+ if( m_session ) WinHttpCloseHandle( m_session ); // no handle to close when WinHttpOpen failed and left m_session null
+}
+
RewindInputStream *WinHttpFetcher::fetch( const URL url )
{
WinHttpRewindInputStream *s = new WinHttpRewindInputStream( url );
return s;
}
-REGISTER_MODULE( "winhttp", Fetcher, WinHttpFetcher )
+REGISTER_MODULE( "winhttp_fetcher", Fetcher, WinHttpFetcher )
diff --git a/src/modules/fetcher/winhttp/WinHttpFetcher.hpp b/src/modules/fetcher/winhttp/WinHttpFetcher.hpp
index a731da6..5854738 100755
--- a/src/modules/fetcher/winhttp/WinHttpFetcher.hpp
+++ b/src/modules/fetcher/winhttp/WinHttpFetcher.hpp
@@ -4,16 +4,23 @@
#include "Fetcher.hpp"
#include "ModuleRegistry.hpp"
+#define WIN32_LEAN_AND_MEAN // must be this exact spelling and precede <windows.h>; the transposed name was silently ignored
+#include <windows.h>
+#include <winhttp.h>
+
class WinHttpFetcher : public Fetcher
{
public:
- WinHttpFetcher( ) {
- }
+ WinHttpFetcher( );
- virtual ~WinHttpFetcher( ) {
- }
+ virtual ~WinHttpFetcher( );
virtual RewindInputStream *fetch( const URL url );
+
+ HINTERNET &session( ) { return m_session; } // hands out the session handle by mutable reference
+
+ private:
+ HINTERNET m_session; // WinHTTP session handle; NOTE(review): no copy control is declared here - confirm Fetcher prevents copies
};
DECLARE_MODULE( Fetcher )
diff --git a/src/modules/frontier/memory/MemoryFrontier.cpp b/src/modules/frontier/memory/MemoryFrontier.cpp
index ada78dd..2311353 100644
--- a/src/modules/frontier/memory/MemoryFrontier.cpp
+++ b/src/modules/frontier/memory/MemoryFrontier.cpp
@@ -1,3 +1,3 @@
#include "MemoryFrontier.hpp"
-REGISTER_MODULE( "memory", Frontier, MemoryFrontier )
+REGISTER_MODULE( "memory_frontier", Frontier, MemoryFrontier )
diff --git a/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp b/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp
index 78e7b31..f575a0c 100644
--- a/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp
+++ b/src/modules/processor/htmllinkextract/HTMLLinkExtractProcessor.cpp
@@ -68,4 +68,4 @@ void HTMLLinkExtractProcessor::process( RewindInputStream *s )
m_parser.Reset( );
}
-REGISTER_MODULE_4( "htmllinkextract", Processor, HTMLLinkExtractProcessor, URLNormalizer *, Frontier *, URLFilter *, URLSeen * )
+REGISTER_MODULE_4( "htmllinkextract_processor", Processor, HTMLLinkExtractProcessor, URLNormalizer *, Frontier *, URLFilter *, URLSeen * )
diff --git a/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp b/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp
index b65d6eb..cdc8926 100644
--- a/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp
+++ b/src/modules/typedetect/libmagic/LibMagicTypeDetect.cpp
@@ -61,4 +61,4 @@ MIMEType LibMagicTypeDetect::detect( RewindInputStream *s )
return MIMEType( res );
}
-REGISTER_MODULE( "libmagic", TypeDetect, LibMagicTypeDetect )
+REGISTER_MODULE( "libmagic_typedetect", TypeDetect, LibMagicTypeDetect )
diff --git a/src/modules/urlfilter/chain/ChainURLFilter.cpp b/src/modules/urlfilter/chain/ChainURLFilter.cpp
index fc2de93..4dcf493 100644
--- a/src/modules/urlfilter/chain/ChainURLFilter.cpp
+++ b/src/modules/urlfilter/chain/ChainURLFilter.cpp
@@ -16,4 +16,4 @@ bool ChainURLFilter::filter( const URL url )
return true;
}
-REGISTER_MODULE_1( "chain", URLFilter, ChainURLFilter, const std::list<URLFilter *> )
+REGISTER_MODULE_1( "chain_urlfilter", URLFilter, ChainURLFilter, const std::list<URLFilter *> ) // spelled like the host_urlfilter / protocol_urlfilter registrations
diff --git a/src/modules/urlfilter/host/HostURLFilter.cpp b/src/modules/urlfilter/host/HostURLFilter.cpp
index 6981a36..dd03910 100644
--- a/src/modules/urlfilter/host/HostURLFilter.cpp
+++ b/src/modules/urlfilter/host/HostURLFilter.cpp
@@ -18,4 +18,4 @@ bool HostURLFilter::filter( const URL url )
return res;
}
-REGISTER_MODULE_1( "host", URLFilter, HostURLFilter, const std::set<std::string> )
+REGISTER_MODULE_1( "host_urlfilter", URLFilter, HostURLFilter, const std::set<std::string> )
diff --git a/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp b/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp
index e50dcc1..96168b7 100644
--- a/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp
+++ b/src/modules/urlfilter/protocol/ProtocolURLFilter.cpp
@@ -18,4 +18,4 @@ bool ProtocolURLFilter::filter( const URL url )
return res;
}
-REGISTER_MODULE_1( "protocol", URLFilter, ProtocolURLFilter, const std::set<std::string> )
+REGISTER_MODULE_1( "protocol_urlfilter", URLFilter, ProtocolURLFilter, const std::set<std::string> )
diff --git a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
index 7e5edde..3db781c 100755
--- a/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
+++ b/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
@@ -104,4 +104,4 @@ URL GoogleURLNormalizer::normalize( const URL url, const string s )
"", "" );
}
-REGISTER_MODULE( "google", URLNormalizer, GoogleURLNormalizer )
+REGISTER_MODULE( "google_urlnormalizer", URLNormalizer, GoogleURLNormalizer )
diff --git a/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp
index 87b3794..b2dd34f 100755
--- a/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp
+++ b/src/modules/urlnormalizer/simpleurl/SimpleURLNormalizer.cpp
@@ -150,4 +150,4 @@ void SimpleURLNormalizer::normalizePath( string &path )
}
}
-REGISTER_MODULE( "simple", URLNormalizer, SimpleURLNormalizer )
+REGISTER_MODULE( "simple_urlnormalizer", URLNormalizer, SimpleURLNormalizer )
diff --git a/src/modules/urlseen/memory/MemoryURLSeen.cpp b/src/modules/urlseen/memory/MemoryURLSeen.cpp
index 15149e9..e7bc9e6 100644
--- a/src/modules/urlseen/memory/MemoryURLSeen.cpp
+++ b/src/modules/urlseen/memory/MemoryURLSeen.cpp
@@ -21,4 +21,4 @@ bool MemoryURLSeen::seen( const URL url )
return hasSeen;
}
-REGISTER_MODULE( "memory", URLSeen, MemoryURLSeen )
+REGISTER_MODULE( "memory_urlseen", URLSeen, MemoryURLSeen )