summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-09-06 22:18:23 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-09-06 22:18:23 +0200
commit 13fc9a7da5111f4ddba942d3c6b6b8654ce395d6 (patch)
tree e86210e3d939911e35f930a6dc73c3ebb591243b /src
parent f5c586f7231f7e033c5528bcefea357e4e64441c (diff)
download crawler-13fc9a7da5111f4ddba942d3c6b6b8654ce395d6.tar.gz
crawler-13fc9a7da5111f4ddba942d3c6b6b8654ce395d6.tar.bz2
more splitting into libcrawler and the crawl binary
moved more public headers to 'include'; changed approach for dynamic linking on Windows
Diffstat (limited to 'src')
-rw-r--r-- src/DNSResolver.hpp 7
-rw-r--r-- src/Deduper.hpp 15
-rwxr-xr-x src/Fetcher.hpp 15
-rw-r--r-- src/Frontier.hpp 16
-rwxr-xr-x src/GNUmakefile 54
-rw-r--r-- src/MIMEType.hpp 100
-rwxr-xr-x src/Makefile.W32 53
-rw-r--r-- src/Processor.hpp 13
-rwxr-xr-x src/RewindInputStream.hpp 31
-rwxr-xr-x src/SpoolRewindInputStream.hpp 51
-rw-r--r-- src/URL.hpp 138
-rw-r--r-- src/URLFilter.hpp 14
-rw-r--r-- src/URLNormalizer.hpp 17
-rw-r--r-- src/URLSeen.hpp 12
-rwxr-xr-x src/crawl/GNUmakefile 55
-rwxr-xr-x src/crawl/Makefile.W32 39
-rwxr-xr-x src/crawl/crawl.cpp (renamed from src/crawl.cpp) 0
-rwxr-xr-x src/libcrawler/GNUmakefile 42
-rw-r--r-- src/libcrawler/MIMEType.cpp (renamed from src/MIMEType.cpp) 0
-rwxr-xr-x src/libcrawler/Makefile.W32 45
-rw-r--r-- src/libcrawler/SpoolRewindInputStream.cpp (renamed from src/SpoolRewindInputStream.cpp) 0
-rw-r--r-- src/libcrawler/URL.cpp (renamed from src/URL.cpp) 0
-rwxr-xr-x src/libcrawler/win32/errormsg.cpp (renamed from src/win32/errormsg.cpp) 2
-rwxr-xr-x src/libcrawler/win32/stringutils.cpp (renamed from src/win32/stringutils.cpp) 2
-rwxr-xr-x src/logger/Makefile.W32 2
-rwxr-xr-x src/modules/deduper/null/Makefile.W32 13
-rwxr-xr-x src/modules/fetcher/file/Makefile.W32 5
-rwxr-xr-x src/modules/fetcher/winhttp/Makefile.W32 5
-rwxr-xr-x src/modules/frontier/memory/Makefile.W32 5
-rwxr-xr-x src/modules/urlfilter/chain/Makefile.W32 4
-rwxr-xr-x src/modules/urlfilter/host/Makefile.W32 5
-rwxr-xr-x src/modules/urlfilter/protocol/Makefile.W32 5
-rwxr-xr-x src/modules/urlnormalizer/googleurl/Makefile.W32 3
-rwxr-xr-x src/modules/urlnormalizer/simpleurl/Makefile.W32 5
-rwxr-xr-x src/win32/errormsg.hpp 8
-rwxr-xr-x src/win32/stringutils.hpp 8
36 files changed, 222 insertions, 567 deletions
diff --git a/src/DNSResolver.hpp b/src/DNSResolver.hpp
deleted file mode 100644
index 8f79734..0000000
--- a/src/DNSResolver.hpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __DNSRESOLVER_H
-#define __DNSRESOLVER_H
-
-class DNSResolver {
-};
-
-#endif
diff --git a/src/Deduper.hpp b/src/Deduper.hpp
deleted file mode 100644
index 3cb33c1..0000000
--- a/src/Deduper.hpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __DEDUPER_H
-#define __DEDUPER_H
-
-#include "URL.hpp"
-#include "RewindInputStream.hpp"
-
-class Deduper
-{
- public:
- virtual ~Deduper( ) { };
-
- virtual bool contentSeen( const URL url, RewindInputStream *s ) = 0;
-};
-
-#endif
diff --git a/src/Fetcher.hpp b/src/Fetcher.hpp
deleted file mode 100755
index 40f1c7a..0000000
--- a/src/Fetcher.hpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __FETCHER_H
-#define __FETCHER_H
-
-#include "URL.hpp"
-#include "RewindInputStream.hpp"
-
-class Fetcher
-{
- public:
- virtual ~Fetcher( ) { };
-
- virtual RewindInputStream *fetch( const URL url ) = 0;
-};
-
-#endif
diff --git a/src/Frontier.hpp b/src/Frontier.hpp
deleted file mode 100644
index 54c0dd6..0000000
--- a/src/Frontier.hpp
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __FRONTIER_H
-#define __FRONTIER_H
-
-#include "URL.hpp"
-
-class Frontier
-{
- public:
- virtual ~Frontier( ) { };
-
- virtual URL getNextUrl( ) = 0;
-
- virtual void addUrl( const URL url ) = 0;
-};
-
-#endif
diff --git a/src/GNUmakefile b/src/GNUmakefile
index 97b934a..7367ce2 100755
--- a/src/GNUmakefile
+++ b/src/GNUmakefile
@@ -1,69 +1,17 @@
TOPDIR = ..
-SUBDIRS = logger modules
-
--include $(TOPDIR)/makefiles/gmake/platform.mk
-
-INCLUDE_CPPFLAGS = \
-
-
-INCLUDE_DIRS = \
- -I. \
- -I$(TOPDIR)/include/logger \
- -I$(TOPDIR)/include/util \
- -I$(TOPDIR)/include/module
-
-INCLUDE_LDFLAGS = \
- -L$(TOPDIR)/src/logger
-
-INCLUDE_LIBS = \
- -llogger
-
-# openssl
-ifeq ($(WITH_SSL),1)
-
-INCLUDE_CFLAGS += \
- -DWITH_SSL
-
-INCLUDE_LIBS += \
- $(OPENSSL_LIBS)
-endif
-
-STATIC_LIB = libcrawler.a
-
-DYNAMIC_LIB = libcrawler.so
-DYNAMIC_LIB_MAJOR = 0
-DYNAMIC_LIB_MINOR = 0
-DYNAMIC_LIB_PATCH = 0
-
-CPP_OBJS = \
- URL.o \
- MIMEType.o \
- SpoolRewindInputStream.o
-
-CPP_BINS = \
- crawl$(EXE)
+SUBDIRS = logger libcrawler modules crawl
-include $(TOPDIR)/makefiles/gmake/sub.mk
local_all:
-modules: $(DYNAMIC_LIB)
-
local_clean:
- @-rm -f $(LOCAL_STATIC_LIB)
local_distclean:
local_install:
- $(INSTALL) -d -m 0755 $(DESTDIR)$(bindir)
- $(INSTALL) -m 0775 crawl$(EXE) $(DESTDIR)$(bindir)
local_uninstall:
- @-rm -f $(DESTDIR)$(bindir)/crawl
- @-rmdir $(DESTDIR)$(bindir)
local_test:
-
-run:
- @LD_LIBRARY_PATH=$(TOPDIR)/src:$(TOPDIR)/src/logger:$(TOPDIR)/googleurl:$(TOPDIR)/libfetch:$(TOPDIR)/streamhtmlparser ./crawl
diff --git a/src/MIMEType.hpp b/src/MIMEType.hpp
deleted file mode 100644
index 3a628ca..0000000
--- a/src/MIMEType.hpp
+++ /dev/null
@@ -1,100 +0,0 @@
-#ifndef __MIMETYPE_H
-#define __MIMETYPE_H
-
-#include <string>
-#include <cstring>
-#include <iostream>
-#include <sstream>
-
-class MIMEType {
- protected:
- std::string m_type;
- std::string m_subtype;
-
- public:
- MIMEType( )
- : m_type( "" ), m_subtype( "" )
- {
- }
-
- MIMEType( const std::string _type, const std::string _subtype )
- : m_type( _type ), m_subtype( _subtype )
- {
- }
-
- MIMEType( const MIMEType &m )
- : m_type( m.m_type ), m_subtype( m.m_subtype )
- {
- }
-
- MIMEType( const char *s )
- {
- const char *pos;
- if( ( pos = strchr( s, '/' ) ) == NULL ) {
- *this = Null;
- } else {
- m_type = std::string( s, 0, pos - s );
- m_subtype = std::string( s, pos - s + 1, strlen( s ) - ( pos - s + 1 ) );
- }
- }
-
- MIMEType& operator=( const MIMEType &m )
- {
- if( this != &m ) {
- this->m_type = m.m_type;
- this->m_subtype = m.m_subtype;
- }
- return *this;
- }
-
- const std::string type( ) const
- {
- return m_type;
- }
-
- const std::string subtype( ) const
- {
- return m_subtype;
- }
-
- std::string str( ) const
- {
- std::ostringstream os;
- os << *this;
- return os.str( );
- }
-
- static MIMEType Null;
-
- bool operator!=( const MIMEType &other ) const
- {
- return( str( ) != other.str( ) );
- }
-
- bool operator==( const MIMEType &other ) const
- {
- return( str( ) == other.str( ) );
- }
-
- bool operator<( const MIMEType &other ) const
- {
- return( str( ) < other.str( ) );
- }
-
- template< typename CharT, typename TraitsT > friend
- std::basic_ostream< CharT, TraitsT >& operator<<( std::basic_ostream<CharT, TraitsT>&s, const MIMEType& m );
-};
-
-template< typename CharT, typename TraitsT >
-inline std::basic_ostream< CharT, TraitsT >& operator<<( std::basic_ostream< CharT, TraitsT > &s, const MIMEType &m )
-{
- if( m.type( ).empty( ) ) {
- return s;
- }
-
- s << m.type( ) << "/" << m.subtype( );
-
- return s;
-}
-
-#endif
diff --git a/src/Makefile.W32 b/src/Makefile.W32
index ffcbe27..f3702cf 100755
--- a/src/Makefile.W32
+++ b/src/Makefile.W32
@@ -1,58 +1,12 @@
TOPDIR = ..
-SUBDIRS = logger modules
-
-!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk
-
-INCLUDE_CXXFLAGS = \
- /D_WIN32_WINNT=0x504
-
-INCLUDE_DIRS = \
- /I. \
- /I$(TOPDIR)\include\logger \
- /I$(TOPDIR)\include\module \
- /I$(TOPDIR)\include\util
-
-INCLUDE_LDFLAGS = \
-
-INCLUDE_LIBS = \
- $(TOPDIR)\src\logger\logger.lib \
- Ws2_32.lib
-
-LOCAL_STATIC_LIB_OBJS = \
- win32\errormsg.obj \
- win32\stringutils.obj \
- URL.obj \
- MIMEType.obj \
- SpoolRewindInputStream.obj
-
-LOCAL_STATIC_LIB = \
- crawler.lib
-
-CPP_OBJS = \
- $(LOCAL_STATIC_LIB_OBJS)
-
-CPP_BINS = \
- crawl.exe
+SUBDIRS = logger libcrawler modules crawl
!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
-all: dependencies $(CPP_OBJS) $(CPP_BINS)
-
-dependencies:
- cd logger & $(MAKE) /nologo /f Makefile.w32 all
-
-crawl.exe: crawl.obj $(CPP_OBJS)
-
-$(LOCAL_STATIC_LIB): $(LOCAL_STATIC_LIB_OBJS)
- $(LINK) /lib /nologo /out:$@ $(STATIC_LDFLAGS) $?
-
-local_all: $(LOCAL_STATIC_LIB) $(CPP_BINS)
+local_all:
local_clean:
- @-erase $(LOCAL_STATIC_LIB) 2>NUL
- @-erase $(CPP_OBJS) win32\*.obj 2>NUL
- @-erase test.bat 2>NUL
local_distclean:
@@ -62,6 +16,7 @@ copy_prereq:
@-copy "$(ICU_DIR)\bin\icuuc49.dll" . >NUL
@-copy "$(ICU_DIR)\bin\icudt49.dll" . >NUL
@-copy "$(TOPDIR)\src\logger\logger.dll" . >NUL
+ @-copy "$(TOPDIR)\src\libcrawler\crawler.dll" . >NUL
run: copy_prereq
- @-crawl.exe
+ @-crawl\crawl.exe
diff --git a/src/Processor.hpp b/src/Processor.hpp
deleted file mode 100644
index bc17ec0..0000000
--- a/src/Processor.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __PROCESSOR_H
-#define __PROCESSOR_H
-
-#include "RewindInputStream.hpp"
-
-class Processor {
- public:
- virtual ~Processor( ) { }
-
- virtual void process( RewindInputStream *s ) = 0;
-};
-
-#endif
diff --git a/src/RewindInputStream.hpp b/src/RewindInputStream.hpp
deleted file mode 100755
index 39d7b6e..0000000
--- a/src/RewindInputStream.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef __REWIND_INPUT_STREAM_H
-#define __REWIND_INPUT_STREAM_H
-
-#include "URL.hpp"
-
-#include <iostream>
-#include <string>
-
-class RewindInputStream : public std::istream {
- public:
- const URL getBaseUrl( ) const
- {
- return m_baseUrl;
- }
-
- virtual void rewind( ) = 0;
-
- virtual std::string lastErrMsg( ) const = 0;
-
- protected:
-
- RewindInputStream( const URL &url )
- : std::istream( 0 ), m_baseUrl( url )
- {
- }
-
- private:
- URL m_baseUrl;
-};
-
-#endif
diff --git a/src/SpoolRewindInputStream.hpp b/src/SpoolRewindInputStream.hpp
deleted file mode 100755
index aff593d..0000000
--- a/src/SpoolRewindInputStream.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef __SPOOLREWINDINPUTSTREAM_H
-#define __SPOOLREWINDINPUTSTREAM_H
-
-#include "RewindInputStream.hpp"
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-class spool_streambuf : public std::streambuf
-{
- public:
- explicit spool_streambuf( size_t bufSize = 256, size_t putBack = 1, size_t spoolBufSize = 8192 );
-
- ~spool_streambuf( );
-
- void rewind( );
-
- protected:
- virtual std::streambuf::int_type readFromSource( ) = 0;
-
- private:
- int_type underflow( );
-
- private:
- const size_t m_putBack;
- std::vector<char> m_spoolBuf;
- size_t m_spoolBufPos;
- size_t m_spoolBufSize;
- std::fstream m_spoolFile;
- enum { TO_SPOOL_MEMORY = 1, TO_SPOOL_FILE = 2, FROM_SPOOL_MEMORY = 3, FROM_SPOOL_FILE = 4 } m_state;
-
- protected:
- std::vector<char> m_buf;
- char *m_base;
- char *m_start;
-};
-
-class SpoolRewindInputStream : public RewindInputStream
-{
- public:
- SpoolRewindInputStream( const URL &url );
- virtual ~SpoolRewindInputStream( );
-
- virtual void rewind( );
-
- protected:
- spool_streambuf *m_buf;
-};
-
-#endif
diff --git a/src/URL.hpp b/src/URL.hpp
deleted file mode 100644
index 5cbd733..0000000
--- a/src/URL.hpp
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef __URL_H
-#define __URL_H
-
-#include <string>
-#include <iostream>
-#include <sstream>
-
-using namespace std;
-
-class URL {
- protected:
- string m_protocol;
- string m_host;
- unsigned short m_port;
- string m_path;
- string m_query;
- string m_fragment;
-
- public:
- URL( )
- : m_protocol( "" ), m_host( "" ), m_port( 0 ), m_path( "" ), m_query( "" ), m_fragment( "" )
- {
- }
-
- URL( const URL& url )
- : m_protocol( url.m_protocol ), m_host( url.m_host ), m_port( url.m_port ), m_path( url.m_path ), m_query( url.m_query ), m_fragment( url.m_fragment )
- {
- }
-
- URL( const std::string _protocol, const std::string _host, const unsigned short _port, const std::string _path, const std::string _query, const std::string _fragment )
- : m_protocol( _protocol ), m_host( _host ), m_port( _port ), m_path( _path ), m_query( _query ), m_fragment( _fragment )
- {
- }
-
- URL& operator=( const URL& u ) {
- if( this != &u ) {
- this->m_protocol = u.m_protocol;
- this->m_port = u.m_port;
- this->m_host = u.m_host;
- this->m_path = u.m_path;
- this->m_query = u.m_query;
- this->m_fragment = u.m_fragment;
- }
- return *this;
- }
-
- const string protocol( ) const
- {
- return m_protocol;
- }
-
- const string host( ) const
- {
- return m_host;
- }
-
- unsigned short port( ) const
- {
- return m_port;
- }
-
- const string path( ) const
- {
- return m_path;
- }
-
- const string query( ) const
- {
- return m_query;
- }
-
- std::string fragment( ) const
- {
- return m_fragment;
- }
-
- std::string str( ) const
- {
- std::ostringstream os;
- os << *this;
- return os.str( );
- }
-
- static URL Null;
-
- bool operator!=( const URL &other ) const
- {
- return( str( ) != other.str( ) );
- }
-
- bool operator==( const URL &other ) const
- {
- return( str( ) == other.str( ) );
- }
-
- bool operator<( const URL &other ) const
- {
- return( str( ) < other.str( ) );
- }
-
- template< typename CharT, typename TraitsT > friend
- basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u );
-
- static unsigned short defaultPort( const std::string p )
- {
- if( p == "http" ) return 80;
- else if( p == "https" ) return 443;
- else if( p == "ftp" ) return 21;
- else return 0;
- }
-};
-
-template< typename CharT, typename TraitsT >
-inline basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u ) {
- if( u.protocol( ).empty( ) ) {
- return s;
- }
-
- s << u.protocol( ) << "://" << u.host( );
-
- if( u.port( ) != URL::defaultPort( u.protocol( ) ) ) {
- s << ":" << u.port( );
- }
-
- s << u.path( );
-
- if( !u.query( ).empty( ) ) {
- s << "?" << u.query( );
- }
-
- if( !u.fragment( ).empty( ) ) {
- s << "#" << u.fragment( );
- }
-
- return s;
-}
-
-#endif
diff --git a/src/URLFilter.hpp b/src/URLFilter.hpp
deleted file mode 100644
index 2136009..0000000
--- a/src/URLFilter.hpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __URLFILTER_H
-#define __URLFILTER_H
-
-#include "URL.hpp"
-
-class URLFilter
-{
- public:
- virtual ~URLFilter( ) { };
-
- virtual bool filter( const URL url ) = 0;
-};
-
-#endif
diff --git a/src/URLNormalizer.hpp b/src/URLNormalizer.hpp
deleted file mode 100644
index af1781a..0000000
--- a/src/URLNormalizer.hpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __URLNORMALIZER_H
-#define __URLNORMALIZER_H
-
-#include <string>
-
-#include "URL.hpp"
-
-class URLNormalizer {
- public:
- virtual ~URLNormalizer( ) { };
-
- virtual URL parseUrl( const std::string s ) = 0;
-
- virtual URL normalize( const URL url, const std::string s ) = 0;
-};
-
-#endif
diff --git a/src/URLSeen.hpp b/src/URLSeen.hpp
deleted file mode 100644
index 742c863..0000000
--- a/src/URLSeen.hpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __URLSEEN_H
-#define __URLSEEN_H
-
-#include "URL.hpp"
-
-class URLSeen {
- public:
- virtual ~URLSeen( ) { };
- virtual bool seen( const URL url ) = 0;
-};
-
-#endif
diff --git a/src/crawl/GNUmakefile b/src/crawl/GNUmakefile
new file mode 100755
index 0000000..6899fde
--- /dev/null
+++ b/src/crawl/GNUmakefile
@@ -0,0 +1,55 @@
+TOPDIR = ../..
+
+SUBDIRS =
+
+-include $(TOPDIR)/makefiles/gmake/platform.mk
+
+INCLUDE_CPPFLAGS = \
+
+INCLUDE_DIRS = \
+ -I. \
+ -I$(TOPDIR)/include/logger \
+ -I$(TOPDIR)/include/util \
+ -I$(TOPDIR)/include/module
+
+INCLUDE_LDFLAGS = \
+ -L$(TOPDIR)/src/logger
+
+INCLUDE_LIBS = \
+ -llogger
+
+# openssl
+ifeq ($(WITH_SSL),1)
+
+INCLUDE_CFLAGS += \
+ -DWITH_SSL
+
+INCLUDE_LIBS += \
+ $(OPENSSL_LIBS)
+endif
+
+CPP_OBJS = \
+
+CPP_BINS = \
+ crawl$(EXE)
+
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_install:
+ $(INSTALL) -d -m 0755 $(DESTDIR)$(bindir)
+ $(INSTALL) -m 0775 crawl$(EXE) $(DESTDIR)$(bindir)
+
+local_uninstall:
+ @-rm -f $(DESTDIR)$(bindir)/crawl
+ @-rmdir $(DESTDIR)$(bindir)
+
+local_test:
+
+run:
+ @LD_LIBRARY_PATH=$(TOPDIR)/src:$(TOPDIR)/src/logger:$(TOPDIR)/googleurl:$(TOPDIR)/libfetch:$(TOPDIR)/streamhtmlparser ./crawl
diff --git a/src/crawl/Makefile.W32 b/src/crawl/Makefile.W32
new file mode 100755
index 0000000..74442dc
--- /dev/null
+++ b/src/crawl/Makefile.W32
@@ -0,0 +1,39 @@
+TOPDIR = ..\..
+
+SUBDIRS =
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk
+
+INCLUDE_CXXFLAGS = \
+ /D_WIN32_WINNT=0x504 \
+ /DSHARED
+
+INCLUDE_DIRS = \
+ /I. \
+ /I$(TOPDIR)\include\logger \
+ /I$(TOPDIR)\include\module \
+ /I$(TOPDIR)\include\util \
+ /I$(TOPDIR)\include\crawler
+
+INCLUDE_LDFLAGS = \
+
+INCLUDE_LIBS = \
+ $(TOPDIR)\src\logger\logger.lib \
+ $(TOPDIR)\src\libcrawler\crawler.lib
+
+CPP_OBJS = \
+
+CPP_BINS = \
+ crawl.exe
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
+
+crawl.exe: crawl.obj
+
+local_all: $(CPP_BINS)
+
+local_clean:
+
+local_distclean:
+
+local_test:
diff --git a/src/crawl.cpp b/src/crawl/crawl.cpp
index 823ed02..823ed02 100755
--- a/src/crawl.cpp
+++ b/src/crawl/crawl.cpp
diff --git a/src/libcrawler/GNUmakefile b/src/libcrawler/GNUmakefile
new file mode 100755
index 0000000..c1e7a7f
--- /dev/null
+++ b/src/libcrawler/GNUmakefile
@@ -0,0 +1,42 @@
+TOPDIR = ../..
+
+SUBDIRS =
+
+-include $(TOPDIR)/makefiles/gmake/platform.mk
+
+INCLUDE_CPPFLAGS = \
+
+INCLUDE_LDFLAGS = \
+
+INCLUDE_DIRS = \
+ -I. \
+ -I$(TOPDIR)/include/logger \
+ -I$(TOPDIR)/include/util
+
+INCLUDE_LIBS = \
+
+STATIC_LIB = libcrawler.a
+
+DYNAMIC_LIB = libcrawler.so
+DYNAMIC_LIB_MAJOR = 0
+DYNAMIC_LIB_MINOR = 0
+DYNAMIC_LIB_PATCH = 0
+
+CPP_OBJS = \
+ URL.o \
+ MIMEType.o \
+ SpoolRewindInputStream.o
+
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_install:
+
+local_uninstall:
+
+local_test:
diff --git a/src/MIMEType.cpp b/src/libcrawler/MIMEType.cpp
index 25dc20c..25dc20c 100644
--- a/src/MIMEType.cpp
+++ b/src/libcrawler/MIMEType.cpp
diff --git a/src/libcrawler/Makefile.W32 b/src/libcrawler/Makefile.W32
new file mode 100755
index 0000000..ab18d2c
--- /dev/null
+++ b/src/libcrawler/Makefile.W32
@@ -0,0 +1,45 @@
+TOPDIR = ..\..
+
+SUBDIRS =
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk
+
+INCLUDE_CXXFLAGS = \
+ /D_WIN32_WINNT=0x504 \
+ /DBUILDING_CRAWLER
+
+INCLUDE_DIRS = \
+ /I. \
+ /I$(TOPDIR)\include\crawler \
+ /I$(TOPDIR)\include\logger \
+ /I$(TOPDIR)\include\util
+
+INCLUDE_LDFLAGS = \
+
+INCLUDE_LIBS = \
+ $(TOPDIR)\src\logger\logger.lib
+
+CPP_OBJS = \
+ win32\errormsg.dllobj \
+ win32\stringutils.dllobj \
+ URL.dllobj \
+ MIMEType.dllobj \
+ SpoolRewindInputStream.dllobj
+
+DYNAMIC_LIB = \
+ crawler.dll
+
+!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk
+
+$(DYNAMIC_LIB): $(CPP_OBJS)
+ $(LINK) /nologo /dll /out:$@ $(LDFLAGS) $(LIBS) $?
+
+local_all: $(DYNAMIC_LIB)
+
+local_clean:
+ @-erase $(DYNAMIC_LIB) 2>NUL
+ @-erase win32\*.obj 2>NUL
+
+local_distclean:
+
+local_test:
diff --git a/src/SpoolRewindInputStream.cpp b/src/libcrawler/SpoolRewindInputStream.cpp
index 9135741..9135741 100644
--- a/src/SpoolRewindInputStream.cpp
+++ b/src/libcrawler/SpoolRewindInputStream.cpp
diff --git a/src/URL.cpp b/src/libcrawler/URL.cpp
index f208500..f208500 100644
--- a/src/URL.cpp
+++ b/src/libcrawler/URL.cpp
diff --git a/src/win32/errormsg.cpp b/src/libcrawler/win32/errormsg.cpp
index 1b58ea3..c0a65d8 100755
--- a/src/win32/errormsg.cpp
+++ b/src/libcrawler/win32/errormsg.cpp
@@ -1,4 +1,4 @@
-#include "errormsg.hpp"
+#include "win32/errormsg.hpp"
using namespace std;
diff --git a/src/win32/stringutils.cpp b/src/libcrawler/win32/stringutils.cpp
index a82dd7a..607735c 100755
--- a/src/win32/stringutils.cpp
+++ b/src/libcrawler/win32/stringutils.cpp
@@ -1,4 +1,4 @@
-#include "errormsg.hpp"
+#include "win32/stringutils.hpp"
using namespace std;
diff --git a/src/logger/Makefile.W32 b/src/logger/Makefile.W32
index 3f9352d..d5516ca 100755
--- a/src/logger/Makefile.W32
+++ b/src/logger/Makefile.W32
@@ -6,7 +6,7 @@ SUBDIRS =
INCLUDE_CXXFLAGS = \
/D_WIN32_WINNT=0x504 \
- /DSHARED
+ /DSHARED /DBUILDING_LOGGER /DBUILDING_UTIL
INCLUDE_DIRS = \
/I. \
diff --git a/src/modules/deduper/null/Makefile.W32 b/src/modules/deduper/null/Makefile.W32
index e01235f..e0d6c60 100755
--- a/src/modules/deduper/null/Makefile.W32
+++ b/src/modules/deduper/null/Makefile.W32
@@ -11,12 +11,21 @@ INCLUDE_DIRS = \
/I. \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
- /I$(TOPDIR)\include\util
+ /I$(TOPDIR)\include\util \
+ /I$(TOPDIR)\include\crawler
+
+copy_prereq:
+ @-copy "$(ICU_DIR)\bin\icuuc49.dll" . >NUL
+ @-copy "$(ICU_DIR)\bin\icudt49.dll" . >NUL
+ @-copy "$(TOPDIR)\src\logger\logger.dll" . >NUL
+
+run: copy_prereq
+ @-crawl.exe
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib
+ $(TOPDIR)\src\libcrawler\crawler.lib
DYNAMIC_MODULE = \
mod_deduper_null.dll
diff --git a/src/modules/fetcher/file/Makefile.W32 b/src/modules/fetcher/file/Makefile.W32
index 3203d6d..a77daa6 100755
--- a/src/modules/fetcher/file/Makefile.W32
+++ b/src/modules/fetcher/file/Makefile.W32
@@ -11,12 +11,13 @@ INCLUDE_DIRS = \
/I. \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
- /I$(TOPDIR)\include\util
+ /I$(TOPDIR)\include\util \
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib
+ $(TOPDIR)\src\libcrawler\crawler.lib
DYNAMIC_MODULE = \
mod_fetcher_file.dll
diff --git a/src/modules/fetcher/winhttp/Makefile.W32 b/src/modules/fetcher/winhttp/Makefile.W32
index b46aa88..4cedb9c 100755
--- a/src/modules/fetcher/winhttp/Makefile.W32
+++ b/src/modules/fetcher/winhttp/Makefile.W32
@@ -12,12 +12,13 @@ INCLUDE_DIRS = \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
/I$(TOPDIR)\include\util \
- /I$(TOPDIR)\include\logger
+ /I$(TOPDIR)\include\logger \
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib \
+ $(TOPDIR)\src\libcrawler\crawler.lib \
$(TOPDIR)\src\logger\logger.lib \
WinHttp.lib
diff --git a/src/modules/frontier/memory/Makefile.W32 b/src/modules/frontier/memory/Makefile.W32
index b44d95f..b12e4ca 100755
--- a/src/modules/frontier/memory/Makefile.W32
+++ b/src/modules/frontier/memory/Makefile.W32
@@ -12,12 +12,13 @@ INCLUDE_DIRS = \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
/I$(TOPDIR)\include\util \
- /I$(TOPDIR)\include\logger
+ /I$(TOPDIR)\include\logger \
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib \
+ $(TOPDIR)\src\libcrawler\crawler.lib \
$(TOPDIR)\src\logger\logger.lib
DYNAMIC_MODULE = \
diff --git a/src/modules/urlfilter/chain/Makefile.W32 b/src/modules/urlfilter/chain/Makefile.W32
index 5a766ab..d3ad373 100755
--- a/src/modules/urlfilter/chain/Makefile.W32
+++ b/src/modules/urlfilter/chain/Makefile.W32
@@ -12,12 +12,12 @@ INCLUDE_DIRS = \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
/I$(TOPDIR)\include\util \
- /I$(TOPDIR)\include\logger
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib
+ $(TOPDIR)\src\libcrawler\crawler.lib
DYNAMIC_MODULE = \
mod_urlfilter_chain.dll
diff --git a/src/modules/urlfilter/host/Makefile.W32 b/src/modules/urlfilter/host/Makefile.W32
index 3b99125..024dc67 100755
--- a/src/modules/urlfilter/host/Makefile.W32
+++ b/src/modules/urlfilter/host/Makefile.W32
@@ -12,12 +12,13 @@ INCLUDE_DIRS = \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
/I$(TOPDIR)\include\util \
- /I$(TOPDIR)\include\logger
+ /I$(TOPDIR)\include\logger \
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib \
+ $(TOPDIR)\src\libcrawler\crawler.lib \
$(TOPDIR)\src\logger\logger.lib
DYNAMIC_MODULE = \
diff --git a/src/modules/urlfilter/protocol/Makefile.W32 b/src/modules/urlfilter/protocol/Makefile.W32
index 747d714..e859829 100755
--- a/src/modules/urlfilter/protocol/Makefile.W32
+++ b/src/modules/urlfilter/protocol/Makefile.W32
@@ -12,12 +12,13 @@ INCLUDE_DIRS = \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
/I$(TOPDIR)\include\util \
- /I$(TOPDIR)\include\logger
+ /I$(TOPDIR)\include\logger \
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib \
+ $(TOPDIR)\src\libcrawler\crawler.lib \
$(TOPDIR)\src\logger\logger.lib
DYNAMIC_MODULE = \
diff --git a/src/modules/urlnormalizer/googleurl/Makefile.W32 b/src/modules/urlnormalizer/googleurl/Makefile.W32
index 4cc09c1..a906404 100755
--- a/src/modules/urlnormalizer/googleurl/Makefile.W32
+++ b/src/modules/urlnormalizer/googleurl/Makefile.W32
@@ -12,13 +12,14 @@ INCLUDE_DIRS = \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
/I$(TOPDIR)\include\util \
+ /I$(TOPDIR)\include\crawler \
/I$(TOPDIR)\googleurl
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
$(TOPDIR)\googleurl\googleurl.lib \
- $(TOPDIR)\src\crawler.lib \
+ $(TOPDIR)\src\libcrawler\crawler.lib \
"$(ICU_DIR)\lib\icuuc.lib"
DYNAMIC_MODULE = \
diff --git a/src/modules/urlnormalizer/simpleurl/Makefile.W32 b/src/modules/urlnormalizer/simpleurl/Makefile.W32
index 69e732a..2a26e9c 100755
--- a/src/modules/urlnormalizer/simpleurl/Makefile.W32
+++ b/src/modules/urlnormalizer/simpleurl/Makefile.W32
@@ -11,12 +11,13 @@ INCLUDE_DIRS = \
/I. \
/I$(TOPDIR)\src \
/I$(TOPDIR)\include\module \
- /I$(TOPDIR)\include\util
+ /I$(TOPDIR)\include\util \
+ /I$(TOPDIR)\include\crawler
INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
- $(TOPDIR)\src\crawler.lib
+ $(TOPDIR)\src\libcrawler\crawler.lib
DYNAMIC_MODULE = \
mod_urlnormalizer_simple.dll
diff --git a/src/win32/errormsg.hpp b/src/win32/errormsg.hpp
deleted file mode 100755
index 443e27d..0000000
--- a/src/win32/errormsg.hpp
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ERRORMSG_H
-#define __ERRORMSG_H
-
-#include <string>
-
-std::string getLastError( );
-
-#endif
diff --git a/src/win32/stringutils.hpp b/src/win32/stringutils.hpp
deleted file mode 100755
index 6d4bd80..0000000
--- a/src/win32/stringutils.hpp
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __STRINGUTILS_H
-#define __STRINGUTILS_H
-
-#include <string>
-
-std::wstring s2ws( const std::string &s );
-
-#endif