diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-09-06 22:18:23 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-09-06 22:18:23 +0200 |
commit | 13fc9a7da5111f4ddba942d3c6b6b8654ce395d6 (patch) | |
tree | e86210e3d939911e35f930a6dc73c3ebb591243b /src/libcrawler | |
parent | f5c586f7231f7e033c5528bcefea357e4e64441c (diff) | |
download | crawler-13fc9a7da5111f4ddba942d3c6b6b8654ce395d6.tar.gz crawler-13fc9a7da5111f4ddba942d3c6b6b8654ce395d6.tar.bz2 |
more splitting into libcrawler library and crawl binary
moved more public headers to 'include'
changed approach for dynamic linking on Windows
Diffstat (limited to 'src/libcrawler')
-rwxr-xr-x | src/libcrawler/GNUmakefile | 42 | ||||
-rw-r--r-- | src/libcrawler/MIMEType.cpp | 5 | ||||
-rwxr-xr-x | src/libcrawler/Makefile.W32 | 45 | ||||
-rw-r--r-- | src/libcrawler/SpoolRewindInputStream.cpp | 181 | ||||
-rw-r--r-- | src/libcrawler/URL.cpp | 5 | ||||
-rwxr-xr-x | src/libcrawler/win32/errormsg.cpp | 27 | ||||
-rwxr-xr-x | src/libcrawler/win32/stringutils.cpp | 21 |
7 files changed, 326 insertions, 0 deletions
diff --git a/src/libcrawler/GNUmakefile b/src/libcrawler/GNUmakefile new file mode 100755 index 0000000..c1e7a7f --- /dev/null +++ b/src/libcrawler/GNUmakefile @@ -0,0 +1,42 @@ +TOPDIR = ../.. + +SUBDIRS = + +-include $(TOPDIR)/makefiles/gmake/platform.mk + +INCLUDE_CPPFLAGS = \ + +INCLUDE_LDFLAGS = \ + +INCLUDE_DIRS = \ + -I. \ + -I$(TOPDIR)/include/logger \ + -I$(TOPDIR)/include/util + +INCLUDE_LIBS = \ + +STATIC_LIB = libcrawler.a + +DYNAMIC_LIB = libcrawler.so +DYNAMIC_LIB_MAJOR = 0 +DYNAMIC_LIB_MINOR = 0 +DYNAMIC_LIB_PATCH = 0 + +CPP_OBJS = \ + URL.o \ + MIMEType.o \ + SpoolRewindInputStream.o + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_install: + +local_uninstall: + +local_test: diff --git a/src/libcrawler/MIMEType.cpp b/src/libcrawler/MIMEType.cpp new file mode 100644 index 0000000..25dc20c --- /dev/null +++ b/src/libcrawler/MIMEType.cpp @@ -0,0 +1,5 @@ +#include "MIMEType.hpp" + +MIMEType MIMEType::Null; + + diff --git a/src/libcrawler/Makefile.W32 b/src/libcrawler/Makefile.W32 new file mode 100755 index 0000000..ab18d2c --- /dev/null +++ b/src/libcrawler/Makefile.W32 @@ -0,0 +1,45 @@ +TOPDIR = ..\.. + +SUBDIRS = + +!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk + +INCLUDE_CXXFLAGS = \ + /D_WIN32_WINNT=0x504 \ + /DBUILDING_CRAWLER + +INCLUDE_DIRS = \ + /I. \ + /I$(TOPDIR)\include\crawler \ + /I$(TOPDIR)\include\logger \ + /I$(TOPDIR)\include\util + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + $(TOPDIR)\src\logger\logger.lib + +CPP_OBJS = \ + win32\errormsg.dllobj \ + win32\stringutils.dllobj \ + URL.dllobj \ + MIMEType.dllobj \ + SpoolRewindInputStream.dllobj + +DYNAMIC_LIB = \ + crawler.dll + +!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk + +$(DYNAMIC_LIB): $(CPP_OBJS) + $(LINK) /nologo /dll /out:$@ $(LDFLAGS) $(LIBS) $? 
+ +local_all: $(DYNAMIC_LIB) + +local_clean: + @-erase $(DYNAMIC_LIB) 2>NUL + @-erase win32\*.obj 2>NUL + +local_distclean: + +local_test: diff --git a/src/libcrawler/SpoolRewindInputStream.cpp b/src/libcrawler/SpoolRewindInputStream.cpp new file mode 100644 index 0000000..9135741 --- /dev/null +++ b/src/libcrawler/SpoolRewindInputStream.cpp @@ -0,0 +1,181 @@ +#include "SpoolRewindInputStream.hpp" +#include "Logger.hpp" + +#include <algorithm> +#include <cstring> +#include <cassert> + +using namespace std; + +spool_streambuf::spool_streambuf( size_t bufSize, size_t putBack, size_t spoolBufSize ) + : m_putBack( max( putBack, size_t( 1 ) ) ), + m_spoolBuf( spoolBufSize ), m_spoolBufPos( 0 ), + m_spoolBufSize( spoolBufSize ), m_state( TO_SPOOL_MEMORY ), + m_buf( max( bufSize, putBack ) + putBack ), + m_base( 0 ), m_start( 0 ) +{ + char *end = &m_buf.front( ) + m_buf.size( ); + setg( end, end, end ); +} + +spool_streambuf::~spool_streambuf( ) +{ + switch( m_state ) { + case TO_SPOOL_MEMORY: + case FROM_SPOOL_MEMORY: + // memory only, nothing to clean up + break; + + case TO_SPOOL_FILE: + case FROM_SPOOL_FILE: + m_spoolFile.close( ); + (void)remove( "/tmp/spool.tmp" ); + break; + } +} + +streambuf::int_type spool_streambuf::underflow( ) +{ + // check if buffer is exhausted, if not, return current character + if( gptr( ) < egptr( ) ) + return traits_type::to_int_type( *gptr( ) ); + + m_base = &m_buf.front( ); + m_start = m_base; + + // move put back away + if( eback( ) == m_base ) { + memmove( m_base, egptr( ) - m_putBack, m_putBack ); + m_start += m_putBack; + } + + // read from source or spool (depends on calling rewind) + streambuf::int_type n; + switch( m_state ) { + case TO_SPOOL_MEMORY: + case TO_SPOOL_FILE: + n = readFromSource( ); + if( n == 0 ) { + return traits_type::eof( ); + } else if( n < 0 ) { + // TODO handle error + return traits_type::eof( ); + } + + if( m_state == TO_SPOOL_MEMORY ) { + // as long we can "spool" to memory, do so.. 
+ if( m_spoolBufPos + n <= m_spoolBufSize ) { + m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n ); + m_spoolBufPos += n; + } else { + // ..otherwise start spooling to disk, write + // current memory spool buffer first.. + LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBufSize << ")"; + m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc ); + assert( m_spoolFile.good( ) ); + m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize ); + assert( m_spoolFile.good( ) ); + m_state = TO_SPOOL_FILE; + m_spoolFile.write( m_start, n ); + assert( m_spoolFile.good( ) ); + } + } else { + // we are appending to the spool file + assert( m_spoolFile.good( ) ); + m_spoolFile.write( m_start, n ); + assert( m_spoolFile.good( ) ); + } + + break; + + case FROM_SPOOL_MEMORY: + n = min( m_buf.size( ) - ( m_start - m_base ), m_spoolBufSize - m_spoolBufPos ); + if( n == 0 ) { + return traits_type::eof( ); + } + + copy( m_spoolBuf.begin( ) + m_spoolBufPos, + m_spoolBuf.begin( ) + m_spoolBufPos + n, + m_buf.begin( ) + ( m_start - m_base ) ); + + m_spoolBufPos += n; + + break; + + case FROM_SPOOL_FILE: + + n = min( m_buf.size( ) - ( m_start - m_base ), m_spoolBufSize - m_spoolBufPos ); + m_spoolFile.read( m_start, n ); + m_spoolBufPos += n; + if( m_spoolBufPos > m_spoolBufSize ) { + return traits_type::eof( ); + } + if( n == 0 || m_spoolFile.eof( ) ) { + return traits_type::eof( ); + } + + break; + } + + // set pointers + setg( m_base, m_start, m_start + n ); + + return traits_type::to_int_type( *gptr( ) ); +} + +void spool_streambuf::rewind( ) +{ + switch( m_state ) { + case TO_SPOOL_MEMORY: + m_spoolBufPos = 0; + m_state = FROM_SPOOL_MEMORY; + break; + + case TO_SPOOL_FILE: + m_spoolFile.close( ); + m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::in ); + m_spoolFile.seekg( 0, ios::end ); + m_spoolBufSize = m_spoolFile.tellg( ); + m_spoolFile.seekg( 0, ios::beg ); + m_spoolBufPos = 0; + m_state = FROM_SPOOL_FILE; + 
break; + + case FROM_SPOOL_MEMORY: + m_spoolBufPos = 0; + break; + + case FROM_SPOOL_FILE: + m_spoolBufPos = 0; + m_spoolFile.seekg( 0, ios::beg ); + break; + } + + char *end = &m_buf.front( ) + m_buf.size( ); + setg( end, end, end ); + pubseekpos( 0, ios_base::in ); +} + +SpoolRewindInputStream::SpoolRewindInputStream( const URL &url ) + : RewindInputStream( url ), m_buf( 0 ) +{ +} + +SpoolRewindInputStream::~SpoolRewindInputStream( ) +{ +} + +void SpoolRewindInputStream::rewind( ) +{ + // consume rest of web request, force spooling in streambuf + enum { CHUNKSIZE = 1024 }; + char buf[CHUNKSIZE]; + + while( good( ) && !eof( ) ) { + read( buf, CHUNKSIZE ); + } + + ios::clear( ); + assert( m_buf != 0 ); + m_buf->rewind( ); +} diff --git a/src/libcrawler/URL.cpp b/src/libcrawler/URL.cpp new file mode 100644 index 0000000..f208500 --- /dev/null +++ b/src/libcrawler/URL.cpp @@ -0,0 +1,5 @@ +#include "URL.hpp" + +URL URL::Null; + + diff --git a/src/libcrawler/win32/errormsg.cpp b/src/libcrawler/win32/errormsg.cpp new file mode 100755 index 0000000..c0a65d8 --- /dev/null +++ b/src/libcrawler/win32/errormsg.cpp @@ -0,0 +1,27 @@ +#include "win32/errormsg.hpp" + +using namespace std; + +#define WIN32_MEAN_AND_LEAN +#include <windows.h> + +string getLastError( ) +{ + LPTSTR buf; + DWORD size; + + DWORD lastErr = GetLastError( ); + + if( !FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, lastErr, 0, (LPTSTR)&buf, + 0, NULL ) ) { + return "<no message available>"; + } + + return string( buf ); +} + diff --git a/src/libcrawler/win32/stringutils.cpp b/src/libcrawler/win32/stringutils.cpp new file mode 100755 index 0000000..607735c --- /dev/null +++ b/src/libcrawler/win32/stringutils.cpp @@ -0,0 +1,21 @@ +#include "win32/stringutils.hpp" + +using namespace std; + +#define WIN32_MEAN_AND_LEAN +#include <windows.h> + +std::wstring s2ws( const std::string &s ) +{ + // get size 
for buffer and allocate it + int len; + int slength = (int)s.length( )+1; + len = MultiByteToWideChar( CP_ACP, 0, s.c_str( ), slength, 0, 0 ); + wchar_t *buf = new wchar_t[len]; + + // convert + MultiByteToWideChar( CP_ACP, 0, s.c_str( ), slength, buf, len ); + std::wstring res( buf ); + delete[] buf; + return res; +} |