diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-17 18:06:53 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-17 18:06:53 +0200 |
commit | b9afc21c2da33b492d4ae846c39a6fe9e3d415c9 (patch) | |
tree | 5fc2415b3f12648fa5554d2def489c2450b91faf /src/modules | |
parent | c0b159e9f992c70921eb5ca4c0f0f6d448cb9a65 (diff) | |
download | crawler-b9afc21c2da33b492d4ae846c39a6fe9e3d415c9.tar.gz crawler-b9afc21c2da33b492d4ae846c39a6fe9e3d415c9.tar.bz2 |
adapted winhttp fetcher to new spooling, but crashes
Diffstat (limited to 'src/modules')
-rwxr-xr-x | src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp | 207 | ||||
-rwxr-xr-x | src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp | 8 |
2 files changed, 23 insertions, 192 deletions
diff --git a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp index 52f67a5..92d0998 100755 --- a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp +++ b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp @@ -1,206 +1,56 @@ #include "WinHttpRewindInputStream.hpp" #include "WinHttpFetcher.hpp" +#include "SpoolRewindInputStream.hpp" #include "Logger.hpp" -#include <streambuf> -#include <vector> -#include <algorithm> -#include <string> -#include <cstring> -#include <stdexcept> -#include <cassert> -#include <fstream> -#include <cstring> - #include "win32/errormsg.hpp" #include "win32/stringutils.hpp" using namespace std; -class winhttp_buffer : public streambuf +class winhttp_buffer : public spool_streambuf { public: explicit winhttp_buffer( HINTERNET connect, HINTERNET request, size_t bufSize = 256, size_t putBack = 1, size_t spoolBufSize = 8192 ); - - ~winhttp_buffer( ); + + protected: - void rewind( ); - - private: - int_type underflow( ); + virtual streambuf::int_type readFromSource( ); private: HINTERNET m_connect; HINTERNET m_request; - const size_t m_putBack; - vector<char> m_buf; - vector<char> m_spoolBuf; - size_t m_spoolBufPos; - size_t m_spoolBufSize; - fstream m_spoolFile; - enum { TO_SPOOL_MEMORY = 1, TO_SPOOL_FILE = 2, FROM_SPOOL_MEMORY = 3, FROM_SPOOL_FILE = 4 } m_state; }; winhttp_buffer::winhttp_buffer( HINTERNET connect, HINTERNET request, size_t bufSize, size_t putBack, size_t spoolBufSize ) - : m_connect( connect ), m_request( request ), m_putBack( max( putBack, size_t( 1 ) ) ), - m_buf( max( bufSize, putBack ) + putBack ), - m_spoolBuf( spoolBufSize ), m_spoolBufPos( 0 ), - m_spoolBufSize( 0 ), m_state( TO_SPOOL_MEMORY ) + : spool_streambuf( bufSize, putBack, spoolBufSize ), + m_connect( connect ), m_request( request ) { - char *end = &m_buf.front( ) + m_buf.size( ); - setg( end, end, end ); -} - -winhttp_buffer::~winhttp_buffer( ) -{ - switch( m_state ) { - case TO_SPOOL_MEMORY: - case FROM_SPOOL_MEMORY: - // memory only, nothing to clean up - break; - - case TO_SPOOL_FILE: - case FROM_SPOOL_FILE: - m_spoolFile.close( ); - (void)remove( "/tmp/spool.tmp" ); - break; - } } -streambuf::int_type winhttp_buffer::underflow( ) +streambuf::int_type winhttp_buffer::readFromSource( ) { - // check if buffer is exhausted, if not, return current character - if( gptr( ) < egptr( ) ) - return traits_type::to_int_type( *gptr( ) ); - - char *base = &m_buf.front( ); - char *start = base; - - // move put back away - if( eback( ) == base ) { - memmove( base, egptr( ) - m_putBack, m_putBack ); - start += m_putBack; + DWORD avail = 0; + if( !WinHttpQueryDataAvailable( m_request, &avail ) ) { + // TODO error handling + return traits_type::eof( ); } - - // read from source or spool (depends on calling rewind) - DWORD size; - DWORD avail; - DWORD n; - switch( m_state ) { - case TO_SPOOL_MEMORY: - case TO_SPOOL_FILE: - avail = 0; - if( !WinHttpQueryDataAvailable( m_request, &avail ) ) { - // TODO error handling - return traits_type::eof( ); - } - if( avail == 0 ) { - return traits_type::eof( ); - } - - size = min( avail, m_buf.size( ) - ( start - base ) ); - if( !WinHttpReadData( m_request, (LPVOID)start, size, &n ) ) { - // TODO error handling - return traits_type::eof( ); - } - - if( m_state == TO_SPOOL_MEMORY ) { - // as long we can "spool" to memory, do so.. - if( m_spoolBufSize + n <= m_spoolBuf.size( ) ) { - m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, start, start + n ); - m_spoolBufPos += n; - m_spoolBufSize += n; - } else { - // ..otherwise start spooling to disk, write - // current memory spool buffer first.. - LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBuf.size( ) << ")"; - m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc ); - assert( m_spoolFile.good( ) ); - m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize ); - assert( m_spoolFile.good( ) ); - m_state = TO_SPOOL_FILE; - m_spoolFile.write( start, n ); - assert( m_spoolFile.good( ) ); - } - } else { - // we are appending to the spool file - assert( m_spoolFile.good( ) ); - m_spoolFile.write( start, n ); - assert( m_spoolFile.good( ) ); - } - - break; - - case FROM_SPOOL_MEMORY: - n = min( m_buf.size( ) - ( start - base ), m_spoolBufSize - m_spoolBufPos ); - if( n == 0 ) { - return traits_type::eof( ); - } - - copy( m_spoolBuf.begin( ) + m_spoolBufPos, - m_spoolBuf.begin( ) + m_spoolBufPos + n, - m_buf.begin( ) + ( start - base ) ); - - m_spoolBufPos += n; - - break; - - case FROM_SPOOL_FILE: - - n = min( m_buf.size( ) - ( start - base ), m_spoolBufSize - m_spoolBufPos ); - m_spoolFile.read( start, n ); - m_spoolBufPos += n; - if( m_spoolBufPos > m_spoolBufSize ) { - return traits_type::eof( ); - } - if( n == 0 || m_spoolFile.eof( ) ) { - return traits_type::eof( ); - } - - break; + if( avail == 0 ) { + return traits_type::eof( ); } - - // set pointers - setg( base, start, start + n ); - - return traits_type::to_int_type( *gptr( ) ); -} - -void winhttp_buffer::rewind( ) -{ - switch( m_state ) { - case TO_SPOOL_MEMORY: - m_spoolBufPos = 0; - m_state = FROM_SPOOL_MEMORY; - break; - case TO_SPOOL_FILE: - m_spoolFile.close( ); - m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::in ); - m_spoolFile.seekg( 0, ios::end ); - m_spoolBufSize = m_spoolFile.tellg( ); - m_spoolFile.seekg( 0, ios::beg ); - m_spoolBufPos = 0; - m_state = FROM_SPOOL_FILE; - break; - - case FROM_SPOOL_MEMORY: - m_spoolBufPos = 0; - break; - - case FROM_SPOOL_FILE: - m_spoolBufPos = 0; - m_spoolFile.seekg( 0, ios::beg ); - break; + DWORD n; + DWORD size = min( avail, m_buf.size( ) - ( m_start - m_base ) ); + if( !WinHttpReadData( m_request, (LPVOID)m_start, size, &n ) ) { + // TODO error handling + return traits_type::eof( ); } - char *end = &m_buf.front( ) + m_buf.size( ); - setg( end, end, end ); - pubseekpos( 0, ios_base::in ); + return n; } WinHttpRewindInputStream::WinHttpRewindInputStream( const URL &url, WinHttpFetcher *fetcher ) - : RewindInputStream( url ), m_fetcher( fetcher ), m_connect( 0 ), m_request( 0 ), m_buf( 0 ) + : SpoolRewindInputStream( url ), m_fetcher( fetcher ), m_connect( 0 ), m_request( 0 ) { m_connect = WinHttpConnect( m_fetcher->session( ), s2ws( url.host( ) ).c_str( ), INTERNET_DEFAULT_HTTP_PORT, 0 ); @@ -238,21 +88,6 @@ WinHttpRewindInputStream::~WinHttpRewindInputStream( ) if( m_connect ) WinHttpCloseHandle( m_connect ); } -void WinHttpRewindInputStream::rewind( ) -{ - // consume rest of web request, force spooling in streambuf - enum { CHUNKSIZE = 1024 }; - char buf[CHUNKSIZE]; - - while( good( ) && !eof( ) ) { - read( buf, CHUNKSIZE ); - } - - ios::clear( ); - assert( m_buf != 0 ); - m_buf->rewind( ); -} - std::string WinHttpRewindInputStream::lastErrMsg( ) const { return getLastError( ); diff --git a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp index 7d06792..5ab00f8 100755 --- a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp +++ b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp @@ -2,27 +2,23 @@ #define __WINHTTP_REWIND_INPUT_STREAM_H #include "RewindInputStream.hpp" -#include "URL.hpp" +#include "SpoolRewindInputStream.hpp" #define WIN32_MEAN_AND_LEAN #include <windows.h> #include <winhttp.h> -class winhttp_buffer; class WinHttpFetcher; -class WinHttpRewindInputStream : public RewindInputStream +class WinHttpRewindInputStream : public SpoolRewindInputStream { public: WinHttpRewindInputStream( const URL &url, WinHttpFetcher *fetcher ); virtual ~WinHttpRewindInputStream( ); - virtual void rewind( ); - virtual std::string lastErrMsg( ) const; private: - winhttp_buffer *m_buf; WinHttpFetcher *m_fetcher; HINTERNET m_connect; HINTERNET m_request; |