summary | refs | log | tree | commit | diff
path: root/src/modules
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-08-17 18:06:53 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-08-17 18:06:53 +0200
commit b9afc21c2da33b492d4ae846c39a6fe9e3d415c9 (patch)
tree 5fc2415b3f12648fa5554d2def489c2450b91faf /src/modules
parent c0b159e9f992c70921eb5ca4c0f0f6d448cb9a65 (diff)
download crawler-b9afc21c2da33b492d4ae846c39a6fe9e3d415c9.tar.gz
crawler-b9afc21c2da33b492d4ae846c39a6fe9e3d415c9.tar.bz2
adapted winhttp fetcher to new spooling, but crashes
Diffstat (limited to 'src/modules')
-rwxr-xr-x src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp 207
-rwxr-xr-x src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp 8
2 files changed, 23 insertions, 192 deletions
diff --git a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp
index 52f67a5..92d0998 100755
--- a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp
+++ b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.cpp
@@ -1,206 +1,56 @@
#include "WinHttpRewindInputStream.hpp"
#include "WinHttpFetcher.hpp"
+#include "SpoolRewindInputStream.hpp"
#include "Logger.hpp"
-#include <streambuf>
-#include <vector>
-#include <algorithm>
-#include <string>
-#include <cstring>
-#include <stdexcept>
-#include <cassert>
-#include <fstream>
-#include <cstring>
-
#include "win32/errormsg.hpp"
#include "win32/stringutils.hpp"
using namespace std;
-class winhttp_buffer : public streambuf
+class winhttp_buffer : public spool_streambuf
{
public:
explicit winhttp_buffer( HINTERNET connect, HINTERNET request, size_t bufSize = 256, size_t putBack = 1, size_t spoolBufSize = 8192 );
-
- ~winhttp_buffer( );
+
+ protected:
- void rewind( );
-
- private:
- int_type underflow( );
+ virtual streambuf::int_type readFromSource( );
private:
HINTERNET m_connect;
HINTERNET m_request;
- const size_t m_putBack;
- vector<char> m_buf;
- vector<char> m_spoolBuf;
- size_t m_spoolBufPos;
- size_t m_spoolBufSize;
- fstream m_spoolFile;
- enum { TO_SPOOL_MEMORY = 1, TO_SPOOL_FILE = 2, FROM_SPOOL_MEMORY = 3, FROM_SPOOL_FILE = 4 } m_state;
};
winhttp_buffer::winhttp_buffer( HINTERNET connect, HINTERNET request, size_t bufSize, size_t putBack, size_t spoolBufSize )
- : m_connect( connect ), m_request( request ), m_putBack( max( putBack, size_t( 1 ) ) ),
- m_buf( max( bufSize, putBack ) + putBack ),
- m_spoolBuf( spoolBufSize ), m_spoolBufPos( 0 ),
- m_spoolBufSize( 0 ), m_state( TO_SPOOL_MEMORY )
+ : spool_streambuf( bufSize, putBack, spoolBufSize ),
+ m_connect( connect ), m_request( request )
{
- char *end = &m_buf.front( ) + m_buf.size( );
- setg( end, end, end );
-}
-
-winhttp_buffer::~winhttp_buffer( )
-{
- switch( m_state ) {
- case TO_SPOOL_MEMORY:
- case FROM_SPOOL_MEMORY:
- // memory only, nothing to clean up
- break;
-
- case TO_SPOOL_FILE:
- case FROM_SPOOL_FILE:
- m_spoolFile.close( );
- (void)remove( "/tmp/spool.tmp" );
- break;
- }
}
-streambuf::int_type winhttp_buffer::underflow( )
+streambuf::int_type winhttp_buffer::readFromSource( )
{
- // check if buffer is exhausted, if not, return current character
- if( gptr( ) < egptr( ) )
- return traits_type::to_int_type( *gptr( ) );
-
- char *base = &m_buf.front( );
- char *start = base;
-
- // move put back away
- if( eback( ) == base ) {
- memmove( base, egptr( ) - m_putBack, m_putBack );
- start += m_putBack;
+ DWORD avail = 0;
+ if( !WinHttpQueryDataAvailable( m_request, &avail ) ) {
+ // TODO error handling
+ return traits_type::eof( );
}
-
- // read from source or spool (depends on calling rewind)
- DWORD size;
- DWORD avail;
- DWORD n;
- switch( m_state ) {
- case TO_SPOOL_MEMORY:
- case TO_SPOOL_FILE:
- avail = 0;
- if( !WinHttpQueryDataAvailable( m_request, &avail ) ) {
- // TODO error handling
- return traits_type::eof( );
- }
- if( avail == 0 ) {
- return traits_type::eof( );
- }
-
- size = min( avail, m_buf.size( ) - ( start - base ) );
- if( !WinHttpReadData( m_request, (LPVOID)start, size, &n ) ) {
- // TODO error handling
- return traits_type::eof( );
- }
-
- if( m_state == TO_SPOOL_MEMORY ) {
- // as long we can "spool" to memory, do so..
- if( m_spoolBufSize + n <= m_spoolBuf.size( ) ) {
- m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, start, start + n );
- m_spoolBufPos += n;
- m_spoolBufSize += n;
- } else {
- // ..otherwise start spooling to disk, write
- // current memory spool buffer first..
- LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBuf.size( ) << ")";
- m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc );
- assert( m_spoolFile.good( ) );
- m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize );
- assert( m_spoolFile.good( ) );
- m_state = TO_SPOOL_FILE;
- m_spoolFile.write( start, n );
- assert( m_spoolFile.good( ) );
- }
- } else {
- // we are appending to the spool file
- assert( m_spoolFile.good( ) );
- m_spoolFile.write( start, n );
- assert( m_spoolFile.good( ) );
- }
-
- break;
-
- case FROM_SPOOL_MEMORY:
- n = min( m_buf.size( ) - ( start - base ), m_spoolBufSize - m_spoolBufPos );
- if( n == 0 ) {
- return traits_type::eof( );
- }
-
- copy( m_spoolBuf.begin( ) + m_spoolBufPos,
- m_spoolBuf.begin( ) + m_spoolBufPos + n,
- m_buf.begin( ) + ( start - base ) );
-
- m_spoolBufPos += n;
-
- break;
-
- case FROM_SPOOL_FILE:
-
- n = min( m_buf.size( ) - ( start - base ), m_spoolBufSize - m_spoolBufPos );
- m_spoolFile.read( start, n );
- m_spoolBufPos += n;
- if( m_spoolBufPos > m_spoolBufSize ) {
- return traits_type::eof( );
- }
- if( n == 0 || m_spoolFile.eof( ) ) {
- return traits_type::eof( );
- }
-
- break;
+ if( avail == 0 ) {
+ return traits_type::eof( );
}
-
- // set pointers
- setg( base, start, start + n );
-
- return traits_type::to_int_type( *gptr( ) );
-}
-
-void winhttp_buffer::rewind( )
-{
- switch( m_state ) {
- case TO_SPOOL_MEMORY:
- m_spoolBufPos = 0;
- m_state = FROM_SPOOL_MEMORY;
- break;
- case TO_SPOOL_FILE:
- m_spoolFile.close( );
- m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::in );
- m_spoolFile.seekg( 0, ios::end );
- m_spoolBufSize = m_spoolFile.tellg( );
- m_spoolFile.seekg( 0, ios::beg );
- m_spoolBufPos = 0;
- m_state = FROM_SPOOL_FILE;
- break;
-
- case FROM_SPOOL_MEMORY:
- m_spoolBufPos = 0;
- break;
-
- case FROM_SPOOL_FILE:
- m_spoolBufPos = 0;
- m_spoolFile.seekg( 0, ios::beg );
- break;
+ DWORD n;
+ DWORD size = min( avail, m_buf.size( ) - ( m_start - m_base ) );
+ if( !WinHttpReadData( m_request, (LPVOID)m_start, size, &n ) ) {
+ // TODO error handling
+ return traits_type::eof( );
}
- char *end = &m_buf.front( ) + m_buf.size( );
- setg( end, end, end );
- pubseekpos( 0, ios_base::in );
+ return n;
}
WinHttpRewindInputStream::WinHttpRewindInputStream( const URL &url, WinHttpFetcher *fetcher )
- : RewindInputStream( url ), m_fetcher( fetcher ), m_connect( 0 ), m_request( 0 ), m_buf( 0 )
+ : SpoolRewindInputStream( url ), m_fetcher( fetcher ), m_connect( 0 ), m_request( 0 )
{
m_connect = WinHttpConnect( m_fetcher->session( ), s2ws( url.host( ) ).c_str( ),
INTERNET_DEFAULT_HTTP_PORT, 0 );
@@ -238,21 +88,6 @@ WinHttpRewindInputStream::~WinHttpRewindInputStream( )
if( m_connect ) WinHttpCloseHandle( m_connect );
}
-void WinHttpRewindInputStream::rewind( )
-{
- // consume rest of web request, force spooling in streambuf
- enum { CHUNKSIZE = 1024 };
- char buf[CHUNKSIZE];
-
- while( good( ) && !eof( ) ) {
- read( buf, CHUNKSIZE );
- }
-
- ios::clear( );
- assert( m_buf != 0 );
- m_buf->rewind( );
-}
-
std::string WinHttpRewindInputStream::lastErrMsg( ) const
{
return getLastError( );
diff --git a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp
index 7d06792..5ab00f8 100755
--- a/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp
+++ b/src/modules/fetcher/winhttp/WinHttpRewindInputStream.hpp
@@ -2,27 +2,23 @@
#define __WINHTTP_REWIND_INPUT_STREAM_H
#include "RewindInputStream.hpp"
-#include "URL.hpp"
+#include "SpoolRewindInputStream.hpp"
#define WIN32_MEAN_AND_LEAN
#include <windows.h>
#include <winhttp.h>
-class winhttp_buffer;
class WinHttpFetcher;
-class WinHttpRewindInputStream : public RewindInputStream
+class WinHttpRewindInputStream : public SpoolRewindInputStream
{
public:
WinHttpRewindInputStream( const URL &url, WinHttpFetcher *fetcher );
virtual ~WinHttpRewindInputStream( );
- virtual void rewind( );
-
virtual std::string lastErrMsg( ) const;
private:
- winhttp_buffer *m_buf;
WinHttpFetcher *m_fetcher;
HINTERNET m_connect;
HINTERNET m_request;