From 992919c4be2336c5b8f2f0549712c51bd7fb9c03 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Fri, 3 Oct 2014 20:23:24 +0200 Subject: fixed setting data from source in fetcher modules --- include/crawler/SpoolRewindInputStream.hpp | 2 +- src/GNUmakefile | 2 + src/crawl/crawl.cpp | 2 +- src/libcrawler/SpoolRewindInputStream.cpp | 22 ++++--- .../fetcher/libcurl/LibCurlRewindInputStream.cpp | 70 +--------------------- 5 files changed, 20 insertions(+), 78 deletions(-) diff --git a/include/crawler/SpoolRewindInputStream.hpp b/include/crawler/SpoolRewindInputStream.hpp index 523c1b6..5befa71 100755 --- a/include/crawler/SpoolRewindInputStream.hpp +++ b/include/crawler/SpoolRewindInputStream.hpp @@ -22,7 +22,7 @@ class spool_streambuf : public std::streambuf private: CRAWLER_DLL_VISIBLE int_type underflow( ); - CRAWLER_DLL_VISIBLE void spoolData( size_t n ); + CRAWLER_DLL_VISIBLE void spoolData( char *data, size_t n ); private: const size_t m_putBack; diff --git a/src/GNUmakefile b/src/GNUmakefile index 24522c2..9bd6fe4 100755 --- a/src/GNUmakefile +++ b/src/GNUmakefile @@ -39,3 +39,5 @@ runmemcheckgui: runperfcheck: @LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) $(PERFCHECK) $(TOPDIR)/src/crawl/crawl $(TOPDIR)/src/crawl/crawl.conf +showenv: + @echo export LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp index 9f5e0b2..82cb690 100755 --- a/src/crawl/crawl.cpp +++ b/src/crawl/crawl.cpp @@ -120,7 +120,7 @@ int main( int /* argc */, char *argv[] ) vector fetcherModules; #ifndef _WIN32 -// fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" ); + fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" ); fetcherModules.push_back( "./modules/fetcher/libcurl/mod_fetcher_libcurl.so" ); #else fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" ); diff --git a/src/libcrawler/SpoolRewindInputStream.cpp b/src/libcrawler/SpoolRewindInputStream.cpp index 13ab105..472a539 100644 --- a/src/libcrawler/SpoolRewindInputStream.cpp +++ b/src/libcrawler/SpoolRewindInputStream.cpp @@ -37,27 +37,31 @@ spool_streambuf::~spool_streambuf( ) streambuf::int_type spool_streambuf::spoolSourceData( char *data, size_t n ) { size_t data_len = m_buf.size( ) - ( m_start - m_base ) ; + + // more space than data, no problem if( n < data_len ) { data_len = n; } + // enough space in the stream read buffer, put it there m_base = &m_buf.front( ); m_start = m_base; + setg( m_base, m_start, m_start + data_len ); + + n -= data_len; - memcpy( m_start, data, data_len ); - - spoolData( data_len ); + spoolData( data, n ); - return data_len; + return data_len + n; } -void spool_streambuf::spoolData( size_t n ) +void spool_streambuf::spoolData( char *data, size_t n ) { switch( m_state ) { case TO_SPOOL_MEMORY: // as long we can "spool" to memory, do so.. if( m_spoolBufPos + n <= m_spoolBufSize ) { - m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n ); + m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, data, data + n ); m_spoolBufPos += n; } else { // ..otherwise start spooling to disk, write @@ -68,7 +72,7 @@ void spool_streambuf::spoolData( size_t n ) m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize ); assert( m_spoolFile.good( ) ); m_state = TO_SPOOL_FILE; - m_spoolFile.write( m_start, n ); + m_spoolFile.write( data, n ); assert( m_spoolFile.good( ) ); } break; @@ -76,7 +80,7 @@ void spool_streambuf::spoolData( size_t n ) case TO_SPOOL_FILE: // we are appending to the spool file assert( m_spoolFile.good( ) ); - m_spoolFile.write( m_start, n ); + m_spoolFile.write( data, n ); assert( m_spoolFile.good( ) ); break; @@ -117,7 +121,7 @@ streambuf::int_type spool_streambuf::underflow( ) return traits_type::eof( ); } - spoolData( n ); + spoolData( m_start, n ); break; diff --git a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp index 7d75123..19098e8 100644 --- a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp +++ b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp @@ -35,8 +35,10 @@ size_t libcurl_buffer::write_data( char *ptr, size_t s, size_t n ) size_t data_spooled = spoolSourceData( ptr, chunk_size ); - return data_spooled; + // otherwise we must use CURL_WRITEFUNC_PAUSE + assert( data_spooled == n ); + return data_spooled; } streambuf::int_type libcurl_buffer::readFromSource( ) @@ -84,69 +86,3 @@ string LibCurlRewindInputStream::lastErrMsg( ) const // TODO: fetch m_curl state and return error return "ERROR"; } - -#if 0 - -#include -#include -#include -#include -#include - -using namespace std; - -class MemoryWriter { - static const size_t initial_size = 100; - - protected: - char *c; - size_t c_size; - - public: - MemoryWriter( ) { - c = (char *)malloc( initial_size ); - c_size = 0; - } - - ~MemoryWriter( ) { - if( c != NULL ) - free( c ); - } - - size_t f( char *ptr, size_t s, size_t n ) { - size_t part_size = s * n; - - c = (char *)realloc( c, c_size + part_size + 1 ); - - assert( c != NULL ); - - memcpy( &c[c_size], ptr, part_size ); - c_size += part_size; - c[c_size] = '\0'; - - return part_size; - } - - char *str( ) { - return c; - } - - size_t size( ) { - return c_size; - } -}; - -int main( int argc, char *argv[] ) { - Easy curl; - MemoryWriter w; - - - cout << cURLpp::Infos::ResponseCode::get( curl ) << endl - << w.size( ) << " " - << url << endl; - - cout << w.str( ) << endl; - - return EXIT_SUCCESS; -} -#endif -- cgit v1.2.3-54-g00ecf