summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2014-10-03 20:23:24 +0200
committer Andreas Baumann <mail@andreasbaumann.cc> 2014-10-03 20:23:24 +0200
commit 992919c4be2336c5b8f2f0549712c51bd7fb9c03 (patch)
tree 45db77d91baa8509edb8a46356d2a42aefe3faa7
parent 7af671da43dee7dd07333cab38c181106a328832 (diff)
download crawler-992919c4be2336c5b8f2f0549712c51bd7fb9c03.tar.gz
crawler-992919c4be2336c5b8f2f0549712c51bd7fb9c03.tar.bz2
fixed setting data from source in fetcher modules
-rwxr-xr-x include/crawler/SpoolRewindInputStream.hpp 2
-rwxr-xr-x src/GNUmakefile 2
-rwxr-xr-x src/crawl/crawl.cpp 2
-rw-r--r-- src/libcrawler/SpoolRewindInputStream.cpp 22
-rw-r--r-- src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp 70
5 files changed, 20 insertions, 78 deletions
diff --git a/include/crawler/SpoolRewindInputStream.hpp b/include/crawler/SpoolRewindInputStream.hpp
index 523c1b6..5befa71 100755
--- a/include/crawler/SpoolRewindInputStream.hpp
+++ b/include/crawler/SpoolRewindInputStream.hpp
@@ -22,7 +22,7 @@ class spool_streambuf : public std::streambuf
private:
CRAWLER_DLL_VISIBLE int_type underflow( );
- CRAWLER_DLL_VISIBLE void spoolData( size_t n );
+ CRAWLER_DLL_VISIBLE void spoolData( char *data, size_t n );
private:
const size_t m_putBack;
diff --git a/src/GNUmakefile b/src/GNUmakefile
index 24522c2..9bd6fe4 100755
--- a/src/GNUmakefile
+++ b/src/GNUmakefile
@@ -39,3 +39,5 @@ runmemcheckgui:
runperfcheck:
@LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) $(PERFCHECK) $(TOPDIR)/src/crawl/crawl $(TOPDIR)/src/crawl/crawl.conf
+showenv:
+ @echo export LD_LIBRARY_PATH=$(LD_LIBRARY_PATH)
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 9f5e0b2..82cb690 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -120,7 +120,7 @@ int main( int /* argc */, char *argv[] )
vector<string> fetcherModules;
#ifndef _WIN32
-// fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
+ fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
fetcherModules.push_back( "./modules/fetcher/libcurl/mod_fetcher_libcurl.so" );
#else
fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" );
diff --git a/src/libcrawler/SpoolRewindInputStream.cpp b/src/libcrawler/SpoolRewindInputStream.cpp
index 13ab105..472a539 100644
--- a/src/libcrawler/SpoolRewindInputStream.cpp
+++ b/src/libcrawler/SpoolRewindInputStream.cpp
@@ -37,27 +37,31 @@ spool_streambuf::~spool_streambuf( )
streambuf::int_type spool_streambuf::spoolSourceData( char *data, size_t n )
{
size_t data_len = m_buf.size( ) - ( m_start - m_base ) ;
+
+ // more space than data, no problem
if( n < data_len ) {
data_len = n;
}
+ // enough space in the stream read buffer, put it there
m_base = &m_buf.front( );
m_start = m_base;
+ setg( m_base, m_start, m_start + data_len );
+
+ n -= data_len;
- memcpy( m_start, data, data_len );
-
- spoolData( data_len );
+ spoolData( data, n );
- return data_len;
+ return data_len + n;
}
-void spool_streambuf::spoolData( size_t n )
+void spool_streambuf::spoolData( char *data, size_t n )
{
switch( m_state ) {
case TO_SPOOL_MEMORY:
// as long we can "spool" to memory, do so..
if( m_spoolBufPos + n <= m_spoolBufSize ) {
- m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n );
+ m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, data, data + n );
m_spoolBufPos += n;
} else {
// ..otherwise start spooling to disk, write
@@ -68,7 +72,7 @@ void spool_streambuf::spoolData( size_t n )
m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize );
assert( m_spoolFile.good( ) );
m_state = TO_SPOOL_FILE;
- m_spoolFile.write( m_start, n );
+ m_spoolFile.write( data, n );
assert( m_spoolFile.good( ) );
}
break;
@@ -76,7 +80,7 @@ void spool_streambuf::spoolData( size_t n )
case TO_SPOOL_FILE:
// we are appending to the spool file
assert( m_spoolFile.good( ) );
- m_spoolFile.write( m_start, n );
+ m_spoolFile.write( data, n );
assert( m_spoolFile.good( ) );
break;
@@ -117,7 +121,7 @@ streambuf::int_type spool_streambuf::underflow( )
return traits_type::eof( );
}
- spoolData( n );
+ spoolData( m_start, n );
break;
diff --git a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
index 7d75123..19098e8 100644
--- a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
+++ b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
@@ -35,8 +35,10 @@ size_t libcurl_buffer::write_data( char *ptr, size_t s, size_t n )
size_t data_spooled = spoolSourceData( ptr, chunk_size );
- return data_spooled;
+ // otherwise we must use CURL_WRITEFUNC_PAUSE
+ assert( data_spooled == n );
+ return data_spooled;
}
streambuf::int_type libcurl_buffer::readFromSource( )
@@ -84,69 +86,3 @@ string LibCurlRewindInputStream::lastErrMsg( ) const
// TODO: fetch m_curl state and return error
return "ERROR";
}
-
-#if 0
-
-#include <iostream>
-#include <sys/types.h>
-#include <cassert>
-#include <cstring>
-#include <cstdlib>
-
-using namespace std;
-
-class MemoryWriter {
- static const size_t initial_size = 100;
-
- protected:
- char *c;
- size_t c_size;
-
- public:
- MemoryWriter( ) {
- c = (char *)malloc( initial_size );
- c_size = 0;
- }
-
- ~MemoryWriter( ) {
- if( c != NULL )
- free( c );
- }
-
- size_t f( char *ptr, size_t s, size_t n ) {
- size_t part_size = s * n;
-
- c = (char *)realloc( c, c_size + part_size + 1 );
-
- assert( c != NULL );
-
- memcpy( &c[c_size], ptr, part_size );
- c_size += part_size;
- c[c_size] = '\0';
-
- return part_size;
- }
-
- char *str( ) {
- return c;
- }
-
- size_t size( ) {
- return c_size;
- }
-};
-
-int main( int argc, char *argv[] ) {
- Easy curl;
- MemoryWriter w;
-
-
- cout << cURLpp::Infos::ResponseCode::get( curl ) << endl
- << w.size( ) << " "
- << url << endl;
-
- cout << w.str( ) << endl;
-
- return EXIT_SUCCESS;
-}
-#endif