diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-03 17:58:36 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-03 17:58:36 +0200 |
commit | 92ba06d58475fd4ab07d8e3b1efa6993f1f02340 (patch) | |
tree | 385a511835136fb2d190df05651b03c015690e91 /src | |
parent | ee52b3eab8cc7feb49fa6db964b94b35e2bc8bac (diff) | |
download | crawler-92ba06d58475fd4ab07d8e3b1efa6993f1f02340.tar.gz crawler-92ba06d58475fd4ab07d8e3b1efa6993f1f02340.tar.bz2 |
Added an experimental curl fetcher and made it the default fetcher on non-Windows builds.
Diffstat (limited to 'src')
-rwxr-xr-x | src/crawl/crawl.cpp | 6 | ||||
-rw-r--r-- | src/libcrawler/SpoolRewindInputStream.cpp | 82 | ||||
-rw-r--r-- | src/modules/fetcher/GNUmakefile | 2 |
3 files changed, 64 insertions, 26 deletions
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp index 4f3eb00..9f5e0b2 100755 --- a/src/crawl/crawl.cpp +++ b/src/crawl/crawl.cpp @@ -120,7 +120,8 @@ int main( int /* argc */, char *argv[] ) vector<string> fetcherModules; #ifndef _WIN32 - fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" ); +// fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" ); + fetcherModules.push_back( "./modules/fetcher/libcurl/mod_fetcher_libcurl.so" ); #else fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" ); #endif @@ -162,7 +163,8 @@ int main( int /* argc */, char *argv[] ) Frontier *frontier = frontiers.create( "memory_frontier" ); #ifndef _WIN32 - Fetcher *fetcher = fetchers.create( "libfetch_fetcher" ); +// Fetcher *fetcher = fetchers.create( "libfetch_fetcher" ); + Fetcher *fetcher = fetchers.create( "libcurl_fetcher" ); #else Fetcher *fetcher = fetchers.create( "winhttp_fetcher" ); #endif diff --git a/src/libcrawler/SpoolRewindInputStream.cpp b/src/libcrawler/SpoolRewindInputStream.cpp index 9135741..13ab105 100644 --- a/src/libcrawler/SpoolRewindInputStream.cpp +++ b/src/libcrawler/SpoolRewindInputStream.cpp @@ -34,6 +34,61 @@ spool_streambuf::~spool_streambuf( ) } } +streambuf::int_type spool_streambuf::spoolSourceData( char *data, size_t n ) +{ + size_t data_len = m_buf.size( ) - ( m_start - m_base ) ; + if( n < data_len ) { + data_len = n; + } + + m_base = &m_buf.front( ); + m_start = m_base; + + memcpy( m_start, data, data_len ); + + spoolData( data_len ); + + return data_len; +} + +void spool_streambuf::spoolData( size_t n ) +{ + switch( m_state ) { + case TO_SPOOL_MEMORY: + // as long we can "spool" to memory, do so.. + if( m_spoolBufPos + n <= m_spoolBufSize ) { + m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n ); + m_spoolBufPos += n; + } else { + // ..otherwise start spooling to disk, write + // current memory spool buffer first.. 
+ LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBufSize << ")"; + m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc ); + assert( m_spoolFile.good( ) ); + m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize ); + assert( m_spoolFile.good( ) ); + m_state = TO_SPOOL_FILE; + m_spoolFile.write( m_start, n ); + assert( m_spoolFile.good( ) ); + } + break; + + case TO_SPOOL_FILE: + // we are appending to the spool file + assert( m_spoolFile.good( ) ); + m_spoolFile.write( m_start, n ); + assert( m_spoolFile.good( ) ); + break; + + case FROM_SPOOL_MEMORY: + case FROM_SPOOL_FILE: + throw logic_error( "Still getting data from source after rewind!" ); + + default: + throw logic_error( "Illegal state!" ); + } +} + streambuf::int_type spool_streambuf::underflow( ) { // check if buffer is exhausted, if not, return current character @@ -62,29 +117,7 @@ streambuf::int_type spool_streambuf::underflow( ) return traits_type::eof( ); } - if( m_state == TO_SPOOL_MEMORY ) { - // as long we can "spool" to memory, do so.. - if( m_spoolBufPos + n <= m_spoolBufSize ) { - m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n ); - m_spoolBufPos += n; - } else { - // ..otherwise start spooling to disk, write - // current memory spool buffer first.. 
- LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBufSize << ")"; - m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc ); - assert( m_spoolFile.good( ) ); - m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize ); - assert( m_spoolFile.good( ) ); - m_state = TO_SPOOL_FILE; - m_spoolFile.write( m_start, n ); - assert( m_spoolFile.good( ) ); - } - } else { - // we are appending to the spool file - assert( m_spoolFile.good( ) ); - m_spoolFile.write( m_start, n ); - assert( m_spoolFile.good( ) ); - } + spoolData( n ); break; @@ -115,6 +148,9 @@ streambuf::int_type spool_streambuf::underflow( ) } break; + + default: + throw logic_error( "Illegal state!" ); } // set pointers diff --git a/src/modules/fetcher/GNUmakefile b/src/modules/fetcher/GNUmakefile index 89dfe93..6be8eaa 100644 --- a/src/modules/fetcher/GNUmakefile +++ b/src/modules/fetcher/GNUmakefile @@ -1,6 +1,6 @@ TOPDIR = ../../.. -SUBDIRS = libfetch file +SUBDIRS = libfetch file libcurl -include $(TOPDIR)/makefiles/gmake/sub.mk |