summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2014-10-03 17:58:36 +0200
committer Andreas Baumann <mail@andreasbaumann.cc> 2014-10-03 17:58:36 +0200
commit 92ba06d58475fd4ab07d8e3b1efa6993f1f02340 (patch)
tree 385a511835136fb2d190df05651b03c015690e91 /src
parent ee52b3eab8cc7feb49fa6db964b94b35e2bc8bac (diff)
download crawler-92ba06d58475fd4ab07d8e3b1efa6993f1f02340.tar.gz
crawler-92ba06d58475fd4ab07d8e3b1efa6993f1f02340.tar.bz2
added an experimental curl fetcher
Diffstat (limited to 'src')
-rwxr-xr-x src/crawl/crawl.cpp 6
-rw-r--r-- src/libcrawler/SpoolRewindInputStream.cpp 82
-rw-r--r-- src/modules/fetcher/GNUmakefile 2
3 files changed, 64 insertions, 26 deletions
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 4f3eb00..9f5e0b2 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -120,7 +120,8 @@ int main( int /* argc */, char *argv[] )
vector<string> fetcherModules;
#ifndef _WIN32
- fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
+// fetcherModules.push_back( "./modules/fetcher/libfetch/mod_fetcher_libfetch.so" );
+ fetcherModules.push_back( "./modules/fetcher/libcurl/mod_fetcher_libcurl.so" );
#else
fetcherModules.push_back( ".\\modules\\fetcher\\winhttp\\mod_fetcher_winhttp.dll" );
#endif
@@ -162,7 +163,8 @@ int main( int /* argc */, char *argv[] )
Frontier *frontier = frontiers.create( "memory_frontier" );
#ifndef _WIN32
- Fetcher *fetcher = fetchers.create( "libfetch_fetcher" );
+// Fetcher *fetcher = fetchers.create( "libfetch_fetcher" );
+ Fetcher *fetcher = fetchers.create( "libcurl_fetcher" );
#else
Fetcher *fetcher = fetchers.create( "winhttp_fetcher" );
#endif
diff --git a/src/libcrawler/SpoolRewindInputStream.cpp b/src/libcrawler/SpoolRewindInputStream.cpp
index 9135741..13ab105 100644
--- a/src/libcrawler/SpoolRewindInputStream.cpp
+++ b/src/libcrawler/SpoolRewindInputStream.cpp
@@ -34,6 +34,61 @@ spool_streambuf::~spool_streambuf( )
}
}
+streambuf::int_type spool_streambuf::spoolSourceData( char *data, size_t n )
+{
+ size_t data_len = m_buf.size( ) - ( m_start - m_base ) ;
+ if( n < data_len ) {
+ data_len = n;
+ }
+
+ m_base = &m_buf.front( );
+ m_start = m_base;
+
+ memcpy( m_start, data, data_len );
+
+ spoolData( data_len );
+
+ return data_len;
+}
+
+void spool_streambuf::spoolData( size_t n )
+{
+ switch( m_state ) {
+ case TO_SPOOL_MEMORY:
+ // as long we can "spool" to memory, do so..
+ if( m_spoolBufPos + n <= m_spoolBufSize ) {
+ m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n );
+ m_spoolBufPos += n;
+ } else {
+ // ..otherwise start spooling to disk, write
+ // current memory spool buffer first..
+ LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBufSize << ")";
+ m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc );
+ assert( m_spoolFile.good( ) );
+ m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize );
+ assert( m_spoolFile.good( ) );
+ m_state = TO_SPOOL_FILE;
+ m_spoolFile.write( m_start, n );
+ assert( m_spoolFile.good( ) );
+ }
+ break;
+
+ case TO_SPOOL_FILE:
+ // we are appending to the spool file
+ assert( m_spoolFile.good( ) );
+ m_spoolFile.write( m_start, n );
+ assert( m_spoolFile.good( ) );
+ break;
+
+ case FROM_SPOOL_MEMORY:
+ case FROM_SPOOL_FILE:
+ throw logic_error( "Still getting data from source after rewind!" );
+
+ default:
+ throw logic_error( "Illegal state!" );
+ }
+}
+
streambuf::int_type spool_streambuf::underflow( )
{
// check if buffer is exhausted, if not, return current character
@@ -62,29 +117,7 @@ streambuf::int_type spool_streambuf::underflow( )
return traits_type::eof( );
}
- if( m_state == TO_SPOOL_MEMORY ) {
- // as long we can "spool" to memory, do so..
- if( m_spoolBufPos + n <= m_spoolBufSize ) {
- m_spoolBuf.insert( m_spoolBuf.begin( ) + m_spoolBufPos, m_start, m_start + n );
- m_spoolBufPos += n;
- } else {
- // ..otherwise start spooling to disk, write
- // current memory spool buffer first..
- LOG( logWARNING ) << "Spooling spool buffer exceeded (>" << m_spoolBufSize << ")";
- m_spoolFile.open( "/tmp/spool.tmp", ios::binary | ios::out | ios::trunc );
- assert( m_spoolFile.good( ) );
- m_spoolFile.write( &m_spoolBuf.front( ), m_spoolBufSize );
- assert( m_spoolFile.good( ) );
- m_state = TO_SPOOL_FILE;
- m_spoolFile.write( m_start, n );
- assert( m_spoolFile.good( ) );
- }
- } else {
- // we are appending to the spool file
- assert( m_spoolFile.good( ) );
- m_spoolFile.write( m_start, n );
- assert( m_spoolFile.good( ) );
- }
+ spoolData( n );
break;
@@ -115,6 +148,9 @@ streambuf::int_type spool_streambuf::underflow( )
}
break;
+
+ default:
+ throw logic_error( "Illegal state!" );
}
// set pointers
diff --git a/src/modules/fetcher/GNUmakefile b/src/modules/fetcher/GNUmakefile
index 89dfe93..6be8eaa 100644
--- a/src/modules/fetcher/GNUmakefile
+++ b/src/modules/fetcher/GNUmakefile
@@ -1,6 +1,6 @@
TOPDIR = ../../..
-SUBDIRS = libfetch file
+SUBDIRS = libfetch file libcurl
-include $(TOPDIR)/makefiles/gmake/sub.mk