diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-03 17:59:00 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-03 17:59:00 +0200 |
commit | 7af671da43dee7dd07333cab38c181106a328832 (patch) | |
tree | e8a9d0cfed41ab4344fc0f969044aac352b4d311 | |
parent | 92ba06d58475fd4ab07d8e3b1efa6993f1f02340 (diff) | |
download | crawler-7af671da43dee7dd07333cab38c181106a328832.tar.gz crawler-7af671da43dee7dd07333cab38c181106a328832.tar.bz2 |
..
-rw-r--r-- | src/modules/fetcher/libcurl/GNUmakefile | 47 | ||||
-rw-r--r-- | src/modules/fetcher/libcurl/LibCurlFetcher.cpp | 18 | ||||
-rwxr-xr-x | src/modules/fetcher/libcurl/LibCurlFetcher.hpp | 19 | ||||
-rw-r--r-- | src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp | 152 | ||||
-rwxr-xr-x | src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp | 21 |
5 files changed, 257 insertions, 0 deletions
diff --git a/src/modules/fetcher/libcurl/GNUmakefile b/src/modules/fetcher/libcurl/GNUmakefile
new file mode 100644
index 0000000..bedd46f
--- /dev/null
+++ b/src/modules/fetcher/libcurl/GNUmakefile
@@ -0,0 +1,47 @@
+TOPDIR = ../../../..
+
+SUBDIRS =
+
+-include $(TOPDIR)/makefiles/gmake/platform.mk
+
+INCLUDE_DIRS = \
+	-I. -I$(TOPDIR)/src \
+	-I$(TOPDIR)/include/module \
+	-I$(TOPDIR)/include/util \
+	-I$(TOPDIR)/include/crawler \
+	$(CURL_INCLUDES)
+
+INCLUDE_CXXFLAGS = \
+
+INCLUDE_LDFLAGS = \
+	-L$(TOPDIR)/src/libcrawler \
+	$(CURL_LDFLAGS)
+
+INCLUDE_LIBS = \
+	-lcrawler \
+	$(CURL_LIBS)
+
+DYNAMIC_MODULE = \
+	mod_fetcher_libcurl.so
+
+STATIC_LIB = \
+	liblibcurlfetcher.a
+
+CPP_OBJS = \
+	LibCurlFetcher.o \
+	LibCurlRewindInputStream.o
+
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_install:
+
+local_uninstall:
+
+local_test:
+
diff --git a/src/modules/fetcher/libcurl/LibCurlFetcher.cpp b/src/modules/fetcher/libcurl/LibCurlFetcher.cpp
new file mode 100644
index 0000000..4ace062
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlFetcher.cpp
@@ -0,0 +1,18 @@
+#include "LibCurlFetcher.hpp"
+#include "LibCurlRewindInputStream.hpp"
+
+LibCurlFetcher::LibCurlFetcher( )
+{
+}
+
+LibCurlFetcher::~LibCurlFetcher( )
+{
+}
+
+RewindInputStream *LibCurlFetcher::fetch( const URL url )
+{
+	LibCurlRewindInputStream *s = new LibCurlRewindInputStream( url );
+	return s;
+}
+
+REGISTER_MODULE( "libcurl_fetcher", Fetcher, LibCurlFetcher )
diff --git a/src/modules/fetcher/libcurl/LibCurlFetcher.hpp b/src/modules/fetcher/libcurl/LibCurlFetcher.hpp
new file mode 100755
index 0000000..ef83d4b
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlFetcher.hpp
@@ -0,0 +1,19 @@
+#ifndef __LIBCURL_FETCHER_H
+#define __LIBCURL_FETCHER_H
+
+#include "Fetcher.hpp"
+#include "ModuleRegistry.hpp"
+
+class LibCurlFetcher : public Fetcher
+{
+	public:
+		LibCurlFetcher( );
+
+		virtual ~LibCurlFetcher( );
+
+		virtual RewindInputStream *fetch( const URL url );
+};
+
+DECLARE_MODULE( Fetcher )
+
+#endif
diff --git a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
new file mode 100644
index 0000000..7d75123
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
@@ -0,0 +1,152 @@
+#include "LibCurlRewindInputStream.hpp"
+#include "SpoolRewindInputStream.hpp"
+
+#include <curlpp/Options.hpp>
+//#include <curlpp/Infos.hpp>
+//#include <curlpp/Types.hpp>
+
+using namespace cURLpp;
+
+using namespace std;
+
+class libcurl_buffer : public spool_streambuf
+{
+	public:
+		explicit libcurl_buffer( Easy *curl, size_t bufSize = 256, size_t putBack = 1, size_t spoolBufSize = 8192 );
+
+		size_t write_data( char *ptr, size_t s, size_t n );
+
+	protected:
+
+		virtual streambuf::int_type readFromSource( );
+
+	private:
+		Easy *m_curl;
+};
+
+libcurl_buffer::libcurl_buffer( Easy *curl, size_t bufSize, size_t putBack, size_t spoolBufSize )
+	: spool_streambuf( bufSize, putBack, spoolBufSize ), m_curl( curl )
+{
+}
+
+size_t libcurl_buffer::write_data( char *ptr, size_t s, size_t n )
+{
+	size_t chunk_size = s * n;
+
+	size_t data_spooled = spoolSourceData( ptr, chunk_size );
+
+	return data_spooled;
+
+}
+
+streambuf::int_type libcurl_buffer::readFromSource( )
+{
+	// done before, we should not get called here!
+
+	return 0;
+}
+
+LibCurlRewindInputStream::LibCurlRewindInputStream( const URL &url )
+	: SpoolRewindInputStream( url ), m_curl( 0 )
+{
+	m_curl = new Easy( );
+
+	m_curl->setOpt( Options::Url( url.str( ).c_str( ) ) );
+
+	// how to use logger here?
+	m_curl->setOpt( Options::Header( true ) );
+	m_curl->setOpt( Options::Verbose( true ) );
+/*	m_curl->set( DebugFunction( types::DebugFunctionFunctor( &
+	request.setOpt(Verbose(true));
+	request.setOpt(DebugFunction(curlpp::types::DebugFunctionFunctor(&myWindow,
+	&MyWindow::writeDebug)));
+*/
+	m_buf = new libcurl_buffer( m_curl );
+	rdbuf( m_buf );
+
+	Types::WriteFunctionFunctor functor( static_cast<libcurl_buffer *>( m_buf ), &libcurl_buffer::write_data );
+	Options::WriteFunction *wf = new Options::WriteFunction( functor );
+	m_curl->setOpt( wf );
+
+	m_curl->perform( );
+
+	// TODO: error handling
+}
+
+LibCurlRewindInputStream::~LibCurlRewindInputStream( )
+{
+	if( m_buf ) delete m_buf;
+	if( m_curl ) delete m_curl;
+}
+
+string LibCurlRewindInputStream::lastErrMsg( ) const
+{
+	// TODO: fetch m_curl state and return error
+	return "ERROR";
+}
+
+#if 0
+
+#include <iostream>
+#include <sys/types.h>
+#include <cassert>
+#include <cstring>
+#include <cstdlib>
+
+using namespace std;
+
+class MemoryWriter {
+	static const size_t initial_size = 100;
+
+	protected:
+		char *c;
+		size_t c_size;
+
+	public:
+		MemoryWriter( ) {
+			c = (char *)malloc( initial_size );
+			c_size = 0;
+		}
+
+		~MemoryWriter( ) {
+			if( c != NULL )
+				free( c );
+		}
+
+		size_t f( char *ptr, size_t s, size_t n ) {
+			size_t part_size = s * n;
+
+			c = (char *)realloc( c, c_size + part_size + 1 );
+
+			assert( c != NULL );
+
+			memcpy( &c[c_size], ptr, part_size );
+			c_size += part_size;
+			c[c_size] = '\0';
+
+			return part_size;
+		}
+
+		char *str( ) {
+			return c;
+		}
+
+		size_t size( ) {
+			return c_size;
+		}
+};
+
+int main( int argc, char *argv[] ) {
+	Easy curl;
+	MemoryWriter w;
+
+
+	cout << cURLpp::Infos::ResponseCode::get( curl ) << endl
+		<< w.size( ) << " "
+		<< url << endl;
+
+	cout << w.str( ) << endl;
+
+	return EXIT_SUCCESS;
+}
+#endif
diff --git a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp
new file mode 100755
index 0000000..9efefab
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp
@@ -0,0 +1,21 @@
+#ifndef __LIBCURL_REWIND_INPUT_STREAM_H
+#define __LIBCURL_REWIND_INPUT_STREAM_H
+
+#include "SpoolRewindInputStream.hpp"
+
+#include <curlpp/cURLpp.hpp>
+#include <curlpp/Easy.hpp>
+
+class LibCurlRewindInputStream : public SpoolRewindInputStream
+{
+	public:
+		LibCurlRewindInputStream( const URL &url );
+		virtual ~LibCurlRewindInputStream( );
+
+		virtual std::string lastErrMsg( ) const;
+
+	private:
+		cURLpp::Easy *m_curl;
+};
+
+#endif