summary | refs | log | tree | commit | diff
path: root/src/modules
diff options
context:
space:
mode:
author: Andreas Baumann <mail@andreasbaumann.cc> 2014-10-03 17:59:00 +0200
committer: Andreas Baumann <mail@andreasbaumann.cc> 2014-10-03 17:59:00 +0200
commit: 7af671da43dee7dd07333cab38c181106a328832 (patch)
tree: e8a9d0cfed41ab4344fc0f969044aac352b4d311 /src/modules
parent: 92ba06d58475fd4ab07d8e3b1efa6993f1f02340 (diff)
download: crawler-7af671da43dee7dd07333cab38c181106a328832.tar.gz
crawler-7af671da43dee7dd07333cab38c181106a328832.tar.bz2
..
Diffstat (limited to 'src/modules')
-rw-r--r-- src/modules/fetcher/libcurl/GNUmakefile 47
-rw-r--r-- src/modules/fetcher/libcurl/LibCurlFetcher.cpp 18
-rwxr-xr-x src/modules/fetcher/libcurl/LibCurlFetcher.hpp 19
-rw-r--r-- src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp 152
-rwxr-xr-x src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp 21
5 files changed, 257 insertions, 0 deletions
diff --git a/src/modules/fetcher/libcurl/GNUmakefile b/src/modules/fetcher/libcurl/GNUmakefile
new file mode 100644
index 0000000..bedd46f
--- /dev/null
+++ b/src/modules/fetcher/libcurl/GNUmakefile
@@ -0,0 +1,47 @@
+# Build description for the libcurl-based fetcher module.
+# TOPDIR points four directory levels up from src/modules/fetcher/libcurl.
+TOPDIR = ../../../..
+
+# This directory has no sub-directories to descend into.
+SUBDIRS =
+
+# Leading '-' makes the include optional: GNU make does not fail if the
+# file is missing. CURL_INCLUDES, CURL_LDFLAGS and CURL_LIBS used below are
+# not set in this file (presumably platform.mk provides them - confirm).
+-include $(TOPDIR)/makefiles/gmake/platform.mk
+
+# Header search path: this directory, the source tree, the project's
+# include/ sub-directories and whatever curl needs.
+INCLUDE_DIRS = \
+ -I. -I$(TOPDIR)/src \
+ -I$(TOPDIR)/include/module \
+ -I$(TOPDIR)/include/util \
+ -I$(TOPDIR)/include/crawler \
+ $(CURL_INCLUDES)
+
+# No extra C++ compiler flags: the continuation runs into an empty line.
+INCLUDE_CXXFLAGS = \
+
+# Library search path: the in-tree libcrawler plus curl's linker flags.
+INCLUDE_LDFLAGS = \
+ -L$(TOPDIR)/src/libcrawler \
+ $(CURL_LDFLAGS)
+
+# Libraries to link against.
+INCLUDE_LIBS = \
+ -lcrawler \
+ $(CURL_LIBS)
+
+# Name of the loadable module to build.
+DYNAMIC_MODULE = \
+ mod_fetcher_libcurl.so
+
+# Name of the static archive to build.
+STATIC_LIB = \
+ liblibcurlfetcher.a
+
+# Object files compiled from the C++ sources in this directory.
+CPP_OBJS = \
+ LibCurlFetcher.o \
+ LibCurlRewindInputStream.o
+
+# Optional include; presumably supplies the generic rules that consume the
+# variables above and invoke the local_* targets below - confirm.
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+# Per-directory hook targets. All are empty here: this module adds no
+# steps of its own to any of these phases.
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_install:
+
+local_uninstall:
+
+local_test:
+
diff --git a/src/modules/fetcher/libcurl/LibCurlFetcher.cpp b/src/modules/fetcher/libcurl/LibCurlFetcher.cpp
new file mode 100644
index 0000000..4ace062
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlFetcher.cpp
@@ -0,0 +1,18 @@
+#include "LibCurlFetcher.hpp"
+#include "LibCurlRewindInputStream.hpp"
+
+// Default constructor. The class declares no data members, so there is
+// nothing to initialise.
+LibCurlFetcher::LibCurlFetcher( )
+{
+}
+
+// Destructor. The fetcher holds no resources of its own; streams handed
+// out by fetch( ) are not tracked here and are not released by it.
+LibCurlFetcher::~LibCurlFetcher( )
+{
+}
+
+// Creates a LibCurlRewindInputStream for 'url' and returns it through the
+// RewindInputStream base pointer. The stream is heap-allocated and nothing
+// in this function frees it, so the caller has to delete it.
+RewindInputStream *LibCurlFetcher::fetch( const URL url )
+{
+ return new LibCurlRewindInputStream( url );
+}
+
+// Registers LibCurlFetcher under the name "libcurl_fetcher" for the Fetcher
+// interface. REGISTER_MODULE is not defined in this file (presumably it
+// comes from ModuleRegistry.hpp - confirm).
+REGISTER_MODULE( "libcurl_fetcher", Fetcher, LibCurlFetcher )
diff --git a/src/modules/fetcher/libcurl/LibCurlFetcher.hpp b/src/modules/fetcher/libcurl/LibCurlFetcher.hpp
new file mode 100755
index 0000000..ef83d4b
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlFetcher.hpp
@@ -0,0 +1,19 @@
+#ifndef __LIBCURL_FETCHER_H
+#define __LIBCURL_FETCHER_H
+
+#include "Fetcher.hpp"
+#include "ModuleRegistry.hpp"
+
+// Fetcher implementation backed by libcurl. The class carries no state;
+// each call to fetch( ) produces an independent stream object.
+class LibCurlFetcher : public Fetcher
+{
+ public:
+ // Default constructor; nothing to set up.
+ LibCurlFetcher( );
+
+ // Virtual destructor so the object can be destroyed through a
+ // pointer to Fetcher.
+ virtual ~LibCurlFetcher( );
+
+ // Returns a newly allocated stream for 'url'. The fetcher keeps no
+ // reference to it; the caller has to delete the returned object.
+ virtual RewindInputStream *fetch( const URL url );
+};
+
+// Module declaration for the Fetcher interface. DECLARE_MODULE is not
+// defined in this file (presumably it comes from ModuleRegistry.hpp -
+// confirm).
+DECLARE_MODULE( Fetcher )
+
+#endif
diff --git a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
new file mode 100644
index 0000000..7d75123
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.cpp
@@ -0,0 +1,152 @@
+#include "LibCurlRewindInputStream.hpp"
+#include "SpoolRewindInputStream.hpp"
+
+#include <curlpp/Options.hpp>
+//#include <curlpp/Infos.hpp>
+//#include <curlpp/Types.hpp>
+
+using namespace cURLpp;
+
+using namespace std;
+
+// Stream buffer that accepts data through a libcurl-style write callback
+// and hands it to the spooling code inherited from spool_streambuf.
+class libcurl_buffer : public spool_streambuf
+{
+ public:
+ // Stores 'curl' and passes bufSize, putBack and spoolBufSize through
+ // to the spool_streambuf constructor unchanged.
+ explicit libcurl_buffer( Easy *curl, size_t bufSize = 256, size_t putBack = 1, size_t spoolBufSize = 8192 );
+
+ // Write callback in the (ptr, size, nmemb) shape: forwards s * n
+ // bytes starting at ptr to spoolSourceData( ) and returns its result.
+ size_t write_data( char *ptr, size_t s, size_t n );
+
+ protected:
+
+ // Always returns 0 in this class; the data arrives via write_data( )
+ // instead of being pulled from here.
+ virtual streambuf::int_type readFromSource( );
+
+ private:
+ // Handle given to the constructor. It is only stored: this class
+ // neither uses nor deletes it.
+ Easy *m_curl;
+};
+
+// Forwards the three size parameters to spool_streambuf and remembers the
+// curl handle. The handle is not owned; it is never deleted by this class.
+libcurl_buffer::libcurl_buffer( Easy *curl, size_t bufSize, size_t putBack, size_t spoolBufSize )
+ : spool_streambuf( bufSize, putBack, spoolBufSize ), m_curl( curl )
+{
+}
+
+// Write callback: receives a chunk of s * n bytes at 'ptr', spools it via
+// spoolSourceData( ) and reports back whatever that call returns.
+size_t libcurl_buffer::write_data( char *ptr, size_t s, size_t n )
+{
+ return spoolSourceData( ptr, s * n );
+}
+
+// Source-read hook. The stream's constructor runs the complete transfer
+// and all data is pushed in through write_data( ), so there is nothing
+// left to pull when this is reached.
+// NOTE(review): this returns 0 rather than an end-of-file value; confirm
+// what spool_streambuf expects from readFromSource( ) in this situation.
+streambuf::int_type libcurl_buffer::readFromSource( )
+{
+ // done before, we should not get called here!
+
+ return 0;
+}
+
+// Fetches 'url' with curlpp while the object is being constructed.
+//
+// A curl handle is created and configured, a libcurl_buffer is installed
+// as this stream's buffer, the buffer's write_data( ) is registered as the
+// write callback, and perform( ) then runs the whole transfer before the
+// constructor returns.
+//
+// Exceptions thrown by curlpp or by the allocations are passed on to the
+// caller unchanged. Because the destructor does not run for an object
+// whose constructor threw, everything allocated here is released before
+// the exception is rethrown.
+LibCurlRewindInputStream::LibCurlRewindInputStream( const URL &url )
+ : SpoolRewindInputStream( url ), m_curl( 0 )
+{
+ // Tracks the buffer allocated in this constructor so the cleanup path
+ // never touches an m_buf value that was not set here.
+ libcurl_buffer *buf = 0;
+
+ try {
+     m_curl = new Easy( );
+
+     m_curl->setOpt( Options::Url( url.str( ).c_str( ) ) );
+
+     // how to use logger here?
+     // Header: response headers are delivered to the write callback
+     // together with the body. Verbose: libcurl emits debug output.
+     m_curl->setOpt( Options::Header( true ) );
+     m_curl->setOpt( Options::Verbose( true ) );
+/* m_curl->set( DebugFunction( types::DebugFunctionFunctor( &
+ request.setOpt(Verbose(true));
+ request.setOpt(DebugFunction(curlpp::types::DebugFunctionFunctor(&myWindow,
+ &MyWindow::writeDebug)));
+*/
+     buf = new libcurl_buffer( m_curl );
+     m_buf = buf;
+     rdbuf( m_buf );
+
+     // The option is passed by pointer; curlpp is expected to take
+     // ownership of it (see OptionList::setOpt - confirm).
+     Types::WriteFunctionFunctor functor( buf, &libcurl_buffer::write_data );
+     Options::WriteFunction *wf = new Options::WriteFunction( functor );
+     m_curl->setOpt( wf );
+
+     m_curl->perform( );
+ }
+ catch( ... ) {
+     // Same release order as the destructor: buffer first, then handle.
+     if( buf ) {
+         rdbuf( 0 );
+         m_buf = 0;
+         delete buf;
+     }
+     delete m_curl;
+     m_curl = 0;
+     throw;
+ }
+
+ // TODO: error handling beyond propagating the exception, e.g. recording
+ // the failure so that lastErrMsg( ) can report it
+}
+
+// Releases the stream buffer first and the curl handle second.
+LibCurlRewindInputStream::~LibCurlRewindInputStream( )
+{
+ // deleting a null pointer is a no-op, so no explicit guards are needed
+ delete m_buf;
+ delete m_curl;
+}
+
+// Placeholder: always yields the fixed text "ERROR", regardless of what
+// happened during the transfer.
+string LibCurlRewindInputStream::lastErrMsg( ) const
+{
+ // TODO: fetch m_curl state and return error
+ const string msg( "ERROR" );
+ return msg;
+}
+
+#if 0
+
+#include <iostream>
+#include <sys/types.h>
+#include <cassert>
+#include <cstring>
+#include <cstdlib>
+
+using namespace std;
+
+// Growable byte buffer that collects chunks handed to f( ) and keeps the
+// collected data NUL-terminated so str( ) can be used as a C string.
+//
+// Changes from the first version:
+//  - a fresh object holds an empty string instead of uninitialised memory
+//  - allocation failure is reported through f( )'s return value instead of
+//    an assert that vanishes under NDEBUG; the old buffer stays intact
+//  - s * n and the new buffer size are checked for overflow
+//  - copying is disabled, two objects must never free the same buffer
+class MemoryWriter {
+ static const size_t initial_size = 100;
+
+ protected:
+ char *c;       // collected data, NUL-terminated; NULL if the first allocation failed
+ size_t c_size; // number of data bytes in c, terminator not counted
+
+ public:
+ MemoryWriter( ) : c( 0 ), c_size( 0 ) {
+     c = static_cast<char *>( malloc( initial_size ) );
+     if( c != NULL )
+         c[0] = '\0';
+ }
+
+ ~MemoryWriter( ) {
+     // free( NULL ) is a no-op
+     free( c );
+ }
+
+ // Appends s * n bytes from ptr. Returns the number of bytes taken: s * n
+ // on success, 0 if the size computation overflows or memory runs out.
+ size_t f( char *ptr, size_t s, size_t n ) {
+     const size_t max_size = static_cast<size_t>( -1 );
+
+     if( s != 0 && n > max_size / s )
+         return 0;
+     size_t part_size = s * n;
+
+     // room is needed for the old data, the new chunk and the terminator
+     if( part_size > max_size - c_size - 1 )
+         return 0;
+
+     // keep the old pointer until realloc is known to have succeeded
+     char *grown = static_cast<char *>( realloc( c, c_size + part_size + 1 ) );
+     if( grown == NULL )
+         return 0;
+     c = grown;
+
+     if( part_size > 0 )
+         memcpy( &c[c_size], ptr, part_size );
+     c_size += part_size;
+     c[c_size] = '\0';
+
+     return part_size;
+ }
+
+ // Collected data as a C string; NULL only if no allocation ever succeeded.
+ char *str( ) {
+     return c;
+ }
+
+ // Number of bytes collected so far.
+ size_t size( ) {
+     return c_size;
+ }
+
+ private:
+ // not copyable: declared but intentionally left undefined
+ MemoryWriter( const MemoryWriter & );
+ MemoryWriter &operator=( const MemoryWriter & );
+};
+
+// Scratch test driver; it sits inside the surrounding "#if 0" region and
+// is therefore never compiled. It is incomplete as written: 'url' is not
+// declared in this function, no URL is set on 'curl', no transfer is
+// started, and 'w' never receives any data before being printed.
+int main( int argc, char *argv[] ) {
+ Easy curl;
+ MemoryWriter w;
+
+
+ // prints the response code, the number of collected bytes and the URL
+ cout << cURLpp::Infos::ResponseCode::get( curl ) << endl
+ << w.size( ) << " "
+ << url << endl;
+
+ // prints the collected data
+ cout << w.str( ) << endl;
+
+ return EXIT_SUCCESS;
+}
+#endif
diff --git a/src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp
new file mode 100755
index 0000000..9efefab
--- /dev/null
+++ b/src/modules/fetcher/libcurl/LibCurlRewindInputStream.hpp
@@ -0,0 +1,21 @@
+#ifndef __LIBCURL_REWIND_INPUT_STREAM_H
+#define __LIBCURL_REWIND_INPUT_STREAM_H
+
+#include "SpoolRewindInputStream.hpp"
+
+#include <curlpp/cURLpp.hpp>
+#include <curlpp/Easy.hpp>
+
+// Spooling input stream whose contents are downloaded with curlpp.
+class LibCurlRewindInputStream : public SpoolRewindInputStream
+{
+ public:
+ // Performs the complete transfer for 'url' during construction.
+ LibCurlRewindInputStream( const URL &url );
+
+ // Deletes the stream buffer and the curl handle.
+ virtual ~LibCurlRewindInputStream( );
+
+ // Currently returns the fixed text "ERROR"; reporting the real curl
+ // state is still a TODO in the implementation.
+ virtual std::string lastErrMsg( ) const;
+
+ private:
+ // curl handle created in the constructor and deleted in the
+ // destructor.
+ cURLpp::Easy *m_curl;
+};
+
+#endif