diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2014-06-14 20:15:59 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2014-06-14 20:15:59 +0200 |
commit | 913e4215f22e16ad90a30b7e68e8cd2165c6812d (patch) | |
tree | d7aef8f6e7b29895f1b0160cb647e5427181198e /tests | |
parent | 4f6d08ce39cc430ed7ba90d143bf7af3fc8ca6d5 (diff) | |
download | crawler-913e4215f22e16ad90a30b7e68e8cd2165c6812d.tar.gz crawler-913e4215f22e16ad90a30b7e68e8cd2165c6812d.tar.bz2 |
added textwolf and a test for it
Diffstat (limited to 'tests')
-rw-r--r-- | tests/GNUmakefile | 2 | ||||
-rw-r--r-- | tests/Makefile.W32 | 2 | ||||
-rw-r--r-- | tests/textwolf/GNUmakefile | 26 | ||||
-rwxr-xr-x | tests/textwolf/Makefile.W32 | 33 | ||||
-rw-r--r-- | tests/textwolf/test1.cpp | 59 | ||||
-rw-r--r-- | tests/textwolf/test1.xml | 27 |
6 files changed, 147 insertions, 2 deletions
diff --git a/tests/GNUmakefile b/tests/GNUmakefile index 46fe18b..a21e409 100644 --- a/tests/GNUmakefile +++ b/tests/GNUmakefile @@ -2,7 +2,7 @@ TOPDIR = .. SUBDIRS = \ utils logger modules url streamhtmlparser libfetch curl psql sqlite typedetect \ - fetcher + fetcher textwolf -include $(TOPDIR)/makefiles/gmake/sub.mk diff --git a/tests/Makefile.W32 b/tests/Makefile.W32 index f5c828b..ff7735a 100644 --- a/tests/Makefile.W32 +++ b/tests/Makefile.W32 @@ -2,7 +2,7 @@ TOPDIR = .. SUBDIRS = \ utils logger modules winhttp url streamhtmlparser \ - fetcher + fetcher textwolf !INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk diff --git a/tests/textwolf/GNUmakefile b/tests/textwolf/GNUmakefile new file mode 100644 index 0000000..d6601a5 --- /dev/null +++ b/tests/textwolf/GNUmakefile @@ -0,0 +1,26 @@ +TOPDIR = ../.. + +SUBDIRS = + +INCLUDE_DIRS = \ + -I$(TOPDIR)/textwolf/include + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + +TEST_CPP_BINS = \ + test1$(EXE) + +OBJS = + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_test: + @./test1 test1.xml diff --git a/tests/textwolf/Makefile.W32 b/tests/textwolf/Makefile.W32 new file mode 100755 index 0000000..39c436e --- /dev/null +++ b/tests/textwolf/Makefile.W32 @@ -0,0 +1,33 @@ +TOPDIR = ..\.. + +SUBDIRS = + +!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk + +INCLUDE_CXXFLAGS = \ + /D_WIN32_WINNT=0x504 + +INCLUDE_DIRS = \ + /I. \ + /I$(TOPDIR)\textwolf\include + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + +TEST_CPP_BINS = \ + test1.exe + +OBJS = + +!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk + +test1.exe: test1.obj + +local_all: + +local_clean: + +local_distclean: + +local_test: diff --git a/tests/textwolf/test1.cpp b/tests/textwolf/test1.cpp new file mode 100644 index 0000000..42c58b3 --- /dev/null +++ b/tests/textwolf/test1.cpp @@ -0,0 +1,59 @@ +#include "textwolf.hpp" +#include "textwolf/istreamiterator.hpp" + +#include <iostream> +#include <fstream> + +using namespace std; +using namespace textwolf; + +int main( int argc, char *argv[] ) +{ + if( argc != 2 ) { + cerr << "Usage: test1 <XML file>\n" << endl; + return 1; + } + + char *xmlFileName = argv[1]; + + ifstream xmlFile( xmlFileName ); + if( !xmlFile.good( ) ) { + cerr << "ERROR: Can't open file '" << xmlFileName << "'" << endl; + return 1; + } + + IStreamIterator isitr( xmlFile ); + + typedef XMLScanner<IStreamIterator, charset::UTF8, charset::UTF8, std::string> Scan; + Scan xs( isitr ); + std::string currentTag; + for( Scan::iterator itr = xs.begin( ); itr != xs.end( ); itr++ ) { + switch( itr->type( ) ) { + case Scan::OpenTag: + currentTag = itr->content( ); + break; + + case Scan::Content: + if( currentTag == "loc" ) { + cout << itr->content( ) << endl; + } + break; + + case Scan::None: + case Scan::ErrorOccurred: + case Scan::HeaderStart: + case Scan::HeaderAttribName: + case Scan::HeaderAttribValue: + case Scan::HeaderEnd: + case Scan::DocAttribValue: + case Scan::DocAttribEnd: + case Scan::TagAttribName: + case Scan::TagAttribValue: + case Scan::CloseTagIm: + case Scan::CloseTag: + case Scan::Exit: + default: + break; + } + } +} diff --git a/tests/textwolf/test1.xml b/tests/textwolf/test1.xml new file mode 100644 index 0000000..07575e2 --- /dev/null +++ b/tests/textwolf/test1.xml @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="UTF-8"?> +<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + <sitemap> + <loc>http://relevancy.bger.ch/sitemaps/sitemap_atf_fr.xml</loc> + <lastmod>2014-06-14T10:10:03Z</lastmod> + </sitemap> + <sitemap> + <loc>http://relevancy.bger.ch/sitemaps/sitemap_cedh_fr.xml</loc> + <lastmod>2014-06-14T10:10:01Z</lastmod> + </sitemap> + <sitemap> + <loc>http://relevancy.bger.ch/sitemaps/sitemap_atf_it.xml</loc> + <lastmod>2014-06-14T10:10:04Z</lastmod> + </sitemap> + <sitemap> + <loc>http://relevancy.bger.ch/sitemaps/sitemap_cedh_it.xml</loc> + <lastmod>2014-06-14T10:10:01Z</lastmod> + </sitemap> + <sitemap> + <loc>http://relevancy.bger.ch/sitemaps/sitemap_cedh_de.xml</loc> + <lastmod>2014-06-14T10:10:01Z</lastmod> + </sitemap> + <sitemap> + <loc>http://relevancy.bger.ch/sitemaps/sitemap_atf_de.xml</loc> + <lastmod>2014-06-14T10:10:02Z</lastmod> + </sitemap> +</sitemapindex> |