summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2014-06-15 12:56:27 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2014-06-15 12:56:27 +0200
commit 967bf4c00d2342137fec917354f40241a9230c7a (patch)
tree bad68ba541a9c89a013caafeb3b06b8ad659e531 /tests
parent d86b7653915ea006a329b290211a5972c6bbcc70 (diff)
download crawler-967bf4c00d2342137fec917354f40241a9230c7a.tar.gz
crawler-967bf4c00d2342137fec917354f40241a9230c7a.tar.bz2
added "xpath"-like selection test for textwolf
Diffstat (limited to 'tests')
-rw-r--r-- tests/textwolf/GNUmakefile | 6
-rw-r--r-- tests/textwolf/test.xml (renamed from tests/textwolf/test1.xml) | 0
-rw-r--r-- tests/textwolf/test1.cpp | 36
-rw-r--r-- tests/textwolf/test2.cpp | 51
4 files changed, 73 insertions, 20 deletions
diff --git a/tests/textwolf/GNUmakefile b/tests/textwolf/GNUmakefile
index d6601a5..3324619 100644
--- a/tests/textwolf/GNUmakefile
+++ b/tests/textwolf/GNUmakefile
@@ -10,7 +10,8 @@ INCLUDE_LDFLAGS = \
INCLUDE_LIBS = \
TEST_CPP_BINS = \
- test1$(EXE)
+ test1$(EXE) \
+ test2$(EXE)
OBJS =
@@ -23,4 +24,5 @@ local_clean:
local_distclean:
local_test:
- @./test1 test1.xml
+ @./test1 test.xml
+ @./test2 test.xml
diff --git a/tests/textwolf/test1.xml b/tests/textwolf/test.xml
index 07575e2..07575e2 100644
--- a/tests/textwolf/test1.xml
+++ b/tests/textwolf/test.xml
diff --git a/tests/textwolf/test1.cpp b/tests/textwolf/test1.cpp
index 1d6a385..ea0b985 100644
--- a/tests/textwolf/test1.cpp
+++ b/tests/textwolf/test1.cpp
@@ -24,37 +24,37 @@ int main( int argc, char *argv[] )
IStreamIterator isitr( xmlFile );
- typedef XMLScanner<IStreamIterator, charset::UTF8, charset::UTF8, std::string> Scan;
- Scan xs( isitr );
+ typedef XMLScanner<IStreamIterator, charset::UTF8, charset::UTF8, std::string> Scanner;
+ Scanner xs( isitr );
std::string currentTag;
- for( Scan::iterator itr = xs.begin( ); itr != xs.end( ); itr++ ) {
+ for( Scanner::iterator itr = xs.begin( ); itr != xs.end( ); itr++ ) {
switch( itr->type( ) ) {
- case Scan::OpenTag:
+ case Scanner::OpenTag:
currentTag = itr->content( );
break;
- case Scan::Content:
+ case Scanner::Content:
if( currentTag == "loc" ) {
cout << itr->content( ) << endl;
}
break;
- case Scan::CloseTag:
+ case Scanner::CloseTag:
currentTag.clear( );
break;
- case Scan::None:
- case Scan::ErrorOccurred:
- case Scan::HeaderStart:
- case Scan::HeaderAttribName:
- case Scan::HeaderAttribValue:
- case Scan::HeaderEnd:
- case Scan::DocAttribValue:
- case Scan::DocAttribEnd:
- case Scan::TagAttribName:
- case Scan::TagAttribValue:
- case Scan::CloseTagIm:
- case Scan::Exit:
+ case Scanner::None:
+ case Scanner::ErrorOccurred:
+ case Scanner::HeaderStart:
+ case Scanner::HeaderAttribName:
+ case Scanner::HeaderAttribValue:
+ case Scanner::HeaderEnd:
+ case Scanner::DocAttribValue:
+ case Scanner::DocAttribEnd:
+ case Scanner::TagAttribName:
+ case Scanner::TagAttribValue:
+ case Scanner::CloseTagIm:
+ case Scanner::Exit:
default:
break;
}
diff --git a/tests/textwolf/test2.cpp b/tests/textwolf/test2.cpp
new file mode 100644
index 0000000..79d1a6d
--- /dev/null
+++ b/tests/textwolf/test2.cpp
@@ -0,0 +1,51 @@
+#include "textwolf.hpp"
+#include "textwolf/istreamiterator.hpp"
+
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+using namespace textwolf;
+
+// Prints the content of every token selected as "//sitemap/loc" in the XML file named by argv[1]; returns 1 on bad usage or unreadable file.
+int main( int argc, char *argv[] )
+{
+    if( argc != 2 ) {
+        cerr << "Usage: test2 <XML file>\n" << endl;
+        return 1;
+    }
+
+    char *xmlFileName = argv[1];
+    ifstream xmlFile( xmlFileName );
+    if( !xmlFile.good( ) ) {
+        cerr << "ERROR: Can't open file '" << xmlFileName << "'" << endl;
+        return 1;
+    }
+
+    IStreamIterator isitr( xmlFile );
+    // Identifiers assigned to path expressions; the selector hands them back for matching tokens.
+    typedef enum {
+        SITEMAP_LOC = 1
+    } XmlNodes;
+    typedef XMLPathSelectAutomaton<charset::UTF8> Automaton;
+    Automaton atm;
+    // XPath-like expression "//sitemap/loc" (content of the node), tagged with SITEMAP_LOC
+    (*atm)--["sitemap"]["loc"]( ) = SITEMAP_LOC;
+
+    typedef XMLPathSelect<charset::UTF8> PathSelect;
+    typedef XMLScanner<IStreamIterator, charset::UTF8, charset::UTF8, std::string> Scanner;
+    Scanner xsc( isitr );
+    PathSelect xsel( &atm );
+    // Push every scanned token into the selector and print the token content for each SITEMAP_LOC result.
+    Scanner::iterator ci, ce;
+    for( ci = xsc.begin( ), ce = xsc.end( ); ci != ce; ci++ ) {
+        PathSelect::iterator itr = xsel.push( ci->type( ), ci->content( ), ci->size( ) );
+        PathSelect::iterator end = xsel.end( );
+        for( ; itr != end; itr++ ) {
+            if( *itr == SITEMAP_LOC ) {
+                cout << ci->content( ) << endl;
+            }
+        }
+    }
+}
+