summary | refs | log | tree | commit | diff
path: root/tests/streamhtmlparser
diff options
context:
space:
mode:
author Andreas Baumann <abaumann@yahoo.com> 2012-07-14 20:19:16 +0200
committer Andreas Baumann <abaumann@yahoo.com> 2012-07-14 20:19:16 +0200
commit 3c903a9a1784edc375119f1db7992e94765c0fbf (patch)
tree 27ff1afc9b38a5cb7a037e7e9ef36bdd9903116e /tests/streamhtmlparser
parent 54cce110784d33d658b5f78286a98bee244a9eeb (diff)
download crawler-3c903a9a1784edc375119f1db7992e94765c0fbf.tar.gz
download crawler-3c903a9a1784edc375119f1db7992e94765c0fbf.tar.bz2
added a second link parser test
Diffstat (limited to 'tests/streamhtmlparser')
-rw-r--r-- tests/streamhtmlparser/GNUmakefile 3
-rw-r--r-- tests/streamhtmlparser/test2.cpp 51
2 files changed, 53 insertions, 1 deletions
diff --git a/tests/streamhtmlparser/GNUmakefile b/tests/streamhtmlparser/GNUmakefile
index 0fad0a5..de0c9a9 100644
--- a/tests/streamhtmlparser/GNUmakefile
+++ b/tests/streamhtmlparser/GNUmakefile
@@ -11,7 +11,8 @@ INCLUDE_LIBS = \
$(TOPDIR)/streamhtmlparser/libstreamhtmlparser.a
TEST_CPP_BINS = \
- test1$(EXE)
+ test1$(EXE) \
+ test2$(EXE)
OBJS =
diff --git a/tests/streamhtmlparser/test2.cpp b/tests/streamhtmlparser/test2.cpp
new file mode 100644
index 0000000..3c41cae
--- /dev/null
+++ b/tests/streamhtmlparser/test2.cpp
@@ -0,0 +1,51 @@
+#include "htmlparser_cpp.h"
+
+#include <iostream>
+#include <fstream>
+#include <cstring>
+
+using namespace std;
+using namespace streamhtmlparser;
+
+// Prints each <a href> value found in the HTML file argv[1]; returns 1 on usage, open or parse error.
+int main( int argc, char *argv[] )
+{
+    if( argc != 2 ) {
+        cerr << "Usage: test2 <HTML file>\n" << endl;
+        return 1;
+    }
+    char *htmlFileName = argv[1];
+
+    HtmlParser parser;
+    ifstream htmlFile( htmlFileName );
+    if( !htmlFile.good( ) ) {
+        cerr << "ERROR: Can't open file '" << htmlFileName << "'" << endl;
+        return 1;
+    }
+
+    string link;
+    char buf[1] = {0};
+    bool in_link = false;
+
+    // get( char& ) fails at end of file, so the EOF sentinel is never fed to the parser as data.
+    while( htmlFile.get( buf[0] ) ) {
+        parser.Parse( buf, 1 );
+
+        // While inside an <a href> value, refresh the remembered link after every byte parsed.
+        if( parser.state( ) == HtmlParser::STATE_VALUE && parser.tag( ) != NULL && parser.attribute( ) != NULL && parser.value( ) != NULL ) {
+            if( strcmp( parser.tag( ), "a" ) == 0 && strcmp( parser.attribute( ), "href" ) == 0 ) {
+                link = parser.value( );
+                in_link = true;
+            }
+        } else if( in_link && parser.state( ) == HtmlParser::STATE_TAG ) {
+            cout << link << endl;
+            link.clear( );
+            in_link = false;
+        } else if( parser.state( ) == HtmlParser::STATE_ERROR ) {
+            cerr << endl << "ERROR at " << endl;
+            return 1;
+        }
+    }
+
+    return 0;
+}