diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-07-14 20:19:16 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-07-14 20:19:16 +0200 |
commit | 3c903a9a1784edc375119f1db7992e94765c0fbf (patch) | |
tree | 27ff1afc9b38a5cb7a037e7e9ef36bdd9903116e /tests/streamhtmlparser | |
parent | 54cce110784d33d658b5f78286a98bee244a9eeb (diff) | |
download | crawler-3c903a9a1784edc375119f1db7992e94765c0fbf.tar.gz crawler-3c903a9a1784edc375119f1db7992e94765c0fbf.tar.bz2 |
added a second link parser test
Diffstat (limited to 'tests/streamhtmlparser')
-rw-r--r-- | tests/streamhtmlparser/GNUmakefile | 3 | ||||
-rw-r--r-- | tests/streamhtmlparser/test2.cpp | 51 |
2 files changed, 53 insertions, 1 deletion
diff --git a/tests/streamhtmlparser/GNUmakefile b/tests/streamhtmlparser/GNUmakefile
index 0fad0a5..de0c9a9 100644
--- a/tests/streamhtmlparser/GNUmakefile
+++ b/tests/streamhtmlparser/GNUmakefile
@@ -11,7 +11,8 @@ INCLUDE_LIBS = \
 	$(TOPDIR)/streamhtmlparser/libstreamhtmlparser.a
 
 TEST_CPP_BINS = \
-	test1$(EXE)
+	test1$(EXE) \
+	test2$(EXE)
 
 OBJS =
 
diff --git a/tests/streamhtmlparser/test2.cpp b/tests/streamhtmlparser/test2.cpp
new file mode 100644
index 0000000..3c41cae
--- /dev/null
+++ b/tests/streamhtmlparser/test2.cpp
@@ -0,0 +1,51 @@
+#include "htmlparser_cpp.h"
+
+#include <iostream>
+#include <fstream>
+#include <cstring>
+
+using namespace std;
+using namespace streamhtmlparser;
+// Prints the value of every href attribute of an <a> tag found in the given HTML file.
+int main( int argc, char *argv[] )
+{
+	if( argc != 2 ) {
+		cerr << "Usage: test2 <HTML file>\n" << endl;
+		return 1;
+	}
+
+	char *htmlFileName = argv[1];
+
+	HtmlParser parser;
+
+	ifstream htmlFile( htmlFileName );
+	if( !htmlFile.good( ) ) {
+		cerr << "ERROR: Can't open file '" << htmlFileName << "'" << endl;
+		return 1;
+	}
+
+	string link;
+	char ch = 0;
+	bool in_link = false;
+	// get( ch ) fails at end of input, so the EOF marker is never handed to the parser
+	while( htmlFile.get( ch ) ) {
+		// parse one byte at a time so the parser state can be inspected after each byte
+		parser.Parse( &ch, 1 );
+		// inside the value of <a href=...>: remember the latest value the parser reports
+		if( parser.state( ) == HtmlParser::STATE_VALUE && parser.tag( ) != NULL && parser.attribute( ) != NULL && parser.value( ) != NULL ) {
+			if( strcmp( parser.tag( ), "a" ) == 0 && strcmp( parser.attribute( ), "href" ) == 0 ) {
+				link = parser.value( );
+				in_link = true;
+			}
+		} else if( in_link && parser.state( ) == HtmlParser::STATE_TAG ) {
+			cout << link << endl; // emit the collected href once the parser has moved on to STATE_TAG
+			link.clear( );
+			in_link = false;
+		} else if( parser.state( ) == HtmlParser::STATE_ERROR ) {
+			cerr << endl << "ERROR at " << endl;
+			return 1;
+		}
+	}
+
+	return 0;
+}