summaryrefslogtreecommitdiff
path: root/tests/streamhtmlparser
diff options
context:
space:
mode:
authorAndreas Baumann <abaumann@yahoo.com>2012-07-14 17:16:21 +0200
committerAndreas Baumann <abaumann@yahoo.com>2012-07-14 17:16:21 +0200
commit54cce110784d33d658b5f78286a98bee244a9eeb (patch)
tree9c4d998343e7dc88323ae8ef6d5a04c6b958df9c /tests/streamhtmlparser
parentfcb682cb1955d362390665330fdf476cab7dc10b (diff)
downloadcrawler-54cce110784d33d658b5f78286a98bee244a9eeb.tar.gz
crawler-54cce110784d33d658b5f78286a98bee244a9eeb.tar.bz2
added streamhtmlparser
Diffstat (limited to 'tests/streamhtmlparser')
-rw-r--r--tests/streamhtmlparser/GNUmakefile26
-rw-r--r--tests/streamhtmlparser/test1.cpp74
-rw-r--r--tests/streamhtmlparser/test1.html55
3 files changed, 155 insertions, 0 deletions
diff --git a/tests/streamhtmlparser/GNUmakefile b/tests/streamhtmlparser/GNUmakefile
new file mode 100644
index 0000000..0fad0a5
--- /dev/null
+++ b/tests/streamhtmlparser/GNUmakefile
@@ -0,0 +1,26 @@
+TOPDIR = ../..
+
+SUBDIRS =
+
+INCLUDE_DIRS = \
+ -I$(TOPDIR)/streamhtmlparser/include
+
+INCLUDE_LDFLAGS =
+
+INCLUDE_LIBS = \
+ $(TOPDIR)/streamhtmlparser/libstreamhtmlparser.a
+
+TEST_CPP_BINS = \
+ test1$(EXE)
+
+OBJS =
+
+-include $(TOPDIR)/makefiles/gmake/sub.mk
+
+local_all:
+
+local_clean:
+
+local_distclean:
+
+local_test:
diff --git a/tests/streamhtmlparser/test1.cpp b/tests/streamhtmlparser/test1.cpp
new file mode 100644
index 0000000..2e13c5c
--- /dev/null
+++ b/tests/streamhtmlparser/test1.cpp
@@ -0,0 +1,74 @@
+#include "htmlparser_cpp.h"
+
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+using namespace streamhtmlparser;
+
+// Test driver: feeds the HTML file named by argv[1] to the parser one byte at a
+// time and prints the parser state after each byte; returns 1 on any error.
+int main( int argc, char *argv[] )
+{
+	if( argc != 2 ) {
+		cerr << "Usage: test1 <HTML file>\n" << endl;
+		return 1;
+	}
+
+	char *htmlFileName = argv[1];
+	ifstream htmlFile( htmlFileName );
+	if( !htmlFile.good( ) ) {
+		cerr << "ERROR: Can't open file '" << htmlFileName << "'" << endl;
+		return 1;
+	}
+
+	// get( char & ) fails at end of file, so no EOF marker is ever parsed as a byte
+	HtmlParser parser;
+	char buf[1] = {0};
+	while( htmlFile.get( buf[0] ) ) {
+		parser.Parse( buf, 1 );
+		int state = parser.state( );
+		switch( state ) {
+			case HtmlParser::STATE_TEXT:
+				cout << "TEXT '" << buf[0] << "'"
+					<< " '" << ( ( parser.tag( ) != NULL ) ? parser.tag( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.attribute( ) != NULL ) ? parser.attribute( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.value( ) != NULL ) ? parser.value( ) : "NULL" ) << "'"
+					<< endl;
+				break;
+
+			case HtmlParser::STATE_TAG:
+				cout << "TAG"
+					<< " '" << ( ( parser.tag( ) != NULL ) ? parser.tag( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.attribute( ) != NULL ) ? parser.attribute( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.value( ) != NULL ) ? parser.value( ) : "NULL" ) << "'"
+					<< endl;
+				break;
+
+			case HtmlParser::STATE_ATTR:
+				cout << "ATTR"
+					<< " '" << ( ( parser.tag( ) != NULL ) ? parser.tag( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.attribute( ) != NULL ) ? parser.attribute( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.value( ) != NULL ) ? parser.value( ) : "NULL" ) << "'"
+					<< endl;
+				break;
+
+			case HtmlParser::STATE_VALUE:
+				cout << "VALUE"
+					<< " '" << ( ( parser.tag( ) != NULL ) ? parser.tag( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.attribute( ) != NULL ) ? parser.attribute( ) : "NULL" ) << "'"
+					<< " '" << ( ( parser.value( ) != NULL ) ? parser.value( ) : "NULL" ) << "'"
+					<< endl;
+				break;
+
+			case HtmlParser::STATE_ERROR:
+				cerr << endl << "ERROR at " << endl;
+				return 1;
+
+			default:
+				cout << "UNKNOWN state " << state << endl;
+		}
+	}
+
+	return 0;
+}
diff --git a/tests/streamhtmlparser/test1.html b/tests/streamhtmlparser/test1.html
new file mode 100644
index 0000000..27d9184
--- /dev/null
+++ b/tests/streamhtmlparser/test1.html
@@ -0,0 +1,55 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+ <title>Andreas Baumann's Personal Home Page</title>
+ <meta http-equiv="content-type" content="text/html; charset=iso-8859-1"/>
+ <meta name="author" content="Andreas Baumann"/>
+ <link rel="stylesheet" href="css/style.css" type="text/css" media="screen"/>
+</head>
+
+<body>
+ <div id="container">
+
+ <div id="sitename">
+ <h1>Andreas Baumann's Personal Home Page</h1>
+ </div>
+
+ <div id="mainmenu">
+ <ul>
+ <li><a href="/">Home</a></li>
+ <li><a href="/software.shtml">Software</a></li>
+ <li><a href="/toolbox.shtml">Toolbox</a></li>
+ </ul>
+</div>
+
+ <div id="wrap">
+
+
+
+<div id="leftside">
+</div>
+
+
+<div id="content">
+
+<h1>Andreas Baumann's Personal Home Page</h1>
+
+<p>Written in plain HTML with some server-side includes. Synchronization is done
+ with rsync over ssh. If you ask why, well, the two wikis I had before
+ (I don't mention names) were hacked in no time.. :-)
+</p>
+
+</div> <!-- end content -->
+<div class="clearingdiv">&nbsp;</div>
+</div> <!-- end wrap -->
+</div> <!-- end container -->
+
+<div id="footer">
+ &copy; 2009 - 2012, Zurich, Andreas Baumann
+</div>
+
+</body>
+
+</html>
+