diff options
-rw-r--r-- | src/GNUmakefile | 16 | ||||
-rw-r--r-- | src/SimpleURLNormalizer.cpp | 14 | ||||
-rw-r--r-- | src/SimpleURLNormalizer.hpp | 13 | ||||
-rw-r--r-- | src/URL.hpp | 12 | ||||
-rw-r--r-- | src/URLNormalizer.hpp | 13 | ||||
-rw-r--r-- | tests/url/GNUmakefile | 10 | ||||
-rw-r--r-- | tests/url/test1.MUST | 6 | ||||
-rw-r--r-- | tests/url/test1.cpp | 3 | ||||
-rw-r--r-- | tests/url/test2.MUST | 6 | ||||
-rw-r--r-- | tests/url/test2.cpp | 40 |
10 files changed, 125 insertions, 8 deletions
diff --git a/src/GNUmakefile b/src/GNUmakefile index 816d6be..3d3d7b8 100644 --- a/src/GNUmakefile +++ b/src/GNUmakefile @@ -26,7 +26,7 @@ INCLUDE_LIBS += \ $(OPENSSL_LIBS) endif -CPP_OBJS = \ +LOCAL_STATIC_LIB_OBJS = \ URL.o \ LibFetchFetcher.o \ LibFetchRewindInputStream.o \ @@ -35,14 +35,24 @@ CPP_OBJS = \ ProtocolURLFilter.o \ DomainURLFilter.o \ ChainURLFilter.o \ - MemoryURLSeen.o + MemoryURLSeen.o \ + SimpleURLNormalizer.o + +CPP_OBJS = \ + $(LOCAL_STATIC_LIB_OBJS) + +LOCAL_STATIC_LIB = \ + libcrawlingwolf.a CPP_BINS = \ crawlingwolf$(EXE) -include $(TOPDIR)/makefiles/gmake/sub.mk -local_all: +local_all: $(LOCAL_STATIC_LIB) + +$(LOCAL_STATIC_LIB): $(LOCAL_STATIC_LIB_OBJS) + ar rcs $(LOCAL_STATIC_LIB) $(LOCAL_STATIC_LIB_OBJS) local_clean: diff --git a/src/SimpleURLNormalizer.cpp b/src/SimpleURLNormalizer.cpp new file mode 100644 index 0000000..21c34ae --- /dev/null +++ b/src/SimpleURLNormalizer.cpp @@ -0,0 +1,14 @@ +#include "SimpleURLNormalizer.hpp" + +SimpleURLNormalizer::SimpleURLNormalizer( ) +{ +} + +bool SimpleURLNormalizer::normalize( URL &url, const URL contextUrl ) +{ + (void)url; + (void)contextUrl; + + return true; +} + diff --git a/src/SimpleURLNormalizer.hpp b/src/SimpleURLNormalizer.hpp new file mode 100644 index 0000000..433f4e8 --- /dev/null +++ b/src/SimpleURLNormalizer.hpp @@ -0,0 +1,13 @@ +#ifndef __SIMPLEURLNORMALIZER_H +#define __SIMPLEURLNORMALIZER_H + +#include "URLNormalizer.hpp" + +class SimpleURLNormalizer : public URLNormalizer { + public: + SimpleURLNormalizer( ); + + bool normalize( URL &url, const URL contextUrl ); +}; + +#endif diff --git a/src/URL.hpp b/src/URL.hpp index 0d1b113..fac0074 100644 --- a/src/URL.hpp +++ b/src/URL.hpp @@ -7,6 +7,9 @@ using namespace std; class URL { + protected: + string m_url; + public: URL( ) : m_url( "" ) { @@ -28,7 +31,7 @@ class URL { { } - string str( ) const + std::string str( ) const { return m_url; } @@ -54,6 +57,11 @@ class URL { return "/"; } + std::string fragment( ) const + { + return ""; + } + static URL Null; bool operator!=( const URL &other ) const { @@ -67,8 +75,6 @@ class URL { template< typename CharT, typename TraitsT > friend basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u ); - protected: - string m_url; }; template< typename CharT, typename TraitsT > diff --git a/src/URLNormalizer.hpp b/src/URLNormalizer.hpp new file mode 100644 index 0000000..a1f6abf --- /dev/null +++ b/src/URLNormalizer.hpp @@ -0,0 +1,13 @@ +#ifndef __URLNORMALIZER_H +#define __URLNORMALIZER_H + +#include "URL.hpp" + +class URLNormalizer { + public: + virtual ~URLNormalizer( ) { }; + + virtual bool normalize( URL &url, const URL contextUrl ) = 0; +}; + +#endif diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile index e3913bd..81d7a0e 100644 --- a/tests/url/GNUmakefile +++ b/tests/url/GNUmakefile @@ -7,10 +7,12 @@ INCLUDE_DIRS = \ INCLUDE_LDFLAGS = -INCLUDE_LIBS = +INCLUDE_LIBS = \ + $(TOPDIR)/src/libcrawlingwolf.a TEST_CPP_BINS = \ - test1$(EXE) + test1$(EXE) \ + test2$(EXE) OBJS = @@ -19,7 +21,11 @@ OBJS = local_all: local_clean: + -@rm -f *.db *.db-journal 2>/dev/null + -@rm -f *.RES *.DIFF local_distclean: local_test: + @-./exec_test test1 "output normal URL" http://www.andreasbaumann.cc/index.html + @-./exec_test test2 "normalize a relative URL" http://www.andreasbaumann.cc/index.html /software.html diff --git a/tests/url/test1.MUST b/tests/url/test1.MUST new file mode 100644 index 0000000..38a3a27 --- /dev/null +++ b/tests/url/test1.MUST @@ -0,0 +1,6 @@ +protocol: http +port: 80 +domain: www.andreasbaumann.cc +path: /index.html +fragment: + diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp index 264bef0..64ae223 100644 --- a/tests/url/test1.cpp +++ b/tests/url/test1.cpp @@ -17,7 +17,10 @@ int main( int argc, char *argv[] ) URL url( urlstring ); cout << "protocol: " << url.protocol( ) << endl + << "port: " << url.port( ) << endl << "domain: " << url.domain( ) << endl + << "path: " << url.path( ) << endl + << "fragment: " << url.fragment( ) << endl << endl; return 0; diff --git a/tests/url/test2.MUST b/tests/url/test2.MUST new file mode 100644 index 0000000..4718213 --- /dev/null +++ b/tests/url/test2.MUST @@ -0,0 +1,6 @@ +protocol: http +port: 80 +domain: www.andreasbaumann.cc +path: /software.html +fragment: + diff --git a/tests/url/test2.cpp b/tests/url/test2.cpp new file mode 100644 index 0000000..b9a3270 --- /dev/null +++ b/tests/url/test2.cpp @@ -0,0 +1,40 @@ +#include "URL.hpp" +#include "SimpleURLNormalizer.hpp" + +#include <iostream> +#include <string> + +using namespace std; + +int main( int argc, char *argv[] ) +{ + if( argc != 3 ) { + cerr << "usage: test2 <base url> <partial url>\n" << endl; + return 1; + } + + char *baseUrlString = argv[1]; + char *partialUrlString = argv[2]; + + URL baseUrl( baseUrlString ); + URL partialUrl( partialUrlString ); + + URL url = baseUrl; + URLNormalizer *normalizer = new SimpleURLNormalizer( ); + if( !normalizer->normalize( url, partialUrl ) ) { + delete normalizer; + cerr << "Normalization error!\n" << endl; + return 1; + } + + cout << "protocol: " << url.protocol( ) << endl + << "port: " << url.port( ) << endl + << "domain: " << url.domain( ) << endl + << "path: " << url.path( ) << endl + << "fragment: " << url.fragment( ) << endl + << endl; + + delete normalizer; + + return 0; +} |