summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/GNUmakefile 16
-rw-r--r-- src/SimpleURLNormalizer.cpp 14
-rw-r--r-- src/SimpleURLNormalizer.hpp 13
-rw-r--r-- src/URL.hpp 12
-rw-r--r-- src/URLNormalizer.hpp 13
-rw-r--r-- tests/url/GNUmakefile 10
-rw-r--r-- tests/url/test1.MUST 6
-rw-r--r-- tests/url/test1.cpp 3
-rw-r--r-- tests/url/test2.MUST 6
-rw-r--r-- tests/url/test2.cpp 40
10 files changed, 125 insertions, 8 deletions
diff --git a/src/GNUmakefile b/src/GNUmakefile
index 816d6be..3d3d7b8 100644
--- a/src/GNUmakefile
+++ b/src/GNUmakefile
@@ -26,7 +26,7 @@ INCLUDE_LIBS += \
$(OPENSSL_LIBS)
endif
-CPP_OBJS = \
+LOCAL_STATIC_LIB_OBJS = \
URL.o \
LibFetchFetcher.o \
LibFetchRewindInputStream.o \
@@ -35,14 +35,24 @@ CPP_OBJS = \
ProtocolURLFilter.o \
DomainURLFilter.o \
ChainURLFilter.o \
- MemoryURLSeen.o
+ MemoryURLSeen.o \
+ SimpleURLNormalizer.o
+
+CPP_OBJS = \
+ $(LOCAL_STATIC_LIB_OBJS)
+
+LOCAL_STATIC_LIB = \
+ libcrawlingwolf.a
CPP_BINS = \
crawlingwolf$(EXE)
-include $(TOPDIR)/makefiles/gmake/sub.mk
-local_all:
+local_all: $(LOCAL_STATIC_LIB)
+
+$(LOCAL_STATIC_LIB): $(LOCAL_STATIC_LIB_OBJS) # static archive built from the library objects
+	$(AR) rcs $@ $^
local_clean:
diff --git a/src/SimpleURLNormalizer.cpp b/src/SimpleURLNormalizer.cpp
new file mode 100644
index 0000000..21c34ae
--- /dev/null
+++ b/src/SimpleURLNormalizer.cpp
@@ -0,0 +1,14 @@
+#include "SimpleURLNormalizer.hpp"
+
+SimpleURLNormalizer::SimpleURLNormalizer( )
+{ // nothing to set up: the class declares no data members
+}
+// Placeholder implementation: ignores both arguments and always reports success.
+bool SimpleURLNormalizer::normalize( URL &url, const URL contextUrl )
+{
+ (void)url; // cast to void marks the parameter as deliberately unused
+ (void)contextUrl;
+
+ return true;
+}
+
diff --git a/src/SimpleURLNormalizer.hpp b/src/SimpleURLNormalizer.hpp
new file mode 100644
index 0000000..433f4e8
--- /dev/null
+++ b/src/SimpleURLNormalizer.hpp
@@ -0,0 +1,13 @@
+#ifndef SIMPLEURLNORMALIZER_HPP
+#define SIMPLEURLNORMALIZER_HPP
+
+#include "URLNormalizer.hpp"
+// Concrete URLNormalizer implementation; normalize( ) is defined in SimpleURLNormalizer.cpp.
+class SimpleURLNormalizer : public URLNormalizer {
+ public:
+ SimpleURLNormalizer( );
+ // Overrides URLNormalizer::normalize; url is taken by reference, contextUrl by value.
+ bool normalize( URL &url, const URL contextUrl );
+};
+
+#endif // SIMPLEURLNORMALIZER_HPP
diff --git a/src/URL.hpp b/src/URL.hpp
index 0d1b113..fac0074 100644
--- a/src/URL.hpp
+++ b/src/URL.hpp
@@ -7,6 +7,9 @@
using namespace std;
class URL {
+ protected:
+ string m_url;
+
public:
URL( )
: m_url( "" ) {
@@ -28,7 +31,7 @@ class URL {
{
}
- string str( ) const
+ std::string str( ) const
{
return m_url;
}
@@ -54,6 +57,11 @@ class URL {
return "/";
}
+ std::string fragment( ) const
+ {
+ return ""; // always an empty string; m_url is not consulted
+ }
+
static URL Null;
bool operator!=( const URL &other ) const {
@@ -67,8 +75,6 @@ class URL {
template< typename CharT, typename TraitsT > friend
basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u );
- protected:
- string m_url;
};
template< typename CharT, typename TraitsT >
diff --git a/src/URLNormalizer.hpp b/src/URLNormalizer.hpp
new file mode 100644
index 0000000..a1f6abf
--- /dev/null
+++ b/src/URLNormalizer.hpp
@@ -0,0 +1,13 @@
+#ifndef URLNORMALIZER_HPP
+#define URLNORMALIZER_HPP
+
+#include "URL.hpp"
+// Abstract interface for URL normalizers; concrete classes implement normalize( ).
+class URLNormalizer {
+ public:
+ virtual ~URLNormalizer( ) { }
+ // Pure virtual; returns bool, and tests/url/test2.cpp treats false as a failure.
+ virtual bool normalize( URL &url, const URL contextUrl ) = 0;
+};
+
+#endif // URLNORMALIZER_HPP
diff --git a/tests/url/GNUmakefile b/tests/url/GNUmakefile
index e3913bd..81d7a0e 100644
--- a/tests/url/GNUmakefile
+++ b/tests/url/GNUmakefile
@@ -7,10 +7,12 @@ INCLUDE_DIRS = \
INCLUDE_LDFLAGS =
-INCLUDE_LIBS =
+INCLUDE_LIBS = \
+ $(TOPDIR)/src/libcrawlingwolf.a
TEST_CPP_BINS = \
- test1$(EXE)
+ test1$(EXE) \
+ test2$(EXE)
OBJS =
@@ -19,7 +21,11 @@ OBJS =
local_all:
local_clean:
+ -@rm -f *.db *.db-journal 2>/dev/null
+ -@rm -f *.RES *.DIFF
local_distclean:
local_test:
+ @-./exec_test test1 "output normal URL" http://www.andreasbaumann.cc/index.html
+ @-./exec_test test2 "normalize a relative URL" http://www.andreasbaumann.cc/index.html /software.html
diff --git a/tests/url/test1.MUST b/tests/url/test1.MUST
new file mode 100644
index 0000000..38a3a27
--- /dev/null
+++ b/tests/url/test1.MUST
@@ -0,0 +1,6 @@
+protocol: http
+port: 80
+domain: www.andreasbaumann.cc
+path: /index.html
+fragment:
+
diff --git a/tests/url/test1.cpp b/tests/url/test1.cpp
index 264bef0..64ae223 100644
--- a/tests/url/test1.cpp
+++ b/tests/url/test1.cpp
@@ -17,7 +17,10 @@ int main( int argc, char *argv[] )
URL url( urlstring );
cout << "protocol: " << url.protocol( ) << endl
+ << "port: " << url.port( ) << endl
<< "domain: " << url.domain( ) << endl
+ << "path: " << url.path( ) << endl
+ << "fragment: " << url.fragment( ) << endl
<< endl;
return 0;
diff --git a/tests/url/test2.MUST b/tests/url/test2.MUST
new file mode 100644
index 0000000..4718213
--- /dev/null
+++ b/tests/url/test2.MUST
@@ -0,0 +1,6 @@
+protocol: http
+port: 80
+domain: www.andreasbaumann.cc
+path: /software.html
+fragment:
+
diff --git a/tests/url/test2.cpp b/tests/url/test2.cpp
new file mode 100644
index 0000000..b9a3270
--- /dev/null
+++ b/tests/url/test2.cpp
@@ -0,0 +1,40 @@
+#include "URL.hpp"
+#include "SimpleURLNormalizer.hpp"
+
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+int main( int argc, char *argv[] )
+{
+ // Expects <base url> <partial url>; prints the components of the URL after normalize( ).
+ if( argc != 3 ) {
+ cerr << "usage: test2 <base url> <partial url>\n" << endl;
+ return 1;
+ }
+
+ URL baseUrl( argv[1] );
+ URL partialUrl( argv[2] );
+
+ // Automatic storage: released on every return path without explicit delete calls.
+ SimpleURLNormalizer simpleNormalizer;
+ // Call through the base-class reference so normalize( ) is dispatched virtually.
+ URLNormalizer &normalizer = simpleNormalizer;
+
+ // NOTE(review): url starts as the base URL and the partial URL is passed as contextUrl - confirm this order is intended.
+ URL url = baseUrl;
+ if( !normalizer.normalize( url, partialUrl ) ) {
+ cerr << "Normalization error!\n" << endl;
+ return 1;
+ }
+
+ cout << "protocol: " << url.protocol( ) << endl
+ << "port: " << url.port( ) << endl
+ << "domain: " << url.domain( ) << endl
+ << "path: " << url.path( ) << endl
+ << "fragment: " << url.fragment( ) << endl
+ << endl;
+
+ return 0;
+}