diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-04 15:42:51 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-04 15:42:51 +0200 |
commit | 39d78b3dd658db518740c561f0b4e80a43987a38 (patch) | |
tree | 254f224c2e109de8b4cbfac821c895ec891de1f3 | |
parent | 0c92e873518ce6a92caeba0be81a0d81d16c6ed8 (diff) | |
download | crawler-39d78b3dd658db518740c561f0b4e80a43987a38.tar.gz crawler-39d78b3dd658db518740c561f0b4e80a43987a38.tar.bz2 |
rearrangement for url testing
-rw-r--r-- | README.3rdPARTY | 2 | ||||
-rw-r--r-- | docs/LINKS | 6 | ||||
-rw-r--r-- | makefiles/gmake/help.mk | 6 | ||||
-rw-r--r-- | tests/GNUmakefile | 2 | ||||
-rw-r--r-- | tests/googleurl/GNUmakefile | 24 | ||||
-rw-r--r-- | tests/googleurl/test1.cpp | 4 | ||||
-rw-r--r-- | tests/simpleurl/GNUmakefile (renamed from tests/url/GNUmakefile) | 0 | ||||
-rwxr-xr-x | tests/simpleurl/exec_test (renamed from tests/url/exec_test) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test1.MUST (renamed from tests/url/test1.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test1.cpp (renamed from tests/url/test1.cpp) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test100.MUST (renamed from tests/url/test100.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test101.MUST (renamed from tests/url/test101.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test102.MUST (renamed from tests/url/test102.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test103.MUST (renamed from tests/url/test103.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test104.MUST (renamed from tests/url/test104.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test105.MUST (renamed from tests/url/test105.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test2.MUST (renamed from tests/url/test2.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test2.cpp (renamed from tests/url/test2.cpp) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test3.MUST (renamed from tests/url/test3.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test4.MUST (renamed from tests/url/test4.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test5.MUST (renamed from tests/url/test5.MUST) | 0 | ||||
-rw-r--r-- | tests/simpleurl/test6.MUST (renamed from tests/url/test6.MUST) | 0 |
22 files changed, 42 insertions, 2 deletions
diff --git a/README.3rdPARTY b/README.3rdPARTY index 4f89092..2e9ac56 100644 --- a/README.3rdPARTY +++ b/README.3rdPARTY @@ -39,7 +39,7 @@ http://code.google.com/p/google-url/ - flattened hierarchy - renamed *.cc to *.cpp - removed a lot of compilation problems: - - illegal external termplate instationations (at least in C98++ they + - illegal external template instantiations (at least in C98++ they were illegal) - superflous commas at end of enumerations - parameters shadowing member or member functions of classes @@ -4,6 +4,8 @@ http://mercator.comm.nsdlib.org/ authors working for Microsoft now :-) +Some Java roboter frameworks: + heritrix crawler4j @@ -12,6 +14,10 @@ mainly dead or unusable: jspider websphinx +A C++ web robot + +http://code.google.com/p/whalebot/ + Javascript support phantomjs http://code.google.com/p/phantomjs/ diff --git a/makefiles/gmake/help.mk b/makefiles/gmake/help.mk index 2904c39..f9b64b4 100644 --- a/makefiles/gmake/help.mk +++ b/makefiles/gmake/help.mk @@ -36,6 +36,11 @@ WITH_LOCAL_STREAMHTMLPARSER=1 use Google stream HTML 4 parser WITH_LIBXML2=1 build the libxml2 parser +URL parsing and normalization: + +WITH_LOCAL_GOOGLEURL=1 use Google URL for normalization/parsing +WITH_ICU=1 enable ICU support for URL parsing in Google URL + scripting support: WITH_LUA=1 use Lua for configuration and scripting @@ -47,5 +52,6 @@ ENABLE_NLS=0 Don't build gettext NLS support (default is on) Example: make WITH_SSL=1 WITH_SQLITE3=1 WITH_PGSQL=1 \ WITH_LOCAL_LIBFETCH=1 WITH_LIBXML2=1 \ + WITH_LOCAL_GOOGLEURL=1 WITH_ICU=1 \ WITH_LUA=1 diff --git a/tests/GNUmakefile b/tests/GNUmakefile index f582bbb..8931f49 100644 --- a/tests/GNUmakefile +++ b/tests/GNUmakefile @@ -1,6 +1,6 @@ TOPDIR = .. 
-SUBDIRS = url streamhtmlparser libfetch curl psql sqlite +SUBDIRS = simpleurl googleurl streamhtmlparser libfetch curl psql sqlite -include $(TOPDIR)/makefiles/gmake/sub.mk diff --git a/tests/googleurl/GNUmakefile b/tests/googleurl/GNUmakefile new file mode 100644 index 0000000..6e573aa --- /dev/null +++ b/tests/googleurl/GNUmakefile @@ -0,0 +1,24 @@ +TOPDIR = ../.. + +SUBDIRS = + +INCLUDE_DIRS = \ + +INCLUDE_LDFLAGS = + +INCLUDE_LIBS = \ + +TEST_CPP_BINS = \ + test1$(EXE) + +OBJS = + +-include $(TOPDIR)/makefiles/gmake/sub.mk + +local_all: + +local_clean: + +local_distclean: + +local_test: diff --git a/tests/googleurl/test1.cpp b/tests/googleurl/test1.cpp new file mode 100644 index 0000000..703f81f --- /dev/null +++ b/tests/googleurl/test1.cpp @@ -0,0 +1,4 @@ +int main( void ) +{ + return 0; +} diff --git a/tests/url/GNUmakefile b/tests/simpleurl/GNUmakefile index f0a28b0..f0a28b0 100644 --- a/tests/url/GNUmakefile +++ b/tests/simpleurl/GNUmakefile diff --git a/tests/url/exec_test b/tests/simpleurl/exec_test index 92b656f..92b656f 100755 --- a/tests/url/exec_test +++ b/tests/simpleurl/exec_test diff --git a/tests/url/test1.MUST b/tests/simpleurl/test1.MUST index 1b6af48..1b6af48 100644 --- a/tests/url/test1.MUST +++ b/tests/simpleurl/test1.MUST diff --git a/tests/url/test1.cpp b/tests/simpleurl/test1.cpp index 23c7d74..23c7d74 100644 --- a/tests/url/test1.cpp +++ b/tests/simpleurl/test1.cpp diff --git a/tests/url/test100.MUST b/tests/simpleurl/test100.MUST index 40fb968..40fb968 100644 --- a/tests/url/test100.MUST +++ b/tests/simpleurl/test100.MUST diff --git a/tests/url/test101.MUST b/tests/simpleurl/test101.MUST index b4c5eca..b4c5eca 100644 --- a/tests/url/test101.MUST +++ b/tests/simpleurl/test101.MUST diff --git a/tests/url/test102.MUST b/tests/simpleurl/test102.MUST index 7482d26..7482d26 100644 --- a/tests/url/test102.MUST +++ b/tests/simpleurl/test102.MUST diff --git a/tests/url/test103.MUST b/tests/simpleurl/test103.MUST index 085a06c..085a06c 100644 --- 
a/tests/url/test103.MUST +++ b/tests/simpleurl/test103.MUST diff --git a/tests/url/test104.MUST b/tests/simpleurl/test104.MUST index 7482d26..7482d26 100644 --- a/tests/url/test104.MUST +++ b/tests/simpleurl/test104.MUST diff --git a/tests/url/test105.MUST b/tests/simpleurl/test105.MUST index 085a06c..085a06c 100644 --- a/tests/url/test105.MUST +++ b/tests/simpleurl/test105.MUST diff --git a/tests/url/test2.MUST b/tests/simpleurl/test2.MUST index 92158a6..92158a6 100644 --- a/tests/url/test2.MUST +++ b/tests/simpleurl/test2.MUST diff --git a/tests/url/test2.cpp b/tests/simpleurl/test2.cpp index 4b6aa0d..4b6aa0d 100644 --- a/tests/url/test2.cpp +++ b/tests/simpleurl/test2.cpp diff --git a/tests/url/test3.MUST b/tests/simpleurl/test3.MUST index 92158a6..92158a6 100644 --- a/tests/url/test3.MUST +++ b/tests/simpleurl/test3.MUST diff --git a/tests/url/test4.MUST b/tests/simpleurl/test4.MUST index 0649e10..0649e10 100644 --- a/tests/url/test4.MUST +++ b/tests/simpleurl/test4.MUST diff --git a/tests/url/test5.MUST b/tests/simpleurl/test5.MUST index 0649e10..0649e10 100644 --- a/tests/url/test5.MUST +++ b/tests/simpleurl/test5.MUST diff --git a/tests/url/test6.MUST b/tests/simpleurl/test6.MUST index de9b556..de9b556 100644 --- a/tests/url/test6.MUST +++ b/tests/simpleurl/test6.MUST |