blob: 60ec5cf35590977eaee72cb50a8d289d3ab2cddd (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# Path prefix used below to reach the src/, include/ and makefiles/ trees.
TOPDIR = ../..

# No sub-directories are listed for this directory.
SUBDIRS =

# Commented-out definition kept for reference:
#INCLUDE_CXXFLAGS = -DUSE_MODULELOADER

# Header search path: every entry is a directory below $(TOPDIR), turned
# into a -I option by addprefix.
INCLUDE_DIRS = $(addprefix -I$(TOPDIR)/, \
    src \
    src/modules/urlnormalizer/simpleurl \
    src/modules/urlnormalizer/googleurl \
    include/module \
    include/util \
    include/crawler)

# When WITH_LUA is 1, add the luaglue headers plus whatever
# $(TOLUA_INCLUDES) holds, and define WITH_LUA for the C++ compiler.
ifeq ($(WITH_LUA),1)
INCLUDE_DIRS += -I$(TOPDIR)/include/luaglue $(TOLUA_INCLUDES)
INCLUDE_CXXFLAGS += -DWITH_LUA
endif
# Library search path: directories below $(TOPDIR), each turned into a
# -L option by addprefix.
INCLUDE_LDFLAGS = $(addprefix -L$(TOPDIR)/, \
    src/libcrawler \
    src/liblogger \
    googleurl)

# When WITH_LUA is 1, also search the luaglue and util library directories
# and append whatever $(TOLUA_LDFLAGS) holds.
ifeq ($(WITH_LUA),1)
INCLUDE_LDFLAGS += $(addprefix -L$(TOPDIR)/src/,libluaglue libutil) $(TOLUA_LDFLAGS)
endif
# Libraries to link, in this exact order: curl, the two URL normalizer
# static archives (given by path), then googleurl, crawler, logger and ICU.
INCLUDE_LIBS = -lcurl \
    $(TOPDIR)/src/modules/urlnormalizer/simpleurl/libsimpleurlnormalizer.a \
    $(TOPDIR)/src/modules/urlnormalizer/googleurl/libgoogleurlnormalizer.a \
    -lgoogleurl -lcrawler -llogger \
    -licui18n -licuuc

# When WITH_LUA is 1, append the Lua-related libraries and whatever
# $(TOLUA_LIBS) holds after the base list.
ifeq ($(WITH_LUA),1)
INCLUDE_LIBS += -lluaglue -lutil -llua $(TOLUA_LIBS)
endif
# The single test program of this directory; $(EXE) is appended to its name
# (presumably a platform-specific executable suffix - confirm in sub.mk).
TEST_CPP_BINS = test1$(EXE)

# No object files are listed here.
OBJS =

# Pull in the shared makefile fragment. The leading '-' makes a missing
# sub.mk a silent no-op instead of an error.
-include $(TOPDIR)/makefiles/gmake/sub.mk
# Directory-local targets. None of them produces a file of the same name,
# so they are declared phony; a stray file called e.g. "local_clean" can
# then never make the target look up to date.
# NOTE(review): presumably these are hooked into generic targets by the
# included sub.mk - confirm there.
.PHONY: local_all local_clean local_distclean

# Nothing to do locally.
local_all:

# Remove database files (*.db, *.db-journal) and the *.RES / *.DIFF files
# left in this directory. '-' ignores a failing rm and '@' keeps the
# commands from being echoed.
local_clean:
	-@rm -f *.db *.db-journal 2>/dev/null
	-@rm -f *.RES *.DIFF

# Nothing to do locally.
local_distclean:
# Run the URL test cases once per normalizer implementation
# (simple_urlnormalizer, then google_urlnormalizer).
#
# Each case calls the ./exec_test helper script with: the test binary name
# (test1), a test id, a description, the normalizer name, the operation
# (parse or normalize) and the URL argument(s).
# NOTE(review): the argument meanings are inferred from the call sites;
# confirm against exec_test.
#
# The whole loop is a single shell command (lines joined with '\'), so the
# shell variable METHOD is visible to every case; '$$' passes a literal '$'
# to the shell. '@' suppresses echoing of the loop, '-' makes make ignore
# its exit status, and the cases are chained with ';' so a failing case
# does not stop the remaining ones.
.PHONY: local_test
local_test:
	@-for METHOD in simple_urlnormalizer google_urlnormalizer; do \
		echo "Using URL normalizer '$$METHOD'.." ; \
		./exec_test test1 test1 "parse illegal protocol" $$METHOD parse www.andreasbaumann.cc ; \
		./exec_test test1 test2 "parse normal start URL without slash" $$METHOD parse http://www.andreasbaumann.cc ; \
		./exec_test test1 test3 "parse normal start URL with slash" $$METHOD parse http://www.andreasbaumann.cc/ ; \
		./exec_test test1 test4 "parse normal URL" $$METHOD parse http://www.andreasbaumann.cc/index.html ; \
		./exec_test test1 test5 "parse normal URL with default port" $$METHOD parse http://www.andreasbaumann.cc:80/index.html ; \
		./exec_test test1 test6 "parse normal URL with non-standard port" $$METHOD parse http://www.andreasbaumann.cc:8080/index.html ; \
		./exec_test test1 test100 "normalize a relative URL" $$METHOD normalize http://www.andreasbaumann.cc/index.html /software.html ; \
		./exec_test test1 test101 "absolute URL in HTML content" $$METHOD normalize http://www.andreasbaumann.cc/index.html http://www.yahoo.com/page.html ; \
		./exec_test test1 test102 "path normalization, relative path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html bdir/page.html ; \
		./exec_test test1 test103 "path normalization, absolute path" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html /bdir/page.html ; \
		./exec_test test1 test104 "path normalization, current dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ./bdir/page.html ; \
		./exec_test test1 test105 "path normalization, previous dir" $$METHOD normalize http://www.andreasbaumann.cc/adir/index.html ../bdir/page.html ; \
	done
|