diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-14 15:14:50 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-14 15:14:50 +0200 |
commit | 836de6564bd8e4eca422d2d9c691b1949107e102 (patch) | |
tree | f1adbea299f3c94e3e7bc6716cb1fdb700fef3f3 /src | |
parent | 6032e79dc4c51fa4dc82751eab3bd0cdf0eac845 (diff) | |
download | crawler-836de6564bd8e4eca422d2d9c691b1949107e102.tar.gz crawler-836de6564bd8e4eca422d2d9c691b1949107e102.tar.bz2 |
fixed linking of url/fetcher test (only links, doesn't run yet)
Diffstat (limited to 'src')
-rw-r--r-- | src/crawl/crawl.conf | 12 |
1 files changed, 7 insertions, 5 deletions
```diff
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 8169ae1..25e4003 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -13,6 +13,13 @@ logger = {
 	level = "DEBUG"
 }
 
+modules = {
+	urlnormalizers = {
+		"mod_normalizer_simple",
+		"mod_normalizer_google"
+	}
+}
+
 -- seeds: URLS which are fed in the beginning to the URL frontier
 
 seeds = {
@@ -20,11 +27,6 @@ seeds = {
 	"http://wolframe.net"
 }
 
-urlnormalizers = {
-	"mod_normalizer_simple",
-	"mod_normalizer_google"
-}
-
 filters = {
 	-- allowed protocols to be fetched
 	protocols = {
```