diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-16 13:29:01 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2014-10-16 13:29:01 +0200 |
commit | 971d5d22e7117acb95c7903dd5b911b96fc97dcf (patch) | |
tree | 079fb0e064e1c4a35dbd27821e993b573d268ba2 /src/crawl/crawl.conf | |
parent | ff403df10813717698dc47e0b22f19d62c007cff (diff) | |
download | crawler-971d5d22e7117acb95c7903dd5b911b96fc97dcf.tar.gz crawler-971d5d22e7117acb95c7903dd5b911b96fc97dcf.tar.bz2 |
creating all module constructors now from Lua configuration
Diffstat (limited to 'src/crawl/crawl.conf')
-rw-r--r-- | src/crawl/crawl.conf | 38 |
1 file changed, 38 insertions, 0 deletions
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf index bfcd07b..fd9776f 100644 --- a/src/crawl/crawl.conf +++ b/src/crawl/crawl.conf @@ -17,7 +17,45 @@ modules = { urlnormalizers = { "mod_urlnormalizer_simple", "mod_urlnormalizer_googleurl" + }, + + urlfilters = { + "mod_urlfilter_host", + "mod_urlfilter_protocol" + }, + + urlchainfilters = { + "mod_urlfilter_chain" + }, + + urlfrontiers = { + "mod_frontier_memory" + }, + + fetchers = { + "mod_fetcher_libfetch", + "mod_fetcher_libcurl", + "mod_fetcher_winhttp" + }, + + urlseens = { + "mod_urlseen_memory" + }, + + dedupers = { + "mod_deduper_null" + }, + + processors = { + "mod_processor_htmllinkextract", + "mod_processor_robotstxt", + "mod_processor_sitemap" + }, + + typedetects = { + "mod_typedetect_libmagic" } + } -- seeds: URLS which are fed in the beginning to the URL frontier |