summary refs log tree commit diff
path: root/src/crawl/crawl.conf
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2014-10-10 11:26:05 +0200
committer Andreas Baumann <mail@andreasbaumann.cc> 2014-10-10 11:26:05 +0200
commit c5351f58bcf494a56ecfd17fe5e68eb3b17dac7d (patch)
tree bca845c55a2a7f407a856950a6293f0759d2e391 /src/crawl/crawl.conf
parent 5382e843f651f834c1df31bd494ed0638be07960 (diff)
download crawler-c5351f58bcf494a56ecfd17fe5e68eb3b17dac7d.tar.gz
crawler-c5351f58bcf494a56ecfd17fe5e68eb3b17dac7d.tar.bz2
added execution function to LuaVm
reorganized main lua.conf and calling several functions
Diffstat (limited to 'src/crawl/crawl.conf')
-rw-r--r-- src/crawl/crawl.conf 33
1 files changed, 24 insertions, 9 deletions
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 7c3fb80..816f23f 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -1,17 +1,12 @@
-local normalizer = GoogleURLNormalizer:new( )
-local baseUrl = normalizer:parseUrl( "http://www.base.com" )
-io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
-local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
-io.write( "URL is: " .. url:str( ) .. "\n" )
-normalizer:delete( )
-
-- global setting
crawler = {
-- stop after N documents
- stop_after_N_operations = 10
+ stop_after_N_operations = 10,
+ module_path = modules,
+ modules_search_recursive = true
}
logger = {
@@ -25,6 +20,11 @@ seeds = {
"http://wolframe.net"
}
+urlnormalizers = {
+ "mod_normalizer_simple",
+ "mod_normalizer_google"
+}
+
filters = {
-- allowed protocols to be fetched
protocols = {
@@ -39,6 +39,21 @@ filters = {
}
}
+
+function init( )
+ io.write( "Init..\n" )
+ normalizer = GoogleURLNormalizer:new( )
+end
+
+function destroy( )
+ io.write( "Destroy..\n" )
+ normalizer:delete( )
+end
+
function crawl( )
- io.write( "Crawling." )
+ io.write( "Crawling..\n" )
+ local baseUrl = normalizer:parseUrl( "http://www.base.com" )
+ io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
+ local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
+ io.write( "URL is: " .. url:str( ) .. "\n" )
end