summary | refs | log | tree | commit | diff
path: root/src/crawl/crawl.conf
diff options
context:
space:
mode:
Diffstat (limited to 'src/crawl/crawl.conf')
-rw-r--r-- src/crawl/crawl.conf 11
1 files changed, 8 insertions, 3 deletions
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index fd9776f..154d90a 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -2,7 +2,7 @@
crawler = {
-- stop after N documents
- stop_after_N_operations = 10,
+ stop_after_N_operations = 0,
module_path = "modules",
@@ -82,7 +82,12 @@ filters = {
function init( )
io.write( "Init..\n" )
+ -- normalizer = urlnormalizers.create( "google_urlnormalizer" );
normalizer = GoogleURLNormalizer:new( )
+ -- normalizer2 = urlnormalizers.create( "simple_urlnormalizer" );
+ normalizer2 = SimpleURLNormalizer:new( )
+ base = tolua.cast( normalizer, "URLNormalizer" )
+ io.write( "type: " .. tolua.type( base ) .. "\n" )
end
function destroy( )
@@ -92,8 +97,8 @@ end
function crawl( )
io.write( "Crawling..\n" )
- local baseUrl = normalizer:parseUrl( "http://www.base.com" )
+ local baseUrl = base:parseUrl( "http://www.base.com" )
io.write( "base URL is: " .. baseUrl:str( ) .. "\n" )
- local url = normalizer:normalize( baseUrl, "/relativedir/relativefile.html" )
+ local url = base:normalize( baseUrl, "/relativedir/relativefile.html" )
io.write( "URL is: " .. url:str( ) .. "\n" )
end