summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2014-10-15 17:00:12 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2014-10-15 17:00:12 +0200
commit14e5ae07984d3e898d452ec3ddda10afac01be54 (patch)
tree8e38f40e0052a792b63c4733525a28579db46ab2
parent7c1364816bce2ecfad1ecc118225b1fa6f28faed (diff)
downloadcrawler-14e5ae07984d3e898d452ec3ddda10afac01be54.tar.gz
crawler-14e5ae07984d3e898d452ec3ddda10afac01be54.tar.bz2
first preliminary module searcher
-rw-r--r--include/util/StringUtils.hpp2
-rw-r--r--src/crawl/crawl.conf4
-rwxr-xr-xsrc/crawl/crawl.cpp23
-rw-r--r--src/libutil/StringUtils.cpp12
4 files changed, 32 insertions, 9 deletions
diff --git a/include/util/StringUtils.hpp b/include/util/StringUtils.hpp
index af8e82f..f2615e0 100644
--- a/include/util/StringUtils.hpp
+++ b/include/util/StringUtils.hpp
@@ -8,5 +8,7 @@
UTIL_DLL_VISIBLE bool stringicasecmp( const std::string &s1, const std::string &s2 );
UTIL_DLL_VISIBLE std::vector<std::string> split( const std::string &s, const std::string &delimiter, bool keepEmpty = true );
+UTIL_DLL_VISIBLE bool endswith( const std::string &s, const std::string &endstring );
+UTIL_DLL_VISIBLE bool startswith( const std::string &s, const std::string &startstring );
#endif
diff --git a/src/crawl/crawl.conf b/src/crawl/crawl.conf
index 25e4003..bfcd07b 100644
--- a/src/crawl/crawl.conf
+++ b/src/crawl/crawl.conf
@@ -15,8 +15,8 @@ logger = {
modules = {
urlnormalizers = {
- "mod_normalizer_simple",
- "mod_normalizer_google"
+ "mod_urlnormalizer_simple",
+ "mod_urlnormalizer_googleurl"
}
}
diff --git a/src/crawl/crawl.cpp b/src/crawl/crawl.cpp
index 88f6aa3..147b2cc 100755
--- a/src/crawl/crawl.cpp
+++ b/src/crawl/crawl.cpp
@@ -14,6 +14,7 @@
#include "LuaVM.hpp"
+#include "StringUtils.hpp"
#include "FileUtils.hpp"
#include <set>
@@ -97,15 +98,23 @@ int main( int /* argc */, char *argv[] )
<< ( modulesSearchRecursive ? "(recursive)" : "" );
vector<string> entries = directory_entries( modulePath, true, modulesSearchRecursive );
- vector<string>::const_iterator it, end = entries.end( );
- for( it = entries.begin( ); it != end; it++ ) {
- cout << (*it) << endl;
- }
vector<string> modules = luaVm.getStringArray( "modules.urlnormalizers" );
- end = modules.end( );
- for( it = modules.begin( ); it != end; it++ ) {
- cout << (*it) << endl;
+ vector<string>::const_iterator it2, end2 = modules.end( );
+ for( it2 = modules.begin( ); it2 != end2; it2++ ) {
+#ifndef _WIN32
+ string module = (*it2) + ".so";
+#else
+ string module = (*it2) + ".dll";
+#endif
+ cout << "Searching for module '" << module << "'" << endl;
+
+ vector<string>::const_iterator it, end = entries.end( );
+ for( it = entries.begin( ); it != end; it++ ) {
+ if( endswith( (*it), module ) ) {
+ cout << " Found inf file '" << (*it) << "'" << endl;
+ }
+ }
}
vector<string> normalizerModules;
diff --git a/src/libutil/StringUtils.cpp b/src/libutil/StringUtils.cpp
index 13be8d4..640ce20 100644
--- a/src/libutil/StringUtils.cpp
+++ b/src/libutil/StringUtils.cpp
@@ -45,3 +45,15 @@ std::vector<std::string> split( const string &s, const string &delimiter, bool k
return result;
}
+
/// Tests whether a string ends with a given suffix.
///
/// @param s          the string to inspect
/// @param endstring  the suffix to look for
/// @return true if the last endstring.length( ) characters of s are exactly
///         endstring; an empty endstring matches every string
bool endswith( const std::string &s, const std::string &endstring )
{
	// A suffix longer than the string can never match; checking this first
	// also keeps the offset computed below from underflowing.
	if( endstring.length( ) > s.length( ) ) {
		return false;
	}

	// Compare only the tail of s. This avoids scanning the whole string and
	// avoids storing a std::string::size_type in a narrower integer, which
	// would break the comparison against std::string::npos on 64-bit targets.
	const std::string::size_type offset = s.length( ) - endstring.length( );
	return s.compare( offset, endstring.length( ), endstring ) == 0;
}
+
/// Tests whether a string begins with a given prefix.
///
/// @param s            the string to inspect
/// @param startstring  the prefix to look for
/// @return true if the first startstring.length( ) characters of s are
///         exactly startstring; an empty startstring matches every string
bool startswith( const std::string &s, const std::string &startstring )
{
	// A prefix longer than the string can never match.
	if( startstring.length( ) > s.length( ) ) {
		return false;
	}

	// Anchor the comparison at offset 0. A plain find( ) would also accept
	// matches in the middle of s, and storing its result in a narrower
	// integer makes the std::string::npos check unreliable on 64-bit targets.
	return s.compare( 0, startstring.length( ), startstring ) == 0;
}