summary | refs | log | tree | commit | diff
path: root/src/modules/processor/robotstxt/RobotsTxtProcessor.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/processor/robotstxt/RobotsTxtProcessor.hpp')
-rw-r--r-- src/modules/processor/robotstxt/RobotsTxtProcessor.hpp 14
1 files changed, 14 insertions, 0 deletions
diff --git a/src/modules/processor/robotstxt/RobotsTxtProcessor.hpp b/src/modules/processor/robotstxt/RobotsTxtProcessor.hpp
index a274f2b..532c741 100644
--- a/src/modules/processor/robotstxt/RobotsTxtProcessor.hpp
+++ b/src/modules/processor/robotstxt/RobotsTxtProcessor.hpp
@@ -9,6 +9,20 @@ class RobotsTxtProcessor : public Processor {
RobotsTxtProcessor( );
virtual ~RobotsTxtProcessor( );
virtual void process( RewindInputStream *s );
+
+ typedef enum {
+ UserAgentKeyword,
+ DisallowKeyword,
+ CrawlDelayKeyword,
+ SitemapKeyword,
+ UnknownKeyword,
+ NoKeyword
+ } KeywordType;
+
+ private:
+ KeywordType getKeyword( string::const_iterator &it, string::const_iterator end );
+ void skipSpaces( string::const_iterator &it, string::const_iterator end );
+ void handleLine( const std::string &s );
};
DECLARE_MODULE( Processor )