diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-07-14 17:16:21 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-07-14 17:16:21 +0200 |
commit | 54cce110784d33d658b5f78286a98bee244a9eeb (patch) | |
tree | 9c4d998343e7dc88323ae8ef6d5a04c6b958df9c /streamhtmlparser/jsparser_fsm.config | |
parent | fcb682cb1955d362390665330fdf476cab7dc10b (diff) | |
download | crawler-54cce110784d33d658b5f78286a98bee244a9eeb.tar.gz crawler-54cce110784d33d658b5f78286a98bee244a9eeb.tar.bz2 |
added streamhtmlparser
Diffstat (limited to 'streamhtmlparser/jsparser_fsm.config')
-rw-r--r-- | streamhtmlparser/jsparser_fsm.config | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/streamhtmlparser/jsparser_fsm.config b/streamhtmlparser/jsparser_fsm.config new file mode 100644 index 0000000..830e13a --- /dev/null +++ b/streamhtmlparser/jsparser_fsm.config @@ -0,0 +1,157 @@ +# Copyright 2008 Google Inc. All Rights Reserved. +# Author: falmeida@google.com (Filipe Almeida) + +name = 'jsparser' + +comment = 'Simplified finite state machine for tracking of javascript states' + +condition('q', '\''), +condition('dq', '\"'), +condition('/', '/'), +condition('*', '*'), +condition('[', '['), +condition(']', ']'), +condition('lf', '\n'), +condition('backslash', '\\'), +condition('default', '[:default:]') + +# Main javascript body. +state(name = 'js_text', + external = 'text', + transitions = [ + ['q', 'js_q'], + ['dq', 'js_dq'], + ['/', 'js_slash'], + ['default', 'js_text'] + ]) + +# Single quoted string literal. +state(name = 'js_q', + external = 'q', + transitions = [ + ['backslash', 'js_q_e'], + ['q', 'js_text'], + ['default', 'js_q'] + ]) + +# Javascript escaped character in a single quoted string literal. +state(name = 'js_q_e', + external = 'q', + transitions = [ + ['default', 'js_q'] + ]) + +# Double quoted string literal +state(name = 'js_dq', + external = 'dq', + transitions = [ + ['backslash', 'js_dq_e'], + ['dq', 'js_text'], + ['default', 'js_dq'] + ]) + +# Javascript escaped character in a double quoted string literal. +state(name = 'js_dq_e', + external = 'dq', + transitions = [ + ['default', 'js_dq'] + ]) + +# Possible start of a javascript comment. +state(name = 'js_slash', + external = 'text', + transitions = [ + ['/', 'js_comment_ln'], + ['*', 'js_comment_ml'], + ['default', 'js_text'] + ]) + +# Possible start of a regular expression literal. +# +# The state diagram does not reach this state directly. 
When js_slash is +# reached, the function enter_state_js_slash() is called, which checks if the +# last token belongs to the set of tokens that can precede a regular +# expression, in which case it changes the state to js_regexp_slash. +# +# For more information please read the comments in +# jsparser.c:enter_state_js_slash(). +state(name = 'js_regexp_slash', + external = 'text', + transitions = [ + ['/', 'js_comment_ln'], + ['*', 'js_comment_ml'], + ['backslash', 'js_regexp_e'], + ['[', 'js_regexp_bracket'], + ['default', 'js_regexp'] + ]) + +# Regular expression literal. +state(name = 'js_regexp', + external = 'regexp', + transitions = [ + ['backslash', 'js_regexp_e'], + ['[', 'js_regexp_bracket'], + ['/', 'js_text'], + ['default', 'js_regexp'] + ]) + +# Regexp bracket expression +state(name = 'js_regexp_bracket', + external = 'regexp', + transitions = [ + ['backslash', 'js_regexp_bracket_e'], + [']', 'js_regexp'], + ['default', 'js_regexp_bracket'] + ]) + +# Backslash escaped regexp bracket expression +state(name = 'js_regexp_bracket_e', + external = 'regexp', + transitions = [ + ['default', 'js_regexp_bracket'] + ]) + +# Escaped regular expression char. +state(name = 'js_regexp_e', + external = 'regexp', + transitions = [ + ['default', 'js_regexp'] + ]) + +# Start of a single line javascript comment (//). +state(name = 'js_comment_ln', + external = 'comment', + transitions = [ + ['lf', 'js_comment_after'], + ['default', 'js_comment_ln'] + ]) + +# Start of a multiline javascript comment (/*). +state(name = 'js_comment_ml', + external = 'comment', + transitions = [ + ['*', 'js_comment_ml_close'], + ['default', 'js_comment_ml'] + ]) + +# Close of a multiline javascript comment (*/). +state(name = 'js_comment_ml_close', + external = 'comment', + transitions = [ + ['/', 'js_comment_after'], + ['default', 'js_comment_ml'] + ]) + +# Ending character of a javascript comment. 
+# It can either be a '/' in the case of a multiline comment, or a line +# terminator in the case of a single line comment. +# This is needed so we don't insert the '/' or the new line character into the +# ring buffer. +state(name = 'js_comment_after', + external = 'text', + transitions = [ + ['q', 'js_q'], + ['dq', 'js_dq'], + ['/', 'js_slash'], + ['default', 'js_text'] + ]) |