summaryrefslogtreecommitdiff
path: root/streamhtmlparser/tools/generate_fsm.py
diff options
context:
space:
mode:
Diffstat (limited to 'streamhtmlparser/tools/generate_fsm.py')
-rwxr-xr-xstreamhtmlparser/tools/generate_fsm.py331
1 files changed, 331 insertions, 0 deletions
diff --git a/streamhtmlparser/tools/generate_fsm.py b/streamhtmlparser/tools/generate_fsm.py
new file mode 100755
index 0000000..fac54a0
--- /dev/null
+++ b/streamhtmlparser/tools/generate_fsm.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: Filipe Almeida
+#
+# Generate a C include file from a finite state machine definition.
+#
+# Right now the form is the one expected by htmlparser.cc so this file is pretty
+# tightly coupled with htmlparser.cc.
+
+__author__ = 'falmeida@google.com (Filipe Almeida)'
+
+import sys
+
+from fsm_config import FSMConfig
+
+
+class FSMGenerateAbstract(object):
+
+ def __init__(self, config):
+ self._config = config
+
+ def Generate(self):
+ """Returns the generated FSM description for the specified language.
+
+ Raises a TypeError, because abstract methods can not be called.
+
+ Raises:
+ TypeError
+ """
+ raise TypeError('Abstract method %s.%s called' % (self._class.__name__,
+ self._function))
+
+
+class FSMGenerateC(FSMGenerateAbstract):
+ """Generate the C definition from a statemachien configuration object."""
+
+ TABSTOP_ = 2
+
+ def _Prefix(self):
+ """Return a c declaration prefix."""
+
+ return self._config.name.lower() + '_'
+
+ def _StateInternalC(self, st):
+ """Return the internal name of the state."""
+
+ return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper())
+
+ def _StateExternalC(self, st):
+ """Return the external name of the state."""
+
+ return '%sSTATE_%s' % (self._Prefix().upper(), st.upper())
+
+ def _MakeTuple(self, data):
+ """Converts data to a string representation of a C tuple."""
+
+ return '{ %s }' % ', '.join(data)
+
+ def _CreateHeader(self):
+ """Print the include file header."""
+
+ out = []
+
+ if self._config.comment:
+ out.append('/* ' + self._config.comment)
+ else:
+ out.append('/* State machine definition for ' + self._config.name)
+ out.append(' * Auto generated by generate_fsm.py. Please do not edit.')
+ out.append(' */')
+
+ return '\n'.join(out)
+
+ def _ListToIndentedString(self, list):
+ indented_list = [' ' + e for e in list]
+ return ',\n'.join(indented_list)
+
+ def _CreateEnum(self, name, data):
+ """Print a c enum definition."""
+
+ return 'enum %s {\n%s\n};\n' % (name,
+ self._ListToIndentedString(data))
+
+ def _CreateStructList(self, name, type, data):
+ """Print a c flat list.
+
+ Generic function to print list in c in the form of a struct.
+
+ Args:
+ name: name of the structure.
+ type: type of the struct.
+ data: contents of the struct as a list of elements
+
+ Returns:
+ String with the generated list.
+ """
+
+ return "static const %s %s[] = {\n%s\n};\n" % (
+ type,
+ name,
+ self._ListToIndentedString(data))
+
+ def _CreateStatesEnum(self):
+ """Print the internal states enum.
+
+ Prints an enum containing all the valid states.
+
+ Returns:
+ String containing a C enumeration of the states.
+ """
+ list = [] # output list
+
+ for state in self._config.states:
+ list.append(self._StateInternalC(state))
+ return self._CreateEnum(self._Prefix() + 'state_internal_enum', list)
+
+ def _CreateStatesExternal(self):
+ """Print a struct with a mapping from internal to external states."""
+ list = [] # output list
+
+ for state_name in self._config.states:
+ list.append(self._StateExternalC(
+ self._config.states[state_name].external_name))
+
+ return self._CreateStructList(self._Prefix() + 'states_external',
+ 'int',
+ list)
+
+ def _CreateStatesInternalNames(self):
+ """Return a struct mapping internal states to a strings."""
+ out = [] # output list
+
+ for state_name in self._config.states:
+ out.append('"' + state_name + '"')
+
+ return self._CreateStructList(self._Prefix() + 'states_internal_names',
+ 'char *',
+ out)
+
+ def _CreateNumStates(self):
+ """Print a Macro defining the number of states."""
+
+ return "#define %s_NUM_STATES %s" % (self._config.name.upper(),
+ str(len(self._config.states) + 1))
+
+ def _ExpandBracketExpression(self, expression):
+ """Expand ranges in a regexp bracket expression.
+
+ Returns a string with the ranges in a bracket expression expanded.
+
+ The bracket expression is similar to grep(1) or regular expression bracket
+ expressions but it does not support the negation (^) modifier or named
+ character classes like [:alpha:] or [:alnum:].
+
+ The especial character class [:default:] will expand to all elements in the
+ ascii range.
+
+ For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'.
+
+ Args:
+ expression: A regexp bracket expression. Ie: 'A-Z0-9'.
+
+ Returns:
+ A string with the ranges in the bracket expression expanded.
+ """
+
+ def ExpandRange(start, end):
+ """Return a sequence of characters between start and end.
+
+ Args:
+ start: first character of the sequence.
+ end: last character of the sequence.
+
+ Returns:
+ string containing the sequence of characters between start and end.
+ """
+ return [chr(c) for c in range(ord(start), ord(end) + 1)]
+
+ def ListNext(input_list):
+ """Pop the first element of a list.
+
+ Args:
+ input_list: python list object.
+
+ Returns:
+ First element of the list or None if the list is empty.
+ """
+ if input_list:
+ return input_list.pop(0)
+ else:
+ return None
+
+ out = [] # List containing the output
+
+ # Special case for the character class [:default:]
+ if expression == '[:default:]':
+ out = [chr(c) for c in range(0, 255)]
+ return ''.join(out)
+
+ chars = [c for c in expression] # list o characters in the expression.
+
+ current = ListNext(chars)
+ while current:
+ next = ListNext(chars)
+ if next == '-':
+ next = ListNext(chars)
+ if next:
+ out.extend(ExpandRange(current, next))
+ else:
+ out.append(current)
+ out.append('-')
+ current = ListNext(chars)
+ else:
+ out.append(current)
+ current = next
+
+ return ''.join(out)
+
+ def _CreateTransitionTable(self):
+ """Print the state transition list.
+
+ Returns a set of C structures that define the transition table for the state
+ machine. This structure is a list of lists of ints (int **). The outer list
+ indexes the source state and the inner list contains the destination state
+ for each of the possible input characters:
+
+ const int * const* transitions[source][input] == destination.
+
+ The conditions are mapped from the conditions variable.
+
+ Returns:
+ String containing the generated transition table in a C struct.
+ """
+ out = [] # output list
+ default_state = 'STATEMACHINE_ERROR'
+ state_table = {}
+
+ for state in self._config.states:
+ state_table[state] = [default_state for col in xrange(255)]
+
+ # We process the transition in reverse order while updating the table.
+ for i_transition in range(len(self._config.transitions) - 1, -1, -1):
+ transition = self._config.transitions[i_transition]
+ (condition_name, src, dst) = (transition.condition,
+ transition.source,
+ transition.destination)
+ condition = self._config.conditions[condition_name]
+ char_list = self._ExpandBracketExpression(condition)
+
+ for c in char_list:
+ state_table[src][ord(c)] = self._StateInternalC(dst)
+
+ # Create the inner lists which map input characters to destination states.
+ for state in self._config.states:
+ transition_row = []
+ for c in xrange(0, 255):
+ transition_row.append(' /* %06s */ %s' % (repr(chr(c)),
+ state_table[state][c]))
+
+ out.append(self._CreateStructList('%stransition_row_%s' %
+ (self._Prefix(),
+ state),
+ 'int',
+ transition_row))
+ out.append('\n')
+
+ # Create the outer list, which map source states to input characters.
+ out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() +
+ 'state_transitions'))
+
+ row_list = [' %stransition_row_%s' %
+ (self._Prefix(), row) for row in self._config.states]
+ out.append(',\n'.join(row_list))
+ out.append('\n};\n')
+
+ return ''.join(out)
+
+ def Generate(self):
+ """Returns the generated the C include statements for the statemachine."""
+
+ print '\n'.join((self._CreateHeader(),
+ self._CreateNumStates(),
+ self._CreateStatesEnum(),
+ self._CreateStatesExternal(),
+ self._CreateStatesInternalNames(),
+ self._CreateTransitionTable()))
+
+
+def main():
+ if len(sys.argv) != 2:
+ print "usage: generate_fsm.py config_file"
+ sys.exit(1)
+
+ config = FSMConfig()
+ config.Load(sys.argv[1])
+
+ gen = FSMGenerateC(config)
+ gen.Generate()
+
+
+if __name__ == "__main__":
+ main()