#!/usr/bin/env python # # Copyright (c) 2008, Google Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # --- # Author: Filipe Almeida # # Generate a C include file from a finite state machine definition. # # Right now the form is the one expected by htmlparser.cc so this file is pretty # tightly coupled with htmlparser.cc. __author__ = 'falmeida@google.com (Filipe Almeida)' import sys from fsm_config import FSMConfig class FSMGenerateAbstract(object): def __init__(self, config): self._config = config def Generate(self): """Returns the generated FSM description for the specified language. Raises a TypeError, because abstract methods can not be called. Raises: TypeError """ raise TypeError('Abstract method %s.%s called' % (self._class.__name__, self._function)) class FSMGenerateC(FSMGenerateAbstract): """Generate the C definition from a statemachien configuration object.""" TABSTOP_ = 2 def _Prefix(self): """Return a c declaration prefix.""" return self._config.name.lower() + '_' def _StateInternalC(self, st): """Return the internal name of the state.""" return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper()) def _StateExternalC(self, st): """Return the external name of the state.""" return '%sSTATE_%s' % (self._Prefix().upper(), st.upper()) def _MakeTuple(self, data): """Converts data to a string representation of a C tuple.""" return '{ %s }' % ', '.join(data) def _CreateHeader(self): """Print the include file header.""" out = [] if self._config.comment: out.append('/* ' + self._config.comment) else: out.append('/* State machine definition for ' + self._config.name) out.append(' * Auto generated by generate_fsm.py. Please do not edit.') out.append(' */') return '\n'.join(out) def _ListToIndentedString(self, list): indented_list = [' ' + e for e in list] return ',\n'.join(indented_list) def _CreateEnum(self, name, data): """Print a c enum definition.""" return 'enum %s {\n%s\n};\n' % (name, self._ListToIndentedString(data)) def _CreateStructList(self, name, type, data): """Print a c flat list. Generic function to print list in c in the form of a struct. Args: name: name of the structure. type: type of the struct. data: contents of the struct as a list of elements Returns: String with the generated list. """ return "static const %s %s[] = {\n%s\n};\n" % ( type, name, self._ListToIndentedString(data)) def _CreateStatesEnum(self): """Print the internal states enum. Prints an enum containing all the valid states. Returns: String containing a C enumeration of the states. """ list = [] # output list for state in self._config.states: list.append(self._StateInternalC(state)) return self._CreateEnum(self._Prefix() + 'state_internal_enum', list) def _CreateStatesExternal(self): """Print a struct with a mapping from internal to external states.""" list = [] # output list for state_name in self._config.states: list.append(self._StateExternalC( self._config.states[state_name].external_name)) return self._CreateStructList(self._Prefix() + 'states_external', 'int', list) def _CreateStatesInternalNames(self): """Return a struct mapping internal states to a strings.""" out = [] # output list for state_name in self._config.states: out.append('"' + state_name + '"') return self._CreateStructList(self._Prefix() + 'states_internal_names', 'char *', out) def _CreateNumStates(self): """Print a Macro defining the number of states.""" return "#define %s_NUM_STATES %s" % (self._config.name.upper(), str(len(self._config.states) + 1)) def _ExpandBracketExpression(self, expression): """Expand ranges in a regexp bracket expression. Returns a string with the ranges in a bracket expression expanded. The bracket expression is similar to grep(1) or regular expression bracket expressions but it does not support the negation (^) modifier or named character classes like [:alpha:] or [:alnum:]. The especial character class [:default:] will expand to all elements in the ascii range. For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'. Args: expression: A regexp bracket expression. Ie: 'A-Z0-9'. Returns: A string with the ranges in the bracket expression expanded. """ def ExpandRange(start, end): """Return a sequence of characters between start and end. Args: start: first character of the sequence. end: last character of the sequence. Returns: string containing the sequence of characters between start and end. """ return [chr(c) for c in range(ord(start), ord(end) + 1)] def ListNext(input_list): """Pop the first element of a list. Args: input_list: python list object. Returns: First element of the list or None if the list is empty. """ if input_list: return input_list.pop(0) else: return None out = [] # List containing the output # Special case for the character class [:default:] if expression == '[:default:]': out = [chr(c) for c in range(0, 255)] return ''.join(out) chars = [c for c in expression] # list o characters in the expression. current = ListNext(chars) while current: next = ListNext(chars) if next == '-': next = ListNext(chars) if next: out.extend(ExpandRange(current, next)) else: out.append(current) out.append('-') current = ListNext(chars) else: out.append(current) current = next return ''.join(out) def _CreateTransitionTable(self): """Print the state transition list. Returns a set of C structures that define the transition table for the state machine. This structure is a list of lists of ints (int **). The outer list indexes the source state and the inner list contains the destination state for each of the possible input characters: const int * const* transitions[source][input] == destination. The conditions are mapped from the conditions variable. Returns: String containing the generated transition table in a C struct. """ out = [] # output list default_state = 'STATEMACHINE_ERROR' state_table = {} for state in self._config.states: state_table[state] = [default_state for col in xrange(255)] # We process the transition in reverse order while updating the table. for i_transition in range(len(self._config.transitions) - 1, -1, -1): transition = self._config.transitions[i_transition] (condition_name, src, dst) = (transition.condition, transition.source, transition.destination) condition = self._config.conditions[condition_name] char_list = self._ExpandBracketExpression(condition) for c in char_list: state_table[src][ord(c)] = self._StateInternalC(dst) # Create the inner lists which map input characters to destination states. for state in self._config.states: transition_row = [] for c in xrange(0, 255): transition_row.append(' /* %06s */ %s' % (repr(chr(c)), state_table[state][c])) out.append(self._CreateStructList('%stransition_row_%s' % (self._Prefix(), state), 'int', transition_row)) out.append('\n') # Create the outer list, which map source states to input characters. out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() + 'state_transitions')) row_list = [' %stransition_row_%s' % (self._Prefix(), row) for row in self._config.states] out.append(',\n'.join(row_list)) out.append('\n};\n') return ''.join(out) def Generate(self): """Returns the generated the C include statements for the statemachine.""" print '\n'.join((self._CreateHeader(), self._CreateNumStates(), self._CreateStatesEnum(), self._CreateStatesExternal(), self._CreateStatesInternalNames(), self._CreateTransitionTable())) def main(): if len(sys.argv) != 2: print "usage: generate_fsm.py config_file" sys.exit(1) config = FSMConfig() config.Load(sys.argv[1]) gen = FSMGenerateC(config) gen.Generate() if __name__ == "__main__": main()