/* Copyright (c) 2007, Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * --- * Author: Filipe Almeida */ #ifndef STREAMHTMLPARSER_STATEMACHINE_H #define STREAMHTMLPARSER_STATEMACHINE_H #include "port.h" /* TODO(falmeida): I'm not sure about these limits, but since right now we only * have 24 states it should be fine */ enum { STATEMACHINE_ERROR = 127 }; #define STATEMACHINE_RECORD_BUFFER_SIZE 256 #define STATEMACHINE_MAX_STR_ERROR 80 struct statemachine_ctx_s; typedef void(*state_event_function)(struct statemachine_ctx_s *, int, char, int); typedef struct statemachine_definition_s { int num_states; const int* const* transition_table; /* Array containing the name of the states as a C string. * This field is optional and if not in use it should be set to NULL. */ const char* const* state_names; state_event_function *in_state_events; state_event_function *enter_state_events; state_event_function *exit_state_events; } statemachine_definition; typedef struct statemachine_ctx_s { int current_state; int next_state; statemachine_definition *definition; char current_char; /* Current line number. */ int line_number; /* Current column number. */ int column_number; char record_buffer[STATEMACHINE_RECORD_BUFFER_SIZE]; size_t record_pos; /* True if we are recording the stream to record_buffer. */ int recording; /* In case there was an error (we are in state STATEMACHINE_ERROR), it will * contain a human readable description of the error. */ char error_msg[STATEMACHINE_MAX_STR_ERROR]; /* Storage space for the layer above. */ void *user; } statemachine_ctx; /* Populates the statemachine definition. * * Receives a transition table and an optional array of state names. It uses * this data to populate the state machine definition. * * The transition table structure is a list of lists of ints (int **). The * outer list indexes the source state and the inner list contains the * destination state for each of the possible input characters: * * const int* const* transitions[source][input] == destination. 
* * The optional argument state_names points to a list of strings containing * human readable state names. These strings are used when reporting error * messages. */ void statemachine_definition_populate(statemachine_definition *def, const int* const* transition_table, const char* const* state_names); void statemachine_in_state(statemachine_definition *def, int st, state_event_function func); void statemachine_enter_state(statemachine_definition *def, int st, state_event_function func); void statemachine_exit_state(statemachine_definition *def, int st, state_event_function func); statemachine_definition *statemachine_definition_new(int states); void statemachine_definition_delete(statemachine_definition *def); int statemachine_get_state(statemachine_ctx *ctx); void statemachine_set_state(statemachine_ctx *ctx, int state); void statemachine_start_record(statemachine_ctx *ctx); const char *statemachine_stop_record(statemachine_ctx *ctx); const char *statemachine_record_buffer(statemachine_ctx *ctx); /* Returns the the number of characters currently stored in the record buffer. */ static INLINE size_t statemachine_record_length(statemachine_ctx *ctx) { return ctx->record_pos + 1; } /* Return the current line number. */ static INLINE int statemachine_get_line_number(statemachine_ctx *ctx) { return ctx->line_number; } /* Set the current line number. */ static INLINE void statemachine_set_line_number(statemachine_ctx *ctx, int line) { ctx->line_number = line; } /* Return the current column number. */ static INLINE int statemachine_get_column_number(statemachine_ctx *ctx) { return ctx->column_number; } /* Set the current column number. */ static INLINE void statemachine_set_column_number(statemachine_ctx *ctx, int column) { ctx->column_number = column; } /* Retrieve a human readable error message in case an error occurred. * * NULL is returned if the parser didn't encounter an error. 
*/ static INLINE const char *statemachine_get_error_msg(statemachine_ctx *ctx) { if (ctx->next_state == STATEMACHINE_ERROR) { return ctx->error_msg; } else { return NULL; } } /* Reset the statemachine. * * The state is set to the initialization values. This includes setting the * state to the default state (0), stopping recording and setting the line * number to 1. */ void statemachine_reset(statemachine_ctx *ctx); /* Initializes a new statemachine. Receives a statemachine definition object * that should have been initialized with statemachine_definition_new() and a * user reference to be used by the caller. * * Returns NULL if initialization fails. * * Initialization failure is fatal, and if this function fails it may not * deallocate all previsouly allocated memory. */ statemachine_ctx *statemachine_new(statemachine_definition *def, void *user); /* Returns a pointer to a context which is a duplicate of the statemachine src. * The statemachine definition and the user pointer have to be provided since * these references are not owned by the statemachine itself. */ statemachine_ctx *statemachine_duplicate(statemachine_ctx *ctx, statemachine_definition *def, void *user); /* Copies the context of the statemachine pointed to by src to the statemachine * provided by dst. * The statemachine definition and the user pointer have to be provided since * these references are not owned by the statemachine itself. */ void statemachine_copy(statemachine_ctx *dst, statemachine_ctx *src, statemachine_definition *def, void *user); int statemachine_parse(statemachine_ctx *ctx, const char *str, int size); void statemachine_delete(statemachine_ctx *ctx); /***** * The following functions are only exported for testing purposes and should * be treated as private. */ /* Encode the character as an escaped C string. * * Encode the character chr into the string output. Writes at most len * characters to the output string but makes sure output is NULL terminated. 
*
 * Parameters:
 *   chr    - character to encode.
 *   output - string that receives the encoded form.
 *   len    - upper bound on the number of characters written to output.
 */
void statemachine_encode_char(char chr, char *output, size_t len);

#endif /* STREAMHTMLPARSER_STATEMACHINE_H */