summaryrefslogtreecommitdiff
path: root/streamhtmlparser/statemachine.h
blob: e586d352689d6e87ce527d84a194d6eaa83f78a9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Filipe Almeida
 */

#ifndef STREAMHTMLPARSER_STATEMACHINE_H
#define STREAMHTMLPARSER_STATEMACHINE_H

#include "port.h"

/* TODO(falmeida): I'm not sure about these limits, but since right now we only
 * have 24 states it should be fine */

/* Reserved state value that marks the error state.
 *
 * statemachine_get_error_msg() compares next_state against this value, so it
 * must not collide with a regular state number (the TODO above notes that
 * only 24 states exist right now).
 */
enum {
    STATEMACHINE_ERROR = 127
};

/* Size, in chars, of the record_buffer array in statemachine_ctx. */
#define STATEMACHINE_RECORD_BUFFER_SIZE 256

/* Size, in chars, of the error_msg array in statemachine_ctx. */
#define STATEMACHINE_MAX_STR_ERROR 80

/* Forward declaration so the callback type below can take a context pointer
 * before the struct itself is defined. */
struct statemachine_ctx_s;

/* Signature of a state event callback, as stored in the in_state_events,
 * enter_state_events and exit_state_events arrays of a definition.
 *
 * The first argument is the statemachine context.
 * NOTE(review): the remaining (int, char, int) arguments are unnamed here;
 * presumably they are the state being left, the current input character and
 * the state being entered -- confirm against the implementation.
 */
typedef void(*state_event_function)(struct statemachine_ctx_s *, int, char,
                                    int);

/* Description of a statemachine: its transition table plus the optional
 * state names and the event callback arrays. */
typedef struct statemachine_definition_s {
    /* NOTE(review): presumably the state count that was passed to
     * statemachine_definition_new() -- confirm. */
    int num_states;
    /* Indexed as transition_table[source][input] == destination, as described
     * on statemachine_definition_populate(). */
    const int* const* transition_table;

    /* Array containing the name of the states as a C string.
     * This field is optional and if not in use it should be set to NULL.
     */
    const char* const* state_names;
    /* Event callback arrays.
     * NOTE(review): presumably one slot per state, filled in through
     * statemachine_in_state(), statemachine_enter_state() and
     * statemachine_exit_state() respectively -- confirm against the
     * implementation. */
    state_event_function *in_state_events;
    state_event_function *enter_state_events;
    state_event_function *exit_state_events;
} statemachine_definition;

/* Runtime state of a single statemachine instance. */
typedef struct statemachine_ctx_s {
    /* NOTE(review): presumably the state the machine is in and the state it
     * is moving to. statemachine_get_error_msg() reports an error when
     * next_state equals STATEMACHINE_ERROR. */
    int current_state;
    int next_state;
    /* Definition used by this instance. The reference is not owned by the
     * statemachine (see statemachine_duplicate() / statemachine_copy()). */
    statemachine_definition *definition;
    /* NOTE(review): presumably the input character being processed --
     * confirm against the implementation. */
    char current_char;

    /* Current line number. */
    int line_number;

    /* Current column number. */
    int column_number;
    /* Destination of the recorded stream while recording is true. */
    char record_buffer[STATEMACHINE_RECORD_BUFFER_SIZE];
    /* Position inside record_buffer; statemachine_record_length() returns
     * this value plus one. */
    size_t record_pos;

    /* True if we are recording the stream to record_buffer. */
    int recording;

    /* In case there was an error (we are in state STATEMACHINE_ERROR), it will
     * contain a human readable description of the error.
     */
    char error_msg[STATEMACHINE_MAX_STR_ERROR];

    /* Storage space for the layer above. Like the definition, this reference
     * is not owned by the statemachine. */
    void *user;
} statemachine_ctx;

/* Populates the statemachine definition.
 *
 * Receives a transition table and an optional array of state names. It uses
 * this data to populate the state machine definition pointed to by def.
 *
 * The transition table structure is a list of lists of ints
 * (const int* const*). The outer list indexes the source state and the inner
 * list contains the destination state for each of the possible input
 * characters:
 *
 *   transition_table[source][input] == destination
 *
 * The optional argument state_names points to a list of strings containing
 * human readable state names. These strings are used when reporting error
 * messages.
 */
void statemachine_definition_populate(statemachine_definition *def,
                                     const int* const* transition_table,
                                     const char* const* state_names);

/* Event callback registration for state st of the definition def.
 *
 * NOTE(review): undocumented in this header. Presumably each function stores
 * func in the matching array of the definition (in_state_events,
 * enter_state_events, exit_state_events) at index st -- confirm against the
 * implementation.
 */
void statemachine_in_state(statemachine_definition *def, int st,
                           state_event_function func);
void statemachine_enter_state(statemachine_definition *def, int st,
                                     state_event_function func);
void statemachine_exit_state(statemachine_definition *def, int st,
                                    state_event_function func);

/* Creation and destruction of a statemachine definition.
 *
 * statemachine_new() expects a definition that was initialized with
 * statemachine_definition_new().
 * NOTE(review): presumably `states` is the number of states to allocate
 * callback slots for, and statemachine_definition_delete() releases what
 * statemachine_definition_new() allocated -- confirm against the
 * implementation.
 */
statemachine_definition *statemachine_definition_new(int states);
void statemachine_definition_delete(statemachine_definition *def);

/* Accessors for the state of the statemachine.
 *
 * NOTE(review): undocumented in this header; presumably they read and
 * overwrite ctx->current_state -- confirm against the implementation.
 */
int statemachine_get_state(statemachine_ctx *ctx);
void statemachine_set_state(statemachine_ctx *ctx, int state);

/* Recording control. While ctx->recording is true the stream is recorded to
 * ctx->record_buffer.
 *
 * NOTE(review): undocumented in this header. Presumably
 * statemachine_start_record() begins a new recording,
 * statemachine_stop_record() ends it and returns the recorded text, and
 * statemachine_record_buffer() returns the buffer without stopping --
 * confirm against the implementation.
 */
void statemachine_start_record(statemachine_ctx *ctx);
const char *statemachine_stop_record(statemachine_ctx *ctx);
const char *statemachine_record_buffer(statemachine_ctx *ctx);

/* Reports the length associated with the record buffer.
 *
 * The result is ctx->record_pos plus one.
 * NOTE(review): the extra one presumably accounts for the terminating NUL
 * that follows the recorded characters -- confirm against the code that
 * writes record_buffer.
 */
static INLINE size_t statemachine_record_length(statemachine_ctx *ctx) {
  const size_t pos = ctx->record_pos;
  return pos + 1;
}

/* Accessor: yields the line number currently stored in the context. */
static INLINE int statemachine_get_line_number(statemachine_ctx *ctx) {
  const int current_line = ctx->line_number;
  return current_line;
}

/* Mutator: overwrites the line number stored in the context with `line`. */
static INLINE void
statemachine_set_line_number(statemachine_ctx *ctx, int line) {
  ctx->line_number = line;
}

/* Accessor: yields the column number currently stored in the context. */
static INLINE int statemachine_get_column_number(statemachine_ctx *ctx) {
  const int current_column = ctx->column_number;
  return current_column;
}

/* Mutator: overwrites the column number stored in the context with
 * `column`. */
static INLINE void
statemachine_set_column_number(statemachine_ctx *ctx, int column) {
  ctx->column_number = column;
}


/* Fetches the human readable description of the last error, if any.
 *
 * An error is considered to have occurred when ctx->next_state equals
 * STATEMACHINE_ERROR; in that case a pointer to ctx->error_msg is returned.
 * Otherwise the result is NULL.
 */
static INLINE const char *statemachine_get_error_msg(statemachine_ctx *ctx) {
  /* Guard: no error recorded, so there is nothing to report. */
  if (ctx->next_state != STATEMACHINE_ERROR) {
    return NULL;
  }
  return ctx->error_msg;
}

/* Reset the statemachine.
 *
 * The context is set back to its initialization values. This includes setting
 * the state to the default state (0), stopping recording and setting the line
 * number to 1.
 * NOTE(review): the column number is not mentioned here; presumably it is
 * reset as well -- confirm against the implementation.
 */
void statemachine_reset(statemachine_ctx *ctx);

/* Initializes a new statemachine. Receives a statemachine definition object
 * that should have been initialized with statemachine_definition_new() and a
 * user reference to be used by the caller.
 *
 * Returns NULL if initialization fails.
 *
 * Initialization failure is fatal, and if this function fails it may not
 * deallocate all previously allocated memory.
 */
statemachine_ctx *statemachine_new(statemachine_definition *def,
                                   void *user);

/* Returns a pointer to a context which is a duplicate of the statemachine
 * ctx.
 * The statemachine definition and the user pointer have to be provided since
 * these references are not owned by the statemachine itself.
 */
statemachine_ctx *statemachine_duplicate(statemachine_ctx *ctx,
                                         statemachine_definition *def,
                                         void *user);

/* Copies the context of the statemachine pointed to by src to the statemachine
 * provided by dst.
 *
 * The statemachine definition (def) and the user pointer (user) have to be
 * provided since these references are not owned by the statemachine itself.
 */
void statemachine_copy(statemachine_ctx *dst,
                       statemachine_ctx *src,
                       statemachine_definition *def,
                       void *user);

/* NOTE(review): undocumented in this header. Presumably feeds the `size`
 * characters starting at str through the statemachine ctx and returns the
 * resulting state (STATEMACHINE_ERROR on failure) -- confirm against the
 * implementation.
 */
int statemachine_parse(statemachine_ctx *ctx, const char *str, int size);

/* NOTE(review): undocumented in this header. Presumably releases a context
 * obtained from statemachine_new() or statemachine_duplicate(); the
 * definition and user pointer are not owned by the context -- confirm
 * against the implementation.
 */
void statemachine_delete(statemachine_ctx *ctx);


/*****
 * The following functions are only exported for testing purposes and should
 * be treated as private. */


/* Encode the character as an escaped C string.
 *
 * Encode the character chr into the string output. Writes at most len
 * characters to the output string but makes sure output is NUL terminated.
 */
void statemachine_encode_char(char chr, char *output, size_t len);

#endif /* STREAMHTMLPARSER_STATEMACHINE_H */