diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 10:40:23 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2012-08-12 10:40:23 +0200 |
commit | fe4e07657381e7db947630981a1d34410e4753e3 (patch) | |
tree | 7daf065a73ab29fd954f81e40dfde9f6e66a022a /streamhtmlparser | |
parent | b2f35389b68fb1e58ea13dc273fdebaf534a325c (diff) | |
download | crawler-fe4e07657381e7db947630981a1d34410e4753e3.tar.gz crawler-fe4e07657381e7db947630981a1d34410e4753e3.tar.bz2 |
streamhtmlparser works on Windows
Diffstat (limited to 'streamhtmlparser')
-rwxr-xr-x | streamhtmlparser/Makefile.W32 | 38 | ||||
-rwxr-xr-x[-rw-r--r--] | streamhtmlparser/htmlparser.c | 5 | ||||
-rwxr-xr-x[-rw-r--r--] | streamhtmlparser/jsparser.c | 12 | ||||
-rwxr-xr-x | streamhtmlparser/port.h | 20 | ||||
-rwxr-xr-x[-rw-r--r--] | streamhtmlparser/statemachine.h | 14 |
5 files changed, 75 insertions, 14 deletions
diff --git a/streamhtmlparser/Makefile.W32 b/streamhtmlparser/Makefile.W32 new file mode 100755 index 0000000..050b5c3 --- /dev/null +++ b/streamhtmlparser/Makefile.W32 @@ -0,0 +1,38 @@ +TOPDIR = .. + +SUBDIRS = + +!INCLUDE $(TOPDIR)\makefiles\nmake\platform.mk + +INCLUDE_CXXFLAGS = \ + /D_WIN32_WINNT=0x504 + +INCLUDE_DIRS = \ + /I. + +INCLUDE_LDFLAGS = \ + +INCLUDE_LIBS = \ + +CPP_OBJS = \ + statemachine.obj \ + jsparser.obj \ + htmlparser.obj + +STATIC_LIB = \ + streamhtmlparser.lib + +all: $(CPP_OBJS) $(STATIC_LIB) + +!INCLUDE $(TOPDIR)\makefiles\nmake\sub.mk + +$(STATIC_LIB): $(CPP_OBJS) + $(LINK) /lib /nologo /out:$@ $(STATIC_LDFLAGS) $? + +local_all: + +local_clean: + +local_distclean: + +local_test: diff --git a/streamhtmlparser/htmlparser.c b/streamhtmlparser/htmlparser.c index c88486a..3e820f7 100644..100755 --- a/streamhtmlparser/htmlparser.c +++ b/streamhtmlparser/htmlparser.c @@ -38,7 +38,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <strings.h> #include <ctype.h> #include <assert.h> @@ -77,7 +76,7 @@ static struct entityfilter_table_s { /* Utility functions */ /* Similar to strncpy() but avoids the NULL padding. 
*/ -static inline void nopad_strncpy(char *dst, const char *src, size_t dst_size, +static INLINE void nopad_strncpy(char *dst, const char *src, size_t dst_size, size_t src_size) { size_t size; @@ -103,7 +102,7 @@ static int state_external(int st) * * From: http://www.w3.org/TR/html401/struct/text.html#h-9.1 */ -static inline int html_isspace(char chr) +static INLINE int html_isspace(char chr) { if (chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r') { return 1; diff --git a/streamhtmlparser/jsparser.c b/streamhtmlparser/jsparser.c index 9d71c74..dc94cfc 100644..100755 --- a/streamhtmlparser/jsparser.c +++ b/streamhtmlparser/jsparser.c @@ -40,6 +40,8 @@ #include "statemachine.h" #include "jsparser.h" +#include "port.h" + /* So we can support both C and C++ compilers, we use the CAST() macro instead * of using C style casts or static_cast<>() directly. */ @@ -117,7 +119,7 @@ static const char *regexp_token_prefix[] = { /* Converts the internal state into the external superstate. */ -static inline int state_external(int state) +static INLINE int state_external(int state) { assert(state < JSPARSER_NUM_STATES); assert(state >= 0); @@ -129,7 +131,7 @@ static inline int state_external(int state) * with the exception of unicode space and line terminators: * http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf */ -static inline int js_is_whitespace(char c) +static INLINE int js_is_whitespace(char c) { return c == '\t' || /* Tab 0x09 */ c == '\v' || /* Vertical Tab 0x0B */ @@ -147,7 +149,7 @@ static inline int js_is_whitespace(char c) * For more detail on the limitations of having this relaxed set of characters * please see the comments in_state_js_text(). 
*/ -static inline int js_is_identifier(char c) { +static INLINE int js_is_identifier(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || @@ -202,7 +204,7 @@ void jsparser_buffer_append_str(jsparser_ctx *js, const char *str) /* Returns the position relative to the start of the buffer or -1 if past the * size of the buffer.. */ -static inline int jsparser_buffer_absolute_pos(jsparser_ctx *js, int pos) +static INLINE int jsparser_buffer_absolute_pos(jsparser_ctx *js, int pos) { int absolute_pos; int buffer_len; @@ -363,7 +365,7 @@ static int bsearch_strcmp(const void *a, const void *b) * precede a regular expression in the javascript grammar, and returns true if * the argument is found on that list. */ -static inline int is_regexp_token_prefix(char *token) +static INLINE int is_regexp_token_prefix(char *token) { assert(token != NULL); diff --git a/streamhtmlparser/port.h b/streamhtmlparser/port.h new file mode 100755 index 0000000..e826959 --- /dev/null +++ b/streamhtmlparser/port.h @@ -0,0 +1,20 @@ +#ifndef __INLINE_H
+#define __INLINE_H
+
+#ifdef _MSC_VER
+#define INLINE __inline
+#else
+#define INLINE inline
+#endif
+
+#ifndef _WIN32
+#include <strings.h>
+#else
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define snprintf _snprintf
+#endif
+
+#endif
diff --git a/streamhtmlparser/statemachine.h b/streamhtmlparser/statemachine.h index a05ffe7..e586d35 100644..100755 --- a/streamhtmlparser/statemachine.h +++ b/streamhtmlparser/statemachine.h @@ -34,6 +34,8 @@ #ifndef STREAMHTMLPARSER_STATEMACHINE_H #define STREAMHTMLPARSER_STATEMACHINE_H +#include "port.h" + /* TODO(falmeida): I'm not sure about these limits, but since right now we only * have 24 states it should be fine */ @@ -127,28 +129,28 @@ const char *statemachine_record_buffer(statemachine_ctx *ctx); /* Returns the the number of characters currently stored in the record buffer. */ -static inline size_t statemachine_record_length(statemachine_ctx *ctx) { +static INLINE size_t statemachine_record_length(statemachine_ctx *ctx) { return ctx->record_pos + 1; } /* Return the current line number. */ -static inline int statemachine_get_line_number(statemachine_ctx *ctx) { +static INLINE int statemachine_get_line_number(statemachine_ctx *ctx) { return ctx->line_number; } /* Set the current line number. */ -static inline void statemachine_set_line_number(statemachine_ctx *ctx, +static INLINE void statemachine_set_line_number(statemachine_ctx *ctx, int line) { ctx->line_number = line; } /* Return the current column number. */ -static inline int statemachine_get_column_number(statemachine_ctx *ctx) { +static INLINE int statemachine_get_column_number(statemachine_ctx *ctx) { return ctx->column_number; } /* Set the current column number. */ -static inline void statemachine_set_column_number(statemachine_ctx *ctx, +static INLINE void statemachine_set_column_number(statemachine_ctx *ctx, int column) { ctx->column_number = column; } @@ -158,7 +160,7 @@ static inline void statemachine_set_column_number(statemachine_ctx *ctx, * * NULL is returned if the parser didn't encounter an error. 
*/ -static inline const char *statemachine_get_error_msg(statemachine_ctx *ctx) { +static INLINE const char *statemachine_get_error_msg(statemachine_ctx *ctx) { if (ctx->next_state == STATEMACHINE_ERROR) { return ctx->error_msg; } else { |