diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2018-06-25 21:57:18 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2018-06-25 21:57:18 +0200 |
commit | 3f838a994053966cdcbb1e2a32ad00801156d343 (patch) | |
tree | 15e1b646acda3616f95f4fb7fc2458a4d5ae9eca /minic | |
parent | 93815e0c124b4f05043cf6c4c91a3f8cef17f34e (diff) | |
download | compilertests-3f838a994053966cdcbb1e2a32ad00801156d343.tar.gz compilertests-3f838a994053966cdcbb1e2a32ad00801156d343.tar.bz2 |
parsing ident
Diffstat (limited to 'minic')
-rw-r--r-- | minic/const.h | 3 | ||||
-rw-r--r-- | minic/parse.c | 25 | ||||
-rw-r--r-- | minic/scan.c | 76 | ||||
-rw-r--r-- | minic/scan.h | 11 |
4 files changed, 105 insertions, 10 deletions
diff --git a/minic/const.h b/minic/const.h
index 3c47234..e22075e 100644
--- a/minic/const.h
+++ b/minic/const.h
@@ -1,6 +1,7 @@
 #pragma once
 
 enum {
+	MAX_IDENT_LEN = 15,
 	MAX_TMP_LEN = 50
 };
-
\ No newline at end of file
+
diff --git a/minic/parse.c b/minic/parse.c
index 0658908..8de56a6 100644
--- a/minic/parse.c
+++ b/minic/parse.c
@@ -20,6 +20,27 @@ void parser_debug( Parser *p, int enable )
 	}
 }
 
+static void print_symbol( scanner_Symbol s )
+{
+	switch( s.sym ) {
+		case S_eof:
+			print( "PARSER(EOF)" );
+			break;
+		case S_INT:
+			print( "PARSER(INT)" );
+			break;
+		case S_DIV:
+			print( "PARSER(DIV)" );
+			break;
+		case S_IDENT:
+			// TODO: print identifier
+			print( "PARSER(IDENT)" );
+			break;
+		default:
+			print( "PARSER(<unknown symbol>)" );
+	}
+}
+
 void parser_parse( Parser *p )
 {
 	scanner_Symbol s;
@@ -27,5 +48,9 @@ void parser_parse( Parser *p )
 	s = scanner_scan( p->s );
 	while( s.sym != S_eof ) {
+		if( p->debug ) {
+			print_symbol( s );
+		}
+
 		s = scanner_scan( p->s );
 	}
 }
diff --git a/minic/scan.c b/minic/scan.c
index 1e0f358..6e325bc 100644
--- a/minic/scan.c
+++ b/minic/scan.c
@@ -4,6 +4,7 @@
 #include "const.h"
 #include "stdlib.h"
 #include "string.h"
+#include "ctype.h"
 
 void scanner_init( Scanner *s, char *src )
 {
@@ -41,14 +42,26 @@ static char get_char( Scanner *s )
 
 static char peek_char( Scanner *s, int seek )
 {
-	char c;
 	char *pos = s->pos;
+	char c = *pos;
 
-	for( c = *pos; c != '\0' && seek > 0; pos++, seek-- );
+	while( c != '\0' && seek > 0 ) {
+		c = *pos;
+		pos++;
+		seek--;
+	}
 
 	return c;
 }
 
+static void skip_char( Scanner *s, int seek )
+{
+	while( seek > 0 ) {
+		get_char( s );
+		seek--;
+	}
+}
+
 static void skip_whitespace( Scanner *s )
 {
 	for( ; ; get_char( s ) ) {
@@ -83,6 +96,18 @@ static void error( Scanner *s, char *msg )
 	halt( );
 }
 
+static void unexpected_char( Scanner *s, int c )
+{
+	char msg[MAX_TMP_LEN];
+	msg[0] = '\0';
+
+	strcat( msg, "unexpected character '" );
+	strcat_c( msg, c );
+	strcat( msg, "'" );
+
+	error( s, msg );
+}
+
 static void skip_comment( Scanner *s )
 {
 	get_char( s );
@@ -97,6 +122,27 @@ static void skip_comment( Scanner *s )
 	get_char( s );
 }
 
+static scanner_Symbol parse_ident( Scanner *s )
+{
+	scanner_Symbol sym;
+	int len = 0;
+	char tok[MAX_IDENT_LEN+1];
+
+	while( len < MAX_IDENT_LEN && isalnum( s->peek ) ) {
+		strcat_c( tok, s->peek );
+		len++;
+		get_char( s );
+	}
+
+	/* TODO: we can add the token into the identifier table ourselves or
+	 * pass it to the parser and let the parser do it?
+	 */
+
+	sym.sym = S_IDENT;
+
+	return sym;
+}
+
 scanner_Symbol scanner_scan( Scanner *s )
 {
 	scanner_Symbol sym;
@@ -124,11 +170,33 @@ scanner_Symbol scanner_scan( Scanner *s )
 					print( "SCANNER(COMMENT_END)" );
 				}
 				continue;
+			} else {
+				sym.sym = S_DIV;
+				return sym;
+			}
+			break;
+
+		case 'i':
+			switch( peek_char( s, 1 ) ) {
+				case 'n':
+					switch( peek_char( s, 2 ) ) {
+						case 't':
+							// todo: consume two chars
+							skip_char( s, 2 );
+							sym.sym = S_INT;
+							return sym;
+					}
+					goto id;
 			}
 			break;
-
+
+		id:
+		case 'm': case 'a': case 'n':
+			sym = parse_ident( s );
+			return sym;
+
 		default:
-			error( s, "unexpected symbol" );
+			unexpected_char( s, s->peek );
 			sym.sym = S_eof;
 			return sym;
 	}
diff --git a/minic/scan.h b/minic/scan.h
index 3e6942a..be24e08 100644
--- a/minic/scan.h
+++ b/minic/scan.h
@@ -1,14 +1,15 @@
 #pragma once
 
+#include "const.h"
+
 typedef enum scanner_Sym {
 	S_undef,
-	S_eof
+	S_eof,
+	S_DIV,
+	S_INT,
+	S_IDENT
 } scanner_Sym;
 
-enum {
-	MAX_IDENT_LEN = 10
-};
-
 typedef struct scanner_Symbol {
 	scanner_Sym sym;
 	union {