summaryrefslogtreecommitdiff
path: root/minic
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2018-06-25 21:57:18 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2018-06-25 21:57:18 +0200
commit3f838a994053966cdcbb1e2a32ad00801156d343 (patch)
tree15e1b646acda3616f95f4fb7fc2458a4d5ae9eca /minic
parent93815e0c124b4f05043cf6c4c91a3f8cef17f34e (diff)
downloadcompilertests-3f838a994053966cdcbb1e2a32ad00801156d343.tar.gz
compilertests-3f838a994053966cdcbb1e2a32ad00801156d343.tar.bz2
parsing ident
Diffstat (limited to 'minic')
-rw-r--r--minic/const.h3
-rw-r--r--minic/parse.c25
-rw-r--r--minic/scan.c76
-rw-r--r--minic/scan.h11
4 files changed, 105 insertions, 10 deletions
diff --git a/minic/const.h b/minic/const.h
index 3c47234..e22075e 100644
--- a/minic/const.h
+++ b/minic/const.h
@@ -1,6 +1,7 @@
#pragma once
enum {
+ MAX_IDENT_LEN = 15,
MAX_TMP_LEN = 50
};
- \ No newline at end of file
+
diff --git a/minic/parse.c b/minic/parse.c
index 0658908..8de56a6 100644
--- a/minic/parse.c
+++ b/minic/parse.c
@@ -20,6 +20,27 @@ void parser_debug( Parser *p, int enable )
}
}
+/* Debug helper: emit a fixed tag via print() naming the kind of the
+ * scanned symbol `s`. Identifiers are reported without their text.
+ */
+static void print_symbol( scanner_Symbol s )
+{
+	if( s.sym == S_eof ) {
+		print( "PARSER(EOF)" );
+	} else if( s.sym == S_INT ) {
+		print( "PARSER(INT)" );
+	} else if( s.sym == S_DIV ) {
+		print( "PARSER(DIV)" );
+	} else if( s.sym == S_IDENT ) {
+		// TODO: print identifier
+		print( "PARSER(IDENT)" );
+	} else {
+		/* any symbol kind without a dedicated tag */
+		print( "PARSER(<unknown symbol>)" );
+	}
+}
+
void parser_parse( Parser *p )
{
scanner_Symbol s;
@@ -27,5 +48,9 @@ void parser_parse( Parser *p )
s = scanner_scan( p->s );
while( s.sym != S_eof ) {
+ if( p->debug ) {
+ print_symbol( s );
+ }
+ s = scanner_scan( p->s );
}
}
diff --git a/minic/scan.c b/minic/scan.c
index 1e0f358..6e325bc 100644
--- a/minic/scan.c
+++ b/minic/scan.c
@@ -4,6 +4,7 @@
#include "const.h"
#include "stdlib.h"
#include "string.h"
+#include "ctype.h"
void scanner_init( Scanner *s, char *src )
{
@@ -41,14 +42,26 @@ static char get_char( Scanner *s )
static char peek_char( Scanner *s, int seek )
{
- char c;
char *pos = s->pos;
+ char c = *pos;
- for( c = *pos; c != '\0' && seek > 0; pos++, seek-- );
+ while( c != '\0' && seek > 0 ) {
+ c = *pos;
+ pos++;
+ seek--;
+ }
return c;
}
+/* Advance the scanner by calling get_char() `seek` times; the characters
+ * read are discarded. Does nothing when `seek` is zero or negative.
+ */
+static void skip_char( Scanner *s, int seek )
+{
+	int consumed;
+
+	for( consumed = 0; consumed < seek; consumed++ ) {
+		get_char( s );
+	}
+}
+
static void skip_whitespace( Scanner *s )
{
for( ; ; get_char( s ) ) {
@@ -83,6 +96,18 @@ static void error( Scanner *s, char *msg )
halt( );
}
+/* Build the message "unexpected character '<c>'" in a temporary buffer
+ * of MAX_TMP_LEN bytes and hand it to error().
+ */
+static void unexpected_char( Scanner *s, int c )
+{
+	/* the initializer seeds the prefix and zero-fills the remainder */
+	char msg[MAX_TMP_LEN] = "unexpected character '";
+
+	strcat_c( msg, c );
+	strcat( msg, "'" );
+
+	error( s, msg );
+}
+
static void skip_comment( Scanner *s )
{
get_char( s );
@@ -97,6 +122,27 @@ static void skip_comment( Scanner *s )
get_char( s );
}
+/* Scan an identifier starting at the scanner's current peek character.
+ *
+ * Collects up to MAX_IDENT_LEN alphanumeric characters into a local
+ * buffer, consuming each one with get_char(), and returns a symbol whose
+ * kind is S_IDENT. The collected text is not stored or returned yet (see
+ * the TODO below).
+ *
+ * NOTE(review): once MAX_IDENT_LEN characters have been read the loop
+ * stops, so the tail of a longer identifier stays in the input and is
+ * seen by the next scan - confirm whether this should be an error.
+ */
+static scanner_Symbol parse_ident( Scanner *s )
+{
+	scanner_Symbol sym;
+	int len = 0;
+	char tok[MAX_IDENT_LEN+1];
+
+	/* strcat_c() is used to append to tok (presumably like strcat), so
+	 * tok must start as an empty string instead of indeterminate stack
+	 * contents
+	 */
+	tok[0] = '\0';
+
+	/* <ctype.h> classifiers need a value representable as unsigned char;
+	 * a plain char may be negative
+	 */
+	while( len < MAX_IDENT_LEN && isalnum( (unsigned char)s->peek ) ) {
+		strcat_c( tok, s->peek );
+		len++;
+		get_char( s );
+	}
+
+	/* TODO: we can add the token into the identifier table ourselves or
+	 * pass it to the parser and let the parser do it?
+	 */
+
+	sym.sym = S_IDENT;
+
+	return sym;
+}
+
scanner_Symbol scanner_scan( Scanner *s )
{
scanner_Symbol sym;
@@ -124,11 +170,33 @@ scanner_Symbol scanner_scan( Scanner *s )
print( "SCANNER(COMMENT_END)" );
}
continue;
+ } else {
+ sym.sym = S_DIV;
+ return sym;
+ }
+ break;
+
+ case 'i':
+ switch( peek_char( s, 1 ) ) {
+ case 'n':
+ switch( peek_char( s, 2 ) ) {
+ case 't':
+ // todo: consume two chars
+ skip_char( s, 2 );
+ sym.sym = S_INT;
+ return sym;
+ }
+ goto id;
}
break;
-
+
+ id:
+ case 'm': case 'a': case 'n':
+ sym = parse_ident( s );
+ return sym;
+
default:
- error( s, "unexpected symbol" );
+ unexpected_char( s, s->peek );
sym.sym = S_eof;
return sym;
}
diff --git a/minic/scan.h b/minic/scan.h
index 3e6942a..be24e08 100644
--- a/minic/scan.h
+++ b/minic/scan.h
@@ -1,14 +1,15 @@
#pragma once
+#include "const.h"
+
typedef enum scanner_Sym {
S_undef,
- S_eof
+ S_eof,
+ S_DIV,
+ S_INT,
+ S_IDENT
} scanner_Sym;
-enum {
- MAX_IDENT_LEN = 10
-};
-
typedef struct scanner_Symbol {
scanner_Sym sym;
union {