diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2019-02-22 19:16:24 +0100 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2019-02-22 19:16:24 +0100 |
commit | 9f76ed072277ec01ca591e478cf7686914b9e530 (patch) | |
tree | 8906b2887b896496974446b6877ba25aa4d788f8 /minie | |
parent | 638b7de7d7fb19b756a2f2bc266222b74b4e7295 (diff) | |
download | compilertests-9f76ed072277ec01ca591e478cf7686914b9e530.tar.gz compilertests-9f76ed072277ec01ca591e478cf7686914b9e530.tar.bz2 |
work on adding const declarations
introduced symbol table structure
Diffstat (limited to 'minie')
-rw-r--r-- | minie/TODOS | 35 | ||||
-rw-r--r-- | minie/e2c.c | 109 | ||||
-rw-r--r-- | minie/ec.e | 25 |
3 files changed, 138 insertions, 31 deletions
diff --git a/minie/TODOS b/minie/TODOS index 4c1d2f0..0d53087 100644 --- a/minie/TODOS +++ b/minie/TODOS @@ -192,6 +192,41 @@ a+rand( ) helps us to detect it is actually a function, otoh we can get the same information from the symbol table. +enums: + +Oberon has none. +You can always use constants or sets, but then the switch statement cannot +be protected against wrong use of constants. C and Java went the way from +constants to proper enums. +=> subtyping problem, extending enums means removing states to be defined +in a sane way. Now removing states in an enum hardly lets code relying on +more states behave in a consistent way. +=> subtype-explosion, enums are just a fancy way of defining integer constants +the only practical application I have is to avoid implicit type-coercion to ints +and handle the ranges in a state machine switch. +=> enums used in array subscripts lead to the sub-range problem of Pascal/Edison + unless I force enums to always start from 0,1,2,... as internal representation +=> OOP has no need for enums, as I can discriminate and extend a basic type, +e.g. KEYWORD extended to KEYWORD_MODULE, KEYWORD_IF, etc. +=> enum constants have no const value, so they cannot be used to define an + array (or at least, this needs a special cast again) +compared to functional languages the C-version of enums is quite limited, +see tagged unions (for instance in Rust). + +underscores: +started when trying to add S_module constant, so de facto a workaround for +a missing namespace/module called 'Scanner' with constant 'module'. Do we +forbid _ altogether, as they are a sign of bad modularization or namespace +emulation? On the other hand we will have longer identifiers, so _ is needed +to separate words. + +AST: +https://stackoverflow.com/questions/21150454/representing-an-abstract-syntax-tree-in-c + +design +Scanner class or struct vs. OPS module containing all variables. All modules +in the Oberon compiler act as singletons. 
+ links ----- diff --git a/minie/e2c.c b/minie/e2c.c index def9d99..da59c30 100644 --- a/minie/e2c.c +++ b/minie/e2c.c @@ -4,7 +4,7 @@ #include <stdarg.h> #include <assert.h> -/* CONSTANTS */ +/* constants */ enum { MAX_IDENT_LEN = 24, @@ -17,7 +17,7 @@ enum { MAX_TYPE_DATA_SIZE = 2048 }; -/* SCANNER */ +/* scanner */ static int look; static int row; @@ -37,6 +37,7 @@ typedef enum { S_import, S_procedure, S_return, + S_const, S_var, S_if, S_do, @@ -82,6 +83,7 @@ char *symname[S_eof+1] = { "import", "procedure", "return", + "const", "var", "if", "do", @@ -172,6 +174,12 @@ static int isDigit( int c ) return 0; } +static int isSpecial( int c ) +{ + if( c == '_' ) return 1; + return 0; +} + static int isCharacter( int c ) { if( isAlpha( c ) ) return 1; @@ -205,7 +213,7 @@ static void identifier( void ) ident[n] = look; n++; look = getChar( ); - while( ( isAlpha( look ) || isDigit( look ) ) && n < MAX_IDENT_LEN ) { + while( ( isAlpha( look ) || isDigit( look ) || isSpecial( look ) ) && n < MAX_IDENT_LEN ) { ident[n] = look; n++; look = getChar( ); @@ -303,6 +311,11 @@ next: } return S_ident; case 'c': + identifier( ); + if( strcmp( ident, "const" ) == 0 ) { + return S_const; + } + return S_ident; case 'd': identifier( ); if( strcmp( ident, "do" ) == 0 ) { @@ -496,7 +509,7 @@ next: return S_char; } -/* SYMBOL TABLE */ +/* symbol table */ typedef enum { TYPE_UNKNOWN, @@ -570,28 +583,35 @@ struct Type { } details; }; -static int nof_symbols = 0; -static Type symbols[MAX_SYMBOLS]; +typedef struct SymbolTable SymbolTable; -static void insert_symbol( Type type, char *name ) +struct SymbolTable { + int nof_symbols; + Type symbols[MAX_SYMBOLS]; + SymbolTable *parent; +}; + +static SymbolTable symbols; + +static void insert_symbol( SymbolTable *table, Type type, char *name ) { - if( nof_symbols >= MAX_SYMBOLS ) { + if( table->nof_symbols >= MAX_SYMBOLS ) { Abort( "Symbol table exhausted, increase MAX_SYMBOLS and recompile e2c" ); } - symbols[nof_symbols] = type; - strncpy( 
symbols[nof_symbols].name, name, MAX_IDENT_LEN ); + table->symbols[table->nof_symbols] = type; + strncpy( table->symbols[table->nof_symbols].name, name, MAX_IDENT_LEN ); - nof_symbols++; + table->nof_symbols++; } -static Type get_symbol_type( char *name ) +static Type get_symbol_type( SymbolTable *table, char *name ) { int i; - for( i = 0; i < nof_symbols; i++ ) { - if( strcmp( symbols[i].name, name ) == 0 ) { - return symbols[i]; + for( i = 0; i < table->nof_symbols; i++ ) { + if( strcmp( table->symbols[i].name, name ) == 0 ) { + return table->symbols[i]; } } @@ -635,7 +655,7 @@ static int length( char *name ) { Type type; - type = get_symbol_type( name ); + type = get_symbol_type( &symbols, name ); if( type.type == TYPE_ARRAY ) { return type.details.array.len; } else { @@ -690,7 +710,7 @@ static void register_internal_functions( void ) type.details.function.return_value = (Type *)( type.details.function.data + sizeof( Type ) ); type.details.function.return_value->type = TYPE_INTEGER; type.details.function.internal = 1; - insert_symbol( type, "length" ); + insert_symbol( &symbols, type, "length" ); /* constructor for char type */ type.type = TYPE_FUNCTION; @@ -700,7 +720,7 @@ static void register_internal_functions( void ) type.details.function.return_value = (Type *)( type.details.function.data + sizeof( Type ) ); type.details.function.return_value->type = TYPE_CHAR; type.details.function.internal = 1; - insert_symbol( type, "char" ); + insert_symbol( &symbols, type, "char" ); /* TODO: register functions in module system, should be outside in the stage-1 compiler */ @@ -709,7 +729,7 @@ static void register_internal_functions( void ) type.details.function.return_value = (Type *)type.details.function.data; type.details.function.return_value->type = TYPE_CHAR; type.details.function.internal = 1; - insert_symbol( type, "system.readchar" ); + insert_symbol( &symbols, type, "system.readchar" ); type.type = TYPE_FUNCTION; type.details.function.len = 1; @@ -718,7 +738,7 
@@ static void register_internal_functions( void ) type.details.function.return_value = (Type *)type.details.function.data; type.details.function.return_value->type = TYPE_NONE; type.details.function.internal = 1; - insert_symbol( type, "system.readline" ); + insert_symbol( &symbols, type, "system.readline" ); } static void register_internal_constants( void ) @@ -728,11 +748,11 @@ static void register_internal_constants( void ) type.type = TYPE_CONSTANT; type.details.constant.type = TYPE_BOOLEAN; type.details.constant.value.boolean = 1; - insert_symbol( type, "true" ); + insert_symbol( &symbols, type, "true" ); type.details.constant.type = TYPE_BOOLEAN; type.details.constant.value.boolean = 0; - insert_symbol( type, "false" ); + insert_symbol( &symbols, type, "false" ); } static void init( void ) @@ -831,7 +851,7 @@ static void factor( void ) sym = getSym( ); } else if( sym == S_ident ) { qualident( ); - type = get_symbol_type( varName ); + type = get_symbol_type( &symbols, varName ); if( type.type == TYPE_FUNCTION ) { parameterList( ); } else if( type.type == TYPE_CONSTANT ) { @@ -935,7 +955,7 @@ static void selector( void ) selected_array = 0; if( sym == S_lbracket ) { Expect( S_lbracket ); - type = get_symbol_type( varName ); + type = get_symbol_type( &symbols, varName ); if( type.type != TYPE_ARRAY ) { Abort( "Selecting element of non-array" ); } @@ -956,7 +976,7 @@ static void assignment( void ) /* left hand side */ /* precondition: qualident has been already parsed outside */ selector( ); - type = get_symbol_type( varName ); + type = get_symbol_type( &symbols, varName ); /* x := ( a+ b )- 3; -> x = (a+b)-3; * s1 := s2; -> strncpy( s2, s1, length( s2 ) ); @@ -1161,7 +1181,7 @@ static void statement( void ) parameterList( ); emitLn( ";" ); } else { - Type type = get_symbol_type( varName ); + Type type = get_symbol_type( &symbols, varName ); if( type.type == TYPE_FUNCTION ) { /* procedure call without parameter */ /* TODO: check number of parameter and return 
value to be 0 */ @@ -1253,6 +1273,36 @@ static void type( void ) arrayType( ); } } +static void constDeclaration( void ) +{ + /* TODO: constName? */ + variableName( ); + sym = getSym( ); + Expect( S_equals ); + /* TODO: ConstEpression requires an interpreter */ + if( sym == S_number ) { + number( ); + sym = getSym( ); + emitLn( "const int %s = %d;", varName, num ); + } else { + Abort( "Supporting numeric constants only" ); + } +} + +static void constBlock( void ) +{ + Expect( S_const ); + constDeclaration( ); + if( sym == S_begin || sym == S_var ) return; + while( sym == S_semicolon ) { + sym = getSym( ); + if( sym == S_ident ) { + constDeclaration( ); + } else if( sym == S_begin || sym == S_var ) { + return; + } + } +} static void variableDeclaration( void ) { @@ -1260,7 +1310,7 @@ static void variableDeclaration( void ) sym = getSym( ); Expect( S_colon ); type( ); - insert_symbol( lastType, varName ); + insert_symbol( &symbols, lastType, varName ); if( lastType.type == TYPE_ARRAY ) { /* TODO: this works for now, though it's not correct */ emitLn( "static %s %s[%d];", @@ -1355,7 +1405,7 @@ static void procedureDeclaration( void ) } Expect( S_semicolon ); - insert_symbol( funcType, procName ); + insert_symbol( &symbols, funcType, procName ); emit( "%s %s( ", return_type, procName ); if( funcType.details.function.len == 0 ) { @@ -1387,6 +1437,9 @@ static void procedureBlock( void ) static void declarationBlock( void ) { + if( sym == S_const ) { + constBlock( ); + } if( sym == S_var ) { variableBlock( ); } @@ -2,16 +2,22 @@ module ec; import system; +const + S_module = 1; + var col : integer; row : integer; look : char; + sym : integer; procedure Halt; begin system.halt( 1 ) end +(* scanner *) + procedure getChar : char; var c : char; @@ -32,9 +38,22 @@ end procedure isWhite( c : char ) : boolean; begin if ( c = char( 0 ) ) or ( c = char( 10 ) ) or ( c = char( 13 ) ) or ( c = char( 9 ) ) do - return true; + return true + else + return false + end +end + +(* parser *) 
+ +procedure Expect( expect : integer ); +(* TODO: Error line 51, pos 22: Unknown symbol 'expect': add symbol to local scope + and remove it at end of scope/procedure *) +begin + if ( sym = expect ) do + sym = getSym( ); else - return false; + Abort( "Expected symbol", symname[expect] ); end end @@ -53,7 +72,7 @@ end procedure doModule; begin -(* Expect( S_module ); *) + Expect( S_module ); look := getChar( ); while ( look <> char( 0 ) ) do if not isWhite( look ) do |