diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2020-05-29 19:37:08 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2020-05-29 19:37:08 +0200 |
commit | de5d9c5d1b735e95a20a8671d38697f1c7e638ea (patch) | |
tree | c36e67bb004671324b1f847c3a57a6da32530bf7 | |
parent | 8c3ad4c4acc3212ba5d1ff0a3aada3e714dad071 (diff) | |
download | compilertests-de5d9c5d1b735e95a20a8671d38697f1c7e638ea.tar.gz compilertests-de5d9c5d1b735e95a20a8671d38697f1c7e638ea.tar.bz2 |
added support for strings in ec
-rw-r--r-- | ecomp-c/ec.c | 247 | ||||
-rw-r--r-- | ecomp-c/minie.ebnf | 7 | ||||
-rw-r--r-- | ecomp-c/test1.e | 9 |
3 files changed, 199 insertions, 64 deletions
diff --git a/ecomp-c/ec.c b/ecomp-c/ec.c index 784671f..80bc815 100644 --- a/ecomp-c/ec.c +++ b/ecomp-c/ec.c @@ -12,7 +12,8 @@ enum { MAX_IDENT_LEN = 64, MAX_NUMBER_LEN = 10, MAX_NUMBER_OF_ENUMERATIONS = 6, - MAX_LABEL_LEN = 64 + MAX_LABEL_LEN = 64, + MAX_STRING_LEN = 64 }; static int DEBUG_GETCHAR = 0; @@ -24,6 +25,7 @@ typedef enum { S_ident = 0, S_number, S_char, + S_string, S_module, S_begin, S_end, @@ -62,6 +64,7 @@ static char *symname[S_eof+1] = { "ident", "number", "char", + "string", "module", "begin", "end", @@ -105,6 +108,7 @@ static S_Symbol sym; static char ident[MAX_IDENT_LEN+1]; static int num; static int ch; +static char str[MAX_STRING_LEN+1]; static void Err( char *s, va_list args ) { @@ -140,6 +144,18 @@ static void *Allocate( unsigned int size ) return p; } +static char *AllocateAndCopyStr( char *s ) +{ + char *d; + int len = strlen( s ); + if( len > MAX_STRING_LEN ) { + Abort( "Too long string literal, should not happen" ); + } + d = (char *)Allocate( len + 1 ); + strlcpy( d, s, MAX_STRING_LEN ); + return d; +} + static int getChar( void ) { int c; @@ -220,7 +236,7 @@ static void character( void ) { if( look == '\'' ) { look = getChar( ); - if( isDigit( look ) || isAlpha( look ) ) { + if( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) { ch = look; } else { Abort( "Expecting a character as character literal" ); @@ -233,6 +249,28 @@ static void character( void ) } } +static void string( void ) +{ + int n = 0; + + if( look == '"' ) { + look = getChar( ); + while( ( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) && n < MAX_STRING_LEN ) { + str[n] = look; + n++; + look = getChar( ); + } + if( n == MAX_STRING_LEN ) { + Abort( "String exceeds maximal length" ); + } + str[n] = '\0'; + if( look != '\"' ) { + Abort( "Unterminated string literal" ); + } + look = getChar( ); + } +} + static void identifier( void ) { int n = 0; @@ -241,7 +279,7 @@ static void identifier( void ) ident[n] = look; n++; look = getChar( ); - while( ( isAlpha( look ) || isDigit( look ) || isSpecial( look ) ) && n < MAX_IDENT_LEN ) { + while( ( isAlpha( look ) || isDigit( look ) || look == '_' ) && n < MAX_IDENT_LEN ) { ident[n] = look; n++; look = getChar( ); @@ -440,6 +478,10 @@ static S_Symbol getSym( void ) character( ); s = S_char; break; + case '\"': + string( ); + s = S_string; + break; case ':': look = getChar( ); if( look == '=' ) { @@ -571,7 +613,8 @@ static void Emit_DD( int d ) typedef enum { SYMBOL_CLASS_CONSTANT, SYMBOL_CLASS_VARIABLE, - SYMBOL_CLASS_TYPE + SYMBOL_CLASS_SIMPLE_TYPE, + SYMBOL_CLASS_ARRAY_TYPE } SymbolClass; typedef struct Symbol { @@ -583,9 +626,12 @@ typedef struct Symbol { int integer_value; char boolean_value; int character_value; + char *string_value; /* variable */ int initialized; int size; + /* array type */ + int dim; } Symbol; static Symbol *integer_type; @@ -604,9 +650,9 @@ static Scope *current_scope; static Scope *create_scope( Scope *parent, char *name ) { - Scope *scope = Allocate( sizeof( Scope ) ); + Scope *scope = (Scope *)Allocate( sizeof( Scope ) ); - scope->name = Allocate( strlen( name ) + 1 ); + scope->name = (char *)Allocate( strlen( name ) + 1 ); strlcpy( scope->name, name, strlen( name ) + 1 ); scope->symbol = NULL; scope->parent = parent; @@ -617,6 +663,9 @@ static Scope *create_scope( Scope *parent, char *name ) static void free_symbol( Symbol *sym ) { + if( sym->string_value != NULL ) { + free( sym->string_value ); + } free( sym->name ); free( sym ); } @@ -688,6 +737,7 @@ static Symbol *insert_symbol( Scope *scope, char *name, SymbolClass class ) sym->initialized = 0; sym->size = 0; sym->next = scope->symbol; + sym->string_value = NULL; scope->symbol = sym; return sym; @@ -717,6 +767,7 @@ typedef struct ExpressionNode { int integer_value; int boolean_value; int character_value; + char *string_value; Symbol *symbol; Symbol *actual_type; } ExpressionNode; @@ -726,6 +777,7 @@ static ExpressionNode *create_expression_node( void ) ExpressionNode *node = Allocate( sizeof( ExpressionNode ) ); node->left = NULL; node->right = NULL; + node->string_value = NULL; return node; } @@ -738,6 +790,9 @@ static void free_expression_node( ExpressionNode *node ) if( node->right != NULL ) { free_expression_node( node->right ); } + if( node->string_value != NULL ) { + free( node->string_value ); + } free( node ); } @@ -823,10 +878,12 @@ static int get_size( Symbol *symbol ) case SYMBOL_CLASS_CONSTANT: case SYMBOL_CLASS_VARIABLE: return get_size( symbol->type ); - case SYMBOL_CLASS_TYPE: + case SYMBOL_CLASS_SIMPLE_TYPE: return symbol->size; + case SYMBOL_CLASS_ARRAY_TYPE: + default: + Abort( "No size for class '%d", symbol->class ); } - Abort( "No size for class '%d", symbol->class ); return 0; } @@ -996,9 +1053,6 @@ static ExpressionNode *parseFactor( void ) if( symbol == NULL ) { Abort( "Unknown identifier '%s'", ident ); } - if( symbol->class == SYMBOL_CLASS_TYPE ) { - Abort( "'%s' is the name for a type and not a constant or variable as expected", ident ); - } if( symbol->class == SYMBOL_CLASS_CONSTANT ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; @@ -1012,8 +1066,7 @@ static ExpressionNode *parseFactor( void ) } else { Abort( "Unhandled expression assignment from identifier with type '%s'", node->actual_type->name ); } - } - if( symbol->class == SYMBOL_CLASS_VARIABLE ) { + } else if( symbol->class == SYMBOL_CLASS_VARIABLE ) { if( !symbol->initialized ) { Abort( "Variable '%s' has not been initialized yet", ident ); } @@ -1021,6 +1074,8 @@ static ExpressionNode *parseFactor( void ) node->type = EXPRESSION_NODE_TYPE_VAR; node->symbol = symbol; node->actual_type = symbol->type; + } else { + Abort( "'%s' is the name for a type and not a constant or variable as expected", ident ); } sym = getSym( ); } else if( sym == S_lparen ) { @@ -1139,8 +1194,9 @@ static void parseAssignment( Scope *scope ) if( symbol == NULL ) { Abort( "Unknown variable '%s'", ident ); } - if( symbol->class == SYMBOL_CLASS_TYPE ) { - Abort( "'%s' is not a variable as expected", ident ); + if( symbol->class == SYMBOL_CLASS_SIMPLE_TYPE || + symbol->class == SYMBOL_CLASS_ARRAY_TYPE ) { + Abort( "'%s' is a type and not a variable as expected", ident ); } if( symbol->class == SYMBOL_CLASS_CONSTANT ) { Abort( "'%s' is a constant and can not be changed", ident ); @@ -1278,6 +1334,8 @@ static void parseStatementBlock( Scope *scope ) static ExpressionNode *parseConstExpression( void ) { ExpressionNode *node; + char typeName[MAX_IDENT_LEN+1]; + Symbol *type; node = create_expression_node( ); @@ -1291,6 +1349,18 @@ static ExpressionNode *parseConstExpression( void ) node->character_value = ch; node->actual_type = character_type; sym = getSym( ); + } else if( sym == S_string ) { + node->type = EXPRESSION_NODE_TYPE_CONST; + node->string_value = AllocateAndCopyStr( str ); + snprintf( typeName, MAX_IDENT_LEN, "array %d of %s", strlen( str ), character_type->name ); + type = get_symbol( current_scope, typeName ); + if( type == NULL ) { + type = insert_symbol( current_scope, typeName, SYMBOL_CLASS_ARRAY_TYPE ); + type->dim = strlen( str ); + type->type = character_type; + } + node->actual_type = type; + sym = getSym( ); } else if( sym == S_ident ) { Symbol *symbol = get_symbol( current_scope, ident ); if( symbol == NULL ) { @@ -1325,6 +1395,9 @@ static void generate_symbol_comment( char *mode, Symbol *constant ) } } else if( constant->type == character_type ) { Emit( "'%c'", constant->character_value ); + } else if( constant->type->class == SYMBOL_CLASS_ARRAY_TYPE ) { + /* TODO: iterate all elements of basic type and issue value */ + Emit( "array %d of %s = { ... }", constant->type->dim, constant->type->type->name ); } else { Abort( "Unhandled symbol (%s) comment case for type '%s'", mode, constant->type->name ); } @@ -1355,6 +1428,12 @@ static void symbol_copy_value( Symbol *from, Symbol *to ) to->boolean_value = from->boolean_value; } else if( from->type == character_type ) { to->character_value = from->character_value; + } else if( from->type->class == SYMBOL_CLASS_ARRAY_TYPE ) { + if( from->type->type == character_type ) { + to->string_value = AllocateAndCopyStr( from->string_value ); + } else { + Abort( "Unhandled case for array type '%s' when copying value of symbol", from->type->name ); + } } else { Abort( "Unhandled case for type '%s' when copying value of symbol", from->type->name ); } @@ -1373,19 +1452,26 @@ static void symbol_copy_node_value( ExpressionNode *from, Symbol *to ) to->boolean_value = from->boolean_value; } else if( from->actual_type == character_type ) { to->character_value = from->character_value; + } else if( from->actual_type->class == SYMBOL_CLASS_ARRAY_TYPE ) { + if( from->actual_type->type == character_type ) { + to->string_value = AllocateAndCopyStr( from->string_value ); + } else { + Abort( "Unhandled case for array type '%s' when copying value of symbol from expression node", from->actual_type->name ); + } } else { Abort( "Unhandled case for type '%s' when copying value of symbol from expression node", from->actual_type->name ); } } -static Symbol *parseSimpleType( void ) +static Symbol *parseSimpleType( Scope *current_scope ) { Symbol *type = get_symbol( current_scope, ident ); if( type == NULL ) { Abort( "Unknown type '%s'", ident ); } - if( type->class != SYMBOL_CLASS_TYPE ) { + if( type->class != SYMBOL_CLASS_SIMPLE_TYPE && + type->class != SYMBOL_CLASS_ARRAY_TYPE ) { Abort( "'%s' is defined, but is not a type as expected", ident ); } @@ -1394,11 +1480,12 @@ static Symbol *parseSimpleType( void ) return type; } -static Symbol *parseType( void ); +static Symbol *parseType( Scope *current_scope ); -static Symbol *parseArrayType( void ) +static Symbol *parseArrayType( Scope *current_scope ) { - Symbol *type = NULL; + char typeName[MAX_IDENT_LEN+1]; + Symbol *type, *simple = NULL; Expect( S_array ); @@ -1407,26 +1494,35 @@ static Symbol *parseArrayType( void ) } Expect( S_of ); + + simple = parseSimpleType( current_scope ); + + snprintf( typeName, MAX_IDENT_LEN, "array %d of %s", num, simple->name ); - parseType( ); + type = get_symbol( current_scope, typeName ); + if( type == NULL ) { + type = insert_symbol( current_scope, typeName, SYMBOL_CLASS_ARRAY_TYPE ); + type->dim = num; + type->type = simple; + } return type; } -static Symbol *parseType( void ) +static Symbol *parseType( Scope *current_scope ) { Symbol *type; if( sym == S_array ) { - type = parseArrayType( ); + type = parseArrayType( current_scope ); } else { - type = parseSimpleType( ); + type = parseSimpleType( current_scope ); } return type; } -static void parseConstDeclaration( void ) +static void parseConstDeclaration( Scope *current_scope ) { int nof_constants = 0; Symbol *constant[MAX_NUMBER_OF_ENUMERATIONS], *type; @@ -1449,7 +1545,7 @@ static void parseConstDeclaration( void ) Expect( S_colon ); - type = parseType( ); + type = parseType( current_scope ); Expect( S_equals ); @@ -1478,14 +1574,14 @@ static void parseConstDeclaration( void ) free_expression_node( node ); } -static void parseConstBlock( void ) +static void parseConstBlock( Scope *current_scope ) { Expect( S_const ); - parseConstDeclaration( ); + parseConstDeclaration( current_scope ); while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_ident ) { - parseConstDeclaration( ); + parseConstDeclaration( current_scope ); } else if( sym == S_begin || sym == S_var ) { return; } else { @@ -1494,7 +1590,7 @@ static void parseConstBlock( void ) } } -static void parseVariableDeclaration( void ) +static void parseVariableDeclaration( Scope *current_scope ) { int nof_variables = 0; Symbol *variable[MAX_NUMBER_OF_ENUMERATIONS], *type; @@ -1517,7 +1613,7 @@ static void parseVariableDeclaration( void ) Expect( S_colon ); - type = parseType( ); + type = parseType( current_scope ); if( sym == S_assign ) { sym = getSym( ); @@ -1553,14 +1649,14 @@ static void parseVariableDeclaration( void ) } } -static void parseVariableBlock( void ) +static void parseVariableBlock( Scope *current_scope ) { Expect( S_var ); - parseVariableDeclaration( ); + parseVariableDeclaration( current_scope ); while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_ident ) { - parseVariableDeclaration( ); + parseVariableDeclaration( current_scope ); } else if( sym == S_begin ) { return; } else { @@ -1569,17 +1665,17 @@ static void parseVariableBlock( void ) } } -static void parseDeclarationBlock( void ) +static void parseDeclarationBlock( Scope *current_scope ) { if( sym == S_const ) { - parseConstBlock( ); + parseConstBlock( current_scope ); } if( sym == S_var ) { - parseVariableBlock( ); + parseVariableBlock( current_scope ); } } -static void parseModule( void ) +static void parseModule( Scope *current_scope ) { Expect( S_module ); Expect( S_ident ); @@ -1587,18 +1683,18 @@ static void parseModule( void ) strlcpy( moduleName, ident, MAX_IDENT_LEN ); } Expect( S_semicolon ); - parseDeclarationBlock( ); - parseStatementBlock( global_scope ); + parseDeclarationBlock( current_scope ); + parseStatementBlock( current_scope ); } static void register_internal_types( Scope *scope ) { Symbol *const_symbol; - integer_type = insert_symbol( current_scope, "integer", SYMBOL_CLASS_TYPE ); + integer_type = insert_symbol( current_scope, "integer", SYMBOL_CLASS_SIMPLE_TYPE ); integer_type->size = 4; - boolean_type = insert_symbol( current_scope, "boolean", SYMBOL_CLASS_TYPE ); + boolean_type = insert_symbol( current_scope, "boolean", SYMBOL_CLASS_SIMPLE_TYPE ); boolean_type->size = 1; const_symbol = insert_symbol( current_scope, "false", SYMBOL_CLASS_CONSTANT ); @@ -1609,7 +1705,7 @@ static void register_internal_types( Scope *scope ) const_symbol->type = boolean_type; const_symbol->boolean_value = 1; - character_type = insert_symbol( current_scope, "character", SYMBOL_CLASS_TYPE ); + character_type = insert_symbol( current_scope, "character", SYMBOL_CLASS_SIMPLE_TYPE ); character_type->size = 1; } @@ -1635,30 +1731,65 @@ static void prologue( void ) Emit( "org $1000000\n" ); } -static void epilogue( void ) +static void reserve_initialize( Symbol *symbol ) { - Symbol *symbol; - - Emit( "hlt\n" ); - symbol = current_scope->symbol; - while( symbol != NULL ) { - if( symbol->class == SYMBOL_CLASS_VARIABLE ) { + switch( symbol->type->class ) { + case SYMBOL_CLASS_SIMPLE_TYPE: if( symbol->type == integer_type ) { - Emit( "%s: dd $", symbol->name ); + Emit( "dd $" ); Emit_DD( symbol->integer_value ); - Emit( "\n" ); } else if( symbol->type == boolean_type ) { - Emit( "%s: db $", symbol->name ); + Emit( "db $" ); Emit_Hexbyte( symbol->boolean_value ); - Emit( "\n" ); } else if( symbol->type == character_type ) { - Emit( "%s: db $", symbol->name ); + Emit( "db $" ); Emit_Hexbyte( symbol->character_value ); - Emit( "\n" ); } else { - Abort( "Unhandled variable space reservation and initializiation for type '%s' in variable '%s'", + Abort( "Unhandled variable space reservation and initializiation for simple type '%s' in variable '%s'", symbol->type->name, symbol->name ); } + Emit( "\n" ); + break; + + case SYMBOL_CLASS_ARRAY_TYPE: { + int i = 0; + + if( symbol->type->type == character_type ) { + int len = strlen( symbol->string_value ); + Emit( "db \"" ); + while( i < len ) { + Emit( "%c", symbol->string_value[i] ); + i++; + } + Emit( "\"" ); + while( i < symbol->type->dim ) { + Emit( "$00" ); + } + Emit( "\n" ); + } else { + for( i = 0; i < symbol->type->dim; i++ ) { + reserve_initialize( symbol->type ); + } + } + + } break; + + default: + Abort( "Unhandled variable space reservation and initializiation for complex type '%s' in variable '%s'", + symbol->type->name, symbol->name ); + } +} + +static void epilogue( void ) +{ + Symbol *symbol; + + Emit( "hlt\n" ); + symbol = current_scope->symbol; + while( symbol != NULL ) { + if( symbol->class == SYMBOL_CLASS_VARIABLE ) { + Emit( "%s: ", symbol->name ); + reserve_initialize( symbol ); } symbol = symbol->next; } @@ -1673,7 +1804,7 @@ int main( void ) { init( ); prologue( ); - parseModule( ); + parseModule( global_scope ); if( sym != S_eof ) { Abort( "Unexpected EOF" ); } diff --git a/ecomp-c/minie.ebnf b/ecomp-c/minie.ebnf index 1392c89..6404899 100644 --- a/ecomp-c/minie.ebnf +++ b/ecomp-c/minie.ebnf @@ -2,11 +2,12 @@ Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" . Letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" . Special = "_" . -Identifier = Letter { Letter | Digit | Special } . +Identifier = Letter { Letter | Digit | "_" } . Number = Digit { Digit } . -Character = "'" Digit | Letter "'" . +Character = "'" Digit | Letter | Special "'" . +String = """" { Character } """" . -Factor = Number | Identifier | "(" Expression ")" | "not" Factor . +Factor = Number | Character | String | Identifier | "(" Expression ")" | "not" Factor . Term = Factor { ( "*" | "/" | "mod" | "and" ) Factor } . SimpleExpression = Term { ( "+" | "-" | "or" ) Term } . RelationalOperator = "=" | "<>" | "<" | ">" | "<=" | ">=" . diff --git a/ecomp-c/test1.e b/ecomp-c/test1.e index 3ad3bac..f18f847 100644 --- a/ecomp-c/test1.e +++ b/ecomp-c/test1.e @@ -8,10 +8,13 @@ const // integer constant N, M : integer = 20; O : integer = N; - C : character = 'a'; + C : character = 'A'; // boolean constant INIT_STATE : boolean = true; + + // string constant + S : array 5 of character = "hello"; var // this is an integer @@ -24,7 +27,7 @@ var flag : boolean; i : integer; j : character := C; - s : array 100 of character; + s : array 5 of character := S; begin a := 1; @@ -54,5 +57,5 @@ begin i := i + 1; end; - j := 'b'; + j := 'B'; end |