summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2020-05-29 19:37:08 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2020-05-29 19:37:08 +0200
commitde5d9c5d1b735e95a20a8671d38697f1c7e638ea (patch)
treec36e67bb004671324b1f847c3a57a6da32530bf7
parent8c3ad4c4acc3212ba5d1ff0a3aada3e714dad071 (diff)
downloadcompilertests-de5d9c5d1b735e95a20a8671d38697f1c7e638ea.tar.gz
compilertests-de5d9c5d1b735e95a20a8671d38697f1c7e638ea.tar.bz2
added support for strings in ec
-rw-r--r--ecomp-c/ec.c247
-rw-r--r--ecomp-c/minie.ebnf7
-rw-r--r--ecomp-c/test1.e9
3 files changed, 199 insertions, 64 deletions
diff --git a/ecomp-c/ec.c b/ecomp-c/ec.c
index 784671f..80bc815 100644
--- a/ecomp-c/ec.c
+++ b/ecomp-c/ec.c
@@ -12,7 +12,8 @@ enum {
MAX_IDENT_LEN = 64,
MAX_NUMBER_LEN = 10,
MAX_NUMBER_OF_ENUMERATIONS = 6,
- MAX_LABEL_LEN = 64
+ MAX_LABEL_LEN = 64,
+ MAX_STRING_LEN = 64
};
static int DEBUG_GETCHAR = 0;
@@ -24,6 +25,7 @@ typedef enum {
S_ident = 0,
S_number,
S_char,
+ S_string,
S_module,
S_begin,
S_end,
@@ -62,6 +64,7 @@ static char *symname[S_eof+1] = {
"ident",
"number",
"char",
+ "string",
"module",
"begin",
"end",
@@ -105,6 +108,7 @@ static S_Symbol sym;
static char ident[MAX_IDENT_LEN+1];
static int num;
static int ch;
+static char str[MAX_STRING_LEN+1];
static void Err( char *s, va_list args )
{
@@ -140,6 +144,18 @@ static void *Allocate( unsigned int size )
return p;
}
+static char *AllocateAndCopyStr( char *s ) /* returns a copy of s in memory obtained from Allocate(); aborts if s is longer than MAX_STRING_LEN */
+{
+ char *d;
+ int len = strlen( s );
+ if( len > MAX_STRING_LEN ) {
+ Abort( "Too long string literal, should not happen" );
+ }
+ d = (char *)Allocate( len + 1 );
+ strlcpy( d, s, len + 1 ); /* size is the destination buffer just allocated; MAX_STRING_LEN here dropped the last char of a maximal string */
+ return d;
+}
+
static int getChar( void )
{
int c;
@@ -220,7 +236,7 @@ static void character( void )
{
if( look == '\'' ) {
look = getChar( );
- if( isDigit( look ) || isAlpha( look ) ) {
+ if( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) {
ch = look;
} else {
Abort( "Expecting a character as character literal" );
@@ -233,6 +249,28 @@ static void character( void )
}
}
+static void string( void ) /* scans a double-quoted literal starting at 'look' into the global buffer 'str' (NUL-terminated) */
+{
+ int n = 0;
+
+ if( look == '"' ) {
+ look = getChar( );
+ while( ( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) && n < MAX_STRING_LEN ) {
+ str[n] = look;
+ n++;
+ look = getChar( );
+ }
+ if( n == MAX_STRING_LEN ) { /* also rejects a literal of exactly MAX_STRING_LEN characters */
+ Abort( "String exceeds maximal length" );
+ }
+ str[n] = '\0';
+ if( look != '\"' ) { /* reached for any character that is not digit/alpha/special, not only at end of input */
+ Abort( "Unterminated string literal" );
+ }
+ look = getChar( ); /* step past the closing quote */
+ }
+}
+
static void identifier( void )
{
int n = 0;
@@ -241,7 +279,7 @@ static void identifier( void )
ident[n] = look;
n++;
look = getChar( );
- while( ( isAlpha( look ) || isDigit( look ) || isSpecial( look ) ) && n < MAX_IDENT_LEN ) {
+ while( ( isAlpha( look ) || isDigit( look ) || look == '_' ) && n < MAX_IDENT_LEN ) {
ident[n] = look;
n++;
look = getChar( );
@@ -440,6 +478,10 @@ static S_Symbol getSym( void )
character( );
s = S_char;
break;
+ case '\"':
+ string( );
+ s = S_string;
+ break;
case ':':
look = getChar( );
if( look == '=' ) {
@@ -571,7 +613,8 @@ static void Emit_DD( int d )
typedef enum {
SYMBOL_CLASS_CONSTANT,
SYMBOL_CLASS_VARIABLE,
- SYMBOL_CLASS_TYPE
+ SYMBOL_CLASS_SIMPLE_TYPE,
+ SYMBOL_CLASS_ARRAY_TYPE
} SymbolClass;
typedef struct Symbol {
@@ -583,9 +626,12 @@ typedef struct Symbol {
int integer_value;
char boolean_value;
int character_value;
+ char *string_value;
/* variable */
int initialized;
int size;
+ /* array type */
+ int dim;
} Symbol;
static Symbol *integer_type;
@@ -604,9 +650,9 @@ static Scope *current_scope;
static Scope *create_scope( Scope *parent, char *name )
{
- Scope *scope = Allocate( sizeof( Scope ) );
+ Scope *scope = (Scope *)Allocate( sizeof( Scope ) );
- scope->name = Allocate( strlen( name ) + 1 );
+ scope->name = (char *)Allocate( strlen( name ) + 1 );
strlcpy( scope->name, name, strlen( name ) + 1 );
scope->symbol = NULL;
scope->parent = parent;
@@ -617,6 +663,9 @@ static Scope *create_scope( Scope *parent, char *name )
static void free_symbol( Symbol *sym )
{
+ if( sym->string_value != NULL ) {
+ free( sym->string_value );
+ }
free( sym->name );
free( sym );
}
@@ -688,6 +737,7 @@ static Symbol *insert_symbol( Scope *scope, char *name, SymbolClass class )
sym->initialized = 0;
sym->size = 0;
sym->next = scope->symbol;
+ sym->string_value = NULL;
scope->symbol = sym;
return sym;
@@ -717,6 +767,7 @@ typedef struct ExpressionNode {
int integer_value;
int boolean_value;
int character_value;
+ char *string_value;
Symbol *symbol;
Symbol *actual_type;
} ExpressionNode;
@@ -726,6 +777,7 @@ static ExpressionNode *create_expression_node( void )
ExpressionNode *node = Allocate( sizeof( ExpressionNode ) );
node->left = NULL;
node->right = NULL;
+ node->string_value = NULL;
return node;
}
@@ -738,6 +790,9 @@ static void free_expression_node( ExpressionNode *node )
if( node->right != NULL ) {
free_expression_node( node->right );
}
+ if( node->string_value != NULL ) {
+ free( node->string_value );
+ }
free( node );
}
@@ -823,10 +878,12 @@ static int get_size( Symbol *symbol )
case SYMBOL_CLASS_CONSTANT:
case SYMBOL_CLASS_VARIABLE:
return get_size( symbol->type );
- case SYMBOL_CLASS_TYPE:
+ case SYMBOL_CLASS_SIMPLE_TYPE:
return symbol->size;
+ case SYMBOL_CLASS_ARRAY_TYPE:
+ default:
+ Abort( "No size for class '%d", symbol->class );
}
- Abort( "No size for class '%d", symbol->class );
return 0;
}
@@ -996,9 +1053,6 @@ static ExpressionNode *parseFactor( void )
if( symbol == NULL ) {
Abort( "Unknown identifier '%s'", ident );
}
- if( symbol->class == SYMBOL_CLASS_TYPE ) {
- Abort( "'%s' is the name for a type and not a constant or variable as expected", ident );
- }
if( symbol->class == SYMBOL_CLASS_CONSTANT ) {
node = create_expression_node( );
node->type = EXPRESSION_NODE_TYPE_CONST;
@@ -1012,8 +1066,7 @@ static ExpressionNode *parseFactor( void )
} else {
Abort( "Unhandled expression assignment from identifier with type '%s'", node->actual_type->name );
}
- }
- if( symbol->class == SYMBOL_CLASS_VARIABLE ) {
+ } else if( symbol->class == SYMBOL_CLASS_VARIABLE ) {
if( !symbol->initialized ) {
Abort( "Variable '%s' has not been initialized yet", ident );
}
@@ -1021,6 +1074,8 @@ static ExpressionNode *parseFactor( void )
node->type = EXPRESSION_NODE_TYPE_VAR;
node->symbol = symbol;
node->actual_type = symbol->type;
+ } else {
+ Abort( "'%s' is the name for a type and not a constant or variable as expected", ident );
}
sym = getSym( );
} else if( sym == S_lparen ) {
@@ -1139,8 +1194,9 @@ static void parseAssignment( Scope *scope )
if( symbol == NULL ) {
Abort( "Unknown variable '%s'", ident );
}
- if( symbol->class == SYMBOL_CLASS_TYPE ) {
- Abort( "'%s' is not a variable as expected", ident );
+ if( symbol->class == SYMBOL_CLASS_SIMPLE_TYPE ||
+ symbol->class == SYMBOL_CLASS_ARRAY_TYPE ) {
+ Abort( "'%s' is a type and not a variable as expected", ident );
}
if( symbol->class == SYMBOL_CLASS_CONSTANT ) {
Abort( "'%s' is a constant and can not be changed", ident );
@@ -1278,6 +1334,8 @@ static void parseStatementBlock( Scope *scope )
static ExpressionNode *parseConstExpression( void )
{
ExpressionNode *node;
+ char typeName[MAX_IDENT_LEN+1];
+ Symbol *type;
node = create_expression_node( );
@@ -1291,6 +1349,18 @@ static ExpressionNode *parseConstExpression( void )
node->character_value = ch;
node->actual_type = character_type;
sym = getSym( );
+ } else if( sym == S_string ) { /* string literal: typed as the array type named "array <len> of <character type name>" */
+ node->type = EXPRESSION_NODE_TYPE_CONST;
+ node->string_value = AllocateAndCopyStr( str );
+ snprintf( typeName, MAX_IDENT_LEN, "array %d of %s", (int)strlen( str ), character_type->name ); /* %d requires int, strlen() returns size_t */
+ type = get_symbol( current_scope, typeName );
+ if( type == NULL ) { /* first literal of this length: register the array type under that name */
+ type = insert_symbol( current_scope, typeName, SYMBOL_CLASS_ARRAY_TYPE );
+ type->dim = strlen( str );
+ type->type = character_type;
+ }
+ node->actual_type = type;
+ sym = getSym( );
} else if( sym == S_ident ) {
Symbol *symbol = get_symbol( current_scope, ident );
if( symbol == NULL ) {
@@ -1325,6 +1395,9 @@ static void generate_symbol_comment( char *mode, Symbol *constant )
}
} else if( constant->type == character_type ) {
Emit( "'%c'", constant->character_value );
+ } else if( constant->type->class == SYMBOL_CLASS_ARRAY_TYPE ) {
+ /* TODO: iterate all elements of basic type and issue value */
+ Emit( "array %d of %s = { ... }", constant->type->dim, constant->type->type->name );
} else {
Abort( "Unhandled symbol (%s) comment case for type '%s'", mode, constant->type->name );
}
@@ -1355,6 +1428,12 @@ static void symbol_copy_value( Symbol *from, Symbol *to )
to->boolean_value = from->boolean_value;
} else if( from->type == character_type ) {
to->character_value = from->character_value;
+ } else if( from->type->class == SYMBOL_CLASS_ARRAY_TYPE ) {
+ if( from->type->type == character_type ) {
+ to->string_value = AllocateAndCopyStr( from->string_value );
+ } else {
+ Abort( "Unhandled case for array type '%s' when copying value of symbol", from->type->name );
+ }
} else {
Abort( "Unhandled case for type '%s' when copying value of symbol", from->type->name );
}
@@ -1373,19 +1452,26 @@ static void symbol_copy_node_value( ExpressionNode *from, Symbol *to )
to->boolean_value = from->boolean_value;
} else if( from->actual_type == character_type ) {
to->character_value = from->character_value;
+ } else if( from->actual_type->class == SYMBOL_CLASS_ARRAY_TYPE ) {
+ if( from->actual_type->type == character_type ) {
+ to->string_value = AllocateAndCopyStr( from->string_value );
+ } else {
+ Abort( "Unhandled case for array type '%s' when copying value of symbol from expression node", from->actual_type->name );
+ }
} else {
Abort( "Unhandled case for type '%s' when copying value of symbol from expression node", from->actual_type->name );
}
}
-static Symbol *parseSimpleType( void )
+static Symbol *parseSimpleType( Scope *current_scope )
{
Symbol *type = get_symbol( current_scope, ident );
if( type == NULL ) {
Abort( "Unknown type '%s'", ident );
}
- if( type->class != SYMBOL_CLASS_TYPE ) {
+ if( type->class != SYMBOL_CLASS_SIMPLE_TYPE &&
+ type->class != SYMBOL_CLASS_ARRAY_TYPE ) {
Abort( "'%s' is defined, but is not a type as expected", ident );
}
@@ -1394,11 +1480,12 @@ static Symbol *parseSimpleType( void )
return type;
}
-static Symbol *parseType( void );
+static Symbol *parseType( Scope *current_scope );
-static Symbol *parseArrayType( void )
+static Symbol *parseArrayType( Scope *current_scope )
{
- Symbol *type = NULL;
+ char typeName[MAX_IDENT_LEN+1];
+ Symbol *type, *simple = NULL;
Expect( S_array );
@@ -1407,26 +1494,35 @@ static Symbol *parseArrayType( void )
}
Expect( S_of );
+
+ simple = parseSimpleType( current_scope );
+
+ snprintf( typeName, MAX_IDENT_LEN, "array %d of %s", num, simple->name );
- parseType( );
+ type = get_symbol( current_scope, typeName );
+ if( type == NULL ) {
+ type = insert_symbol( current_scope, typeName, SYMBOL_CLASS_ARRAY_TYPE );
+ type->dim = num;
+ type->type = simple;
+ }
return type;
}
-static Symbol *parseType( void )
+static Symbol *parseType( Scope *current_scope )
{
Symbol *type;
if( sym == S_array ) {
- type = parseArrayType( );
+ type = parseArrayType( current_scope );
} else {
- type = parseSimpleType( );
+ type = parseSimpleType( current_scope );
}
return type;
}
-static void parseConstDeclaration( void )
+static void parseConstDeclaration( Scope *current_scope )
{
int nof_constants = 0;
Symbol *constant[MAX_NUMBER_OF_ENUMERATIONS], *type;
@@ -1449,7 +1545,7 @@ static void parseConstDeclaration( void )
Expect( S_colon );
- type = parseType( );
+ type = parseType( current_scope );
Expect( S_equals );
@@ -1478,14 +1574,14 @@ static void parseConstDeclaration( void )
free_expression_node( node );
}
-static void parseConstBlock( void )
+static void parseConstBlock( Scope *current_scope )
{
Expect( S_const );
- parseConstDeclaration( );
+ parseConstDeclaration( current_scope );
while( sym == S_semicolon ) {
sym = getSym( );
if( sym == S_ident ) {
- parseConstDeclaration( );
+ parseConstDeclaration( current_scope );
} else if( sym == S_begin || sym == S_var ) {
return;
} else {
@@ -1494,7 +1590,7 @@ static void parseConstBlock( void )
}
}
-static void parseVariableDeclaration( void )
+static void parseVariableDeclaration( Scope *current_scope )
{
int nof_variables = 0;
Symbol *variable[MAX_NUMBER_OF_ENUMERATIONS], *type;
@@ -1517,7 +1613,7 @@ static void parseVariableDeclaration( void )
Expect( S_colon );
- type = parseType( );
+ type = parseType( current_scope );
if( sym == S_assign ) {
sym = getSym( );
@@ -1553,14 +1649,14 @@ static void parseVariableDeclaration( void )
}
}
-static void parseVariableBlock( void )
+static void parseVariableBlock( Scope *current_scope )
{
Expect( S_var );
- parseVariableDeclaration( );
+ parseVariableDeclaration( current_scope );
while( sym == S_semicolon ) {
sym = getSym( );
if( sym == S_ident ) {
- parseVariableDeclaration( );
+ parseVariableDeclaration( current_scope );
} else if( sym == S_begin ) {
return;
} else {
@@ -1569,17 +1665,17 @@ static void parseVariableBlock( void )
}
}
-static void parseDeclarationBlock( void )
+static void parseDeclarationBlock( Scope *current_scope )
{
if( sym == S_const ) {
- parseConstBlock( );
+ parseConstBlock( current_scope );
}
if( sym == S_var ) {
- parseVariableBlock( );
+ parseVariableBlock( current_scope );
}
}
-static void parseModule( void )
+static void parseModule( Scope *current_scope )
{
Expect( S_module );
Expect( S_ident );
@@ -1587,18 +1683,18 @@ static void parseModule( void )
strlcpy( moduleName, ident, MAX_IDENT_LEN );
}
Expect( S_semicolon );
- parseDeclarationBlock( );
- parseStatementBlock( global_scope );
+ parseDeclarationBlock( current_scope );
+ parseStatementBlock( current_scope );
}
static void register_internal_types( Scope *scope )
{
Symbol *const_symbol;
- integer_type = insert_symbol( current_scope, "integer", SYMBOL_CLASS_TYPE );
+ integer_type = insert_symbol( current_scope, "integer", SYMBOL_CLASS_SIMPLE_TYPE );
integer_type->size = 4;
- boolean_type = insert_symbol( current_scope, "boolean", SYMBOL_CLASS_TYPE );
+ boolean_type = insert_symbol( current_scope, "boolean", SYMBOL_CLASS_SIMPLE_TYPE );
boolean_type->size = 1;
const_symbol = insert_symbol( current_scope, "false", SYMBOL_CLASS_CONSTANT );
@@ -1609,7 +1705,7 @@ static void register_internal_types( Scope *scope )
const_symbol->type = boolean_type;
const_symbol->boolean_value = 1;
- character_type = insert_symbol( current_scope, "character", SYMBOL_CLASS_TYPE );
+ character_type = insert_symbol( current_scope, "character", SYMBOL_CLASS_SIMPLE_TYPE );
character_type->size = 1;
}
@@ -1635,30 +1731,65 @@ static void prologue( void )
Emit( "org $1000000\n" );
}
-static void epilogue( void )
+static void reserve_initialize( Symbol *symbol ) /* emits the data directive and initial value for one symbol; the label is emitted by the caller */
{
- Symbol *symbol;
-
- Emit( "hlt\n" );
- symbol = current_scope->symbol;
- while( symbol != NULL ) {
- if( symbol->class == SYMBOL_CLASS_VARIABLE ) {
+ switch( symbol->type->class ) {
+ case SYMBOL_CLASS_SIMPLE_TYPE:
if( symbol->type == integer_type ) {
- Emit( "%s: dd $", symbol->name );
+ Emit( "dd $" );
Emit_DD( symbol->integer_value );
- Emit( "\n" );
} else if( symbol->type == boolean_type ) {
- Emit( "%s: db $", symbol->name );
+ Emit( "db $" );
Emit_Hexbyte( symbol->boolean_value );
- Emit( "\n" );
} else if( symbol->type == character_type ) {
- Emit( "%s: db $", symbol->name );
+ Emit( "db $" );
Emit_Hexbyte( symbol->character_value );
- Emit( "\n" );
} else {
- Abort( "Unhandled variable space reservation and initializiation for type '%s' in variable '%s'",
+ Abort( "Unhandled variable space reservation and initializiation for simple type '%s' in variable '%s'",
symbol->type->name, symbol->name );
}
+ Emit( "\n" );
+ break;
+
+ case SYMBOL_CLASS_ARRAY_TYPE: {
+ int i = 0;
+
+ if( symbol->type->type == character_type ) {
+ int len = ( symbol->string_value != NULL ) ? (int)strlen( symbol->string_value ) : 0; /* string_value stays NULL when no string was copied into the symbol */
+ Emit( "db \"" );
+ while( i < len ) {
+ Emit( "%c", symbol->string_value[i] );
+ i++;
+ }
+ Emit( "\"" );
+ for( ; i < symbol->type->dim; i++ ) { /* pad up to the array dimension; the counter must advance or this never terminates */
+ Emit( "$00" ); /* NOTE(review): no separator is emitted between the string and the padding bytes - confirm assembler syntax */
+ }
+ Emit( "\n" );
+ } else {
+ for( i = 0; i < symbol->type->dim; i++ ) {
+ reserve_initialize( symbol->type ); /* NOTE(review): passes the array type symbol, so the value fields read are those of the type symbol - confirm intent */
+ }
+ }
+
+ } break;
+
+ default:
+ Abort( "Unhandled variable space reservation and initializiation for complex type '%s' in variable '%s'",
+ symbol->type->name, symbol->name );
+ }
+}
+
+static void epilogue( void )
+{
+ Symbol *symbol;
+
+ Emit( "hlt\n" );
+ symbol = current_scope->symbol;
+ while( symbol != NULL ) {
+ if( symbol->class == SYMBOL_CLASS_VARIABLE ) {
+ Emit( "%s: ", symbol->name );
+ reserve_initialize( symbol );
}
symbol = symbol->next;
}
@@ -1673,7 +1804,7 @@ int main( void )
{
init( );
prologue( );
- parseModule( );
+ parseModule( global_scope );
if( sym != S_eof ) {
Abort( "Unexpected EOF" );
}
diff --git a/ecomp-c/minie.ebnf b/ecomp-c/minie.ebnf
index 1392c89..6404899 100644
--- a/ecomp-c/minie.ebnf
+++ b/ecomp-c/minie.ebnf
@@ -2,11 +2,12 @@ Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
Letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" |
"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" .
Special = "_" .
-Identifier = Letter { Letter | Digit | Special } .
+Identifier = Letter { Letter | Digit | "_" } .
Number = Digit { Digit } .
-Character = "'" Digit | Letter "'" .
+Character = "'" ( Digit | Letter | Special ) "'" .
+String = """" { Digit | Letter | Special } """" .
-Factor = Number | Identifier | "(" Expression ")" | "not" Factor .
+Factor = Number | Character | String | Identifier | "(" Expression ")" | "not" Factor .
Term = Factor { ( "*" | "/" | "mod" | "and" ) Factor } .
SimpleExpression = Term { ( "+" | "-" | "or" ) Term } .
RelationalOperator = "=" | "<>" | "<" | ">" | "<=" | ">=" .
diff --git a/ecomp-c/test1.e b/ecomp-c/test1.e
index 3ad3bac..f18f847 100644
--- a/ecomp-c/test1.e
+++ b/ecomp-c/test1.e
@@ -8,10 +8,13 @@ const
// integer constant
N, M : integer = 20;
O : integer = N;
- C : character = 'a';
+ C : character = 'A';
// boolean constant
INIT_STATE : boolean = true;
+
+ // string constant
+ S : array 5 of character = "hello";
var
// this is an integer
@@ -24,7 +27,7 @@ var
flag : boolean;
i : integer;
j : character := C;
- s : array 100 of character;
+ s : array 5 of character := S;
begin
a := 1;
@@ -54,5 +57,5 @@ begin
i := i + 1;
end;
- j := 'b';
+ j := 'B';
end