/* * Bootstrapping E compiler written in C. * * Uses only minimal features of C, no preprocessor, minimal functions * of the standard library and is in C89. * */ /* constants */ enum { MAX_IDENT_LEN = 64, MAX_NUMBER_LEN = 10, MAX_NUMBER_OF_ENUMERATIONS = 6, MAX_LABEL_LEN = 64, MAX_STRING_LEN = 64 }; static int DEBUG_GETCHAR = 0; static int DEBUG_SCANNER = 0; typedef enum { BINARY_FORMAT_EMUL = 0, BINARY_FORMAT_ELF = 1 } BinaryFormat; static BinaryFormat binary_format = BINARY_FORMAT_EMUL; /* scanner */ typedef enum { S_ident = 0, S_number, S_char, S_string, S_module, S_begin, S_end, S_array, S_of, S_var, S_const, S_if, S_do, S_else, S_while, S_procedure, S_div, S_mod, S_not, S_and, S_or, S_semicolon, S_colon, S_assign, S_equals, S_plus, S_minus, S_star, S_slash, S_lparen, S_rparen, S_lbracket, S_rbracket, S_comma, S_less, S_less_or_equals, S_more, S_more_or_equals, S_not_equals, S_eof } S_Symbol; static char *symname[S_eof+1] = { "ident", "number", "char", "string", "module", "begin", "end", "array", "of", "var", "const", "if", "do", "else", "while", "procedure", "div", "mod", "not", "and", "or", ";", ":", ":=", "=", "+", "-", "*", "/", "(", ")", "[", "]", ",", "<", "<=", ">", ">=", "<>", "eof" }; static int col; static int row; static int look; static S_Symbol sym; static char ident[MAX_IDENT_LEN+1]; static int num; static int ch; static char str[MAX_STRING_LEN+1]; static void Err( char *s, va_list args ) { fprintf( stderr, "Error line %d, pos %d: ", row, col ); vfprintf( stderr, s, args ); fputs( "\n", stderr ); fflush( stderr ); } static void Halt( int code ) { exit( code ); } static void Abort( char *s, ... ) { va_list args; va_start( args, s ); Err( s, args ); va_end( args ); Halt( EXIT_FAILURE ); } static void *Allocate( unsigned int size ) { char *p; p = malloc( size ); if( p == NULL ) { Abort( "Out of memory" ); } return p; } static char *AllocateAndCopyStr( char *s ) { char *d; int len; if( s == NULL ) { return NULL; } len = strlen( s ); if( len > MAX_STRING_LEN ) { Abort( "Too long string literal, should not happen" ); } d = (char *)Allocate( len + 1 ); strlcpy( d, s, MAX_STRING_LEN ); return d; } static int getChar( void ) { int c; c = getchar( ); if( DEBUG_GETCHAR ) { if( c == '\n' ) { fprintf( stderr, "getchar -> '\\n'\n" ); } else if( c == EOF ) { fprintf( stderr, "getchar -> 'EOF'\n" ); } else { fprintf( stderr, "getchar -> '%c'\n", c ); } } if( c == EOF ) { return c; } col++; if( c == '\n' ) { col = 1; row++; } return c; } static int isWhite( int c ) { if( c == ' ' || c == '\r' || c == '\n' || c == '\t' ) return 1; return 0; } static int isAlpha( int c ) { if( ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' ) ) return 1; return 0; } static int isDigit( int c ) { if( ( c >= '0' && c <= '9' ) ) return 1; return 0; } static int isSpecial( int c ) { switch( c ) { case '_': case ' ': return 1; default: return 0; } } static void skipWhite( void ) { while( isWhite( look ) ) { look = getChar( ); } } static void number( void ) { int n = 0; if( isDigit( look ) ) { num = look - '0'; look = getChar( ); while( isDigit( look ) && n < MAX_NUMBER_LEN ) { n++; num = 10 * num + ( look - '0' ); look = getChar( ); } if( n == MAX_NUMBER_LEN ) { Abort( "Number exceeds maximal length" ); } } } static void character( void ) { if( look == '\'' ) { look = getChar( ); if( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) { ch = look; } else { Abort( "Expecting a character as character literal" ); } look = getChar( ); if( look != '\'' ) { Abort( "Unterminated character literal" ); } look = getChar( ); } } static void string( void ) { int n = 0; if( look == '"' ) { look = getChar( ); while( ( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) && n < MAX_STRING_LEN ) { str[n] = look; n++; look = getChar( ); } if( n == MAX_STRING_LEN ) { Abort( "String exceeds maximal length" ); } str[n] = '\0'; if( look != '\"' ) { Abort( "Unterminated string literal" ); } look = getChar( ); } } static void identifier( void ) { int n = 0; if( isAlpha( look ) ) { ident[n] = look; n++; look = getChar( ); while( ( isAlpha( look ) || isDigit( look ) || look == '_' ) && n < MAX_IDENT_LEN ) { ident[n] = look; n++; look = getChar( ); } ident[n] = '\0'; if( n == MAX_IDENT_LEN ) { Abort( "Identifier exceeds maximal length" ); } } } static void skipEolComment( void ) { look = getChar( ); while( look != '\n' ) { look = getChar( ); } look = getChar( ); } static void skipBlockComment( void ) { do { while( look != '*' ) { look = getChar( ); } look = getChar( ); } while( look != '/' ); look = getChar( ); } static S_Symbol getSym( void ) { int s = S_eof; skipWhite( ); switch( look ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': number( ); s = S_number; break; case 'a': identifier( ); if( strcmp( ident, "and" ) == 0 ) { s = S_and; } else if( strcmp( ident, "array" ) == 0 ) { s = S_array; } else { s = S_ident; } break; case 'b': identifier( ); if( strcmp( ident, "begin" ) == 0 ) { s = S_begin; } else { s = S_ident; } break; case 'c': identifier( ); if( strcmp( ident, "const" ) == 0 ) { s = S_const; } else { s = S_ident; } break; case 'd': identifier( ); if( strcmp( ident, "do" ) == 0 ) { s = S_do; } else if( strcmp( ident, "div" ) == 0 ) { s = S_div; } else { s = S_ident; } break; case 'e': identifier( ); if( strcmp( ident, "end" ) == 0 ) { s = S_end; } else if( strcmp( ident, "else" ) == 0 ) { s = S_else; } else { s = S_ident; } break; case 'i': identifier( ); if( strcmp( ident, "if" ) == 0 ) { s = S_if; } else { s = S_ident; } break; case 'm': identifier( ); if( strcmp( ident, "mod" ) == 0 ) { s = S_mod; } else if( strcmp( ident, "module" ) == 0 ) { s = S_module; } else { s = S_ident; } break; case 'n': identifier( ); if( strcmp( ident, "not" ) == 0 ) { s = S_not; } else { s = S_ident; } break; case 'o': identifier( ); if( strcmp( ident, "or" ) == 0 ) { s = S_or; } else if( strcmp( ident, "of" ) == 0 ) { s = S_of; } else { s = S_ident; } break; case 'p': identifier( ); if( strcmp( ident, "procedure" ) == 0 ) { s = S_procedure; } else { s = S_ident; } break; case 'v': identifier( ); if( strcmp( ident, "var" ) == 0 ) { s = S_var; } else { s = S_ident; } break; case 'w': identifier( ); if( strcmp( ident, "while" ) == 0 ) { s = S_while; } else { s = S_ident; } break; case 'f': case 'g': case 'h': case 'j': case 'k': case 'l': case 'q': case 'r': case 's': case 't': case 'u': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': identifier( ); s = S_ident; break; case '\'': character( ); s = S_char; break; case '\"': string( ); s = S_string; break; case ':': look = getChar( ); if( look == '=' ) { look = getChar( ); s = S_assign; } else { s = S_colon; } break; case ';': look = getChar( ); s = S_semicolon; break; case '=': look = getChar( ); s = S_equals; break; case '+': look = getChar( ); s = S_plus; break; case '-': look = getChar( ); s = S_minus; break; case '*': look = getChar( ); s = S_star; break; case '/': look = getChar( ); if( look == '/' ) { skipEolComment( ); return getSym( ); } else if( look == '*' ) { skipBlockComment( ); return getSym( ); } else { s = S_slash; } break; case '(': look = getChar( ); s = S_lparen; break; case ')': look = getChar( ); s = S_rparen; break; case '[': look = getChar( ); s = S_lbracket; break; case ']': look = getChar( ); s = S_rbracket; break; case ',': look = getChar( ); s = S_comma; break; case '<': look = getChar( ); if( look == '=' ) { look = getChar( ); s = S_less_or_equals; } else if( look == '>' ) { look = getChar( ); s = S_not_equals; } else { s = S_less; } break; case '>': look = getChar( ); if( look == '=' ) { look = getChar( ); s = S_more_or_equals; } else { s = S_more; } break; case EOF: s = S_eof; break; default: Abort( "Illegal character '%c'", (char)look ); } if( DEBUG_SCANNER ) { switch( s ) { case S_ident: fprintf( stderr, "sym -> %s( '%s' )\n", symname[s], ident ); break; case S_number: fprintf( stderr, "sym -> %s( '%d' )\n", symname[s], num ); break; default: fprintf( stderr, "sym -> %s\n", symname[s] ); } } return s; } /* codegen */ static void Emit( char *s, ... ) { va_list args; va_start( args, s ); vprintf( s, args ); va_end( args ); fflush( stdout ); } static void Emit_Hexbyte( int d ) { /* TODO: maybe add %02X to our printf */ if( d < 16 ) { Emit( "0%X", d ); } else { Emit( "%X", d ); } } static void Emit_DD( int d ) { Emit_Hexbyte( ( d >> 24 ) & 0xFF ); Emit_Hexbyte( ( d >> 16 ) & 0xFF ); Emit_Hexbyte( ( d >> 8 ) & 0xFF ); Emit_Hexbyte( d & 0xFF ); } /* parser */ typedef enum { SYMBOL_CLASS_CONSTANT, SYMBOL_CLASS_VARIABLE, SYMBOL_CLASS_SIMPLE_TYPE, SYMBOL_CLASS_ARRAY_TYPE, SYMBOL_CLASS_PROCEDURE_TYPE } SymbolClass; typedef struct Symbol { SymbolClass class; char *name; struct Symbol *next; struct Symbol *type; /* constant */ int integer_value; int boolean_value; char character_value; char *string_value; int *intarr_value; /* variable */ int size; int offset; /* array type */ int dim; /* procedure type */ char *label; struct Scope *scope; struct Symbol *param; } Symbol; static Symbol *integer_type; static Symbol *boolean_type; static Symbol *character_type; typedef struct Scope { char *name; int label_no; struct Symbol *symbol; struct Scope *parent; int nof_symbols; } Scope; static Scope *global_scope; static Scope *create_scope( Scope *parent, char *name ) { Scope *scope = (Scope *)Allocate( sizeof( Scope ) ); scope->name = (char *)Allocate( strlen( name ) + 1 ); strlcpy( scope->name, name, strlen( name ) + 1 ); scope->symbol = NULL; scope->parent = parent; scope->label_no = 0; scope->nof_symbols = 0; return scope; } static void free_symbol( Symbol *sym ) { if( sym->string_value != NULL ) { free( sym->string_value ); } if( sym->intarr_value != NULL ) { free( sym->intarr_value ); } if( sym->label != NULL ) { free( sym->label ); } if( sym->param != NULL ) { Symbol *param, *tmp; param = sym->param; while( param != NULL ) { tmp = param->next; free_symbol( param ); param = tmp; } } free( sym->name ); free( sym ); } static void free_scope( Scope *scope ) { Symbol *sym, *next; sym = scope->symbol; while( sym != NULL ) { next = sym->next; free_symbol( sym ); sym = next; } free( scope->name ); free( scope ); } char *get_local_label( Scope *scope ) { char *label; label = Allocate( MAX_LABEL_LEN ); snprintf( label, MAX_LABEL_LEN - 1, "__%s_%d", scope->name, scope->label_no ); scope->label_no++; return label; } char *get_scoped_label( Scope *scope, char *name ) { char *label; label = Allocate( MAX_LABEL_LEN ); snprintf( label, MAX_LABEL_LEN - 1, "__%s_%s", scope->name, name ); return label; } static Symbol *get_symbol( Scope *scope, char *name ) { Symbol *sym; while( scope != NULL ) { sym = scope->symbol; while( sym != NULL ) { if( strcmp( sym->name, name ) == 0 ) { return sym; } sym = sym->next; } scope = scope->parent; } return NULL; } static Symbol *create_symbol( char *name, SymbolClass class ) { Symbol *symbol; symbol = Allocate( sizeof( Symbol ) ); symbol->class = class; symbol->name = Allocate( strlen( name ) + 1 ); strlcpy( symbol->name, name, strlen( name ) + 1 ); symbol->size = 0; symbol->offset = 0; symbol->dim = 0; symbol->string_value = NULL; symbol->intarr_value = NULL; symbol->type = NULL; symbol->label = NULL; symbol->param = NULL; return symbol; } static Symbol *copy_symbol( Symbol *symbol ) { Symbol *copy; copy = Allocate( sizeof( Symbol ) ); memcpy( copy, symbol, sizeof( Symbol ) ); copy->name = AllocateAndCopyStr( symbol->name ); copy->string_value = AllocateAndCopyStr( symbol->string_value ); copy->label = AllocateAndCopyStr( symbol->label ); return copy; } static Symbol *insert_symbol( Scope *scope, char *name, SymbolClass class ) { Symbol *symbol; symbol = scope->symbol; while( symbol != NULL ) { if( strcmp( symbol->name, name ) == 0 ) { return symbol; } symbol = symbol->next; } if( symbol != NULL ) { Abort( "'%s' is already defined.", name ); } symbol = create_symbol( name, class ); symbol->next = scope->symbol; scope->symbol = symbol; scope->nof_symbols++; return symbol; } static char moduleName[MAX_IDENT_LEN+1]; static void Expect( S_Symbol expect ) { if( sym == expect ) { sym = getSym( ); } else { Abort( "Expected symbol '%s'", symname[expect] ); } } typedef enum ExpressionNodeType { EXPRESSION_NODE_TYPE_CONST, EXPRESSION_NODE_TYPE_VAR, EXPRESSION_NODE_TYPE_OP } ExpressionNodeType; typedef struct ExpressionNode { ExpressionNodeType type; S_Symbol op; struct ExpressionNode *left, *right, *next, *prev; int integer_value; int boolean_value; char character_value; char *string_value; int *intarr_value; Symbol *symbol; Symbol *actual_type; } ExpressionNode; typedef struct ExpressionNodeList { ExpressionNode *head; ExpressionNode *tail; } ExpressionNodeList; static ExpressionNode *create_expression_node( void ) { ExpressionNode *node = Allocate( sizeof( ExpressionNode ) ); node->left = NULL; node->right = NULL; node->next = NULL; node->prev = NULL; node->string_value = NULL; node->intarr_value = NULL; return node; } static void free_expression_node( ExpressionNode *node ) { if( node->left != NULL ) { free_expression_node( node->left ); } if( node->right != NULL ) { free_expression_node( node->right ); } if( node->next != NULL ) { free_expression_node( node->next ); } /* do not free node->prev! double linked list */ if( node->string_value != NULL ) { free( node->string_value ); } if( node->intarr_value != NULL ) { free( node->intarr_value ); } free( node ); } static void generate_expression_comment( ExpressionNode *node ) { switch( node->type ) { case EXPRESSION_NODE_TYPE_CONST: if( node->actual_type == integer_type ) { Emit( "%d ", node->integer_value ); } else if( node->actual_type == boolean_type ) { if( node->boolean_value == 0 ) { Emit( "false " ); } else if( node->boolean_value == 1 ) { Emit( "true " ); } else { Abort( "Internal representation problem of a boolean (is '%d')", node->boolean_value ); } } else if( node->actual_type == character_type ) { Emit( "'%c' ", node->character_value ); } else { Abort( "Unhandled basic type '%s' in assembly comment generation", node->actual_type->name ); } break; case EXPRESSION_NODE_TYPE_VAR: Emit( "%s ", node->symbol->name ); break; case EXPRESSION_NODE_TYPE_OP: if( node->left != NULL ) { generate_expression_comment( node->left ); } if( node->right != NULL ) { generate_expression_comment( node->right ); } Emit( "%s ", symname[node->op] ); break; default: Abort( "Unhandled case in expression tree while outputing comment!" ); } } static int get_size( Symbol *symbol ); static void EmitArrayAddress( Symbol *array ) { /* expecting result of expression (the array index) on top of the stack * and an array symbol */ Emit( "pop eax\n" ); Emit( "mov ebx, %d\n", get_size( array->type->type ) ); Emit( "mul ebx\n" ); Emit( "push eax\n" ); if( array->offset == 0 ) { /* global variable */ Emit( "mov eax, %s\n", array->name ); } else { /* local variable */ Emit( "push ebp\n" ); Emit( "pop eax\n" ); Emit( "mov ebx, %d\n", array->offset ); Emit( "sub eax, ebx\n" ); } Emit( "pop ebx\n" ); Emit( "add eax, ebx\n" ); Emit( "push eax\n" ); } static void EmitArrayDereference( Symbol *array ) { EmitArrayAddress( array ); Emit( "pop ebx\n" ); switch( get_size( array->type->type ) ) { case 4: Emit( "mov eax, [ebx]\n" ); Emit( "push eax\n" ); break; case 1: Emit( "mov al, [ebx]\n" ); Emit( "push eax\n" ); break; default: Abort( "Unhandled size %d when retrieving register indirectly from memory", array->type->size ); } } static void EmitConditionTest( Scope *scope, S_Symbol relation ) { char *jump_label1 = get_local_label( scope ); char *jump_label2 = get_local_label( scope ); Emit( "cmp eax, ebx\n" ); switch( relation ) { case S_equals: Emit( "je %s\n", jump_label1 ); break; case S_less: Emit( "jb %s\n", jump_label1 ); break; case S_less_or_equals: Emit( "jbe %s\n", jump_label1 ); break; case S_more: Emit( "ja %s\n", jump_label1 ); break; case S_more_or_equals: Emit( "jae %s\n", jump_label1 ); break; case S_not_equals: Emit( "jne %s\n", jump_label1 ); break; default: Abort( "Unhandled relation '%s' when generating compare code in expression tree!", symname[relation] ); } Emit( "mov eax, 0\n" ); Emit( "jmp %s\n", jump_label2 ); Emit( "%s: mov eax, 1\n", jump_label1 ); Emit( "%s:\n", jump_label2 ); free( jump_label2 ); free( jump_label1 ); } static int is_compatible_type( Symbol *to, Symbol *from ) { if( to == from ) { return 1; } if( to->class == SYMBOL_CLASS_ARRAY_TYPE && from->class == SYMBOL_CLASS_ARRAY_TYPE ) { if( to->type == character_type && from->type == character_type ) { if( to->dim >= from->dim ) { return 1; } } } return 0; } static int get_size( Symbol *symbol ) { switch( symbol->class ) { case SYMBOL_CLASS_CONSTANT: case SYMBOL_CLASS_VARIABLE: return get_size( symbol->type ); case SYMBOL_CLASS_SIMPLE_TYPE: return symbol->size; case SYMBOL_CLASS_ARRAY_TYPE: return symbol->dim * get_size( symbol->type ); default: Abort( "No size for class '%d", symbol->class ); } return 0; } static void emit_expression_code( ExpressionNode *node, Scope *scope ) { switch( node->type ) { case EXPRESSION_NODE_TYPE_CONST: if( node->actual_type == integer_type ) { Emit( "mov eax, %d\n", node->integer_value ); } else if( node->actual_type == boolean_type ) { Emit( "mov eax, %d\n", node->boolean_value ); } else if( node->actual_type == character_type ) { Emit( "mov eax, %d\n", node->character_value ); } else { Abort( "Missing expression node code generation handling for type '%s'", node->actual_type->name ); } Emit( "push eax\n" ); break; case EXPRESSION_NODE_TYPE_VAR: switch( get_size( node->symbol ) ) { case 4: /* TODO: cleanup local global variable access */ if( node->symbol->offset == 0 ) { Emit( "mov eax, [%s]\n", node->symbol->name ); } else { Emit( "push ebp\n" ); Emit( "pop ebx\n" ); Emit( "mov eax, %d\n", node->symbol->offset ); Emit( "sub ebx, eax\n" ); Emit( "mov eax, [ebx]\n" ); } break; case 1: /* TODO: cleanup local global variable access */ if( node->symbol->offset == 0 ) { Emit( "mov al, [%s]\n", node->symbol->name ); } else { Emit( "push ebp\n" ); Emit( "pop ebx\n" ); Emit( "mov eax, %d\n", node->symbol->offset ); Emit( "sub ebx, eax\n" ); Emit( "mov al, [ebx]\n" ); } break; default: Abort( "Unhandled size %d when reading register from memory", node->symbol->size ); } Emit( "push eax\n" ); break; case EXPRESSION_NODE_TYPE_OP: switch( node->op ) { case S_plus: emit_expression_code( node->left, scope ); emit_expression_code( node->right, scope ); Emit( "pop ebx\n" ); Emit( "pop eax\n" ); Emit( "add eax, ebx\n" ); Emit( "push eax\n" ); break; case S_minus: emit_expression_code( node->left, scope ); emit_expression_code( node->right, scope ); Emit( "pop ebx\n" ); Emit( "pop eax\n" ); Emit( "sub eax, ebx\n" ); Emit( "push eax\n" ); break; case S_star: emit_expression_code( node->left, scope ); emit_expression_code( node->right, scope ); Emit( "pop ebx\n" ); Emit( "pop eax\n" ); Emit( "mul ebx\n" ); Emit( "push eax\n" ); break; case S_div: emit_expression_code( node->left, scope ); emit_expression_code( node->right, scope ); Emit( "pop ebx\n" ); Emit( "pop eax\n" ); Emit( "div ebx\n" ); Emit( "push eax\n" ); break; case S_mod: emit_expression_code( node->left, scope ); emit_expression_code( node->right, scope ); Emit( "pop ebx\n" ); Emit( "pop eax\n" ); Emit( "mov edx, 0\n" ); Emit( "div ebx\n" ); Emit( "push edx\n" ); break; case S_equals: case S_less: case S_less_or_equals: case S_more: case S_more_or_equals: case S_not_equals: emit_expression_code( node->left, scope ); emit_expression_code( node->right, scope ); Emit( "pop ebx\n" ); Emit( "pop eax\n" ); EmitConditionTest( scope, node->op ); Emit( "push eax\n" ); break; case S_not: { char *jump_label1 = get_local_label( scope ); char *jump_label2 = get_local_label( scope ); Emit( "; expr not\n" ); emit_expression_code( node->left, scope ); Emit( "pop eax\n" ); Emit( "mov ebx, 0\n" ); Emit( "cmp eax, ebx\n" ); Emit( "je %s\n", jump_label1 ); Emit( "mov eax, 0\n" ); Emit( "jmp %s\n", jump_label2 ); Emit( "%s:\n", jump_label1 ); Emit( "mov eax, 1\n" ); Emit( "%s:\n", jump_label2 ); Emit( "push eax\n" ); free( jump_label2 ); free( jump_label1 ); } break; case S_and: { char *jump_label = get_local_label( scope ); Emit( "; expr left\n" ); emit_expression_code( node->left, scope ); Emit( "pop eax\n" ); Emit( "mov ebx, 0\n" ); Emit( "cmp eax, ebx\n" ); Emit( "je %s\n", jump_label ); Emit( "; expr right\n" ); emit_expression_code( node->right, scope ); Emit( "pop eax\n" ); Emit( "%s:\n", jump_label ); Emit( "push eax\n" ); free( jump_label ); } break; case S_or: { char *jump_label = get_local_label( scope ); Emit( "; expr left\n" ); emit_expression_code( node->left, scope ); Emit( "pop eax\n" ); Emit( "mov ebx, 1\n" ); Emit( "cmp eax, ebx\n" ); Emit( "je %s\n", jump_label ); Emit( "; expr right\n" ); emit_expression_code( node->right, scope ); Emit( "pop eax\n" ); Emit( "%s:\n", jump_label ); Emit( "push eax\n" ); } break; case S_lbracket: emit_expression_code( node->left, scope ); EmitArrayDereference( node->symbol ); break; default: Abort( "Unhandled operation '%s' when generating operation in expression tree!", symname[node->op] ); } break; default: Abort( "Unhandled case in expression tree when generating output code!" ); } } static ExpressionNode *parseExpression( Scope *scope ); static ExpressionNode *parseFactor( Scope *scope ) { Symbol *symbol; ExpressionNode *node = NULL; if( sym == S_number ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->integer_value = num; node->actual_type = integer_type; sym = getSym( ); } else if( sym == S_char ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->character_value = ch; node->actual_type = character_type; sym = getSym( ); } else if( sym == S_ident ) { symbol = get_symbol( scope, ident ); if( symbol == NULL ) { Abort( "Unknown identifier '%s'", ident ); } if( symbol->class == SYMBOL_CLASS_CONSTANT ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->actual_type = symbol->type; if( node->actual_type == integer_type ) { node->integer_value = symbol->integer_value; } else if( node->actual_type == boolean_type ) { node->boolean_value = symbol->boolean_value; } else if( node->actual_type == character_type ) { node->character_value = symbol->character_value; } else { Abort( "Unhandled expression assignment from identifier with type '%s'", node->actual_type->name ); } sym = getSym( ); } else if( symbol->class == SYMBOL_CLASS_VARIABLE ) { sym = getSym( ); if( sym == S_lbracket ) { Expect( S_lbracket ); if( symbol->type->class != SYMBOL_CLASS_ARRAY_TYPE ) { Abort( "Dereferencing non-array '%s'", symbol->name ); } node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = S_lbracket; node->left = parseExpression( scope ); node->symbol = symbol; node->actual_type = symbol->type->type; Expect( S_rbracket ); } else { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_VAR; node->symbol = symbol; node->actual_type = symbol->type; } } else { Abort( "'%s' is the name for a type and not a constant or variable as expected", ident ); } } else if( sym == S_lparen ) { sym = getSym( ); node = parseExpression( scope ); Expect( S_rparen ); } else if( sym == S_not ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; sym = getSym( ); node->left = parseFactor( scope ); node->actual_type = node->left->actual_type; } else { Abort( "Expected a literal, a variable or a constant." ); } return node; } static ExpressionNode *parseTerm( Scope *scope ) { ExpressionNode *node, *tmp; node = parseFactor( scope ); while( sym == S_star || sym == S_div || sym == S_mod || sym == S_and ) { tmp = node; node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; sym = getSym( ); node->left = tmp; node->right = parseFactor( scope ); if( node->left->actual_type == node->right->actual_type ) { node->actual_type = node->left->actual_type; } else { Abort( "Incompatible types in expression: '%s' on the left, '%s' on the right", node->left->actual_type->name, node->right->actual_type->name ); } } return node; } static ExpressionNode *parseSimpleExpression( Scope *scope ) { ExpressionNode *node, *tmp; node = parseTerm( scope ); while( sym == S_plus || sym == S_minus || sym == S_or ) { tmp = node; node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; sym = getSym( ); node->left = tmp; node->right = parseTerm( scope ); if( node->left->actual_type == node->right->actual_type ) { node->actual_type = node->left->actual_type; } else { Abort( "Incompatible types in expression: '%s' on the left, '%s' on the right", node->left->actual_type->name, node->right->actual_type->name ); } } return node; } static int isRelationalOperator( S_Symbol sym ) { if( sym == S_less || sym == S_less_or_equals || sym == S_equals || sym == S_more || sym == S_more_or_equals || sym == S_not_equals ) { return 1; } else { return 0; } } static ExpressionNode *parseExpression( Scope *scope ) { ExpressionNode *node, *tmp; node = parseSimpleExpression( scope ); if( isRelationalOperator( sym ) ) { tmp = node; node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; sym = getSym( ); node->left = tmp; node->right = parseSimpleExpression( scope ); if( node->left->actual_type == node->right->actual_type ) { node->actual_type = get_symbol( scope, "boolean" ); } else { Abort( "Incompatible types in expression: '%s' on the left, '%s' on the right", node->left->actual_type->name, node->right->actual_type->name ); } } return node; } static void parseAssignment( Scope *scope ) { Symbol *symbol; ExpressionNode *node, *lhs, *rhs; symbol = get_symbol( scope, ident ); if( symbol == NULL ) { Abort( "Unknown variable '%s'", ident ); } if( symbol->class == SYMBOL_CLASS_SIMPLE_TYPE || symbol->class == SYMBOL_CLASS_ARRAY_TYPE ) { Abort( "'%s' is a type and not a variable as expected", ident ); } if( symbol->class == SYMBOL_CLASS_CONSTANT ) { Abort( "'%s' is a constant and can not be changed", ident ); } if( sym == S_assign ) { Expect( S_assign ); node = parseExpression( scope ); Emit( "; LET %s <- ", symbol->name ); if( !is_compatible_type( symbol->type, node->actual_type ) ) { Abort( "Incompatible assignment of expression of type '%s' to variable '%s' of type '%s'", node->actual_type->name, symbol->name, symbol->type->name ); } generate_expression_comment( node ); Emit( "\n" ); emit_expression_code( node, scope ); switch( get_size( symbol ) ) { case 4: /* TODO: cleanup local global variable access */ if( symbol->offset == 0 ) { Emit( "pop eax\n" ); Emit( "mov [%s], eax\n", symbol->name ); } else { Emit( "push ebp\n" ); Emit( "pop ebx\n" ); Emit( "mov eax, %d\n", symbol->offset ); Emit( "sub ebx, eax\n" ); Emit( "pop eax\n" ); Emit( "mov [ebx], eax\n" ); } break; case 1: /* TODO: cleanup local global variable access */ if( symbol->offset == 0 ) { Emit( "pop eax\n" ); Emit( "mov [%s], al\n", symbol->name ); } else { Emit( "push ebp\n" ); Emit( "pop ebx\n" ); Emit( "mov eax, %d\n", symbol->offset ); Emit( "sub ebx, eax\n" ); Emit( "pop eax\n" ); Emit( "mov [ebx], al\n" ); } break; default: Abort( "Unhandled size %d when storing register immediately to memory", symbol->size ); } free_expression_node( node ); } else if( sym == S_lbracket ) { Expect( S_lbracket ); if( symbol->type->class != SYMBOL_CLASS_ARRAY_TYPE ) { Abort( "array index not allowed on '%s' (not an array)", symbol->name ); } Emit( "; LET %s[", symbol->name ); lhs = parseExpression( scope ); Expect( S_rbracket ); generate_expression_comment( lhs ); Emit( "] <- " ); Expect( S_assign ); rhs = parseExpression( scope ); generate_expression_comment( rhs ); Emit( "\n" ); emit_expression_code( lhs, scope ); EmitArrayAddress( symbol ); if( !is_compatible_type( symbol->type->type, rhs->actual_type ) ) { Abort( "Incompatible assignment of expression of type '%s' to variable '%s' of type '%s'", rhs->actual_type->name, symbol->name, symbol->type->name ); } emit_expression_code( rhs, scope ); Emit( "pop eax\n" ); Emit( "pop ebx\n" ); switch( get_size( symbol->type->type ) ) { case 4: Emit( "mov [ebx], eax\n" ); break; case 1: Emit( "mov [ebx], al\n" ); break; default: Abort( "Unhandled size %d when storing register indirectly to memory", symbol->type->size ); } free_expression_node( rhs ); free_expression_node( lhs ); } } static void parseStatementSequence( Scope *scope ); static void parseIfStatement( Scope *scope ) { ExpressionNode *node; char *jump_label1 = get_local_label( scope ); char *jump_label2 = get_local_label( scope ); node = parseExpression( scope ); Emit( "; TEST " ); generate_expression_comment( node ); Emit( "\n" ); emit_expression_code( node, scope ); Expect( S_do ); Emit( "pop eax\n" ); Emit( "mov ebx, 0\n" ); Emit( "cmp eax, ebx\n" ); Emit( "je %s\n", jump_label1 ); parseStatementSequence( scope ); Emit( "jmp %s\n", jump_label2 ); Emit( "%s:\n", jump_label1 ); if( sym == S_else ) { sym = getSym( ); parseStatementSequence( scope ); } Emit( "%s:\n", jump_label2 ); Expect( S_end ); free_expression_node( node ); free( jump_label2 ); free( jump_label1 ); } static void parseWhileStatement( Scope *scope ) { ExpressionNode *node; char *jump_label1 = get_local_label( scope ); char *jump_label2 = get_local_label( scope ); char *jump_label3 = get_local_label( scope ); char *jump_label4 = get_local_label( scope ); node = parseExpression( scope ); Emit( "; WHILE " ); generate_expression_comment( node ); Emit( "\n" ); Emit( "%s:\n", jump_label1 ); emit_expression_code( node, scope ); Expect( S_do ); Emit( "pop eax\n" ); Emit( "mov ebx, 0\n" ); Emit( "cmp eax, ebx\n" ); /* TODO: support far addresses for conditional jumps in asm-i386 later, * avoid this jump workaround for long scopes */ Emit( "je %s\n", jump_label3 ); Emit( "jmp %s\n", jump_label4 ); Emit( "%s: jmp %s\n", jump_label3, jump_label2 ); Emit( "%s:\n", jump_label4 ); parseStatementSequence( scope ); Emit( "jmp %s\n", jump_label1 ); Emit( "%s:\n", jump_label2 ); Expect( S_end ); free_expression_node( node ); free( jump_label4 ); free( jump_label3 ); free( jump_label2 ); free( jump_label1 ); } static void parseParameterList( Scope *scope, ExpressionNodeList *list ) { ExpressionNode *node; /* function insertBeginning(List list, Node newNode) if list.firstNode == null list.firstNode := newNode list.lastNode := newNode newNode.prev := null newNode.next := null else newNode.next := list.firstNode if list.firstNode.prev == null newNode.prev := null -- (not always necessary) list.firstNode := newNode else newNode.prev := list.firstNode.prev list.firstNode.prev.next := newNode list.firstNode.prev := newNode */ Expect( S_lparen ); list->head = NULL; list->tail = NULL; do { node = parseExpression( scope ); if( list->head == NULL && list->tail == NULL ) { list->head = node; list->tail = node; } else { node->next = list->head; if( list->head->prev == NULL ) { list->head = node; } else { node->prev = list->head->prev; list->head->prev->next = node; } list->head->prev = node; } if( sym == S_comma ) { sym = getSym( ); } } while( sym != S_rparen ); Expect( S_rparen ); } static void parseProcedureCall( Scope *scope ) { Symbol *symbol, *param; int nof_expected_params; int nof_actual_params; ExpressionNode *node; ExpressionNodeList list; symbol = get_symbol( scope, ident ); nof_expected_params = 0; param = symbol->param; while( param != NULL ) { nof_expected_params++; param = param->next; } nof_actual_params = 0; if( sym == S_lparen ) { parseParameterList( scope, &list ); node = list.head; while( node != NULL ) { nof_actual_params++; node = node->next; } } /* check cardinality of parameters */ if( nof_actual_params != nof_expected_params ) { Abort( "procedure '%s' expects %d parameter(s), but %d were given", symbol->name, nof_expected_params, nof_actual_params ); } /* check types */ if( nof_expected_params > 0 ) { node = list.tail; param = symbol->param; while( param != NULL ) { if( !is_compatible_type( node->actual_type, param->type ) ) { Abort( "Incompatible parameter type for parameter '%s' in procedure '%s', expecting type '%s', got '%s'", param->name, symbol->name, param->type->name, node->actual_type->name ); } node = node->prev; param = param->next; } } /* emit assembly comments */ Emit( "; CALL %s", symbol->label ); if( nof_actual_params > 0 ) { Emit( "( " ); node = list.head; while( node != NULL ) { generate_expression_comment( node ); node = node->next; } Emit( ")" ); } Emit( "\n" ); /* emit assembly to push parameters onto stack and call procedure */ if( nof_actual_params > 0 ) { node = list.head; while( node != NULL ) { emit_expression_code( node, scope ); node = node->next; } free_expression_node( list.head ); } Emit( "call %s\n", symbol->label ); } static void parseStatement( Scope *scope ) { Symbol *symbol; if( sym == S_if ) { sym = getSym( ); parseIfStatement( scope ); } else if( sym == S_while ) { sym = getSym( ); parseWhileStatement( scope ); } else if( sym == S_ident ) { sym = getSym( ); symbol = get_symbol( scope, ident ); if( symbol == NULL ) { Abort( "Unknown indentifier '%s'", ident ); } if( symbol->class == SYMBOL_CLASS_PROCEDURE_TYPE ) { parseProcedureCall( scope ); } else { parseAssignment( scope ); } } else if( sym == S_end ) { return; } else { sym = getSym( ); } } static void parseStatementSequence( Scope *scope ) { parseStatement( scope ); if( sym == S_end ) { return; } while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_end || sym == S_else ) { return; } parseStatement( scope ); } } static void parseStatementBlock( Scope *scope ) { Expect( S_begin ); parseStatementSequence( scope ); Expect( S_end ); } /* TODO: implement simple expression and later complex expression */ static ExpressionNode *parseConstExpression( Scope *scope ) { ExpressionNode *node; char typeName[MAX_IDENT_LEN+1]; Symbol *type; int len; node = create_expression_node( ); if( sym == S_number ) { node->type = EXPRESSION_NODE_TYPE_CONST; node->integer_value = num; node->actual_type = integer_type; sym = getSym( ); } else if( sym == S_char ) { node->type = EXPRESSION_NODE_TYPE_CONST; node->character_value = ch; node->actual_type = character_type; sym = getSym( ); } else if( sym == S_string ) { node->type = EXPRESSION_NODE_TYPE_CONST; node->string_value = AllocateAndCopyStr( str ); len = strlen( str ); snprintf( typeName, MAX_IDENT_LEN, "array %d of %s", len + 1, character_type->name ); type = get_symbol( scope, typeName ); if( type == NULL ) { type = insert_symbol( scope, typeName, SYMBOL_CLASS_ARRAY_TYPE ); type->dim = len + 1; type->type = character_type; } node->actual_type = type; sym = getSym( ); } else if( sym == S_ident ) { Symbol *symbol = get_symbol( scope, ident ); if( symbol == NULL ) { Abort( "Constant '%s' is unknown", ident ); } if( symbol->class != SYMBOL_CLASS_CONSTANT ) { Abort( "Assignment value of '%s' is not a constant", ident ); } node->type = EXPRESSION_NODE_TYPE_VAR; node->symbol = symbol; node->actual_type = symbol->type; sym = getSym( ); } else { Abort( "Expected a literal or a constant identifier" ); } return node; } static void generate_symbol_comment( char *mode, Symbol *constant ) { Emit( "; %s %s -> %s, ", mode, constant->name, constant->type->name ); if( constant->type == integer_type ) { Emit( "%d", constant->integer_value ); } else if( constant->type == boolean_type ) { if( constant->boolean_value == 0 ) { Emit( "false" ); } else if( constant->boolean_value == 1 ) { Emit( "true" ); } else { Abort( "Wrong internal value '%d' of a boolean when printing constant comment", constant->boolean_value ); } } else if( constant->type == character_type ) { Emit( "'%c'", constant->character_value ); } else if( constant->type->class == SYMBOL_CLASS_ARRAY_TYPE ) { if( constant->type->type == character_type ) { /* TODO: handle non-printables */ Emit( "\"%s\"", constant->string_value ); } else { /* TODO: iterate all elements of basic type and issue value */ Emit( "array %d of %s = { ... }", constant->type->dim, constant->type->type->name ); } } else { Abort( "Unhandled symbol (%s) comment case for type '%s'", mode, constant->type->name ); } Emit( "\n" ); } static void generate_constant_comment( Symbol *variable ) { generate_symbol_comment( "CONST", variable ); } static void generate_variable_comment( Symbol *variable ) { generate_symbol_comment( "DECL", variable ); } static void symbol_copy( Symbol *from, Symbol *to ) { if( !is_compatible_type( to->type, from->type ) ) { Abort( "type mismatch when copying symbol value (from: '%s', to: '%s')", from->type->name, to->type->name ); } if( from->type == integer_type ) { to->type = integer_type; to->integer_value = from->integer_value; } else if( from->type == boolean_type ) { to->type = boolean_type; to->boolean_value = from->boolean_value; } else if( from->type == character_type ) { to->type = character_type; to->character_value = from->character_value; } else if( from->type->class == SYMBOL_CLASS_ARRAY_TYPE ) { if( from->type->type == character_type ) { /* type remains, initializer is smaller than dimensioned string */ to->string_value = AllocateAndCopyStr( from->string_value ); } else if( from->type->type == integer_type ) { memcpy( to->intarr_value, from->intarr_value, to->dim * sizeof( int ) ); } else { Abort( "Unhandled case for array type '%s' when copying value of symbol", from->type->name ); } } else { Abort( "Unhandled case for type '%s' when copying value of symbol", from->type->name ); } } static int evaluate_const_expression_as_integer( ExpressionNode *node ) { switch( node->type ) { case EXPRESSION_NODE_TYPE_CONST: if( node->actual_type != integer_type ) { Abort( "Expecting an integer constant, not a constant of type '%s'", node->actual_type->name ); } return node->integer_value; case EXPRESSION_NODE_TYPE_VAR: if( node->symbol->type != integer_type ) { Abort( "Evaluation a const expression as integer, but expression is of type '%s'", node->symbol->type->name ); } return node->symbol->integer_value; case EXPRESSION_NODE_TYPE_OP: Abort( "Evaluation of complex const expression currently not implemented" ); break; } return 0; } static void symbol_copy_node( ExpressionNode *from, Symbol *to ) { if( !is_compatible_type( to->type, from->actual_type ) ) { Abort( "type mismatch when copying node to symbol value (from: '%s', to: '%s')", from->actual_type->name, to->type->name ); } if( from->actual_type == integer_type ) { to->type = integer_type; to->integer_value = from->integer_value; } else if( from->actual_type == boolean_type ) { to->type = boolean_type; to->boolean_value = from->boolean_value; } else if( from->actual_type == character_type ) { to->type = character_type; to->character_value = from->character_value; } else if( from->actual_type->class == SYMBOL_CLASS_ARRAY_TYPE ) { if( from->actual_type->type == character_type ) { /* type remains, initializer is smaller than dimensioned string */ to->string_value = AllocateAndCopyStr( from->string_value ); } else if( from->actual_type->type == integer_type ) { memcpy( to->intarr_value, from->intarr_value, to->dim * sizeof( int ) ); } else { Abort( "Unhandled case for array type '%s' when copying value of symbol from expression node", from->actual_type->name ); } } else { Abort( "Unhandled case for type '%s' when copying value of symbol from expression node", from->actual_type->name ); } } static void symbol_copy_from_node( ExpressionNode *from, Symbol *to ) { if( from->type == EXPRESSION_NODE_TYPE_CONST ) { symbol_copy_node( from, to ); } else if( from->type == EXPRESSION_NODE_TYPE_VAR ) { symbol_copy( from->symbol, to ); } else { Abort( "Unknown symbol copy from expression node" ); } } static Symbol *parseSimpleType( Scope *scope ) { Symbol *type = get_symbol( scope, ident ); if( type == NULL ) { Abort( "Unknown type '%s'", ident ); } if( type->class != SYMBOL_CLASS_SIMPLE_TYPE && type->class != SYMBOL_CLASS_ARRAY_TYPE ) { Abort( "'%s' is defined, but is not a type as expected", ident ); } sym = getSym( ); return type; } static Symbol *parseType( Scope *scope ); static Symbol *parseArrayType( Scope *scope ) { char typeName[MAX_IDENT_LEN+1]; Symbol *type, *simple = NULL; ExpressionNode *node; int num = 0; Expect( S_array ); if( sym != S_of ) { node = parseConstExpression( scope ); if( !is_compatible_type( node->actual_type, integer_type ) ) { Abort( "Dimension of an array declaration must be of type '%s', not '%s'", integer_type->name, node->actual_type->name ); } num = evaluate_const_expression_as_integer( node ); free_expression_node( node ); } Expect( S_of ); simple = parseSimpleType( scope ); snprintf( typeName, MAX_IDENT_LEN, "array %d of %s", num, simple->name ); type = get_symbol( scope, typeName ); if( type == NULL ) { type = insert_symbol( scope, typeName, SYMBOL_CLASS_ARRAY_TYPE ); type->dim = num; type->type = simple; } return type; } static Symbol *parseType( Scope *scope ) { Symbol *type; if( sym == S_array ) { type = parseArrayType( scope ); } else { type = parseSimpleType( scope ); } return type; } static void parseConstDeclaration( Scope *scope ) { int nof_constants = 0; Symbol *constant[MAX_NUMBER_OF_ENUMERATIONS], *type; int i; ExpressionNode *node; do { if( sym == S_begin || sym == S_var || sym == S_procedure ) { return; } else if( sym == S_comma ) { sym = getSym( ); } if( nof_constants >= MAX_NUMBER_OF_ENUMERATIONS ) { Abort( "Too many enumerations in const declaration" ); } constant[nof_constants] = insert_symbol( scope, ident, SYMBOL_CLASS_CONSTANT ); nof_constants++; sym = getSym( ); } while( sym == S_comma ); Expect( S_colon ); type = parseType( scope ); Expect( S_equals ); node = parseConstExpression( scope ); if( !is_compatible_type( type, node->actual_type ) ) { Abort( "Assigning a constant of type '%s' from an expression of type '%s'", type->name, node->actual_type->name ); } for( i = 0; i < nof_constants; i++ ) { constant[i]->type = type; symbol_copy_from_node( node, constant[i] ); } for( i = 0; i < nof_constants; i++ ) { generate_constant_comment( constant[i] ); } free_expression_node( node ); } static void parseConstBlock( Scope *scope ) { Expect( S_const ); parseConstDeclaration( scope ); while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_ident ) { parseConstDeclaration( scope ); } else if( sym == S_begin || sym == S_var || sym == S_procedure ) { return; } else { Abort( "Expecting constant declaration" ); } } } static void parseVariableDeclaration( Scope *scope ) { int nof_variables = 0; Symbol *variable[MAX_NUMBER_OF_ENUMERATIONS], *type; int i; ExpressionNode *node; do { if( sym == S_begin || sym == S_procedure ) { return; } else if( sym == S_comma ) { sym = getSym( ); } if( nof_variables >= MAX_NUMBER_OF_ENUMERATIONS ) { Abort( "Too many enumerations in variable declaration" ); } variable[nof_variables] = insert_symbol( scope, ident, SYMBOL_CLASS_VARIABLE ); nof_variables++; sym = getSym( ); } while( sym == S_comma ); Expect( S_colon ); type = parseType( scope ); if( sym == S_assign ) { sym = getSym( ); node = parseConstExpression( scope ); if( !is_compatible_type( type, node->actual_type ) ) { Abort( "Assigning a variable of type '%s' from an expression of type '%s'", type->name, node->actual_type->name ); } for( i = 0; i < nof_variables; i++ ) { variable[i]->type = type; symbol_copy_from_node( node, variable[i] ); } free_expression_node( node ); } else { for( i = 0; i < nof_variables; i++ ) { ExpressionNode *node; node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->integer_value = 0; node->boolean_value = 0; /* false */ node->character_value = 0; /* NUL */ node->string_value = AllocateAndCopyStr( "" ); node->actual_type = type; variable[i]->type = type; symbol_copy_from_node( node, variable[i] ); free_expression_node( node ); } } for( i = 0; i < nof_variables; i++ ) { generate_variable_comment( variable[i] ); } } static void parseVariableBlock( Scope *scope ) { Expect( S_var ); parseVariableDeclaration( scope ); while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_ident ) { parseVariableDeclaration( scope ); } else if( sym == S_begin || sym == S_procedure ) { return; } else { Abort( "Expecting variable declaration" ); } } } static void parseProcedureDeclarationBlock( Scope *scope ) { if( sym == S_const ) { parseConstBlock( scope ); } if( sym == S_var ) { parseVariableBlock( scope ); } } static void parseParameterDeclaration( Scope *scope ) { Symbol *symbol; symbol = insert_symbol( scope, ident, SYMBOL_CLASS_VARIABLE ); sym = getSym( ); Expect( S_colon ); symbol->type = parseType( scope ); } static void parseParameterDeclarationList( Scope *scope ) { Expect( S_lparen ); do { if( sym == S_ident ) { parseParameterDeclaration( scope ); if( sym == S_comma ) { sym = getSym( ); } } } while( sym != S_rparen ); Expect( S_rparen ); } static void parseProcedureDeclaration( Scope *scope ) { char *procedure_label; Symbol *symbol; int size_locals; int size_params = 0; Expect( S_procedure ); if( sym != S_ident ) { Abort( "Expecting name of a procedure after 'procedure'" ); } symbol = get_symbol( scope, ident ); if( symbol == NULL ) { symbol = insert_symbol( scope, ident, SYMBOL_CLASS_PROCEDURE_TYPE ); procedure_label = get_scoped_label( scope, ident ); symbol->label = AllocateAndCopyStr( procedure_label ); free( procedure_label ); } else if( symbol->class == SYMBOL_CLASS_PROCEDURE_TYPE ) { /* TODO: check, if parameter lists and return values match */ } /* local scope holding all local defitions */ symbol->scope = create_scope( scope, symbol->name ); sym = getSym( ); if( sym == S_semicolon ) { /* no parameters */ sym = getSym( ); size_params = 0; } else if( sym == S_lparen ) { Symbol *param, *copy, *defparam; int offset; parseParameterDeclarationList( symbol->scope ); if( sym == S_semicolon ) { /* forward declaration */ sym = getSym( ); } param = symbol->scope->symbol; /* compute size and offsets of parameter on the stack, * also remember the types of the parameters for type * checking when generating a produre call */ size_params = 0; offset = -4; while( param != NULL ) { if( param->class == SYMBOL_CLASS_VARIABLE ) { int size = get_size( param ); size_params += size; param->offset = offset - size; Emit( "; param %s, offset: %d, size: %d\n", param->name, param->offset, size ); offset -= size; } param = param->next; } /* compare types in case of an already existing forward declaration */ if( symbol->param != NULL ) { param = symbol->scope->symbol; defparam = symbol->param; while( param != NULL || defparam != NULL ) { if( param == NULL || defparam == NULL ) { if( param == NULL ) { Abort( "Prototype mismatch for procedure '%s', too few parameter in procedure declaration", symbol->name ); } else { Abort( "Prototype mismatch for procedure '%s', too few parameter in forward declaration", symbol->name ); } } if( !is_compatible_type( defparam->type, param->type ) ) { Abort( "Incompatible parameter type between forward declaration for parameter '%s' in procedure '%s', expecting type '%s', got '%s'", param->name, symbol->name, defparam->type->name, param->type->name ); } defparam = defparam->next; param = param->next; } free_symbol( symbol->param ); symbol->param = NULL; } /* remember types and names of parameters (keeping the variable names * of the definition of the procedure and not the one of the forward * declaration */ if( symbol->param == NULL ) { param = symbol->scope->symbol; while( param != NULL ) { if( param->class == SYMBOL_CLASS_VARIABLE ) { copy = copy_symbol( param ); copy->scope = NULL; copy->next = symbol->param; symbol->param = copy; } param = param->next; } } } else { Abort( "Semicolon expected" ); } /* procedure body */ if( sym == S_const || sym == S_var || sym == S_begin ) { Symbol *local, *param_or_local; int offset; Emit( "; PROC %s\n", symbol->name ); Emit( "%s:\n", symbol->label ); /* save base pointer, set base pointer to location of stack */ Emit( "push ebp\n" ); Emit( "push esp\n" ); Emit( "pop ebp\n" ); /* TODO: parse parameters, assign parameter values to * local variables (when passed by value) or set * addresses for VAR parameters */ parseProcedureDeclarationBlock( symbol->scope ); /* compute sizes of all locals */ size_locals = 0; offset = 0; local = symbol->scope->symbol; while( local != NULL ) { if( local->class == SYMBOL_CLASS_VARIABLE && local->offset == 0 ) { int size = get_size( local ); size_locals += size; local->offset = offset + size; Emit( "; local %s, offset: %d, size: %d\n", local->name, local->offset, size ); offset += size; } local = local->next; } /* reserve space on stack for locals */ if( size_locals > 0 ) { Emit( "mov eax, %d\n", size_locals ); Emit( "sub esp, eax\n" ); } /* initialize local variables (sort of reserve_initialize_local) * at runtime (every time the stack is created for that procedure) */ param_or_local = symbol->scope->symbol; while( param_or_local != NULL ) { if( param_or_local->class == SYMBOL_CLASS_VARIABLE && param_or_local->offset >= 0 ) { /* TODO: use type here! */ int size = get_size( param_or_local ); if( param_or_local->type == integer_type || param_or_local->type == boolean_type ) { switch( size ) { case 4: /* TODO: cleanup local global variable access */ Emit( "push ebp\n" ); Emit( "pop ebx\n" ); Emit( "mov eax, %d\n", param_or_local->offset ); Emit( "sub ebx, eax\n" ); Emit( "mov eax, %d\n", param_or_local->integer_value ); Emit( "mov [ebx], eax\n" ); break; case 1: /* TODO: cleanup local global variable access */ Emit( "push ebp\n" ); Emit( "pop ebx\n" ); Emit( "mov eax, %d\n", param_or_local->offset ); Emit( "sub ebx, eax\n" ); Emit( "mov eax, %d\n", param_or_local->boolean_value ); Emit( "mov [ebx], al\n" ); break; default: Abort( "Unhandled case when initializing local variable '%s' of primitive type on stack", param_or_local->name ); } } else if( param_or_local->type->class == SYMBOL_CLASS_ARRAY_TYPE ) { /* TODO: we would need an internal memset here * memset( ebp + offset, get_size, 0 ) */ } else { Abort( "Unhandled case when initializing local variable '%s' of complex type on stack", param_or_local->name ); } } param_or_local = param_or_local->next; } /* TODO: allocate space on stack for local variables (and * parameters) */ /* TODO: assign symbols to relative addresses (base pointer) */ /* TODO: initialize local variables */ parseStatementBlock( symbol->scope ); /* discard locals, restore base pointer, discard params */ if( size_locals > 0 ) { Emit( "mov eax, %d\n", size_locals ); Emit( "add esp, eax\n" ); } Emit( "pop ebp\n" ); if( size_params > 0 ) { Emit( "ret %d\n", size_params ); } else { Emit( "ret\n" ); } } /* TODO: free only locals, keep parameters in scope, * they are needed for type checks when calling the * procedure */ free_scope( symbol->scope ); } static void parseProcedureBlock( Scope *scope ) { while( sym == S_procedure ) { parseProcedureDeclaration( scope ); } } static void parseDeclarationBlock( Scope *scope ) { if( sym == S_const ) { parseConstBlock( scope ); } if( sym == S_var ) { parseVariableBlock( scope ); } if( sym == S_procedure ) { parseProcedureBlock( scope ); } } static void parseModule( Scope *scope ) { char *entry_label; Expect( S_module ); if( sym == S_ident ) { strlcpy( moduleName, ident, MAX_IDENT_LEN ); } sym = getSym( ); entry_label = get_local_label( scope ); Emit( "jmp %s\n", entry_label ); Expect( S_semicolon ); parseDeclarationBlock( scope ); Emit( "%s:\n", entry_label ); parseStatementBlock( scope ); free( entry_label ); } static void register_internal_types( Scope *scope ) { Symbol *const_symbol; integer_type = insert_symbol( scope, "integer", SYMBOL_CLASS_SIMPLE_TYPE ); integer_type->size = 4; boolean_type = insert_symbol( scope, "boolean", SYMBOL_CLASS_SIMPLE_TYPE ); boolean_type->size = 1; const_symbol = insert_symbol( scope, "false", SYMBOL_CLASS_CONSTANT ); const_symbol->type = boolean_type; const_symbol->boolean_value = 0; const_symbol = insert_symbol( scope, "true", SYMBOL_CLASS_CONSTANT ); const_symbol->type = boolean_type; const_symbol->boolean_value = 1; character_type = insert_symbol( scope, "character", SYMBOL_CLASS_SIMPLE_TYPE ); character_type->size = 1; } static void init( void ) { col = 1; row = 1; look = getChar( ); global_scope = create_scope( NULL, "global" ); register_internal_types( global_scope ); sym = getSym( ); } static void prologue_emul( void ) { /* fasm or asm-i386 syntax */ Emit( "format binary\n" ); Emit( "use32\n" ); Emit( "org $1000000\n" ); } static void prologue_elf( void ) { /* fasm, i386, ELF, Linux */ Emit( "format binary\n" ); Emit( "use32\n" ); Emit( "org $08048000\n" ); Emit( "ehdr:\n" ); Emit( "db $7F, \"ELF\"\n" ); Emit( "db 1\n" ); Emit( "db 1\n" ); Emit( "db 1\n" ); Emit( "db 0\n" ); Emit( "dd 0, 0\n" ); Emit( "dw 2\n" ); Emit( "dw 3\n" ); Emit( "dd 1\n" ); Emit( "dd _start\n" ); Emit( "dd phdr - $$\n" ); Emit( "dd 0\n" ); Emit( "dd 0\n" ); Emit( "dw ehdrsize\n" ); Emit( "dw phdrsize\n" ); Emit( "dw 1\n" ); Emit( "dw 0\n" ); Emit( "dw 0\n" ); Emit( "dw 0\n" ); Emit( "ehdrsize = $ - ehdr\n" ); Emit( "phdr:\n" ); Emit( "dd 1\n" ); Emit( "dd 0\n" ); Emit( "dd $$\n" ); Emit( "dd $$\n" ); Emit( "dd filesize\n" ); Emit( "dd filesize\n" ); Emit( "dd 7\n" ); Emit( "dd $1000\n" ); Emit( "phdrsize = $ - phdr\n" ); Emit( "_start:"); } static void prologue( void ) { switch( binary_format ) { case BINARY_FORMAT_EMUL: prologue_emul( ); break; case BINARY_FORMAT_ELF: prologue_elf( ); break; } } static void reserve_initialize( Symbol *symbol ) { int i; int len; switch( symbol->type->class ) { case SYMBOL_CLASS_SIMPLE_TYPE: if( symbol->type == integer_type ) { Emit( "dd $" ); Emit_DD( symbol->integer_value ); } else if( symbol->type == boolean_type ) { Emit( "db $" ); Emit_Hexbyte( symbol->boolean_value ); } else if( symbol->type == character_type ) { Emit( "db $" ); Emit_Hexbyte( symbol->character_value ); } else { Abort( "Unhandled variable space reservation and initializiation for simple type '%s' in variable '%s'", symbol->type->name, symbol->name ); } Emit( "\n" ); break; case SYMBOL_CLASS_ARRAY_TYPE: if( symbol->type->type == character_type ) { i = 0; Emit( "db " ); if( symbol->string_value != NULL && strcmp( symbol->string_value, "" ) != 0 ) { Emit( "\"" ); len = strlen( symbol->string_value ); while( i < len ) { Emit( "%c", symbol->string_value[i] ); i++; } Emit( "\"" ); } while( i < symbol->type->dim ) { if( i > 0 ) { Emit( ", " ); } Emit( "0" ); i++; } Emit( "\n" ); } else if( symbol->type->type == integer_type ) { for( i = 0; i < symbol->type->dim; i++ ) { Emit( "dd $" ); Emit_DD( 0 ); Emit( "\n" ); } } else { /* TODO: this can not work */ Abort( "Unhandled generic array case" ); /* for( i = 0; i < symbol->type->dim; i++ ) { reserve_initialize( symbol->type ); } */ } break; default: Abort( "Unhandled variable space reservation and initializiation for complex type '%s' in variable '%s'", symbol->type->name, symbol->name ); } } static void Emit_symbols( void ) { Symbol *symbol; symbol = global_scope->symbol; while( symbol != NULL ) { if( symbol->class == SYMBOL_CLASS_VARIABLE ) { Emit( "%s: ", symbol->name ); reserve_initialize( symbol ); } symbol = symbol->next; } } static void epilogue_emul( void ) { Emit( "hlt\n" ); Emit_symbols( ); } static void epilogue_elf( void ) { Emit( "mov ebx, 0\n" ); Emit( "mov eax, 1\n" ); Emit( "int $80\n" ); Emit_symbols( ); Emit( "filesize = $ - $$\n" ); } static void epilogue( void ) { switch( binary_format ) { case BINARY_FORMAT_EMUL: epilogue_emul( ); break; case BINARY_FORMAT_ELF: epilogue_elf( ); break; } } static void deinit( void ) { free_scope( global_scope ); } int main( void ) { init( ); prologue( ); parseModule( global_scope ); if( sym != S_eof ) { Abort( "Unexpected EOF" ); } epilogue( ); deinit( ); malloc_stats( ); Halt( EXIT_SUCCESS ); return 0; }