/* NOTE: the header names were missing from the five #include lines of this
 * file; they are restored here from the library names the file uses
 * (assert, va_list, fprintf/getc, exit, strcmp/strncpy). */
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* constants */

enum {
    MAX_IDENT_LEN = 24,
    MAX_NUMBER_LEN = 9,
    MAX_ERRMSG_LEN = 64,
    MAX_STRING_LEN = 64,
    MAX_SYMBOLS = 64,
    MAX_RECORD_MEMBERS = 4,
    MAX_PARAMETERS = 4
};

/* scanner */

static int look;                    /* one character of lookahead (or EOF) */
static int row;                     /* current input line, 1-based */
static int col;                     /* current input column */
static int num;                     /* value of the last scanned number */
static char str[MAX_STRING_LEN+1];  /* text of the last scanned string */
static char ident[MAX_IDENT_LEN+1]; /* text of the last scanned identifier */

typedef enum {
    S_char = 0,
    S_ident,
    S_number,
    S_string,
    S_module,
    S_begin,
    S_end,
    S_import,
    S_procedure,
    S_return,
    S_const,
    S_var,
    S_if,
    S_do,
    S_else,
    S_array,
    S_of,
    S_for,
    S_to,
    S_while,
    S_dot,
    S_comma,
    S_semicolon,
    S_colon,
    S_assign,
    S_plus,
    S_minus,
    S_star,
    S_slash,
    S_and,
    S_or,
    S_not,
    S_lparen,
    S_rparen,
    S_lbracket,
    S_rbracket,
    S_less,
    S_less_or_equals,
    S_equals,
    S_more_or_equals,
    S_more,
    S_not_equals,
    S_eof
} Scanner_Symbol;

/* Printable name of every Scanner_Symbol, indexed by the enum value. */
char *symname[S_eof+1] = {
    "char",
    "ident",
    "number",
    "string",
    "module",
    "begin",
    "end",
    "import",
    "procedure",
    "return",
    "const",
    "var",
    "if",
    "do",
    "else",
    "array",
    "of",
    "for",
    "to",
    "while",
    ".",
    ",",
    ";",
    ":",
    ":=",
    "+",
    "-",
    "*",
    "/",
    "and",
    "or",
    "not",
    "(",
    ")",
    "[",
    "]",
    "<",
    "<=",
    "=",
    ">=",
    ">",
    "<>",
    "eof"
};

static Scanner_Symbol sym;          /* the current symbol */

/* Print a printf-style message, prefixed with the input position, to stderr. */
static void Err( char *s, va_list args )
{
    fprintf( stderr, "Error line %d, pos %d: ", row, col );
    vfprintf( stderr, s, args );
    fputs( "\n", stderr );
    fflush( stderr );
}

/* Terminate the compiler with a failure status. */
static void Halt( void )
{
    exit( EXIT_FAILURE );
}

/* Report a printf-style error message and terminate; does not return. */
static void Abort( char *s, ... )
{
    va_list args;

    va_start( args, s );
    Err( s, args );
    va_end( args );
    Halt( );
}

/* Read one character from stdin and keep row/col up to date.
 * Returns EOF (without touching the position) at end of input. */
static int getChar( void )
{
    int c = getc( stdin );

    if( c == EOF ) {
        return c;
    }
    col++;
    if( c == '\n' ) {
        col = 1;
        row++;
    }
    return c;
}

/* 1 for blank, carriage return, newline and tab, 0 otherwise. */
static int isWhite( int c )
{
    return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}

/* 1 for the ASCII letters A-Z and a-z, 0 otherwise. */
static int isAlpha( int c )
{
    return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' );
}

/* 1 for the decimal digits 0-9, 0 otherwise. */
static int isDigit( int c )
{
    return c >= '0' && c <= '9';
}

/* 1 for the non-alphanumeric characters allowed inside identifiers. */
static int isSpecial( int c )
{
    return c == '_';
}

/* 1 for the characters allowed inside a string constant, 0 otherwise. */
static int isCharacter( int c )
{
    if( isAlpha( c ) || isDigit( c ) ) {
        return 1;
    }
    switch( c ) {
        case ' ':
        case '[':
        case ']':
        case ':':
        case ';':
        case ',':
            /* TODO: allow more characters as we go along */
            return 1;
        default:
            return 0;
    }
}

/* Advance 'look' past any white space. */
static void skipWhite( void )
{
    while( isWhite( look ) ) {
        look = getChar( );
    }
}

/* Scan an identifier starting at 'look' into 'ident' and set sym to S_ident.
 * Does nothing when 'look' is not a letter. Aborts as soon as MAX_IDENT_LEN
 * characters have been read, so accepted identifiers have at most
 * MAX_IDENT_LEN-1 characters. */
static void identifier( void )
{
    int n = 0;

    if( !isAlpha( look ) ) {
        return;
    }
    ident[n++] = (char)look;
    look = getChar( );
    while( n < MAX_IDENT_LEN
        && ( isAlpha( look ) || isDigit( look ) || isSpecial( look ) ) ) {
        ident[n++] = (char)look;
        look = getChar( );
    }
    ident[n] = '\0';
    if( n == MAX_IDENT_LEN ) {
        Abort( "Identifier exceeds maximal length" );
    }
    sym = S_ident;
}

/* Scan a decimal number starting at 'look' into 'num' and set sym to
 * S_number. Does nothing when 'look' is not a digit. Numbers may have at most
 * MAX_NUMBER_LEN digits; the length is checked before the next digit is
 * accumulated so 'num' cannot overflow on over-long input. */
static void number( void )
{
    int n;

    if( !isDigit( look ) ) {
        return;
    }
    num = look - '0';
    n = 1;
    look = getChar( );
    while( isDigit( look ) ) {
        if( n == MAX_NUMBER_LEN ) {
            Abort( "Number exceeds maximal length" );
        }
        num = 10 * num + ( look - '0' );
        n++;
        look = getChar( );
    }
    sym = S_number;
}

/* Scan a string constant into 'str' and set sym to S_string.
 * Precondition: 'look' is the opening double quote. Aborts as soon as
 * MAX_STRING_LEN characters have been read, on a character rejected by
 * isCharacter() and on end of input before the closing quote. */
static void string( void )
{
    int n = 0;

    look = getChar( );
    while( look != '"' && isCharacter( look ) && n < MAX_STRING_LEN ) {
        str[n++] = (char)look;
        look = getChar( );
    }
    str[n] = '\0';
    if( n == MAX_STRING_LEN ) {
        Abort( "String constant exceeds maximal length" );
    }
    if( look != '"' ) {
        Abort( "Unterminated string or illegal character in string" );
    }
    sym = S_string;
    assert( look == '"' );
    look = getChar( );
}
static void skipComment( void ) { look = getChar( ); cont: while( look != '*' ) { look = getChar( ); } look = getChar( ); if( look != ')' ) { goto cont; } look = getChar( ); } static Scanner_Symbol getSym( void ) { next: skipWhite( ); switch( look ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': number( ); return S_number; case 'a': identifier( ); if( strcmp( ident, "array" ) == 0 ) { return S_array; } else if( strcmp( ident, "and" ) == 0 ) { return S_and; } return S_ident; case 'b': identifier( ); if( strcmp( ident, "begin" ) == 0 ) { return S_begin; } return S_ident; case 'c': identifier( ); if( strcmp( ident, "const" ) == 0 ) { return S_const; } return S_ident; case 'd': identifier( ); if( strcmp( ident, "do" ) == 0 ) { return S_do; } return S_ident; case 'e': identifier( ); if( strcmp( ident, "end" ) == 0 ) { return S_end; } else if( strcmp( ident, "else" ) == 0 ) { return S_else; } return S_ident; case 'f': identifier( ); if( strcmp( ident, "for" ) == 0 ) { return S_for; } return S_ident; case 'g': case 'h': case 'j': case 'i': identifier( ); if( strcmp( ident, "if" ) == 0 ) { return S_if; } else if( strcmp( ident, "import" ) == 0 ) { return S_import; } return S_ident; case 'k': case 'l': identifier( ); return S_ident; case 'm': identifier( ); if( strcmp( ident, "module" ) == 0 ) { return S_module; } return S_ident; case 'n': identifier( ); if( strcmp( ident, "not" ) == 0 ) { return S_not; } return S_ident; case 'o': identifier( ); if( strcmp( ident, "of" ) == 0 ) { return S_of; } else if( strcmp( ident, "or" ) == 0 ) { return S_or; } return S_ident; case 'p': identifier( ); if( strcmp( ident, "procedure" ) == 0 ) { return S_procedure; } return S_ident; case 'q': case 'r': identifier( ); if( strcmp( ident, "return" ) == 0 ) { return S_return; } case 's': case 't': identifier( ); if( strcmp( ident, "to" ) == 0 ) { return S_to; } return S_ident; case 'u': case 'v': identifier( ); if( strcmp( ident, "var" ) 
== 0 ) { return S_var; } return S_ident; case 'w': identifier( ); if( strcmp( ident, "while" ) == 0 ) { return S_while; } return S_ident; case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': identifier( ); return S_ident; case '"': string( ); return S_string; case ':': look = getChar( ); if( look == '=' ) { look = getChar( ); return S_assign; } return S_colon; case ';': look = getChar( ); return S_semicolon; case '.': look = getChar( ); return S_dot; case ',': look = getChar( ); return S_comma; case '+': look = getChar( ); return S_plus; case '-': look = getChar( ); return S_minus; case '*': look = getChar( ); return S_star; case '/': look = getChar( ); return S_slash; case '(': look = getChar( ); if( look == '*' ) { skipComment( ); goto next; } return S_lparen; case ')': look = getChar( ); return S_rparen; case '[': look = getChar( ); return S_lbracket; case ']': look = getChar( ); return S_rbracket; case '<': look = getChar( ); if( look == '=' ) { look = getChar( ); return S_less_or_equals; } else if( look == '>' ) { look = getChar( ); return S_not_equals; } return S_less; case '=': look = getChar( ); return S_equals; case '>': look = getChar( ); if( look == '=' ) { look = getChar( ); return S_more_or_equals; } return S_more; case EOF: return S_eof; default: Abort( "Illegal character '%c'", (char)look ); } return S_char; } /* symbol table */ typedef enum { SYMBOL_TYPE = 1, SYMBOL_VARIABLE = 2, SYMBOL_CONSTANT = 4, SYMBOL_PROCEDURE = 8, SYMBOL_MODULE = 16 } SymbolKind; static const SymbolKind SYMBOL_ANY = SYMBOL_TYPE | SYMBOL_VARIABLE | SYMBOL_CONSTANT | SYMBOL_PROCEDURE | SYMBOL_MODULE; typedef enum { TYPE_BASIC, TYPE_ARRAY, TYPE_RECORD, TYPE_PROCEDURE, TYPE_NONE, TYPE_ANY } TypeKind; typedef struct Symbol Symbol; typedef struct 
Type Type; typedef struct ArrayType { Type *type; int len; } ArrayType; typedef struct RecordType { Type *type[MAX_RECORD_MEMBERS]; int len; } RecordType; typedef struct Procedure { Type *params[MAX_PARAMETERS]; Type *return_value; int len; /* TODO: later origin, module int internal;*/ } Procedure; typedef enum BasicType { TYPE_INTEGER, TYPE_BOOLEAN, TYPE_CHAR, TYPE_BYTE } BasicType; struct Type { TypeKind kind; union { BasicType basic; ArrayType array; RecordType *record; Procedure *procedure; } type; }; typedef struct Variable { Type *type; } Variable; /* TODO: constants are only possible to define from basic types currently */ typedef struct Constant { Type *type; union { int boolean; int integer; char character; char byte; } value; } Constant; struct Symbol { SymbolKind kind; char name[MAX_IDENT_LEN]; union { Type type; Variable variable; Constant constant; Procedure procedure; } symbol; }; typedef struct SymbolTable SymbolTable; struct SymbolTable { int nof_symbols; Symbol symbols[MAX_SYMBOLS]; SymbolTable *parent; }; static SymbolTable symbols; static SymbolTable *current_scope = &symbols; static void init_symboltable( SymbolTable *table, SymbolTable *parent ) { table->nof_symbols = 0; table->parent = parent; } static Symbol *insert_symbol( SymbolTable *table, char *name ) { if( table->nof_symbols >= MAX_SYMBOLS ) { Abort( "Symbol table exhausted, increase MAX_SYMBOLS and recompile e2c" ); } strcpy( table->symbols[table->nof_symbols].name, name, MAX_IDENT_LEN ); table->symbols[table->nof_symbols].name[MAX_IDENT_LEN-1] = '\0'; table->nof_symbols++; return &table->symbols[table->nof_symbols-1]; } /* TODO: this is inefficient and should be a search tree of sorts */ static Symbol *get_symbol( SymbolTable *table, char *name, SymbolKind kind ) { int i; /* search in table of currect scope */ for( i = 0; i < table->nof_symbols; i++ ) { if( strcmp( table->symbols[i].name, name ) == 0 && ( table->symbols[i].kind == kind ) ) { return &table->symbols[i]; } } /* traverse 
stack of parent symbol tables */ if( table->parent != NULL ) { return get_symbol( table->parent, name, kind ); } return NULL; } static Type *get_type_symbol( SymbolTable *table, char *name ) { Symbol *symbol = get_symbol( table, name, SYMBOL_TYPE ); if( symbol->kind != SYMBOL_TYPE ) { Abort( "Expected name of a type for '%s'", name ); } return &symbol->symbol.type; } #if 0 /* done with symbol table now */ static Type *strToBasicType( SymbolTable *table, char *name ) { Type *type = get_type_symbol( table, name ); if( type == NULL || type->kind != TYPE_BASIC ) { Abort( "'%s' is not a basic type as expected", name ); } return type; } #endif static char *basicTypeToCType( BasicType type ) { switch( type ) { case TYPE_INTEGER: return "signed int"; case TYPE_BOOLEAN: /* C89, no bool */ return "unsigned char"; case TYPE_CHAR: /* TODO: Unicode, for now ASCII */ return "unsigned char"; case TYPE_BYTE: return "unsigned char"; default: Abort( "Unknown basic type, no mapping defined to C" ); } } static char *typeToCType( Type *type ) { static char s[MAX_IDENT_LEN]; if( type->kind == TYPE_ARRAY ) { Type *subType = type->type.array.type; /* TODO: must be a basic type for now */ if( subType->kind == TYPE_BASIC ) { sprintf( s, "%s*", basicTypeToCType( subType->type.basic ) ); } else { Abort( "Arrays are currently only possible for basic types" ); } } else if( type->kind == TYPE_BASIC ) { sprintf( s, "%s ", basicTypeToCType( type->type.basic ) ); } else { Abort( "Unknown complex type to C mapping" ); } return s; } /* INTERNAL FUNCTIONS */ static int length( char *name ) { Type *type; type = get_type_symbol( current_scope, name ); if( type != NULL && type->kind == TYPE_ARRAY ) { return type->type.array.len; } else { Abort( "length called on non-array variable '%s'", name ); } } /* PARSER */ static char moduleName[MAX_IDENT_LEN+1]; static char varName[MAX_IDENT_LEN+1]; static char procName[MAX_IDENT_LEN+1]; static void Expect( Scanner_Symbol expect ) { if( sym == expect ) { sym = 
getSym( ); } else { Abort( "Expected symbol '%s'", symname[expect] ); } } static void emitLn( char *s, ... ) { va_list args; va_start( args, s ); vprintf( s, args ); va_end( args ); puts( "" ); fflush( stdout ); } static void emit( char *s, ... ) { va_list args; va_start( args, s ); vprintf( s, args ); va_end( args ); fflush( stdout ); } static void register_internal_types( void ) { Symbol *symbol; /* basic types */ symbol = insert_symbol( &symbols, "integer" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_BASIC; symbol = insert_symbol( &symbols, "boolean" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_BASIC; symbol = insert_symbol( &symbols, "char" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_BASIC; symbol = insert_symbol( &symbols, "byte" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_BASIC; /* the any type (for the type in the array for length) */ symbol = insert_symbol( &symbols, "__any" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_ANY; /* the null type (for procedures not returning a type) */ symbol = insert_symbol( &symbols, "__void" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_NONE; /* array of any type (for length) */ symbol = insert_symbol( &symbols, "__gen_array_of_any" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_ARRAY; symbol->symbol.type.type.array.len = 0; symbol->symbol.type.type.array.type = get_type_symbol( &symbols, "__any" ); /* array of any size of char (string, for system.readline) */ symbol = insert_symbol( &symbols, "__gen_array_of_char" ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_ARRAY; symbol->symbol.type.type.array.len = 0; symbol->symbol.type.type.array.type = get_type_symbol( &symbols, "char" ); } static void register_internal_constants( void ) { Symbol *symbol; symbol = insert_symbol( &symbols, "true" ); symbol->kind = SYMBOL_CONSTANT; symbol->symbol.constant.type = get_type_symbol( &symbols, 
"boolean" ); symbol->symbol.constant.value.boolean = 1; symbol = insert_symbol( &symbols, "false" ); symbol->kind = SYMBOL_CONSTANT; symbol->symbol.constant.type = get_type_symbol( &symbols, "boolean" ); symbol->symbol.constant.value.boolean = 0; } static void register_internal_functions( void ) { Symbol *symbol; /* internal compiler functions, are always internal to the compiler as they need * special treatment */ /* get length of any array */ symbol = insert_symbol( &symbols, "length" ); symbol->kind = SYMBOL_PROCEDURE; symbol->symbol.procedure.return_value = get_type_symbol( &symbols, "integer" ); symbol->symbol.procedure.len = 1; symbol->symbol.procedure.params[0] = get_type_symbol( &symbols, "__gen_array_of_any" ); /* constructor for char type (from type integer) */ symbol = insert_symbol( &symbols, "char" ); symbol->kind = SYMBOL_PROCEDURE; symbol->symbol.procedure.return_value = get_type_symbol( &symbols, "char" ); symbol->symbol.procedure.len = 1; symbol->symbol.procedure.params[0] = get_type_symbol( &symbols, "integer" ); /* TODO: register functions in module system, should be outside in the stage-1 compiler, * should also add a system module symbol first and attach all procedures to it * needs a rewrite of the qualifier matching code, so later.. 
*/ symbol = insert_symbol( &symbols, "system.readchar" ); symbol->kind = SYMBOL_PROCEDURE; symbol->symbol.procedure.return_value = get_type_symbol( &symbols, "char" ); symbol->symbol.procedure.len = 0; symbol = insert_symbol( &symbols, "system.readline" ); symbol->kind = SYMBOL_PROCEDURE; symbol->symbol.procedure.return_value = get_type_symbol( &symbols, "__void" ); symbol->symbol.procedure.len = 1; symbol->symbol.procedure.params[0] = get_type_symbol( &symbols, "__gen_array_of_char" ); } static void init( void ) { col = 1; row = 1; look = getChar( ); ident[0] = '\0'; num = 0; sym = getSym( ); } static void prologue( void ) { emitLn( "/* generated with e2c */" ); emitLn( "#include " ); emitLn( "#include " ); emitLn( "#include " ); emitLn( \ "int getc_wrapper( void ) {" \ " int c = getc( stdin );" \ " if( c == EOF ) {" \ " if( feof( stdin ) ) {" \ " return '\\0';" \ " } else {" \ " fprintf( stderr, \"ERROR: read error\\n\" );" \ " }" \ " } else {" \ " return c;" \ " }" \ "}" ); init_symboltable( &symbols, NULL ); register_internal_types( ); register_internal_constants( ); register_internal_functions( ); } static void epilogue( void ) { emitLn( "int main( void ) {" ); emitLn( "module_%s_init( );", moduleName ); emitLn( "}" ); } /* TODO: don't abuse varName */ static void qualident( void ) { identifier( ); if( sym == S_ident ) { strncpy( varName, ident, MAX_IDENT_LEN ); varName[MAX_IDENT_LEN-1] = '\0'; } sym = getSym( ); if( sym == S_dot ) { sym = getSym( ); if( sym == S_ident ) { strncat( varName, ".", MAX_IDENT_LEN ); varName[MAX_IDENT_LEN-1] = '\0'; strncat( varName, ident, MAX_IDENT_LEN ); varName[MAX_IDENT_LEN-1] = '\0'; sym = getSym( ); } } } static void variableName( void ) { identifier( ); if( sym == S_ident ) { strncpy( varName, ident, MAX_IDENT_LEN ); varName[MAX_IDENT_LEN-1] = '\0'; } } static void expression( void ); static void simpleExpression( void ); static void parameterList( void ); static void factor( void ) { Symbol *symbol; if( sym == S_plus ) { 
sym = getSym( ); } else if( sym == S_minus ) { sym = getSym( ); emit( "-" ); } else if( sym == S_not ) { sym = getSym( ); emit( "!" ); } if( sym == S_number ) { emit( "%d", num ); sym = getSym( ); } else if( sym == S_string ) { emit( "\"%s\"", str ); sym = getSym( ); } else if( sym == S_ident ) { qualident( ); symbol = get_symbol( current_scope, varName, SYMBOL_ANY ); if( symbol->kind == SYMBOL_TYPE && symbol->symbol.type.kind == TYPE_PROCEDURE ) { parameterList( ); } else if( symbol->kind == SYMBOL_CONSTANT ) { if( symbol->symbol.constant.type->kind == TYPE_BASIC ) { switch( symbol->symbol.constant.type->type.basic ) { case TYPE_BOOLEAN: emit( "%d", symbol->symbol.constant.value.boolean ); break; case TYPE_INTEGER: emit( "%d", symbol->symbol.constant.value.integer ); break; case TYPE_CHAR: emit( "%c", symbol->symbol.constant.value.character ); break; case TYPE_BYTE: emit( "%d", symbol->symbol.constant.value.byte ); break; default: Abort( "Unknown basic type constant '%s'", varName ); } } else { Abort( "Unknown complex constant '%s'", varName ); } } else if( symbol->kind == SYMBOL_TYPE && symbol->symbol.type.kind == TYPE_ARRAY ) { if( sym == S_lbracket ) { sym = getSym( ); emit( "%s[", varName ); simpleExpression( ); Expect( S_rbracket ); emit( "]" ); } else { emit( "%s", varName ); } } else if( symbol->kind == SYMBOL_VARIABLE ) { emit( "%s", varName ); } else { Abort( "Unkown factor symbol '%s'", varName ); } } else if( sym == S_lparen ) { emit( "(" ); sym = getSym( ); expression( ); Expect( S_rparen ); emit( ")" ); } else { Abort( "Expected expression" ); } } static void term( void ) { factor( ); while( sym == S_star || sym == S_slash || sym == S_and ) { if( sym == S_star ) emit( "*" ); if( sym == S_slash ) emit( "/" ); if( sym == S_and ) emit( " && " ); sym = getSym( ); factor( ); } } static void simpleExpression( void ) { term( ); while( sym == S_plus || sym == S_minus || sym == S_or ) { if( sym == S_plus ) emit( "+" ); if( sym == S_minus ) emit( "-" ); if( sym 
== S_or ) emit( " || " ); sym = getSym( ); term( ); } } static int isRelationalOperator( Scanner_Symbol sym ) { if( sym == S_less || sym == S_less_or_equals || sym == S_equals || sym == S_more || sym == S_more_or_equals || sym == S_not_equals ) { return 1; } else { return 0; } } static void relationalOperator( void ) { if( sym == S_less ) { emit( " < " ); } else if( sym == S_less_or_equals ) { emit( " <= " ); } else if( sym == S_equals ) { emit( " == " ); } else if( sym == S_more_or_equals ) { emit( " >= " ); } else if( sym == S_more ) { emit( " > " ); } else if( sym == S_not_equals ) { emit( " != " ); } } static void expression( void ) { simpleExpression( ); if( isRelationalOperator( sym ) ) { relationalOperator( ); sym = getSym( ); simpleExpression( ); } } static int selected_array; static void selector( void ) { Type *type; selected_array = 0; if( sym == S_lbracket ) { Expect( S_lbracket ); type = get_type_symbol( current_scope, varName ); if( type != NULL && type->kind != TYPE_ARRAY ) { Abort( "Selecting element of non-array" ); } selected_array = 1; emit( "%s[", varName ); expression( ); Expect( S_rbracket ); emit( "]" ); } /* TODO: here we had record field selector later */ } static void assignment( void ) { Type *type; /* left hand side */ /* precondition: qualident has been already parsed outside */ selector( ); type = get_type_symbol( current_scope, varName ); if( type == NULL ) { Abort( "Unknown type for variable '%s'", varName ); } /* x := ( a+ b )- 3; -> x = (a+b)-3; * s1 := s2; -> strncpy( s2, s1, length( s2 ) ); * s[i] := c; -> s[i] = c; * s[i] := char( 0 ); -> s[i] = '\0'; * s.str[index(18)+2] := 'a' + f(offset+2); */ /* special case: array of char on both sides comes heavily in the * way without an AST in-between */ if( type->kind != TYPE_ARRAY && type->type.array.type->kind == TYPE_BASIC && type->type.array.type->type.basic == TYPE_CHAR ) { emit( "strncpy( %s, ", varName ); } else { if( selected_array ) { emit( " = ", varName ); } else { emit( "%s 
= ", varName ); } } Expect( S_assign ); /* emit( " = " ); */ /* right hand side, any expression */ expression( ); if( type->kind != TYPE_ARRAY && type->type.array.type->kind == TYPE_BASIC && type->type.array.type->type.basic == TYPE_CHAR ) { emit( ", %d ); ", type->type.array.len ); emit( "%s[%d-1] = '\\0'", varName, type->type.array.len ); } emitLn( ";" ); } static void statementSequence( void ); static void doIf( void ) { emit( "if( " ); expression( ); emitLn( ") {" ); Expect( S_do ); statementSequence( ); if( sym == S_else ) { emitLn( " } else {" ); sym = getSym( ); statementSequence( ); } Expect( S_end ); emitLn( "}" ); } static void statementBlock( void ); void static doFor( void ) { char loopVar[MAX_IDENT_LEN]; emit( "for( " ); identifier( ); strncpy( loopVar, ident, MAX_IDENT_LEN ); loopVar[MAX_IDENT_LEN-1] = '\0'; emit( " %s = ", loopVar ); sym = getSym( ); Expect( S_assign ); simpleExpression( ); emit( "; %s <= ", loopVar ); Expect( S_to ); simpleExpression( ); /* TODO: add "by" constExpression if needed */ emitLn( "; %s++ ) {", loopVar ); Expect( S_do ); statementSequence( ); Expect( S_end ); emitLn( "}" ); } static void doWhile( void ) { emit( "while( " ); expression( ); emitLn( ") {" ); Expect( S_do ); statementSequence( ); Expect( S_end ); emitLn( "}" ); } static void doReturn( void ) { emit( "return " ); expression( ); emitLn( ";" ); } static void parameterList( void ) { char funcName[MAX_IDENT_LEN]; int n = 0; /* varName contains the function/procedure name, * not the best name, actually the precondition is * to have parsed a qualident */ strncpy( funcName, varName, MAX_IDENT_LEN ); funcName[MAX_IDENT_LEN-1] = '\0'; Expect( S_lparen ); if( sym == S_rparen ) { if( strcmp( funcName, "system.readchar" ) == 0 ) { emit( "getc_wrapper( )" ); sym = getSym( ); return; } emit( "%s( )", funcName ); sym = getSym( ); return; } /* prologue of 1-parameter function or procedure */ if( sym != S_comma ) { /* handle internal 0-parameter functions and procedures */ /* 
TODO: add internal function maps as symbols and procedures */ if( strcmp( funcName, "length" ) == 0 ) { /* we don't allow expression here, only simple variables */ qualident( ); emit( "%d", length( varName ) ); Expect( S_rparen ); return; /* constructor for non-printable characters */ } else if( strcmp( funcName, "char" ) == 0 ) { number( ); emit( "(char)%d", num ); sym = getSym( ); Expect( S_rparen ); return; } else if( strcmp( funcName, "system.writeline" ) == 0 ) { emit( "printf( \"%%s\\n\", " ); } else if( strcmp( funcName, "system.writestring" ) == 0 ) { emit( "printf( \"%%s\", " ); } else if( strcmp( funcName, "system.writeinteger" ) == 0 ) { emit( "printf( \"%%d\", " ); } else if( strcmp( funcName, "system.writechar" ) == 0 ) { emit( "printf( \"%%c\", " ); } else if( strcmp( funcName, "system.halt" ) == 0 ) { emit( "exit( " ); } else if( strcmp( funcName, "system.readline" ) == 0 ) { /* Type type = get_symbol_type( varName ); */ /* TODO: check if parameter is an array of char, get length of the defined array and put it into the parameter of getline */ emit( "{ size_t _n = %d; fgets( (char *)&", 255 ); } else { emit( "%s( ", funcName ); } } /* TODO: no VAR parameters, strictly pass-by-value */ simpleExpression( ); n = 1; while( sym == S_comma ) { n++; emit( ", " ); sym = getSym( ); /* TODO: no VAR parameters, strictly pass-by-value */ simpleExpression( ); } /* epilogue of 1-parameter function or procedure */ if( n == 1 ) { if( strcmp( funcName, "system.readline" ) == 0 ) { emit( ", _n, stdin ); }" ); } else { emit( ")" ); } } else { emit( ")" ); } Expect( S_rparen ); } static void statement( void ) { if( sym == S_if ) { sym = getSym( ); doIf( ); } else if( sym == S_for ) { sym = getSym( ); doFor( ); } else if( sym == S_while ) { sym = getSym( ); doWhile( ); } else if( sym == S_return ) { sym = getSym( ); doReturn( ); /* TODO: S_else feels wrong here, just for the end ';' in an if-block */ } else if( sym == S_end || sym == S_else ) { return; } else if( sym == 
S_ident ) { qualident( ); if( sym == S_lparen ) { /* procedure call with parameter */ parameterList( ); emitLn( ";" ); } else { Symbol *symbol = get_symbol( current_scope, varName, SYMBOL_ANY ); if( symbol->kind == SYMBOL_PROCEDURE ) { /* procedure call without parameter */ /* TODO: check number of parameter and return value to be 0 */ emitLn( "%s( );", varName ); } else if( symbol->kind == SYMBOL_VARIABLE ) { assignment( ); } else { Abort( "Expected variable for assignment or a procedure for a procedure call for '%s'", varName ); } } } else { Abort( "Illegal statement" ); } } static void statementSequence( void ) { statement( ); if( sym == S_end ) return; while( sym == S_semicolon ) { sym = getSym( ); statement( ); } } static void statementBlock( void ) { Expect( S_begin ); statementSequence( ); if( sym == S_return ) { sym = getSym( ); identifier( ); } Expect( S_end ); } static Type *doType( void ); static Type *simpleType( void ) { Type *type; type = get_type_symbol( current_scope, ident ); if( type != NULL && type->kind == TYPE_BASIC ) { sym = getSym( ); return type; } else { Abort( "Unknown type '%s'", ident ); } } static Type *arrayType( void ) { char typeName[MAX_STRING_LEN]; Type *type; Type *basicType; sym = getSym( ); Expect( S_lbracket ); /* TODO: should be a const expression */ number( ); if( num == 0 ) { Abort( "array of size 0 makes no sense" ); } sym = getSym( ); Expect( S_rbracket ); /* array size is in num */ if( sym == S_of ) { sym = getSym( ); basicType = doType( ); /* TODO here: this is an anonymous type of an array of len and basic type */ sprintf( typeName, "__array_%d_of_%s", num, ident ); type = get_type_symbol( current_scope, typeName ); if( type == NULL ) { Symbol *symbol = insert_symbol( current_scope, typeName ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_ARRAY; symbol->symbol.type.type.array.len = num; symbol->symbol.type.type.array.type = basicType; type = &symbol->symbol.type; } return type; } else { Abort( "of' 
expected in array definition" ); } } static Type *doType( ) { identifier( ); if( sym == S_ident ) { return simpleType( ); } else if( sym == S_array ) { return arrayType( ); } } static void constDeclaration( void ) { /* TODO: constName? */ variableName( ); sym = getSym( ); Expect( S_equals ); /* TODO: ConstExpression requires an interpreter */ if( sym == S_number ) { Symbol *symbol; number( ); sym = getSym( ); symbol = insert_symbol( &symbols, varName ); symbol->kind = SYMBOL_CONSTANT; /* TODO: deduce type from value (byte/integer)? */ symbol->symbol.constant.type = get_type_symbol( &symbols, "integer" ); symbol->symbol.constant.value.integer = num; } else { Abort( "Supporting numeric constants only" ); } } static void constBlock( void ) { Expect( S_const ); constDeclaration( ); if( sym == S_begin || sym == S_var ) return; while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_ident ) { constDeclaration( ); } else if( sym == S_begin || sym == S_var ) { return; } } } static void variableDeclaration( void ) { Type *type; Symbol *symbol; variableName( ); sym = getSym( ); Expect( S_colon ); type = doType( ); symbol = insert_symbol( current_scope, varName ); symbol->kind = SYMBOL_VARIABLE; symbol->symbol.variable.type = type; if( type->kind == TYPE_ARRAY ) { if( type->type.array.type->kind == TYPE_BASIC ) { /* TODO: this works for now, though it's not correct */ emitLn( "static %s %s[%d];", basicTypeToCType( type->type.array.type->type.basic ), varName, num ); } else { Abort( "Declaring an array '%s' with no basic type is not supported", varName ); } } else { if( type->type.array.type->kind == TYPE_BASIC ) { emitLn( "static %s %s;", basicTypeToCType( type->type.array.type->type.basic ), varName ); } else { Abort( "User defined types not supported" ); } } } static void variableBlock( void ) { Expect( S_var ); variableDeclaration( ); if( sym == S_begin ) return; while( sym == S_semicolon ) { sym = getSym( ); if( sym == S_ident ) { variableDeclaration( ); } else if( 
sym == S_begin ) { return; } } } static void procedureName( void ) { identifier( ); if( sym == S_ident ) { strncpy( procName, ident, MAX_IDENT_LEN ); procName[MAX_IDENT_LEN-1] = '\0'; } } static void procedureDeclaration( void ) { char typeName[MAX_STRING_LEN]; char return_type[MAX_IDENT_LEN]; Symbol *symbol; Type *funcType; SymbolTable locals; init_symboltable( &locals, current_scope ); Expect( S_procedure ); procedureName( ); symbol = insert_symbol( &locals, procName ); symbol->kind = TYPE_PROCEDURE; symbol->symbol.procedure.len = 0; #if 0 //~ symbol->symbol.procedure.return_value = get_type_symbol( &symbols, "integer" ); //~ symbol->symbol.procedure.params[0] = get_type_symbol( &symbols, "__gen_array_of_any" ); #endif sym = getSym( ); if( sym == S_lparen ) { sym = getSym( ); /* TODO: || S_var later */ if( sym == S_ident ) { do { if( symbol->symbol.procedure.len >= MAX_PARAMETERS ) { Abort( "Too many parameters in definition of procedure" ); } identifier( ); /* NO: new way, remember all local names and types, eventually definiting * new types. Then when entering the VAR section of the procedure, add * the parameters as local variables BEFORE the definition of new locals! */ /* define local variable of that type */ /* TODO: just in the case of a following block, otherwise we have a forward * declaraition */ /* lookup types, define local variables as parameters */ /* define the procedure with pointers to eventually new (anonymous) types */ /* TODO from here */ /* new? 
sprintf( typeName, sprintf( typeName, "__array_%d_of_%s", num, ident ); type = get_type_symbol( current_scope, typeName ); if( type == NULL ) { Symbol *symbol = insert_symbol( current_scope, typeName ); symbol->kind = SYMBOL_TYPE; symbol->symbol.type.kind = TYPE_ARRAY; symbol->symbol.type.type.array.len = num; symbol->symbol.type.type.array.type = basicType; type = &symbol->symbol.type; } */ symbol->symbol.procedure.params[symbol->symbol.procedure.len].kind = SYMBOL_TYPE; funcType.details.function.params[funcType.details.function.len] = (Type *)( &funcType.details.function.data + sizeof( Type ) * funcType.details.function.len ); strncpy( funcType.details.function.params[funcType.details.function.len]->name, ident, MAX_IDENT_LEN ); funcType.details.function.params[funcType.details.function.len]->name[MAX_IDENT_LEN-1] = '\0'; sym = getSym( ); Expect( S_colon ); if( sym == S_array ) { sym = getSym( ); Expect( S_of ); doType( ); funcType.details.function.params[funcType.details.function.len]->type = TYPE_ARRAY; funcType.details.function.params[funcType.details.function.len]->details.array.len = 0; funcType.details.function.params[funcType.details.function.len]->details.array.type = (Type *)&funcType.details.function.params[funcType.details.function.len]->details.array.data; funcType.details.function.params[funcType.details.function.len]->details.array.type->type = lastType.type; } else { doType( ); funcType.details.function.params[funcType.details.function.len]->type = lastType.type; } insert_symbol( &locals, *funcType.details.function.params[funcType.details.function.len], funcType.details.function.params[funcType.details.function.len]->name ); funcType.details.function.len++; } while( sym == S_comma ); } Expect( S_rparen ); /* do not allow empty parameter lists in C++ style */ if( funcType.details.function.len == 0 ) { Abort( "Empty parameter list must not be enclosed with ( )" ); } } if( sym == S_colon ) { sym = getSym( ); simpleType( ); strncpy( return_type, 
basicTypeToCType( lastType.type ), MAX_IDENT_LEN ); } else { strncpy( return_type, "void", MAX_IDENT_LEN ); } Expect( S_semicolon ); insert_symbol( current_scope, funcType, procName ); current_scope = &locals; emit( "%s %s( ", return_type, procName ); if( funcType.details.function.len == 0 ) { emit( "void" ); } else { int i; for( i = 0; i < funcType.details.function.len; i++ ) { emit( "%s%s ", typeToCType( funcType.details.function.params[i] ), funcType.details.function.params[i]->name ); } } emitLn( ") {" ); if( sym == S_var ) { variableBlock( ); } if( sym == S_begin ) { statementBlock( ); } current_scope = current_scope->parent; emitLn( "}" ); } static void procedureBlock( void ) { while( sym == S_procedure ) { procedureDeclaration( ); } } static void declarationBlock( void ) { if( sym == S_const ) { constBlock( ); } if( sym == S_var ) { variableBlock( ); } procedureBlock( ); } static void handleImport( void ) { fprintf( stderr, "Importing module '%s'\n", ident ); } static void importBlock( void ) { Expect( S_import ); identifier( ); handleImport( ); sym = getSym( ); while( sym == S_comma ) { identifier( ); sym = getSym( ); handleImport( ); sym = getSym( ); } Expect( S_semicolon ); } static void doModule( void ) { Expect( S_module ); if( sym == S_ident ) { strncpy( moduleName, ident, MAX_IDENT_LEN ); moduleName[MAX_IDENT_LEN-1] = '\0'; } Expect( S_ident ); Expect( S_semicolon ); if( sym == S_import ) { importBlock( ); } declarationBlock( ); emitLn( "void module_%s_init( ) {", moduleName ); statementBlock( ); emitLn( "}" ); } int main( void ) { prologue( ); init( ); doModule( ); epilogue( ); exit( EXIT_SUCCESS ); }