/* constants */ enum { MAX_IDENT_LEN = 20, MAX_NOF_REGISTERS = 6 }; /* scanner */ enum { S_PLUS = 1, S_MINUS, S_STAR, S_SLASH, S_SEMICOLON, S_LPAREN, S_RPAREN, S_LBRACE, S_RBRACE, S_ASSIGN, S_EQUALS, S_NOT_EQUALS, S_LESS, S_LESS_OR_EQUALS, S_MORE, S_MORE_OR_EQUALS, S_INT = 50, S_IF, S_ELSE, S_WHILE, S_DO, S_PUTINT, S_IDENT = 60, S_NUM = 70, S_EOI = 98, S_ERR = 99 }; struct Scanner { int col; int row; int c; int pushback; int token; int debug; int num; char *ident; }; void scannerPrintErrorHeader( struct Scanner *scanner ) { putnl( ); putstring( "Error line " ); putint( scanner->row ); putstring( ", pos " ); putint( scanner->col ); putstring( ": " ); } void scannerPrintState( struct Scanner *scanner ) { putint( scanner->row ); putchar( '/' ); putint( scanner->col ); putstring( ": " ); putint( scanner->token ); if( scanner->token == S_NUM ) { putchar( '(' ); putint( scanner->num ); putchar( ')' ); } putnl( ); } struct Scanner *createScanner( ) { struct Scanner *scanner; scanner = (struct Scanner *)malloc( sizeof( struct Scanner ) ); scanner->col = 0; scanner->row = 1; scanner->c = EOF; scanner->pushback = 0; scanner->ident = (char *)malloc( MAX_IDENT_LEN + 1 ); scanner->debug = 0; return scanner; } void freeScanner( struct Scanner *scanner ) { free( scanner->ident ); free( (char *)scanner ); } void pushBack( struct Scanner *scanner ) { scanner->pushback = scanner->c; } int getChar( struct Scanner *scanner ) { if( scanner->pushback ) { scanner->c = scanner->pushback; scanner->pushback = 0; return scanner->c; } scanner->c = getchar( ); if( scanner->c == EOF ) { return scanner->c; } scanner->col++; if( scanner->c == '\n' ) { scanner->col = 0; scanner->row++; } return scanner->c; } int skipWhite( struct Scanner *scanner ) { scanner->c = getChar( scanner ); while( isspace( scanner->c ) ) { scanner->c = getChar( scanner ); } return scanner->c; } void scanNumber( struct Scanner *scanner ) { scanner->num = scanner->c - '0'; scanner->c = getChar( scanner ); while( isdigit( scanner->c ) ) { scanner->num = 10 * scanner->num + ( scanner->c - '0' ); scanner->c = getChar( scanner ); } pushBack( scanner ); } void scanIdent( struct Scanner *scanner ) { int n; n = 0; while( isalnum( scanner->c ) || ( scanner->c == '_' ) ) { scanner->ident[n] = scanner->c; n++; if( n >= MAX_IDENT_LEN - 1 ) { scannerPrintErrorHeader( scanner ); putstring( "too long identifier" ); putnl( ); exit( EXIT_FAILURE ); } scanner->c = getChar( scanner ); } scanner->ident[n] = 0; /* c4 doesn't handle '\0' */ pushBack( scanner ); } int keyword( char *ident ) { switch( ident[0] ) { case 'd': if( strcmp( ident, "do" ) == 0 ) { return S_DO; } break; case 'e': if( strcmp( ident, "else" ) == 0 ) { return S_ELSE; } break; case 'i': if( strcmp( ident, "if" ) == 0 ) { return S_IF; } else if( strcmp( ident, "int" ) == 0 ) { return S_INT; } break; case 'p': if( strcmp( ident, "putint" ) == 0 ) { return S_PUTINT; } break; case 'w': if( strcmp( ident, "while" ) == 0 ) { return S_WHILE; } break; } return 0; } int getToken( struct Scanner *scanner ) { int c; c = skipWhite( scanner ); switch( c ) { case EOF: scanner->token = S_EOI; break; case '+': scanner->token = S_PLUS; break; case '-': scanner->token = S_MINUS; break; case '*': scanner->token = S_STAR; break; case '/': c = getChar( scanner ); if( c == '/' ) { while( c != EOF && c != '\n' ) { c = getChar( scanner ); } scanner->token = getToken( scanner ); } else if( c == '*' ) { do { while( c != EOF && c != '*' ) { c = getChar( scanner ); } c = getChar( scanner ); } while( c != EOF && c != '/' ); c = getChar( scanner ); scanner->token = getToken( scanner ); } else { pushBack( scanner ); scanner->token = S_SLASH; } break; case ';': scanner->token = S_SEMICOLON; break; case '=': c = getChar( scanner ); if( c == '=' ) { scanner->token = S_EQUALS; } else { pushBack( scanner ); scanner->token = S_ASSIGN; } break; case '!': c = getChar( scanner ); if( c == '=' ) { scanner->token = S_NOT_EQUALS; } else { pushBack( scanner ); } break; case '<': c = getChar( scanner ); if( c == '=' ) { scanner->token = S_LESS_OR_EQUALS; } else { scanner->token = S_LESS; } break; case '>': c = getChar( scanner ); if( c == '=' ) { scanner->token = S_MORE_OR_EQUALS; } else { scanner->token = S_MORE; } break; case '(': scanner->token = S_LPAREN; break; case ')': scanner->token = S_RPAREN; break; case '{': scanner->token = S_LBRACE; break; case '}': scanner->token = S_RBRACE; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': scanNumber( scanner ); scanner->token = S_NUM; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': scanIdent( scanner ); if( ( scanner->token = keyword( scanner->ident ) ) ) { } else { scanner->token = S_IDENT; } break; default: scanner->token = S_ERR; scannerPrintErrorHeader( scanner ); putstring( "unknown token '" ); putchar( c ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } if( scanner->debug ) { scannerPrintState( scanner ); } return scanner->token; } /* symbol table */ struct Symbol { char *name; struct Symbol *next; }; struct Symbol *createSymbol( char *s ) { struct Symbol *sym; sym = (struct Symbol *)malloc( sizeof ( struct Symbol ) ); sym->name = strdup( s ); sym->next = NULL; return sym; } void freeSymbol( struct Symbol *sym ) { free( sym->name ); free( (char *)sym ); } /* scope */ struct Scope { char *name; struct Symbol *sym; int labelNo; }; struct Scope *createScope( char *name ) { struct Scope *scope; scope = (struct Scope *)malloc( sizeof( struct Scope ) ); scope->name = strdup( name ); scope->sym = NULL; scope->labelNo = 0; return scope; } void freeScope( struct Scope *scope ) { struct Symbol *sym, *next; free( scope->name ); sym = scope->sym; while( sym != NULL ) { next = sym->next; freeSymbol( sym ); sym = next; } free( (char *)scope ); } struct Symbol *getSymbol( struct Scope *scope, char *name ) { struct Symbol *sym; sym = scope->sym; while( sym != NULL ) { if( strcmp( name, sym->name ) == 0 ) { return sym; } sym = sym->next; } return NULL; } void insertSymbol( struct Scope *scope, struct Symbol *sym ) { if( scope->sym == NULL ) { scope->sym = sym; } else { sym->next = scope->sym; scope->sym = sym; } } /* AST */ enum { A_INT_LITERAL = 1, A_IDENT, A_LVIDENT, A_ADD = 10, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_ASSIGN, A_EQUALS, A_NOT_EQUALS, A_LESS, A_LESS_OR_EQUALS, A_MORE, A_MORE_OR_EQUALS, A_ERR = 99 }; struct ASTnode { int op; /* node operation, one of the A_ enums */ struct ASTnode *left; /* left side operand */ struct ASTnode *right; /* right side operand (or empty for unary operations */ int intval; /* value in case of A_INT_LITERAL */ struct Symbol *sym; /* symbol in case of A_IDENT */ }; struct ASTnode *createASTnode( int op, struct ASTnode *left, struct ASTnode *right, int intval, struct Symbol *sym ) { struct ASTnode *node; node = (struct ASTnode *)malloc( sizeof( struct ASTnode ) ); node->op = op; node->left = left; node->right = right; node->intval = intval; node->sym = sym; return node; } struct ASTnode *createASTbinary( int op, struct ASTnode *left, struct ASTnode *right ) { return createASTnode( op, left, right, 0, NULL ); } struct ASTnode *createASTleafInt( int op, int intval ) { return createASTnode( op, NULL, NULL, intval, NULL ); } struct ASTnode *createASTleafSym( int op, struct Symbol *sym ) { return createASTnode( op, NULL, NULL, 0, sym ); } void freeASTnode( struct ASTnode *node ) { if( node->left != NULL ) { freeASTnode( node->left ); } if( node->right != NULL ) { freeASTnode( node->right ); } free( (char *)node ); } /* parser/compiler, no forward declarations */ struct Parser { int token; struct Scanner *scanner; struct Scope *global_scope; int debug; }; struct Generator { struct Scanner *scanner; char **regName; /* names of the registers */ int *regFree; /* free registers */ int spillReg; /* index of the current spilled register */ int debug; /* adds debug output to generated assembly */ }; struct Compiler { struct Parser *parser; struct Generator *generator; }; /* code generation */ enum { EAX = 0, EBX, ECX, EDX, ESI, EDI, NOREG = -1 }; struct Generator *createGenerator( struct Scanner *scanner ) { struct Generator *generator; int i; generator = (struct Generator *)malloc( sizeof( struct Generator ) ); generator->scanner = scanner; generator->regName = (char **)malloc( sizeof( char * ) * MAX_NOF_REGISTERS ); generator->regName[0] = strdup( "eax" ); generator->regName[1] = strdup( "ebx" ); generator->regName[2] = strdup( "ecx" ); generator->regName[3] = strdup( "edx" ); generator->regName[4] = strdup( "esi" ); generator->regName[5] = strdup( "edi" ); generator->regFree = (int *)malloc( sizeof( int ) * MAX_NOF_REGISTERS ); i = 0; while( i < MAX_NOF_REGISTERS ) { generator->regFree[i] = 1; i++; } generator->spillReg = 0; generator->debug = 0; return generator; } void freeGenerator( struct Generator *generator ) { int i; i = 0; while( i < MAX_NOF_REGISTERS ) { free( generator->regName[i] ); i++; } free( (char *)generator->regName ); free( (char *)generator->regFree ); free( (char *)generator ); } void putreg( struct Generator *generator, int reg ) { putstring( generator->regName[reg] ); } void putlow8reg( struct Generator *generator, int reg ) { switch( reg ) { case EAX: putstring( "al" ); break; case EBX: putstring( "bl" ); break; case ECX: putstring( "cl" ); break; case EDX: putstring( "dl" ); break; default: scannerPrintErrorHeader( generator->scanner ); putstring( "error using low 8-bit subreguster of reguster '" ); putstring( generator->regName[reg] ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } } void genAddGlob( struct Generator *generator, char *ident ) { putstring( ident ); putstring( ": " ); putstring( "dd $0" ); putnl( ); } void genSaveReg( struct Generator *generator, int reg ) { putstring( "push " ); putreg( generator, reg ); putnl( ); } void genRestoreReg( struct Generator *generator, int reg ) { putstring( "pop " ); putreg( generator, reg ); putnl( ); } int genAllocReg( struct Generator *generator ) { int reg; reg = 0; while( reg < MAX_NOF_REGISTERS ) { if( generator->regFree[reg] ) { generator->regFree[reg] = 0; if( generator->debug ) { putstring( "; allocating register " ); putreg( generator, reg ); putnl( ); } return reg; } reg++; } if( reg >= MAX_NOF_REGISTERS ) { /* simplistic spilling strategy to stack, as described in https://github.com/DoctorWkt/acwj/blob/master/54_Reg_Spills/Readme.md */ reg = generator->spillReg % MAX_NOF_REGISTERS; generator->spillReg++; if( generator->debug ) { putstring( "; spilling register " ); putreg( generator, reg ); putstring( " to stack, allocating register" ); putnl( ); } genSaveReg( generator, reg ); } return reg; } void genFreeReg( struct Generator *generator, int reg ) { int r; if( generator->spillReg > 0 ) { generator->spillReg--; r = ( generator->spillReg % MAX_NOF_REGISTERS ); if( generator->debug ) { putstring( "; restoring spilled register " ); putreg( generator, reg ); putstring( " from stack" ); putnl( ); } genRestoreReg( generator, r ); } else { if( generator->regFree[reg] == 0 ) { generator->regFree[reg] = 1; if( generator->debug ) { putstring( "; deallocating register " ); putreg( generator, reg ); putnl( ); } } else { scannerPrintErrorHeader( generator->scanner ); putstring( "error freeing non-allocated register '" ); putstring( generator->regName[reg] ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } } } void genFreeAllRegs( struct Generator *generator ) { int i; i = 0; while( i < MAX_NOF_REGISTERS ) { if( generator->regFree[i] == 0 ) { generator->regFree[i] = 1; if( generator->debug ) { putstring( "; deallocating register " ); putreg( generator, i ); putnl( ); } } i++; } } int genLoadImm( struct Generator *generator, int intval ) { int reg; reg = genAllocReg( generator ); putstring( "mov " ); putreg( generator, reg ); putstring( ", " ); putint( intval ); putnl( ); return reg; } int genLoadIdent( struct Generator *generator, char *ident ) { int reg; reg = genAllocReg( generator ); putstring( "mov " ); putreg( generator, reg ); putstring( ", [" ); putstring( ident ); putstring( "]" ); putnl( ); return reg; } int genStoreIdent( struct Generator *generator, int reg, char *ident ) { putstring( "mov [" ); putstring( ident ); putstring( "], " ); putreg( generator, reg ); putnl( ); return reg; } int genBinary( char *op, struct Generator *generator,int leftreg, int rightreg, int implicit ) { putstring( op ); putchar( ' ' ); if( !implicit ) { putreg( generator, leftreg ); putstring( ", " ); } putreg( generator, rightreg ); putnl( ); genFreeReg( generator, rightreg ); return leftreg; } int genAdd( struct Generator *generator, int leftreg, int rightreg ) { return genBinary( "add", generator, leftreg, rightreg, 0 ); } int genSub( struct Generator *generator, int leftreg, int rightreg ) { return genBinary( "sub", generator, leftreg, rightreg, 0 ); } int genMul( struct Generator *generator, int leftreg, int rightreg ) { int reg; if( leftreg == EAX ) { reg = genBinary( "mul", generator, leftreg, rightreg, 1 ); } else { if( !generator->regFree[EAX] ) { genSaveReg( generator, EAX ); } if( leftreg != EDX && !generator->regFree[EDX] ) { genSaveReg( generator, EDX ); } if( rightreg == EAX ) { genSaveReg( generator, EBX ); putstring( "mov ebx, eax" ); putnl( ); } putstring( "mov eax, " ); putreg( generator, leftreg ); putnl( ); if( rightreg == EAX ) { reg = genBinary( "mul", generator, EAX, EBX, 1 ); genRestoreReg( generator, EBX ); } else { reg = genBinary( "mul", generator, EAX, rightreg, 1 ); } putstring( "mov " ); putreg( generator, leftreg ); putstring( ", eax" ); putnl( ); if( leftreg != EDX && !generator->regFree[EDX] ) { genRestoreReg( generator, EDX ); } if( !generator->regFree[EAX] ) { genRestoreReg( generator, EAX ); } reg = leftreg; } return reg; } int genDiv( struct Generator *generator, int leftreg, int rightreg ) { int reg; if( leftreg == EAX ) { if( !generator->regFree[EDX] ) { genSaveReg( generator, EDX ); } putstring( "mov edx, 0" ); putnl( ); reg = genBinary( "div", generator, leftreg, rightreg, 1 ); } else { if( !generator->regFree[EAX] ) { genSaveReg( generator, EAX ); } if( !generator->regFree[EDX] ) { genSaveReg( generator, EDX ); } if( rightreg == EAX ) { genSaveReg( generator, EBX ); putstring( "mov ebx, eax" ); putnl( ); } putstring( "mov eax, " ); putreg( generator, leftreg ); putnl( ); putstring( "mov edx, 0" ); putnl( ); if( rightreg == EAX ) { reg = genBinary( "div", generator, EAX, EBX, 1 ); genRestoreReg( generator, EBX ); } else { reg = genBinary( "div", generator, EAX, rightreg, 1 ); } putstring( "mov " ); putreg( generator, leftreg ); putstring( ", eax" ); putnl( ); if( !generator->regFree[EDX] ) { genRestoreReg( generator, EDX ); } if( !generator->regFree[EAX] ) { genRestoreReg( generator, EAX ); } reg = leftreg; } return reg; } int genCompare( char *op, struct Generator *generator, int leftreg, int rightreg ) { putstring( "cmp " ); putreg( generator, leftreg ); putstring( ", " ); putreg( generator, rightreg ); putnl( ); putstring( op ); putchar( ' ' ); putlow8reg( generator, leftreg ); putnl( ); putstring( "and " ); putreg( generator, leftreg ); putstring( ", $FF" ); putnl( ); genFreeReg( generator, rightreg ); return leftreg; } int generateFromAST( struct Generator *generator, struct ASTnode *node, int inreg ) { int leftreg, rightreg, reg; reg = NOREG; leftreg = NOREG; rightreg = NOREG; if( node->left != NULL ) { leftreg = generateFromAST( generator, node->left, NOREG ); } if( node->right != NULL ) { rightreg = generateFromAST( generator, node->right, leftreg ); } switch( node->op ) { case A_INT_LITERAL: reg = genLoadImm( generator, node->intval ); break; case A_IDENT: reg = genLoadIdent( generator, node->sym->name ); break; case A_LVIDENT: reg = genStoreIdent( generator, inreg, node->sym->name ); break; case A_ADD: reg = genAdd( generator, leftreg, rightreg ); break; case A_SUBTRACT: reg = genSub( generator, leftreg, rightreg ); break; case A_MULTIPLY: reg = genMul( generator, leftreg, rightreg ); break; case A_DIVIDE: reg = genDiv( generator, leftreg, rightreg ); break; case A_ASSIGN: reg = inreg; break; case A_EQUALS: reg = genCompare( "sete", generator, leftreg, rightreg ); break; case A_NOT_EQUALS: reg = genCompare( "setne", generator, leftreg, rightreg ); break; case A_LESS_OR_EQUALS: reg = genCompare( "setle", generator, leftreg, rightreg ); break; case A_LESS: reg = genCompare( "setl", generator, leftreg, rightreg ); break; case A_MORE: reg = genCompare( "setg", generator, leftreg, rightreg ); break; case A_MORE_OR_EQUALS: reg = genCompare( "setge", generator, leftreg, rightreg ); break; default: putint( node->op ); putstring( "?" ); break; } return reg; } void genPrologue( struct Compiler *compiler ) { if( compiler->parser->debug ) { putstring( "; prologue" ); putnl( ); } putstring( "format binary" ); putnl( ); putstring( "use32" ); putnl( ); putstring( "org $1000000" ); putnl( ); putstring( "jmp _start" ); putnl( ); putstring( "putint:" ); putnl( ); putstring( "mov esi, 0" ); putnl( ); putstring( "lea edi, [putint_string_fmt]" ); putnl( ); putstring( "itoa_init:" ); putnl( ); putstring( "mov byte [edi], 0x20" ); putnl( ); putstring( "inc edi"); putnl( ); putstring( "inc esi"); putnl( ); putstring( "cmp esi, 9" ); putnl( ); putstring( "jne itoa_init" ); putnl( ); putstring( "inc esi"); putnl( ); putstring( "itoa_loop:" ); putnl( ); putstring( "mov edx, 0" ); putnl( ); putstring( "div esi" ); putnl( ); putstring( "add edx, 0x30" ); putnl( ); putstring( "mov byte [edi], dl" ); putnl( ); putstring( "dec edi" ); putnl( ); putstring( "cmp eax, 0" ); putnl( ); putstring( "jnz itoa_loop" ); putnl( ); putstring( "mov eax, 4" ); putnl( ); putstring( "mov ebx, 1" ); putnl( ); putstring( "mov ecx, putint_string_fmt" ); putnl( ); putstring( "mov edx, 11" ); putnl( ); putstring( "int 0x80" ); putnl( ); putstring( "ret" ); putnl( ); } void genEpilogue( struct Compiler *compiler ) { struct Symbol *sym; if( compiler->parser->debug ) { putstring( "; entry point" ); putnl( ); } putstring( "_start:" ); putnl( ); putstring( "call main" ); putnl( ); putstring( "hlt" ); putnl( ); sym = compiler->parser->global_scope->sym; while( sym != NULL ) { genAddGlob( compiler->generator, sym->name ); sym = sym->next; } putstring( "putint_string_fmt:" ); putnl( ); putstring( "db \" \", 10, 0" ); putnl( ); } /* parser */ struct Parser *createParser( ) { struct Parser *parser; parser = (struct Parser *)malloc( sizeof( struct Parser ) ); parser->scanner = createScanner( ); parser->global_scope = createScope( "global" ); parser->debug = 0; return parser; } void freeParser( struct Parser *parser ) { freeScope( parser->global_scope ); freeScanner( parser->scanner ); free( (char *)parser ); } void parserExpect( struct Parser *parser, int must, char *what ) { if( parser->token == must ) { parser->token = getToken( parser->scanner ); } else { scannerPrintErrorHeader( parser->scanner ); putstring( what ); putstring( " expected" ); putnl( ); exit( EXIT_FAILURE ); } } int parserTokenToOperator( struct Parser *parser, int token ) { int op; op = A_ERR; switch( token ) { case S_STAR: op = A_MULTIPLY; break; case S_SLASH: op = A_DIVIDE; break; case S_PLUS: op = A_ADD; break; case S_MINUS: op = A_SUBTRACT; break; case S_EQUALS: op = A_EQUALS; break; case S_NOT_EQUALS: op = A_NOT_EQUALS; break; case S_LESS: op = A_LESS; break; case S_LESS_OR_EQUALS: op = A_LESS_OR_EQUALS; break; case S_MORE: op = A_MORE; break; case S_MORE_OR_EQUALS: op = A_MORE_OR_EQUALS; break; default: scannerPrintErrorHeader( parser->scanner ); putstring( "unknown operator token for token " ); putint( token ); putnl( ); exit( EXIT_FAILURE ); } return op; } int parserOperatorPrecedence( struct Parser *parser, int operator ) { int precedence; precedence = 0; switch( operator ) { case A_LESS: case A_LESS_OR_EQUALS: case A_MORE: case A_MORE_OR_EQUALS: precedence = 4; break; case A_EQUALS: case A_NOT_EQUALS: precedence = 3; break; case A_MULTIPLY: case A_DIVIDE: precedence = 2; break; case A_ADD: case A_SUBTRACT: precedence = 1; break; default: scannerPrintErrorHeader( parser->scanner ); putstring( "unknown operator precedence for operator " ); putint( operator ); putnl( ); exit( EXIT_FAILURE ); } return precedence; } struct ASTnode *parseExpression( struct Parser *parser, int level ) { struct ASTnode *left, *right; struct Symbol *sym; int op; if( parser->token == S_EOI ) { scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected eof in expression" ); putnl( ); exit( EXIT_FAILURE ); } if( parser->token == S_NUM ) { left = createASTleafInt( A_INT_LITERAL, parser->scanner->num ); } else if( parser->token == S_IDENT ) { sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { scannerPrintErrorHeader( parser->scanner ); putstring( "unknown identifier '" ); putstring( parser->scanner->ident ); putstring( "' in expression" ); putnl( ); exit( EXIT_FAILURE ); } left = createASTleafSym( A_IDENT, sym ); } else { left = NULL; scannerPrintErrorHeader( parser->scanner ); putstring( "unknown token '" ); putint( parser->token ); putstring( "' in expression" ); putnl( ); exit( EXIT_FAILURE ); } parser->token = getToken( parser->scanner ); if( parser->token == S_EOI || parser->token == S_SEMICOLON || parser->token == S_RPAREN ) { return left; } op = parserTokenToOperator( parser, parser->token ); while( parserOperatorPrecedence( parser, op ) > level ) { parser->token = getToken( parser->scanner ); right = parseExpression( parser, parserOperatorPrecedence( parser, op ) ); left = createASTbinary( op, left, right ); if( parser->token == S_EOI || parser->token == S_SEMICOLON || parser->token == S_RPAREN ) { return left; } op = parserTokenToOperator( parser, parser->token ); } return left; } void parseDeclaration( struct Compiler *compiler ) { struct Parser *parser; struct Symbol *sym; parser = compiler->parser; parserExpect( parser, S_INT, "int" ); parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { sym = createSymbol( parser->scanner->ident ); insertSymbol( parser->global_scope, sym ); } else { scannerPrintErrorHeader( parser->scanner ); putstring( "duplicate global symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } parserExpect( parser, S_SEMICOLON, ";" ); } void parseAssignment( struct Compiler *compiler ) { struct Symbol *sym; struct ASTnode *node, *left, *right; struct Parser *parser; parser = compiler->parser; parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { scannerPrintErrorHeader( parser->scanner ); putstring( "unknown symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } right = createASTleafSym( A_LVIDENT, sym ); parserExpect( parser, S_ASSIGN, "=" ); left = parseExpression( parser, 0 ); parserExpect( parser, S_SEMICOLON, ";" ); node = createASTbinary( A_ASSIGN, left, right ); generateFromAST( compiler->generator, node, NOREG ); genFreeAllRegs( compiler->generator ); freeASTnode( node ); } void parsePutint( struct Compiler *compiler ) { struct Parser *parser; struct ASTnode *node; parser = compiler->parser; parserExpect( parser, S_PUTINT, "putint" ); parserExpect( parser, S_LPAREN, "(" ); node = parseExpression( parser, 0 ); parserExpect( parser, S_RPAREN, ")" ); parserExpect( parser, S_SEMICOLON, ";" ); generateFromAST( compiler->generator, node, NOREG ); putstring( "call putint" ); putnl( ); genFreeAllRegs( compiler->generator ); freeASTnode( node ); } /* TODO c4: forward reference of function */ void parseStatementBlock( struct Compiler *compiler ); char *genGetLabel( struct Compiler *compiler, struct Scope *scope ) { char *label; char *s; label = (char *)malloc( MAX_IDENT_LEN ); strlcpy( label, "__", MAX_IDENT_LEN ); strlcat( label, scope->name, MAX_IDENT_LEN ); strlcat( label, "_", 1 ); s = (char *)malloc( MAX_IDENT_LEN ); itoa( scope->labelNo, s, 10 ); strlcat( label, s, MAX_IDENT_LEN ); free( (char *)s ); scope->labelNo++; return label; } void parseIf( struct Compiler *compiler ) { struct Parser *parser; struct ASTnode *node; char *label1, *label2; parser = compiler->parser; parserExpect( parser, S_IF, "if" ); parserExpect( parser, S_LPAREN, "(" ); node = parseExpression( parser, 0 ); if( parser->debug ) { putstring( "; if then" ); putnl( ); } generateFromAST( compiler->generator, node, NOREG ); /* TODO: this should be condensed and optimized for the normal path * (rare ifs, invert the condition, jmpXX directly */ putstring( "cmp al, 0" ); putnl( ); genFreeAllRegs( compiler->generator ); label1 = genGetLabel( compiler, compiler->parser->global_scope ); putstring( "je " ); putstring( label1 ); putnl( ); parserExpect( parser, S_RPAREN, ")" ); parseStatementBlock( compiler ); if( parser->token == S_ELSE ) { label2 = genGetLabel( compiler, compiler->parser->global_scope ); putstring( "jmp " ); putstring( label2 ); putnl( ); if( parser->debug ) { putstring( "; else" ); putnl( ); } putstring( label1 ); putchar( ':' ); putnl( ); compiler->parser->token = getToken( compiler->parser->scanner ); parseStatementBlock( compiler ); putstring( label2 ); putchar( ':' ); putnl( ); free( label2 ); } else { putstring( label1 ); putchar( ':' ); putnl( ); } if( parser->debug ) { putstring( "; fi" ); putnl( ); } free( label1 ); } void parseWhile( struct Compiler *compiler ) { struct Parser *parser; struct ASTnode *node; char *label1, *label2; parser = compiler->parser; parserExpect( parser, S_WHILE, "while" ); parserExpect( parser, S_LPAREN, "(" ); node = parseExpression( parser, 0 ); if( parser->debug ) { putstring( "; while " ); putnl( ); } label2 = genGetLabel( compiler, compiler->parser->global_scope ); putstring( label2 ); putchar( ':' ); putnl( ); generateFromAST( compiler->generator, node, NOREG ); putstring( "cmp al, 0" ); putnl( ); genFreeAllRegs( compiler->generator ); label1 = genGetLabel( compiler, compiler->parser->global_scope ); putstring( "je " ); putstring( label1 ); putnl( ); parserExpect( parser, S_RPAREN, ")" ); parseStatementBlock( compiler ); putstring( "jmp " ); putstring( label2 ); putnl( ); putstring( label1 ); putchar( ':' ); putnl( ); if( parser->debug ) { putstring( "; fi" ); putnl( ); } free( label2 ); free( label1 ); } void parseDo( struct Compiler *compiler ) { struct Parser *parser; struct ASTnode *node; char *label1; parser = compiler->parser; parserExpect( parser, S_DO, "do" ); if( parser->debug ) { putstring( "; do" ); putnl( ); } label1 = genGetLabel( compiler, compiler->parser->global_scope ); putstring( label1 ); putchar( ':' ); putnl( ); parseStatementBlock( compiler ); parserExpect( parser, S_WHILE, "while" ); parserExpect( parser, S_LPAREN, "(" ); node = parseExpression( parser, 0 ); generateFromAST( compiler->generator, node, NOREG ); putstring( "cmp al, 0" ); putnl( ); genFreeAllRegs( compiler->generator ); putstring( "jne " ); putstring( label1 ); putnl( ); parserExpect( parser, S_RPAREN, ")" ); parserExpect( parser, S_SEMICOLON, ";" ); free( label1 ); } void parseStatement( struct Compiler *compiler ) { struct Parser *parser; parser = compiler->parser; if( parser->token == S_INT ) { parseDeclaration( compiler ); } else if( parser->token == S_IDENT ) { parseAssignment( compiler ); } else if( parser->token == S_PUTINT ) { parsePutint( compiler ); } else if( parser->token == S_IF ) { parseIf( compiler ); } else if( parser->token == S_WHILE ) { parseWhile( compiler ); } else if( parser->token == S_DO ) { parseDo( compiler ); } else if( parser->token == S_RBRACE || parser->token == S_EOI ) { return; } else { scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected token '" ); putint( parser->token ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } } void parseStatementBlock( struct Compiler *compiler ) { struct Parser *parser; parser = compiler->parser; parserExpect( parser, S_LBRACE, "{" ); while( parser->token != S_RBRACE && parser->token != S_EOI ) { parseStatement( compiler ); } parserExpect( parser, S_RBRACE, "}" ); } void parseFunctionDeclaration( struct Compiler *compiler ) { struct Parser *parser; struct Symbol *sym; parser = compiler->parser; if( parser->token == S_IDENT ) { if( strcmp( parser->scanner->ident, "void" ) != 0 ) { scannerPrintErrorHeader( parser->scanner ); putstring( "expected void as return value of a function declaration" ); putnl( ); exit( EXIT_FAILURE ); } } parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { sym = createSymbol( parser->scanner->ident ); insertSymbol( parser->global_scope, sym ); } else { scannerPrintErrorHeader( parser->scanner ); putstring( "duplicate global symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } if( parser->debug ) { putstring( "; function declaration '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); } putstring( parser->scanner->ident ); putchar( ':' ); putnl( ); putstring( "push ebp" ); putnl( ); putstring( "mov ebp, esp" ); putnl( ); parser->token = getToken( compiler->parser->scanner ); parserExpect( parser, S_LPAREN, "(" ); parserExpect( parser, S_RPAREN, ")" ); parseStatementBlock( compiler ); putstring( "pop ebp" ); putnl( ); putstring( "ret" ); putnl( ); } /* compiler */ struct Compiler *createCompiler( ) { struct Compiler *compiler; compiler = (struct Compiler *)malloc( sizeof( struct Compiler ) ); compiler->parser = createParser( ); compiler->generator = createGenerator( compiler->parser->scanner ); return compiler; } void freeCompiler( struct Compiler *compiler ) { freeGenerator( compiler->generator ); freeParser( compiler->parser ); free( (char *)compiler ); } /* main */ int main( int argc, char **argv ) { struct Compiler *compiler; compiler = createCompiler( ); /* compiler->parser->scanner->debug = 1; */ compiler->parser->debug = 1; /* compiler->generator->debug = 1; */ genPrologue( compiler ); compiler->parser->token = getToken( compiler->parser->scanner ); parseFunctionDeclaration( compiler ); genEpilogue( compiler ); freeCompiler( compiler ); exit( EXIT_SUCCESS ); return EXIT_SUCCESS; }