/* constants */ enum { MAX_IDENT_LEN = 20, MAX_NOF_REGISTERS = 6 }; /* scanner */ enum { S_PLUS = 1, S_MINUS, S_STAR, S_SLASH, S_SEMICOLON, S_EQUALS, S_INT = 10, S_IDENT, S_NUM = 20, S_ERR = 30, S_EOI = 31 }; struct Scanner { int col; int row; int c; int pushback; int token; int debug; int num; char *ident; }; void scannerPrintErrorHeader( struct Scanner *scanner ) { putnl( ); putstring( "Error line " ); putint( scanner->row ); putstring( ", pos " ); putint( scanner->col ); putstring( ": " ); } void scannerPrintState( struct Scanner *scanner ) { putint( scanner->row ); putchar( '/' ); putint( scanner->col ); putstring( ": " ); putint( scanner->token ); if( scanner->token == S_NUM ) { putchar( '(' ); putint( scanner->num ); putchar( ')' ); } putnl( ); } struct Scanner *createScanner( ) { struct Scanner *scanner; scanner = (struct Scanner *)malloc( sizeof( struct Scanner ) ); scanner->col = 0; scanner->row = 1; scanner->c = EOF; scanner->pushback = 0; scanner->ident = (char *)malloc( MAX_IDENT_LEN + 1 ); scanner->debug = 0; return scanner; } void freeScanner( struct Scanner *scanner ) { free( scanner->ident ); free( (char *)scanner ); } void pushBack( struct Scanner *scanner ) { scanner->pushback = scanner->c; } int getChar( struct Scanner *scanner ) { if( scanner->pushback ) { scanner->c = scanner->pushback; scanner->pushback = 0; return scanner->c; } scanner->c = getchar( ); if( scanner->c == EOF ) { return scanner->c; } scanner->col++; if( scanner->c == '\n' ) { scanner->col = 0; scanner->row++; } return scanner->c; } int skipWhite( struct Scanner *scanner ) { scanner->c = getChar( scanner ); while( isspace( scanner->c ) ) { scanner->c = getChar( scanner ); } return scanner->c; } void scanNumber( struct Scanner *scanner ) { scanner->num = scanner->c - '0'; scanner->c = getChar( scanner ); while( isdigit( scanner->c ) ) { scanner->num = 10 * scanner->num + ( scanner->c - '0' ); scanner->c = getChar( scanner ); } pushBack( scanner ); } void scanIdent( struct Scanner *scanner ) { int n; n = 0; while( isalnum( scanner->c ) || ( scanner->c == '_' ) ) { scanner->ident[n] = scanner->c; n++; if( n >= MAX_IDENT_LEN - 1 ) { scannerPrintErrorHeader( scanner ); putstring( "too long identifier" ); putnl( ); exit( EXIT_FAILURE ); } scanner->c = getChar( scanner ); } scanner->ident[n] = 0; /* c4 doesn't handle '\0' */ pushBack( scanner ); } int keyword( char *ident ) { switch( ident[0] ) { case 'i': if( strcmp( ident, "int" ) == 0 ) { return S_INT; } else { return 0; } break; default: return 0; } } int getToken( struct Scanner *scanner ) { int c; c = skipWhite( scanner ); switch( c ) { case EOF: scanner->token = S_EOI; break; case '+': scanner->token = S_PLUS; break; case '-': scanner->token = S_MINUS; break; case '*': scanner->token = S_STAR; break; case '/': c = getChar( scanner ); if( c == '/' ) { while( c != EOF && c != '\n' ) { c = getChar( scanner ); } scanner->token = getToken( scanner ); } else if( c == '*' ) { do { while( c != EOF && c != '*' ) { c = getChar( scanner ); } c = getChar( scanner ); } while( c != EOF && c != '/' ); c = getChar( scanner ); scanner->token = getToken( scanner ); } else { pushBack( scanner ); scanner->token = S_SLASH; } break; case ';': scanner->token = S_SEMICOLON; break; case '=': scanner->token = S_EQUALS; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': scanNumber( scanner ); scanner->token = S_NUM; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': scanIdent( scanner ); if( ( scanner->token = keyword( scanner->ident ) ) ) { } else { scanner->token = S_IDENT; } break; default: scanner->token = S_ERR; scannerPrintErrorHeader( scanner ); putstring( "unknown token '" ); putchar( c ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } if( scanner->debug ) { scannerPrintState( scanner ); } return scanner->token; } /* symbol table */ struct Symbol { char *name; struct Symbol *next; }; struct Symbol *createSymbol( char *s ) { struct Symbol *sym; sym = (struct Symbol *)malloc( sizeof ( struct Symbol ) ); sym->name = strdup( s ); sym->next = NULL; return sym; } void freeSymbol( struct Symbol *sym ) { free( sym->name ); free( (char *)sym ); } /* scope */ struct Scope { char *name; struct Symbol *sym; }; struct Scope *createScope( char *name ) { struct Scope *scope; scope = (struct Scope *)malloc( sizeof( struct Scope ) ); scope->name = strdup( name ); scope->sym = NULL; return scope; } void freeScope( struct Scope *scope ) { struct Symbol *sym, *next; free( scope->name ); sym = scope->sym; while( sym != NULL ) { next = sym->next; freeSymbol( sym ); sym = next; } free( (char *)scope ); } struct Symbol *getSymbol( struct Scope *scope, char *name ) { struct Symbol *sym; sym = scope->sym; while( sym != NULL ) { if( strcmp( name, sym->name ) == 0 ) { return sym; } sym = sym->next; } return NULL; } void insertSymbol( struct Scope *scope, struct Symbol *sym ) { if( scope->sym == NULL ) { scope->sym = sym; } else { sym->next = scope->sym; scope->sym = sym; } } /* AST */ enum { A_INT_LITERAL = 1, A_IDENT, A_ADD = 10, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_ASSIGN }; struct ASTnode { int op; struct ASTnode *left; struct ASTnode *right; int intval; /* for A_INT_LITERAL */ struct Symbol *sym; /* for A_IDENT */ }; struct ASTnode *createASTnode( int op, struct ASTnode *left, struct ASTnode *right, int intval, struct Symbol *sym ) { struct ASTnode *node; node = (struct ASTnode *)malloc( sizeof( struct ASTnode ) ); node->op = op; node->left = left; node->right = right; node->intval = intval; node->sym = sym; return node; } struct ASTnode *createASTbinary( int op, struct ASTnode *left, struct ASTnode *right ) { return createASTnode( op, left, right, 0, NULL ); } struct ASTnode *createASTleafInt( int op, int intval ) { return createASTnode( op, NULL, NULL, intval, NULL ); } struct ASTnode *createASTleafSym( int op, struct Symbol *sym ) { return createASTnode( op, NULL, NULL, 0, sym ); } void freeASTnode( struct ASTnode *node ) { if( node->left != NULL ) { freeASTnode( node->left ); } if( node->right != NULL ) { freeASTnode( node->right ); } free( (char *)node ); } /* code generation */ enum { EAX = 0, EBX, ECX, EDX, ESI, EDI }; struct Generator { struct Scanner *scanner; char **regName; int *regFree; }; struct Generator *createGenerator( struct Scanner *scanner ) { struct Generator *generator; int i; generator = (struct Generator *)malloc( sizeof( struct Generator ) ); generator->scanner = scanner; generator->regName = (char **)malloc( sizeof( char * ) * MAX_NOF_REGISTERS ); generator->regName[0] = strdup( "eax" ); generator->regName[1] = strdup( "ebx" ); generator->regName[2] = strdup( "ecx" ); generator->regName[3] = strdup( "edx" ); generator->regName[4] = strdup( "esi" ); generator->regName[5] = strdup( "edi" ); generator->regFree = (int *)malloc( sizeof( int ) * MAX_NOF_REGISTERS ); i = 0; while( i < MAX_NOF_REGISTERS ) { generator->regFree[i] = 1; i++; } return generator; } void freeGenerator( struct Generator *generator ) { int i; i = 0; while( i < MAX_NOF_REGISTERS ) { free( generator->regName[i] ); i++; } free( (char *)generator->regName ); free( (char *)generator->regFree ); free( (char *)generator ); } int genAllocReg( struct Generator *generator ) { int i; i = 0; while( i < MAX_NOF_REGISTERS ) { if( generator->regFree[i] ) { generator->regFree[i] = 0; return i; } i++; } scannerPrintErrorHeader( generator->scanner ); putstring( "out of registers" ); putnl( ); exit( EXIT_FAILURE ); return -1; } void genFreeReg( struct Generator *generator, int reg ) { if( generator->regFree[reg] == 0 ) { generator->regFree[reg] = 1; } else { scannerPrintErrorHeader( generator->scanner ); putstring( "error freeing non-allocated register '" ); putstring( generator->regName[reg] ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } } void putreg( struct Generator *generator, int reg ) { putstring( generator->regName[reg] ); } int genLoadImm( struct Generator *generator, int intval ) { int reg; reg = genAllocReg( generator ); putstring( "mov " ); putreg( generator, reg ); putstring( ", " ); putint( intval ); putnl( ); return reg; } int genLoadIdent( struct Generator *generator, char *ident ) { int reg; reg = genAllocReg( generator ); scannerPrintErrorHeader( generator->scanner ); putstring( "reading values from symbols not implemted" ); putnl( ); exit( EXIT_FAILURE ); return reg; } int genAdd( struct Generator *generator,int leftreg, int rightreg ) { putstring( "add " ); putreg( generator, leftreg ); putstring( ", " ); putreg( generator, rightreg ); putnl( ); genFreeReg( generator, rightreg ); return leftreg; } int genSub( struct Generator *generator, int leftreg, int rightreg ) { putstring( "sub " ); putreg( generator, leftreg ); putstring( ", " ); putreg( generator, rightreg ); putnl( ); genFreeReg( generator, rightreg ); return leftreg; } int genMul( struct Generator *generator, int leftreg, int rightreg ) { putstring( "mul " ); putreg( generator, leftreg ); putstring( ", " ); putreg( generator, rightreg ); putnl( ); genFreeReg( generator, rightreg ); return leftreg; } int genDiv( struct Generator *generator, int leftreg, int rightreg ) { putstring( "div " ); putreg( generator, leftreg ); putstring( ", " ); putreg( generator, rightreg ); putnl( ); genFreeReg( generator, rightreg ); return leftreg; } int generateFromAST( struct Generator *generator, struct ASTnode *node ) { int leftreg, rightreg, reg; if( node->left != NULL ) { leftreg = generateFromAST( generator, node->left ); } if( node->right != NULL ) { rightreg = generateFromAST( generator, node->right ); } switch( node->op ) { case A_INT_LITERAL: reg = genLoadImm( generator, node->intval ); break; case A_IDENT: reg = genLoadIdent( generator, node->sym->name ); break; case A_ADD: reg = genAdd( generator, leftreg, rightreg ); break; case A_SUBTRACT: reg = genSub( generator, leftreg, rightreg ); break; case A_MULTIPLY: reg = genMul( generator, leftreg, rightreg ); break; case A_DIVIDE: reg = genDiv( generator, leftreg, rightreg ); break; case A_ASSIGN: putstring( "A_ASSIGN" ); break; default: putint( node->op ); putstring( "?" ); break; } return reg; } /* parser */ struct Parser { int token; struct Scanner *scanner; struct Scope *global_scope; }; struct Compiler { struct Parser *parser; struct Generator *generator; }; struct Parser *createParser( ) { struct Parser *parser; parser = (struct Parser *)malloc( sizeof( struct Parser ) ); parser->scanner = createScanner( ); parser->global_scope = createScope( "global" ); return parser; } void freeParser( struct Parser *parser ) { freeScope( parser->global_scope ); freeScanner( parser->scanner ); free( (char *)parser ); } void parserExpect( struct Parser *parser, int must, char *what ) { if( parser->token == must ) { parser->token = getToken( parser->scanner ); } else { scannerPrintErrorHeader( parser->scanner ); putstring( what ); putstring( " expected" ); putnl( ); exit( EXIT_FAILURE ); } } struct ASTnode *parseExpression( struct Parser *parser ) { struct ASTnode *node, *left, *right; if( parser->token == S_EOI ) { scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected eof in expression" ); putnl( ); exit( EXIT_FAILURE ); } if( parser->token == S_NUM ) { left = createASTleafInt( A_INT_LITERAL, parser->scanner->num ); } parser->token = getToken( parser->scanner ); if( parser->token == S_PLUS ) { parser->token = getToken( parser->scanner ); right = parseExpression( parser ); node = createASTbinary( A_ADD, left, right ); } else if( parser->token == S_MINUS ) { parser->token = getToken( parser->scanner ); right = parseExpression( parser ); node = createASTbinary( A_SUBTRACT, left, right ); } else if( parser->token == S_STAR ) { parser->token = getToken( parser->scanner ); right = parseExpression( parser ); node = createASTbinary( A_MULTIPLY, left, right ); } else if( parser->token == S_SLASH ) { parser->token = getToken( parser->scanner ); right = parseExpression( parser ); node = createASTbinary( A_DIVIDE, left, right ); } else if( parser->token == S_EOI || parser->token == S_SEMICOLON ) { node = left; } else { scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected token '" ); putint( parser->token ); putstring( "' in expression" ); putnl( ); exit( EXIT_FAILURE ); } return node; } void parseDeclaration( struct Parser *parser ) { struct Symbol *sym; parserExpect( parser, S_INT, "int" ); parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { sym = createSymbol( parser->scanner->ident ); insertSymbol( parser->global_scope, sym ); } else { scannerPrintErrorHeader( parser->scanner ); putstring( "duplicate global symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } parserExpect( parser, S_SEMICOLON, ";" ); } void parseAssignment( struct Compiler *compiler ) { struct Symbol *sym; struct ASTnode *node, *left, *right; struct Parser *parser; parser = compiler->parser; parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { scannerPrintErrorHeader( parser->scanner ); putstring( "unknown symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } right = createASTleafSym( A_IDENT, sym ); parserExpect( parser, S_EQUALS, "=" ); left = parseExpression( parser ); parserExpect( parser, S_SEMICOLON, ";" ); node = createASTbinary( A_ASSIGN, left, right ); generateFromAST( compiler->generator, node ); freeASTnode( node ); } void parseStatement( struct Compiler *compiler ) { struct Parser *parser; parser = compiler->parser; if( parser->token == S_INT ) { parseDeclaration( parser ); } else if( parser->token == S_IDENT ) { parseAssignment( compiler ); } else if( parser->token == S_EOI ) { return; } else { scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected token '" ); putint( parser->token ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } } /* compiler */ struct Compiler *createCompiler( ) { struct Compiler *compiler; compiler = (struct Compiler *)malloc( sizeof( struct Compiler ) ); compiler->parser = createParser( ); compiler->generator = createGenerator( compiler->parser->scanner ); return compiler; } void freeCompiler( struct Compiler *compiler ) { freeGenerator( compiler->generator ); freeParser( compiler->parser ); free( (char *)compiler ); } /* main */ int main( int argc, char **argv ) { struct Compiler *compiler; compiler = createCompiler( ); compiler->parser->token = getToken( compiler->parser->scanner ); while( compiler->parser->token != S_EOI ) { parseStatement( compiler ); } freeCompiler( compiler ); exit( EXIT_SUCCESS ); return EXIT_SUCCESS; }