From c03c5fb46c0b2bbaa028d823786095a210896627 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Wed, 25 Aug 2021 19:31:42 +0000 Subject: some work on cc generating the code with AST --- miniany/REQUIREMENTS | 8 +- miniany/cc.c | 432 ++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 381 insertions(+), 59 deletions(-) diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS index b8281d1..d49c276 100644 --- a/miniany/REQUIREMENTS +++ b/miniany/REQUIREMENTS @@ -7,6 +7,7 @@ implementing: - requires a 3 parameter syscall to 80h (Linux) - requires - inline assembly +- for loop not implementing: - libc @@ -49,4 +50,9 @@ not implementing: - typedefs are just syntactic sugar, I use them mostly for 'struct T' -> 'T' - initializers of global and locals, not that important as we use C89 anyway, forcing us to separate declaration and usage of variables per scope - +- unions, useful to safe space in AST, but not strictly necessary +- bool, useful, but not strigtly necessary +- enums as constant replacement (instead of preprocessor), realy enum types + are not really useful. +- forward struct definitions or typedefs (handy for Compiler structure), but.. + diff --git a/miniany/cc.c b/miniany/cc.c index 5400305..3f1dac3 100644 --- a/miniany/cc.c +++ b/miniany/cc.c @@ -1,3 +1,10 @@ +/* constants */ + +enum { + MAX_IDENT_LEN = 20, + MAX_NOF_REGISTERS = 6 +}; + /* scanner */ enum { @@ -14,10 +21,6 @@ enum { S_EOI = 31 }; -enum { - MAX_IDENT_LEN = 20 -}; - struct Scanner { int col; int row; @@ -29,6 +32,31 @@ struct Scanner { char *ident; }; +void scannerPrintErrorHeader( struct Scanner *scanner ) +{ + putnl( ); + putstring( "Error line " ); + putint( scanner->row ); + putstring( ", pos " ); + putint( scanner->col ); + putstring( ": " ); +} + +void scannerPrintState( struct Scanner *scanner ) +{ + putint( scanner->row ); + putchar( '/' ); + putint( scanner->col ); + putstring( ": " ); + putint( scanner->token ); + if( scanner->token == S_NUM ) { + putchar( '(' ); + putint( scanner->num ); + putchar( ')' ); + } + putnl( ); +} + struct Scanner *createScanner( ) { struct Scanner *scanner; @@ -86,15 +114,6 @@ int skipWhite( struct Scanner *scanner ) return scanner->c; } -void printErrorHeader( struct Scanner *scanner ) -{ - putstring( "Error line " ); - putint( scanner->row ); - putstring( ", pos " ); - putint( scanner->col ); - putstring( ": " ); -} - void scanNumber( struct Scanner *scanner ) { scanner->num = scanner->c - '0'; @@ -115,7 +134,7 @@ void scanIdent( struct Scanner *scanner ) scanner->ident[n] = scanner->c; n++; if( n >= MAX_IDENT_LEN - 1 ) { - printErrorHeader( scanner ); + scannerPrintErrorHeader( scanner ); putstring( "too long identifier" ); putnl( ); exit( EXIT_FAILURE ); @@ -141,21 +160,6 @@ int keyword( char *ident ) } } -void scannerPrintState( struct Scanner *scanner ) -{ - putint( scanner->row ); - putchar( '/' ); - putint( scanner->col ); - putstring( ": " ); - putint( scanner->token ); - if( scanner->token == S_NUM ) { - putchar( '(' ); - putint( scanner->num ); - putchar( ')' ); - } - putnl( ); -} - int getToken( struct Scanner *scanner ) { int c; @@ -228,7 +232,7 @@ int getToken( struct Scanner *scanner ) break; default: scanner->token = S_ERR; - printErrorHeader( scanner ); + scannerPrintErrorHeader( scanner ); putstring( "unknown token '" ); putchar( c ); putstring( "'" ); @@ -326,6 +330,278 @@ void insertSymbol( struct Scope *scope, struct Symbol *sym ) } } +/* AST */ + +enum { + A_INT_LITERAL = 1, + A_IDENT, + A_ADD = 10, + A_SUBTRACT, + A_MULTIPLY, + A_DIVIDE, + A_ASSIGN +}; + +struct ASTnode { + int op; + struct ASTnode *left; + struct ASTnode *right; + int intval; /* for A_INT_LITERAL */ + struct Symbol *sym; /* for A_IDENT */ +}; + +struct ASTnode *createASTnode( int op, struct ASTnode *left, struct ASTnode *right, int intval, struct Symbol *sym ) +{ + struct ASTnode *node; + + node = (struct ASTnode *)malloc( sizeof( struct ASTnode ) ); + node->op = op; + node->left = left; + node->right = right; + node->intval = intval; + node->sym = sym; + + return node; +} + +struct ASTnode *createASTbinary( int op, struct ASTnode *left, struct ASTnode *right ) +{ + return createASTnode( op, left, right, 0, NULL ); +} + +struct ASTnode *createASTleafInt( int op, int intval ) +{ + return createASTnode( op, NULL, NULL, intval, NULL ); +} + +struct ASTnode *createASTleafSym( int op, struct Symbol *sym ) +{ + return createASTnode( op, NULL, NULL, 0, sym ); +} + +void freeASTnode( struct ASTnode *node ) +{ + if( node->left != NULL ) { + freeASTnode( node->left ); + } + if( node->right != NULL ) { + freeASTnode( node->right ); + } + free( (char *)node ); +} + +/* code generation */ + +enum { + EAX = 0, + EBX, + ECX, + EDX, + ESI, + EDI +}; + +struct Generator { + struct Scanner *scanner; + char **regName; + int *regFree; +}; + +struct Generator *createGenerator( struct Scanner *scanner ) +{ + struct Generator *generator; + int i; + + generator = (struct Generator *)malloc( sizeof( struct Generator ) ); + generator->scanner = scanner; + generator->regName = (char **)malloc( sizeof( char * ) * MAX_NOF_REGISTERS ); + generator->regName[0] = strdup( "eax" ); + generator->regName[1] = strdup( "ebx" ); + generator->regName[2] = strdup( "ecx" ); + generator->regName[3] = strdup( "edx" ); + generator->regName[4] = strdup( "esi" ); + generator->regName[5] = strdup( "edi" ); + generator->regFree = (int *)malloc( sizeof( int ) * MAX_NOF_REGISTERS ); + i = 0; + while( i < MAX_NOF_REGISTERS ) { + generator->regFree[i] = 1; + i++; + } + + return generator; +} + +void freeGenerator( struct Generator *generator ) +{ + int i; + + i = 0; + while( i < MAX_NOF_REGISTERS ) { + free( generator->regName[i] ); + i++; + } + free( (char *)generator->regName ); + free( (char *)generator->regFree ); + free( (char *)generator ); +} + +int genAllocReg( struct Generator *generator ) +{ + int i; + + i = 0; + while( i < MAX_NOF_REGISTERS ) { + if( generator->regFree[i] ) { + generator->regFree[i] = 0; + return i; + } + i++; + } + + scannerPrintErrorHeader( generator->scanner ); + putstring( "out of registers" ); + putnl( ); + exit( EXIT_FAILURE ); + + return -1; +} + +void genFreeReg( struct Generator *generator, int reg ) +{ + if( generator->regFree[reg] == 0 ) { + generator->regFree[reg] = 1; + } else { + scannerPrintErrorHeader( generator->scanner ); + putstring( "error freeing non-allocated register '" ); + putstring( generator->regName[reg] ); + putstring( "'" ); + putnl( ); + exit( EXIT_FAILURE ); + } +} + +void putreg( struct Generator *generator, int reg ) +{ + putstring( generator->regName[reg] ); +} + +int genLoadImm( struct Generator *generator, int intval ) +{ + int reg; + + reg = genAllocReg( generator ); + putstring( "mov " ); + putreg( generator, reg ); + putstring( ", " ); + putint( intval ); + putnl( ); + + return reg; +} + +int genLoadIdent( struct Generator *generator, char *ident ) +{ + int reg; + + reg = genAllocReg( generator ); + scannerPrintErrorHeader( generator->scanner ); + putstring( "reading values from symbols not implemted" ); + putnl( ); + exit( EXIT_FAILURE ); + + return reg; +} + +int genAdd( struct Generator *generator,int leftreg, int rightreg ) +{ + putstring( "add " ); + putreg( generator, leftreg ); + putstring( ", " ); + putreg( generator, rightreg ); + putnl( ); + genFreeReg( generator, rightreg ); + + return leftreg; +} + +int genSub( struct Generator *generator, int leftreg, int rightreg ) +{ + putstring( "sub " ); + putreg( generator, leftreg ); + putstring( ", " ); + putreg( generator, rightreg ); + putnl( ); + genFreeReg( generator, rightreg ); + + return leftreg; +} + +int genMul( struct Generator *generator, int leftreg, int rightreg ) +{ + putstring( "mul " ); + putreg( generator, leftreg ); + putstring( ", " ); + putreg( generator, rightreg ); + putnl( ); + genFreeReg( generator, rightreg ); + + return leftreg; +} + +int genDiv( struct Generator *generator, int leftreg, int rightreg ) +{ + putstring( "div " ); + putreg( generator, leftreg ); + putstring( ", " ); + putreg( generator, rightreg ); + putnl( ); + genFreeReg( generator, rightreg ); + + return leftreg; +} + +int generateFromAST( struct Generator *generator, struct ASTnode *node ) +{ + int leftreg, rightreg, reg; + + if( node->left != NULL ) { + leftreg = generateFromAST( generator, node->left ); + } + if( node->right != NULL ) { + rightreg = generateFromAST( generator, node->right ); + } + switch( node->op ) { + case A_INT_LITERAL: + reg = genLoadImm( generator, node->intval ); + break; + case A_IDENT: + reg = genLoadIdent( generator, node->sym->name ); + break; + case A_ADD: + reg = genAdd( generator, leftreg, rightreg ); + break; + case A_SUBTRACT: + reg = genSub( generator, leftreg, rightreg ); + break; + case A_MULTIPLY: + reg = genMul( generator, leftreg, rightreg ); + break; + case A_DIVIDE: + reg = genDiv( generator, leftreg, rightreg ); + break; + case A_ASSIGN: + putstring( "A_ASSIGN" ); + break; + default: + putint( node->op ); + putstring( "?" ); + break; + + } + + return reg; +} + /* parser */ struct Parser { @@ -334,6 +610,11 @@ struct Parser { struct Scope *global_scope; }; +struct Compiler { + struct Parser *parser; + struct Generator *generator; +}; + struct Parser *createParser( ) { struct Parser *parser; @@ -357,7 +638,7 @@ void parserExpect( struct Parser *parser, int must, char *what ) if( parser->token == must ) { parser->token = getToken( parser->scanner ); } else { - printErrorHeader( parser->scanner ); + scannerPrintErrorHeader( parser->scanner ); putstring( what ); putstring( " expected" ); putnl( ); @@ -365,44 +646,50 @@ void parserExpect( struct Parser *parser, int must, char *what ) } } -void parseExpression( struct Parser *parser ) +struct ASTnode *parseExpression( struct Parser *parser ) { + struct ASTnode *node, *left, *right; + if( parser->token == S_EOI ) { - printErrorHeader( parser->scanner ); + scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected eof in expression" ); putnl( ); exit( EXIT_FAILURE ); } if( parser->token == S_NUM ) { - putstring( "immediate int " ); - putint( parser->scanner->num ); - putnl( ); + left = createASTleafInt( A_INT_LITERAL, parser->scanner->num ); } parser->token = getToken( parser->scanner ); if( parser->token == S_PLUS ) { parser->token = getToken( parser->scanner ); - parseExpression( parser ); + right = parseExpression( parser ); + node = createASTbinary( A_ADD, left, right ); } else if( parser->token == S_MINUS ) { parser->token = getToken( parser->scanner ); - parseExpression( parser ); + right = parseExpression( parser ); + node = createASTbinary( A_SUBTRACT, left, right ); } else if( parser->token == S_STAR ) { parser->token = getToken( parser->scanner ); - parseExpression( parser ); + right = parseExpression( parser ); + node = createASTbinary( A_MULTIPLY, left, right ); } else if( parser->token == S_SLASH ) { parser->token = getToken( parser->scanner ); - parseExpression( parser ); + right = parseExpression( parser ); + node = createASTbinary( A_DIVIDE, left, right ); } else if( parser->token == S_EOI || parser->token == S_SEMICOLON ) { - return; + node = left; } else { - printErrorHeader( parser->scanner ); + scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected token '" ); putint( parser->token ); putstring( "' in expression" ); putnl( ); exit( EXIT_FAILURE ); } + + return node; } void parseDeclaration( struct Parser *parser ) @@ -411,13 +698,12 @@ void parseDeclaration( struct Parser *parser ) parserExpect( parser, S_INT, "int" ); parserExpect( parser, S_IDENT, "identifier" ); - putstring( "Adding glob: " ); putstring( parser->scanner->ident ); putnl( ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { sym = createSymbol( parser->scanner->ident ); insertSymbol( parser->global_scope, sym ); } else { - printErrorHeader( parser->scanner ); + scannerPrintErrorHeader( parser->scanner ); putstring( "duplicate global symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); @@ -427,35 +713,46 @@ void parseDeclaration( struct Parser *parser ) parserExpect( parser, S_SEMICOLON, ";" ); } -void parseAssignment( struct Parser *parser ) +void parseAssignment( struct Compiler *compiler ) { struct Symbol *sym; + struct ASTnode *node, *left, *right; + struct Parser *parser; + parser = compiler->parser; parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); if( sym == NULL ) { - printErrorHeader( parser->scanner ); + scannerPrintErrorHeader( parser->scanner ); putstring( "unknown symbol '" ); putstring( parser->scanner->ident ); putstring( "'" ); putnl( ); exit( EXIT_FAILURE ); } + right = createASTleafSym( A_IDENT, sym ); parserExpect( parser, S_EQUALS, "=" ); - parseExpression( parser ); + left = parseExpression( parser ); parserExpect( parser, S_SEMICOLON, ";" ); + + node = createASTbinary( A_ASSIGN, left, right ); + generateFromAST( compiler->generator, node ); + freeASTnode( node ); } -void parseStatement( struct Parser *parser ) +void parseStatement( struct Compiler *compiler ) { + struct Parser *parser; + + parser = compiler->parser; if( parser->token == S_INT ) { parseDeclaration( parser ); } else if( parser->token == S_IDENT ) { - parseAssignment( parser ); + parseAssignment( compiler ); } else if( parser->token == S_EOI ) { return; } else { - printErrorHeader( parser->scanner ); + scannerPrintErrorHeader( parser->scanner ); putstring( "unexpected token '" ); putint( parser->token ); putstring( "'" ); @@ -464,19 +761,38 @@ void parseStatement( struct Parser *parser ) } } +/* compiler */ + +struct Compiler *createCompiler( ) +{ + struct Compiler *compiler; + + compiler = (struct Compiler *)malloc( sizeof( struct Compiler ) ); + compiler->parser = createParser( ); + compiler->generator = createGenerator( compiler->parser->scanner ); + + return compiler; +} + +void freeCompiler( struct Compiler *compiler ) +{ + freeGenerator( compiler->generator ); + freeParser( compiler->parser ); + free( (char *)compiler ); +} + /* main */ int main( int argc, char **argv ) { - struct Parser *parser; - - parser = createParser( ); - parser->scanner->debug = 1; - parser->token = getToken( parser->scanner ); - while( parser->token != S_EOI ) { - parseStatement( parser ); + struct Compiler *compiler; + + compiler = createCompiler( ); + compiler->parser->token = getToken( compiler->parser->scanner ); + while( compiler->parser->token != S_EOI ) { + parseStatement( compiler ); } - freeParser( parser ); + freeCompiler( compiler ); exit( EXIT_SUCCESS ); -- cgit v1.2.3-54-g00ecf