From 6a647f4c573c0d44e250fc99a69683a55b1afae6 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Mon, 30 Aug 2021 07:45:27 +0000 Subject: implemented simplistic register spilling first working binary produced with cc/fasm and run on emul --- miniany/README | 3 +- miniany/REQUIREMENTS | 6 +- miniany/c4.c | 2 +- miniany/cc.c | 340 +++++++++++++++++++++++++++++++++++++++------------ miniany/test1.c | 1 + 5 files changed, 273 insertions(+), 79 deletions(-) diff --git a/miniany/README b/miniany/README index fef7bac..c011906 100644 --- a/miniany/README +++ b/miniany/README @@ -61,6 +61,7 @@ complex things into our own compiler. * documentation * "Compiler Construction", Niklaus Wirth - * https://github.com/DoctorWkt/acwj + * https://github.com/DoctorWkt/acwj: a nice series on building a C compiler, + step by step with lots of good explanations * https://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing, https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS index d49c276..0a6076c 100644 --- a/miniany/REQUIREMENTS +++ b/miniany/REQUIREMENTS @@ -7,7 +7,6 @@ implementing: - requires a 3 parameter syscall to 80h (Linux) - requires - inline assembly -- for loop not implementing: - libc @@ -55,4 +54,9 @@ not implementing: - enums as constant replacement (instead of preprocessor), realy enum types are not really useful. - forward struct definitions or typedefs (handy for Compiler structure), but.. +- for loop: unless we start optimizing (SIMD) there is no real benefit + for a generic 'for', a strict for i=0 to N, i++ is easier to optimize, when + you have a grammatical construct to help recognizing it. +- register number for register alloation + https://en.wikipedia.org/wiki/Strahler_number diff --git a/miniany/c4.c b/miniany/c4.c index 70e6b63..8317947 100644 --- a/miniany/c4.c +++ b/miniany/c4.c @@ -737,7 +737,7 @@ int main(int argc, char **argv) case IALP: t = sp + pc[1]; a = isalpha( (int)t[-1]); break; case SCMP: t = sp + pc[1]; a = strcmp((char *)t[-1], (char *)t[-2]); break; case SDUP: t = sp + pc[1]; a = (int)strdup((char *)t[-1]); break; - case EXIT: printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; + case EXIT: /* printf("exit(%d) cycle = %d\n", *sp, cycle); */ return *sp; default: printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } } diff --git a/miniany/cc.c b/miniany/cc.c index 3f1dac3..0805970 100644 --- a/miniany/cc.c +++ b/miniany/cc.c @@ -15,10 +15,10 @@ enum { S_SEMICOLON, S_EQUALS, S_INT = 10, - S_IDENT, - S_NUM = 20, - S_ERR = 30, - S_EOI = 31 + S_IDENT = 20, + S_NUM = 30, + S_ERR = 40, + S_EOI = 41 }; struct Scanner { @@ -335,6 +335,7 @@ void insertSymbol( struct Scope *scope, struct Symbol *sym ) enum { A_INT_LITERAL = 1, A_IDENT, + A_LVIDENT, A_ADD = 10, A_SUBTRACT, A_MULTIPLY, @@ -390,6 +391,19 @@ void freeASTnode( struct ASTnode *node ) free( (char *)node ); } +/* parser/compiler, no forward definitions */ + +struct Parser { + int token; + struct Scanner *scanner; + struct Scope *global_scope; +}; + +struct Compiler { + struct Parser *parser; + struct Generator *generator; +}; + /* code generation */ enum { @@ -398,13 +412,16 @@ enum { ECX, EDX, ESI, - EDI + EDI, + NOREG = -1 }; struct Generator { struct Scanner *scanner; char **regName; int *regFree; + int spillReg; + int debug; }; struct Generator *createGenerator( struct Scanner *scanner ) @@ -427,6 +444,8 @@ struct Generator *createGenerator( struct Scanner *scanner ) generator->regFree[i] = 1; i++; } + generator->spillReg = 0; + generator->debug = 0; return generator; } @@ -445,44 +464,118 @@ void freeGenerator( struct Generator *generator ) free( (char *)generator ); } +void putreg( struct Generator *generator, int reg ) +{ + putstring( generator->regName[reg] ); +} + +void genAddGlob( struct Generator *generator, char *ident ) +{ + putstring( ident ); + putstring( ": " ); + putstring( "dd $0" ); + putnl( ); +} + +void genSaveReg( struct Generator *generator, int reg ) +{ + putstring( "push " ); + putreg( generator, reg ); + putnl( ); +} + +void genRestoreReg( struct Generator *generator, int reg ) +{ + putstring( "pop " ); + putreg( generator, reg ); + putnl( ); +} + int genAllocReg( struct Generator *generator ) { - int i; + int reg; - i = 0; - while( i < MAX_NOF_REGISTERS ) { - if( generator->regFree[i] ) { - generator->regFree[i] = 0; - return i; + reg = 0; + while( reg < MAX_NOF_REGISTERS ) { + if( generator->regFree[reg] ) { + generator->regFree[reg] = 0; + if( generator->debug ) { + putstring( "; allocating register " ); + putreg( generator, reg ); + putnl( ); + } + return reg; } - i++; + reg++; } - scannerPrintErrorHeader( generator->scanner ); - putstring( "out of registers" ); - putnl( ); - exit( EXIT_FAILURE ); - - return -1; + if( reg >= MAX_NOF_REGISTERS ) { + /* simplistic spilling strategy to stack, as described + in https://github.com/DoctorWkt/acwj/blob/master/54_Reg_Spills/Readme.md + */ + reg = generator->spillReg % MAX_NOF_REGISTERS; + generator->spillReg++; + if( generator->debug ) { + putstring( "; spilling register " ); + putreg( generator, reg ); + putstring( " to stack, allocating register" ); + putnl( ); + } + genSaveReg( generator, reg ); + } + + return reg; } void genFreeReg( struct Generator *generator, int reg ) { - if( generator->regFree[reg] == 0 ) { - generator->regFree[reg] = 1; + int r; + + if( generator->spillReg > 0 ) { + generator->spillReg--; + r = ( generator->spillReg % MAX_NOF_REGISTERS ); + if( generator->debug ) { + putstring( "; restoring spilled register " ); + putreg( generator, reg ); + putstring( " from stack" ); + putnl( ); + } + genRestoreReg( generator, r ); } else { - scannerPrintErrorHeader( generator->scanner ); - putstring( "error freeing non-allocated register '" ); - putstring( generator->regName[reg] ); - putstring( "'" ); - putnl( ); - exit( EXIT_FAILURE ); + if( generator->regFree[reg] == 0 ) { + generator->regFree[reg] = 1; + if( generator->debug ) { + putstring( "; deallocating register " ); + putreg( generator, reg ); + putnl( ); + } + } else { + scannerPrintErrorHeader( generator->scanner ); + putstring( "error freeing non-allocated register '" ); + putstring( generator->regName[reg] ); + putstring( "'" ); + putnl( ); + exit( EXIT_FAILURE ); + } } } -void putreg( struct Generator *generator, int reg ) +void genFreeAllRegs( struct Generator *generator ) { - putstring( generator->regName[reg] ); + int i; + + i = 0; + while( i < MAX_NOF_REGISTERS ) { + if( generator->regFree[i] == 0 ) { + generator->regFree[i] = 1; + if( generator->debug ) { + putstring( "; deallocating register " ); + putreg( generator, i ); + putnl( ); + } + } + i++; + } } int genLoadImm( struct Generator *generator, int intval ) @@ -504,31 +597,35 @@ int genLoadIdent( struct Generator *generator, char *ident ) int reg; reg = genAllocReg( generator ); - scannerPrintErrorHeader( generator->scanner ); - putstring( "reading values from symbols not implemted" ); + putstring( "mov " ); + putreg( generator, reg ); + putstring( ", [" ); + putstring( ident ); + putstring( "]" ); putnl( ); - exit( EXIT_FAILURE ); return reg; } -int genAdd( struct Generator *generator,int leftreg, int rightreg ) +int genStoreIdent( struct Generator *generator, int reg, char *ident ) { - putstring( "add " ); - putreg( generator, leftreg ); - putstring( ", " ); - putreg( generator, rightreg ); + putstring( "mov [" ); + putstring( ident ); + putstring( "], " ); + putreg( generator, reg ); putnl( ); - genFreeReg( generator, rightreg ); - - return leftreg; + + return reg; } -int genSub( struct Generator *generator, int leftreg, int rightreg ) +int genBinary( char *op, struct Generator *generator,int leftreg, int rightreg, int implicit ) { - putstring( "sub " ); - putreg( generator, leftreg ); - putstring( ", " ); + putstring( op ); + putchar( ' ' ); + if( !implicit ) { + putreg( generator, leftreg ); + putstring( ", " ); + } putreg( generator, rightreg ); putnl( ); genFreeReg( generator, rightreg ); @@ -536,39 +633,97 @@ int genSub( struct Generator *generator, int leftreg, int rightreg ) return leftreg; } +int genAdd( struct Generator *generator, int leftreg, int rightreg ) +{ + return genBinary( "add", generator, leftreg, rightreg, 0 ); +} + +int genSub( struct Generator *generator, int leftreg, int rightreg ) +{ + return genBinary( "sub", generator, leftreg, rightreg, 0 ); +} + int genMul( struct Generator *generator, int leftreg, int rightreg ) { - putstring( "mul " ); - putreg( generator, leftreg ); - putstring( ", " ); - putreg( generator, rightreg ); - putnl( ); - genFreeReg( generator, rightreg ); + int reg; - return leftreg; + if( leftreg == EAX ) { + return genBinary( "mul", generator, leftreg, rightreg, 1 ); + } else { + genSaveReg( generator, EAX ); + if( leftreg != EDX ) { + genSaveReg( generator, EDX ); + } + if( rightreg == EAX ) { + genSaveReg( generator, EBX ); + putstring( "mov ebx, eax" ); + putnl( ); + } + putstring( "mov eax, " ); + putreg( generator, leftreg ); + putnl( ); + if( rightreg == EAX ) { + reg = genBinary( "mul", generator, EAX, EBX, 1 ); + genRestoreReg( generator, EBX ); + } else { + reg = genBinary( "mul", generator, EAX, rightreg, 1 ); + } + putstring( "mov " ); + putreg( generator, leftreg ); + putstring( ", eax" ); + putnl( ); + if( leftreg != EDX ) { + genRestoreReg( generator, EDX ); + } + genRestoreReg( generator, EAX ); + return leftreg; + } } int genDiv( struct Generator *generator, int leftreg, int rightreg ) { - putstring( "div " ); - putreg( generator, leftreg ); - putstring( ", " ); - putreg( generator, rightreg ); - putnl( ); - genFreeReg( generator, rightreg ); + int reg; - return leftreg; + if( leftreg == EAX ) { + return genBinary( "div", generator, leftreg, rightreg, 1 ); + } else { + genSaveReg( generator, EAX ); + genSaveReg( generator, EDX ); + if( rightreg == EAX ) { + genSaveReg( generator, EBX ); + putstring( "mov ebx, eax" ); + putnl( ); + } + putstring( "mov eax, " ); + putreg( generator, leftreg ); + putnl( ); + putstring( "mov edx, 0" ); + putnl( ); + if( rightreg == EAX ) { + reg = genBinary( "div", generator, EAX, EBX, 1 ); + genRestoreReg( generator, EBX ); + } else { + reg = genBinary( "div", generator, EAX, rightreg, 1 ); + } + putstring( "mov " ); + putreg( generator, leftreg ); + putstring( ", eax" ); + putnl( ); + genRestoreReg( generator, EDX ); + genRestoreReg( generator, EAX ); + return leftreg; + } } -int generateFromAST( struct Generator *generator, struct ASTnode *node ) +int generateFromAST( struct Generator *generator, struct ASTnode *node, int inreg ) { int leftreg, rightreg, reg; if( node->left != NULL ) { - leftreg = generateFromAST( generator, node->left ); + leftreg = generateFromAST( generator, node->left, NOREG ); } if( node->right != NULL ) { - rightreg = generateFromAST( generator, node->right ); + rightreg = generateFromAST( generator, node->right, leftreg ); } switch( node->op ) { case A_INT_LITERAL: @@ -577,6 +732,9 @@ int generateFromAST( struct Generator *generator, struct ASTnode *node ) case A_IDENT: reg = genLoadIdent( generator, node->sym->name ); break; + case A_LVIDENT: + reg = genStoreIdent( generator, inreg, node->sym->name ); + break; case A_ADD: reg = genAdd( generator, leftreg, rightreg ); break; @@ -590,7 +748,6 @@ int generateFromAST( struct Generator *generator, struct ASTnode *node ) reg = genDiv( generator, leftreg, rightreg ); break; case A_ASSIGN: - putstring( "A_ASSIGN" ); break; default: putint( node->op ); @@ -602,18 +759,30 @@ int generateFromAST( struct Generator *generator, struct ASTnode *node ) return reg; } -/* parser */ +void genPrologue( struct Compiler *compiler ) +{ + putstring( "format binary" ); + putnl( ); + putstring( "use32" ); + putnl( ); + putstring( "org $1000000" ); + putnl( ); +} -struct Parser { - int token; - struct Scanner *scanner; - struct Scope *global_scope; -}; +void genEpilogue( struct Compiler *compiler ) +{ + struct Symbol *sym; + + putstring( "hlt" ); + putnl( ); + sym = compiler->parser->global_scope->sym; + while( sym != NULL ) { + genAddGlob( compiler->generator, sym->name ); + sym = sym->next; + } +} -struct Compiler { - struct Parser *parser; - struct Generator *generator; -}; +/* parser */ struct Parser *createParser( ) { @@ -649,6 +818,7 @@ void parserExpect( struct Parser *parser, int must, char *what ) struct ASTnode *parseExpression( struct Parser *parser ) { struct ASTnode *node, *left, *right; + struct Symbol *sym; if( parser->token == S_EOI ) { scannerPrintErrorHeader( parser->scanner ); @@ -659,8 +829,19 @@ struct ASTnode *parseExpression( struct Parser *parser ) if( parser->token == S_NUM ) { left = createASTleafInt( A_INT_LITERAL, parser->scanner->num ); + } else if( parser->token = S_IDENT ) { + sym = getSymbol( parser->global_scope, parser->scanner->ident ); + if( sym == NULL ) { + scannerPrintErrorHeader( parser->scanner ); + putstring( "unknown identifier '" ); + putstring( parser->scanner->ident ); + putstring( "' in expression" ); + putnl( ); + exit( EXIT_FAILURE ); + } + left = createASTleafSym( A_IDENT, sym ); } - + parser->token = getToken( parser->scanner ); if( parser->token == S_PLUS ) { parser->token = getToken( parser->scanner ); @@ -692,10 +873,12 @@ struct ASTnode *parseExpression( struct Parser *parser ) return node; } -void parseDeclaration( struct Parser *parser ) +void parseDeclaration( struct Compiler *compiler ) { + struct Parser *parser; struct Symbol *sym; + parser = compiler->parser; parserExpect( parser, S_INT, "int" ); parserExpect( parser, S_IDENT, "identifier" ); sym = getSymbol( parser->global_scope, parser->scanner->ident ); @@ -730,13 +913,14 @@ void parseAssignment( struct Compiler *compiler ) putnl( ); exit( EXIT_FAILURE ); } - right = createASTleafSym( A_IDENT, sym ); + right = createASTleafSym( A_LVIDENT, sym ); parserExpect( parser, S_EQUALS, "=" ); left = parseExpression( parser ); parserExpect( parser, S_SEMICOLON, ";" ); node = createASTbinary( A_ASSIGN, left, right ); - generateFromAST( compiler->generator, node ); + generateFromAST( compiler->generator, node, NOREG ); + genFreeAllRegs( compiler->generator ); freeASTnode( node ); } @@ -746,7 +930,7 @@ void parseStatement( struct Compiler *compiler ) parser = compiler->parser; if( parser->token == S_INT ) { - parseDeclaration( parser ); + parseDeclaration( compiler ); } else if( parser->token == S_IDENT ) { parseAssignment( compiler ); } else if( parser->token == S_EOI ) { @@ -788,10 +972,14 @@ int main( int argc, char **argv ) struct Compiler *compiler; compiler = createCompiler( ); +/* compiler->parser->scanner->debug = 1; */ +/* compiler->generator->debug = 1; */ + genPrologue( compiler ); compiler->parser->token = getToken( compiler->parser->scanner ); while( compiler->parser->token != S_EOI ) { parseStatement( compiler ); } + genEpilogue( compiler ); freeCompiler( compiler ); exit( EXIT_SUCCESS ); diff --git a/miniany/test1.c b/miniany/test1.c index 94684fa..db266c0 100644 --- a/miniany/test1.c +++ b/miniany/test1.c @@ -4,3 +4,4 @@ int i; int j; i = 12+25/5-2*3; // another comment +j = i/3+3*4; // another assignment -- cgit v1.2.3-54-g00ecf