From e3cb0f8facec3723a8e0f6f245c7688cf3da8804 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Thu, 23 Sep 2021 19:05:07 +0200 Subject: cc - proper Pratt parsing of expressions - fixed some register saving around divs and muls --- miniany/BUGS | 2 + miniany/README | 3 + miniany/REQUIREMENTS | 31 ++++++++- miniany/TODOS | 8 --- miniany/build.sh | 1 + miniany/cc.c | 175 ++++++++++++++++++++++++++++++++---------------- miniany/test1.c | 4 +- miniany/test1.disasmust | 36 ++++++++++ miniany/test1.hexmust | 16 ++--- 9 files changed, 199 insertions(+), 77 deletions(-) create mode 100644 miniany/BUGS delete mode 100644 miniany/TODOS create mode 100644 miniany/test1.disasmust diff --git a/miniany/BUGS b/miniany/BUGS new file mode 100644 index 0000000..a796801 --- /dev/null +++ b/miniany/BUGS @@ -0,0 +1,2 @@ +- c4: + - break in a while is not working (leading to JMP 0) diff --git a/miniany/README b/miniany/README index 53266d6..033f3d4 100644 --- a/miniany/README +++ b/miniany/README @@ -70,6 +70,9 @@ complex things into our own compiler. * c.c in swieros: https://github.com/rswier/swieros.git +* https://github.com/lotabout/write-a-C-interpreter/blob/master/tutorial/en/: tutorial + based on C4 how to build a C interpreter, explains nicely details in C4. + * documentation * "Compiler Construction", Niklaus Wirth * https://github.com/DoctorWkt/acwj: a nice series on building a C compiler, diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS index 645bdfa..7dcfc78 100644 --- a/miniany/REQUIREMENTS +++ b/miniany/REQUIREMENTS @@ -54,6 +54,7 @@ not implementing: - enums as constant replacement (instead of preprocessor), realy enum types are not really useful. - forward struct definitions or typedefs (handy for Compiler structure), but.. + and we can work around by not producing any loops (hopefully) - for loop: unless we start optimizing (SIMD) there is no real benefit for a generic 'for', a strict for i=0 to N, i++ is easier to optimize, when you have a grammatical construct to help recognizing it. @@ -63,7 +64,7 @@ not implementing: can just be a ignored keyword. - c4 freestanding - uses some casts, the malloc ones are actually good for clarification, - the ones in memset are not so usefull (this is all because we don't + the ones in memset are not so useful (this is all because we don't have 'void *') - open/read/close is POSIX, we would prefer either C style file handling (we have it in libc-freestanding.c or some stdin, stdout thingy) @@ -82,4 +83,30 @@ not implementing: - other cases translate by hand: - case EXIT: /* putstring("exit("); putint(*sp); putstring(") cycle = "); putint(cycle); putnl(); */ return *sp; - default: putstring("unknown instruction = "); putint(i); putstring("! cycle = "); putint(cycle); putnl(); return -1; - + TODO: + - global char array declarations + - void parameter +TODO: +- avoid GNU-stype inline assembler (is far too complex), have more a + inline bytecode adder for explicit opcodes, e.g. nop -> .byte 0x90 + - c.c in swieros (the c4 successor) has 'asm(NOP)', this is something we + could implement easily, preferably just as 'asm(0x90)'. + u.h contains an enum with opcodes (most likely doable or an easy architecture + like the one in swieros, I doubt this works for Intel opcodes). + There should though be only one single point of information for + opcodes per architecture, so asm gets sort of an inline string + generator for the assembly output. Or we share a common C-file with + enums for the opcodes and cat it to both the assembler and the compiler + during the build (should not result in increaed code size, as + those are enums). + the asm(x) or asm(x,y) constructs can be mapped on the host compilers + to asm __volatile__ .byte ugliness. In cc and c4 we can take the swieros + approach. This should give us nice lowlevel inline assembly in a really + simplified way (basically embedding bytes). + Not having inline assembly means you need compilation units written + and linked to the program in assembly, which - well - adds a linker + and calling conventions, which might be too early in bootstrapping. +- asm-i386: device a new version which runs on c4 and is again freestanding + - static: just ignore, we don't have a linker, otoh, just rewrite it whithout static, + vararg, etc. +- c4.c: checkout c5-AST branch (darn, that one looks more promising to extend!) diff --git a/miniany/TODOS b/miniany/TODOS deleted file mode 100644 index 07a0c81..0000000 --- a/miniany/TODOS +++ /dev/null @@ -1,8 +0,0 @@ -- c4 - - global char array declarations - - void parameter -- avoid inline assembler, have more a inline bytecode adder - explicit opcodes, e.g. nop -> .byte 0x90 - - - diff --git a/miniany/build.sh b/miniany/build.sh index 68124b0..b0c13f3 100755 --- a/miniany/build.sh +++ b/miniany/build.sh @@ -53,6 +53,7 @@ case "${MODE}" in CFLAGS+=" -D__FREESTANDING__" ;; hosted) + CFLAGS+=" -D__HOSTED__" ;; *) echo "ERROR: Unknown environment '${MODE}' (use 'freestanding' or 'hosted')" 1>&2 diff --git a/miniany/cc.c b/miniany/cc.c index ccda806..4d26893 100644 --- a/miniany/cc.c +++ b/miniany/cc.c @@ -17,8 +17,8 @@ enum { S_INT = 10, S_IDENT = 20, S_NUM = 30, - S_ERR = 40, - S_EOI = 41 + S_EOI = 98, + S_ERR = 99 }; struct Scanner { @@ -340,15 +340,16 @@ enum { A_SUBTRACT, A_MULTIPLY, A_DIVIDE, - A_ASSIGN + A_ASSIGN, + A_ERR = 99 }; struct ASTnode { - int op; - struct ASTnode *left; - struct ASTnode *right; - int intval; /* for A_INT_LITERAL */ - struct Symbol *sym; /* for A_IDENT */ + int op; /* node operation, one of the A_ enums */ + struct ASTnode *left; /* left side operand */ + struct ASTnode *right; /* right side operand (or empty for unary operations */ + int intval; /* value in case of A_INT_LITERAL */ + struct Symbol *sym; /* symbol in case of A_IDENT */ }; struct ASTnode *createASTnode( int op, struct ASTnode *left, struct ASTnode *right, int intval, struct Symbol *sym ) @@ -391,7 +392,7 @@ void freeASTnode( struct ASTnode *node ) free( (char *)node ); } -/* parser/compiler, no forward definitions */ +/* parser/compiler, no forward declarations */ struct Parser { int token; @@ -399,6 +400,14 @@ struct Parser { struct Scope *global_scope; }; +struct Generator { + struct Scanner *scanner; + char **regName; /* names of the registers */ + int *regFree; /* free registers */ + int spillReg; /* index of the current spilled register */ + int debug; /* adds debug output to generated assembly */ +}; + struct Compiler { struct Parser *parser; struct Generator *generator; @@ -416,14 +425,6 @@ enum { NOREG = -1 }; -struct Generator { - struct Scanner *scanner; - char **regName; - int *regFree; - int spillReg; - int debug; -}; - struct Generator *createGenerator( struct Scanner *scanner ) { struct Generator *generator; @@ -650,8 +651,10 @@ int genMul( struct Generator *generator, int leftreg, int rightreg ) if( leftreg == EAX ) { reg = genBinary( "mul", generator, leftreg, rightreg, 1 ); } else { - genSaveReg( generator, EAX ); - if( leftreg != EDX ) { + if( !generator->regFree[EAX] ) { + genSaveReg( generator, EAX ); + } + if( leftreg != EDX && !generator->regFree[EDX] ) { genSaveReg( generator, EDX ); } if( rightreg == EAX ) { @@ -672,10 +675,12 @@ int genMul( struct Generator *generator, int leftreg, int rightreg ) putreg( generator, leftreg ); putstring( ", eax" ); putnl( ); - if( leftreg != EDX ) { + if( leftreg != EDX && !generator->regFree[EDX] ) { genRestoreReg( generator, EDX ); } - genRestoreReg( generator, EAX ); + if( !generator->regFree[EAX] ) { + genRestoreReg( generator, EAX ); + } reg = leftreg; } @@ -689,8 +694,12 @@ int genDiv( struct Generator *generator, int leftreg, int rightreg ) if( leftreg == EAX ) { reg = genBinary( "div", generator, leftreg, rightreg, 1 ); } else { - genSaveReg( generator, EAX ); - genSaveReg( generator, EDX ); + if( !generator->regFree[EAX] ) { + genSaveReg( generator, EAX ); + } + if( !generator->regFree[EDX] ) { + genSaveReg( generator, EDX ); + } if( rightreg == EAX ) { genSaveReg( generator, EBX ); putstring( "mov ebx, eax" ); @@ -711,8 +720,12 @@ int genDiv( struct Generator *generator, int leftreg, int rightreg ) putreg( generator, leftreg ); putstring( ", eax" ); putnl( ); - genRestoreReg( generator, EDX ); - genRestoreReg( generator, EAX ); + if( !generator->regFree[EDX] ) { + genRestoreReg( generator, EDX ); + } + if( !generator->regFree[EAX] ) { + genRestoreReg( generator, EAX ); + } reg = leftreg; } @@ -823,10 +836,71 @@ void parserExpect( struct Parser *parser, int must, char *what ) } } -struct ASTnode *parseExpression( struct Parser *parser ) +int parserTokenToOperator( struct Parser *parser, int token ) { - struct ASTnode *node, *left, *right; + int op; + + op = A_ERR; + switch( token ) { + case S_STAR: + op = A_MULTIPLY; + break; + + case S_SLASH: + op = A_DIVIDE; + break; + + case S_PLUS: + op = A_ADD; + break; + + case S_MINUS: + op = A_SUBTRACT; + break; + + default: + scannerPrintErrorHeader( parser->scanner ); + putstring( "unknown operator token for token " ); + putint( token ); + putnl( ); + exit( EXIT_FAILURE ); + } + + return op; +} + +int parserOperatorPrecedence( struct Parser *parser, int operator ) +{ + int precedence; + + precedence = 0; + switch( operator ) { + case A_MULTIPLY: + case A_DIVIDE: + precedence = 2; + break; + + case A_ADD: + case A_SUBTRACT: + precedence = 1; + break; + + default: + scannerPrintErrorHeader( parser->scanner ); + putstring( "unknown operator precedence for operator " ); + putint( operator ); + putnl( ); + exit( EXIT_FAILURE ); + } + + return precedence; +} + +struct ASTnode *parseExpression( struct Parser *parser, int level ) +{ + struct ASTnode *left, *right; struct Symbol *sym; + int op; if( parser->token == S_EOI ) { scannerPrintErrorHeader( parser->scanner ); @@ -846,7 +920,7 @@ struct ASTnode *parseExpression( struct Parser *parser ) putstring( "' in expression" ); putnl( ); exit( EXIT_FAILURE ); - } + } left = createASTleafSym( A_IDENT, sym ); } else { left = NULL; @@ -859,35 +933,22 @@ struct ASTnode *parseExpression( struct Parser *parser ) } parser->token = getToken( parser->scanner ); - if( parser->token == S_PLUS ) { - parser->token = getToken( parser->scanner ); - right = parseExpression( parser ); - node = createASTbinary( A_ADD, left, right ); - } else if( parser->token == S_MINUS ) { - parser->token = getToken( parser->scanner ); - right = parseExpression( parser ); - node = createASTbinary( A_SUBTRACT, left, right ); - } else if( parser->token == S_STAR ) { - parser->token = getToken( parser->scanner ); - right = parseExpression( parser ); - node = createASTbinary( A_MULTIPLY, left, right ); - } else if( parser->token == S_SLASH ) { + if( parser->token == S_EOI || parser->token == S_SEMICOLON ) { + return left; + } + + op = parserTokenToOperator( parser, parser->token ); + while( parserOperatorPrecedence( parser, op ) > level ) { parser->token = getToken( parser->scanner ); - right = parseExpression( parser ); - node = createASTbinary( A_DIVIDE, left, right ); - } else if( parser->token == S_EOI || parser->token == S_SEMICOLON ) { - node = left; - } else { - node = NULL; - scannerPrintErrorHeader( parser->scanner ); - putstring( "unexpected token '" ); - putint( parser->token ); - putstring( "' in expression" ); - putnl( ); - exit( EXIT_FAILURE ); + right = parseExpression( parser, parserOperatorPrecedence( parser, op ) ); + left = createASTbinary( op, left, right ); + if( parser->token == S_EOI || parser->token == S_SEMICOLON ) { + return left; + } + op = parserTokenToOperator( parser, parser->token ); } - return node; + return left; } void parseDeclaration( struct Compiler *compiler ) @@ -932,7 +993,7 @@ void parseAssignment( struct Compiler *compiler ) } right = createASTleafSym( A_LVIDENT, sym ); parserExpect( parser, S_EQUALS, "=" ); - left = parseExpression( parser ); + left = parseExpression( parser, 0 ); parserExpect( parser, S_SEMICOLON, ";" ); node = createASTbinary( A_ASSIGN, left, right ); @@ -989,8 +1050,8 @@ int main( int argc, char **argv ) struct Compiler *compiler; compiler = createCompiler( ); -/* compiler->parser->scanner->debug = 1; */ -/* compiler->generator->debug = 1; */ + /* compiler->parser->scanner->debug = 1; */ + /* compiler->generator->debug = 1; */ genPrologue( compiler ); compiler->parser->token = getToken( compiler->parser->scanner ); while( compiler->parser->token != S_EOI ) { diff --git a/miniany/test1.c b/miniany/test1.c index db266c0..4eeffed 100644 --- a/miniany/test1.c +++ b/miniany/test1.c @@ -3,5 +3,5 @@ int i; int j; -i = 12+25/5-2*3; // another comment -j = i/3+3*4; // another assignment +i = 12+25/5-2*3; // 25/5 -> 5, 12+5 -> 17, 2*3 -> 6, 17-6 -> 11 +j = i/3+3*4; // 11 / 3 -> 3, 3*4 -> 12, 3+12 -> 15 diff --git a/miniany/test1.disasmust b/miniany/test1.disasmust new file mode 100644 index 0000000..6f51f14 --- /dev/null +++ b/miniany/test1.disasmust @@ -0,0 +1,36 @@ +01000000 B80C000000 mov eax,0xc +01000005 BB19000000 mov ebx,0x19 +0100000A B905000000 mov ecx,0x5 +0100000F 50 push eax +01000010 89D8 mov eax,ebx +01000012 BA00000000 mov edx,0x0 +01000017 F7F1 div ecx +01000019 89C3 mov ebx,eax +0100001B 58 pop eax +0100001C 01D8 add eax,ebx +0100001E BB02000000 mov ebx,0x2 +01000023 B903000000 mov ecx,0x3 +01000028 50 push eax +01000029 89D8 mov eax,ebx +0100002B F7E1 mul ecx +0100002D 89C3 mov ebx,eax +0100002F 58 pop eax +01000030 29D8 sub eax,ebx +01000032 A361000001 mov [0x1000061],eax +01000037 A161000001 mov eax,[0x1000061] +0100003C BB03000000 mov ebx,0x3 +01000041 F7F3 div ebx +01000043 BB03000000 mov ebx,0x3 +01000048 B904000000 mov ecx,0x4 +0100004D 50 push eax +0100004E 89D8 mov eax,ebx +01000050 F7E1 mul ecx +01000052 89C3 mov ebx,eax +01000054 58 pop eax +01000055 01D8 add eax,ebx +01000057 A35D000001 mov [0x100005d],eax +0100005C F4 hlt +0100005D 0000 add [eax],al +0100005F 0000 add [eax],al +01000061 0000 add [eax],al +01000063 0000 add [eax],al diff --git a/miniany/test1.hexmust b/miniany/test1.hexmust index 1f67e53..3e2a922 100644 --- a/miniany/test1.hexmust +++ b/miniany/test1.hexmust @@ -1,8 +1,8 @@ -00000000 b8 0c 00 00 00 bb 19 00 00 00 b9 05 00 00 00 ba |................| -00000010 02 00 00 00 be 03 00 00 00 50 89 d0 f7 e6 89 c2 |.........P......| -00000020 58 29 d1 50 52 89 d8 ba 00 00 00 00 f7 f1 89 c3 |X).PR...........| -00000030 5a 58 01 d8 a3 65 00 00 01 a1 65 00 00 01 bb 03 |ZX...e....e.....| -00000040 00 00 00 b9 03 00 00 00 ba 04 00 00 00 50 52 89 |.............PR.| -00000050 c8 f7 e2 89 c1 5a 58 01 cb f7 f3 a3 61 00 00 01 |.....ZX.....a...| -00000060 f4 00 00 00 00 00 00 00 00 |.........| -00000069 +00000000 b8 0c 00 00 00 bb 19 00 00 00 b9 05 00 00 00 50 |...............P| +00000010 89 d8 ba 00 00 00 00 f7 f1 89 c3 58 01 d8 bb 02 |...........X....| +00000020 00 00 00 b9 03 00 00 00 50 89 d8 f7 e1 89 c3 58 |........P......X| +00000030 29 d8 a3 61 00 00 01 a1 61 00 00 01 bb 03 00 00 |)..a....a.......| +00000040 00 f7 f3 bb 03 00 00 00 b9 04 00 00 00 50 89 d8 |.............P..| +00000050 f7 e1 89 c3 58 01 d8 a3 5d 00 00 01 f4 00 00 00 |....X...].......| +00000060 00 00 00 00 00 |.....| +00000065 -- cgit v1.2.3-54-g00ecf