summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2021-09-23 19:05:07 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2021-09-23 19:05:07 +0200
commite3cb0f8facec3723a8e0f6f245c7688cf3da8804 (patch)
treecbe87dfa8baebcb7a6616ff4b13c43ceb462d5a7
parent1d4b293aea9a1c595634e95675299805ee8fde0c (diff)
downloadcompilertests-e3cb0f8facec3723a8e0f6f245c7688cf3da8804.tar.gz
compilertests-e3cb0f8facec3723a8e0f6f245c7688cf3da8804.tar.bz2
cc
- proper Pratt parsing of expressions - fixed some register saving around divs and muls
-rw-r--r--miniany/BUGS2
-rw-r--r--miniany/README3
-rw-r--r--miniany/REQUIREMENTS31
-rw-r--r--miniany/TODOS8
-rwxr-xr-xminiany/build.sh1
-rw-r--r--miniany/cc.c175
-rw-r--r--miniany/test1.c4
-rw-r--r--miniany/test1.disasmust36
-rw-r--r--miniany/test1.hexmust16
9 files changed, 199 insertions, 77 deletions
diff --git a/miniany/BUGS b/miniany/BUGS
new file mode 100644
index 0000000..a796801
--- /dev/null
+++ b/miniany/BUGS
@@ -0,0 +1,2 @@
+- c4:
+ - break in a while is not working (leading to JMP 0)
diff --git a/miniany/README b/miniany/README
index 53266d6..033f3d4 100644
--- a/miniany/README
+++ b/miniany/README
@@ -70,6 +70,9 @@ complex things into our own compiler.
* c.c in swieros: https://github.com/rswier/swieros.git
+* https://github.com/lotabout/write-a-C-interpreter/blob/master/tutorial/en/: tutorial
+ based on C4 how to build a C interpreter, explains nicely details in C4.
+
* documentation
* "Compiler Construction", Niklaus Wirth
* https://github.com/DoctorWkt/acwj: a nice series on building a C compiler,
diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS
index 645bdfa..7dcfc78 100644
--- a/miniany/REQUIREMENTS
+++ b/miniany/REQUIREMENTS
@@ -54,6 +54,7 @@ not implementing:
- enums as constant replacement (instead of preprocessor), realy enum types
are not really useful.
- forward struct definitions or typedefs (handy for Compiler structure), but..
+ and we can work around by not producing any loops (hopefully)
- for loop: unless we start optimizing (SIMD) there is no real benefit
for a generic 'for', a strict for i=0 to N, i++ is easier to optimize, when
you have a grammatical construct to help recognizing it.
@@ -63,7 +64,7 @@ not implementing:
can just be a ignored keyword.
- c4 freestanding
- uses some casts, the malloc ones are actually good for clarification,
- the ones in memset are not so usefull (this is all because we don't
+ the ones in memset are not so useful (this is all because we don't
have 'void *')
- open/read/close is POSIX, we would prefer either C style file handling
(we have it in libc-freestanding.c or some stdin, stdout thingy)
@@ -82,4 +83,30 @@ not implementing:
- other cases translate by hand:
- case EXIT: /* putstring("exit("); putint(*sp); putstring(") cycle = "); putint(cycle); putnl(); */ return *sp;
- default: putstring("unknown instruction = "); putint(i); putstring("! cycle = "); putint(cycle); putnl(); return -1;
-
+ TODO:
+ - global char array declarations
+ - void parameter
+TODO:
+- avoid GNU-stype inline assembler (is far too complex), have more a
+ inline bytecode adder for explicit opcodes, e.g. nop -> .byte 0x90
+ - c.c in swieros (the c4 successor) has 'asm(NOP)', this is something we
+ could implement easily, preferably just as 'asm(0x90)'.
+ u.h contains an enum with opcodes (most likely doable or an easy architecture
+ like the one in swieros, I doubt this works for Intel opcodes).
+ There should though be only one single point of information for
+ opcodes per architecture, so asm gets sort of an inline string
+ generator for the assembly output. Or we share a common C-file with
+ enums for the opcodes and cat it to both the assembler and the compiler
+ during the build (should not result in increaed code size, as
+ those are enums).
+ the asm(x) or asm(x,y) constructs can be mapped on the host compilers
+ to asm __volatile__ .byte ugliness. In cc and c4 we can take the swieros
+ approach. This should give us nice lowlevel inline assembly in a really
+ simplified way (basically embedding bytes).
+ Not having inline assembly means you need compilation units written
+ and linked to the program in assembly, which - well - adds a linker
+ and calling conventions, which might be too early in bootstrapping.
+- asm-i386: device a new version which runs on c4 and is again freestanding
+ - static: just ignore, we don't have a linker, otoh, just rewrite it whithout static,
+ vararg, etc.
+- c4.c: checkout c5-AST branch (darn, that one looks more promising to extend!)
diff --git a/miniany/TODOS b/miniany/TODOS
deleted file mode 100644
index 07a0c81..0000000
--- a/miniany/TODOS
+++ /dev/null
@@ -1,8 +0,0 @@
-- c4
- - global char array declarations
- - void parameter
-- avoid inline assembler, have more a inline bytecode adder
- explicit opcodes, e.g. nop -> .byte 0x90
-
-
-
diff --git a/miniany/build.sh b/miniany/build.sh
index 68124b0..b0c13f3 100755
--- a/miniany/build.sh
+++ b/miniany/build.sh
@@ -53,6 +53,7 @@ case "${MODE}" in
CFLAGS+=" -D__FREESTANDING__"
;;
hosted)
+ CFLAGS+=" -D__HOSTED__"
;;
*)
echo "ERROR: Unknown environment '${MODE}' (use 'freestanding' or 'hosted')" 1>&2
diff --git a/miniany/cc.c b/miniany/cc.c
index ccda806..4d26893 100644
--- a/miniany/cc.c
+++ b/miniany/cc.c
@@ -17,8 +17,8 @@ enum {
S_INT = 10,
S_IDENT = 20,
S_NUM = 30,
- S_ERR = 40,
- S_EOI = 41
+ S_EOI = 98,
+ S_ERR = 99
};
struct Scanner {
@@ -340,15 +340,16 @@ enum {
A_SUBTRACT,
A_MULTIPLY,
A_DIVIDE,
- A_ASSIGN
+ A_ASSIGN,
+ A_ERR = 99
};
struct ASTnode {
- int op;
- struct ASTnode *left;
- struct ASTnode *right;
- int intval; /* for A_INT_LITERAL */
- struct Symbol *sym; /* for A_IDENT */
+ int op; /* node operation, one of the A_ enums */
+ struct ASTnode *left; /* left side operand */
+ struct ASTnode *right; /* right side operand (or empty for unary operations */
+ int intval; /* value in case of A_INT_LITERAL */
+ struct Symbol *sym; /* symbol in case of A_IDENT */
};
struct ASTnode *createASTnode( int op, struct ASTnode *left, struct ASTnode *right, int intval, struct Symbol *sym )
@@ -391,7 +392,7 @@ void freeASTnode( struct ASTnode *node )
free( (char *)node );
}
-/* parser/compiler, no forward definitions */
+/* parser/compiler, no forward declarations */
struct Parser {
int token;
@@ -399,6 +400,14 @@ struct Parser {
struct Scope *global_scope;
};
+struct Generator {
+ struct Scanner *scanner;
+ char **regName; /* names of the registers */
+ int *regFree; /* free registers */
+ int spillReg; /* index of the current spilled register */
+ int debug; /* adds debug output to generated assembly */
+};
+
struct Compiler {
struct Parser *parser;
struct Generator *generator;
@@ -416,14 +425,6 @@ enum {
NOREG = -1
};
-struct Generator {
- struct Scanner *scanner;
- char **regName;
- int *regFree;
- int spillReg;
- int debug;
-};
-
struct Generator *createGenerator( struct Scanner *scanner )
{
struct Generator *generator;
@@ -650,8 +651,10 @@ int genMul( struct Generator *generator, int leftreg, int rightreg )
if( leftreg == EAX ) {
reg = genBinary( "mul", generator, leftreg, rightreg, 1 );
} else {
- genSaveReg( generator, EAX );
- if( leftreg != EDX ) {
+ if( !generator->regFree[EAX] ) {
+ genSaveReg( generator, EAX );
+ }
+ if( leftreg != EDX && !generator->regFree[EDX] ) {
genSaveReg( generator, EDX );
}
if( rightreg == EAX ) {
@@ -672,10 +675,12 @@ int genMul( struct Generator *generator, int leftreg, int rightreg )
putreg( generator, leftreg );
putstring( ", eax" );
putnl( );
- if( leftreg != EDX ) {
+ if( leftreg != EDX && !generator->regFree[EDX] ) {
genRestoreReg( generator, EDX );
}
- genRestoreReg( generator, EAX );
+ if( !generator->regFree[EAX] ) {
+ genRestoreReg( generator, EAX );
+ }
reg = leftreg;
}
@@ -689,8 +694,12 @@ int genDiv( struct Generator *generator, int leftreg, int rightreg )
if( leftreg == EAX ) {
reg = genBinary( "div", generator, leftreg, rightreg, 1 );
} else {
- genSaveReg( generator, EAX );
- genSaveReg( generator, EDX );
+ if( !generator->regFree[EAX] ) {
+ genSaveReg( generator, EAX );
+ }
+ if( !generator->regFree[EDX] ) {
+ genSaveReg( generator, EDX );
+ }
if( rightreg == EAX ) {
genSaveReg( generator, EBX );
putstring( "mov ebx, eax" );
@@ -711,8 +720,12 @@ int genDiv( struct Generator *generator, int leftreg, int rightreg )
putreg( generator, leftreg );
putstring( ", eax" );
putnl( );
- genRestoreReg( generator, EDX );
- genRestoreReg( generator, EAX );
+ if( !generator->regFree[EDX] ) {
+ genRestoreReg( generator, EDX );
+ }
+ if( !generator->regFree[EAX] ) {
+ genRestoreReg( generator, EAX );
+ }
reg = leftreg;
}
@@ -823,10 +836,71 @@ void parserExpect( struct Parser *parser, int must, char *what )
}
}
-struct ASTnode *parseExpression( struct Parser *parser )
+int parserTokenToOperator( struct Parser *parser, int token )
{
- struct ASTnode *node, *left, *right;
+ int op;
+
+ op = A_ERR;
+ switch( token ) {
+ case S_STAR:
+ op = A_MULTIPLY;
+ break;
+
+ case S_SLASH:
+ op = A_DIVIDE;
+ break;
+
+ case S_PLUS:
+ op = A_ADD;
+ break;
+
+ case S_MINUS:
+ op = A_SUBTRACT;
+ break;
+
+ default:
+ scannerPrintErrorHeader( parser->scanner );
+ putstring( "unknown operator token for token " );
+ putint( token );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+
+ return op;
+}
+
+int parserOperatorPrecedence( struct Parser *parser, int operator )
+{
+ int precedence;
+
+ precedence = 0;
+ switch( operator ) {
+ case A_MULTIPLY:
+ case A_DIVIDE:
+ precedence = 2;
+ break;
+
+ case A_ADD:
+ case A_SUBTRACT:
+ precedence = 1;
+ break;
+
+ default:
+ scannerPrintErrorHeader( parser->scanner );
+ putstring( "unknown operator precedence for operator " );
+ putint( operator );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+
+ return precedence;
+}
+
+struct ASTnode *parseExpression( struct Parser *parser, int level )
+{
+ struct ASTnode *left, *right;
struct Symbol *sym;
+ int op;
if( parser->token == S_EOI ) {
scannerPrintErrorHeader( parser->scanner );
@@ -846,7 +920,7 @@ struct ASTnode *parseExpression( struct Parser *parser )
putstring( "' in expression" );
putnl( );
exit( EXIT_FAILURE );
- }
+ }
left = createASTleafSym( A_IDENT, sym );
} else {
left = NULL;
@@ -859,35 +933,22 @@ struct ASTnode *parseExpression( struct Parser *parser )
}
parser->token = getToken( parser->scanner );
- if( parser->token == S_PLUS ) {
- parser->token = getToken( parser->scanner );
- right = parseExpression( parser );
- node = createASTbinary( A_ADD, left, right );
- } else if( parser->token == S_MINUS ) {
- parser->token = getToken( parser->scanner );
- right = parseExpression( parser );
- node = createASTbinary( A_SUBTRACT, left, right );
- } else if( parser->token == S_STAR ) {
- parser->token = getToken( parser->scanner );
- right = parseExpression( parser );
- node = createASTbinary( A_MULTIPLY, left, right );
- } else if( parser->token == S_SLASH ) {
+ if( parser->token == S_EOI || parser->token == S_SEMICOLON ) {
+ return left;
+ }
+
+ op = parserTokenToOperator( parser, parser->token );
+ while( parserOperatorPrecedence( parser, op ) > level ) {
parser->token = getToken( parser->scanner );
- right = parseExpression( parser );
- node = createASTbinary( A_DIVIDE, left, right );
- } else if( parser->token == S_EOI || parser->token == S_SEMICOLON ) {
- node = left;
- } else {
- node = NULL;
- scannerPrintErrorHeader( parser->scanner );
- putstring( "unexpected token '" );
- putint( parser->token );
- putstring( "' in expression" );
- putnl( );
- exit( EXIT_FAILURE );
+ right = parseExpression( parser, parserOperatorPrecedence( parser, op ) );
+ left = createASTbinary( op, left, right );
+ if( parser->token == S_EOI || parser->token == S_SEMICOLON ) {
+ return left;
+ }
+ op = parserTokenToOperator( parser, parser->token );
}
- return node;
+ return left;
}
void parseDeclaration( struct Compiler *compiler )
@@ -932,7 +993,7 @@ void parseAssignment( struct Compiler *compiler )
}
right = createASTleafSym( A_LVIDENT, sym );
parserExpect( parser, S_EQUALS, "=" );
- left = parseExpression( parser );
+ left = parseExpression( parser, 0 );
parserExpect( parser, S_SEMICOLON, ";" );
node = createASTbinary( A_ASSIGN, left, right );
@@ -989,8 +1050,8 @@ int main( int argc, char **argv )
struct Compiler *compiler;
compiler = createCompiler( );
-/* compiler->parser->scanner->debug = 1; */
-/* compiler->generator->debug = 1; */
+ /* compiler->parser->scanner->debug = 1; */
+ /* compiler->generator->debug = 1; */
genPrologue( compiler );
compiler->parser->token = getToken( compiler->parser->scanner );
while( compiler->parser->token != S_EOI ) {
diff --git a/miniany/test1.c b/miniany/test1.c
index db266c0..4eeffed 100644
--- a/miniany/test1.c
+++ b/miniany/test1.c
@@ -3,5 +3,5 @@
int i;
int j;
-i = 12+25/5-2*3; // another comment
-j = i/3+3*4; // another assignment
+i = 12+25/5-2*3; // 25/5 -> 5, 12+5 -> 17, 2*3 -> 6, 17-6 -> 11
+j = i/3+3*4; // 11 / 3 -> 3, 3*4 -> 12, 3+12 -> 15
diff --git a/miniany/test1.disasmust b/miniany/test1.disasmust
new file mode 100644
index 0000000..6f51f14
--- /dev/null
+++ b/miniany/test1.disasmust
@@ -0,0 +1,36 @@
+01000000 B80C000000 mov eax,0xc
+01000005 BB19000000 mov ebx,0x19
+0100000A B905000000 mov ecx,0x5
+0100000F 50 push eax
+01000010 89D8 mov eax,ebx
+01000012 BA00000000 mov edx,0x0
+01000017 F7F1 div ecx
+01000019 89C3 mov ebx,eax
+0100001B 58 pop eax
+0100001C 01D8 add eax,ebx
+0100001E BB02000000 mov ebx,0x2
+01000023 B903000000 mov ecx,0x3
+01000028 50 push eax
+01000029 89D8 mov eax,ebx
+0100002B F7E1 mul ecx
+0100002D 89C3 mov ebx,eax
+0100002F 58 pop eax
+01000030 29D8 sub eax,ebx
+01000032 A361000001 mov [0x1000061],eax
+01000037 A161000001 mov eax,[0x1000061]
+0100003C BB03000000 mov ebx,0x3
+01000041 F7F3 div ebx
+01000043 BB03000000 mov ebx,0x3
+01000048 B904000000 mov ecx,0x4
+0100004D 50 push eax
+0100004E 89D8 mov eax,ebx
+01000050 F7E1 mul ecx
+01000052 89C3 mov ebx,eax
+01000054 58 pop eax
+01000055 01D8 add eax,ebx
+01000057 A35D000001 mov [0x100005d],eax
+0100005C F4 hlt
+0100005D 0000 add [eax],al
+0100005F 0000 add [eax],al
+01000061 0000 add [eax],al
+01000063 0000 add [eax],al
diff --git a/miniany/test1.hexmust b/miniany/test1.hexmust
index 1f67e53..3e2a922 100644
--- a/miniany/test1.hexmust
+++ b/miniany/test1.hexmust
@@ -1,8 +1,8 @@
-00000000 b8 0c 00 00 00 bb 19 00 00 00 b9 05 00 00 00 ba |................|
-00000010 02 00 00 00 be 03 00 00 00 50 89 d0 f7 e6 89 c2 |.........P......|
-00000020 58 29 d1 50 52 89 d8 ba 00 00 00 00 f7 f1 89 c3 |X).PR...........|
-00000030 5a 58 01 d8 a3 65 00 00 01 a1 65 00 00 01 bb 03 |ZX...e....e.....|
-00000040 00 00 00 b9 03 00 00 00 ba 04 00 00 00 50 52 89 |.............PR.|
-00000050 c8 f7 e2 89 c1 5a 58 01 cb f7 f3 a3 61 00 00 01 |.....ZX.....a...|
-00000060 f4 00 00 00 00 00 00 00 00 |.........|
-00000069
+00000000 b8 0c 00 00 00 bb 19 00 00 00 b9 05 00 00 00 50 |...............P|
+00000010 89 d8 ba 00 00 00 00 f7 f1 89 c3 58 01 d8 bb 02 |...........X....|
+00000020 00 00 00 b9 03 00 00 00 50 89 d8 f7 e1 89 c3 58 |........P......X|
+00000030 29 d8 a3 61 00 00 01 a1 61 00 00 01 bb 03 00 00 |)..a....a.......|
+00000040 00 f7 f3 bb 03 00 00 00 b9 04 00 00 00 50 89 d8 |.............P..|
+00000050 f7 e1 89 c3 58 01 d8 a3 5d 00 00 01 f4 00 00 00 |....X...].......|
+00000060 00 00 00 00 00 |.....|
+00000065