From 3f553969451504d020ff90644edd348ffbcd2907 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Thu, 19 Aug 2021 16:57:07 +0000 Subject: c4: also merged in structures --- miniany/c4.c | 258 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 182 insertions(+), 76 deletions(-) diff --git a/miniany/c4.c b/miniany/c4.c index 990c527..a20715c 100644 --- a/miniany/c4.c +++ b/miniany/c4.c @@ -1,6 +1,6 @@ // c4.c - C in four functions -// char, int, and pointer types +// char, int, structs, and pointer types // if, while, do, return, switch and expression statements // just enough features to allow self-compilation and a bit more @@ -21,8 +21,8 @@ int *e, *le, // current position in emitted code *cas, // case statement patch-up pointer *brak, // break statement patch-up pointer *def, // default statement patch-up pointer - *id, // currently parsed identifier - *sym, // symbol table (simple list of identifiers) + *tsize, // array (indexed by type) of type sizes + tnew, // next available type tk, // current token ival, // current token value ty, // current expression type @@ -31,11 +31,33 @@ int *e, *le, // current position in emitted code src, // print source and assembly flag debug; // print executed instructions +// identifier +struct ident_s { + int tk; + int hash; + char *name; + int class; + int type; + int val; + int stype; + int hclass; + int htype; + int hval; +} *id, // currently parsed identifier + *sym; // symbol table (simple list of identifiers) + +struct member_s { + struct ident_s *id; + int offset; + int type; + struct member_s *next; +} **members; // array (indexed by type) of struct member lists + // tokens and classes (operators last and in precedence order) enum { Num = 128, Fun, Sys, Glo, Loc, Id, - Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Do, Switch, While, - Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak + Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Do, Struct, Switch, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Dot, Arrow, Brak }; // opcodes @@ -44,10 +66,7 @@ enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,GETC,PUTS,PUTN,PUTC,PUTI,ISPC,IDGT,IANU,IALP,SCMP,EXIT }; // types -enum { CHAR, INT, PTR }; - -// identifier offsets (since we can't create an ident struct) -enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; +enum { CHAR, INT, PTR = 256, PTR2 = 512 }; void next() { @@ -90,13 +109,13 @@ void next() tk = tk * 147 + *p++; tk = (tk << 6) + (p - pp); id = sym; - while (id[Tk]) { - if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } - id = id + Idsz; + while (id->tk) { + if (tk == id->hash && !memcmp(id->name, pp, p - pp)) { tk = id->tk; return; } + id = id + 1; } - id[Name] = (int)pp; - id[Hash] = tk; - tk = id[Tk] = Id; + id->name = pp; + id->hash = tk; + tk = id->tk = Id; return; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -145,7 +164,7 @@ void next() return; case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; case '+': if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; - case '-': if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; + case '-': if (*p == '-') { ++p; tk = Dec; } else if (*p == '>') { ++p; tk = Arrow; } else tk = Sub; return; case '!': if (*p == '=') { ++p; tk = Ne; } return; case '<': if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; case '>': if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; @@ -156,6 +175,7 @@ void next() case '*': tk = Mul; return; case '[': tk = Brak; return; case '?': tk = Cond; return; + case '.': tk = Dot; return; case '~': case ';': case '{': case '}': case '(': case ')': case ']': case ',': case ':': @@ -166,7 +186,9 @@ void next() void expr(int lev) { - int t, *d; + int t, *b, sz; + struct ident_s *d; + struct member_s *m; switch (tk) { case 0: printf("%d: unexpected eof in expression\n", line); exit(-1); @@ -179,9 +201,10 @@ void expr(int lev) case Sizeof: next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + else if (tk == Struct) { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } ty = id->stype; next(); } while (tk == Mul) { next(); ty = ty + PTR; } if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } - *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); + *++e = IMM; *++e = ty >= PTR ? sizeof(int) : tsize[ty]; ty = INT; break; case Id: @@ -191,24 +214,25 @@ void expr(int lev) t = 0; while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } next(); - if (d[Class] == Sys) *++e = d[Val]; - else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } + if (d->class == Sys) *++e = d->val; + else if (d->class == Fun) { *++e = JSR; *++e = d->val; } else { printf("%d: bad function call\n", line); exit(-1); } if (t) { *++e = ADJ; *++e = t; } - ty = d[Type]; + ty = d->type; } - else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } + else if (d->class == Num) { *++e = IMM; *++e = d->val; ty = INT; } else { - if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } - else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } + if (d->class == Loc) { *++e = LEA; *++e = loc - d->val; } + else if (d->class == Glo) { *++e = IMM; *++e = d->val; } else { printf("%d: undefined variable\n", line); exit(-1); } - *++e = ((ty = d[Type]) == CHAR) ? LC : LI; + if ((ty = d->type) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; } break; case '(': next(); - if (tk == Int || tk == Char) { - t = (tk == Int) ? INT : CHAR; next(); + if (tk == Int || tk == Char || tk == Struct) { + if (tk == Int) { next(); t = INT; } else if (tk == Char) { next(); t = CHAR; } + else { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } t = id->stype; next(); } while (tk == Mul) { next(); t = t + PTR; } if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } expr(Inc); @@ -222,7 +246,7 @@ void expr(int lev) case Mul: next(); expr(Inc); if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } - *++e = (ty == CHAR) ? LC : LI; + if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; break; case And: next(); expr(Inc); @@ -244,7 +268,7 @@ void expr(int lev) else if (*e == LI) { *e = PSH; *++e = LI; } else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } *++e = PSH; - *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); + *++e = IMM; *++e = ty >= PTR2 ? sizeof(int) : (ty >= PTR) ? tsize[ty - PTR] : 1; *++e = (t == Inc) ? ADD : SUB; *++e = (ty == CHAR) ? SC : SI; break; @@ -261,15 +285,15 @@ void expr(int lev) break; case Cond: next(); - *++e = BZ; d = ++e; + *++e = BZ; b = ++e; expr(Assign); if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } - *d = (int)(e + 3); *++e = JMP; d = ++e; + *b = (int)(e + 3); *++e = JMP; b = ++e; expr(Cond); - *d = (int)(e + 1); + *b = (int)(e + 1); break; - case Lor: next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; break; - case Lan: next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; break; + case Lor: next(); *++e = BNZ; b = ++e; expr(Lan); *b = (int)(e + 1); ty = INT; break; + case Lan: next(); *++e = BZ; b = ++e; expr(Or); *b = (int)(e + 1); ty = INT; break; case Or: next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; break; case Xor: next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; break; case And: next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; break; @@ -283,14 +307,17 @@ void expr(int lev) case Shr: next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; break; case Add: next(); *++e = PSH; expr(Mul); - if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } + sz = (ty = t) >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1; + if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; } *++e = ADD; break; case Sub: next(); *++e = PSH; expr(Mul); - if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } - else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } + sz = t >= PTR2 ? sizeof(int) : t >= PTR ? tsize[t - PTR] : 1; + if (t == ty && sz > 1) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sz; *++e = DIV; ty = INT; } + else if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; *++e = SUB; } else *++e = SUB; + ty = t; break; case Mul: next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; break; case Div: next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; break; @@ -300,20 +327,35 @@ void expr(int lev) if (*e == LC) { *e = PSH; *++e = LC; } else if (*e == LI) { *e = PSH; *++e = LI; } else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } - *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); + sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1; + *++e = PSH; *++e = IMM; *++e = sz; *++e = (tk == Inc) ? ADD : SUB; *++e = (ty == CHAR) ? SC : SI; - *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); + *++e = PSH; *++e = IMM; *++e = sz; *++e = (tk == Inc) ? SUB : ADD; next(); break; + case Dot: + ty = ty + PTR; + case Arrow: + if (ty <= PTR+INT || ty >= PTR2) { printf("%d: structure expected\n", line); exit(-1); } + next(); + if (tk != Id) { printf("%d: structure member expected\n", line); exit(-1); } + m = members[ty - PTR]; while (m && m->id != id) m = m->next; + if (!m) { printf("%d: structure member not found\n", line); exit(-1); } + if (m->offset) { *++e = PSH; *++e = IMM; *++e = m->offset; *++e = ADD; } + ty = m->type; + if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; + next(); + break; case Brak: next(); *++e = PSH; expr(Assign); if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } - if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } - else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t]; + if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; } *++e = ADD; - *++e = ((ty = t - PTR) == CHAR) ? LC : LI; + if ((ty = t) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; break; default: printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } @@ -417,7 +459,9 @@ void stmt() int main(int argc, char **argv) { - int fd, bt, ty, poolsz, *idmain; + int fd, bt, mbt, ty, poolsz; + struct ident_s *idmain, *d; + struct member_s *m; int *pc, *sp, *bp, a, cycle; // vm registers int i, *t, neg; // temps @@ -433,22 +477,26 @@ int main(int argc, char **argv) if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } + if (!(tsize = malloc(PTR * sizeof(int)))) { printf("could not malloc() tsize area\n"); return -1; } + if (!(members = malloc(PTR * sizeof(struct member_s *)))) { printf("could not malloc() members area\n"); return -1; } memset(sym, 0, poolsz); memset(e, 0, poolsz); memset(data, 0, poolsz); - - p = "break case char default else enum if int return sizeof do switch while " + memset(tsize, 0, PTR * sizeof(int)); + memset(members, 0, PTR * sizeof(struct member_s *)); + + p = "break case char default else enum if int return sizeof do struct switch while " "EOF EXIT_SUCCESS EXIT_FAILURE NULL " "open read close printf malloc free memset memcmp getchar putstring putnl putchar putint isspace isdigit isalnum isalpha strcmp exit void main"; - i = Break; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + i = Break; while (i <= While) { next(); id->tk = i++; } // add keywords to symbol table // add library constants - next(); id[Class] = Num; id[Type] = INT; id[Val] = -1; - next(); id[Class] = Num; id[Type] = INT; id[Val] = 0; - next(); id[Class] = Num; id[Type] = INT; id[Val] = 1; - next(); id[Class] = Num; id[Type] = INT; id[Val] = (int)NULL; - i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table - next(); id[Tk] = Char; // handle void type + next(); id->class = Num; id->type = INT; id->val = -1; + next(); id->class = Num; id->type = INT; id->val = 0; + next(); id->class = Num; id->type = INT; id->val = 1; + next(); id->class = Num; id->type = INT; id->val = (int)NULL; + i = OPEN; while (i <= EXIT) { next(); id->class = Sys; id->type = INT; id->val = i++; } // add library to symbol table + next(); id->tk = Char; // handle void type next(); idmain = id; // keep track of main if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } @@ -456,6 +504,10 @@ int main(int argc, char **argv) p[i] = 0; close(fd); + // add primitive types + tsize[tnew++] = sizeof(char); + tsize[tnew++] = sizeof(int); + // parse declarations line = 1; next(); @@ -483,33 +535,83 @@ int main(int argc, char **argv) } next(); } - id[Class] = Num; id[Type] = INT; id[Val] = i++; + id->class = Num; id->type = INT; id->val = i++; if (tk == ',') next(); } next(); } } + else if (tk == Struct) { + next(); + if (tk == Id) { + if (!id->stype) id->stype = tnew++; + bt = id->stype; + next(); + } else { + bt = tnew++; + } + if (tk == '{') { + next(); + if (members[bt]) { printf("%d: duplicate structure definition\n", line); return -1; } + i = 0; + while (tk != '}') { + mbt = INT; + if (tk == Int) next(); + else if (tk == Char) { next(); mbt = CHAR; } + else if (tk == Struct) { + next(); + if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; } + mbt = id->stype; + next(); + } + while (tk != ';') { + ty = mbt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad struct member definition\n", line); return -1; } + m = malloc(sizeof(struct member_s)); + m->id = id; + m->offset = i; + m->type = ty; + m->next = members[bt]; + members[bt] = m; + i = i + (ty >= PTR ? sizeof(int) : tsize[ty]); + i = (i + 3) & -4; + next(); + if (tk == ',') next(); + } + next(); + } + next(); + tsize[bt] = i; + } + } while (tk != ';' && tk != '}') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } - if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } + if (id->class) { printf("%d: duplicate global definition\n", line); return -1; } next(); - id[Type] = ty; + id->type = ty; if (tk == '(') { // function - id[Class] = Fun; - id[Val] = (int)(e + 1); + id->class = Fun; + id->val = (int)(e + 1); next(); i = 0; while (tk != ')') { ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + else if (tk == Struct) { + next(); + if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; } + ty = id->stype; + next(); + } while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } - id[HClass] = id[Class]; id[Class] = Loc; - id[HType] = id[Type]; id[Type] = ty; - id[HVal] = id[Val]; id[Val] = i++; + if (id->class == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + id->hclass = id->class; id->class = Loc; + id->htype = id->type; id->type = ty; + id->hval = id->val; id->val = i++; next(); if (tk == ',') next(); } @@ -517,17 +619,21 @@ int main(int argc, char **argv) if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } loc = ++i; next(); - while (tk == Int || tk == Char) { - bt = (tk == Int) ? INT : CHAR; + while (tk == Int || tk == Char || tk == Struct) { + if (tk == Int) bt = INT; else if (tk == Char) bt = CHAR; else { + next(); + if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; } + bt = id->stype; + } next(); while (tk != ';') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } - id[HClass] = id[Class]; id[Class] = Loc; - id[HType] = id[Type]; id[Type] = ty; - id[HVal] = id[Val]; id[Val] = ++i; + if (id->class == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + id->hclass = id->class; id->class = Loc; + id->htype = id->type; id->type = ty; + id->hval = id->val; id->val = ++i; next(); if (tk == ',') next(); } @@ -537,18 +643,18 @@ int main(int argc, char **argv) while (tk != '}') stmt(); *++e = LEV; id = sym; // unwind symbol table locals - while (id[Tk]) { - if (id[Class] == Loc) { - id[Class] = id[HClass]; - id[Type] = id[HType]; - id[Val] = id[HVal]; + while (id->tk) { + if (id->class == Loc) { + id->class = id->hclass; + id->type = id->htype; + id->val = id->hval; } - id = id + Idsz; + id = id + 1; } } else { - id[Class] = Glo; - id[Val] = (int)data; + id->class = Glo; + id->val = (int)data; data = data + sizeof(int); } if (tk == ',') next(); @@ -556,7 +662,7 @@ int main(int argc, char **argv) next(); } - if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } + if (!(pc = (int *)idmain->val)) { printf("main() not defined\n"); return -1; } if (src) return 0; // setup stack -- cgit v1.2.3-54-g00ecf