summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Baumann <mail@andreasbaumann.cc>, 2021-08-19 16:57:07 +0000
committer: Andreas Baumann <mail@andreasbaumann.cc>, 2021-08-19 16:57:07 +0000
commit: 3f553969451504d020ff90644edd348ffbcd2907 (patch)
tree: 8fbd10ff4e0dc51ad7a8ef41a54d1d5799190a6c
parent: bfda462bca5db1693b2fa1c2845c8f4242d33782 (diff)
download: compilertests-3f553969451504d020ff90644edd348ffbcd2907.tar.gz
          compilertests-3f553969451504d020ff90644edd348ffbcd2907.tar.bz2
c4: also merged in structures
-rw-r--r--  miniany/c4.c  258
1 file changed, 182 insertions, 76 deletions
diff --git a/miniany/c4.c b/miniany/c4.c
index 990c527..a20715c 100644
--- a/miniany/c4.c
+++ b/miniany/c4.c
@@ -1,6 +1,6 @@
// c4.c - C in four functions
-// char, int, and pointer types
+// char, int, structs, and pointer types
// if, while, do, return, switch and expression statements
// just enough features to allow self-compilation and a bit more
@@ -21,8 +21,8 @@ int *e, *le, // current position in emitted code
*cas, // case statement patch-up pointer
*brak, // break statement patch-up pointer
*def, // default statement patch-up pointer
- *id, // currently parsed identifier
- *sym, // symbol table (simple list of identifiers)
+ *tsize, // array (indexed by type) of type sizes
+ tnew, // next available type
tk, // current token
ival, // current token value
ty, // current expression type
@@ -31,11 +31,33 @@ int *e, *le, // current position in emitted code
src, // print source and assembly flag
debug; // print executed instructions
+// identifier
+struct ident_s {
+ int tk;
+ int hash;
+ char *name;
+ int class;
+ int type;
+ int val;
+ int stype;
+ int hclass;
+ int htype;
+ int hval;
+} *id, // currently parsed identifier
+ *sym; // symbol table (simple list of identifiers)
+
+struct member_s {
+ struct ident_s *id;
+ int offset;
+ int type;
+ struct member_s *next;
+} **members; // array (indexed by type) of struct member lists
+
// tokens and classes (operators last and in precedence order)
enum {
Num = 128, Fun, Sys, Glo, Loc, Id,
- Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Do, Switch, While,
- Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
+ Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Do, Struct, Switch, While,
+ Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Dot, Arrow, Brak
};
// opcodes
@@ -44,10 +66,7 @@ enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,GETC,PUTS,PUTN,PUTC,PUTI,ISPC,IDGT,IANU,IALP,SCMP,EXIT };
// types
-enum { CHAR, INT, PTR };
-
-// identifier offsets (since we can't create an ident struct)
-enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
+enum { CHAR, INT, PTR = 256, PTR2 = 512 };
void next()
{
@@ -90,13 +109,13 @@ void next()
tk = tk * 147 + *p++;
tk = (tk << 6) + (p - pp);
id = sym;
- while (id[Tk]) {
- if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
- id = id + Idsz;
+ while (id->tk) {
+ if (tk == id->hash && !memcmp(id->name, pp, p - pp)) { tk = id->tk; return; }
+ id = id + 1;
}
- id[Name] = (int)pp;
- id[Hash] = tk;
- tk = id[Tk] = Id;
+ id->name = pp;
+ id->hash = tk;
+ tk = id->tk = Id;
return;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@@ -145,7 +164,7 @@ void next()
return;
case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return;
case '+': if (*p == '+') { ++p; tk = Inc; } else tk = Add; return;
- case '-': if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return;
+ case '-': if (*p == '-') { ++p; tk = Dec; } else if (*p == '>') { ++p; tk = Arrow; } else tk = Sub; return;
case '!': if (*p == '=') { ++p; tk = Ne; } return;
case '<': if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return;
case '>': if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return;
@@ -156,6 +175,7 @@ void next()
case '*': tk = Mul; return;
case '[': tk = Brak; return;
case '?': tk = Cond; return;
+ case '.': tk = Dot; return;
case '~': case ';': case '{': case '}':
case '(': case ')': case ']': case ',':
case ':':
@@ -166,7 +186,9 @@ void next()
void expr(int lev)
{
- int t, *d;
+ int t, *b, sz;
+ struct ident_s *d;
+ struct member_s *m;
switch (tk) {
case 0: printf("%d: unexpected eof in expression\n", line); exit(-1);
@@ -179,9 +201,10 @@ void expr(int lev)
case Sizeof:
next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
+ else if (tk == Struct) { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } ty = id->stype; next(); }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
- *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);
+ *++e = IMM; *++e = ty >= PTR ? sizeof(int) : tsize[ty];
ty = INT;
break;
case Id:
@@ -191,24 +214,25 @@ void expr(int lev)
t = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
next();
- if (d[Class] == Sys) *++e = d[Val];
- else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
+ if (d->class == Sys) *++e = d->val;
+ else if (d->class == Fun) { *++e = JSR; *++e = d->val; }
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }
- ty = d[Type];
+ ty = d->type;
}
- else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }
+ else if (d->class == Num) { *++e = IMM; *++e = d->val; ty = INT; }
else {
- if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }
- else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }
+ if (d->class == Loc) { *++e = LEA; *++e = loc - d->val; }
+ else if (d->class == Glo) { *++e = IMM; *++e = d->val; }
else { printf("%d: undefined variable\n", line); exit(-1); }
- *++e = ((ty = d[Type]) == CHAR) ? LC : LI;
+ if ((ty = d->type) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
}
break;
case '(':
next();
- if (tk == Int || tk == Char) {
- t = (tk == Int) ? INT : CHAR; next();
+ if (tk == Int || tk == Char || tk == Struct) {
+ if (tk == Int) { next(); t = INT; } else if (tk == Char) { next(); t = CHAR; }
+ else { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } t = id->stype; next(); }
while (tk == Mul) { next(); t = t + PTR; }
if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
expr(Inc);
@@ -222,7 +246,7 @@ void expr(int lev)
case Mul:
next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
- *++e = (ty == CHAR) ? LC : LI;
+ if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
case And:
next(); expr(Inc);
@@ -244,7 +268,7 @@ void expr(int lev)
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;
- *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
+ *++e = IMM; *++e = ty >= PTR2 ? sizeof(int) : (ty >= PTR) ? tsize[ty - PTR] : 1;
*++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
break;
@@ -261,15 +285,15 @@ void expr(int lev)
break;
case Cond:
next();
- *++e = BZ; d = ++e;
+ *++e = BZ; b = ++e;
expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
- *d = (int)(e + 3); *++e = JMP; d = ++e;
+ *b = (int)(e + 3); *++e = JMP; b = ++e;
expr(Cond);
- *d = (int)(e + 1);
+ *b = (int)(e + 1);
break;
- case Lor: next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; break;
- case Lan: next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; break;
+ case Lor: next(); *++e = BNZ; b = ++e; expr(Lan); *b = (int)(e + 1); ty = INT; break;
+ case Lan: next(); *++e = BZ; b = ++e; expr(Or); *b = (int)(e + 1); ty = INT; break;
case Or: next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; break;
case Xor: next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; break;
case And: next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; break;
@@ -283,14 +307,17 @@ void expr(int lev)
case Shr: next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; break;
case Add:
next(); *++e = PSH; expr(Mul);
- if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
+ sz = (ty = t) >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
+ if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
*++e = ADD;
break;
case Sub:
next(); *++e = PSH; expr(Mul);
- if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }
- else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }
+ sz = t >= PTR2 ? sizeof(int) : t >= PTR ? tsize[t - PTR] : 1;
+ if (t == ty && sz > 1) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sz; *++e = DIV; ty = INT; }
+ else if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; *++e = SUB; }
else *++e = SUB;
+ ty = t;
break;
case Mul: next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; break;
case Div: next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; break;
@@ -300,20 +327,35 @@ void expr(int lev)
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
- *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
+ sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
+ *++e = PSH; *++e = IMM; *++e = sz;
*++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
- *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
+ *++e = PSH; *++e = IMM; *++e = sz;
*++e = (tk == Inc) ? SUB : ADD;
next();
break;
+ case Dot:
+ ty = ty + PTR;
+ case Arrow:
+ if (ty <= PTR+INT || ty >= PTR2) { printf("%d: structure expected\n", line); exit(-1); }
+ next();
+ if (tk != Id) { printf("%d: structure member expected\n", line); exit(-1); }
+ m = members[ty - PTR]; while (m && m->id != id) m = m->next;
+ if (!m) { printf("%d: structure member not found\n", line); exit(-1); }
+ if (m->offset) { *++e = PSH; *++e = IMM; *++e = m->offset; *++e = ADD; }
+ ty = m->type;
+ if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
+ next();
+ break;
case Brak:
next(); *++e = PSH; expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
- if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
- else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
+ if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
+ sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t];
+ if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
*++e = ADD;
- *++e = ((ty = t - PTR) == CHAR) ? LC : LI;
+ if ((ty = t) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
default: printf("%d: compiler error tk=%d\n", line, tk); exit(-1);
}
@@ -417,7 +459,9 @@ void stmt()
int main(int argc, char **argv)
{
- int fd, bt, ty, poolsz, *idmain;
+ int fd, bt, mbt, ty, poolsz;
+ struct ident_s *idmain, *d;
+ struct member_s *m;
int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t, neg; // temps
@@ -433,22 +477,26 @@ int main(int argc, char **argv)
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
+ if (!(tsize = malloc(PTR * sizeof(int)))) { printf("could not malloc() tsize area\n"); return -1; }
+ if (!(members = malloc(PTR * sizeof(struct member_s *)))) { printf("could not malloc() members area\n"); return -1; }
memset(sym, 0, poolsz);
memset(e, 0, poolsz);
memset(data, 0, poolsz);
-
- p = "break case char default else enum if int return sizeof do switch while "
+ memset(tsize, 0, PTR * sizeof(int));
+ memset(members, 0, PTR * sizeof(struct member_s *));
+
+ p = "break case char default else enum if int return sizeof do struct switch while "
"EOF EXIT_SUCCESS EXIT_FAILURE NULL "
"open read close printf malloc free memset memcmp getchar putstring putnl putchar putint isspace isdigit isalnum isalpha strcmp exit void main";
- i = Break; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
+ i = Break; while (i <= While) { next(); id->tk = i++; } // add keywords to symbol table
// add library constants
- next(); id[Class] = Num; id[Type] = INT; id[Val] = -1;
- next(); id[Class] = Num; id[Type] = INT; id[Val] = 0;
- next(); id[Class] = Num; id[Type] = INT; id[Val] = 1;
- next(); id[Class] = Num; id[Type] = INT; id[Val] = (int)NULL;
- i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
- next(); id[Tk] = Char; // handle void type
+ next(); id->class = Num; id->type = INT; id->val = -1;
+ next(); id->class = Num; id->type = INT; id->val = 0;
+ next(); id->class = Num; id->type = INT; id->val = 1;
+ next(); id->class = Num; id->type = INT; id->val = (int)NULL;
+ i = OPEN; while (i <= EXIT) { next(); id->class = Sys; id->type = INT; id->val = i++; } // add library to symbol table
+ next(); id->tk = Char; // handle void type
next(); idmain = id; // keep track of main
if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
@@ -456,6 +504,10 @@ int main(int argc, char **argv)
p[i] = 0;
close(fd);
+ // add primitive types
+ tsize[tnew++] = sizeof(char);
+ tsize[tnew++] = sizeof(int);
+
// parse declarations
line = 1;
next();
@@ -483,33 +535,83 @@ int main(int argc, char **argv)
}
next();
}
- id[Class] = Num; id[Type] = INT; id[Val] = i++;
+ id->class = Num; id->type = INT; id->val = i++;
if (tk == ',') next();
}
next();
}
}
+ else if (tk == Struct) {
+ next();
+ if (tk == Id) {
+ if (!id->stype) id->stype = tnew++;
+ bt = id->stype;
+ next();
+ } else {
+ bt = tnew++;
+ }
+ if (tk == '{') {
+ next();
+ if (members[bt]) { printf("%d: duplicate structure definition\n", line); return -1; }
+ i = 0;
+ while (tk != '}') {
+ mbt = INT;
+ if (tk == Int) next();
+ else if (tk == Char) { next(); mbt = CHAR; }
+ else if (tk == Struct) {
+ next();
+ if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
+ mbt = id->stype;
+ next();
+ }
+ while (tk != ';') {
+ ty = mbt;
+ while (tk == Mul) { next(); ty = ty + PTR; }
+ if (tk != Id) { printf("%d: bad struct member definition\n", line); return -1; }
+ m = malloc(sizeof(struct member_s));
+ m->id = id;
+ m->offset = i;
+ m->type = ty;
+ m->next = members[bt];
+ members[bt] = m;
+ i = i + (ty >= PTR ? sizeof(int) : tsize[ty]);
+ i = (i + 3) & -4;
+ next();
+ if (tk == ',') next();
+ }
+ next();
+ }
+ next();
+ tsize[bt] = i;
+ }
+ }
while (tk != ';' && tk != '}') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
- if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
+ if (id->class) { printf("%d: duplicate global definition\n", line); return -1; }
next();
- id[Type] = ty;
+ id->type = ty;
if (tk == '(') { // function
- id[Class] = Fun;
- id[Val] = (int)(e + 1);
+ id->class = Fun;
+ id->val = (int)(e + 1);
next(); i = 0;
while (tk != ')') {
ty = INT;
if (tk == Int) next();
else if (tk == Char) { next(); ty = CHAR; }
+ else if (tk == Struct) {
+ next();
+ if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
+ ty = id->stype;
+ next();
+ }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
- if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
- id[HClass] = id[Class]; id[Class] = Loc;
- id[HType] = id[Type]; id[Type] = ty;
- id[HVal] = id[Val]; id[Val] = i++;
+ if (id->class == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
+ id->hclass = id->class; id->class = Loc;
+ id->htype = id->type; id->type = ty;
+ id->hval = id->val; id->val = i++;
next();
if (tk == ',') next();
}
@@ -517,17 +619,21 @@ int main(int argc, char **argv)
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i;
next();
- while (tk == Int || tk == Char) {
- bt = (tk == Int) ? INT : CHAR;
+ while (tk == Int || tk == Char || tk == Struct) {
+ if (tk == Int) bt = INT; else if (tk == Char) bt = CHAR; else {
+ next();
+ if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
+ bt = id->stype;
+ }
next();
while (tk != ';') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
- if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
- id[HClass] = id[Class]; id[Class] = Loc;
- id[HType] = id[Type]; id[Type] = ty;
- id[HVal] = id[Val]; id[Val] = ++i;
+ if (id->class == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
+ id->hclass = id->class; id->class = Loc;
+ id->htype = id->type; id->type = ty;
+ id->hval = id->val; id->val = ++i;
next();
if (tk == ',') next();
}
@@ -537,18 +643,18 @@ int main(int argc, char **argv)
while (tk != '}') stmt();
*++e = LEV;
id = sym; // unwind symbol table locals
- while (id[Tk]) {
- if (id[Class] == Loc) {
- id[Class] = id[HClass];
- id[Type] = id[HType];
- id[Val] = id[HVal];
+ while (id->tk) {
+ if (id->class == Loc) {
+ id->class = id->hclass;
+ id->type = id->htype;
+ id->val = id->hval;
}
- id = id + Idsz;
+ id = id + 1;
}
}
else {
- id[Class] = Glo;
- id[Val] = (int)data;
+ id->class = Glo;
+ id->val = (int)data;
data = data + sizeof(int);
}
if (tk == ',') next();
@@ -556,7 +662,7 @@ int main(int argc, char **argv)
next();
}
- if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; }
+ if (!(pc = (int *)idmain->val)) { printf("main() not defined\n"); return -1; }
if (src) return 0;
// setup stack