From 1a741c1c0168e8b2383a62100709c625a5a8961f Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Sun, 18 Jul 2021 19:16:08 +0200 Subject: another test with c4 and a minic compiler --- miniany/README | 18 ++ miniany/REQUIREMENTS | 31 +++ miniany/_start-stub.c | 8 + miniany/build.sh | 82 +++++++ miniany/c4.c | 556 ++++++++++++++++++++++++++++++++++++++++++++ miniany/cc.c | 39 ++++ miniany/libc-freestanding.c | 145 ++++++++++++ miniany/libc-hosted.c | 7 + 8 files changed, 886 insertions(+) create mode 100644 miniany/README create mode 100644 miniany/REQUIREMENTS create mode 100644 miniany/_start-stub.c create mode 100755 miniany/build.sh create mode 100644 miniany/c4.c create mode 100644 miniany/cc.c create mode 100644 miniany/libc-freestanding.c create mode 100644 miniany/libc-hosted.c diff --git a/miniany/README b/miniany/README new file mode 100644 index 0000000..77c7726 --- /dev/null +++ b/miniany/README @@ -0,0 +1,18 @@ +Building +-------- + +./build.sh cc tcc hosted d +./build.sh cc tcc freestanding d + +Acknowledgments +--------------- + +c4 - C in four functions +minimalistic C compiler running on an emulator, inspiration for this +project +https://github.com/rswier/c4.git + +selfie +C* self-hosting C compiler (also emulator, hypervisor) for RISC-V, +inspiration for what makes up a minimal C language, +http://selfie.cs.uni-salzburg.at/ diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS new file mode 100644 index 0000000..0352078 --- /dev/null +++ b/miniany/REQUIREMENTS @@ -0,0 +1,31 @@ +implementing: + +- userland + - argument passing to main function (argc, argv) +- libc + - print_char + - requires a 3 parameter syscall to 80h (Linux) + - requires + - inline assembly + +not implementing: +- libc + - printf + - format string only, as replacement for puts + - vararg required in compiler + - puts + - requires stdout, which is a FILE structure + - print_char + - requires a 3 parameter syscall to 80h (Linux) + - requires + - either inline assembly + - linker 
and calling convention +- preprocessor + - have a cat building up the required modules instead + - needs file operations (at least open, close, read) + - needs a file system on the host and the destination + (alternative: have a tape-like file system) +- linker + - having compilation units needs a linker to build + an executable + diff --git a/miniany/_start-stub.c b/miniany/_start-stub.c new file mode 100644 index 0000000..1ae3978 --- /dev/null +++ b/miniany/_start-stub.c @@ -0,0 +1,8 @@ +/* + * _start stub for tcc in freestanding mode + */ + +int _start( int argc, char **argv ) +{ + return main( argc, argv ); +} diff --git a/miniany/build.sh b/miniany/build.sh new file mode 100755 index 0000000..0db0d13 --- /dev/null +++ b/miniany/build.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +BINARY=${1:-cc} +COMPILER=${2:-gcc} +MODE=${3:-freestanding} +LEVEL=${4:-d} + +DEBUG=0 + +declare -a MODULES +MODULES+=("libc-${MODE}.c") +MODULES+=("$BINARY.c") + +case "${COMPILER}" in + gcc) + CFLAGS="-m32 -march=i386 -Wall -pedantic -Werror -std=c89 -x c" + ;; + clang) + CFLAGS="-m32 -march=i386 -Werror -Wall -pedantic -std=c89 -x c" + ;; + pcc) + CFLAGS="-march=i386 -Werror -Wall -std=c89 -x c" + ;; + tcc) + CFLAGS="-m32 -march=i386 -Werror -Wall -std=c89" + ;; + *) + echo "ERROR: Unknown compiler '${COMPILER}' (use gcc, clang, pcc or tcc)" 1>&2 + exit 1 + ;; +esac + +case "${LEVEL}" in + 0|1|2|3) + CFLAGS+=" -O${LEVEL}" + ;; + d) + CFLAGS+=" -g -O0" + DEBUG=1 + ;; + *) + echo "ERROR: Unknown compilation level '${LEVEL}' (use one of 0123 for -O, or d for -O0 and debugging)" 1>&2 + exit 1 + ;; +esac + +case "${MODE}" in + freestanding|hosted) + ;; + *) + echo "ERROR: Unknown environment '${MODE}' (use 'freestanding' or 'hosted')" 1>&2 + exit 1 + ;; +esac + +case "${COMPILER}:${MODE}" in + gcc:freestanding) + CFLAGS+=" -ffreestanding -fno-stack-protector -nostdlib -emain -fno-omit-frame-pointer" + ;; + clang:freestanding) + CFLAGS+=" -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain 
-fno-omit-frame-pointer" + ;; + pcc:freestanding) + CFLAGS+=" -ffreestanding -nostdlib -Wl,-emain" + ;; + tcc:freestanding) + CFLAGS+=" -fno-bultin -nostdlib" + MODULES+=("_start-stub.c") + ;; + *:hosted) + #~ CFLAGS+=" -lbsd" + ;; +esac + +echo "${COMPILER} ${CFLAGS} -o ${BINARY} ${MODULES[@]}" +if [ "${DEBUG}" = 1 ]; then + cat ${MODULES[@]} > "${BINARY}_tmp.c" + ${COMPILER} ${CFLAGS} -o ${BINARY} "${BINARY}_tmp.c" +else + cat ${MODULES[@]} | ${COMPILER} ${CFLAGS} -o ${BINARY} - +fi + diff --git a/miniany/c4.c b/miniany/c4.c new file mode 100644 index 0000000..692ed1f --- /dev/null +++ b/miniany/c4.c @@ -0,0 +1,556 @@ +// c4.c - C in four functions + +// char, int, and pointer types +// if, while, return, and expression statements +// just enough features to allow self-compilation and a bit more + +// Written by Robert Swierczek + +#include +#include +#include +#include +#include +//#define int long long + +char *p, *lp, // current position in source code + *data; // data/bss pointer + +int *e, *le, // current position in emitted code + *id, // currently parsed identifier + *sym, // symbol table (simple list of identifiers) + tk, // current token + ival, // current token value + ty, // current expression type + loc, // local variable offset + line, // current line number + src, // print source and assembly flag + debug; // print executed instructions + +// tokens and classes (operators last and in precedence order) +enum { + Num = 128, Fun, Sys, Glo, Loc, Id, + Char, Else, Enum, If, Int, Void, Return, Sizeof, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak +}; + +// opcodes +enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , + OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , + OPEN,READ,CLOS,PRTF,PUTS,GETC,PUTC,MALC,FREE,MSET,MCMP,EXIT }; + +// types +enum { CHAR, INT, PTR }; + +// identifier offsets (since we can't create an ident struct) +enum 
{ Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; + +void next() +{ + char *pp; + + while (tk = *p) { + ++p; + if (tk == '\n') { + if (src) { + printf("%d: %.*s", line, p - lp, lp); + lp = p; + while (le < e) { + printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,PUTS,GETC,PUTC,MALC,FREE,MSET,MCMP,EXIT,"[*++le * 5]); + if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); + } + } + ++line; + } + else if (tk == '#') { + while (*p != 0 && *p != '\n') ++p; + } + else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { + pp = p - 1; + while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') + tk = tk * 147 + *p++; + tk = (tk << 6) + (p - pp); + id = sym; + while (id[Tk]) { + if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } + id = id + Idsz; + } + id[Name] = (int)pp; + id[Hash] = tk; + tk = id[Tk] = Id; + return; + } + else if (tk >= '0' && tk <= '9') { + if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } + else if (*p == 'x' || *p == 'X') { + while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) + ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 
9 : 0); + } + else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } + tk = Num; + return; + } + else if (tk == '/') { + if (*p == '/') { + ++p; + while (*p != 0 && *p != '\n') ++p; + } + else if (*p != 0 && *p == '*') { + ++p; + while (*p != 0 && *p != '/') { + if (*p == '\n') line++; + if (*p == '*'); + ++p; + } + ++p; + } + else { + tk = Div; + return; + } + } + else if (tk == '\'' || tk == '"') { + pp = data; + while (*p != 0 && *p != tk) { + if ((ival = *p++) == '\\') { + if ((ival = *p++) == 'n') ival = '\n'; + } + if (tk == '"') *data++ = ival; + } + ++p; + if (tk == '"') ival = (int)pp; else tk = Num; + return; + } + else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } + else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } + else if (tk == '-') { + if (*p == '-') { + ++p; tk = Dec; + } + else if (*p >= '0' && *p <= '9') { + if (ival = *p - '0') { p++; while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } + ival = -ival; + tk = Num; + return; + } + else tk = Sub; return; + } + else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } + else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } + else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } + else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } + else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } + else if (tk == '^') { tk = Xor; return; } + else if (tk == '%') { tk = Mod; return; } + else if (tk == '*') { tk = Mul; return; } + else if (tk == '[') { tk = Brak; return; } + else if (tk == '?') { tk = Cond; return; } + else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; + } +} + +void expr(int lev) +{ + int t, *d; + + if (!tk) { printf("%d: unexpected eof in 
expression\n", line); exit(-1); } + else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } + else if (tk == '"') { + *++e = IMM; *++e = ival; next(); + while (tk == '"') next(); + data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; + } + else if (tk == Sizeof) { + next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } + ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } + *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); + ty = INT; + } + else if (tk == Id) { + d = id; next(); + if (tk == '(') { + next(); + t = 0; + while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } + next(); + if (d[Class] == Sys) *++e = d[Val]; + else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } + else { printf("%d: bad function call\n", line); exit(-1); } + if (t) { *++e = ADJ; *++e = t; } + ty = d[Type]; + } + else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } + else { + if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } + else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } + else { printf("%d: undefined variable\n", line); exit(-1); } + *++e = ((ty = d[Type]) == CHAR) ? LC : LI; + } + } + else if (tk == '(') { + next(); + if (tk == Int || tk == Char) { + t = (tk == Int) ? INT : CHAR; next(); + while (tk == Mul) { next(); t = t + PTR; } + if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } + expr(Inc); + ty = t; + } + else { + expr(Assign); + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + } + } + else if (tk == Mul) { + next(); expr(Inc); + if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } + *++e = (ty == CHAR) ? 
LC : LI; + } + else if (tk == And) { + next(); expr(Inc); + if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } + ty = ty + PTR; + } + else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } + else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } + else if (tk == Add) { next(); expr(Inc); ty = INT; } + else if (tk == Sub) { + next(); *++e = IMM; + if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } + ty = INT; + } + else if (tk == Inc || tk == Dec) { + t = tk; next(); expr(Inc); + if (*e == LC) { *e = PSH; *++e = LC; } + else if (*e == LI) { *e = PSH; *++e = LI; } + else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } + *++e = PSH; + *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); + *++e = (t == Inc) ? ADD : SUB; + *++e = (ty == CHAR) ? SC : SI; + } + else { printf("%d: bad expression\n", line); exit(-1); } + + while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method + t = ty; + if (tk == Assign) { + next(); + if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } + expr(Assign); *++e = ((ty = t) == CHAR) ? 
SC : SI; + } + else if (tk == Cond) { + next(); + *++e = BZ; d = ++e; + expr(Assign); + if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } + *d = (int)(e + 3); *++e = JMP; d = ++e; + expr(Cond); + *d = (int)(e + 1); + } + else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } + else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } + else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } + else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } + else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } + else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } + else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } + else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } + else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } + else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } + else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } + else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } + else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } + else if (tk == Add) { + next(); *++e = PSH; expr(Mul); + if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } + *++e = ADD; + } + else if (tk == Sub) { + next(); *++e = PSH; expr(Mul); + if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } + else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } + else *++e = SUB; + } + else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } + else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } + else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } + else if (tk == 
Inc || tk == Dec) { + if (*e == LC) { *e = PSH; *++e = LC; } + else if (*e == LI) { *e = PSH; *++e = LI; } + else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } + *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); + *++e = (tk == Inc) ? ADD : SUB; + *++e = (ty == CHAR) ? SC : SI; + *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); + *++e = (tk == Inc) ? SUB : ADD; + next(); + } + else if (tk == Brak) { + next(); *++e = PSH; expr(Assign); + if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } + if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } + else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + *++e = ADD; + *++e = ((ty = t - PTR) == CHAR) ? LC : LI; + } + else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } + } +} + +void stmt() +{ + int *a, *b; + + if (tk == If) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + *++e = BZ; b = ++e; + stmt(); + if (tk == Else) { + *b = (int)(e + 3); *++e = JMP; b = ++e; + next(); + stmt(); + } + *b = (int)(e + 1); + } + else if (tk == While) { + next(); + a = e + 1; + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + *++e = BZ; b = ++e; + stmt(); + *++e = JMP; *++e = (int)a; + *b = (int)(e + 1); + } + else if (tk == Return) { + next(); + if (tk != ';') expr(Assign); + *++e = LEV; + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + } + else if (tk == '{') { + next(); + while (tk != '}') stmt(); + next(); + } + else if (tk == ';') { + next(); + } + else { + expr(Assign); + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); 
exit(-1); } + } +} + +int main(int argc, char **argv) +{ + int fd, bt, ty, poolsz, *idmain; + int *pc, *sp, *bp, a, cycle; // vm registers + int i, *t; // temps + + --argc; ++argv; + if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } + if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } + if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; } + + if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } + + poolsz = 256*1024; // arbitrary size + if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } + if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } + if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } + if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } + + memset(sym, 0, poolsz); + memset(e, 0, poolsz); + memset(data, 0, poolsz); + + p = "char else enum if int void return sizeof while " + "EOF EXIT_SUCCESS " + "open read close printf puts getchar putchar malloc free memset memcmp exit void main"; + i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + // add library constants + next(); id[Class] = Num; id[Type] = INT; id[Val] = -1; + next(); id[Class] = Num; id[Type] = INT; id[Val] = 0; + i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table + next(); id[Tk] = Char; // handle void type + next(); idmain = id; // keep track of main + + if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } + if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } + p[i] = 0; + close(fd); + + // parse declarations + line = 1; + next(); + while (tk) { + bt = INT; // basetype + if (tk == Int) next(); + else if (tk == Char) { 
next(); bt = CHAR; } + else if (tk == Enum) { + next(); + if (tk != '{') next(); + if (tk == '{') { + next(); + i = 0; + while (tk != '}') { + if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } + next(); + if (tk == Assign) { + next(); + if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } + i = ival; + next(); + } + id[Class] = Num; id[Type] = INT; id[Val] = i++; + if (tk == ',') next(); + } + next(); + } + } + while (tk != ';' && tk != '}') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } + if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } + next(); + id[Type] = ty; + if (tk == '(') { // function + id[Class] = Fun; + id[Val] = (int)(e + 1); + next(); i = 0; + while (tk != ')') { + ty = INT; + if (tk == Int) next(); + else if (tk == Char) { next(); ty = CHAR; } + else if (tk == Void) { printf("here\n"); } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = i++; + next(); + if (tk == ',') next(); + } + next(); + if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } + loc = ++i; + next(); + while (tk == Int || tk == Char) { + bt = (tk == Int) ? 
INT : CHAR; + next(); + while (tk != ';') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = ++i; + next(); + if (tk == ',') next(); + } + next(); + } + *++e = ENT; *++e = i - loc; + while (tk != '}') stmt(); + *++e = LEV; + id = sym; // unwind symbol table locals + while (id[Tk]) { + if (id[Class] == Loc) { + id[Class] = id[HClass]; + id[Type] = id[HType]; + id[Val] = id[HVal]; + } + id = id + Idsz; + } + } + else { + id[Class] = Glo; + id[Val] = (int)data; + data = data + sizeof(int); + } + if (tk == ',') next(); + } + next(); + } + + if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } + if (src) return 0; + + // setup stack + bp = sp = (int *)((int)sp + poolsz); + *--sp = EXIT; // call exit if main returns + *--sp = PSH; t = sp; + *--sp = argc; + *--sp = (int)argv; + *--sp = (int)t; + + // run... + cycle = 0; + while (1) { + i = *pc++; ++cycle; + if (debug) { + printf("%d> %.4s", cycle, + &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,PUTS,GETC,PUTC,MALC,FREE,MSET,MCMP,EXIT,"[i * 5]); + if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); + } + if (i == LEA) a = (int)(bp + *pc++); // load local address + else if (i == IMM) a = *pc++; // load global address or immediate + else if (i == JMP) pc = (int *)*pc; // jump + else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine + else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero + else if (i == BNZ) pc = a ? 
(int *)*pc : pc + 1; // branch if not zero + else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine + else if (i == ADJ) sp = sp + *pc++; // stack adjust + else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine + else if (i == LI) a = *(int *)a; // load int + else if (i == LC) a = *(char *)a; // load char + else if (i == SI) *(int *)*sp++ = a; // store int + else if (i == SC) a = *(char *)*sp++ = a; // store char + else if (i == PSH) *--sp = a; // push + + else if (i == OR) a = *sp++ | a; + else if (i == XOR) a = *sp++ ^ a; + else if (i == AND) a = *sp++ & a; + else if (i == EQ) a = *sp++ == a; + else if (i == NE) a = *sp++ != a; + else if (i == LT) a = *sp++ < a; + else if (i == GT) a = *sp++ > a; + else if (i == LE) a = *sp++ <= a; + else if (i == GE) a = *sp++ >= a; + else if (i == SHL) a = *sp++ << a; + else if (i == SHR) a = *sp++ >> a; + else if (i == ADD) a = *sp++ + a; + else if (i == SUB) a = *sp++ - a; + else if (i == MUL) a = *sp++ * a; + else if (i == DIV) a = *sp++ / a; + else if (i == MOD) a = *sp++ % a; + + else if (i == OPEN) a = open((char *)sp[1], *sp); + else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); + else if (i == CLOS) a = close(*sp); + else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } + else if (i == PUTS) { t = sp + pc[1]; a = printf("%s\n", (char *)t[-1]); } + else if (i == GETC) a = getchar(); + else if (i == PUTC) putchar(*sp); + else if (i == MALC) a = (int)malloc(*sp); + else if (i == FREE) free((void *)*sp); + else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); + else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); + else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } + else { printf("unknown instruction = %d! 
cycle = %d\n", i, cycle); return -1; } + } +} diff --git a/miniany/cc.c b/miniany/cc.c new file mode 100644 index 0000000..df89ff7 --- /dev/null +++ b/miniany/cc.c @@ -0,0 +1,39 @@ +int col; +int row; + +int getChar( ) +{ + int c; + + c = getchar( ); + if( c == EOF ) { + return c; + } + col++; + if( c == '\n' ) { + col = 1; + row++; + putchar( '$' ); + } + return c; +} + +int main( int argc, char **argv ) +{ + int c; + + col = 1; + row = 1; + + puts( "Hello CC" ); + + c = getChar( ); + while( c != EOF ) { + putchar( c ); + c = getChar( ); + } + + exit( EXIT_SUCCESS ); + + return EXIT_SUCCESS; +} diff --git a/miniany/libc-freestanding.c b/miniany/libc-freestanding.c new file mode 100644 index 0000000..b4e48f8 --- /dev/null +++ b/miniany/libc-freestanding.c @@ -0,0 +1,145 @@ +/* + * minimal freestanding C library + * + * works for IA-32 and Linux, uses old INT 80h software interrupts + * for system calls. + * + */ + +int strlen( char *s ) +{ + char *p; + + p = s; + while( *p != '\0' ) { + p++; + } + + return p - s; +} + +enum { + EXIT_SUCCESS = 0, + EXIT_FAILURE = 1 +}; + +enum { + SYSCALL_EXIT = 1, + SYSCALL_READ = 3, + SYSCALL_WRITE = 4 +}; + +int errno; + +static __attribute__((noinline)) int syscall1( int id, int arg0 ) +{ + int retval; + + __asm__ volatile( "\ + push %%ebx\n\ + mov %1, %%eax\n\ + mov %2, %%ebx\n\ + int $0x80\n\ + mov %%eax, %0\n\ + pop %%ebx\n" : "=m"( retval ) : "m"( id ), "m"( arg0 ) + : "eax", "ebx" ); + + if( retval < 0 ) { + errno = -retval; + return -1; + } + + return retval; +} + +static __attribute__((noinline)) int syscall3( int id, int arg0, int arg1, int arg2 ) +{ + int retval; + + __asm__ volatile( "\ + push %%ebx\n\ + push %%ecx\n\ + push %%edx\n\ + mov %1, %%eax\n\ + mov %2, %%ebx\n\ + mov %3, %%ecx\n\ + mov %4, %%edx\n\ + int $0x80\n\ + mov %%eax, %0\n\ + pop %%edx\n\ + pop %%ecx\n\ + pop %%ebx\n" : "=m"( retval ) : "m"( id ), "m"( arg0 ), "m"( arg1 ), "m"( arg2 ) + : "eax", "ebx", "ecx", "edx" ); + + if( retval < 0 ) { + errno = 
-retval; + return -1; + } + + return retval; +} + +enum { + STDIN_FILENO = 0, + STDOUT_FILENO = 1, + STDERR_FILENO = 2 +}; + +static void print_char( int fd, int c ) +{ + char s[1]; + s[0] = c; + syscall3( SYSCALL_WRITE, fd, (int)s, 1 ); +} + +static void print_string( char *s ) +{ + syscall3( SYSCALL_WRITE, STDOUT_FILENO, (int)s, strlen( s ) ); +} + +static void print_newline( void ) +{ + print_char( STDOUT_FILENO, '\n' ); +} + +static int read_string( int fd, char *buf, int size ) +{ + return syscall3( SYSCALL_READ, fd, (int)buf, size ); +} + +enum { + EOF = -1 +}; + +int puts( char *s ) +{ + print_string( s ); + print_newline( ); + + return 1; +} + +int getchar( ) +{ + int res; + char buf[1]; + + res = read_string( STDIN_FILENO, buf, 1 ); + if( res == 0 ) { + return EOF; + } + + return buf[0]; +} + +int putchar( int c ) +{ + print_char( STDOUT_FILENO, c ); + + return c; +} + +void exit( int status ) +{ + syscall1( SYSCALL_EXIT, status ); +} diff --git a/miniany/libc-hosted.c b/miniany/libc-hosted.c new file mode 100644 index 0000000..8cb4d46 --- /dev/null +++ b/miniany/libc-hosted.c @@ -0,0 +1,7 @@ +/* + * include files for C library of the host. Currently only tested + * with glibc 2.31. + */ + +#include +#include -- cgit v1.2.3-54-g00ecf