summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2021-07-18 19:16:08 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2021-07-18 19:16:08 +0200
commit1a741c1c0168e8b2383a62100709c625a5a8961f (patch)
tree9b62cecba87e6ba8725cb1cadbb6d505d2880399
parent6c6fe33b224dc3342d8d8fc5fc2d27777eb55b89 (diff)
downloadcompilertests-1a741c1c0168e8b2383a62100709c625a5a8961f.tar.gz
compilertests-1a741c1c0168e8b2383a62100709c625a5a8961f.tar.bz2
another test with c4 and a minic compiler
-rw-r--r--miniany/README18
-rw-r--r--miniany/REQUIREMENTS31
-rw-r--r--miniany/_start-stub.c8
-rwxr-xr-xminiany/build.sh82
-rw-r--r--miniany/c4.c556
-rw-r--r--miniany/cc.c39
-rw-r--r--miniany/libc-freestanding.c145
-rw-r--r--miniany/libc-hosted.c7
8 files changed, 886 insertions, 0 deletions
diff --git a/miniany/README b/miniany/README
new file mode 100644
index 0000000..77c7726
--- /dev/null
+++ b/miniany/README
@@ -0,0 +1,18 @@
+Building
+--------
+
+./build.sh cc tcc hosted d
+./build.sh cc tcc freestanding d
+
+Acknoledgments
+--------------
+
+c4 - C in four functions
+minimalistic C compiler running on an emulator, inspiration for this
+project
+https://github.com/rswier/c4.git
+
+selfie
+C* self-hosting C compiler (also emulator, hypervisor) for RISCV,
+inspiration for what makes up a minimal C language,
+http://selfie.cs.uni-salzburg.at/
diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS
new file mode 100644
index 0000000..0352078
--- /dev/null
+++ b/miniany/REQUIREMENTS
@@ -0,0 +1,31 @@
+implementing:
+
+- userland
+ - argument passing to main function (argc, argv)
+- libc
+ - print_char
+ - requires a 3 parameter syscall to 80h (Linux)
+ - requires
+ - inline assembly
+
+not implementing:
+- libc
+ - printf
+ - format string only, as replacement for puts
+ - vararg required in compiler
+ - puts
+ - requires stdout, which is a FILE structure
+ - print_char
+ - requires a 3 parameter syscall to 80h (Linux)
+ - requires
+ - either inline assembly
+ - linker and calling convention
+- preprocessor
+ - have a cat building up the required modules instead
+ - needs file operations (at least open, close, read)
+ - needs a file system on the host and the destination
+ (alternative: have a tape-like file system)
+- linker
+ - have compilation units needs a linker do build
+ an executable
+
diff --git a/miniany/_start-stub.c b/miniany/_start-stub.c
new file mode 100644
index 0000000..1ae3978
--- /dev/null
+++ b/miniany/_start-stub.c
@@ -0,0 +1,8 @@
+/*
+ * _start stub for tcc in freestanding mode
+ */
+
+int _start( int argc, char **argv )
+{
+ return main( argc, argv );
+}
diff --git a/miniany/build.sh b/miniany/build.sh
new file mode 100755
index 0000000..0db0d13
--- /dev/null
+++ b/miniany/build.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+BINARY=${1:-cc}
+COMPILER=${2:-gcc}
+MODE=${3:-freestanding}
+LEVEL=${4:-d}
+
+DEBUG=0
+
+declare -a MODULES
+MODULES+=("libc-${MODE}.c")
+MODULES+=("$BINARY.c")
+
+case "${COMPILER}" in
+ gcc)
+ CFLAGS="-m32 -march=i386 -Wall -pedantic -Werror -std=c89 -x c"
+ ;;
+ clang)
+ CFLAGS="-m32 -march=i386 -Werror -Wall -pedantic -std=c89 -x c"
+ ;;
+ pcc)
+ CFLAGS="-march=i386 -Werror -Wall -std=c89 -x c"
+ ;;
+ tcc)
+ CFLAGS="-m32 -march=i386 -Werror -Wall -std=c89"
+ ;;
+ *)
+ echo "ERROR: Unknown compiler '${COMPILER}' (use gcc, clang, pcc or tcc)" 1>&2
+ exit 1
+ ;;
+esac
+
+case "${LEVEL}" in
+ 0|1|2|3)
+ CFLAGS+=" -O${LEVEL}"
+ ;;
+ d)
+ CFLAGS+=" -g -O0"
+ DEBUG=1
+ ;;
+ *)
+ echo "ERROR: Unknown compilation level '${LEVEL}' (use one of 0123 for -O<n>, or d for -O0 and debugging)" 1>&2
+ exit 1
+ ;;
+esac
+
+case "${MODE}" in
+ freestanding|hosted)
+ ;;
+ *)
+ echo "ERROR: Unknown environment '${MODE}' (use 'freestanding' or 'hosted')" 1>&2
+ exit 1
+ ;;
+esac
+
+case "${COMPILER}:${MODE}" in
+ gcc:freestanding)
+ CFLAGS+=" -ffreestanding -fno-stack-protector -nostdlib -emain -fno-omit-frame-pointer"
+ ;;
+ clang:freestanding)
+ CFLAGS+=" -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -fno-omit-frame-pointer"
+ ;;
+ pcc:freestanding)
+ CFLAGS+=" -ffreestanding -nostdlib -Wl,-emain"
+ ;;
+ tcc:freestanding)
+ CFLAGS+=" -fno-bultin -nostdlib"
+ MODULES+=("_start-stub.c")
+ ;;
+ *:hosted)
+ #~ CFLAGS+=" -lbsd"
+ ;;
+esac
+
+echo "${COMPILER} ${CFLAGS} -o ${BINARY} ${MODULES[@]}"
+if [ "${DEBUG}" = 1 ]; then
+ cat ${MODULES[@]} > "${BINARY}_tmp.c"
+ ${COMPILER} ${CFLAGS} -o ${BINARY} "${BINARY}_tmp.c"
+else
+ cat ${MODULES[@]} | ${COMPILER} ${CFLAGS} -o ${BINARY} -
+fi
+
diff --git a/miniany/c4.c b/miniany/c4.c
new file mode 100644
index 0000000..692ed1f
--- /dev/null
+++ b/miniany/c4.c
@@ -0,0 +1,556 @@
+// c4.c - C in four functions
+
+// char, int, and pointer types
+// if, while, return, and expression statements
+// just enough features to allow self-compilation and a bit more
+
+// Written by Robert Swierczek
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <fcntl.h>
+//#define int long long
+
+char *p, *lp, // current position in source code
+ *data; // data/bss pointer
+
+int *e, *le, // current position in emitted code
+ *id, // currently parsed identifier
+ *sym, // symbol table (simple list of identifiers)
+ tk, // current token
+ ival, // current token value
+ ty, // current expression type
+ loc, // local variable offset
+ line, // current line number
+ src, // print source and assembly flag
+ debug; // print executed instructions
+
+// tokens and classes (operators last and in precedence order)
+enum {
+ Num = 128, Fun, Sys, Glo, Loc, Id,
+ Char, Else, Enum, If, Int, Void, Return, Sizeof, While,
+ Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
+};
+
+// opcodes
+enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
+ OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
+ OPEN,READ,CLOS,PRTF,PUTS,GETC,PUTC,MALC,FREE,MSET,MCMP,EXIT };
+
+// types
+enum { CHAR, INT, PTR };
+
+// identifier offsets (since we can't create an ident struct)
+enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
+
+void next()
+{
+ char *pp;
+
+ while (tk = *p) {
+ ++p;
+ if (tk == '\n') {
+ if (src) {
+ printf("%d: %.*s", line, p - lp, lp);
+ lp = p;
+ while (le < e) {
+ printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
+ "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
+ "OPEN,READ,CLOS,PRTF,PUTS,GETC,PUTC,MALC,FREE,MSET,MCMP,EXIT,"[*++le * 5]);
+ if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
+ }
+ }
+ ++line;
+ }
+ else if (tk == '#') {
+ while (*p != 0 && *p != '\n') ++p;
+ }
+ else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
+ pp = p - 1;
+ while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
+ tk = tk * 147 + *p++;
+ tk = (tk << 6) + (p - pp);
+ id = sym;
+ while (id[Tk]) {
+ if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
+ id = id + Idsz;
+ }
+ id[Name] = (int)pp;
+ id[Hash] = tk;
+ tk = id[Tk] = Id;
+ return;
+ }
+ else if (tk >= '0' && tk <= '9') {
+ if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; }
+ else if (*p == 'x' || *p == 'X') {
+ while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F')))
+ ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0);
+ }
+ else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; }
+ tk = Num;
+ return;
+ }
+ else if (tk == '/') {
+ if (*p == '/') {
+ ++p;
+ while (*p != 0 && *p != '\n') ++p;
+ }
+ else if (*p != 0 && *p == '*') {
+ ++p;
+ while (*p != 0 && *p != '/') {
+ if (*p == '\n') line++;
+ if (*p == '*');
+ ++p;
+ }
+ ++p;
+ }
+ else {
+ tk = Div;
+ return;
+ }
+ }
+ else if (tk == '\'' || tk == '"') {
+ pp = data;
+ while (*p != 0 && *p != tk) {
+ if ((ival = *p++) == '\\') {
+ if ((ival = *p++) == 'n') ival = '\n';
+ }
+ if (tk == '"') *data++ = ival;
+ }
+ ++p;
+ if (tk == '"') ival = (int)pp; else tk = Num;
+ return;
+ }
+ else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
+ else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
+ else if (tk == '-') {
+ if (*p == '-') {
+ ++p; tk = Dec;
+ }
+ else if (*p >= '0' && *p <= '9') {
+ if (ival = *p - '0') { p++; while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; }
+ ival = -ival;
+ tk = Num;
+ return;
+ }
+ else tk = Sub; return;
+ }
+ else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
+ else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
+ else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
+ else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
+ else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
+ else if (tk == '^') { tk = Xor; return; }
+ else if (tk == '%') { tk = Mod; return; }
+ else if (tk == '*') { tk = Mul; return; }
+ else if (tk == '[') { tk = Brak; return; }
+ else if (tk == '?') { tk = Cond; return; }
+ else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
+ }
+}
+
+void expr(int lev)
+{
+ int t, *d;
+
+ if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
+ else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; }
+ else if (tk == '"') {
+ *++e = IMM; *++e = ival; next();
+ while (tk == '"') next();
+ data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
+ }
+ else if (tk == Sizeof) {
+ next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
+ ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
+ while (tk == Mul) { next(); ty = ty + PTR; }
+ if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
+ *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);
+ ty = INT;
+ }
+ else if (tk == Id) {
+ d = id; next();
+ if (tk == '(') {
+ next();
+ t = 0;
+ while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
+ next();
+ if (d[Class] == Sys) *++e = d[Val];
+ else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
+ else { printf("%d: bad function call\n", line); exit(-1); }
+ if (t) { *++e = ADJ; *++e = t; }
+ ty = d[Type];
+ }
+ else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }
+ else {
+ if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }
+ else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }
+ else { printf("%d: undefined variable\n", line); exit(-1); }
+ *++e = ((ty = d[Type]) == CHAR) ? LC : LI;
+ }
+ }
+ else if (tk == '(') {
+ next();
+ if (tk == Int || tk == Char) {
+ t = (tk == Int) ? INT : CHAR; next();
+ while (tk == Mul) { next(); t = t + PTR; }
+ if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
+ expr(Inc);
+ ty = t;
+ }
+ else {
+ expr(Assign);
+ if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
+ }
+ }
+ else if (tk == Mul) {
+ next(); expr(Inc);
+ if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
+ *++e = (ty == CHAR) ? LC : LI;
+ }
+ else if (tk == And) {
+ next(); expr(Inc);
+ if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); }
+ ty = ty + PTR;
+ }
+ else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }
+ else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }
+ else if (tk == Add) { next(); expr(Inc); ty = INT; }
+ else if (tk == Sub) {
+ next(); *++e = IMM;
+ if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }
+ ty = INT;
+ }
+ else if (tk == Inc || tk == Dec) {
+ t = tk; next(); expr(Inc);
+ if (*e == LC) { *e = PSH; *++e = LC; }
+ else if (*e == LI) { *e = PSH; *++e = LI; }
+ else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
+ *++e = PSH;
+ *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
+ *++e = (t == Inc) ? ADD : SUB;
+ *++e = (ty == CHAR) ? SC : SI;
+ }
+ else { printf("%d: bad expression\n", line); exit(-1); }
+
+ while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
+ t = ty;
+ if (tk == Assign) {
+ next();
+ if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
+ expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;
+ }
+ else if (tk == Cond) {
+ next();
+ *++e = BZ; d = ++e;
+ expr(Assign);
+ if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
+ *d = (int)(e + 3); *++e = JMP; d = ++e;
+ expr(Cond);
+ *d = (int)(e + 1);
+ }
+ else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }
+ else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; }
+ else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; }
+ else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }
+ else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; }
+ else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; }
+ else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; }
+ else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; }
+ else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; }
+ else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; }
+ else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; }
+ else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }
+ else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }
+ else if (tk == Add) {
+ next(); *++e = PSH; expr(Mul);
+ if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
+ *++e = ADD;
+ }
+ else if (tk == Sub) {
+ next(); *++e = PSH; expr(Mul);
+ if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }
+ else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }
+ else *++e = SUB;
+ }
+ else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }
+ else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }
+ else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }
+ else if (tk == Inc || tk == Dec) {
+ if (*e == LC) { *e = PSH; *++e = LC; }
+ else if (*e == LI) { *e = PSH; *++e = LI; }
+ else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
+ *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
+ *++e = (tk == Inc) ? ADD : SUB;
+ *++e = (ty == CHAR) ? SC : SI;
+ *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
+ *++e = (tk == Inc) ? SUB : ADD;
+ next();
+ }
+ else if (tk == Brak) {
+ next(); *++e = PSH; expr(Assign);
+ if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
+ if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
+ else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
+ *++e = ADD;
+ *++e = ((ty = t - PTR) == CHAR) ? LC : LI;
+ }
+ else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
+ }
+}
+
+void stmt()
+{
+ int *a, *b;
+
+ if (tk == If) {
+ next();
+ if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
+ expr(Assign);
+ if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
+ *++e = BZ; b = ++e;
+ stmt();
+ if (tk == Else) {
+ *b = (int)(e + 3); *++e = JMP; b = ++e;
+ next();
+ stmt();
+ }
+ *b = (int)(e + 1);
+ }
+ else if (tk == While) {
+ next();
+ a = e + 1;
+ if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
+ expr(Assign);
+ if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
+ *++e = BZ; b = ++e;
+ stmt();
+ *++e = JMP; *++e = (int)a;
+ *b = (int)(e + 1);
+ }
+ else if (tk == Return) {
+ next();
+ if (tk != ';') expr(Assign);
+ *++e = LEV;
+ if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
+ }
+ else if (tk == '{') {
+ next();
+ while (tk != '}') stmt();
+ next();
+ }
+ else if (tk == ';') {
+ next();
+ }
+ else {
+ expr(Assign);
+ if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int fd, bt, ty, poolsz, *idmain;
+ int *pc, *sp, *bp, a, cycle; // vm registers
+ int i, *t; // temps
+
+ --argc; ++argv;
+ if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
+ if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; }
+ if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; }
+
+ if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }
+
+ poolsz = 256*1024; // arbitrary size
+ if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; }
+ if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
+ if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
+ if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
+
+ memset(sym, 0, poolsz);
+ memset(e, 0, poolsz);
+ memset(data, 0, poolsz);
+
+ p = "char else enum if int void return sizeof while "
+ "EOF EXIT_SUCCESS "
+ "open read close printf puts getchar putchar malloc free memset memcmp exit void main";
+ i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
+ // add library constants
+ next(); id[Class] = Num; id[Type] = INT; id[Val] = -1;
+ next(); id[Class] = Num; id[Type] = INT; id[Val] = 0;
+ i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
+ next(); id[Tk] = Char; // handle void type
+ next(); idmain = id; // keep track of main
+
+ if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
+ if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; }
+ p[i] = 0;
+ close(fd);
+
+ // parse declarations
+ line = 1;
+ next();
+ while (tk) {
+ bt = INT; // basetype
+ if (tk == Int) next();
+ else if (tk == Char) { next(); bt = CHAR; }
+ else if (tk == Enum) {
+ next();
+ if (tk != '{') next();
+ if (tk == '{') {
+ next();
+ i = 0;
+ while (tk != '}') {
+ if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; }
+ next();
+ if (tk == Assign) {
+ next();
+ if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; }
+ i = ival;
+ next();
+ }
+ id[Class] = Num; id[Type] = INT; id[Val] = i++;
+ if (tk == ',') next();
+ }
+ next();
+ }
+ }
+ while (tk != ';' && tk != '}') {
+ ty = bt;
+ while (tk == Mul) { next(); ty = ty + PTR; }
+ if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
+ if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
+ next();
+ id[Type] = ty;
+ if (tk == '(') { // function
+ id[Class] = Fun;
+ id[Val] = (int)(e + 1);
+ next(); i = 0;
+ while (tk != ')') {
+ ty = INT;
+ if (tk == Int) next();
+ else if (tk == Char) { next(); ty = CHAR; }
+ else if (tk == Void) { printf("here\n"); }
+ while (tk == Mul) { next(); ty = ty + PTR; }
+ if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
+ if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
+ id[HClass] = id[Class]; id[Class] = Loc;
+ id[HType] = id[Type]; id[Type] = ty;
+ id[HVal] = id[Val]; id[Val] = i++;
+ next();
+ if (tk == ',') next();
+ }
+ next();
+ if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
+ loc = ++i;
+ next();
+ while (tk == Int || tk == Char) {
+ bt = (tk == Int) ? INT : CHAR;
+ next();
+ while (tk != ';') {
+ ty = bt;
+ while (tk == Mul) { next(); ty = ty + PTR; }
+ if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
+ if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
+ id[HClass] = id[Class]; id[Class] = Loc;
+ id[HType] = id[Type]; id[Type] = ty;
+ id[HVal] = id[Val]; id[Val] = ++i;
+ next();
+ if (tk == ',') next();
+ }
+ next();
+ }
+ *++e = ENT; *++e = i - loc;
+ while (tk != '}') stmt();
+ *++e = LEV;
+ id = sym; // unwind symbol table locals
+ while (id[Tk]) {
+ if (id[Class] == Loc) {
+ id[Class] = id[HClass];
+ id[Type] = id[HType];
+ id[Val] = id[HVal];
+ }
+ id = id + Idsz;
+ }
+ }
+ else {
+ id[Class] = Glo;
+ id[Val] = (int)data;
+ data = data + sizeof(int);
+ }
+ if (tk == ',') next();
+ }
+ next();
+ }
+
+ if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; }
+ if (src) return 0;
+
+ // setup stack
+ bp = sp = (int *)((int)sp + poolsz);
+ *--sp = EXIT; // call exit if main returns
+ *--sp = PSH; t = sp;
+ *--sp = argc;
+ *--sp = (int)argv;
+ *--sp = (int)t;
+
+ // run...
+ cycle = 0;
+ while (1) {
+ i = *pc++; ++cycle;
+ if (debug) {
+ printf("%d> %.4s", cycle,
+ &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
+ "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
+ "OPEN,READ,CLOS,PRTF,PUTS,GETC,PUTC,MALC,FREE,MSET,MCMP,EXIT,"[i * 5]);
+ if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
+ }
+ if (i == LEA) a = (int)(bp + *pc++); // load local address
+ else if (i == IMM) a = *pc++; // load global address or immediate
+ else if (i == JMP) pc = (int *)*pc; // jump
+ else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine
+ else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero
+ else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero
+ else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine
+ else if (i == ADJ) sp = sp + *pc++; // stack adjust
+ else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine
+ else if (i == LI) a = *(int *)a; // load int
+ else if (i == LC) a = *(char *)a; // load char
+ else if (i == SI) *(int *)*sp++ = a; // store int
+ else if (i == SC) a = *(char *)*sp++ = a; // store char
+ else if (i == PSH) *--sp = a; // push
+
+ else if (i == OR) a = *sp++ | a;
+ else if (i == XOR) a = *sp++ ^ a;
+ else if (i == AND) a = *sp++ & a;
+ else if (i == EQ) a = *sp++ == a;
+ else if (i == NE) a = *sp++ != a;
+ else if (i == LT) a = *sp++ < a;
+ else if (i == GT) a = *sp++ > a;
+ else if (i == LE) a = *sp++ <= a;
+ else if (i == GE) a = *sp++ >= a;
+ else if (i == SHL) a = *sp++ << a;
+ else if (i == SHR) a = *sp++ >> a;
+ else if (i == ADD) a = *sp++ + a;
+ else if (i == SUB) a = *sp++ - a;
+ else if (i == MUL) a = *sp++ * a;
+ else if (i == DIV) a = *sp++ / a;
+ else if (i == MOD) a = *sp++ % a;
+
+ else if (i == OPEN) a = open((char *)sp[1], *sp);
+ else if (i == READ) a = read(sp[2], (char *)sp[1], *sp);
+ else if (i == CLOS) a = close(*sp);
+ else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); }
+ else if (i == PUTS) { t = sp + pc[1]; a = printf("%s\n", (char *)t[-1]); }
+ else if (i == GETC) a = getchar();
+ else if (i == PUTC) putchar(*sp);
+ else if (i == MALC) a = (int)malloc(*sp);
+ else if (i == FREE) free((void *)*sp);
+ else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp);
+ else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp);
+ else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
+ else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
+ }
+}
diff --git a/miniany/cc.c b/miniany/cc.c
new file mode 100644
index 0000000..df89ff7
--- /dev/null
+++ b/miniany/cc.c
@@ -0,0 +1,39 @@
+int col;
+int row;
+
+int getChar( )
+{
+ int c;
+
+ c = getchar( );
+ if( c == EOF ) {
+ return c;
+ }
+ col++;
+ if( c == '\n' ) {
+ col = 1;
+ row++;
+ putchar( '$' );
+ }
+ return c;
+}
+
+int main( int argc, char **argv )
+{
+ int c;
+
+ col = 1;
+ row = 1;
+
+ puts( "Hello CC" );
+
+ c = getChar( );
+ while( c != EOF ) {
+ putchar( c );
+ c = getChar( );
+ }
+
+ exit( EXIT_SUCCESS );
+
+ return EXIT_SUCCESS;
+}
diff --git a/miniany/libc-freestanding.c b/miniany/libc-freestanding.c
new file mode 100644
index 0000000..b4e48f8
--- /dev/null
+++ b/miniany/libc-freestanding.c
@@ -0,0 +1,145 @@
+/*
+ * minimal freestanding C library
+ *
+ * works for IA-32 and Linux, uses old INT 80h software interrupts
+ * for system calls.
+ *
+ */
+
+int strlen( char *s )
+{
+ char *p;
+
+ p = s;
+ while( *p != '\0' ) {
+ p++;
+ }
+
+ return p - s;
+}
+
+enum {
+ EXIT_SUCCESS = 0,
+ EXIT_FAILURE = 1
+};
+
+enum {
+ SYSCALL_EXIT = 1,
+ SYSCALL_READ = 3,
+ SYSCALL_WRITE = 4
+};
+
+int errno;
+
+static __attribute__((noinline)) int syscall1( int id, int arg0 )
+{
+ int retval;
+
+ __asm__ volatile( "\
+ push %%ebx\n\
+ mov %1, %%eax\n\
+ mov %2, %%ebx\n\
+ int $0x80\n\
+ mov %%eax, %0\n\
+ pop %%ebx\n" : "=m"( retval ) : "m"( id ), "m"( arg0 )
+ : "eax", "ebx" );
+
+ if( retval < 0 ) {
+ errno = -retval;
+ return -1;
+ }
+
+ return retval;
+}
+
+static __attribute__((noinline)) int syscall3( int id, int arg0, int arg1, int arg2 )
+{
+ int retval;
+
+ __asm__ volatile( "\
+ push %%ebx\n\
+ push %%ecx\n\
+ push %%edx\n\
+ mov %1, %%eax\n\
+ mov %2, %%ebx\n\
+ mov %3, %%ecx\n\
+ mov %4, %%edx\n\
+ int $0x80\n\
+ mov %%eax, %0\n\
+ pop %%edx\n\
+ pop %%ecx\n\
+ pop %%ebx\n" : "=m"( retval ) : "m"( id ), "m"( arg0 ), "m"( arg1 ), "m"( arg2 )
+ : "eax", "ebx", "ecx", "edx" );
+
+ if( retval < 0 ) {
+ errno = -retval;
+ return -1;
+ }
+
+ return retval;
+}
+
+enum {
+ STDIN_FILENO = 0,
+ STDOUT_FILENO = 1,
+ STDERR_FILENO = 2
+};
+
+static void print_char( int fd, int c )
+{
+ char s[1];
+ s[0] = c;
+ syscall3( SYSCALL_WRITE, fd, (int)s, 1 );
+}
+
+static void print_string( char *s )
+{
+ syscall3( SYSCALL_WRITE, STDOUT_FILENO, (int)s, strlen( s ) );
+}
+
+static void print_newline( void )
+{
+ print_char( STDOUT_FILENO, '\n' );
+}
+
+static int read_string( int fd, char *buf, int size )
+{
+ return syscall3( SYSCALL_READ, fd, (int)buf, size );
+}
+
+enum {
+ EOF = -1
+};
+
+int puts( char *s )
+{
+ print_string( s );
+ print_newline( );
+
+ return 1;
+}
+
+int getchar( )
+{
+ int res;
+ char buf[1];
+
+ res = read_string( STDIN_FILENO, buf, 1 );
+ if( res == 0 ) {
+ return EOF;
+ }
+
+ return buf[0];
+}
+
+int putchar( int c )
+{
+ print_char( STDOUT_FILENO, c );
+
+ return c;
+}
+
+void exit( int status )
+{
+ syscall1( SYSCALL_EXIT, status );
+}
diff --git a/miniany/libc-hosted.c b/miniany/libc-hosted.c
new file mode 100644
index 0000000..8cb4d46
--- /dev/null
+++ b/miniany/libc-hosted.c
@@ -0,0 +1,7 @@
+/*
+ * include files for C library of the host. Currently only tested
+ * with glibc 2.31.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>