From a4da2b8de09cc73e16da0d6259b8d3d383f8acad Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Thu, 30 Sep 2021 07:46:18 +0000 Subject: some Wordgrinder doku --- miniany/cc.wg | 200 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 miniany/cc.wg diff --git a/miniany/cc.wg b/miniany/cc.wg new file mode 100644 index 0000000..e5ae626 --- /dev/null +++ b/miniany/cc.wg @@ -0,0 +1,200 @@ +WordGrinder dumpfile v3: this is a text file; diff me! +.addons.autosave.enabled: false +.addons.autosave.pattern: "%F.autosave.%T.wg" +.addons.autosave.period: 10 +.addons.htmlexport.bold_off: "" +.addons.htmlexport.bold_on: "" +.addons.htmlexport.italic_off: "" +.addons.htmlexport.italic_on: "" +.addons.htmlexport.underline_off: "" +.addons.htmlexport.underline_on: "" +.addons.pagecount.enabled: false +.addons.pagecount.wordsperpage: 250 +.addons.scrapbook.document: "Scrapbook" +.addons.scrapbook.pattern: "Item from '%N' at %T:" +.addons.scrapbook.timestamp: true +.addons.smartquotes.doublequotes: false +.addons.smartquotes.leftdouble: "“" +.addons.smartquotes.leftsingle: "‘" +.addons.smartquotes.notinraw: true +.addons.smartquotes.rightdouble: "”" +.addons.smartquotes.rightsingle: "’" +.addons.smartquotes.singlequotes: false +.addons.spellchecker.enabled: false +.addons.spellchecker.usesystemdictionary: true +.addons.spellchecker.useuserdictionary: true +.documents.1.co: 1 +.documents.1.cp: 31 +.documents.1.cw: 1 +.documents.1.margin: 0 +.documents.1.name: "main" +.documents.1.sticky_selection: false +.documents.1.viewmode: 1 +.documents.1.wordcount: 774 +.fileformat: 8 +.findtext: "C4" +.menu.accelerators.^@: "ZM" +.menu.accelerators.^B: "SB" +.menu.accelerators.BACKSPACE: "ZDPC" +.menu.accelerators.^C: "EC" +.menu.accelerators.DELETE: "ZDNC" +.menu.accelerators.^DOWN: "ZNP" +.menu.accelerators.DOWN: "ZD" +.menu.accelerators.^E: "ZDW" +.menu.accelerators.EC: "^C" +.menu.accelerators.ECadd: "^M" 
+.menu.accelerators.ECfind: "^L" +.menu.accelerators.EF: "^F" +.menu.accelerators.EG: "^G" +.menu.accelerators.EN: "^K" +.menu.accelerators.END: "ZE" +.menu.accelerators.EP: "^V" +.menu.accelerators.ER: "^R" +.menu.accelerators.Eredo: "^Y" +.menu.accelerators.ET: "^X" +.menu.accelerators.Eundo: "^Z" +.menu.accelerators.^F: "EF" +.menu.accelerators.FQ: "^Q" +.menu.accelerators.FS: "^S" +.menu.accelerators.^G: "EG" +.menu.accelerators.HOME: "ZH" +.menu.accelerators.^I: "SI" +.menu.accelerators.^K: "EN" +.menu.accelerators.^L: "ECfind" +.menu.accelerators.^LEFT: "ZWL" +.menu.accelerators.LEFT: "ZL" +.menu.accelerators.^M: "ECadd" +.menu.accelerators.^O: "SO" +.menu.accelerators.^P: "SP" +.menu.accelerators.^PGDN: "ZED" +.menu.accelerators.PGDN: "ZPGDN" +.menu.accelerators.^PGUP: "ZBD" +.menu.accelerators.PGUP: "ZPGUP" +.menu.accelerators.^Q: "FQ" +.menu.accelerators.^R: "ER" +.menu.accelerators.^RIGHT: "ZWR" +.menu.accelerators.RIGHT: "ZR" +.menu.accelerators.^S: "FS" +.menu.accelerators.SB: "^B" +.menu.accelerators.S^DOWN: "ZSNP" +.menu.accelerators.SDOWN: "ZSD" +.menu.accelerators.SEND: "ZSE" +.menu.accelerators.SHOME: "ZSH" +.menu.accelerators.SI: "^I" +.menu.accelerators.S^LEFT: "ZSWL" +.menu.accelerators.SLEFT: "ZSL" +.menu.accelerators.SO: "^O" +.menu.accelerators.SP: "^P" +.menu.accelerators.S^PGDN: "ZSED" +.menu.accelerators.SPGDN: "ZSPGDN" +.menu.accelerators.S^PGUP: "ZSBD" +.menu.accelerators.SPGUP: "ZSPGUP" +.menu.accelerators.S^RIGHT: "ZSWR" +.menu.accelerators.SRIGHT: "ZSR" +.menu.accelerators.SU: "^U" +.menu.accelerators.S^UP: "ZSPP" +.menu.accelerators.SUP: "ZSU" +.menu.accelerators.^U: "SU" +.menu.accelerators.^UP: "ZPP" +.menu.accelerators.UP: "ZU" +.menu.accelerators.^V: "EP" +.menu.accelerators.^W: "ZSW" +.menu.accelerators.^X: "ET" +.menu.accelerators.^Y: "Eredo" +.menu.accelerators.^Z: "Eundo" +.menu.accelerators.ZBD: "^PGUP" +.menu.accelerators.ZD: "DOWN" +.menu.accelerators.ZDNC: "DELETE" +.menu.accelerators.ZDPC: "BACKSPACE" 
+.menu.accelerators.ZDW: "^E" +.menu.accelerators.ZE: "END" +.menu.accelerators.ZED: "^PGDN" +.menu.accelerators.ZH: "HOME" +.menu.accelerators.ZL: "LEFT" +.menu.accelerators.ZM: "^@" +.menu.accelerators.ZNP: "^DOWN" +.menu.accelerators.ZPGDN: "PGDN" +.menu.accelerators.ZPGUP: "PGUP" +.menu.accelerators.ZPP: "^UP" +.menu.accelerators.ZR: "RIGHT" +.menu.accelerators.ZSBD: "S^PGUP" +.menu.accelerators.ZSD: "SDOWN" +.menu.accelerators.ZSE: "SEND" +.menu.accelerators.ZSED: "S^PGDN" +.menu.accelerators.ZSH: "SHOME" +.menu.accelerators.ZSL: "SLEFT" +.menu.accelerators.ZSNP: "S^DOWN" +.menu.accelerators.ZSPGDN: "SPGDN" +.menu.accelerators.ZSPGUP: "SPGUP" +.menu.accelerators.ZSPP: "S^UP" +.menu.accelerators.ZSR: "SRIGHT" +.menu.accelerators.ZSU: "SUP" +.menu.accelerators.ZSW: "^W" +.menu.accelerators.ZSWL: "S^LEFT" +.menu.accelerators.ZSWR: "S^RIGHT" +.menu.accelerators.ZU: "UP" +.menu.accelerators.ZWL: "^LEFT" +.menu.accelerators.ZWR: "^RIGHT" +.name: "/home/abaumann/projects/compilertests/miniany/cc.wg" +.replacetext: "" +.statusbar: true +.current: 1 +#1 +H1 CC - a self-hosting, bootstrappable, minimal C compiler +H2 Introduction +P On the never-ending quest of a minimal system I found Swieros and C4 (the C compiler in 4 functions). Inspired and intrigued I started to implement my own. +P For abaos (a small operating system of mine, also in C) I cloned the minimal C library, so we can build a freestanding version of C4. +P C4 serves as a test whether my own CC is minimal enough and doesn't use silly functions. Additionally C4 as well as CC are compiled both in a (on Linux) hosted version and a freestanding version. We use a series of compilers like gcc, clang, tcc and pcc to make sure that we are not using silly C constructs. 
+P In order to be able to port easily we make almost no use of system calls, the ones we need are: +LB brk: for malloc/free, change the start address of the heap segment of the process, if the OS only assigns a single static space, then brk results in a NOP. +LB exit: terminate the process, return does not always work in all combinations (for instance with pcc on Linux). Can be a NOP, we don't require any trickery like atexit and we don't use buffering anywhere (for instance flushing stdout on exit). +LB read/write: read from stdin linearly, write to stdout linearly, this is essentially a model using an input and an output tape. Those two functions must really exist. This basically eliminates the need for a file system which we might not have during early bootstrapping. +P Similarly we simplify the C language to not use certain features which can cause trouble when bootstrapping: +LB variable arguments: though simple in principle (just some pointers into the stack if you use a stack for function parameters), it is not typesafe. And the only example in practice it's really heavily used for is in printf-like functions. +LB preprocessor: it needs a filesystem, we take this outside of the compiler by feeding it an (eventually) concatenated list of *.c files. +LB two types: int and char, so we can interpret memory as words or as bytes. 
+H2 Local version of C4 +P The local version of C4 has the following adaptations and extensions: +LB switch statement from the switch-and-structs branch, adapted c4 itself to use switch statements instead of if's (as in the switch-and-structs branch) +LB struct support from switch-and-structs +LB constants like EOF, EXIT_SUCCESS, NULL +LB standard C block comments along with C++ end of line ones +LB negative enum initializers +LB do/while loops +LB more C functions like isspace, getc, strcmp +LB some simplified functions for printing like putstring, putint, putnl +LB strict C89 conformance, mainly use standard comment blocks, also removed some warnings +LB some casts around malloc and memset to fit to non-void freestanding-libc +LB converted printf to putstring/putint/putnl and some helper functions for error reporting like error() +LB removed all memory leaks +LB de-POSIX-ified, no open/read/close, use getchar from stdin only (don't assume the existence of a file system), this also means we had to create sort of an old style tape-file with FS markers to separate the files piped to c4. +P Note: only too late I discovered that there was a C5 version of the same compiler, which would maybe have served better as a basis. +H2 Examples +H3 Running on the host system using the host's C compiler +P Compiled in either hosted (host libc) or freestanding (our own libc, currently IA-32 Linux kernel only syscalls): +PRE ./build.sh cc hostcc hosted d +PRE ./build.sh cc hostcc freestanding d +PRE ./cc < test1.c > test1.asm +P Create a plain binary from the assembly code: +PRE fasm test1.asm test1.bin +P Disassemble it to verify its correctness: +PRE ndisasm -b32 -o1000000h -a test1.bin +P You can choose gcc, clang, tcc or pcc as host compiler (hostcc). 
+H3 Running on the host in the C4 interpreter +P Running in C4 interpreter, again, the C4 program can be compiled in hosted or freestanding mode: +PRE ./build.sh c4 hostcc hosted d +PRE ./build.sh c4 hostcc freestanding d +P Here again you can choose the host compiler for compiling C4. +P Then we have to create the standard input for C4 using: +PRE echo -n -e "\034" > EOF +PRE cat cc.c EOF hello.c | ./c4 +PRE cat c4.c EOF cc.c EOF hello.c | ./c4 +PRE cat c4.c4 EOF c4.c EOF cc.c EOF hello.c | ./c4 +P EOF contains the traditional FS (file separator) character in the ASCII character set. Every time c4/c4.c is invoked it reads exactly one input file up to the first FS character (or stops at the end of stdin). +P We can also use -s or -d on every level as follows: +PRE cat cc.c EOF hello.c | ./c4 -d +H2 References +LB "Compiler Construction", Niklaus Wirth +LB https://github.com/DoctorWkt/acwj: a nice series on building a C compiler, step by step with lots of good explanations +LB https://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing, https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method +. -- cgit v1.2.3-54-g00ecf