summaryrefslogtreecommitdiff
path: root/miniany/cc.wg
blob: e5ae626d8dc2a844ac789961735f1b093caaf1f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
WordGrinder dumpfile v3: this is a text file; diff me!
.addons.autosave.enabled: false
.addons.autosave.pattern: "%F.autosave.%T.wg"
.addons.autosave.period: 10
.addons.htmlexport.bold_off: "</b>"
.addons.htmlexport.bold_on: "<b>"
.addons.htmlexport.italic_off: "</i>"
.addons.htmlexport.italic_on: "<i>"
.addons.htmlexport.underline_off: "</u>"
.addons.htmlexport.underline_on: "<u>"
.addons.pagecount.enabled: false
.addons.pagecount.wordsperpage: 250
.addons.scrapbook.document: "Scrapbook"
.addons.scrapbook.pattern: "Item from '%N' at %T:"
.addons.scrapbook.timestamp: true
.addons.smartquotes.doublequotes: false
.addons.smartquotes.leftdouble: "“"
.addons.smartquotes.leftsingle: "‘"
.addons.smartquotes.notinraw: true
.addons.smartquotes.rightdouble: "”"
.addons.smartquotes.rightsingle: "’"
.addons.smartquotes.singlequotes: false
.addons.spellchecker.enabled: false
.addons.spellchecker.usesystemdictionary: true
.addons.spellchecker.useuserdictionary: true
.documents.1.co: 1
.documents.1.cp: 31
.documents.1.cw: 1
.documents.1.margin: 0
.documents.1.name: "main"
.documents.1.sticky_selection: false
.documents.1.viewmode: 1
.documents.1.wordcount: 774
.fileformat: 8
.findtext: "C4"
.menu.accelerators.^@: "ZM"
.menu.accelerators.^B: "SB"
.menu.accelerators.BACKSPACE: "ZDPC"
.menu.accelerators.^C: "EC"
.menu.accelerators.DELETE: "ZDNC"
.menu.accelerators.^DOWN: "ZNP"
.menu.accelerators.DOWN: "ZD"
.menu.accelerators.^E: "ZDW"
.menu.accelerators.EC: "^C"
.menu.accelerators.ECadd: "^M"
.menu.accelerators.ECfind: "^L"
.menu.accelerators.EF: "^F"
.menu.accelerators.EG: "^G"
.menu.accelerators.EN: "^K"
.menu.accelerators.END: "ZE"
.menu.accelerators.EP: "^V"
.menu.accelerators.ER: "^R"
.menu.accelerators.Eredo: "^Y"
.menu.accelerators.ET: "^X"
.menu.accelerators.Eundo: "^Z"
.menu.accelerators.^F: "EF"
.menu.accelerators.FQ: "^Q"
.menu.accelerators.FS: "^S"
.menu.accelerators.^G: "EG"
.menu.accelerators.HOME: "ZH"
.menu.accelerators.^I: "SI"
.menu.accelerators.^K: "EN"
.menu.accelerators.^L: "ECfind"
.menu.accelerators.^LEFT: "ZWL"
.menu.accelerators.LEFT: "ZL"
.menu.accelerators.^M: "ECadd"
.menu.accelerators.^O: "SO"
.menu.accelerators.^P: "SP"
.menu.accelerators.^PGDN: "ZED"
.menu.accelerators.PGDN: "ZPGDN"
.menu.accelerators.^PGUP: "ZBD"
.menu.accelerators.PGUP: "ZPGUP"
.menu.accelerators.^Q: "FQ"
.menu.accelerators.^R: "ER"
.menu.accelerators.^RIGHT: "ZWR"
.menu.accelerators.RIGHT: "ZR"
.menu.accelerators.^S: "FS"
.menu.accelerators.SB: "^B"
.menu.accelerators.S^DOWN: "ZSNP"
.menu.accelerators.SDOWN: "ZSD"
.menu.accelerators.SEND: "ZSE"
.menu.accelerators.SHOME: "ZSH"
.menu.accelerators.SI: "^I"
.menu.accelerators.S^LEFT: "ZSWL"
.menu.accelerators.SLEFT: "ZSL"
.menu.accelerators.SO: "^O"
.menu.accelerators.SP: "^P"
.menu.accelerators.S^PGDN: "ZSED"
.menu.accelerators.SPGDN: "ZSPGDN"
.menu.accelerators.S^PGUP: "ZSBD"
.menu.accelerators.SPGUP: "ZSPGUP"
.menu.accelerators.S^RIGHT: "ZSWR"
.menu.accelerators.SRIGHT: "ZSR"
.menu.accelerators.SU: "^U"
.menu.accelerators.S^UP: "ZSPP"
.menu.accelerators.SUP: "ZSU"
.menu.accelerators.^U: "SU"
.menu.accelerators.^UP: "ZPP"
.menu.accelerators.UP: "ZU"
.menu.accelerators.^V: "EP"
.menu.accelerators.^W: "ZSW"
.menu.accelerators.^X: "ET"
.menu.accelerators.^Y: "Eredo"
.menu.accelerators.^Z: "Eundo"
.menu.accelerators.ZBD: "^PGUP"
.menu.accelerators.ZD: "DOWN"
.menu.accelerators.ZDNC: "DELETE"
.menu.accelerators.ZDPC: "BACKSPACE"
.menu.accelerators.ZDW: "^E"
.menu.accelerators.ZE: "END"
.menu.accelerators.ZED: "^PGDN"
.menu.accelerators.ZH: "HOME"
.menu.accelerators.ZL: "LEFT"
.menu.accelerators.ZM: "^@"
.menu.accelerators.ZNP: "^DOWN"
.menu.accelerators.ZPGDN: "PGDN"
.menu.accelerators.ZPGUP: "PGUP"
.menu.accelerators.ZPP: "^UP"
.menu.accelerators.ZR: "RIGHT"
.menu.accelerators.ZSBD: "S^PGUP"
.menu.accelerators.ZSD: "SDOWN"
.menu.accelerators.ZSE: "SEND"
.menu.accelerators.ZSED: "S^PGDN"
.menu.accelerators.ZSH: "SHOME"
.menu.accelerators.ZSL: "SLEFT"
.menu.accelerators.ZSNP: "S^DOWN"
.menu.accelerators.ZSPGDN: "SPGDN"
.menu.accelerators.ZSPGUP: "SPGUP"
.menu.accelerators.ZSPP: "S^UP"
.menu.accelerators.ZSR: "SRIGHT"
.menu.accelerators.ZSU: "SUP"
.menu.accelerators.ZSW: "^W"
.menu.accelerators.ZSWL: "S^LEFT"
.menu.accelerators.ZSWR: "S^RIGHT"
.menu.accelerators.ZU: "UP"
.menu.accelerators.ZWL: "^LEFT"
.menu.accelerators.ZWR: "^RIGHT"
.name: "/home/abaumann/projects/compilertests/miniany/cc.wg"
.replacetext: ""
.statusbar: true
.current: 1
#1
H1 CC - a self-hosting, bootstrappable, minimal C compiler
H2 Introduction
P On the never-ending quest for a minimal system I found Swieros and C4 (the C compiler in 4 functions). Inspired and intrigued I started to implement my own.
P For abaos (a small operating system of mine, also in C) I cloned the minimal C library, so we can build a freestanding version of C4.
P C4 serves as a test whether my own CC is minimal enough and doesn't use silly functions. Additionally C4 as well as CC are compiled both in a (on Linux) hosted version and a freestanding version. We use a series of compilers like gcc, clang, tcc and pcc to make sure that we are not using silly C constructs.
P In order to be able to port easily we make almost no use of system calls, the ones we need are:
LB brk: for malloc/free, change the start address of the heap segment of the process, if the OS only assigns a single static space, then brk results in a NOP.
LB exit: terminate the process, return does not always work in all combinations (for instance with pcc on Linux). Can be a NOP, we don't require any trickery like atexit and we don't use buffering anywhere (for instance flushing stdout on exit).
LB read/write: read from stdin linearly, write to stdout linearly, this is essentially a model using an input and an output tape. Those two functions must really exist. This basically eliminates the need for a file system which we might not have during early bootstrapping.
P Similarly we simplify the C language to not use certain features which can cause trouble when bootstrapping:
LB variable arguments: though simple in principle (just some pointers into the stack if you use a stack for function parameters), it is not typesafe. And the only example in practice it's really heavily used for is in printf-like functions.
LB preprocessor: it needs a filesystem, we take this outside of the compiler by feeding it an (eventually) concatenated list of *.c files.
LB two types: int and char, so we can interpret memory as words or as bytes.
H2 Local version of C4
P The local version of C4 has the following adaptations and extensions:
LB switch statement from the switch-and-structs branch, adapted c4 itself to use switch statements instead of if's (as in the switch-and-structs branch)
LB struct support from switch-and-structs
LB constants like EOF, EXIT_SUCCESS, NULL
LB standard C block comments along with C++ end of line ones
LB negative enum initializers
LB do/while loops
LB more C functions like isspace, getc, strcmp
LB some simplified functions for printing like putstring, putint, putnl
LB strict C89 conformance, mainly use standard comment blocks, also removed some warnings
LB some casts around malloc and memset to fit to non-void freestanding-libc
LB converted printf to putstring/putint/putnl and some helper functions for error reporting like error()
LB removed all memory leaks
LB de-POSIX-ified, no open/read/close, use getchar from stdin only (don't assume the existence of a file system), this also means we had to create sort of an old style tape-file with FS markers to separate the files piped to c4.
P Note: only too late I discovered that there was a C5 version of the same compiler, which would maybe have served better as a basis.
H2 Examples
H3 Running on the host system using the host's C compiler
P Compiled in either hosted (host libc) or freestanding (our own libc, currently IA-32 Linux kernel syscalls only):
PRE ./build.sh cc hostcc hosted d
PRE ./build.sh cc hostcc freestanding d
PRE ./cc < test1.c > test1.asm
P Create a plain binary from the assembly code:
PRE fasm test1.asm test1.bin
P Disassemble it to verify its correctness:
PRE ndisasm -b32 -o1000000h -a test1.bin
P You can choose gcc, clang, tcc or pcc as host compiler (hostcc).
H3 Running on the host in the C4 interpreter
P Running in C4 interpreter, again, the C4 program can be compiled in hosted or freestanding mode:
PRE ./build.sh c4 hostcc hosted d
PRE ./build.sh c4 hostcc freestanding d
P Here again you can choose the host compiler for compiling C4.
P Then we have to create the standard input for C4 using:
PRE echo -n -e "\034" > EOF
PRE cat cc.c EOF hello.c | ./c4
PRE cat c4.c EOF cc.c EOF hello.c | ./c4
PRE cat c4.c4 EOF c4.c EOF cc.c EOF hello.c | ./c4
P EOF contains the traditional FS (file separator) character in the ASCII character set. Every time c4/c4.c is invoked it reads exactly one input file up to the first FS character (or stops at the end of stdin).
P We can also use -s or -d on every level as follows:
PRE cat cc.c EOF hello.c | ./c4 -d
H2 References
LB "Compiler Construction", Niklaus Wirth
LB https://github.com/DoctorWkt/acwj: a nice series on building a C compiler, step by step with lots of good explanations
LB https://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing, https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method
.