summaryrefslogtreecommitdiff
path: root/ecomp-c/README
blob: 4e1e88c88f684a49453858a5b57034c448ea4284 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
build and test instructions
---------------------------

cat libc-freestanding.c ec.c | gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c -
# optimized with own libc, syscalls need -fno-omit-frame-pointer otherwise they clobber the stack
cat libc-freestanding.c ec.c | gcc -g -O1 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c -
cat libc-freestanding.c ec.c | gcc -g -O2 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c -
cat libc-freestanding.c ec.c | gcc -g -O3 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c -
# to use libc and syscall of the host
cat libc-hosted.c ec.c | gcc -g -O0 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o ec -lbsd -x c -
cat libc-hosted.c ec.c | gcc -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o ec -lbsd -x c -

cat libc-freestanding.c ec.c | clang -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c -
# ENOSYS in syscall wrappers, is the optimizer clobbering something here?
cat libc-freestanding.c ec.c | clang -g -O1 -m32 -march=i386 -fno-omit-frame-pointer  -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c -
cat libc-freestanding.c ec.c | clang -g -O2 -m32 -march=i386 -fno-omit-frame-pointer  -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c -
cat libc-freestanding.c ec.c | clang -g -O3 -m32 -march=i386 -fno-omit-frame-pointer  -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c -
# to use libc and syscall of the host
cat libc-hosted.c ec.c | clang -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o ec -lbsd -x c -

# pcc works fine
cat libc-freestanding.c ec.c | pcc -g -O0 -march=i386 -ffreestanding -nostdlib -Wl,-emain -Werror -Wall -std=c89 -o ec -x c -
cat libc-freestanding.c ec.c | pcc -g -O1 -march=i386 -ffreestanding -nostdlib -Wl,-emain -Werror -Wall -std=c89 -o ec -x c -
# to use libc and syscall of the host
# valgrind fails in SIGILL at unhandled instruction bytes: 0xC8 0x4 0x0 0x0
cat libc-hosted.c ec.c | pcc -g -O1 -march=i386 -Werror -Wall -std=c89 -o ec -lbsd -x c -

# -nostdlib segfaults with tcc 0.9.27
# hangs with git version above 0.9.27
cat libc-freestanding.c ec.c | tcc -g -m32 -march=i386 -fno-builtin -std=c89 -Werror -Wall -o ec -
# needs git version above 0.9.27
cat libc-freestanding.c ec.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o ec -
# to use libc and syscall of the host
cat libc-hosted.c ec.c | tcc -g -m32 -march=i386 -std=c89 -Werror -Wall -o ec -lbsd -

# debugging freestanding compiler
cat libc-freestanding.c ec.c > test.c
gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec test.c

assembler
---------

cat libc-freestanding.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -
# optimized with own libc, syscalls need -fno-omit-frame-pointer otherwise they clobber the stack
cat libc-freestanding.c asm-i386.c | gcc -g -O1 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -
cat libc-freestanding.c asm-i386.c | gcc -g -O2 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -
cat libc-freestanding.c asm-i386.c | gcc -g -O3 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -

# to use libc and syscall of the host
cat libc-hosted.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -fno-stack-protector -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c - -lbsd
cat libc-hosted.c asm-i386.c | gcc -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o asm-i386 -lbsd -x c -

cat libc-freestanding.c asm-i386.c | clang -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c -
# ENOSYS in syscall wrappers, is the optimizer clobbering something here?
cat libc-freestanding.c asm-i386.c | clang -g -O1 -m32 -march=i386 -fno-omit-frame-pointer  -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c -
cat libc-freestanding.c asm-i386.c | clang -g -O2 -m32 -march=i386 -fno-omit-frame-pointer  -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c -
cat libc-freestanding.c asm-i386.c | clang -g -O3 -m32 -march=i386 -fno-omit-frame-pointer  -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c -
# to use libc and syscall of the host
cat libc-hosted.c asm-i386.c | clang -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o asm-i386 -lbsd -x c -

cat libc-freestanding.c asm-i386.c | pcc -g -O1 -march=i386 -ffreestanding -nostdlib -Wl,-emain -Werror -Wall -std=c89 -o asm-i386 -x c -
# to use libc and syscall of the host
# valgrind fails in SIGILL at unhandled instruction bytes: 0xC8 0x4 0x0 0x0
cat libc-hosted.c asm-i386.c | pcc -g -O1 -march=i386 -Werror -Wall -std=c89 -o asm-i386 -lbsd -x c -

cat libc-freestanding.c asm-i386.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o asm-i386 -
cat libc-freestanding.c asm-i386.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o asm-i386 -

# -nostdlib segfaults with tcc 0.9.27
# hangs with git version above 0.9.27
cat libc-freestanding.c asm-i386.c | tcc -g -m32 -march=i386 -fno-builtin -std=c89 -Werror -Wall -o asm-i386 -
# needs git version above 0.9.27
cat libc-freestanding.c asm-i386.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o asm-i386 -
# to use libc and syscall of the host
cat libc-hosted.c asm-i386.c | tcc -g -m32 -march=i386 -std=c89 -Werror -Wall -o asm-i386 -lbsd -

# for debugging freestanding mode
cat libc-freestanding.c asm-i386.c > test.c
gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 test.c

# for debugging hosted mode
cat libc-hosted.c asm-i386.c > test.c
gcc -g -O0 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o asm-i386 -lbsd test.c

usage
-----

# compile
./ec < test1.e > test1.asm
# use the host assembler to produce a binary
fasm test1.asm test1.bin
# use our own minimalistic assembler
./asm-i386 < test1.asm > test1.bin
gcc -g -Wall -std=c99 -o emul emul.c -lunicorn -lcapstone -pthread
./emul test1.bin 

# run test framework
tests/run_tests.sh

links
-----

1:10:00 video Hjalfi writes a compiler

things I got from cowgol:
inner-nested functions/procedures
don't do automatic type promotion, maybe something like uint8, int8, etc.
no recursion, well, we might need that

video Hjalfi writes an assembler

no frees as things are freed in the end, well, we don't obey that rule,
compiler/assembler should be embeddable and they have local scopes which
can be freed while running, thus reducing the memory usage and hence
allowing bigger modules to be compiled/assembled.

hashtables as simple table on the first character + list, avoids complex
hashtable classes in C.

pass operator precedence as recursive descent variable (as in retargetable
C compiler). We prefer the hierarchical approach

one file per assembly file, load at fixed ORG (sort of in a.out style).

syscalls
--------

https://www.win.tue.nl/~aeb/linux/lk/lk-4.html

memory management
-----------------

nice lecture on the topic:
http://dmitrysoshnikov.com/compilers/writing-a-memory-allocator/

https://www.informatik.htw-dresden.de/~beck/ASM/syscall_list.html
sbrk and brk inspiration drawn from rt0 (git@github.com:lpsantil/rt0.git)

malloc inspired by:
https://arjunsreedharan.org/post/148675821737/memory-allocators-101-write-a-simple-memory
https://github.com/arjun024/memalloc.git

full bootstrappable compiler
----------------------------

we also embed the needed syscalls (currently Linux 32-bit only):
- exit
- read from stdin
- write to stdout
- brk
the parts of libc we need are embedded. this minimalistic libc
uses the syscalls.

requirements
------------
 
our parser requires a language with recursive functions

const expressions
-----------------

const
	N : integer = 20;
	M : integer = 2 * N + 3;

this needs a small interpreter to create the right constants. If we
have a different target architecture we must emulate that target
architecture's semantic!

const folding also is desirable for functions:

function f( x : integer ) : integer
begin
	return 2 * x;
end

const
	M : integer = 2 * f( N );
	
This makes the compiler much more complicated, as we have to basically
interpret arbitrary code.

const folding eliminates the need for a preprocessor, as we can
easily define global constants like platforms, etc.

const
	PLATFORM_BITS : integer = 32;

type
	integer32 : ARRAY[PLATFORM_BITS] OF BIT;
	
there are hen-and-egg dragons here!

but we need it for internal constants like 'true' and 'false' for
initializing a constant of 'boolean'.

detection of uninitialized variables
------------------------------------

This might be very hard and heuristic (and then depend on the compiler
optimization level). Simple flows can be statically analyzed, what
to do when conditions, loops and complex data structures come into play?
Also, all statically allocated variables must be initialized (and be it
to zero) to get deterministic behaviour. Not a biggie, considering this
is done at compile time and doesn't affect runtime.
Data on the stack (local variables and parameters) must always be
initialized by hand.

nesting
-------

proper function/procedure nesting implies a lot of things to implement
like closures. For now we don't implement them.

prototypes/forward declarations
-------------------------------

Wirth dropped them (use function variables or nesting), and uses 
variables of type procedure/function. We should see if it is not
easier to implement them as they are merely the same as importing
an exported module symbol. Also, not having nesting might require
us to use forward references (we have them now in C in both the
compiler and the assembler, so).

https://github.com/andreaspirklbauer/Oberon-forward-references-of-procedures

loops
-----

do we need repeat/until in addition to a while?
can we do simpler SIMD optimizations when adding a simple (strictly
mathematical) FOR-loop?
what about WHILE S1 DO, ELSIF S2 DO, ELSE S3.. like in Oberon?

assertions
----------

got added in Oberon, handy for debugging and strict contract-based
programming.

assembler
---------

http://ref.x86asm.net/coder32.html
"Art Of Intel x86 Assembly"
Intel® 64 and IA-32 Architectures Software Developer’s Manual
https://www.felixcloutier.com/x86/index.html
http://www.c-jump.com/CIS77/CPU/x86/lecture.html#X77_0140_encoding_add_ecx_eax
https://c9x.me/x86/html/file_module_x86_id_147.html