build and test instructions --------------------------- cat libc-freestanding.c ec.c | gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c - # optimized with own libc, syscalls need -fno-omit-frame-pointer otherwise they clobber the stack cat libc-freestanding.c ec.c | gcc -g -O1 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c - cat libc-freestanding.c ec.c | gcc -g -O2 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c - cat libc-freestanding.c ec.c | gcc -g -O3 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec -x c - # to use libc and syscall of the host cat libc-hosted.c ec.c | gcc -g -O0 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o ec -lbsd -x c - cat libc-hosted.c ec.c | gcc -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o ec -lbsd -x c - cat libc-freestanding.c ec.c | clang -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c - # ENOSYS in syscall wrappers, is the optimizer clobbering something here? 
cat libc-freestanding.c ec.c | clang -g -O1 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c - cat libc-freestanding.c ec.c | clang -g -O2 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c - cat libc-freestanding.c ec.c | clang -g -O3 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o ec -x c - # to use libc and syscall of the host cat libc-hosted.c ec.c | clang -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o ec -lbsd -x c - # pcc works fine cat libc-freestanding.c ec.c | pcc -g -O0 -march=i386 -ffreestanding -nostdlib -Wl,-emain -Werror -Wall -std=c89 -o ec -x c - cat libc-freestanding.c ec.c | pcc -g -O1 -march=i386 -ffreestanding -nostdlib -Wl,-emain -Werror -Wall -std=c89 -o ec -x c - # to use libc and syscall of the host # valgrind fails in SIGILL at unhandled instruction bytes: 0xC8 0x4 0x0 0x0 cat libc-hosted.c ec.c | pcc -g -O1 -march=i386 -Werror -Wall -std=c89 -o ec -lbsd -x c - # -nostdlib segfaults with tcc 0.9.27 # hangs with git version above 0.9.27 cat libc-freestanding.c ec.c | tcc -g -m32 -march=i386 -fno-builtin -std=c89 -Werror -Wall -o ec - # needs git version above 0.9.27 cat libc-freestanding.c ec.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o ec - # to use libc and syscall of the host cat libc-hosted.c ec.c | tcc -g -m32 -march=i386 -std=c89 -Werror -Wall -o ec -lbsd - # debugging freestanding compiler cat libc-freestanding.c ec.c > test.c gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o ec test.c assembler --------- cat libc-freestanding.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror 
-Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c - # optimized with own libc, syscalls need -fno-omit-frame-pointer otherwise they clobber the stack cat libc-freestanding.c asm-i386.c | gcc -g -O1 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c - cat libc-freestanding.c asm-i386.c | gcc -g -O2 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c - cat libc-freestanding.c asm-i386.c | gcc -g -O3 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c - # to use libc and syscall of the host cat libc-hosted.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -fno-stack-protector -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c - -lbsd cat libc-hosted.c asm-i386.c | gcc -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o asm-i386 -lbsd -x c - cat libc-freestanding.c asm-i386.c | clang -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c - # ENOSYS in syscall wrappers, is the optimizer clobbering something here? 
cat libc-freestanding.c asm-i386.c | clang -g -O1 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c - cat libc-freestanding.c asm-i386.c | clang -g -O2 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c - cat libc-freestanding.c asm-i386.c | clang -g -O3 -m32 -march=i386 -fno-omit-frame-pointer -ffreestanding -fno-stack-protector -nostdlib -Wl,-emain -Werror -Wall -pedantic -std=c89 -o asm-i386 -x c - # to use libc and syscall of the host cat libc-hosted.c asm-i386.c | clang -g -O3 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o asm-i386 -lbsd -x c - cat libc-freestanding.c asm-i386.c | pcc -g -O1 -march=i386 -ffreestanding -nostdlib -Wl,-emain -Werror -Wall -std=c89 -o asm-i386 -x c - # to use libc and syscall of the host # valgrind fails in SIGILL at unhandled instruction bytes: 0xC8 0x4 0x0 0x0 cat libc-hosted.c asm-i386.c | pcc -g -O1 -march=i386 -Werror -Wall -std=c89 -o asm-i386 -lbsd -x c - cat libc-freestanding.c asm-i386.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o asm-i386 - cat libc-freestanding.c asm-i386.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o asm-i386 - # -nostdlib segfaults with tcc 0.9.27 # hangs with git version above 0.9.27 cat libc-freestanding.c asm-i386.c | tcc -g -m32 -march=i386 -fno-builtin -std=c89 -Werror -Wall -o asm-i386 - # needs git version above 0.9.27 cat libc-freestanding.c asm-i386.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -std=c89 -Werror -Wall -o asm-i386 - # to use libc and syscall of the host cat libc-hosted.c asm-i386.c | tcc -g -m32 -march=i386 -std=c89 -Werror -Wall -o asm-i386 -lbsd - # for debugging freestanding mode cat libc-freestanding.c asm-i386.c > test.c gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror 
-Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 test.c # for debugging hosted mode cat libc-hosted.c asm-i386.c > test.c gcc -g -O0 -m32 -march=i386 -Werror -Wall -pedantic -std=c89 -o asm-i386 -lbsd test.c usage ----- # compile ./ec < test1.e > test1.asm # use the host assembler to produce a binary fasm test1.asm test1.bin # use our own minimalistic assembler ./asm-i386 < test1.asm > test1.bin gcc -g -Wall -std=c99 -o emul emul.c -lunicorn -lcapstone -pthread ./emul test1.bin # run test framework tests/run_tests.sh links ----- 1:10:00 video Hjalfi writes a compiler things I got from cowgol: inner-nested functions/procedures don't do automatic type promotion, maybe something like uint8, int8, etc. no recursion, well, we might need that video Hjalfi writes an assembler no frees as things are freed in the end, well, we don't obey that rule, compiler/assembler should be embeddable and they have local scopes which can be freed while running, thus reducing the memory usage and hence allowing bigger modules to be compiled/assembled. hashtables as simple table on the first character + list, avoids complex hashtable classes in C. pass operator precedence as recursive descent variable (as in retargetable C compiler). We prefer the hierarchical approach one file per assembly file, load at fixed ORG (sort of in a.out style). another assembler, 0:15:00 nested functions allow more efficient compiling as local scopes and their local symbols can be dropped. 
syscalls -------- https://www.win.tue.nl/~aeb/linux/lk/lk-4.html memory management ----------------- nice lecture on the topic: http://dmitrysoshnikov.com/compilers/writing-a-memory-allocator/ https://www.informatik.htw-dresden.de/~beck/ASM/syscall_list.html sbrk and brk inspiration drawn from rt0 (git@github.com:lpsantil/rt0.git) malloc inspired by: https://arjunsreedharan.org/post/148675821737/memory-allocators-101-write-a-simple-memory https://github.com/arjun024/memalloc.git full bootstrappable compiler ---------------------------- we also embed the needed syscalls (currently Linux 32-bit only): - exit - read from stdin - write to stdout - brk the parts of libc we need are embedded. this minimalistic libc uses the syscalls. requirements ------------ our parser requires a language with recursive functions const expressions ----------------- const N : integer = 20; M : integer = 2 * N + 3; this needs a small interpreter to create the right constants. If we have a different target architecture we must emulate that target architecture's semantics! const folding is also desirable for functions: function f( x : integer ) : integer begin return 2 * x; end const M : integer = 2 * f( N ); This makes the compiler much more complicated, as we have to basically interpret arbitrary code. const folding eliminates the need for a preprocessor, as we can easily define global constants like platforms, etc. const PLATFORM_BITS : integer = 32; type integer32 : ARRAY[PLATFORM_BITS] OF BIT; there are chicken-and-egg dragons here! but we need it for internal constants like 'true' and 'false' for initializing a constant of 'boolean'. detection of uninitialized variables ------------------------------------ This might be very hard and heuristic (and depend on the compiler optimization level). Simple flows can be statically analyzed, what to do when conditions, loops and complex data structures come into play? 
Also, all statically allocated variables must be initialized (and be it to zero) to get deterministic behaviour. Not a biggie, considering this is done at compile time and doesn't affect runtime. Data on the stack (local variables and parameters) must always be initialized by hand. nesting ------- proper function/procedure nesting implies a lot of things to implement like closures. For now we don't implement them. prototypes/forward declarations ------------------------------- Wirth dropped them (use function variables or nesting), and uses variables of type procedure/function. We should see if it is not easier to implement them as they are merely the same as importing an exported module symbol. Also, not having nesting might require us to use forward references (we have them now in C in both the compiler and the assembler, so). https://github.com/andreaspirklbauer/Oberon-forward-references-of-procedures loops ----- do we need repeat/until alongside while? can we do simpler SIMD optimizations when adding a simple (strictly mathematical) FOR-loop? what about WHILE S1 DO, ELSIF S2 DO, ELSE S3.. like in Oberon? assertions ---------- got added in Oberon, handy for debugging and strict contract-based programming. assembler --------- http://ref.x86asm.net/coder32.html "Art Of Intel x86 Assembly" Intel® 64 and IA-32 Architectures Software Developer’s Manual https://www.felixcloutier.com/x86/index.html http://www.c-jump.com/CIS77/CPU/x86/lecture.html#X77_0140_encoding_add_ecx_eax https://c9x.me/x86/html/file_module_x86_id_147.html