diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2018-06-24 21:06:45 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2018-06-24 21:06:45 +0200 |
commit | 1dea27eeaf888a48638c6975e9898fae532b87d2 (patch) | |
tree | 3ca92a48f549ead1b4238d111533d3c2689642ec /minic | |
parent | bfba16c5074686a5f5252312cc6aa9d19fd375e2 (diff) | |
download | compilertests-1dea27eeaf888a48638c6975e9898fae532b87d2.tar.gz compilertests-1dea27eeaf888a48638c6975e9898fae532b87d2.tar.bz2 |
some work on the lexer of minic (reading comments)
Diffstat (limited to 'minic')
-rw-r--r-- | minic/README | 90 | ||||
-rw-r--r-- | minic/const.h | 6 | ||||
-rw-r--r-- | minic/main.c | 41 | ||||
-rw-r--r-- | minic/minic.c | 6 | ||||
-rw-r--r-- | minic/parse.h | 13 | ||||
-rw-r--r-- | minic/scan.c | 142 | ||||
-rw-r--r-- | minic/scan.h | 34 | ||||
-rw-r--r-- | minic/symbol.c | 1 | ||||
-rw-r--r-- | minic/symbol.h | 33 | ||||
-rw-r--r-- | minic/types.h | 25 |
10 files changed, 382 insertions, 9 deletions
diff --git a/minic/README b/minic/README index c321d53..c25f521 100644 --- a/minic/README +++ b/minic/README @@ -6,6 +6,8 @@ minimal C to program the kernel. design desicions ---------------- +Use enum constants rather than preprocessor constants. + Do we allow structs, functions etc. We could go with global variables only and basic types, makes the compiler simpler, but the code maybe not so well-structured. @@ -27,7 +29,11 @@ header file includes #include "filename.h" (but we can go without preprocessor and make it a special command in the language itself. We don't want and need -preprocessor tricks (we think). +preprocessor tricks (we think). On the other hand we want to use +only things known to standard compilers so that we can bootstrap +from a host with a standard compiler. + +=> #include "filename.h" include guards -------------- @@ -52,6 +58,15 @@ instead of We could implement a simple preprocessor functionality which only works with defined symbols (names and no values). +#pragma once is not portable, but maybe quite well supported? + +include_next +------------ + +To extend a standard header. I see this only in a hosted environment. +For instance to shim standard headers if a compiler doesn't provide +some things (see stdlib.h in abaos/libc). + platform switches ----------------- @@ -63,7 +78,11 @@ platform switches alternatives? -Linking to specific implementation files io-host.c vs io-minios.c +Linking to specific implementation files io-host.c vs io-abaos.c + +pimpls and casts for OS-specific structs, not-typesave. on the +other hand #ifdef's in structs are also quite dangerous (ABI +mismatches). debug code ---------- @@ -74,7 +93,7 @@ debug code constants --------- -for array dimentions mainly. +for array dimensions mainly. #define ACONSTANT 5 int b[ACONSTANT]; @@ -158,7 +177,72 @@ user of a module uses: import io; +shadowing +--------- + +Traditional C has it, but is it a good idea? Why has it been +invented? 
How much is the complexity of symbol management increased? + +TODO: Finding a counter example. + +CoffeeScript doesn't have explicit shadowing, so you always have to +choose proper names. + +We have to avoid confusion between assignment and declaration: + +x = a; +int x = a; + +One symbol space (pascal) so you cannot have a type 'X' and +a variable 'X' or a function 'X' in the same scope. Also here +C deviates and has separate namespaces. + +Approaches +---------- + +C4 +-- + +C4 is self-hosting and has the minimum features we need, it lacks +some things: +- create object files for running (not just in-memory execution) +- too many OS dependencies +- functions are part of the parser + +This shows that the compiler is indeed self-hosting: + +./c4 c4.c c4.c hello.c +hello, world +exit(0) cycle = 9 +exit(0) cycle = 26015 +exit(0) cycle = 10059669 + +Minimalistic, usable with modifications for bootstrapping a compiler. + +lcc +--- + +book. very good to read. shows practical issues. sadly the coding style +is not of our liking. the distinction in front and backend is very good. +picoc looks like a bootstrapping interpreter. + +qbe +--- + +Interesting project. with a bootstrapping minic. Sort of an intermediate +LLVM-like language, but much simpler. 
+ links ----- https://github.com/alexfru/SmallerC +https://github.com/rswier/c4 +http://c9x.me/compile/doc/il.html (QBE) + +Building +-------- + +gcc -I../minilib -g -O0 -m32 -march=i386 -ffreestanding -Werror -Wall -Wno-return-type -pedantic -std=c89 -o minic *.c ../minilib/*.c +clang -I../minilib -g -O0 -march=i386 -fno-builtin -std=c89 -Werror -Wall -Wno-return-type -o minic *.c ../minilib/*.c +tcc -I../minilib -g -O0 -march=i386 -fno-builtin -std=c89 -Werror -Wall -Wno-return-type -o minic *.c ../minilib/*.c +pcc -I../minilib -g -O0 -march=i386 -fno-builtin -std=c89 -Wall -Wno-return-type -o minic *.c ../minilib/*.c diff --git a/minic/const.h b/minic/const.h new file mode 100644 index 0000000..3c47234 --- /dev/null +++ b/minic/const.h @@ -0,0 +1,6 @@ +#pragma once + +enum { + MAX_TMP_LEN = 50 +}; +
\ No newline at end of file diff --git a/minic/main.c b/minic/main.c new file mode 100644 index 0000000..a902b2e --- /dev/null +++ b/minic/main.c @@ -0,0 +1,41 @@ +#include "stdlib.h" +#include "arena.h" +#include "symbol.h" +#include "scan.h" +#include "parse.h" +#include "io.h" + +int main( int argc, char *argv[] ) +{ + char *src; + Scanner scanner; + Parser parser; + + if( argc != 3 ) { + print( "USAGE: minic <module.c> <module.bin>" ); + return 1; + } + + src = readallfile( argv[1] ); + if( !src ) { + print( "UNABLE TO READ SOURCE FILE" ); + return 1; + } + + scanner_init( &scanner, src ); + parser_init( &parser, &scanner ); + + scanner_debug( &scanner, 1 ); + parser_debug( &parser, 1 ); + + parser_parse( &parser ); + +/* writefile( argv[2], parser.code, DEFAULT_MEMORY_SIZE ); */ + + parser_done( &parser ); + scanner_done( &scanner ); + + deallocate( (void **)&src ); + + exit( 0 ); +} diff --git a/minic/minic.c b/minic/minic.c deleted file mode 100644 index 0031601..0000000 --- a/minic/minic.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "arena.h" - -int main( int argc, char *argv[] ) -{ - return 0; -} diff --git a/minic/parse.h b/minic/parse.h new file mode 100644 index 0000000..1d44e5c --- /dev/null +++ b/minic/parse.h @@ -0,0 +1,13 @@ +#pragma once + +#include "scan.h" + +typedef struct Parser { + Scanner *s; + int debug; +} Parser; + +void parser_init( Parser *p, Scanner *s ); +void parser_done( Parser *p ); +void parser_debug( Parser *p, int enable ); +void parser_parse( Parser *p ); diff --git a/minic/scan.c b/minic/scan.c new file mode 100644 index 0000000..8b2e290 --- /dev/null +++ b/minic/scan.c @@ -0,0 +1,142 @@ +#include "scan.h" +#include "io.h" +#include "minilib.h" +#include "const.h" +#include "stdlib.h" +#include "string.h" + +void scanner_init( Scanner *s, char *src ) +{ + s->src = src; + scanner_reset( s ); +} + +void scanner_reset( Scanner *s ) +{ + s->peek = ' '; + s->row = 1; + s->col = 1; + s->pos = s->src; +} + +void scanner_done( Scanner *s ) 
+{ +} + +static char get_char( Scanner *s ) +{ + char c; + + c = *s->pos; + s->pos++; + s->peek = c; + s->col++; + if( c == '\n' ) { + s->col = 1; + s->row++; + } + + return c; +} + +static char peek_char( Scanner *s, int seek ) +{ + char c; + char *pos = s->pos; + + for( c = *pos; c != '\0' && seek > 0; pos++, seek-- ); + + return c; +} + +static void skip_whitespace( Scanner *s ) +{ + for( ; ; get_char( s ) ) { + if( s->peek == ' ' || s->peek == '\t' ) { + continue; + } else if( s->peek == '\n' ) { + break; + } else if( s->peek == '\0' ) { + break; + } else { + break; + } + } +} + +static void error( Scanner *s, char *msg ) +{ + char buf[MAX_TMP_LEN]; + char buf2[12]; + buf[0] = '\0'; + + itoa( s->row, buf2, 10 ); + strcat( buf, "ERROR in row " ); + strcat( buf, buf2 ); + itoa( s->col, buf2, 10 ); + strcat( buf, ", col " ); + strcat( buf, buf2 ); + strcat( buf, ": " ); + strcat( buf, msg ); + + print( buf ); + halt( ); +} + +static void skip_comment( Scanner *s ) +{ + while( s->peek != '*' ) { + get_char( s ); + } + get_char( s ); + if( s->peek != '/' ) { + error( s, "unclosed comment" ); + } +} + +scanner_Symbol scanner_scan( Scanner *s ) +{ + scanner_Symbol sym; + + for( ;; ) { + skip_whitespace( s ); + switch( s->peek ) { + case '\0': + sym.sym = S_eof; + if( s->debug ) { + print( "SCANNER(EOF)" ); + } + return sym; + + case '/': + get_char( s ); + if( s->peek == '*' ) { + if( s->debug ) { + print( "SCANNER(COMMENT_START)" ); + } + get_char( s ); + skip_comment( s ); + if( s->debug ) { + print( "SCANNER(COMMENT_END)" ); + } + continue; + } + break; + + default: + error( s, "unexpected symbol" ); + sym.sym = S_eof; + return sym; + } + } +} + +void scanner_debug( Scanner *s, int enable ) +{ + s->debug = enable; + + if( s->debug ) { + print( "SCANNER DEBUGGING ENABLED" ); + } +} + diff --git a/minic/scan.h b/minic/scan.h new file mode 100644 index 0000000..3e6942a --- /dev/null +++ b/minic/scan.h @@ -0,0 +1,34 @@ +#pragma once + +typedef enum scanner_Sym { + 
S_undef, + S_eof +} scanner_Sym; + +enum { + MAX_IDENT_LEN = 10 +}; + +typedef struct scanner_Symbol { + scanner_Sym sym; + union { + char s[MAX_IDENT_LEN]; + int n; + } data; + int tag; +} scanner_Symbol; + +typedef struct Scanner { + int peek; + int row; + int col; + char *src; + char *pos; + int debug; +} Scanner; + +void scanner_init( Scanner *s, char *src ); +void scanner_reset( Scanner *s ); +void scanner_done( Scanner *s ); +scanner_Symbol scanner_scan( Scanner *s ); +void scanner_debug( Scanner *s, int enable ); diff --git a/minic/symbol.c b/minic/symbol.c new file mode 100644 index 0000000..41fc105 --- /dev/null +++ b/minic/symbol.c @@ -0,0 +1 @@ +#include "symbol.h" diff --git a/minic/symbol.h b/minic/symbol.h new file mode 100644 index 0000000..ed31013 --- /dev/null +++ b/minic/symbol.h @@ -0,0 +1,33 @@ +#pragma once + +typedef struct Symbol { + char *name; + struct Symbol *upper; +} Symbol; + +typedef struct Table { + struct Table *previous; +} Table; + +typedef struct Scope { + int level; +} Scope; + +void scope_enter( Scope *scope ); +void scope_exit( Scope *scope ); + +Symbol scope_install_symbol( Scope *scope, const char *name ); + +Symbol scope_lookup_symbol( Scope *scope, const char *name ); + +Symbol scope_install_label( Scope *scope, const char *name ); +Symbol scope_lookup_label( Scope *scope, const char *name ); + +Symbol scope_install_label( Scope *scope, const char *name ); +Symbol scope_lookup_label( Scope *scope, const char *name ); + +Symbol scope_install_constant( Scope *scope, const char *name ); +Symbol scope_lookup_constant( Scope *scope, const char *name ); + +Symbol scope_install_temporary( Scope *scope, const char *name ); +Symbol scope_lookip_temporary( Scope *scope, const char *name ); diff --git a/minic/types.h b/minic/types.h new file mode 100644 index 0000000..8955923 --- /dev/null +++ b/minic/types.h @@ -0,0 +1,25 @@ +#pragma once + +#include <stdbool.h> + +typedef enum TypeOp { + TYPE_OP_CHAR, + TYPE_OP_INT, + 
TYPE_OP_POINTER +} TypeOp; + +typedef struct Type { + TypeOp op; +} Type; + +enum { + NOF_DEFAULT_TYPES = 1 +}; + +Type defaultTypes[NOF_DEFAULT_TYPES] = [ + TYPE_OP_CHAR +}; + +void type_table_init( ); + +bool isChar( Type t ); |