summaryrefslogtreecommitdiff
path: root/minic
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2018-06-24 21:06:45 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2018-06-24 21:06:45 +0200
commit1dea27eeaf888a48638c6975e9898fae532b87d2 (patch)
tree3ca92a48f549ead1b4238d111533d3c2689642ec /minic
parentbfba16c5074686a5f5252312cc6aa9d19fd375e2 (diff)
downloadcompilertests-1dea27eeaf888a48638c6975e9898fae532b87d2.tar.gz
compilertests-1dea27eeaf888a48638c6975e9898fae532b87d2.tar.bz2
some work on the lexer of minic (reading comments)
Diffstat (limited to 'minic')
-rw-r--r--minic/README90
-rw-r--r--minic/const.h6
-rw-r--r--minic/main.c41
-rw-r--r--minic/minic.c6
-rw-r--r--minic/parse.h13
-rw-r--r--minic/scan.c142
-rw-r--r--minic/scan.h34
-rw-r--r--minic/symbol.c1
-rw-r--r--minic/symbol.h33
-rw-r--r--minic/types.h25
10 files changed, 382 insertions, 9 deletions
diff --git a/minic/README b/minic/README
index c321d53..c25f521 100644
--- a/minic/README
+++ b/minic/README
@@ -6,6 +6,8 @@ minimal C to program the kernel.
design decisions
----------------
+Use enum constants rather than preprocessor constants.
+
Do we allow structs, functions etc. We could go with global variables
only and basic types, makes the compiler simpler, but the code maybe
not so well-structured.
@@ -27,7 +29,11 @@ header file includes
#include "filename.h" (but we can go without preprocessor and make it
a special command in the language itself. We don't want and need
-preprocessor tricks (we think).
+preprocessor tricks (we think). On the other hand we want to use
+only things known to standard compilers so that we can bootstrap
+from a host with a standard compiler.
+
+=> #include "filename.h"
include guards
--------------
@@ -52,6 +58,15 @@ instead of
We could implement a simple preprocessor functionality which only
works with defined symbols (names and no values).
+#pragma once is not portable, but maybe quite well supported?
+
+include_next
+------------
+
+To extend a standard header. I see this only in a hosted environment.
+For instance to shim standard headers if a compiler doesn't provide
+some things (see stdlib.h in abaos/libc).
+
platform switches
-----------------
@@ -63,7 +78,11 @@ platform switches
alternatives?
-Linking to specific implementation files io-host.c vs io-minios.c
+Linking to specific implementation files io-host.c vs io-abaos.c
+
+pimpls and casts for OS-specific structs are not type-safe. On the
+other hand, #ifdefs in structs are also quite dangerous (ABI
+mismatches).
debug code
----------
@@ -74,7 +93,7 @@ debug code
constants
---------
-for array dimentions mainly.
+for array dimensions mainly.
#define ACONSTANT 5
int b[ACONSTANT];
@@ -158,7 +177,72 @@ user of a module uses:
import io;
+shadowing
+---------
+
+Traditional C has it, but is it a good idea? Why was it
+invented? How much does it increase the complexity of symbol management?
+
+TODO: Finding a counter example.
+
+CoffeeScript doesn't have explicit shadowing, so you always have to
+choose proper names.
+
+We have to avoid confusion between assignment and declaration:
+
+x = a;
+int x = a;
+
+One symbol space (pascal) so you cannot have a type 'X' and
+a variable 'X' or a function 'X' in the same scope. Also here
+C deviates and has separate namespaces.
+
+Approaches
+----------
+
+C4
+--
+
+C4 is self-hosting and has the minimum features we need, it lacks
+some things:
+- create object files for running (not just in-memory execution)
+- too many OS dependencies
+- functions are part of the parser
+
+This shows that the compiler is indeed self-hosting:
+
+./c4 c4.c c4.c hello.c
+hello, world
+exit(0) cycle = 9
+exit(0) cycle = 26015
+exit(0) cycle = 10059669
+
+Minimalistic, usable with modifications for bootstrapping a compiler.
+
+lcc
+---
+
+Book. Very good to read, shows practical issues. Sadly the coding style
+is not to our liking. The distinction between front end and back end is very good.
+picoc looks like a bootstrapping interpreter.
+
+qbe
+---
+
+Interesting project, with a bootstrapping minic. Sort of an intermediate
+LLVM-like language, but much simpler.
+
links
-----
https://github.com/alexfru/SmallerC
+https://github.com/rswier/c4
+http://c9x.me/compile/doc/il.html (QBE)
+
+Building
+--------
+
+gcc -I../minilib -g -O0 -m32 -march=i386 -ffreestanding -Werror -Wall -Wno-return-type -pedantic -std=c89 -o minic *.c ../minilib/*.c
+clang -I../minilib -g -O0 -march=i386 -fno-builtin -std=c89 -Werror -Wall -Wno-return-type -o minic *.c ../minilib/*.c
+tcc -I../minilib -g -O0 -march=i386 -fno-builtin -std=c89 -Werror -Wall -Wno-return-type -o minic *.c ../minilib/*.c
+pcc -I../minilib -g -O0 -march=i386 -fno-builtin -std=c89 -Wall -Wno-return-type -o minic *.c ../minilib/*.c
diff --git a/minic/const.h b/minic/const.h
new file mode 100644
index 0000000..3c47234
--- /dev/null
+++ b/minic/const.h
@@ -0,0 +1,6 @@
+#pragma once
+
+/* Size limit for temporary buffers; scan.c uses it for its error-message buffer. */
+enum {
+ MAX_TMP_LEN = 50
+};
+ \ No newline at end of file
diff --git a/minic/main.c b/minic/main.c
new file mode 100644
index 0000000..a902b2e
--- /dev/null
+++ b/minic/main.c
@@ -0,0 +1,41 @@
+#include "stdlib.h"
+#include "arena.h"
+#include "symbol.h"
+#include "scan.h"
+#include "parse.h"
+#include "io.h"
+
+/*
+ * Entry point of the compiler driver.
+ *
+ * Expects exactly two arguments: the source file to read (argv[1]) and the
+ * output file (argv[2]). The output file is required but not written yet.
+ * The source is read completely into memory, a scanner and a parser are
+ * set up on top of it (both with debugging enabled) and the parser is run.
+ *
+ * Returns 1 on a usage or read error; otherwise leaves through exit( 0 ).
+ */
+int main( int argc, char *argv[] )
+{
+    Scanner lexer;
+    Parser syntax;
+    char *text;
+
+    if( argc != 3 ) {
+        print( "USAGE: minic <module.c> <module.bin>" );
+        return 1;
+    }
+
+    text = readallfile( argv[1] );
+    if( !text ) {
+        print( "UNABLE TO READ SOURCE FILE" );
+        return 1;
+    }
+
+    /* the parser pulls its input from the scanner, so the scanner comes first */
+    scanner_init( &lexer, text );
+    parser_init( &syntax, &lexer );
+
+    scanner_debug( &lexer, 1 );
+    parser_debug( &syntax, 1 );
+
+    parser_parse( &syntax );
+
+/* writefile( argv[2], parser.code, DEFAULT_MEMORY_SIZE ); */
+
+    /* tear down in reverse order of construction */
+    parser_done( &syntax );
+    scanner_done( &lexer );
+
+    deallocate( (void **)&text );
+
+    exit( 0 );
+}
diff --git a/minic/minic.c b/minic/minic.c
deleted file mode 100644
index 0031601..0000000
--- a/minic/minic.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#include "arena.h"
-
-int main( int argc, char *argv[] )
-{
- return 0;
-}
diff --git a/minic/parse.h b/minic/parse.h
new file mode 100644
index 0000000..1d44e5c
--- /dev/null
+++ b/minic/parse.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "scan.h"
+
+/*
+ * Parser state: a pointer to a Scanner plus a debug flag.
+ * NOTE(review): presumably `s` is the scanner handed to parser_init() and
+ * `debug` is what parser_debug() sets -- the implementation is not visible
+ * from this header, confirm there.
+ */
+typedef struct Parser {
+ Scanner *s;
+ int debug;
+} Parser;
+
+/*
+ * Parser interface. main.c calls these in the order
+ * parser_init, parser_debug, parser_parse, parser_done.
+ */
+void parser_init( Parser *p, Scanner *s );
+void parser_done( Parser *p );
+void parser_debug( Parser *p, int enable );
+void parser_parse( Parser *p );
diff --git a/minic/scan.c b/minic/scan.c
new file mode 100644
index 0000000..8b2e290
--- /dev/null
+++ b/minic/scan.c
@@ -0,0 +1,142 @@
+#include "scan.h"
+#include "io.h"
+#include "minilib.h"
+#include "const.h"
+#include "stdlib.h"
+#include "string.h"
+
+/*
+ * Bind the scanner to the '\0'-terminated source text `src` and put it into
+ * its start state. Only the pointer is stored; the text is not copied.
+ */
+void scanner_init( Scanner *s, char *src )
+{
+    s->src = src;
+    /* debug output is off until scanner_debug() turns it on; without this
+     * scanner_scan() would test an uninitialized flag when the caller never
+     * calls scanner_debug() */
+    s->debug = 0;
+    scanner_reset( s );
+}
+
+/*
+ * Rewind the scanner to the beginning of its source text:
+ * read pointer at the start, position counters at row 1, column 1.
+ */
+void scanner_reset( Scanner *s )
+{
+    s->pos = s->src;
+    s->row = s->col = 1;
+    /* a blank as current character makes the first skip_whitespace() call
+     * fetch the first real character of the source */
+    s->peek = ' ';
+}
+
+/* Counterpart to scanner_init(); currently there is nothing to release. */
+void scanner_done( Scanner *s )
+{
+}
+
+/*
+ * Read the character at the read pointer, make it the current character
+ * (s->peek) and return it. The column counter is advanced; a newline
+ * starts a new row at column 1.
+ *
+ * The read pointer is never moved beyond the terminating '\0', so calling
+ * this again at the end of the input keeps returning '\0' instead of
+ * reading past the end of the source buffer.
+ */
+static char get_char( Scanner *s )
+{
+    char c = *s->pos;
+
+    if( c != '\0' ) {
+        s->pos++;
+    }
+    s->peek = c;
+
+    if( c == '\n' ) {
+        s->col = 1;
+        s->row++;
+    } else {
+        s->col++;
+    }
+
+    return c;
+}
+
+/*
+ * Look ahead without consuming input.
+ *
+ * Returns the character `seek` positions after the read pointer; seek 0 is
+ * the character the next get_char() call would return. If the input ends
+ * before that position is reached, '\0' is returned.
+ *
+ * The previous loop never re-read the character while stepping forward, so
+ * it always returned the character at the read pointer regardless of seek.
+ */
+static char peek_char( Scanner *s, int seek )
+{
+    char *pos = s->pos;
+
+    while( *pos != '\0' && seek > 0 ) {
+        pos++;
+        seek--;
+    }
+
+    return *pos;
+}
+
+/*
+ * Advance past blanks and tabs. Scanning stops at the first other
+ * character, which is left in s->peek; this includes '\n' and the
+ * terminating '\0', neither of which is skipped here.
+ */
+static void skip_whitespace( Scanner *s )
+{
+    while( s->peek == ' ' || s->peek == '\t' ) {
+        get_char( s );
+    }
+}
+
+/*
+ * Append `src` to the '\0'-terminated string in `dst`, whose total capacity
+ * is `size` bytes. Text that does not fit is cut off; `dst` always stays
+ * '\0'-terminated.
+ */
+static void append_bounded( char *dst, int size, const char *src )
+{
+    int len = 0;
+
+    while( dst[len] != '\0' ) {
+        len++;
+    }
+    while( *src != '\0' && len < size - 1 ) {
+        dst[len] = *src;
+        len++;
+        src++;
+    }
+    dst[len] = '\0';
+}
+
+/*
+ * Report a scanner error as "ERROR in row R, col C: msg" through print()
+ * and then call halt().
+ *
+ * The message is assembled in a fixed buffer of MAX_TMP_LEN bytes. Every
+ * piece is appended with a bounds check, so a long `msg` or large row and
+ * column numbers truncate the output instead of overflowing the buffer as
+ * the previous unchecked strcat() chain could.
+ */
+static void error( Scanner *s, char *msg )
+{
+    char buf[MAX_TMP_LEN];
+    char num[12]; /* enough for a 32-bit int with sign and terminator */
+
+    buf[0] = '\0';
+
+    append_bounded( buf, (int)sizeof( buf ), "ERROR in row " );
+    itoa( s->row, num, 10 );
+    append_bounded( buf, (int)sizeof( buf ), num );
+    append_bounded( buf, (int)sizeof( buf ), ", col " );
+    itoa( s->col, num, 10 );
+    append_bounded( buf, (int)sizeof( buf ), num );
+    append_bounded( buf, (int)sizeof( buf ), ": " );
+    append_bounded( buf, (int)sizeof( buf ), msg );
+
+    print( buf );
+    halt( );
+}
+
+/*
+ * Skip the remainder of a block comment. Must be called with s->peek on
+ * the first character after the opening slash-star.
+ *
+ * On return the comment including its closing star-slash has been consumed
+ * and s->peek holds the first character after the comment.
+ *
+ * Fixes over the previous version:
+ * - a '*' inside the comment that is not followed by '/' no longer counts
+ *   as an unclosed comment;
+ * - reaching the end of the input inside a comment is reported as
+ *   "unclosed comment" instead of scanning past the end of the buffer;
+ * - the closing '/' is consumed here, so the caller does not see it again
+ *   as the start of a new symbol.
+ */
+static void skip_comment( Scanner *s )
+{
+    for( ;; ) {
+        if( s->peek == '\0' ) {
+            error( s, "unclosed comment" );
+            return; /* in case error() comes back: never read past the end */
+        }
+
+        if( s->peek != '*' ) {
+            get_char( s );
+            continue;
+        }
+
+        /* saw '*': only a directly following '/' closes the comment */
+        get_char( s );
+        if( s->peek == '/' ) {
+            get_char( s );
+            return;
+        }
+        /* otherwise look at the current character again, it may be
+         * another '*' or the end of the input */
+    }
+}
+
+/*
+ * Return the next symbol from the source.
+ *
+ * In the current state of the lexer the only symbol produced is S_eof:
+ * - at the terminating '\0' S_eof is returned;
+ * - block comments are skipped and scanning resumes;
+ * - a '/' that does not open a comment is dropped and scanning resumes;
+ * - any other character is reported through error() as
+ *   "unexpected symbol", after which S_eof is returned.
+ *
+ * Only the `sym` member of the returned value is set.
+ * With debugging enabled, trace lines are emitted through print().
+ */
+scanner_Symbol scanner_scan( Scanner *s )
+{
+    scanner_Symbol result;
+
+    for( ;; ) {
+        skip_whitespace( s );
+
+        if( s->peek == '\0' ) {
+            result.sym = S_eof;
+            if( s->debug ) {
+                print( "SCANNER(EOF)" );
+            }
+            return result;
+        }
+
+        if( s->peek != '/' ) {
+            error( s, "unexpected symbol" );
+            result.sym = S_eof;
+            return result;
+        }
+
+        /* a slash: only interesting when it opens a comment */
+        get_char( s );
+        if( s->peek != '*' ) {
+            continue;
+        }
+
+        if( s->debug ) {
+            print( "SCANNER(COMMENT_START)" );
+        }
+        get_char( s );
+        skip_comment( s );
+        if( s->debug ) {
+            print( "SCANNER(COMMENT_END)" );
+        }
+    }
+}
+
+/*
+ * Switch the scanner's debug output on (enable != 0) or off (enable == 0).
+ * Switching it on is announced with a line through print().
+ */
+void scanner_debug( Scanner *s, int enable )
+{
+    s->debug = enable;
+    if( !enable ) {
+        return;
+    }
+
+    print( "SCANNER DEBUGGING ENABLED" );
+}
+
diff --git a/minic/scan.h b/minic/scan.h
new file mode 100644
index 0000000..3e6942a
--- /dev/null
+++ b/minic/scan.h
@@ -0,0 +1,34 @@
+#pragma once
+
+/* Kinds of symbols reported by the scanner. */
+typedef enum scanner_Sym {
+ S_undef,
+ S_eof /* end of input */
+} scanner_Sym;
+
+/* Size in bytes of the string buffer inside scanner_Symbol. */
+enum {
+ MAX_IDENT_LEN = 10
+};
+
+/*
+ * Value returned by scanner_scan(). `sym` is the symbol kind; `data` has
+ * room for either a short string or an integer.
+ * NOTE(review): scan.c only sets `sym` so far. Presumably `data` will carry
+ * identifier text / a numeric value and `tag` some discriminator -- confirm
+ * once the scanner fills them in.
+ */
+typedef struct scanner_Symbol {
+ scanner_Sym sym;
+ union {
+ char s[MAX_IDENT_LEN];
+ int n;
+ } data;
+ int tag;
+} scanner_Symbol;
+
+/* Scanner state over one in-memory, '\0'-terminated source text. */
+typedef struct Scanner {
+ int peek; /* current character: the one most recently read */
+ int row; /* row counter, starts at 1 */
+ int col; /* column counter, starts at 1 */
+ char *src; /* start of the source text */
+ char *pos; /* read pointer: next character to be read */
+ int debug; /* non-zero: trace scanning through print() */
+} Scanner;
+
+/* Attach the scanner to the source text `src` and reset it. */
+void scanner_init( Scanner *s, char *src );
+/* Rewind to the start of the source text (row 1, column 1). */
+void scanner_reset( Scanner *s );
+/* Counterpart to scanner_init(). */
+void scanner_done( Scanner *s );
+/* Return the next symbol; S_eof at the end of the input. */
+scanner_Symbol scanner_scan( Scanner *s );
+/* Enable (non-zero) or disable (zero) debug output. */
+void scanner_debug( Scanner *s, int enable );
diff --git a/minic/symbol.c b/minic/symbol.c
new file mode 100644
index 0000000..41fc105
--- /dev/null
+++ b/minic/symbol.c
@@ -0,0 +1 @@
+#include "symbol.h"
diff --git a/minic/symbol.h b/minic/symbol.h
new file mode 100644
index 0000000..ed31013
--- /dev/null
+++ b/minic/symbol.h
@@ -0,0 +1,33 @@
+#pragma once
+
+/*
+ * A named symbol with a link to another Symbol.
+ * NOTE(review): `upper` presumably points to the same name in an enclosing
+ * scope -- nothing uses it yet, confirm when the table is implemented.
+ */
+typedef struct Symbol {
+ char *name;
+ struct Symbol *upper;
+} Symbol;
+
+/*
+ * Symbol table node; so far it only links to a previous Table.
+ * NOTE(review): presumably one table per scope level -- confirm.
+ */
+typedef struct Table {
+ struct Table *previous;
+} Table;
+
+/*
+ * Scope state; so far only a level number.
+ * NOTE(review): presumably the nesting depth maintained by scope_enter()
+ * and scope_exit() -- no implementation exists yet, confirm.
+ */
+typedef struct Scope {
+ int level;
+} Scope;
+
+/*
+ * Scope and symbol-table interface. Only declarations so far; symbol.c
+ * does not define any of these functions yet.
+ *
+ * Each symbol category (plain symbols, labels, constants, temporaries)
+ * gets an install/lookup pair taking the scope and the symbol name.
+ */
+void scope_enter( Scope *scope );
+void scope_exit( Scope *scope );
+
+Symbol scope_install_symbol( Scope *scope, const char *name );
+Symbol scope_lookup_symbol( Scope *scope, const char *name );
+
+/* the label pair was declared twice; one declaration is enough */
+Symbol scope_install_label( Scope *scope, const char *name );
+Symbol scope_lookup_label( Scope *scope, const char *name );
+
+Symbol scope_install_constant( Scope *scope, const char *name );
+Symbol scope_lookup_constant( Scope *scope, const char *name );
+
+Symbol scope_install_temporary( Scope *scope, const char *name );
+/* was misspelled "scope_lookip_temporary" */
+Symbol scope_lookup_temporary( Scope *scope, const char *name );
diff --git a/minic/types.h b/minic/types.h
new file mode 100644
index 0000000..8955923
--- /dev/null
+++ b/minic/types.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <stdbool.h>
+
+/* Type constructors / basic kinds a Type can have. */
+typedef enum TypeOp {
+ TYPE_OP_CHAR,
+ TYPE_OP_INT,
+ TYPE_OP_POINTER
+} TypeOp;
+
+/* A type descriptor; so far it consists only of its kind. */
+typedef struct Type {
+ TypeOp op;
+} Type;
+
+/* Number of entries in the defaultTypes table. */
+enum {
+ NOF_DEFAULT_TYPES = 1
+};
+
+/*
+ * Table of built-in types; currently just the char type.
+ *
+ * The initializer previously opened with '[' instead of '{', which does not
+ * compile. The inner braces initialize the Type struct of the one element.
+ *
+ * NOTE(review): this defines an object in a header. Including types.h from
+ * more than one .c file will yield duplicate definitions at link time;
+ * consider declaring it `extern` here and defining it in a single .c file.
+ */
+Type defaultTypes[NOF_DEFAULT_TYPES] = {
+    { TYPE_OP_CHAR }
+};
+
+/*
+ * Initialize the type table. Declared with `( void )` so that it is a real
+ * prototype: an empty parameter list leaves the arguments unchecked.
+ */
+void type_table_init( void );
+
+/* NOTE(review): presumably reports whether `t` is the char type -- no implementation exists yet, confirm. */
+bool isChar( Type t );