diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2018-09-02 21:24:22 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2018-09-02 21:24:22 +0200 |
commit | 7a66db4c15f8e8661b9cb967be43cbbc67022b11 (patch) | |
tree | 2f581b75e0dc92f19faa92bff44fe021edc34b99 /minie | |
parent | c5c75d786de3e6d21ec146df9e043896feb8dfba (diff) | |
download | compilertests-7a66db4c15f8e8661b9cb967be43cbbc67022b11.tar.gz compilertests-7a66db4c15f8e8661b9cb967be43cbbc67022b11.tar.bz2 |
some work on an E to C converter
Diffstat (limited to 'minie')
-rw-r--r-- | minie/DESIGN | 32 | ||||
-rw-r--r-- | minie/README | 3 | ||||
-rw-r--r-- | minie/e2c.c | 259 | ||||
-rw-r--r-- | minie/test1.e | 4 | ||||
-rw-r--r-- | minie/test2.e | 2 | ||||
-rw-r--r-- | minie/test3.e | 4 |
6 files changed, 304 insertions, 0 deletions
diff --git a/minie/DESIGN b/minie/DESIGN index 35cb30f..2bba202 100644 --- a/minie/DESIGN +++ b/minie/DESIGN @@ -1,3 +1,5 @@ +Premises: + We want to build a simple compiler for a simple language. In the end we want to be self-hosting. @@ -8,6 +10,12 @@ in their own language. Starting with a C compiler is too hard, has too many quirks. +We want to minimize the code we duplicate in more than one language. + +We don't want to maintain too much code in the old language. + +Every tool should, if possible, be written in the new language. + Options: - Choose an existing language or a subset of it, e.g. a mini C @@ -20,3 +28,27 @@ Options: - Bootstrap new language in ever more complex compilers written in the new language itself (as gcc does). - problem: maintain 2, 3, 4 compilers + +- There is a two- or three-language step. We can use O as + destination language for generated code, N for the new language + and write the first tools in a third language X. + +- Language O is just a special backend for the code generator. + So it's the first one we implement. + +Steps: + +- O: old language, well-established, can be ported, can build native code +- O', O'': subset languages of language O with reduced features, O' has + the most features in common with O, O''''' has the fewest features in common with O +- N: new language we want to have a compiler for +- N', N'': subset languages of language N with reduced features + +Step 1: Build a translator from N'' -> O'' written in O, O', O'' + +We also use the O-toolchain for building all artifacts (compiler, assembler, linker). +Try to build minimal subsets of N and use as few features of O as possible for +the generated code. As this is a throw-away piece of code, it doesn't +matter so much how many features of O we use to implement it. 
+ +Step 2: Write compiler in N, with a backend for O'' diff --git a/minie/README b/minie/README index e69de29..eca8290 100644 --- a/minie/README +++ b/minie/README @@ -0,0 +1,3 @@ +gcc -g -O0 -Wall -pedantic -std=c89 -o e2c e2c.c +./e2c < test1.e +./e2c < test2.e diff --git a/minie/e2c.c b/minie/e2c.c new file mode 100644 index 0000000..197c222 --- /dev/null +++ b/minie/e2c.c @@ -0,0 +1,259 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +enum { + MAX_IDENT_LEN = 16, + MAX_ERRMSG_LEN = 64 +}; + +static int look; +static int row; +static int col; +static char ident[MAX_IDENT_LEN]; + +typedef enum { + S_char = 0, + S_ident, + S_module, + S_begin, + S_end, + S_semicolon, + S_eof +} Symbol; + +char *symname[S_eof+1] = { + "char", + "ident", + "module", + "begin", + "end", + ";", + "eof" +}; + +static Symbol sym; + +static void Err( char *s ) +{ + fprintf( stderr, "Error line %d, pos %d: %s\n", row, col, s ); +} + +static void Halt( ) +{ + exit( EXIT_FAILURE ); +} + +static void Abort( char *s ) +{ + Err( s ); + Halt( ); +} + +static int getChar( void ) +{ + int c = getc( stdin ); + if( c == EOF ) { + return c; + } + row++; + if( c == '\n' ) { + row = 1; + col++; + } + return c; +} + +static int isWhite( int c ) +{ + if( c == ' ' || c == '\n' ) return 1; + return 0; +} + +static int isAlpha( int c ) +{ + if( ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' ) ) return 1; + return 0; +} + +static int isDigit( int c ) +{ + if( ( c >= '0' && c <= '9' ) ) return 1; + return 0; +} + +static void skipWhite( void ) +{ + while( isWhite( look ) ) { + look = getChar( ); + } +} + +static void identifier( ) +{ + int n = 0; + if( isAlpha( look ) ) { + ident[n] = look; + n++; + look = getChar( ); + while( ( isAlpha( look ) || isDigit( look ) ) && n < MAX_IDENT_LEN ) { + ident[n] = look; + n++; + look = getChar( ); + } + ident[n] = '\0'; + if( n == MAX_IDENT_LEN ) { + Abort( "Identifier exceeded maximal length" ); + } + sym = S_ident; + } +} + +static Symbol 
getSym( ) +{ + look = getChar( ); + skipWhite( ); + ident[0] = '\0'; + switch( look ) { + case 'a': + case 'b': + identifier( ); + if( strcmp( ident, "begin" ) == 0 ) { + return S_begin; + } + break; + case 'c': + case 'd': + case 'e': + identifier( ); + if( strcmp( ident, "end" ) == 0 ) { + return S_end; + } + break; + case 'f': + case 'g': + case 'h': + case 'j': + case 'i': + case 'k': + case 'l': + identifier( ); + break; + case 'm': + identifier( ); + if( strcmp( ident, "module" ) == 0 ) { + return S_module; + } + break; + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + identifier( ); + break; + case ';': + return S_semicolon; + case EOF: + return S_eof; + default: + Abort( "Illegal character" ); + } + return S_char; +} + +static void Expect( Symbol expect ) +{ + if( sym == expect ) { + sym = getSym( ); + } else { + char s[MAX_ERRMSG_LEN]; + s[0] = '\0'; + strncat( s, "Expected symbol '", MAX_ERRMSG_LEN ); + strncat( s, symname[expect], MAX_ERRMSG_LEN ); + strncat( s, "'", MAX_ERRMSG_LEN ); + s[MAX_ERRMSG_LEN-1] = '\0'; + Abort( s ); + } +} + +static void emit( char *s ) +{ + puts( s ); +} + +static void prologue( void ) +{ + emit( "/* generated with e2c */" ); +} + +static void init( void ) +{ + look = 0; + col = 1; + row = 1; + ident[0] = '\0'; + sym = getSym( ); +} + +static void epilogue( void ) +{ +} + +static void block( void ) +{ + Expect( S_begin ); + Expect( S_end ); +} + +static void module( void ) +{ + Expect( S_module ); + identifier( ); + Expect( S_semicolon ); + block( ); +} + +int main( void ) +{ + prologue( ); + init( ); + module( 
); + epilogue( ); + + exit( EXIT_SUCCESS ); +} diff --git a/minie/test1.e b/minie/test1.e new file mode 100644 index 0000000..9ee3bab --- /dev/null +++ b/minie/test1.e @@ -0,0 +1,4 @@ +module test1; + +begin +end diff --git a/minie/test2.e b/minie/test2.e new file mode 100644 index 0000000..fce2779 --- /dev/null +++ b/minie/test2.e @@ -0,0 +1,2 @@ +begin +end diff --git a/minie/test3.e b/minie/test3.e new file mode 100644 index 0000000..1b9f2b3 --- /dev/null +++ b/minie/test3.e @@ -0,0 +1,4 @@ +module test3; + +begin +end |