summaryrefslogtreecommitdiff
path: root/minie
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2018-09-02 21:24:22 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2018-09-02 21:24:22 +0200
commit7a66db4c15f8e8661b9cb967be43cbbc67022b11 (patch)
tree2f581b75e0dc92f19faa92bff44fe021edc34b99 /minie
parentc5c75d786de3e6d21ec146df9e043896feb8dfba (diff)
downloadcompilertests-7a66db4c15f8e8661b9cb967be43cbbc67022b11.tar.gz
compilertests-7a66db4c15f8e8661b9cb967be43cbbc67022b11.tar.bz2
some work on an E to C converter
Diffstat (limited to 'minie')
-rw-r--r--minie/DESIGN32
-rw-r--r--minie/README3
-rw-r--r--minie/e2c.c259
-rw-r--r--minie/test1.e4
-rw-r--r--minie/test2.e2
-rw-r--r--minie/test3.e4
6 files changed, 304 insertions, 0 deletions
diff --git a/minie/DESIGN b/minie/DESIGN
index 35cb30f..2bba202 100644
--- a/minie/DESIGN
+++ b/minie/DESIGN
@@ -1,3 +1,5 @@
+Premises:
+
We want to build a simple compiler for a simple language.
In the end we want to be self-hosting.
@@ -8,6 +10,12 @@ in their own language.
Starting with a C compiler is too hard, has too many quirks.
+We want to minimize the code we have to duplicate in more than one language.
+
+We don't want to maintain too much code in the old language.
+
+If possible, every tool should be written in the new language.
+
Options:
- Choose an existing language or a subset of it, e.g. a mini C
@@ -20,3 +28,27 @@ Options:
- Bootstrap new language in ever more complex compilers written
in the new language itself (as gcc does).
- problem: maintain 2, 3, 4 compilers
+
+- There is a two- or three-language setup. We can use O as the
+ destination language for generated code and N as the new language,
+ and write the first tools in a third language X.
+
+- Language O is just a special backend for the code generator.
+ So it's the first one we implement.
+
+Steps:
+
+- O: old language, well-established, can be ported, can build native code
+- O', O'': subset languages of language O with reduced features, O' has
+ most features in common with O, O''''' has least features in common with O
+- N: new language we want to have a compiler for
+- N', N'': subset languages of language N with reduced features
+
+Step 1: Build a translator from N'' -> O'' written in O, O', O''
+
+We also use the O-toolchain for building all artifacts (compiler, assembler, linker).
+Try to build minimal subsets of N and use as few features of O as possible for
+the generated code. As the translator is a throw-away piece of code, it doesn't
+matter so much how many features of O we use to implement it.
+
+Step 2: Write compiler in N, with a backend for O''
diff --git a/minie/README b/minie/README
index e69de29..eca8290 100644
--- a/minie/README
+++ b/minie/README
@@ -0,0 +1,3 @@
+gcc -g -O0 -Wall -pedantic -std=c89 -o e2c e2c.c
+./e2c < test1.e
+./e2c < test2.e
diff --git a/minie/e2c.c b/minie/e2c.c
new file mode 100644
index 0000000..197c222
--- /dev/null
+++ b/minie/e2c.c
@@ -0,0 +1,259 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+enum { /* compile-time buffer sizes used by the scanner and the error reporting */
+ MAX_IDENT_LEN = 16, /* number of elements of ident[] */
+ MAX_ERRMSG_LEN = 64 /* number of elements of the message buffer in Expect( ) */
+};
+
+static int look; /* lookahead character, assigned from getChar( ); may be EOF */
+static int row; /* position counter printed by Err( ) as "line" */
+static int col; /* position counter printed by Err( ) as "pos" */
+static char ident[MAX_IDENT_LEN]; /* text collected by identifier( ); emptied at the start of getSym( ) */
+
+typedef enum { /* symbols handled by the scanner; the values index symname[] */
+ S_char = 0, /* fallback result at the end of getSym( ) */
+ S_ident, /* assigned to sym by identifier( ) */
+ S_module, /* keyword "module" */
+ S_begin, /* keyword "begin" */
+ S_end, /* keyword "end" */
+ S_semicolon, /* the character ';' */
+ S_eof /* end of input; must stay last, symname[] is sized S_eof+1 */
+} Symbol;
+
+/*
+ * Printable name of every Symbol, indexed by the Symbol value. Used by
+ * Expect( ) to build its error message. The entries point to string
+ * literals, which must never be written to, hence the const qualifier.
+ */
+const char *symname[S_eof+1] = {
+    "char",
+    "ident",
+    "module",
+    "begin",
+    "end",
+    ";",
+    "eof"
+};
+
+static Symbol sym; /* current symbol; Expect( ) compares it and replaces it with getSym( ) */
+
+/*
+ * Prints the message 's' to stderr, prefixed with the current scanner
+ * position (row is printed as the line, col as the position).
+ * The message is only read, so it is taken as a pointer to const; string
+ * literals and writable buffers can both be passed.
+ * Does not terminate the program.
+ */
+static void Err( const char *s )
+{
+    fprintf( stderr, "Error line %d, pos %d: %s\n", row, col, s );
+}
+
+/*
+ * Terminates the program with a failure exit status.
+ * Declared with a ( void ) prototype: in C89 an empty parameter list
+ * declares a function without a prototype, so arguments passed by mistake
+ * would not be diagnosed.
+ */
+static void Halt( void )
+{
+    exit( EXIT_FAILURE );
+}
+
+/*
+ * Fatal error: reports the message 's' through Err( ) and then ends the
+ * program with a failure exit status. Never returns.
+ */
+static void Abort( char *s )
+{
+    Err( s );
+    exit( EXIT_FAILURE );
+}
+
+/*
+ * Reads the next character from stdin and keeps the position counters
+ * used by Err( ) up to date.
+ *
+ * Err( ) prints 'row' as the line and 'col' as the position inside the
+ * line, so every character advances 'col' and a newline advances 'row'
+ * and restarts 'col' at 1.
+ *
+ * Returns the character read, or EOF at end of input (the counters are
+ * left untouched in that case).
+ */
+static int getChar( void )
+{
+    int c = getc( stdin );
+
+    if( c == EOF ) {
+        return c;
+    }
+    if( c == '\n' ) {
+        row++;
+        col = 1;
+    } else {
+        col++;
+    }
+    return c;
+}
+
+/* Returns 1 if 'c' is a blank or a newline, 0 otherwise. */
+static int isWhite( int c )
+{
+    return c == ' ' || c == '\n';
+}
+
+/* Returns 1 if 'c' lies in 'A'..'Z' or 'a'..'z', 0 otherwise. */
+static int isAlpha( int c )
+{
+    int upper = ( c >= 'A' && c <= 'Z' );
+    int lower = ( c >= 'a' && c <= 'z' );
+
+    return upper || lower;
+}
+
+/* Returns 1 if 'c' lies in '0'..'9', 0 otherwise. */
+static int isDigit( int c )
+{
+    return c >= '0' && c <= '9';
+}
+
+/*
+ * Advances 'look' until it holds a character for which isWhite( ) is
+ * false (this includes EOF).
+ */
+static void skipWhite( void )
+{
+    for( ; isWhite( look ); look = getChar( ) ) {
+        /* nothing to do, the loop header does all the work */
+    }
+}
+
+/*
+ * Scans an identifier (a letter followed by letters and digits) that
+ * starts at the lookahead character 'look'.
+ *
+ * On success the text is stored NUL-terminated in 'ident', 'sym' is set
+ * to S_ident and 'look' holds the first character after the identifier.
+ * If 'look' is not a letter, nothing is read and nothing is changed.
+ *
+ * An identifier of MAX_IDENT_LEN or more characters aborts the program.
+ * At most MAX_IDENT_LEN - 1 characters are ever stored, so the
+ * terminating NUL always fits into ident[].
+ */
+static void identifier( void )
+{
+    int n = 0;
+
+    if( !isAlpha( look ) ) {
+        return;
+    }
+
+    /* the last element of ident[] is reserved for the terminating NUL */
+    do {
+        ident[n] = look;
+        n++;
+        look = getChar( );
+    } while( ( isAlpha( look ) || isDigit( look ) ) && n < MAX_IDENT_LEN - 1 );
+
+    ident[n] = '\0';
+
+    /* buffer is full but the identifier still continues: too long */
+    if( isAlpha( look ) || isDigit( look ) ) {
+        Abort( "Identifier exceeded maximal length" );
+    }
+    sym = S_ident;
+}
+
+/*
+ * Scans the next symbol from stdin and returns it.
+ *
+ * White space is skipped first. A word starting with a letter is read
+ * with identifier( ) into 'ident' and reported as S_begin, S_end or
+ * S_module if it is one of these keywords, and as S_ident otherwise.
+ * ';' yields S_semicolon, end of input yields S_eof. Any other character
+ * aborts the program with "Illegal character".
+ *
+ * identifier( ) stops on the first character that does not belong to the
+ * word and leaves it in 'look' without consuming it. That character may
+ * already be the next symbol (e.g. the ';' in "test1;"), so it must be
+ * examined by the next call instead of being overwritten by a fresh read.
+ */
+static Symbol getSym( void )
+{
+    /* non-zero while 'look' holds a character read ahead by identifier( ) */
+    static int pending = 0;
+
+    if( pending ) {
+        pending = 0;
+    } else {
+        look = getChar( );
+    }
+    skipWhite( );
+    ident[0] = '\0';
+
+    if( isAlpha( look ) ) {
+        identifier( );
+        pending = 1;
+        if( strcmp( ident, "begin" ) == 0 ) {
+            return S_begin;
+        }
+        if( strcmp( ident, "end" ) == 0 ) {
+            return S_end;
+        }
+        if( strcmp( ident, "module" ) == 0 ) {
+            return S_module;
+        }
+        return S_ident;
+    }
+
+    switch( look ) {
+        case ';':
+            return S_semicolon;
+        case EOF:
+            return S_eof;
+        default:
+            Abort( "Illegal character" );
+            break;
+    }
+    /* not reached, Abort( ) ends the program; keeps the compiler quiet */
+    return S_char;
+}
+
+/*
+ * Checks that the current symbol 'sym' equals 'expect'.
+ *
+ * If it does, the next symbol is fetched with getSym( ). Otherwise the
+ * program is aborted with the message "Expected symbol '<name>'", where
+ * <name> is taken from symname[].
+ *
+ * The third argument of strncat is the maximum number of characters to
+ * append, not the size of the destination, so every call is given the
+ * space that is still free in 's' (minus one for the NUL that strncat
+ * always writes).
+ */
+static void Expect( Symbol expect )
+{
+    char s[MAX_ERRMSG_LEN];
+
+    if( sym == expect ) {
+        sym = getSym( );
+        return;
+    }
+
+    s[0] = '\0';
+    strncat( s, "Expected symbol '", sizeof( s ) - strlen( s ) - 1 );
+    strncat( s, symname[expect], sizeof( s ) - strlen( s ) - 1 );
+    strncat( s, "'", sizeof( s ) - strlen( s ) - 1 );
+    Abort( s );
+}
+
+/* Writes 's' followed by a newline to stdout (one line of generated code). */
+static void emit( char *s )
+{
+    fputs( s, stdout );
+    fputc( '\n', stdout );
+}
+
+/* Emits the comment line that opens the generated output. */
+static void prologue( void )
+{
+    static char banner[] = "/* generated with e2c */";
+
+    emit( banner );
+}
+
+/*
+ * Resets the scanner state (empty identifier, position counters at 1,
+ * no lookahead character) and then reads the first symbol into 'sym'.
+ */
+static void init( void )
+{
+    ident[0] = '\0';
+    row = col = 1;
+    look = 0;
+    sym = getSym( );
+}
+
+static void epilogue( void ) /* called by main( ) after module( ); emits nothing so far */
+{
+}
+
+static void block( void ) /* parses a block: the symbol 'begin' directly followed by 'end' */
+{
+ Expect( S_begin ); /* Expect( ) aborts the program if the current symbol differs */
+ Expect( S_end );
+}
+
+/*
+ * Parses a complete module:
+ *
+ *     "module" ident ";" block
+ *
+ * Every part is matched on the symbol level with Expect( ), which aborts
+ * the program on a mismatch. The module name is matched as S_ident like
+ * any other symbol: once Expect( S_module ) has advanced the scanner, the
+ * name is no longer available character-wise in 'look', so a direct call
+ * of identifier( ) at this point cannot see it and would check nothing.
+ */
+static void module( void )
+{
+    Expect( S_module );
+    Expect( S_ident );
+    Expect( S_semicolon );
+    block( );
+}
+
+/*
+ * Program entry: writes the prologue, initialises the scanner, translates
+ * one module read from stdin and writes the epilogue.
+ * Returns EXIT_SUCCESS; errors end the program earlier through Abort( ).
+ */
+int main( void )
+{
+    prologue( );
+    init( );
+    module( );
+    epilogue( );
+
+    return EXIT_SUCCESS;
+}
diff --git a/minie/test1.e b/minie/test1.e
new file mode 100644
index 0000000..9ee3bab
--- /dev/null
+++ b/minie/test1.e
@@ -0,0 +1,4 @@
+module test1;
+
+begin
+end
diff --git a/minie/test2.e b/minie/test2.e
new file mode 100644
index 0000000..fce2779
--- /dev/null
+++ b/minie/test2.e
@@ -0,0 +1,2 @@
+begin
+end
diff --git a/minie/test3.e b/minie/test3.e
new file mode 100644
index 0000000..1b9f2b3
--- /dev/null
+++ b/minie/test3.e
@@ -0,0 +1,4 @@
+module test3;
+
+begin
+end