summaryrefslogtreecommitdiff
path: root/minie
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2019-02-22 19:16:24 +0100
committerAndreas Baumann <mail@andreasbaumann.cc>2019-02-22 19:16:24 +0100
commit9f76ed072277ec01ca591e478cf7686914b9e530 (patch)
tree8906b2887b896496974446b6877ba25aa4d788f8 /minie
parent638b7de7d7fb19b756a2f2bc266222b74b4e7295 (diff)
downloadcompilertests-9f76ed072277ec01ca591e478cf7686914b9e530.tar.gz
compilertests-9f76ed072277ec01ca591e478cf7686914b9e530.tar.bz2
work on adding const declarations
introduced symbol table structure
Diffstat (limited to 'minie')
-rw-r--r--minie/TODOS35
-rw-r--r--minie/e2c.c109
-rw-r--r--minie/ec.e25
3 files changed, 138 insertions, 31 deletions
diff --git a/minie/TODOS b/minie/TODOS
index 4c1d2f0..0d53087 100644
--- a/minie/TODOS
+++ b/minie/TODOS
@@ -192,6 +192,41 @@ a+rand( )
helps us to detect it is actually a function, otoh we can get the same
information from the symbol table.
+enums:
+
+Oberon has none.
+You can always use constants or sets, but then the switch statement cannot
+be protected against wrong use of constants. C and Java went the way from
+constants to proper enums.
+=> subtyping problem, extending enums means removing states if it is to be
+defined in a sane way. Now removing states from an enum hardly lets code
+relying on more states behave in a consistent way.
+=> subtype-explosion, enums are just a fancy way of defining integer constants;
+the only practical application I have is to avoid implicit type-coercion to ints
+and to handle the ranges in a state machine switch.
+=> enums used in array subscripts lead to the sub-range problem of pascal/edison
+ unless I force enums to always start from 0,1,2,... as internal representation
+=> OOP has no need for enums, as I can discriminate and extend a basic type,
+e.g. KEYWORD extended to KEYWORD_MODULE, KEYWORD_IF, etc.
+=> enum constants have no const value, so they cannot be used to define an
+ array (or at least, this needs a special cast again)
+compared to functional languages the C-version of enums is quite limited,
+see tagged unions (for instance in Rust).
+
+underscores:
+started when trying to add S_module constant, so de facto a workaround for
+a missing namespace/module called 'Scanner' with constant 'module'. Do we
+forbid _ altogether, as they are a sign of bad modularization or namespace
+emulation? On the other hand we will have longer identifiers, so _ is needed
+to separate words.
+
+AST:
+https://stackoverflow.com/questions/21150454/representing-an-abstract-syntax-tree-in-c
+
+design
+Scanner class or struct vs. OPS module containing all variables. All modules
+in the Oberon compiler act as singletons.
+
links
-----
diff --git a/minie/e2c.c b/minie/e2c.c
index def9d99..da59c30 100644
--- a/minie/e2c.c
+++ b/minie/e2c.c
@@ -4,7 +4,7 @@
#include <stdarg.h>
#include <assert.h>
-/* CONSTANTS */
+/* constants */
enum {
MAX_IDENT_LEN = 24,
@@ -17,7 +17,7 @@ enum {
MAX_TYPE_DATA_SIZE = 2048
};
-/* SCANNER */
+/* scanner */
static int look;
static int row;
@@ -37,6 +37,7 @@ typedef enum {
S_import,
S_procedure,
S_return,
+ S_const,
S_var,
S_if,
S_do,
@@ -82,6 +83,7 @@ char *symname[S_eof+1] = {
"import",
"procedure",
"return",
+ "const",
"var",
"if",
"do",
@@ -172,6 +174,12 @@ static int isDigit( int c )
return 0;
}
+static int isSpecial( int c )
+{
+ if( c == '_' ) return 1;
+ return 0;
+}
+
static int isCharacter( int c )
{
if( isAlpha( c ) ) return 1;
@@ -205,7 +213,7 @@ static void identifier( void )
ident[n] = look;
n++;
look = getChar( );
- while( ( isAlpha( look ) || isDigit( look ) ) && n < MAX_IDENT_LEN ) {
+ while( ( isAlpha( look ) || isDigit( look ) || isSpecial( look ) ) && n < MAX_IDENT_LEN ) {
ident[n] = look;
n++;
look = getChar( );
@@ -303,6 +311,11 @@ next:
}
return S_ident;
case 'c':
+ identifier( );
+ if( strcmp( ident, "const" ) == 0 ) {
+ return S_const;
+ }
+ return S_ident;
case 'd':
identifier( );
if( strcmp( ident, "do" ) == 0 ) {
@@ -496,7 +509,7 @@ next:
return S_char;
}
-/* SYMBOL TABLE */
+/* symbol table */
typedef enum {
TYPE_UNKNOWN,
@@ -570,28 +583,35 @@ struct Type {
} details;
};
-static int nof_symbols = 0;
-static Type symbols[MAX_SYMBOLS];
+typedef struct SymbolTable SymbolTable;
-static void insert_symbol( Type type, char *name )
+struct SymbolTable {
+ int nof_symbols;
+ Type symbols[MAX_SYMBOLS];
+ SymbolTable *parent;
+};
+
+static SymbolTable symbols;
+
+static void insert_symbol( SymbolTable *table, Type type, char *name )
{
- if( nof_symbols >= MAX_SYMBOLS ) {
+ if( table->nof_symbols >= MAX_SYMBOLS ) {
Abort( "Symbol table exhausted, increase MAX_SYMBOLS and recompile e2c" );
}
- symbols[nof_symbols] = type;
- strncpy( symbols[nof_symbols].name, name, MAX_IDENT_LEN );
+ table->symbols[table->nof_symbols] = type;
+ strncpy( table->symbols[table->nof_symbols].name, name, MAX_IDENT_LEN );
- nof_symbols++;
+ table->nof_symbols++;
}
-static Type get_symbol_type( char *name )
+static Type get_symbol_type( SymbolTable *table, char *name )
{
int i;
- for( i = 0; i < nof_symbols; i++ ) {
- if( strcmp( symbols[i].name, name ) == 0 ) {
- return symbols[i];
+ for( i = 0; i < table->nof_symbols; i++ ) {
+ if( strcmp( table->symbols[i].name, name ) == 0 ) {
+ return table->symbols[i];
}
}
@@ -635,7 +655,7 @@ static int length( char *name )
{
Type type;
- type = get_symbol_type( name );
+ type = get_symbol_type( &symbols, name );
if( type.type == TYPE_ARRAY ) {
return type.details.array.len;
} else {
@@ -690,7 +710,7 @@ static void register_internal_functions( void )
type.details.function.return_value = (Type *)( type.details.function.data + sizeof( Type ) );
type.details.function.return_value->type = TYPE_INTEGER;
type.details.function.internal = 1;
- insert_symbol( type, "length" );
+ insert_symbol( &symbols, type, "length" );
/* constructor for char type */
type.type = TYPE_FUNCTION;
@@ -700,7 +720,7 @@ static void register_internal_functions( void )
type.details.function.return_value = (Type *)( type.details.function.data + sizeof( Type ) );
type.details.function.return_value->type = TYPE_CHAR;
type.details.function.internal = 1;
- insert_symbol( type, "char" );
+ insert_symbol( &symbols, type, "char" );
/* TODO: register functions in module system, should be outside in the stage-1 compiler */
@@ -709,7 +729,7 @@ static void register_internal_functions( void )
type.details.function.return_value = (Type *)type.details.function.data;
type.details.function.return_value->type = TYPE_CHAR;
type.details.function.internal = 1;
- insert_symbol( type, "system.readchar" );
+ insert_symbol( &symbols, type, "system.readchar" );
type.type = TYPE_FUNCTION;
type.details.function.len = 1;
@@ -718,7 +738,7 @@ static void register_internal_functions( void )
type.details.function.return_value = (Type *)type.details.function.data;
type.details.function.return_value->type = TYPE_NONE;
type.details.function.internal = 1;
- insert_symbol( type, "system.readline" );
+ insert_symbol( &symbols, type, "system.readline" );
}
static void register_internal_constants( void )
@@ -728,11 +748,11 @@ static void register_internal_constants( void )
type.type = TYPE_CONSTANT;
type.details.constant.type = TYPE_BOOLEAN;
type.details.constant.value.boolean = 1;
- insert_symbol( type, "true" );
+ insert_symbol( &symbols, type, "true" );
type.details.constant.type = TYPE_BOOLEAN;
type.details.constant.value.boolean = 0;
- insert_symbol( type, "false" );
+ insert_symbol( &symbols, type, "false" );
}
static void init( void )
@@ -831,7 +851,7 @@ static void factor( void )
sym = getSym( );
} else if( sym == S_ident ) {
qualident( );
- type = get_symbol_type( varName );
+ type = get_symbol_type( &symbols, varName );
if( type.type == TYPE_FUNCTION ) {
parameterList( );
} else if( type.type == TYPE_CONSTANT ) {
@@ -935,7 +955,7 @@ static void selector( void )
selected_array = 0;
if( sym == S_lbracket ) {
Expect( S_lbracket );
- type = get_symbol_type( varName );
+ type = get_symbol_type( &symbols, varName );
if( type.type != TYPE_ARRAY ) {
Abort( "Selecting element of non-array" );
}
@@ -956,7 +976,7 @@ static void assignment( void )
/* left hand side */
/* precondition: qualident has been already parsed outside */
selector( );
- type = get_symbol_type( varName );
+ type = get_symbol_type( &symbols, varName );
/* x := ( a+ b )- 3; -> x = (a+b)-3;
* s1 := s2; -> strncpy( s2, s1, length( s2 ) );
@@ -1161,7 +1181,7 @@ static void statement( void )
parameterList( );
emitLn( ";" );
} else {
- Type type = get_symbol_type( varName );
+ Type type = get_symbol_type( &symbols, varName );
if( type.type == TYPE_FUNCTION ) {
/* procedure call without parameter */
/* TODO: check number of parameter and return value to be 0 */
@@ -1253,6 +1273,36 @@ static void type( void )
arrayType( );
}
}
+static void constDeclaration( void )
+{
+ /* TODO: constName? */
+ variableName( );
+ sym = getSym( );
+ Expect( S_equals );
+ /* TODO: ConstEpression requires an interpreter */
+ if( sym == S_number ) {
+ number( );
+ sym = getSym( );
+ emitLn( "const int %s = %d;", varName, num );
+ } else {
+ Abort( "Supporting numeric constants only" );
+ }
+}
+
+static void constBlock( void )
+{
+ Expect( S_const );
+ constDeclaration( );
+ if( sym == S_begin || sym == S_var ) return;
+ while( sym == S_semicolon ) {
+ sym = getSym( );
+ if( sym == S_ident ) {
+ constDeclaration( );
+ } else if( sym == S_begin || sym == S_var ) {
+ return;
+ }
+ }
+}
static void variableDeclaration( void )
{
@@ -1260,7 +1310,7 @@ static void variableDeclaration( void )
sym = getSym( );
Expect( S_colon );
type( );
- insert_symbol( lastType, varName );
+ insert_symbol( &symbols, lastType, varName );
if( lastType.type == TYPE_ARRAY ) {
/* TODO: this works for now, though it's not correct */
emitLn( "static %s %s[%d];",
@@ -1355,7 +1405,7 @@ static void procedureDeclaration( void )
}
Expect( S_semicolon );
- insert_symbol( funcType, procName );
+ insert_symbol( &symbols, funcType, procName );
emit( "%s %s( ", return_type, procName );
if( funcType.details.function.len == 0 ) {
@@ -1387,6 +1437,9 @@ static void procedureBlock( void )
static void declarationBlock( void )
{
+ if( sym == S_const ) {
+ constBlock( );
+ }
if( sym == S_var ) {
variableBlock( );
}
diff --git a/minie/ec.e b/minie/ec.e
index b8a4409..1dd1221 100644
--- a/minie/ec.e
+++ b/minie/ec.e
@@ -2,16 +2,22 @@ module ec;
import system;
+const
+ S_module = 1;
+
var
col : integer;
row : integer;
look : char;
+ sym : integer;
procedure Halt;
begin
system.halt( 1 )
end
+(* scanner *)
+
procedure getChar : char;
var
c : char;
@@ -32,9 +38,22 @@ end
procedure isWhite( c : char ) : boolean;
begin
if ( c = char( 0 ) ) or ( c = char( 10 ) ) or ( c = char( 13 ) ) or ( c = char( 9 ) ) do
- return true;
+ return true
+ else
+ return false
+ end
+end
+
+(* parser *)
+
+procedure Expect( expect : integer );
+(* TODO: Error line 51, pos 22: Unknown symbol 'expect': add symbol to local scope
+ and remove it at end of scope/procedure *)
+begin
+ if ( sym = expect ) do
+ sym = getSym( );
else
- return false;
+ Abort( "Expected symbol", symname[expect] );
end
end
@@ -53,7 +72,7 @@ end
procedure doModule;
begin
-(* Expect( S_module ); *)
+ Expect( S_module );
look := getChar( );
while ( look <> char( 0 ) ) do
if not isWhite( look ) do