From 6f060c85719e9a9ebe2429e06e83aa79a01773c7 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Sat, 9 Feb 2019 10:41:05 +0100 Subject: some fixing around LHS/RHS assignment handling for strings --- minie/TODOS | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ minie/e2c.c | 1 + minie/test9.e | 7 ++- 3 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 minie/TODOS (limited to 'minie') diff --git a/minie/TODOS b/minie/TODOS new file mode 100644 index 0000000..95c8423 --- /dev/null +++ b/minie/TODOS @@ -0,0 +1,164 @@ +expression can also be the result of a function (system.readline) +assignment depends on the type of the left-hand-side string (s := system.readline) +or we introduce VAR parameters + +s := system.writeline( ); +system.readline( var s : array of char ); + +the problem here is: we cannot return a structure of arbitrary size +via the stack. so the var version is the one fitting more to one +written in a real system module in language e. The embedded function +version of the pseudo system module looks more like a special assignment +for string arrays. + +types: + +boolean: and or not operators + +integer : + - * / mod, max(int), min(int) + +char: 'A' or char(13) + +byte: 8-bit unsigned value +word: 32-bit addressable value + +enum colors (red, blue, green, yellow) we allow int(red)=0 +what about assignments of explicit values as in C? + +<= <> = (assignment :=) + +const types? + +no floats for now + +structured types: record, array, set + +array a[1:25] + +strings are arrays of variable len? +do we need ranges for arrays? 
+ +a : array[20] of integer; + +is clearer than in Edison: array a[0..20] (int) + +array 10, 10 of integer, that's Oberon syntactic sugar + +another way of representing the length in the last bytes of the array +and also to zero terminate the string (bron dijkstra string): +https://github.com/norayr/Bron-Dijkstra-Strings/blob/master/bdStrings.Mod +Dijkstra\ -\ Efficient\ String.pdf + +--- + +edison-es drops modules. I actually find system.writeln, system.readln quite +appealing. + +writeinteger +writeboolean +writechar +writeln/writeline/writestring, but there is no basic type for a sequence +of chars, is this an array[20] of char? + +explicit skip + +strings as types: "Abc" is a string constant which can be represented as +array[3] of char, but then, how can this be assigned to an array[4] of char? +So types can be assigned if they are compatible, so we can say assigning +an array[3] of char (also 'Abc') to array[4] of char is possible, but +not the other way round as it would violate the boundaries! +array of char is only possible with dynamic memory management, which is +a thing we might not want at all? +0-terminated vs. length. but not 255 Pascal-like, have a RLE schema for +first N bytes. +char can also be unicode, conversion to integer is possible, but not to +byte. Use array[128] of byte for buffers. +certain functions might have to work on arrays of arbitrary size, like +a 'StrCopy' function with 'array of char' with an unknown size. They need +a relaxed type check and delegate checking of boundaries if needed into +the runtime. + +built-in functions like LEN or system.length. length, sizeof sounds +more like a compiler thing, system.length more like a library thing. +Actually. We don't want to say len is platform dependent, so using +length in a piece of code might be very portable. 
+ +So we have an internal set of functions related to compiler things: +- domains of data types +- conversion of data types +- len, size, addr of variables/arrays + +The system module on the other hand contains things which relate +somehow to the environment, e.g. backend, operating system and which +might have to be ported heavily. They are still called inside the +compiler most likely when generating code. + +expressions + +var + b : boolean; + +b := s[i] <> char( 0 ); +if b do + x +end + +is the same as: + +if s[i] <> char( 0 ) do + x +end + +The '<>' operator must return a boolean type. So we just call expressions +inside if as in the assignment (later also in the 'while' condition). + +return expression: only at the end, after statementBlock, or as +"begin" statementlist [ "return" expression ] "end" or +as a semantic thing allowing "return" everywhere but knowing whether +the context is a procedural or a function context, or as in C, allow +it everywhere because everything is a function. + +system.readline( s ) fits more to fgets, but s := system.readline; is +more what I want. + +memory management +----------------- + +options: +- static allocation +- stack-based +- explicit: C malloc/free +- region-based +- thread-local heap +- implicit: + - garbage collection + - ARC: reference counting and weak pointers + +decouple from polymorphism, seems to be a big design problem in +most programming languages. + +dangers in real-time programming: +- priority inversion on locks +- fragmentation of memory, program fails because there is not enough + un-fragmented space, a copying garbage collector might help or + compacting and rewriting pointers, but this is again a real-time + issue if not done incrementally + +how to decouple read-only and read-write parts of the statically +allocated memory? + +Stack only allocation if possible. This also means, temporary structures +can not be trees with pointers. 
This means a transpiler must emit code +(in our case C source code) while parsing, which might be challenging. + +Even better is statically sized local buffers and global statically allocated +structures (e.g. a symbol table with at most 50 types). These limits have +to be adapted and the compiler has to be recompiled. But the benefits are +that you are not using any dynamic memory allocation which can go wrong +in some ways. + +links +----- + +https://hackernoon.com/considerations-for-programming-language-design-a-rebuttal-5fb7ef2fd4ba +https://en.wikibooks.org/wiki/Oberon/A2/Oberon.Strings.Mod diff --git a/minie/e2c.c b/minie/e2c.c index e2d35f2..baaa1b8 100644 --- a/minie/e2c.c +++ b/minie/e2c.c @@ -929,6 +929,7 @@ static void assignment( void ) /* left hand side */ /* precondition: qualident has been already parsed outside */ selector( ); + type = get_symbol_type( varName ); /* x := ( a+ b )- 3; -> x = (a+b)-3; * s1 := s2; -> strncpy( s2, s1, length( s2 ) ); diff --git a/minie/test9.e b/minie/test9.e index e7ce804..c625bce 100644 --- a/minie/test9.e +++ b/minie/test9.e @@ -6,7 +6,7 @@ var len : integer; i : integer; s : array[64] of char; - d : array[64] of char; + d : array[24] of char; c : char; begin @@ -19,7 +19,6 @@ begin i := i + 1; c := system.readchar( ) end; - (* prints garbage *) - system.writeline( s ); - (* doesn't work currently: d := s *) + d := s; + system.writeline( d ); end -- cgit v1.2.3-54-g00ecf