summaryrefslogtreecommitdiff
path: root/miniany
diff options
context:
space:
mode:
Diffstat (limited to 'miniany')
-rw-r--r--miniany/README6
-rw-r--r--miniany/REQUIREMENTS12
-rw-r--r--miniany/cc.c280
-rw-r--r--miniany/libc-freestanding.c51
-rw-r--r--miniany/libc-hosted.c2
5 files changed, 329 insertions, 22 deletions
diff --git a/miniany/README b/miniany/README
index fcd9e06..851db39 100644
--- a/miniany/README
+++ b/miniany/README
@@ -34,11 +34,13 @@ tcc -O0 -g -o c4 c4.c
inspiration for what makes up a minimal C language
* tiny.c
- * http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c
+ * http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c,
+ Marc Feeley, really easy and much more readable, meant as educational compiler
* c.c in swieros: https://github.com/rswier/swieros.git
* documentation
* "Compiler Construction", Niklaus Wirth
* https://github.com/DoctorWkt/acwj
-
+ * https://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing,
+ https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method
diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS
index 474e975..cd6ffcf 100644
--- a/miniany/REQUIREMENTS
+++ b/miniany/REQUIREMENTS
@@ -29,3 +29,15 @@ not implementing:
- linker
- have compilation units needs a linker do build
an executable
+- symname[t] printing the symbol and not the number,
+ requires static initializers for array of char*
+- ASTs are basically only useful when you start to optimize,
+ till then you can use an intermediate format (as C4) does
+ and a stack machine. They also make the code easier readable.
+ For use they fore the introduction of pointers, references and structs.
+ In expression parsing we see, that const folding already needs
+ an AST, because we should not emit code when still reading
+ a constant expression. It also seperates syntactical stuff like '['
+ from logical stuff like 'declaration of array size' and 'derefencing
+ a pointer'.
+
diff --git a/miniany/cc.c b/miniany/cc.c
index 1637627..ebdebdb 100644
--- a/miniany/cc.c
+++ b/miniany/cc.c
@@ -1,45 +1,297 @@
int col;
int row;
+int pushback;
+
+int token;
+
+int DEBUG_SCANNER;
+
+enum {
+ MAX_IDENT_LEN = 20,
+ TEST = -1
+};
+
+void pushBack( int c )
+{
+ pushback = c;
+}
+
int getChar( )
{
int c;
+ if( pushback ) {
+ c = pushback;
+ pushback = 0;
+ return c;
+ }
+
c = getchar( );
if( c == EOF ) {
return c;
}
col++;
if( c == '\n' ) {
- col = 1;
+ col = 0;
row++;
}
return c;
}
-int main( int argc, char **argv )
+int skipWhite( )
{
int c;
- col = 1;
- row = 1;
+ c = getChar( );
+ while( isspace( c ) ) {
+ c = getChar( );
+ }
+
+ return c;
+}
- putstring( "Hello CC" );
- putnl( );
+enum {
+ S_PLUS = 1,
+ S_MINUS,
+ S_STAR,
+ S_SLASH,
+ S_SEMICOLON,
+ S_EQUALS,
+ S_INT = 10,
+ S_IDENT,
+ S_NUM = 20,
+ S_ERR = 30,
+ S_EOI = 31
+};
- c = getChar( );
+void printErrorHeader( )
+{
+ putstring( "Error line " );
putint( row );
+ putstring( ", pos " );
+ putint( col );
putstring( ": " );
- while( c != EOF ) {
- if( c == '\n' ) {
- putchar( '$' );
- putchar( c );
- putint( row );
- putstring( ": " );
+}
+
+int num;
+
+void scanNumber( int c )
+{
+ num = c - '0';
+ c = getChar( );
+ while( isdigit( c ) ) {
+ num = 10 * num + ( c - '0' );
+ c = getChar( );
+ }
+ pushBack( c );
+}
+
+/* c4: no data segment allocation in char array decleration */
+char *ident;
+/*char ident[20];
+ char ident[MAX_IDENT_LEN];
+*/
+
+void scanIdent( int c )
+{
+ int n;
+
+ n = 0;
+ while( isalnum( c ) || ( c == '_' ) ) {
+ ident[n] = c;
+ n++;
+ if( n >= MAX_IDENT_LEN - 1 ) {
+ printErrorHeader( );
+ putstring( "too long identifier" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+ c = getChar( );
+ }
+ ident[n] = 0; /* c4 doesn't handle '\0' */
+ pushBack( c );
+}
+
+int keyword( char *ident )
+{
+ if( *ident == 'i' ) {
+ if( strcmp( ident, "int" ) == 0 ) {
+ return S_INT;
} else {
- putchar( c );
+ return 0;
}
+ }
+
+ return 0;
+}
+
+int getToken( )
+{
+ int t;
+ int c;
+
+ c = skipWhite( );
+
+ if( c == EOF ) {
+ t = S_EOI;
+ } else if( c == '+' ) {
+ t = S_PLUS;
+ } else if( c == '-' ) {
+ t = S_MINUS;
+ } else if( c == '*' ) {
+ t = S_STAR;
+ } else if ( c == '/' ) {
c = getChar( );
+ if( c == '/' ) {
+ while( c != '\n' ) {
+ c = getChar( );
+ }
+ t = getToken( );
+ } else if( c == '*' ) {
+ do {
+ while( c != '*' ) {
+ c = getChar( );
+ }
+ c = getChar( );
+ } while( c != '/' );
+ c = getChar( );
+ t = getToken( );
+ } else {
+ pushBack( c );
+ t = S_SLASH;
+ }
+ } else if( c == ';' ) {
+ t = S_SEMICOLON;
+ } else if( c == '=' ) {
+ t = S_EQUALS;
+ } else if( isdigit( c ) ) {
+ scanNumber( c );
+ t = S_NUM;
+ } else if( c >= 'a' && c <= 'z' ) {
+ scanIdent( c );
+ if( ( t = keyword( ident ) ) ) {
+ } else {
+ t = S_IDENT;
+ }
+ } else {
+ t = S_ERR;
+ printErrorHeader( );
+ putstring( "unknown token '" );
+ putchar( c );
+ putstring( "'" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+
+ if( DEBUG_SCANNER ) {
+ putint( row );
+ putchar( '/' );
+ putint( col );
+ putstring( ": " );
+ putint( t );
+ if( t == S_NUM ) {
+ putchar( '(' );
+ putint( num );
+ putchar( ')' );
+ }
+ putnl( );
+ }
+
+ return t;
+}
+
+void expect( int must, char *what )
+{
+ if( token == must ) {
+ token = getToken( );
+ } else {
+ printErrorHeader( );
+ putstring( what );
+ putstring( " expected" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+}
+
+void parseExpression( )
+{
+ if( token == S_EOI ) {
+ printErrorHeader( );
+ putstring( "unexpected eof in expression" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+
+ if( token == S_NUM ) {
+ putstring( "immediate int " );
+ putint( num );
+ putnl( );
+ }
+
+ token = getToken( );
+ if( token == S_PLUS ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_MINUS ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_STAR ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_SLASH ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_EOI || token == S_SEMICOLON ) {
+ return;
+ } else {
+ printErrorHeader( );
+ putstring( "unexpected token '" );
+ putint( token );
+ putstring( "' in expression" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+}
+
+void parseDeclaration( )
+{
+ expect( S_INT, "int" );
+ expect( S_IDENT, "identifier" );
+ putstring( "Adding glob: " ); putstring( ident ); putnl( );
+ expect( S_SEMICOLON, ";" );
+}
+
+void parseAssignment( )
+{
+ token = getToken( );
+ expect( S_EQUALS, "=" );
+ parseExpression( );
+ expect( S_SEMICOLON, ";" );
+}
+
+void parseStatement( )
+{
+ if( token == S_INT ) {
+ parseDeclaration( );
+ } else if( token == S_IDENT ) {
+ parseAssignment( );
+ } else if( token == S_EOI ) {
+ return;
+ }
+}
+
+int main( int argc, char **argv )
+{
+ col = 0;
+ row = 1;
+ pushback = 0;
+ DEBUG_SCANNER = 1;
+ ident = "12345678901234567890";
+
+ token = getToken( );
+ while( token != S_EOI ) {
+ parseStatement( );
}
exit( EXIT_SUCCESS );
diff --git a/miniany/libc-freestanding.c b/miniany/libc-freestanding.c
index 8b1a378..ffed093 100644
--- a/miniany/libc-freestanding.c
+++ b/miniany/libc-freestanding.c
@@ -20,6 +20,40 @@ int strlen( char *s )
return p - s;
}
+int strcmp( char *s1, char *s2 )
+{
+ while( ( *s1 != '\0' ) && ( *s2 != '\0' ) && *s1 == *s2 ) {
+ s1++;
+ s2++;
+ }
+
+ return *s1 - *s2;
+}
+
+int isspace( int c )
+{
+ if( c == ' ' || c == '\r' || c == '\n' || c == '\t' ) return 1;
+ return 0;
+}
+
+int isdigit( int c )
+{
+ if( c >= '0' && c <= '9' ) return 1;
+ return 0;
+}
+
+int isalpha( int c )
+{
+ if( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) ) return 1;
+ return 0;
+}
+
+int isalnum( int c )
+{
+ if( isalpha( c ) || isdigit( c ) ) return 1;
+ return 0;
+}
+
enum {
EXIT_SUCCESS = 0,
EXIT_FAILURE = 1
@@ -31,9 +65,18 @@ enum {
SYSCALL_WRITE = 4
};
+/* EOF = -1 is a const expression for c4 */
+/* EOF = 0xFFFFFFFF clang doesn't like this */
+/* EOF = 0xFFFFFFFF gcc overflow in conversion from 'enum <anonymous>' to 'int' changes value from '4294967295' to '-1' */
+/* EOF = 256, this is not standard, but doesn't really matter in freestanding mode,
+ * code should really not relly on the fact what the value of EOF is (-1 usually) */
+enum {
+ EOF = 256
+};
+
int errno;
-static __attribute__((noinline)) int syscall1( int id, int arg0 )
+int syscall1( int id, int arg0 )
{
int retval;
@@ -54,7 +97,7 @@ static __attribute__((noinline)) int syscall1( int id, int arg0 )
return retval;
}
-static __attribute__((noinline)) int syscall3( int id, int arg0, int arg1, int arg2 )
+int syscall3( int id, int arg0, int arg1, int arg2 )
{
int retval;
@@ -109,10 +152,6 @@ static int read_string( int fd, char *buf, int size )
return syscall3( SYSCALL_READ, fd, (int)buf, size );
}
-enum {
- EOF = -1
-};
-
int puts( char *s )
{
print_string( s );
diff --git a/miniany/libc-hosted.c b/miniany/libc-hosted.c
index 4c663bf..4154aa1 100644
--- a/miniany/libc-hosted.c
+++ b/miniany/libc-hosted.c
@@ -5,6 +5,8 @@
#include <stdio.h>
#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
int putstring( char *s )
{