summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2021-08-13 11:36:31 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2021-08-13 11:36:31 +0200
commit9ba1bb3d2007b37e3392208ef027f88d78e51c3e (patch)
tree26e24a12fa75f0346d3ab4f7e55d310bb81ab339
parenta4e168a1b45b693bf68e30f37ea233b8f151329c (diff)
downloadcompilertests-9ba1bb3d2007b37e3392208ef027f88d78e51c3e.tar.gz
compilertests-9ba1bb3d2007b37e3392208ef027f88d78e51c3e.tar.bz2
cc: some work on the scanner, expression parser
-rw-r--r--miniany/README6
-rw-r--r--miniany/REQUIREMENTS12
-rw-r--r--miniany/cc.c280
-rw-r--r--miniany/libc-freestanding.c51
-rw-r--r--miniany/libc-hosted.c2
-rw-r--r--old/c.ebnf229
-rw-r--r--old/minic.ebnf9
7 files changed, 567 insertions, 22 deletions
diff --git a/miniany/README b/miniany/README
index fcd9e06..851db39 100644
--- a/miniany/README
+++ b/miniany/README
@@ -34,11 +34,13 @@ tcc -O0 -g -o c4 c4.c
inspiration for what makes up a minimal C language
* tiny.c
- * http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c
+ * http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c,
+ Marc Feeley, really easy and much more readable, meant as educational compiler
* c.c in swieros: https://github.com/rswier/swieros.git
* documentation
* "Compiler Construction", Niklaus Wirth
* https://github.com/DoctorWkt/acwj
-
+ * https://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing,
+ https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method
diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS
index 474e975..cd6ffcf 100644
--- a/miniany/REQUIREMENTS
+++ b/miniany/REQUIREMENTS
@@ -29,3 +29,15 @@ not implementing:
- linker
- have compilation units needs a linker do build
an executable
+- symname[t] printing the symbol and not the number,
+ requires static initializers for array of char*
+- ASTs are basically only useful when you start to optimize,
+ till then you can use an intermediate format (as C4) does
+ and a stack machine. They also make the code easier readable.
+ For use they fore the introduction of pointers, references and structs.
+ In expression parsing we see, that const folding already needs
+ an AST, because we should not emit code when still reading
+ a constant expression. It also seperates syntactical stuff like '['
+ from logical stuff like 'declaration of array size' and 'derefencing
+ a pointer'.
+
diff --git a/miniany/cc.c b/miniany/cc.c
index 1637627..ebdebdb 100644
--- a/miniany/cc.c
+++ b/miniany/cc.c
@@ -1,45 +1,297 @@
int col;
int row;
+int pushback;
+
+int token;
+
+int DEBUG_SCANNER;
+
+enum {
+ MAX_IDENT_LEN = 20,
+ TEST = -1
+};
+
+void pushBack( int c )
+{
+ pushback = c;
+}
+
int getChar( )
{
int c;
+ if( pushback ) {
+ c = pushback;
+ pushback = 0;
+ return c;
+ }
+
c = getchar( );
if( c == EOF ) {
return c;
}
col++;
if( c == '\n' ) {
- col = 1;
+ col = 0;
row++;
}
return c;
}
-int main( int argc, char **argv )
+int skipWhite( )
{
int c;
- col = 1;
- row = 1;
+ c = getChar( );
+ while( isspace( c ) ) {
+ c = getChar( );
+ }
+
+ return c;
+}
- putstring( "Hello CC" );
- putnl( );
+enum {
+ S_PLUS = 1,
+ S_MINUS,
+ S_STAR,
+ S_SLASH,
+ S_SEMICOLON,
+ S_EQUALS,
+ S_INT = 10,
+ S_IDENT,
+ S_NUM = 20,
+ S_ERR = 30,
+ S_EOI = 31
+};
- c = getChar( );
+void printErrorHeader( )
+{
+ putstring( "Error line " );
putint( row );
+ putstring( ", pos " );
+ putint( col );
putstring( ": " );
- while( c != EOF ) {
- if( c == '\n' ) {
- putchar( '$' );
- putchar( c );
- putint( row );
- putstring( ": " );
+}
+
+int num;
+
+void scanNumber( int c )
+{
+ num = c - '0';
+ c = getChar( );
+ while( isdigit( c ) ) {
+ num = 10 * num + ( c - '0' );
+ c = getChar( );
+ }
+ pushBack( c );
+}
+
+/* c4: no data segment allocation in char array decleration */
+char *ident;
+/*char ident[20];
+ char ident[MAX_IDENT_LEN];
+*/
+
+void scanIdent( int c )
+{
+ int n;
+
+ n = 0;
+ while( isalnum( c ) || ( c == '_' ) ) {
+ ident[n] = c;
+ n++;
+ if( n >= MAX_IDENT_LEN - 1 ) {
+ printErrorHeader( );
+ putstring( "too long identifier" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+ c = getChar( );
+ }
+ ident[n] = 0; /* c4 doesn't handle '\0' */
+ pushBack( c );
+}
+
+int keyword( char *ident )
+{
+ if( *ident == 'i' ) {
+ if( strcmp( ident, "int" ) == 0 ) {
+ return S_INT;
} else {
- putchar( c );
+ return 0;
}
+ }
+
+ return 0;
+}
+
+int getToken( )
+{
+ int t;
+ int c;
+
+ c = skipWhite( );
+
+ if( c == EOF ) {
+ t = S_EOI;
+ } else if( c == '+' ) {
+ t = S_PLUS;
+ } else if( c == '-' ) {
+ t = S_MINUS;
+ } else if( c == '*' ) {
+ t = S_STAR;
+ } else if ( c == '/' ) {
c = getChar( );
+ if( c == '/' ) {
+ while( c != '\n' ) {
+ c = getChar( );
+ }
+ t = getToken( );
+ } else if( c == '*' ) {
+ do {
+ while( c != '*' ) {
+ c = getChar( );
+ }
+ c = getChar( );
+ } while( c != '/' );
+ c = getChar( );
+ t = getToken( );
+ } else {
+ pushBack( c );
+ t = S_SLASH;
+ }
+ } else if( c == ';' ) {
+ t = S_SEMICOLON;
+ } else if( c == '=' ) {
+ t = S_EQUALS;
+ } else if( isdigit( c ) ) {
+ scanNumber( c );
+ t = S_NUM;
+ } else if( c >= 'a' && c <= 'z' ) {
+ scanIdent( c );
+ if( ( t = keyword( ident ) ) ) {
+ } else {
+ t = S_IDENT;
+ }
+ } else {
+ t = S_ERR;
+ printErrorHeader( );
+ putstring( "unknown token '" );
+ putchar( c );
+ putstring( "'" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+
+ if( DEBUG_SCANNER ) {
+ putint( row );
+ putchar( '/' );
+ putint( col );
+ putstring( ": " );
+ putint( t );
+ if( t == S_NUM ) {
+ putchar( '(' );
+ putint( num );
+ putchar( ')' );
+ }
+ putnl( );
+ }
+
+ return t;
+}
+
+void expect( int must, char *what )
+{
+ if( token == must ) {
+ token = getToken( );
+ } else {
+ printErrorHeader( );
+ putstring( what );
+ putstring( " expected" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+}
+
+void parseExpression( )
+{
+ if( token == S_EOI ) {
+ printErrorHeader( );
+ putstring( "unexpected eof in expression" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+
+ if( token == S_NUM ) {
+ putstring( "immediate int " );
+ putint( num );
+ putnl( );
+ }
+
+ token = getToken( );
+ if( token == S_PLUS ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_MINUS ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_STAR ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_SLASH ) {
+ token = getToken( );
+ parseExpression( );
+ } else if( token == S_EOI || token == S_SEMICOLON ) {
+ return;
+ } else {
+ printErrorHeader( );
+ putstring( "unexpected token '" );
+ putint( token );
+ putstring( "' in expression" );
+ putnl( );
+ exit( EXIT_FAILURE );
+ }
+}
+
+void parseDeclaration( )
+{
+ expect( S_INT, "int" );
+ expect( S_IDENT, "identifier" );
+ putstring( "Adding glob: " ); putstring( ident ); putnl( );
+ expect( S_SEMICOLON, ";" );
+}
+
+void parseAssignment( )
+{
+ token = getToken( );
+ expect( S_EQUALS, "=" );
+ parseExpression( );
+ expect( S_SEMICOLON, ";" );
+}
+
+void parseStatement( )
+{
+ if( token == S_INT ) {
+ parseDeclaration( );
+ } else if( token == S_IDENT ) {
+ parseAssignment( );
+ } else if( token == S_EOI ) {
+ return;
+ }
+}
+
+int main( int argc, char **argv )
+{
+ col = 0;
+ row = 1;
+ pushback = 0;
+ DEBUG_SCANNER = 1;
+ ident = "12345678901234567890";
+
+ token = getToken( );
+ while( token != S_EOI ) {
+ parseStatement( );
}
exit( EXIT_SUCCESS );
diff --git a/miniany/libc-freestanding.c b/miniany/libc-freestanding.c
index 8b1a378..ffed093 100644
--- a/miniany/libc-freestanding.c
+++ b/miniany/libc-freestanding.c
@@ -20,6 +20,40 @@ int strlen( char *s )
return p - s;
}
+int strcmp( char *s1, char *s2 )
+{
+ while( ( *s1 != '\0' ) && ( *s2 != '\0' ) && *s1 == *s2 ) {
+ s1++;
+ s2++;
+ }
+
+ return *s1 - *s2;
+}
+
+int isspace( int c )
+{
+ if( c == ' ' || c == '\r' || c == '\n' || c == '\t' ) return 1;
+ return 0;
+}
+
+int isdigit( int c )
+{
+ if( c >= '0' && c <= '9' ) return 1;
+ return 0;
+}
+
+int isalpha( int c )
+{
+ if( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) ) return 1;
+ return 0;
+}
+
+int isalnum( int c )
+{
+ if( isalpha( c ) || isdigit( c ) ) return 1;
+ return 0;
+}
+
enum {
EXIT_SUCCESS = 0,
EXIT_FAILURE = 1
@@ -31,9 +65,18 @@ enum {
SYSCALL_WRITE = 4
};
+/* EOF = -1 is a const expression for c4 */
+/* EOF = 0xFFFFFFFF clang doesn't like this */
+/* EOF = 0xFFFFFFFF gcc overflow in conversion from 'enum <anonymous>' to 'int' changes value from '4294967295' to '-1' */
+/* EOF = 256, this is not standard, but doesn't really matter in freestanding mode,
+ * code should really not relly on the fact what the value of EOF is (-1 usually) */
+enum {
+ EOF = 256
+};
+
int errno;
-static __attribute__((noinline)) int syscall1( int id, int arg0 )
+int syscall1( int id, int arg0 )
{
int retval;
@@ -54,7 +97,7 @@ static __attribute__((noinline)) int syscall1( int id, int arg0 )
return retval;
}
-static __attribute__((noinline)) int syscall3( int id, int arg0, int arg1, int arg2 )
+int syscall3( int id, int arg0, int arg1, int arg2 )
{
int retval;
@@ -109,10 +152,6 @@ static int read_string( int fd, char *buf, int size )
return syscall3( SYSCALL_READ, fd, (int)buf, size );
}
-enum {
- EOF = -1
-};
-
int puts( char *s )
{
print_string( s );
diff --git a/miniany/libc-hosted.c b/miniany/libc-hosted.c
index 4c663bf..4154aa1 100644
--- a/miniany/libc-hosted.c
+++ b/miniany/libc-hosted.c
@@ -5,6 +5,8 @@
#include <stdio.h>
#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
int putstring( char *s )
{
diff --git a/old/c.ebnf b/old/c.ebnf
new file mode 100644
index 0000000..9a3c0ae
--- /dev/null
+++ b/old/c.ebnf
@@ -0,0 +1,229 @@
+<translation-unit> ::= {<external-declaration>}*
+
+<external-declaration> ::= <function-definition>
+ | <declaration>
+
+<function-definition> ::= {<declaration-specifier>}* <declarator> {<declaration>}* <compound-statement>
+
+<declaration-specifier> ::= <storage-class-specifier>
+ | <type-specifier>
+ | <type-qualifier>
+
+<storage-class-specifier> ::= auto
+ | register
+ | static
+ | extern
+ | typedef
+
+<type-specifier> ::= void
+ | char
+ | short
+ | int
+ | long
+ | float
+ | double
+ | signed
+ | unsigned
+ | <struct-or-union-specifier>
+ | <enum-specifier>
+ | <typedef-name>
+
+<struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ }
+ | <struct-or-union> { {<struct-declaration>}+ }
+ | <struct-or-union> <identifier>
+
+<struct-or-union> ::= struct
+ | union
+
+<struct-declaration> ::= {<specifier-qualifier>}* <struct-declarator-list>
+
+<specifier-qualifier> ::= <type-specifier>
+ | <type-qualifier>
+
+<struct-declarator-list> ::= <struct-declarator>
+ | <struct-declarator-list> , <struct-declarator>
+
+<struct-declarator> ::= <declarator>
+ | <declarator> : <constant-expression>
+ | : <constant-expression>
+
+<declarator> ::= {<pointer>}? <direct-declarator>
+
+<pointer> ::= * {<type-qualifier>}* {<pointer>}?
+
+<type-qualifier> ::= const
+ | volatile
+
+<direct-declarator> ::= <identifier>
+ | ( <declarator> )
+ | <direct-declarator> [ {<constant-expression>}? ]
+ | <direct-declarator> ( <parameter-type-list> )
+ | <direct-declarator> ( {<identifier>}* )
+
+<constant-expression> ::= <conditional-expression>
+
+<conditional-expression> ::= <logical-or-expression>
+ | <logical-or-expression> ? <expression> : <conditional-expression>
+
+<logical-or-expression> ::= <logical-and-expression>
+ | <logical-or-expression> || <logical-and-expression>
+
+<logical-and-expression> ::= <inclusive-or-expression>
+ | <logical-and-expression> && <inclusive-or-expression>
+
+<inclusive-or-expression> ::= <exclusive-or-expression>
+ | <inclusive-or-expression> | <exclusive-or-expression>
+
+<exclusive-or-expression> ::= <and-expression>
+ | <exclusive-or-expression> ^ <and-expression>
+
+<and-expression> ::= <equality-expression>
+ | <and-expression> & <equality-expression>
+
+<equality-expression> ::= <relational-expression>
+ | <equality-expression> == <relational-expression>
+ | <equality-expression> != <relational-expression>
+
+<relational-expression> ::= <shift-expression>
+ | <relational-expression> < <shift-expression>
+ | <relational-expression> > <shift-expression>
+ | <relational-expression> <= <shift-expression>
+ | <relational-expression> >= <shift-expression>
+
+<shift-expression> ::= <additive-expression>
+ | <shift-expression> << <additive-expression>
+ | <shift-expression> >> <additive-expression>
+
+<additive-expression> ::= <multiplicative-expression>
+ | <additive-expression> + <multiplicative-expression>
+ | <additive-expression> - <multiplicative-expression>
+
+<multiplicative-expression> ::= <cast-expression>
+ | <multiplicative-expression> * <cast-expression>
+ | <multiplicative-expression> / <cast-expression>
+ | <multiplicative-expression> % <cast-expression>
+
+<cast-expression> ::= <unary-expression>
+ | ( <type-name> ) <cast-expression>
+
+<unary-expression> ::= <postfix-expression>
+ | ++ <unary-expression>
+ | -- <unary-expression>
+ | <unary-operator> <cast-expression>
+ | sizeof <unary-expression>
+ | sizeof <type-name>
+
+<postfix-expression> ::= <primary-expression>
+ | <postfix-expression> [ <expression> ]
+ | <postfix-expression> ( {<assignment-expression>}* )
+ | <postfix-expression> . <identifier>
+ | <postfix-expression> -> <identifier>
+ | <postfix-expression> ++
+ | <postfix-expression> --
+
+<primary-expression> ::= <identifier>
+ | <constant>
+ | <string>
+ | ( <expression> )
+
+<constant> ::= <integer-constant>
+ | <character-constant>
+ | <floating-constant>
+ | <enumeration-constant>
+
+<expression> ::= <assignment-expression>
+ | <expression> , <assignment-expression>
+
+<assignment-expression> ::= <conditional-expression>
+ | <unary-expression> <assignment-operator> <assignment-expression>
+
+<assignment-operator> ::= =
+ | *=
+ | /=
+ | %=
+ | +=
+ | -=
+ | <<=
+ | >>=
+ | &=
+ | ^=
+ | |=
+
+<unary-operator> ::= &
+ | *
+ | +
+ | -
+ | ~
+ | !
+
+<type-name> ::= {<specifier-qualifier>}+ {<abstract-declarator>}?
+
+<parameter-type-list> ::= <parameter-list>
+ | <parameter-list> , ...
+
+<parameter-list> ::= <parameter-declaration>
+ | <parameter-list> , <parameter-declaration>
+
+<parameter-declaration> ::= {<declaration-specifier>}+ <declarator>
+ | {<declaration-specifier>}+ <abstract-declarator>
+ | {<declaration-specifier>}+
+
+<abstract-declarator> ::= <pointer>
+ | <pointer> <direct-abstract-declarator>
+ | <direct-abstract-declarator>
+
+<direct-abstract-declarator> ::= ( <abstract-declarator> )
+ | {<direct-abstract-declarator>}? [ {<constant-expression>}? ]
+ | {<direct-abstract-declarator>}? ( {<parameter-type-list>}? )
+
+<enum-specifier> ::= enum <identifier> { <enumerator-list> }
+ | enum { <enumerator-list> }
+ | enum <identifier>
+
+<enumerator-list> ::= <enumerator>
+ | <enumerator-list> , <enumerator>
+
+<enumerator> ::= <identifier>
+ | <identifier> = <constant-expression>
+
+<typedef-name> ::= <identifier>
+
+<declaration> ::= {<declaration-specifier>}+ {<init-declarator>}* ;
+
+<init-declarator> ::= <declarator>
+ | <declarator> = <initializer>
+
+<initializer> ::= <assignment-expression>
+ | { <initializer-list> }
+ | { <initializer-list> , }
+
+<initializer-list> ::= <initializer>
+ | <initializer-list> , <initializer>
+
+<compound-statement> ::= { {<declaration>}* {<statement>}* }
+
+<statement> ::= <labeled-statement>
+ | <expression-statement>
+ | <compound-statement>
+ | <selection-statement>
+ | <iteration-statement>
+ | <jump-statement>
+
+<labeled-statement> ::= <identifier> : <statement>
+ | case <constant-expression> : <statement>
+ | default : <statement>
+
+<expression-statement> ::= {<expression>}? ;
+
+<selection-statement> ::= if ( <expression> ) <statement>
+ | if ( <expression> ) <statement> else <statement>
+ | switch ( <expression> ) <statement>
+
+<iteration-statement> ::= while ( <expression> ) <statement>
+ | do <statement> while ( <expression> ) ;
+ | for ( {<expression>}? ; {<expression>}? ; {<expression>}? ) <statement>
+
+<jump-statement> ::= goto <identifier> ;
+ | continue ;
+ | break ;
+ | return {<expression>}? ;
diff --git a/old/minic.ebnf b/old/minic.ebnf
index 9379e35..4c354e6 100644
--- a/old/minic.ebnf
+++ b/old/minic.ebnf
@@ -5,3 +5,12 @@ Integer = Digit { Digit }.
Letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" |
"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z".
Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9".
+
+Expression = Integer
+ | Expression "*" Expression
+ | Expression "/" Expression
+ | Expression "+" Expression
+ | Expression "-" Expression
+ .
+
+