diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2021-08-13 11:36:31 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2021-08-13 11:36:31 +0200 |
commit | 9ba1bb3d2007b37e3392208ef027f88d78e51c3e (patch) | |
tree | 26e24a12fa75f0346d3ab4f7e55d310bb81ab339 | |
parent | a4e168a1b45b693bf68e30f37ea233b8f151329c (diff) | |
download | compilertests-9ba1bb3d2007b37e3392208ef027f88d78e51c3e.tar.gz compilertests-9ba1bb3d2007b37e3392208ef027f88d78e51c3e.tar.bz2 |
cc: some work on the scanner, expression parser
-rw-r--r-- | miniany/README | 6 | ||||
-rw-r--r-- | miniany/REQUIREMENTS | 12 | ||||
-rw-r--r-- | miniany/cc.c | 280 | ||||
-rw-r--r-- | miniany/libc-freestanding.c | 51 | ||||
-rw-r--r-- | miniany/libc-hosted.c | 2 | ||||
-rw-r--r-- | old/c.ebnf | 229 | ||||
-rw-r--r-- | old/minic.ebnf | 9 |
7 files changed, 567 insertions, 22 deletions
diff --git a/miniany/README b/miniany/README index fcd9e06..851db39 100644 --- a/miniany/README +++ b/miniany/README @@ -34,11 +34,13 @@ tcc -O0 -g -o c4 c4.c inspiration for what makes up a minimal C language * tiny.c - * http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c + * http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c, + Marc Feeley, really easy and much more readable, meant as an educational compiler * c.c in swieros: https://github.com/rswier/swieros.git * documentation * "Compiler Construction", Niklaus Wirth * https://github.com/DoctorWkt/acwj - + * https://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing, + https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method diff --git a/miniany/REQUIREMENTS b/miniany/REQUIREMENTS index 474e975..cd6ffcf 100644 --- a/miniany/REQUIREMENTS +++ b/miniany/REQUIREMENTS @@ -29,3 +29,15 @@ not implementing: - linker - have compilation units needs a linker to build an executable +- symname[t] printing the symbol and not the number, + requires static initializers for array of char* +- ASTs are basically only useful when you start to optimize, + till then you can use an intermediate format (as C4 does) + and a stack machine. They also make the code easier to read. + For us they force the introduction of pointers, references and structs. + In expression parsing we see that const folding already needs + an AST, because we should not emit code when still reading + a constant expression. It also separates syntactical stuff like '[' + from logical stuff like 'declaration of array size' and 'dereferencing + a pointer'. 
+ diff --git a/miniany/cc.c b/miniany/cc.c index 1637627..ebdebdb 100644 --- a/miniany/cc.c +++ b/miniany/cc.c @@ -1,45 +1,297 @@ int col; int row; +int pushback; + +int token; + +int DEBUG_SCANNER; + +enum { + MAX_IDENT_LEN = 20, + TEST = -1 +}; + +void pushBack( int c ) +{ + pushback = c; +} + int getChar( ) { int c; + if( pushback ) { + c = pushback; + pushback = 0; + return c; + } + c = getchar( ); if( c == EOF ) { return c; } col++; if( c == '\n' ) { - col = 1; + col = 0; row++; } return c; } -int main( int argc, char **argv ) +int skipWhite( ) { int c; - col = 1; - row = 1; + c = getChar( ); + while( isspace( c ) ) { + c = getChar( ); + } + + return c; +} - putstring( "Hello CC" ); - putnl( ); +enum { + S_PLUS = 1, + S_MINUS, + S_STAR, + S_SLASH, + S_SEMICOLON, + S_EQUALS, + S_INT = 10, + S_IDENT, + S_NUM = 20, + S_ERR = 30, + S_EOI = 31 +}; - c = getChar( ); +void printErrorHeader( ) +{ + putstring( "Error line " ); putint( row ); + putstring( ", pos " ); + putint( col ); putstring( ": " ); - while( c != EOF ) { - if( c == '\n' ) { - putchar( '$' ); - putchar( c ); - putint( row ); - putstring( ": " ); +} + +int num; + +void scanNumber( int c ) +{ + num = c - '0'; + c = getChar( ); + while( isdigit( c ) ) { + num = 10 * num + ( c - '0' ); + c = getChar( ); + } + pushBack( c ); +} + +/* c4: no data segment allocation in char array decleration */ +char *ident; +/*char ident[20]; + char ident[MAX_IDENT_LEN]; +*/ + +void scanIdent( int c ) +{ + int n; + + n = 0; + while( isalnum( c ) || ( c == '_' ) ) { + ident[n] = c; + n++; + if( n >= MAX_IDENT_LEN - 1 ) { + printErrorHeader( ); + putstring( "too long identifier" ); + putnl( ); + exit( EXIT_FAILURE ); + } + c = getChar( ); + } + ident[n] = 0; /* c4 doesn't handle '\0' */ + pushBack( c ); +} + +int keyword( char *ident ) +{ + if( *ident == 'i' ) { + if( strcmp( ident, "int" ) == 0 ) { + return S_INT; } else { - putchar( c ); + return 0; } + } + + return 0; +} + +int getToken( ) +{ + int t; + int c; + + c = 
skipWhite( ); + + if( c == EOF ) { + t = S_EOI; + } else if( c == '+' ) { + t = S_PLUS; + } else if( c == '-' ) { + t = S_MINUS; + } else if( c == '*' ) { + t = S_STAR; + } else if ( c == '/' ) { c = getChar( ); + if( c == '/' ) { + while( c != '\n' ) { + c = getChar( ); + } + t = getToken( ); + } else if( c == '*' ) { + do { + while( c != '*' ) { + c = getChar( ); + } + c = getChar( ); + } while( c != '/' ); + c = getChar( ); + t = getToken( ); + } else { + pushBack( c ); + t = S_SLASH; + } + } else if( c == ';' ) { + t = S_SEMICOLON; + } else if( c == '=' ) { + t = S_EQUALS; + } else if( isdigit( c ) ) { + scanNumber( c ); + t = S_NUM; + } else if( c >= 'a' && c <= 'z' ) { + scanIdent( c ); + if( ( t = keyword( ident ) ) ) { + } else { + t = S_IDENT; + } + } else { + t = S_ERR; + printErrorHeader( ); + putstring( "unknown token '" ); + putchar( c ); + putstring( "'" ); + putnl( ); + exit( EXIT_FAILURE ); + } + + if( DEBUG_SCANNER ) { + putint( row ); + putchar( '/' ); + putint( col ); + putstring( ": " ); + putint( t ); + if( t == S_NUM ) { + putchar( '(' ); + putint( num ); + putchar( ')' ); + } + putnl( ); + } + + return t; +} + +void expect( int must, char *what ) +{ + if( token == must ) { + token = getToken( ); + } else { + printErrorHeader( ); + putstring( what ); + putstring( " expected" ); + putnl( ); + exit( EXIT_FAILURE ); + } +} + +void parseExpression( ) +{ + if( token == S_EOI ) { + printErrorHeader( ); + putstring( "unexpected eof in expression" ); + putnl( ); + exit( EXIT_FAILURE ); + } + + if( token == S_NUM ) { + putstring( "immediate int " ); + putint( num ); + putnl( ); + } + + token = getToken( ); + if( token == S_PLUS ) { + token = getToken( ); + parseExpression( ); + } else if( token == S_MINUS ) { + token = getToken( ); + parseExpression( ); + } else if( token == S_STAR ) { + token = getToken( ); + parseExpression( ); + } else if( token == S_SLASH ) { + token = getToken( ); + parseExpression( ); + } else if( token == S_EOI || token == 
S_SEMICOLON ) { + return; + } else { + printErrorHeader( ); + putstring( "unexpected token '" ); + putint( token ); + putstring( "' in expression" ); + putnl( ); + exit( EXIT_FAILURE ); + } +} + +void parseDeclaration( ) +{ + expect( S_INT, "int" ); + expect( S_IDENT, "identifier" ); + putstring( "Adding glob: " ); putstring( ident ); putnl( ); + expect( S_SEMICOLON, ";" ); +} + +void parseAssignment( ) +{ + token = getToken( ); + expect( S_EQUALS, "=" ); + parseExpression( ); + expect( S_SEMICOLON, ";" ); +} + +void parseStatement( ) +{ + if( token == S_INT ) { + parseDeclaration( ); + } else if( token == S_IDENT ) { + parseAssignment( ); + } else if( token == S_EOI ) { + return; + } +} + +int main( int argc, char **argv ) +{ + col = 0; + row = 1; + pushback = 0; + DEBUG_SCANNER = 1; + ident = "12345678901234567890"; + + token = getToken( ); + while( token != S_EOI ) { + parseStatement( ); } exit( EXIT_SUCCESS ); diff --git a/miniany/libc-freestanding.c b/miniany/libc-freestanding.c index 8b1a378..ffed093 100644 --- a/miniany/libc-freestanding.c +++ b/miniany/libc-freestanding.c @@ -20,6 +20,40 @@ int strlen( char *s ) return p - s; } +int strcmp( char *s1, char *s2 ) +{ + while( ( *s1 != '\0' ) && ( *s2 != '\0' ) && *s1 == *s2 ) { + s1++; + s2++; + } + + return *s1 - *s2; +} + +int isspace( int c ) +{ + if( c == ' ' || c == '\r' || c == '\n' || c == '\t' ) return 1; + return 0; +} + +int isdigit( int c ) +{ + if( c >= '0' && c <= '9' ) return 1; + return 0; +} + +int isalpha( int c ) +{ + if( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) ) return 1; + return 0; +} + +int isalnum( int c ) +{ + if( isalpha( c ) || isdigit( c ) ) return 1; + return 0; +} + enum { EXIT_SUCCESS = 0, EXIT_FAILURE = 1 @@ -31,9 +65,18 @@ enum { SYSCALL_WRITE = 4 }; +/* EOF = -1 is a const expression for c4 */ +/* EOF = 0xFFFFFFFF clang doesn't like this */ +/* EOF = 0xFFFFFFFF gcc overflow in conversion from 'enum <anonymous>' to 'int' changes value from '4294967295' to '-1' */ 
+/* EOF = 256, this is not standard, but doesn't really matter in freestanding mode, + * code should really not relly on the fact what the value of EOF is (-1 usually) */ +enum { + EOF = 256 +}; + int errno; -static __attribute__((noinline)) int syscall1( int id, int arg0 ) +int syscall1( int id, int arg0 ) { int retval; @@ -54,7 +97,7 @@ static __attribute__((noinline)) int syscall1( int id, int arg0 ) return retval; } -static __attribute__((noinline)) int syscall3( int id, int arg0, int arg1, int arg2 ) +int syscall3( int id, int arg0, int arg1, int arg2 ) { int retval; @@ -109,10 +152,6 @@ static int read_string( int fd, char *buf, int size ) return syscall3( SYSCALL_READ, fd, (int)buf, size ); } -enum { - EOF = -1 -}; - int puts( char *s ) { print_string( s ); diff --git a/miniany/libc-hosted.c b/miniany/libc-hosted.c index 4c663bf..4154aa1 100644 --- a/miniany/libc-hosted.c +++ b/miniany/libc-hosted.c @@ -5,6 +5,8 @@ #include <stdio.h> #include <stdlib.h> +#include <ctype.h> +#include <string.h> int putstring( char *s ) { diff --git a/old/c.ebnf b/old/c.ebnf new file mode 100644 index 0000000..9a3c0ae --- /dev/null +++ b/old/c.ebnf @@ -0,0 +1,229 @@ +<translation-unit> ::= {<external-declaration>}* + +<external-declaration> ::= <function-definition> + | <declaration> + +<function-definition> ::= {<declaration-specifier>}* <declarator> {<declaration>}* <compound-statement> + +<declaration-specifier> ::= <storage-class-specifier> + | <type-specifier> + | <type-qualifier> + +<storage-class-specifier> ::= auto + | register + | static + | extern + | typedef + +<type-specifier> ::= void + | char + | short + | int + | long + | float + | double + | signed + | unsigned + | <struct-or-union-specifier> + | <enum-specifier> + | <typedef-name> + +<struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ } + | <struct-or-union> { {<struct-declaration>}+ } + | <struct-or-union> <identifier> + +<struct-or-union> ::= struct + | union + 
+<struct-declaration> ::= {<specifier-qualifier>}* <struct-declarator-list> + +<specifier-qualifier> ::= <type-specifier> + | <type-qualifier> + +<struct-declarator-list> ::= <struct-declarator> + | <struct-declarator-list> , <struct-declarator> + +<struct-declarator> ::= <declarator> + | <declarator> : <constant-expression> + | : <constant-expression> + +<declarator> ::= {<pointer>}? <direct-declarator> + +<pointer> ::= * {<type-qualifier>}* {<pointer>}? + +<type-qualifier> ::= const + | volatile + +<direct-declarator> ::= <identifier> + | ( <declarator> ) + | <direct-declarator> [ {<constant-expression>}? ] + | <direct-declarator> ( <parameter-type-list> ) + | <direct-declarator> ( {<identifier>}* ) + +<constant-expression> ::= <conditional-expression> + +<conditional-expression> ::= <logical-or-expression> + | <logical-or-expression> ? <expression> : <conditional-expression> + +<logical-or-expression> ::= <logical-and-expression> + | <logical-or-expression> || <logical-and-expression> + +<logical-and-expression> ::= <inclusive-or-expression> + | <logical-and-expression> && <inclusive-or-expression> + +<inclusive-or-expression> ::= <exclusive-or-expression> + | <inclusive-or-expression> | <exclusive-or-expression> + +<exclusive-or-expression> ::= <and-expression> + | <exclusive-or-expression> ^ <and-expression> + +<and-expression> ::= <equality-expression> + | <and-expression> & <equality-expression> + +<equality-expression> ::= <relational-expression> + | <equality-expression> == <relational-expression> + | <equality-expression> != <relational-expression> + +<relational-expression> ::= <shift-expression> + | <relational-expression> < <shift-expression> + | <relational-expression> > <shift-expression> + | <relational-expression> <= <shift-expression> + | <relational-expression> >= <shift-expression> + +<shift-expression> ::= <additive-expression> + | <shift-expression> << <additive-expression> + | <shift-expression> >> <additive-expression> + 
+<additive-expression> ::= <multiplicative-expression> + | <additive-expression> + <multiplicative-expression> + | <additive-expression> - <multiplicative-expression> + +<multiplicative-expression> ::= <cast-expression> + | <multiplicative-expression> * <cast-expression> + | <multiplicative-expression> / <cast-expression> + | <multiplicative-expression> % <cast-expression> + +<cast-expression> ::= <unary-expression> + | ( <type-name> ) <cast-expression> + +<unary-expression> ::= <postfix-expression> + | ++ <unary-expression> + | -- <unary-expression> + | <unary-operator> <cast-expression> + | sizeof <unary-expression> + | sizeof <type-name> + +<postfix-expression> ::= <primary-expression> + | <postfix-expression> [ <expression> ] + | <postfix-expression> ( {<assignment-expression>}* ) + | <postfix-expression> . <identifier> + | <postfix-expression> -> <identifier> + | <postfix-expression> ++ + | <postfix-expression> -- + +<primary-expression> ::= <identifier> + | <constant> + | <string> + | ( <expression> ) + +<constant> ::= <integer-constant> + | <character-constant> + | <floating-constant> + | <enumeration-constant> + +<expression> ::= <assignment-expression> + | <expression> , <assignment-expression> + +<assignment-expression> ::= <conditional-expression> + | <unary-expression> <assignment-operator> <assignment-expression> + +<assignment-operator> ::= = + | *= + | /= + | %= + | += + | -= + | <<= + | >>= + | &= + | ^= + | |= + +<unary-operator> ::= & + | * + | + + | - + | ~ + | ! + +<type-name> ::= {<specifier-qualifier>}+ {<abstract-declarator>}? + +<parameter-type-list> ::= <parameter-list> + | <parameter-list> , ... 
+ +<parameter-list> ::= <parameter-declaration> + | <parameter-list> , <parameter-declaration> + +<parameter-declaration> ::= {<declaration-specifier>}+ <declarator> + | {<declaration-specifier>}+ <abstract-declarator> + | {<declaration-specifier>}+ + +<abstract-declarator> ::= <pointer> + | <pointer> <direct-abstract-declarator> + | <direct-abstract-declarator> + +<direct-abstract-declarator> ::= ( <abstract-declarator> ) + | {<direct-abstract-declarator>}? [ {<constant-expression>}? ] + | {<direct-abstract-declarator>}? ( {<parameter-type-list>}? ) + +<enum-specifier> ::= enum <identifier> { <enumerator-list> } + | enum { <enumerator-list> } + | enum <identifier> + +<enumerator-list> ::= <enumerator> + | <enumerator-list> , <enumerator> + +<enumerator> ::= <identifier> + | <identifier> = <constant-expression> + +<typedef-name> ::= <identifier> + +<declaration> ::= {<declaration-specifier>}+ {<init-declarator>}* ; + +<init-declarator> ::= <declarator> + | <declarator> = <initializer> + +<initializer> ::= <assignment-expression> + | { <initializer-list> } + | { <initializer-list> , } + +<initializer-list> ::= <initializer> + | <initializer-list> , <initializer> + +<compound-statement> ::= { {<declaration>}* {<statement>}* } + +<statement> ::= <labeled-statement> + | <expression-statement> + | <compound-statement> + | <selection-statement> + | <iteration-statement> + | <jump-statement> + +<labeled-statement> ::= <identifier> : <statement> + | case <constant-expression> : <statement> + | default : <statement> + +<expression-statement> ::= {<expression>}? ; + +<selection-statement> ::= if ( <expression> ) <statement> + | if ( <expression> ) <statement> else <statement> + | switch ( <expression> ) <statement> + +<iteration-statement> ::= while ( <expression> ) <statement> + | do <statement> while ( <expression> ) ; + | for ( {<expression>}? ; {<expression>}? ; {<expression>}? 
) <statement> + +<jump-statement> ::= goto <identifier> ; + | continue ; + | break ; + | return {<expression>}? ; diff --git a/old/minic.ebnf b/old/minic.ebnf index 9379e35..4c354e6 100644 --- a/old/minic.ebnf +++ b/old/minic.ebnf @@ -5,3 +5,12 @@ Integer = Digit { Digit }. Letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z". Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9". + +Expression = Integer + | Expression "*" Expression + | Expression "/" Expression + | Expression "+" Expression + | Expression "-" Expression + . + + |