/*
 * miniany/cc.c - scanner and (very early) statement/expression parser.
 *
 * State of the file as of commit 9ba1bb3d2007b37e3392208ef027f88d78e51c3e
 * ("cc: some work on the scanner, expression parser", Andreas Baumann,
 * 2021-08-13).
 *
 * The code deliberately sticks to a tiny C subset (no arrays, no
 * initialisers in declarations, no '\0' literal, only if/else/while) because
 * the comments in the original note limitations of the c4 compiler it is
 * meant to be compiled with.  Output goes through putstring/putint/putnl,
 * which are provided outside of this file.
 */

/* current input position, used for error messages and the scanner trace */
int col;
int row;

/* one character of look-ahead; 0 means "nothing pushed back" */
int pushback;

/* the current token the parser is looking at */
int token;

/* when non-zero, getToken prints a trace line for every token it returns */
int DEBUG_SCANNER;

enum {
	MAX_IDENT_LEN = 20,
	TEST = -1
};

/*
 * Hand a character back to the scanner; the next getChar returns it.
 * Only a single character can be pending at a time.
 */
void pushBack( int c )
{
	pushback = c;
}

/*
 * Return the next input character (or EOF), preferring a pushed-back
 * character over reading from stdin.  Characters freshly read from stdin
 * advance col; a newline resets col to 0 and advances row.  A pushed-back
 * character was already counted when it was first read, so it does not
 * touch row/col again.
 */
int getChar( )
{
	int c;

	if( pushback ) {
		c = pushback;
		pushback = 0;
		return c;
	}

	c = getchar( );
	if( c == EOF ) {
		return c;
	}
	col++;
	if( c == '\n' ) {
		col = 0;
		row++;
	}

	return c;
}

/*
 * Skip white space and return the first character which is not white
 * space (this may be EOF).
 */
int skipWhite( )
{
	int c;

	c = getChar( );
	while( isspace( c ) ) {
		c = getChar( );
	}

	return c;
}

/* token codes returned by getToken; all of them are non-zero */
enum {
	S_PLUS = 1,
	S_MINUS,
	S_STAR,
	S_SLASH,
	S_SEMICOLON,
	S_EQUALS,
	S_INT = 10,
	S_IDENT,
	S_NUM = 20,
	S_ERR = 30,
	S_EOI = 31
};

/* Print the "Error line <row>, pos <col>: " prefix of an error message. */
void printErrorHeader( )
{
	putstring( "Error line " );
	putint( row );
	putstring( ", pos " );
	putint( col );
	putstring( ": " );
}

/* value of the most recently scanned S_NUM token */
int num;

/*
 * Scan a decimal number whose first digit c has already been read and
 * store its value in num.  The first non-digit is pushed back.
 */
void scanNumber( int c )
{
	num = c - '0';
	c = getChar( );
	while( isdigit( c ) ) {
		num = 10 * num + ( c - '0' );
		c = getChar( );
	}
	pushBack( c );
}

/* c4: no data segment allocation in char array declaration */
/*
 * Text of the most recently scanned identifier; main() points it at a
 * buffer before scanning starts.
 */
char *ident;
/*char ident[20];
  char ident[MAX_IDENT_LEN];
*/

/*
 * Scan an identifier (letters, digits, '_') whose first character c has
 * already been read and store it, zero-terminated, in ident.  The first
 * character not belonging to the identifier is pushed back.  An identifier
 * that reaches MAX_IDENT_LEN - 1 characters is reported as an error and
 * terminates the program.
 */
void scanIdent( int c )
{
	int n;

	n = 0;
	while( isalnum( c ) || ( c == '_' ) ) {
		ident[n] = c;
		n++;
		if( n >= MAX_IDENT_LEN - 1 ) {
			printErrorHeader( );
			putstring( "too long identifier" );
			putnl( );
			exit( EXIT_FAILURE );
		}
		c = getChar( );
	}
	ident[n] = 0; /* c4 doesn't handle '\0' */
	pushBack( c );
}

/*
 * Return the token code if ident is a keyword, 0 otherwise.  The test on
 * the first character merely avoids calling strcmp for most identifiers.
 */
int keyword( char *ident )
{
	if( *ident == 'i' ) {
		if( strcmp( ident, "int" ) == 0 ) {
			return S_INT;
		}
	}

	return 0;
}

/*
 * Skip the rest of a line comment; the introducing "//" has been consumed.
 * Stops after the newline or at EOF, so a comment on the last line of an
 * input without trailing newline no longer loops forever.
 */
void skipLineComment( )
{
	int c;

	c = getChar( );
	while( c != '\n' && c != EOF ) {
		c = getChar( );
	}
}

/*
 * Skip a block comment; the introducing slash-star has been consumed.
 * The comment ends with the first '*' directly followed by '/', where the
 * '*' must come after the opening sequence (so slash-star-slash does not
 * close the comment).  Nothing after the closing '/' is consumed.  EOF
 * inside the comment is an error.
 */
void skipBlockComment( )
{
	int prev;
	int c;

	prev = 0;
	c = getChar( );
	while( !( prev == '*' && c == '/' ) ) {
		if( c == EOF ) {
			printErrorHeader( );
			putstring( "unterminated comment" );
			putnl( );
			exit( EXIT_FAILURE );
		}
		prev = c;
		c = getChar( );
	}
}

/*
 * Scan and return the next token, skipping white space and comments.
 * Side effects: num is set for S_NUM, ident is set for S_IDENT and
 * keywords.  An unknown character is reported and terminates the program.
 * With DEBUG_SCANNER set, one trace line "<row>/<col>: <token>" (plus the
 * value for numbers) is printed per returned token.
 */
int getToken( )
{
	int t;
	int c;

	/* 0 is not a valid token code: loop until a real token was found,
	 * comments leave t untouched and simply trigger another round */
	t = 0;
	while( t == 0 ) {
		c = skipWhite( );

		if( c == EOF ) {
			t = S_EOI;
		} else if( c == '+' ) {
			t = S_PLUS;
		} else if( c == '-' ) {
			t = S_MINUS;
		} else if( c == '*' ) {
			t = S_STAR;
		} else if( c == '/' ) {
			c = getChar( );
			if( c == '/' ) {
				skipLineComment( );
			} else if( c == '*' ) {
				skipBlockComment( );
			} else {
				pushBack( c );
				t = S_SLASH;
			}
		} else if( c == ';' ) {
			t = S_SEMICOLON;
		} else if( c == '=' ) {
			t = S_EQUALS;
		} else if( isdigit( c ) ) {
			scanNumber( c );
			t = S_NUM;
		} else if( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) || ( c == '_' ) ) {
			/* same start characters as scanIdent accepts inside an
			 * identifier, minus the digits */
			scanIdent( c );
			t = keyword( ident );
			if( t == 0 ) {
				t = S_IDENT;
			}
		} else {
			printErrorHeader( );
			putstring( "unknown token '" );
			putchar( c );
			putstring( "'" );
			putnl( );
			exit( EXIT_FAILURE );
		}
	}

	if( DEBUG_SCANNER ) {
		putint( row );
		putchar( '/' );
		putint( col );
		putstring( ": " );
		putint( t );
		if( t == S_NUM ) {
			putchar( '(' );
			putint( num );
			putchar( ')' );
		}
		putnl( );
	}

	return t;
}

/*
 * Consume the current token if it is 'must'; otherwise print
 * "<what> expected" and terminate the program.
 */
void expect( int must, char *what )
{
	if( token != must ) {
		printErrorHeader( );
		putstring( what );
		putstring( " expected" );
		putnl( );
		exit( EXIT_FAILURE );
	}

	token = getToken( );
}

/*
 * Parse "operand { operator operand }" where the operators are + - * /.
 * There is no precedence handling yet.  For number operands a line
 * "immediate int <value>" is printed; identifier operands are accepted but
 * produce no output.  On return token is the S_SEMICOLON or S_EOI which
 * ended the expression.  Anything else in operand or operator position is
 * reported and terminates the program.
 */
void parseExpression( )
{
	int more;

	more = 1;
	while( more ) {
		if( token == S_EOI ) {
			printErrorHeader( );
			putstring( "unexpected eof in expression" );
			putnl( );
			exit( EXIT_FAILURE );
		}

		if( token == S_NUM ) {
			putstring( "immediate int " );
			putint( num );
			putnl( );
		} else if( token != S_IDENT ) {
			/* without this check e.g. "x = ;" would silently swallow
			 * the token following the semicolon */
			printErrorHeader( );
			putstring( "unexpected token '" );
			putint( token );
			putstring( "' in expression" );
			putnl( );
			exit( EXIT_FAILURE );
		}

		token = getToken( );
		if( token == S_PLUS || token == S_MINUS || token == S_STAR || token == S_SLASH ) {
			/* step over the operator, the next round handles the operand */
			token = getToken( );
		} else if( token == S_EOI || token == S_SEMICOLON ) {
			more = 0;
		} else {
			printErrorHeader( );
			putstring( "unexpected token '" );
			putint( token );
			putstring( "' in expression" );
			putnl( );
			exit( EXIT_FAILURE );
		}
	}
}

/* Parse "int <identifier> ;" and print the name of the declared global. */
void parseDeclaration( )
{
	expect( S_INT, "int" );
	expect( S_IDENT, "identifier" );
	/* ident still holds the name here as long as the token following the
	 * identifier is not itself an identifier or keyword */
	putstring( "Adding glob: " );
	putstring( ident );
	putnl( );
	expect( S_SEMICOLON, ";" );
}

/*
 * Parse "<identifier> = <expression> ;".  The caller has already checked
 * that the current token is the identifier, so it is just stepped over.
 */
void parseAssignment( )
{
	token = getToken( );
	expect( S_EQUALS, "=" );
	parseExpression( );
	expect( S_SEMICOLON, ";" );
}

/*
 * Parse one statement: a declaration or an assignment.  Any other token
 * (except end of input) is an error; silently returning without consuming
 * it would make the loop in main() spin forever.
 */
void parseStatement( )
{
	if( token == S_INT ) {
		parseDeclaration( );
	} else if( token == S_IDENT ) {
		parseAssignment( );
	} else if( token == S_EOI ) {
		return;
	} else {
		printErrorHeader( );
		putstring( "unexpected token '" );
		putint( token );
		putstring( "' in statement" );
		putnl( );
		exit( EXIT_FAILURE );
	}
}

/*
 * Read a program from stdin and parse it statement by statement until end
 * of input.  The scanner trace is currently always switched on.
 */
int main( int argc, char **argv )
{
	col = 0;
	row = 1;
	pushback = 0;
	DEBUG_SCANNER = 1;
	/* Work-around for the missing char array support noted at the
	 * declaration of ident: a string literal serves as 20 character buffer.
	 * NOTE(review): scanIdent writes into this literal; a hosted C compiler
	 * may place string literals in read-only memory, so this presumably
	 * only works with the intended c4-style compiler - confirm before
	 * building natively. */
	ident = "12345678901234567890";

	token = getToken( );
	while( token != S_EOI ) {
		parseStatement( );
	}

	exit( EXIT_SUCCESS );
	/* NOTE(review): the visible hunk of the patch ended at the exit call
	 * above; any statements which followed it in main() were not visible. */
}