/* scanner */

/* Token codes stored in Scanner.token and returned by getToken(). */
enum {
    S_PLUS = 1,
    S_MINUS,
    S_STAR,
    S_SLASH,
    S_SEMICOLON,
    S_EQUALS,
    S_INT = 10,
    S_IDENT,
    S_NUM = 20,
    S_ERR = 30,
    S_EOI = 31
};

/* Longest identifier accepted by scanIdent(); the identifier buffer is
 * allocated with one extra byte for the terminator. */
enum {
    MAX_IDENT_LEN = 20
};

/* Scanner state. Input is read from stdin through getChar(). */
struct Scanner {
    int col;        /* characters read on the current line */
    int row;        /* current line, starting at 1 */
    int c;          /* character most recently delivered by getChar() */
    int pushback;   /* character to deliver again; 0 means none pending */
    int token;      /* code of the most recently scanned token */
    int debug;      /* non-zero: getToken() prints the state after each token */
    int num;        /* value of the most recent S_NUM token */
    char *ident;    /* text of the most recently scanned identifier */
};

/* Allocates a scanner positioned at line 1 with nothing read yet.
 * The caller releases it with freeScanner(). */
struct Scanner *createScanner( )
{
    struct Scanner *scanner;

    scanner = (struct Scanner *)malloc( sizeof( struct Scanner ) );
    scanner->col = 0;
    scanner->row = 1;
    scanner->c = EOF;
    scanner->pushback = 0;
    scanner->token = 0;
    scanner->num = 0;
    scanner->ident = (char *)malloc( MAX_IDENT_LEN + 1 );
    scanner->ident[0] = 0;
    scanner->debug = 0;

    return scanner;
}

/* Releases the scanner and its identifier buffer. */
void freeScanner( struct Scanner *scanner )
{
    free( scanner->ident );
    free( (char *)scanner );
}

/* Arranges for the current character to be delivered again by the next
 * getChar() call. Only one character can be pending at a time. */
void pushBack( struct Scanner *scanner )
{
    scanner->pushback = scanner->c;
}

/* Returns the next input character (or EOF), serving a pending pushback
 * first. Position counters advance only for characters actually read from
 * stdin, so a pushed-back character is not counted twice. */
int getChar( struct Scanner *scanner )
{
    if( scanner->pushback ) {
        scanner->c = scanner->pushback;
        scanner->pushback = 0;
        return scanner->c;
    }

    scanner->c = getchar( );
    if( scanner->c == EOF ) {
        return scanner->c;
    }

    scanner->col++;
    if( scanner->c == '\n' ) {
        scanner->col = 0;
        scanner->row++;
    }

    return scanner->c;
}

/* Reads characters until one is not whitespace and returns it (may be EOF). */
int skipWhite( struct Scanner *scanner )
{
    scanner->c = getChar( scanner );
    while( isspace( scanner->c ) ) {
        scanner->c = getChar( scanner );
    }

    return scanner->c;
}

/* Prints the "Error line R, pos C: " prefix for the current position. */
void printErrorHeader( struct Scanner *scanner )
{
    putstring( "Error line " );
    putint( scanner->row );
    putstring( ", pos " );
    putint( scanner->col );
    putstring( ": " );
}

/* Scans a decimal number whose first digit is the current character and
 * stores its value in scanner->num. The first non-digit is pushed back.
 * NOTE: there is no overflow check on the accumulated value. */
void scanNumber( struct Scanner *scanner )
{
    scanner->num = scanner->c - '0';
    scanner->c = getChar( scanner );
    while( isdigit( scanner->c ) ) {
        scanner->num = 10 * scanner->num + ( scanner->c - '0' );
        scanner->c = getChar( scanner );
    }
    pushBack( scanner );
}

/* Scans an identifier (letters, digits, '_') starting at the current
 * character into scanner->ident and pushes back the first character that
 * does not belong to it. Identifiers longer than MAX_IDENT_LEN characters
 * are reported as an error and terminate the program. */
void scanIdent( struct Scanner *scanner )
{
    int n;

    n = 0;
    while( isalnum( scanner->c ) || ( scanner->c == '_' ) ) {
        /* check before storing: indexes 0..MAX_IDENT_LEN-1 hold characters,
         * index MAX_IDENT_LEN is reserved for the terminator */
        if( n >= MAX_IDENT_LEN ) {
            printErrorHeader( scanner );
            putstring( "too long identifier" );
            putnl( );
            exit( EXIT_FAILURE );
        }
        scanner->ident[n] = scanner->c;
        n++;
        scanner->c = getChar( scanner );
    }
    scanner->ident[n] = 0; /* c4 doesn't handle '\0' */
    pushBack( scanner );
}

/* Returns the keyword token code for ident, or 0 when it is not a keyword. */
int keyword( char *ident )
{
    switch( ident[0] ) {
        case 'i':
            if( strcmp( ident, "int" ) == 0 ) {
                return S_INT;
            } else {
                return 0;
            }
            break;

        default:
            return 0;
    }
}

/* Prints "row/col: token", followed by "(value)" for number tokens. */
void scannerPrintState( struct Scanner *scanner )
{
    putint( scanner->row );
    putchar( '/' );
    putint( scanner->col );
    putstring( ": " );
    putint( scanner->token );
    if( scanner->token == S_NUM ) {
        putchar( '(' );
        putint( scanner->num );
        putchar( ')' );
    }
    putnl( );
}

/* Consumes the rest of a line comment, up to and including the newline
 * (or up to EOF). Called after both slashes have been read. */
void skipLineComment( struct Scanner *scanner )
{
    int c;

    c = getChar( scanner );
    while( c != EOF && c != '\n' ) {
        c = getChar( scanner );
    }
}

/* Consumes a block comment body including its closing delimiter. Called
 * after the opening slash and star have been read. Nothing past the closing
 * slash is consumed. An unterminated comment silently runs to EOF. */
void skipBlockComment( struct Scanner *scanner )
{
    int c;
    int closed;

    closed = 0;
    /* start with the first character of the body so that the opening star
     * cannot double as the star of the closing delimiter */
    c = getChar( scanner );
    while( c != EOF && closed == 0 ) {
        if( c == '*' ) {
            c = getChar( scanner );
            if( c == '/' ) {
                closed = 1;
            }
            /* otherwise re-examine c: it may itself be a star */
        } else {
            c = getChar( scanner );
        }
    }
}

/* Scans the next token, stores its code in scanner->token and returns it.
 * Comments are skipped. An unknown character is reported and terminates the
 * program. With scanner->debug set, the scanner state is printed. */
int getToken( struct Scanner *scanner )
{
    int c;

    c = skipWhite( scanner );
    switch( c ) {
        case EOF:
            scanner->token = S_EOI;
            break;

        case '+':
            scanner->token = S_PLUS;
            break;

        case '-':
            scanner->token = S_MINUS;
            break;

        case '*':
            scanner->token = S_STAR;
            break;

        case '/':
            /* NOTE: after a comment the token comes from a nested call, so
             * with debug enabled the state is printed by the nested call and
             * again at the end of this one. */
            c = getChar( scanner );
            if( c == '/' ) {
                skipLineComment( scanner );
                scanner->token = getToken( scanner );
            } else if( c == '*' ) {
                skipBlockComment( scanner );
                scanner->token = getToken( scanner );
            } else {
                pushBack( scanner );
                scanner->token = S_SLASH;
            }
            break;

        case ';':
            scanner->token = S_SEMICOLON;
            break;

        case '=':
            scanner->token = S_EQUALS;
            break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            scanNumber( scanner );
            scanner->token = S_NUM;
            break;

        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
        case '_':
            scanIdent( scanner );
            /* keyword() yields 0 for anything that is not a keyword */
            if( ( scanner->token = keyword( scanner->ident ) ) ) {
            } else {
                scanner->token = S_IDENT;
            }
            break;

        default:
            scanner->token = S_ERR;
            printErrorHeader( scanner );
            putstring( "unknown token '" );
            putchar( c );
            putstring( "'" );
            putnl( );
            exit( EXIT_FAILURE );
    }

    if( scanner->debug ) {
        scannerPrintState( scanner );
    }

    return scanner->token;
}

/* symbol table */

/* A named symbol; symbols of one scope form a singly linked list. */
struct Symbol {
    char *name;
    struct Symbol *next;
};

/* Allocates an unlinked symbol holding its own copy of s. */
struct Symbol *createSymbol( char *s )
{
    struct Symbol *sym;

    sym = (struct Symbol *)malloc( sizeof ( struct Symbol ) );
    sym->name = strdup( s );
    sym->next = NULL;

    return sym;
}

/* Releases one symbol and its name; linked symbols are not touched. */
void freeSymbol( struct Symbol *sym )
{
    free( sym->name );
    free( (char *)sym );
}

/* scope */

/* A named scope owning a list of symbols. */
struct Scope {
    char *name;
    struct Symbol *sym;
};

/* Allocates an empty scope holding its own copy of name. */
struct Scope *createScope( char *name )
{
    struct Scope *scope;

    scope = (struct Scope *)malloc( sizeof( struct Scope ) );
    scope->name = strdup( name );
    scope->sym = NULL;

    return scope;
}

/* Releases the scope together with every symbol in its list. */
void freeScope( struct Scope *scope )
{
    struct Symbol *sym, *next;

    free( scope->name );
    sym = scope->sym;
    while( sym != NULL ) {
        /* fetch the successor before the node is released */
        next = sym->next;
        freeSymbol( sym );
        sym = next;
    }
    free( (char *)scope );
}

/* Returns the symbol called name in scope, or NULL when there is none. */
struct Symbol *getSymbol( struct Scope *scope, char *name )
{
    struct Symbol *sym;

    sym = scope->sym;
    while( sym != NULL ) {
        if( strcmp( name, sym->name ) == 0 ) {
            return sym;
        }
        sym = sym->next;
    }

    return NULL;
}

/* Puts sym at the head of the scope's symbol list. The scope frees it in
 * freeScope(). No duplicate check is made here. */
void insertSymbol( struct Scope *scope, struct Symbol *sym )
{
    if( scope->sym == NULL ) {
        scope->sym = sym;
    } else {
        sym->next = scope->sym;
        scope->sym = sym;
    }
}

/* parser */

/* Parser state: the current lookahead token, the scanner delivering tokens
 * and the scope holding global symbols. */
struct Parser {
    int token;
    struct Scanner *scanner;
    struct Scope *global_scope;
};

/* Allocates a parser with its own scanner and an empty "global" scope.
 * No token has been read yet; the caller fetches the first one. */
struct Parser *createParser( )
{
    struct Parser *parser;

    parser = (struct Parser *)malloc( sizeof( struct Parser ) );
    parser->token = 0;
    parser->scanner = createScanner( );
    parser->global_scope = createScope( "global" );

    return parser;
}

/* Releases the parser together with its scope and scanner. */
void freeParser( struct Parser *parser )
{
    freeScope( parser->global_scope );
    freeScanner( parser->scanner );
    free( (char *)parser );
}

/* Advances to the next token when the current one equals must; otherwise
 * reports "<what> expected" and terminates the program. */
void parserExpect( struct Parser *parser, int must, char *what )
{
    if( parser->token == must ) {
        parser->token = getToken( parser->scanner );
    } else {
        printErrorHeader( parser->scanner );
        putstring( what );
        putstring( " expected" );
        putnl( );
        exit( EXIT_FAILURE );
    }
}

/* Parses an operand followed by any number of "operator operand" pairs,
 * stopping at ';' or end of input (the terminator is left as the current
 * token). Number operands are printed as "immediate int N"; the kind of any
 * other operand token is not checked here. Errors terminate the program. */
void parseExpression( struct Parser *parser )
{
    if( parser->token == S_EOI ) {
        printErrorHeader( parser->scanner );
        putstring( "unexpected eof in expression" );
        putnl( );
        exit( EXIT_FAILURE );
    }

    if( parser->token == S_NUM ) {
        putstring( "immediate int " );
        putint( parser->scanner->num );
        putnl( );
    }

    parser->token = getToken( parser->scanner );
    if( parser->token == S_PLUS || parser->token == S_MINUS
        || parser->token == S_STAR || parser->token == S_SLASH ) {
        /* every operator is handled alike: skip it, parse the right side */
        parser->token = getToken( parser->scanner );
        parseExpression( parser );
    } else if( parser->token == S_EOI || parser->token == S_SEMICOLON ) {
        return;
    } else {
        printErrorHeader( parser->scanner );
        putstring( "unexpected token '" );
        putint( parser->token );
        putstring( "' in expression" );
        putnl( );
        exit( EXIT_FAILURE );
    }
}

/* Parses "int <identifier> ;" and adds the identifier to the global scope.
 * A name that already exists is reported and terminates the program. */
void parseDeclaration( struct Parser *parser )
{
    struct Symbol *sym;

    parserExpect( parser, S_INT, "int" );
    parserExpect( parser, S_IDENT, "identifier" );
    /* NOTE: parserExpect() has already scanned the token after the
     * identifier; scanner->ident still holds the declared name unless that
     * following token is itself an identifier. */
    putstring( "Adding glob: " );
    putstring( parser->scanner->ident );
    putnl( );

    sym = getSymbol( parser->global_scope, parser->scanner->ident );
    if( sym == NULL ) {
        sym = createSymbol( parser->scanner->ident );
        insertSymbol( parser->global_scope, sym );
    } else {
        printErrorHeader( parser->scanner );
        putstring( "duplicate global symbol '" );
        putstring( parser->scanner->ident );
        putstring( "'" );
        putnl( );
        exit( EXIT_FAILURE );
    }

    parserExpect( parser, S_SEMICOLON, ";" );
}

/* Parses "<identifier> = <expression> ;". The identifier must already be in
 * the global scope; otherwise an error is reported and the program
 * terminates. */
void parseAssignment( struct Parser *parser )
{
    struct Symbol *sym;

    parserExpect( parser, S_IDENT, "identifier" );
    /* NOTE: as in parseDeclaration(), the scanner is already one token past
     * the identifier when scanner->ident is read here. */
    sym = getSymbol( parser->global_scope, parser->scanner->ident );
    if( sym == NULL ) {
        printErrorHeader( parser->scanner );
        putstring( "unknown symbol '" );
        putstring( parser->scanner->ident );
        putstring( "'" );
        putnl( );
        exit( EXIT_FAILURE );
    }

    parserExpect( parser, S_EQUALS, "=" );
    parseExpression( parser );
    parserExpect( parser, S_SEMICOLON, ";" );
}

/* Parses one statement chosen by the current token: a declaration for
 * "int", an assignment for an identifier, nothing at end of input. Any
 * other token is reported and terminates the program. */
void parseStatement( struct Parser *parser )
{
    if( parser->token == S_INT ) {
        parseDeclaration( parser );
    } else if( parser->token == S_IDENT ) {
        parseAssignment( parser );
    } else if( parser->token == S_EOI ) {
        return;
    } else {
        printErrorHeader( parser->scanner );
        putstring( "unexpected token '" );
        putint( parser->token );
        putstring( "'" );
        putnl( );
        exit( EXIT_FAILURE );
    }
}

/* main */

/* Parses statements from stdin until end of input, with scanner debug
 * output switched on. */
int main( int argc, char **argv )
{
    struct Parser *parser;

    parser = createParser( );
    parser->scanner->debug = 1;

    parser->token = getToken( parser->scanner );
    while( parser->token != S_EOI ) {
        parseStatement( parser );
    }

    freeParser( parser );

    exit( EXIT_SUCCESS );
    return EXIT_SUCCESS;
}