/*
 * Scanner (lexer): turns a NUL-terminated source string into a stream of
 * scanner_Symbol tokens.
 *
 * State convention used throughout this file:
 *   s->peek  is the current (already read) character,
 *   s->pos   points at the next character that get_char() will read,
 *   s->row / s->col describe the position of s->peek.
 */
#include "scan.h"
#include "io.h"
#include "minilib.h"
#include "const.h"

#include "stdlib.h"
#include "string.h"
#include "ctype.h"

/*
 * Bind the scanner to the source text 'src' and rewind it.
 * Debug output starts disabled; enable it with scanner_debug() afterwards.
 */
void scanner_init( Scanner *s, char *src )
{
	s->src = src;
	s->debug = 0;	/* scanner_scan() reads this flag, so never leave it unset */
	scanner_reset( s );
}

/*
 * Rewind the scanner to the beginning of its source text.
 * The current character is primed with a blank so that the first
 * skip_whitespace() call reads the first real character.
 */
void scanner_reset( Scanner *s )
{
	s->peek = ' ';
	s->row = 1;
	s->col = 0;
	s->pos = s->src;
}

/* Release scanner resources; there is currently nothing to release. */
void scanner_done( Scanner *s )
{
	(void)s;
}

/*
 * Read the next character, make it the current one (s->peek) and return it.
 * Once the terminating NUL has been reached the read position stays on it,
 * so repeated calls keep returning '\0' instead of reading past the buffer.
 */
static char get_char( Scanner *s )
{
	char c = *s->pos;

	if( c != '\0' ) {
		s->pos++;
	}
	s->peek = c;
	s->col++;
	if( c == '\n' ) {
		/* same starting column as scanner_reset(): the first character
		 * of every row is reported in column 1 */
		s->col = 0;
		s->row++;
	}

	return c;
}

/*
 * Look ahead without consuming input: returns the character 'seek'
 * positions after the current one (seek == 1 is the character right after
 * s->peek). Returns '\0' if the source ends before that position.
 */
static char peek_char( Scanner *s, int seek )
{
	char *pos = s->pos;
	char c = *pos;

	while( c != '\0' && seek > 0 ) {
		c = *pos;
		pos++;
		seek--;
	}

	return c;
}

/* Consume 'seek' characters. */
static void skip_char( Scanner *s, int seek )
{
	while( seek > 0 ) {
		get_char( s );
		seek--;
	}
}

/*
 * Advance until the current character is not a blank, tab, carriage return
 * or newline. Stops at end of input as well, since '\0' is none of these.
 */
static void skip_whitespace( Scanner *s )
{
	while( s->peek == ' ' || s->peek == '\t'
		|| s->peek == '\r' || s->peek == '\n' ) {
		get_char( s );
	}
}

/*
 * Print "ERROR in row R, col C: msg" and call halt().
 * NOTE(review): 'msg' is appended with strcat() into a MAX_TMP_LEN buffer;
 * callers must keep messages short enough to fit.
 */
static void error( Scanner *s, char *msg )
{
	char buf[MAX_TMP_LEN];
	char buf2[12];

	buf[0] = '\0';
	itoa( s->row, buf2, 10 );
	strcat( buf, "ERROR in row " );
	strcat( buf, buf2 );
	itoa( s->col, buf2, 10 );
	strcat( buf, ", col " );
	strcat( buf, buf2 );
	strcat( buf, ": " );
	strcat( buf, msg );
	print( buf );
	halt( );
}

/* Report character 'c' as unexpected at the current position. */
static void unexpected_char( Scanner *s, int c )
{
	char msg[MAX_TMP_LEN];

	msg[0] = '\0';
	strcat( msg, "unexpected character '" );
	strcat_c( msg, c );
	strcat( msg, "'" );
	error( s, msg );
}

/*
 * Skip a block comment. On entry the current character is the opening
 * slash and the next one is the star. On return the current character is
 * the first one after the comment. A star inside the comment body that is
 * not followed by a slash is ordinary comment text. Reaching the end of
 * input before the terminator reports "unclosed comment".
 */
static void skip_comment( Scanner *s )
{
	/* consume the opening slash and star */
	get_char( s );
	get_char( s );

	for( ;; ) {
		if( s->peek == '\0' ) {
			error( s, "unclosed comment" );
			return;
		}
		if( s->peek == '*' && peek_char( s, 1 ) == '/' ) {
			get_char( s );	/* current character is now the slash */
			get_char( s );	/* first character after the comment */
			return;
		}
		get_char( s );
	}
}

/*
 * True if 'c' may appear inside an identifier (letter, digit, underscore).
 * The cast keeps the <ctype.h> call defined for negative char values.
 */
static int is_ident_char( int c )
{
	return isalnum( (unsigned char)c ) || c == '_';
}

/*
 * If the input at the current character spells exactly the keyword 'kw'
 * (that is, 'kw' followed by a character that cannot continue an
 * identifier), consume it and return 1. Otherwise consume nothing and
 * return 0.
 */
static int try_keyword( Scanner *s, const char *kw )
{
	int i;

	if( s->peek != kw[0] ) {
		return 0;
	}
	for( i = 1; kw[i] != '\0'; i++ ) {
		if( peek_char( s, i ) != kw[i] ) {
			return 0;
		}
	}
	/* i is now the keyword length; peek_char( s, i ) is the character
	 * right behind the keyword */
	if( is_ident_char( peek_char( s, i ) ) ) {
		return 0;
	}

	skip_char( s, i );
	return 1;
}

/*
 * Scan an identifier starting at the current character into an S_IDENT
 * symbol. At most MAX_IDENT_LEN characters are stored; a longer identifier
 * is reported as an error instead of being silently split in two.
 * NOTE(review): assumes sym.data.s has room for MAX_IDENT_LEN characters
 * plus the terminator - confirm against the declaration in scan.h.
 */
static scanner_Symbol parse_ident( Scanner *s )
{
	scanner_Symbol sym;
	int len = 0;

	sym.sym = S_IDENT;
	sym.data.s[0] = '\0';

	while( len < MAX_IDENT_LEN && is_ident_char( s->peek ) ) {
		strcat_c( sym.data.s, s->peek );
		len++;
		get_char( s );
	}
	if( is_ident_char( s->peek ) ) {
		error( s, "identifier too long" );
	}

	return sym;
}

/*
 * Scan a decimal integer constant starting at the current character into
 * an S_INT_CONST symbol. At most MAX_NUMBER_LEN digits are accepted; a
 * longer digit sequence is reported as an error instead of being silently
 * split in two.
 */
static scanner_Symbol parse_number( Scanner *s )
{
	scanner_Symbol sym;
	int len = 0;

	sym.sym = S_INT_CONST;
	sym.data.i = 0;

	/* TODO: unsigned, signed, overflow handling */
	while( len < MAX_NUMBER_LEN && isdigit( (unsigned char)s->peek ) ) {
		int digit = s->peek - '0';

		sym.data.i = sym.data.i * 10 + digit;
		len++;
		get_char( s );
	}
	if( isdigit( (unsigned char)s->peek ) ) {
		error( s, "number too long" );
	}

	return sym;
}

/*
 * Return the next symbol from the input, skipping whitespace and block
 * comments. Returns S_eof at end of input and, after reporting it, for an
 * unexpected character.
 *
 * NOTE: only identifiers starting with one of the letters i, v, r, m, a, n
 * are recognized; any other leading letter is reported as unexpected.
 */
scanner_Symbol scanner_scan( Scanner *s )
{
	scanner_Symbol sym;

	for( ;; ) {
		skip_whitespace( s );

		switch( s->peek ) {
			case '\0':
				sym.sym = S_eof;
				if( s->debug ) {
					print( "SCANNER(EOF)" );
				}
				return sym;

			case '/':
				if( peek_char( s, 1 ) == '*' ) {
					if( s->debug ) {
						print( "SCANNER(COMMENT_START)" );
					}
					skip_comment( s );
					if( s->debug ) {
						print( "SCANNER(COMMENT_END)" );
					}
					continue;	/* restart the scan loop after the comment */
				}
				get_char( s );
				sym.sym = S_DIV;
				return sym;

			case 'i':
				if( try_keyword( s, "int" ) ) {
					sym.sym = S_INT;
					return sym;
				}
				return parse_ident( s );

			case 'v':
				if( try_keyword( s, "void" ) ) {
					sym.sym = S_VOID;
					return sym;
				}
				return parse_ident( s );

			case 'r':
				if( try_keyword( s, "return" ) ) {
					sym.sym = S_RETURN;
					return sym;
				}
				return parse_ident( s );

			case 'm':
			case 'a':
			case 'n':
				return parse_ident( s );

			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				return parse_number( s );

			case ';':
				get_char( s );
				sym.sym = S_SEMICOLON;
				return sym;

			case '(':
				get_char( s );
				sym.sym = S_LPARENT;
				return sym;

			case ')':
				get_char( s );
				sym.sym = S_RPARENT;
				return sym;

			case '{':
				get_char( s );
				sym.sym = S_LCURL;
				return sym;

			case '}':
				get_char( s );
				sym.sym = S_RCURL;
				return sym;

			default:
				unexpected_char( s, s->peek );
				sym.sym = S_eof;
				return sym;
		}
	}
}

/* Switch debug tracing of the scanner on (non-zero) or off (zero). */
void scanner_debug( Scanner *s, int enable )
{
	s->debug = enable;
	if( s->debug ) {
		print( "SCANNER DEBUGGING ENABLED" );
	}
}