diff options
Diffstat (limited to 'miniany/doc/www.iro.umontreal.ca_felipe_IFT2030-Automne2002_Complements_tinyc.c')
-rw-r--r-- | miniany/doc/www.iro.umontreal.ca_felipe_IFT2030-Automne2002_Complements_tinyc.c | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/miniany/doc/www.iro.umontreal.ca_felipe_IFT2030-Automne2002_Complements_tinyc.c b/miniany/doc/www.iro.umontreal.ca_felipe_IFT2030-Automne2002_Complements_tinyc.c new file mode 100644 index 0000000..c25ee44 --- /dev/null +++ b/miniany/doc/www.iro.umontreal.ca_felipe_IFT2030-Automne2002_Complements_tinyc.c @@ -0,0 +1,291 @@ +/* file: "tinyc.c" */ + +/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* + * This is a compiler for the Tiny-C language. Tiny-C is a + * considerably stripped down version of C and it is meant as a + * pedagogical tool for learning about compilers. The integer global + * variables "a" to "z" are predefined and initialized to zero, and it + * is not possible to declare new variables. The compiler reads the + * program from standard input and prints out the value of the + * variables that are not zero. The grammar of Tiny-C in EBNF is: + * + * <program> ::= <statement> + * <statement> ::= "if" <paren_expr> <statement> | + * "if" <paren_expr> <statement> "else" <statement> | + * "while" <paren_expr> <statement> | + * "do" <statement> "while" <paren_expr> ";" | + * "{" { <statement> } "}" | + * <expr> ";" | + * ";" + * <paren_expr> ::= "(" <expr> ")" + * <expr> ::= <test> | <id> "=" <expr> + * <test> ::= <sum> | <sum> "<" <sum> + * <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> + * <term> ::= <id> | <int> | <paren_expr> + * <id> ::= "a" | "b" | "c" | "d" | ... | "z" + * <int> ::= <an_unsigned_decimal_integer> + * + * Here are a few invocations of the compiler: + * + * % echo "a=b=c=2<3;" | ./a.out + * a = 1 + * b = 1 + * c = 1 + * % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out + * i = 128 + * % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out + * i = 25 + * j = 25 + * % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out + * i = 51 + * % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out + * i = 51 + * % echo "{ i=7; if (i<5) x=1; if (i<10) y=2; }" | ./a.out + * i = 7 + * y = 2 + * + * The compiler does a minimal amount of error checking to help + * highlight the structure of the compiler. + */ + + +/*---------------------------------------------------------------------------*/ + +/* Lexer. */ + +enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR, + PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI }; + +char *words[] = { "do", "else", "if", "while", NULL }; + +int ch = ' '; +int sym; +int int_val; +char id_name[100]; + +void syntax_error() { fprintf(stderr, "syntax error\n"); exit(1); } + +void next_ch() { ch = getchar(); } + +void next_sym() +{ again: switch (ch) + { case ' ': case '\n': next_ch(); goto again; + case EOF: sym = EOI; break; + case '{': next_ch(); sym = LBRA; break; + case '}': next_ch(); sym = RBRA; break; + case '(': next_ch(); sym = LPAR; break; + case ')': next_ch(); sym = RPAR; break; + case '+': next_ch(); sym = PLUS; break; + case '-': next_ch(); sym = MINUS; break; + case '<': next_ch(); sym = LESS; break; + case ';': next_ch(); sym = SEMI; break; + case '=': next_ch(); sym = EQUAL; break; + default: + if (ch >= '0' && ch <= '9') + { int_val = 0; /* missing overflow check */ + while (ch >= '0' && ch <= '9') + { int_val = int_val*10 + (ch - '0'); next_ch(); } + sym = INT; + } + else if (ch >= 'a' && ch <= 'z') + { int i = 0; /* missing overflow check */ + while ((ch >= 'a' && ch <= 'z') || ch == '_') + { id_name[i++] = ch; next_ch(); } + id_name[i] = '\0'; + sym = 0; + while (words[sym] != NULL && strcmp(words[sym], id_name) != 0) + sym++; + if (words[sym] == NULL) + if (id_name[1] == '\0') sym = ID; else syntax_error(); + } + else + syntax_error(); + } +} + +/*---------------------------------------------------------------------------*/ + +/* Parser. */ + +enum { VAR, CST, ADD, SUB, LT, SET, + IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG }; + +struct node { int kind; struct node *o1, *o2, *o3; int val; }; +typedef struct node node; + +node *new_node(int k) +{ node *x = (node*)malloc(sizeof(node)); x->kind = k; return x; } + +node *paren_expr(); /* forward declaration */ + +node *term() /* <term> ::= <id> | <int> | <paren_expr> */ +{ node *x; + if (sym == ID) { x=new_node(VAR); x->val=id_name[0]-'a'; next_sym(); } + else if (sym == INT) { x=new_node(CST); x->val=int_val; next_sym(); } + else x = paren_expr(); + return x; +} + +node *sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */ +{ node *t, *x = term(); + while (sym == PLUS || sym == MINUS) + { t=x; x=new_node(sym==PLUS?ADD:SUB); next_sym(); x->o1=t; x->o2=term(); } + return x; +} + +node *test() /* <test> ::= <sum> | <sum> "<" <sum> */ +{ node *t, *x = sum(); + if (sym == LESS) + { t=x; x=new_node(LT); next_sym(); x->o1=t; x->o2=sum(); } + return x; +} + +node *expr() /* <expr> ::= <test> | <id> "=" <expr> */ +{ node *t, *x; + if (sym != ID) return test(); + x = test(); + if (x->kind == VAR && sym == EQUAL) + { t=x; x=new_node(SET); next_sym(); x->o1=t; x->o2=expr(); } + return x; +} + +node *paren_expr() /* <paren_expr> ::= "(" <expr> ")" */ +{ node *x; + if (sym == LPAR) next_sym(); else syntax_error(); + x = expr(); + if (sym == RPAR) next_sym(); else syntax_error(); + return x; +} + +node *statement() +{ node *t, *x; + if (sym == IF_SYM) /* "if" <paren_expr> <statement> */ + { x = new_node(IF1); + next_sym(); + x->o1 = paren_expr(); + x->o2 = statement(); + if (sym == ELSE_SYM) /* ... "else" <statement> */ + { x->kind = IF2; + next_sym(); + x->o3 = statement(); + } + } + else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */ + { x = new_node(WHILE); + next_sym(); + x->o1 = paren_expr(); + x->o2 = statement(); + } + else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */ + { x = new_node(DO); + next_sym(); + x->o1 = statement(); + if (sym == WHILE_SYM) next_sym(); else syntax_error(); + x->o2 = paren_expr(); + if (sym == SEMI) next_sym(); else syntax_error(); + } + else if (sym == SEMI) /* ";" */ + { x = new_node(EMPTY); next_sym(); } + else if (sym == LBRA) /* "{" { <statement> } "}" */ + { x = new_node(EMPTY); + next_sym(); + while (sym != RBRA) + { t=x; x=new_node(SEQ); x->o1=t; x->o2=statement(); } + next_sym(); + } + else /* <expr> ";" */ + { x = new_node(EXPR); + x->o1 = expr(); + if (sym == SEMI) next_sym(); else syntax_error(); + } + return x; +} + +node *program() /* <program> ::= <statement> */ +{ node *x = new_node(PROG); + next_sym(); x->o1 = statement(); if (sym != EOI) syntax_error(); + return x; +} + +/*---------------------------------------------------------------------------*/ + +/* Code generator. */ + +enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT }; + +typedef char code; +code object[1000], *here = object; + +void g(code c) { *here++ = c; } /* missing overflow check */ +code *hole() { return here++; } +void fix(code *src, code *dst) { *src = dst-src; } /* missing overflow check */ + +void c(node *x) +{ code *p1, *p2; + switch (x->kind) + { case VAR : g(IFETCH); g(x->val); break; + case CST : g(IPUSH); g(x->val); break; + case ADD : c(x->o1); c(x->o2); g(IADD); break; + case SUB : c(x->o1); c(x->o2); g(ISUB); break; + case LT : c(x->o1); c(x->o2); g(ILT); break; + case SET : c(x->o2); g(ISTORE); g(x->o1->val); break; + case IF1 : c(x->o1); g(JZ); p1=hole(); c(x->o2); fix(p1,here); break; + case IF2 : c(x->o1); g(JZ); p1=hole(); c(x->o2); g(JMP); p2=hole(); + fix(p1,here); c(x->o3); fix(p2,here); break; + case WHILE: p1=here; c(x->o1); g(JZ); p2=hole(); c(x->o2); + g(JMP); fix(hole(),p1); fix(p2,here); break; + case DO : p1=here; c(x->o1); c(x->o2); g(JNZ); fix(hole(),p1); break; + case EMPTY: break; + case SEQ : c(x->o1); c(x->o2); break; + case EXPR : c(x->o1); g(IPOP); break; + case PROG : c(x->o1); g(HALT); break; + } +} + +/*---------------------------------------------------------------------------*/ + +/* Virtual machine. */ + +int globals[26]; + +void run() +{ int stack[1000], *sp = stack; + code *pc = object; + again: switch (*pc++) + { case IFETCH: *sp++ = globals[*pc++]; goto again; + case ISTORE: globals[*pc++] = sp[-1]; goto again; + case IPUSH : *sp++ = *pc++; goto again; + case IPOP : --sp; goto again; + case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; goto again; + case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; goto again; + case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; goto again; + case JMP : pc += *pc; goto again; + case JZ : if (*--sp == 0) pc += *pc; else pc++; goto again; + case JNZ : if (*--sp != 0) pc += *pc; else pc++; goto again; + } +} + +/*---------------------------------------------------------------------------*/ + +/* Main program. */ + +int main() +{ int i; + + c(program()); + + for (i=0; i<26; i++) + globals[i] = 0; + run(); + for (i=0; i<26; i++) + if (globals[i] != 0) + printf("%c = %d\n", 'a'+i, globals[i]); + + return 0; +} |