/*-------------------------------------------------------------------------
Coco.ATG -- Attributed Grammar
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
ported to C++ by Csaba Balazs, University of Szeged
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than
Coco/R itself) does not fall under the GNU General Public License.
-------------------------------------------------------------------------*/
/*-------------------------------------------------------------------------
 compile with:
   Coco Coco.ATG -namespace Coco
-------------------------------------------------------------------------*/
$namespace=Coco

#include "Tab.h"
#include "DFA.h"
#include "ParserGen.h"

COMPILER Coco

  // Kinds reported by the Sym production: id for an identifier,
  // str for a string or character literal. Values are assigned in Init().
  int id;
  int str;

  FILE* trace;    // other Coco objects referenced in this ATG
  Tab *tab;
  DFA *dfa;
  ParserGen *pgen;

  // Set to true at "COMPILER"; cleared by TokenDecl when a token named by an
  // identifier is declared without a structure. Gates MakeDeterministic()
  // and WriteScanner().
  bool genScanner;
  wchar_t* tokenString;  // used in declarations of literal tokens
  wchar_t* noString;     // used in declarations of literal tokens

  // This method will be called by the constructor if it exists.
  // This support is specific to the C++ version of Coco/R.
  void Init() {
    tab = NULL;
    dfa = NULL;
    pgen = NULL;
    id  = 0;
    str = 1;
    tokenString = NULL;
    noString = coco_string_create(L"-none-");
  }

  // Uncomment this method if cleanup is necessary,
  // this method will be called by the destructor if it exists.
  // This support is specific to the C++ version of Coco/R.
  // void Destroy() {
    // nothing to do
  // }

/*-------------------------------------------------------------------------*/

CHARACTERS
  letter    = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".
  digit     = "0123456789".
  cr        = '\r'.
  lf        = '\n'.
  tab       = '\t'.
  stringCh  = ANY - '"' - '\\' - cr - lf.
  charCh    = ANY - '\'' - '\\' - cr - lf.
  printable = '\u0020' .. '\u007e'.
  hex       = "0123456789abcdef".

TOKENS
  ident     = letter { letter | digit }.
  number    = digit { digit }.
  string    = '"' { stringCh | '\\' printable } '"'.
  badString = '"' { stringCh | '\\' printable } (cr | lf).
  char      = '\'' ( charCh | '\\' printable { hex } ) '\''.

PRAGMAS
  ddtSym    = '$' { digit | letter }.
                                (. tab->SetDDT(la->val); .)

  optionSym = '$' letter { letter } '=' { digit | letter | '-' | '.' | ':' }.
                                (. tab->SetOption(la->val); .)

COMMENTS FROM "/*" TO "*/" NESTED
COMMENTS FROM "//" TO lf

IGNORE cr + lf + tab

/*-------------------------------------------------------------------------*/

PRODUCTIONS

// Start symbol: parses a complete grammar file (leading text, COMPILER header,
// global declarations, scanner specification, productions, END) and then
// triggers the grammar checks and the parser/scanner generation.
Coco                            (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; .)
=
                                (. int beg = la->pos; int line = la->line; .)
  { // this section can be used
    // for #include statements
    ANY
  }                             (. if (la->pos != beg) {
                                     pgen->usingPos = new Position(beg, t->pos + coco_string_length(t->val), 0, line);
                                   }
                                 .)

  "COMPILER"                    (. genScanner = true;
                                   tab->ignored = new CharSet(); .)
  ident                         (. gramName = coco_string_create(t->val);
                                   beg = la->pos;
                                   line = la->line;
                                 .)
  { ANY }                       (. tab->semDeclPos = new Position(beg, la->pos, 0, line); .)
  [ "IGNORECASE"                (. dfa->ignoreCase = true; .) ]   /* pdt */
  [ "CHARACTERS" { SetDecl }]
  [ "TOKENS"  { TokenDecl<Node::t> }]
  [ "PRAGMAS" { TokenDecl<Node::pr> }]
  { "COMMENTS"                  (. bool nested = false; .)
    "FROM" TokenExpr<g1>
    "TO" TokenExpr<g2>
    [ "NESTED"                  (. nested = true; .)
    ]                           (. dfa->NewComment(g1->l, g2->l, nested); .)
  }
  { "IGNORE" Set<s>             (. tab->ignored->Or(s); .)
  }

  SYNC
  "PRODUCTIONS"                 (. if (genScanner) dfa->MakeDeterministic();
                                   tab->DeleteNodes();
                                 .)
  { ident                       (. sym = tab->FindSym(t->val);
                                   bool undef = (sym == NULL);
                                   if (undef) sym = tab->NewSym(Node::nt, t->val, t->line);
                                   else {
                                     if (sym->typ == Node::nt) {
                                       if (sym->graph != NULL) SemErr(L"name declared twice");
                                     } else SemErr(L"this symbol kind not allowed on left side of production");
                                     sym->line = t->line;
                                   }
                                   // remember whether earlier uses had attributes, then let
                                   // AttrDecl record the declared ones
                                   bool noAttrs = (sym->attrPos == NULL);
                                   sym->attrPos = NULL;
                                 .)
    [ AttrDecl<sym> ]           (. if (!undef)
                                     if (noAttrs != (sym->attrPos == NULL))
                                       SemErr(L"attribute mismatch between declaration and use of this symbol");
                                 .)
    [ SemText<.sym->semPos.> ] WEAK '='
    Expression<g>               (. sym->graph = g->l;
                                   tab->Finish(g);
                                 .)
    WEAK '.'
  }
  "END" ident                   (. if (!coco_string_equal(gramName, t->val))
                                     SemErr(L"name does not match grammar name");
                                   tab->gramSy = tab->FindSym(gramName);
                                   if (tab->gramSy == NULL)
                                     SemErr(L"missing production for grammar name");
                                   else {
                                     sym = tab->gramSy;
                                     if (sym->attrPos != NULL)
                                       SemErr(L"grammar symbol must not have attributes");
                                   }
                                   tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number
                                   tab->SetupAnys();
                                   tab->RenumberPragmas();
                                   if (tab->ddt[2]) tab->PrintNodes();
                                   if (errors->count == 0) {
                                     wprintf(L"checking\n");
                                     tab->CompSymbolSets();
                                     if (tab->ddt[7]) tab->XRef();
                                     if (tab->GrammarOk()) {
                                       wprintf(L"parser");
                                       pgen->WriteParser();
                                       if (genScanner) {
                                         wprintf(L" + scanner");
                                         dfa->WriteScanner();
                                         if (tab->ddt[0]) dfa->PrintStates();
                                       }
                                       wprintf(L" generated\n");
                                       if (tab->ddt[8]) pgen->WriteStatistics();
                                     }
                                   }
                                   if (tab->ddt[6]) tab->PrintSymbolTable();
                                 .)
  '.'
.

/*------------------------------------------------------------------------------------*/

// Declaration of a named character class: ident '=' Set '.'
// Reports duplicate names and empty sets, then registers the class.
SetDecl                         (. CharSet *s; .)
=
  ident                         (. wchar_t *name = coco_string_create(t->val);
                                   CharClass *c = tab->FindCharClass(name);
                                   if (c != NULL) SemErr(L"name declared twice");
                                 .)
  '=' Set<s>                    (. if (s->Elements() == 0) SemErr(L"character set must not be empty");
                                   tab->NewCharClass(name, s);
                                 .)
  '.'
.

/*------------------------------------------------------------------------------------*/

// Character set expression: simple sets combined with '+' (Or) and '-' (Subtract).
Set<CharSet* &s>                (. CharSet *s2; .)
=
  SimSet<s>
  { '+' SimSet<s2>              (. s->Or(s2); .)
  | '-' SimSet<s2>              (. s->Subtract(s2); .)
  }
.

/*------------------------------------------------------------------------------------*/

// Simple set: a named class, the characters of a string, a single character,
// a character range, or ANY. Always yields a newly allocated CharSet in s.
SimSet<CharSet* &s>             (. int n1, n2; .)
=                               (. s = new CharSet(); .)
( ident                         (. CharClass *c = tab->FindCharClass(t->val);
                                   if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set);
                                 .)
| string                        (. // strip the surrounding quotes, then resolve escapes
                                   wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
                                   wchar_t *name = tab->Unescape(subName2);
                                   coco_string_delete(subName2);
                                   wchar_t ch;
                                   int len = coco_string_length(name);
                                   for(int i=0; i < len; i++) {
                                     ch = name[i];
                                     if (dfa->ignoreCase) {
                                       if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower()
                                     }
                                     s->Set(ch);
                                   }
                                   coco_string_delete(name);
                                 .)
| Char<n1>                      (. s->Set(n1); .)
  [ ".." Char<n2>               (. for (int i = n1; i <= n2; i++) s->Set(i); .)
  ]
| "ANY"                         (. // s is already a fresh empty set (allocated above); filling it
                                   // avoids orphaning that allocation with a second new CharSet()
                                   s->Fill(); .)
)
.

/*--------------------------------------------------------------------------------------*/

// Single character literal; n receives its code (lower-cased for 'A'..'Z'
// when dfa->ignoreCase is set).
Char<int &n>
=
  char                          (. n = 0;
                                   wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
                                   wchar_t* name = tab->Unescape(subName);
                                   coco_string_delete(subName);

                                   // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++
                                   if (coco_string_length(name) <= 1) n = name[0];
                                   else SemErr(L"unacceptable character value");
                                   coco_string_delete(name);
                                   if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32;
                                 .)
.

/*------------------------------------------------------------------------------------*/

// Token or pragma declaration; typ is the node type for the new symbol
// (Node::t from the TOKENS section, Node::pr from the PRAGMAS section).
// A trailing semantic action is only accepted when typ is Node::pr.
TokenDecl<int typ>              (. wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; .)
=
  Sym<name, kind>               (. sym = tab->FindSym(name);
                                   if (sym != NULL) SemErr(L"name declared twice");
                                   else {
                                     sym = tab->NewSym(typ, name, t->line);
                                     sym->tokenKind = Symbol::fixedToken;
                                   }
                                   tokenString = NULL;
                                 .)
  SYNC
  ( '=' TokenExpr<g> '.'        (. if (kind == str) SemErr(L"a literal must not be declared with a structure");
                                   tab->Finish(g);
                                   if (tokenString == NULL || coco_string_equal(tokenString, noString))
                                     dfa->ConvertToStates(g->l, sym);
                                   else { // TokenExpr is a single string
                                     if ((*(tab->literals))[tokenString] != NULL)
                                       SemErr(L"token string declared twice");
                                     tab->literals->Set(tokenString, sym);
                                     dfa->MatchLiteral(tokenString, sym);
                                   }
                                 .)
  |                             (. if (kind == id) genScanner = false;
                                   else dfa->MatchLiteral(sym->name, sym);
                                 .)
  )
  [ SemText<.sym->semPos.>      (. if (typ != Node::pr) SemErr(L"semantic action not allowed here"); .)
  ]
.

/*------------------------------------------------------------------------------------*/

// Formal attributes of a production, written either as < ... > or <. ... .>;
// a non-empty text range is stored in sym->attrPos.
AttrDecl<Symbol *sym>
=
  '<'                           (. int beg = la->pos; int col = la->col; int line = la->line; .)
  { ANY
  | badString                   (. SemErr(L"bad string in attributes"); .)
  }
  '>'                           (. if (t->pos > beg)
                                     sym->attrPos = new Position(beg, t->pos, col, line); .)
| "<."                          (. int beg = la->pos; int col = la->col; int line = la->line; .)
  { ANY
  | badString                   (. SemErr(L"bad string in attributes"); .)
  }
  ".>"                          (. if (t->pos > beg)
                                     sym->attrPos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

// Alternatives of terms separated by '|'; the resulting graph is returned in g.
Expression<Graph* &g>           (. Graph *g2; .)
=
  Term<g>                       (. bool first = true; .)
  {                             WEAK
    '|'
    Term<g2>                    (. if (first) { tab->MakeFirstAlt(g); first = false; }
                                   tab->MakeAlternative(g, g2);
                                 .)
  }
.

/*------------------------------------------------------------------------------------*/

// Sequence of factors, optionally preceded by a resolver; an empty term
// yields a graph holding a single eps node.
Term<Graph* &g>                 (. Graph *g2; Node *rslv = NULL; g = NULL; .)
=
( [                             (. rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); .)
    Resolver<.rslv->pos.>       (. g = new Graph(rslv); .)
  ]
  Factor<g2>                    (. if (rslv != NULL) tab->MakeSequence(g, g2);
                                   else g = g2;
                                 .)
  { Factor<g2>                  (. tab->MakeSequence(g, g2); .)
  }
|                               (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .)
)                               (. if (g == NULL) // invalid start of Term
                                     g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
                                 .)
.

/*------------------------------------------------------------------------------------*/

// One factor of a production: a (possibly WEAK) symbol with optional
// attributes, a parenthesized / optional / iterated expression, a semantic
// action, ANY or SYNC. The resulting graph is returned in g.
Factor<Graph* &g>               (. wchar_t* name = NULL; int kind; Position *pos; bool weak = false;
                                   g = NULL;
                                 .)
=
( [ "WEAK"                      (. weak = true; .)
  ]
  Sym<name, kind>               (. Symbol *sym = tab->FindSym(name);
                                   if (sym == NULL && kind == str)
                                     sym = (Symbol*)((*(tab->literals))[name]);
                                   bool undef = (sym == NULL);
                                   if (undef) {
                                     if (kind == id)
                                       sym = tab->NewSym(Node::nt, name, 0);  // forward nt
                                     else if (genScanner) {
                                       sym = tab->NewSym(Node::t, name, t->line);
                                       dfa->MatchLiteral(sym->name, sym);
                                     } else {  // undefined string in production
                                       SemErr(L"undefined string in production");
                                       sym = tab->eofSy;  // dummy
                                     }
                                   }
                                   int typ = sym->typ;
                                   if (typ != Node::t && typ != Node::nt)
                                     SemErr(L"this symbol kind is not allowed in a production");
                                   if (weak) {
                                     if (typ == Node::t) typ = Node::wt;
                                     else SemErr(L"only terminals may be weak");
                                   }
                                   Node *p = tab->NewNode(typ, sym, t->line);
                                   g = new Graph(p);
                                 .)
  [ Attribs<p>                  (. if (kind != id) SemErr(L"a literal must not have attributes"); .)
  ]                             (. if (undef)
                                     sym->attrPos = p->pos;  // dummy
                                   else if ((p->pos == NULL) != (sym->attrPos == NULL))
                                     SemErr(L"attribute mismatch between declaration and use of this symbol");
                                 .)
| '(' Expression<g> ')'
| '[' Expression<g> ']'         (. tab->MakeOption(g); .)
| '{' Expression<g> '}'         (. tab->MakeIteration(g); .)
| SemText<pos>                  (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0);
                                   p->pos = pos;
                                   g = new Graph(p);
                                 .)
| "ANY"                         (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0);  // p.set is set in tab->SetupAnys
                                   g = new Graph(p);
                                 .)
| "SYNC"                        (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0);
                                   g = new Graph(p);
                                 .)
)                               (. if (g == NULL) // invalid start of Factor
                                     g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
                                 .)
.

/*------------------------------------------------------------------------------------*/

// Conflict resolver IF ( ... ); pos receives the text range of the condition.
Resolver<Position* &pos>
=
  "IF" "("                      (. int beg = la->pos; int col = la->col; int line = la->line; .)
  Condition                     (. pos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

// Body of a resolver condition up to the matching ")"; nested parentheses
// are handled by recursion.
Condition = { "(" Condition | ANY } ")" .

/*------------------------------------------------------------------------------------*/

// Token-level alternatives separated by '|'; the graph is returned in g.
TokenExpr<Graph* &g>            (. Graph *g2; .)
=
  TokenTerm<g>                  (. bool first = true; .)
  {                             WEAK
    '|'
    TokenTerm<g2>               (. if (first) { tab->MakeFirstAlt(g); first = false; }
                                   tab->MakeAlternative(g, g2);
                                 .)
  }
.

/*------------------------------------------------------------------------------------*/

// Sequence of token factors with an optional CONTEXT ( ... ) part.
TokenTerm<Graph* &g>            (. Graph *g2; .)
=
  TokenFactor<g>
  { TokenFactor<g2>             (. tab->MakeSequence(g, g2); .)
  }
  [ "CONTEXT"
    '(' TokenExpr<g2>           (. tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true;
                                   tab->MakeSequence(g, g2); .)
    ')'
  ]
.

/*------------------------------------------------------------------------------------*/

// One factor of a token expression. Also maintains tokenString: it holds the
// literal while the expression seen so far is a single string, and a copy of
// noString once anything else has been seen.
TokenFactor<Graph* &g>          (. wchar_t* name = NULL; int kind; .)
=
                                (. g = NULL; .)
( Sym<name, kind>               (. if (kind == id) {
                                     CharClass *c = tab->FindCharClass(name);
                                     if (c == NULL) {
                                       SemErr(L"undefined name");
                                       c = tab->NewCharClass(name, new CharSet());
                                     }
                                     Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n;
                                     g = new Graph(p);
                                     tokenString = coco_string_create(noString);
                                   } else { // str
                                     g = tab->StrToGraph(name);
                                     if (tokenString == NULL) tokenString = coco_string_create(name);
                                     else tokenString = coco_string_create(noString);
                                   }
                                 .)
| '(' TokenExpr<g> ')'
| '[' TokenExpr<g> ']'          (. tab->MakeOption(g); tokenString = coco_string_create(noString); .)
| '{' TokenExpr<g> '}'          (. tab->MakeIteration(g); tokenString = coco_string_create(noString); .)
)                               (. if (g == NULL) // invalid start of TokenFactor
                                     g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .)
.

/*------------------------------------------------------------------------------------*/

// Symbol reference: an identifier (kind = id) or a string / character literal
// (kind = str). name receives a newly created string; a character literal is
// rewritten into double-quoted form, and literals are lower-cased when
// dfa->ignoreCase is set.
Sym<wchar_t* &name, int &kind>
=                               (. name = coco_string_create(L"???"); kind = id; .)
( ident                         (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .)
| (string                       (. coco_string_delete(name); name = coco_string_create(t->val); .)
  | char                        (. wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
                                   coco_string_delete(name);
                                   name = coco_string_create_append(L"\"", subName);
                                   coco_string_delete(subName);
                                   coco_string_merge(name, L"\"");
                                 .)
  )                             (. kind = str;
                                   if (dfa->ignoreCase) {
                                     wchar_t *oldName = name;
                                     name = coco_string_create_lower(name);
                                     coco_string_delete(oldName);
                                   }
                                   if (coco_string_indexof(name, ' ') >= 0)
                                     SemErr(L"literal tokens must not contain blanks"); .)
)
.

/*------------------------------------------------------------------------------------*/

// Actual attributes at a symbol use, written either as < ... > or <. ... .>;
// a non-empty text range is stored in p->pos.
Attribs<Node *p>
=
  '<'                           (. int beg = la->pos; int col = la->col; int line = la->line; .)
  { ANY
  | badString                   (. SemErr(L"bad string in attributes"); .)
  }
  '>'                           (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .)
| "<."                          (. int beg = la->pos; int col = la->col; int line = la->line; .)
  { ANY
  | badString                   (. SemErr(L"bad string in attributes"); .)
  }
  ".>"                          (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

// Semantic action text between "(." and ".)"; pos receives its text range.
// A nested "(." is reported as a missing end of the previous action.
SemText<Position* &pos>
=
  "(."                          (. int beg = la->pos; int col = la->col; int line = t->line; .)
  { ANY
  | badString                   (. SemErr(L"bad string in semantic action"); .)
  | "(."                        (. SemErr(L"missing end of previous semantic action"); .)
  }
  ".)"                          (. pos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

END Coco.