/* ParParser Copyright (C) 2016 Frans Faase Example of a parallel parsing algorithm. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. GNU General Public License: http://www.iwriteiam.nl/GNU.txt Details: http://www.iwriteiam.nl/D1608.html#28 */ #include #include /**** Generic classes ****/ /* EnterCall is a class to aid debugging by tracing calls */ class EnterCall { public: EnterCall(const char *name) : _name(name) { printf("%*.*sEnter %s\n", _depth, _depth, "", _name); _depth++; } ~EnterCall() { _depth--; printf("%*.*sLeave %s\n", _depth, _depth, "", _name); } static void indent() { printf("%*.*s", _depth, _depth, ""); } private: const char* _name; static long _depth; }; long EnterCall::_depth = 0; /* RefCounter is a base class for all reference counted objects For debugging purposes all instances are given a unique number (stored in _inst). The total number of currently allocated objects is kept in _allocated. */ class RefCounter { public: RefCounter(const char *tn) : _refCounter(0), _tn(tn) { _inst = _instance++; EnterCall::indent(); printf("New %s_%ld\n", _tn, _inst); _allocated++; } virtual ~RefCounter() { EnterCall::indent(); printf("Delete %s_%ld\n", _tn, _inst); _allocated--; } void inc() { _refCounter++; EnterCall::indent(); printf("Inc %s_%ld to %ld", _tn, _inst, _refCounter); if (_in != 0 && _member != 0) { printf(" in %s_%ld.%s", _in->_tn, _in->_inst, _member); _in = 0; _member = 0; } printf("\n"); } void dec() { EnterCall::indent(); printf("Dec %s_%ld to %ld\n", _tn, _inst, _refCounter-1); if (--_refCounter == 0) delete this; } static void loc(RefCounter* in, const char* member) { _in = in; _member = member; } static unsigned long allocated() { return _allocated; } private: unsigned long _refCounter; const char* _tn; unsigned long _inst; static unsigned long _instance; static unsigned long _allocated; static RefCounter* _in; static const char* _member; }; unsigned long RefCounter::_instance = 0L; unsigned long RefCounter::_allocated = 0L; RefCounter* RefCounter::_in = 0; const char* RefCounter::_member = 0; /* SmartPtr is a class for smart reference counting. The isLoop method can be used to indicate that the reference is creating a loop. */ template class SmartPtr { public: SmartPtr() : _ptr(0), _loop(false) {} SmartPtr(T* ptr) : _ptr(ptr), _loop(false) { if (_ptr != 0) _ptr->inc(); } SmartPtr(const SmartPtr& ptr) : _ptr(ptr), _loop(false) { if (_ptr != 0) _ptr->inc(); } ~SmartPtr() { if (_ptr != 0 && !_loop) _ptr->dec(); } operator T*() const { return _ptr; } T* operator->() { return _ptr; } T& operator*() { return *_ptr; } SmartPtr& operator=(T* ptr) { if (ptr == _ptr) return *this; T* old_ptr = _ptr; _ptr = ptr; if (_ptr != 0) _ptr->inc(); if (old_ptr && !_loop) old_ptr->dec(); _loop = false; return *this; } SmartPtr& operator=(const SmartPtr& ptr) { return operator=(ptr._ptr); } void isLoop() { if (_ptr != 0) { if (!_loop) _ptr->dec(); _loop = true; } } private: T* _ptr; bool _loop; }; /* SmartPtrCollection is a class implementing a collection of smart pointers. It is implemented by a single linked list. */ template class SmartPtrCollection { struct Elem { Elem(T* val) : first(val), next(0) {} ~Elem() { delete next; } SmartPtr first; Elem* next; }; public: SmartPtrCollection() : first(0) { ref_next = &first; } ~SmartPtrCollection() { delete first; } bool empty() { return first == 0; } void add(T* ptr) { *ref_next = new Elem(ptr); ref_next = &(*ref_next)->next; } class iterator { public: iterator(const SmartPtrCollection& col) { elem = col.first; } bool more() { return elem != 0; } void next() { elem = elem->next; } T* operator*() { return elem->first; } private: Elem* elem; }; SmartPtrCollection& operator=(const SmartPtrCollection& rhs) { delete first; first = 0; ref_next = &first; for (Elem* it = rhs.first; it != 0; it = it->next) add(it->first); return *this; } private: Elem *first; Elem **ref_next; }; /**** Grammar description ****/ /* The grammar consists of a collection of non-terminals, where each non-terminal has a unique name consisting of a single capital letter and can have a number of rules. The root non-terminal has the name 'A'. A rule is represented by a string of characters, where each capital letter stands for a non-terminal and each other character stands for terminal. Each rule also has a name, which is used in the abstract parse tree. */ /* ParseRule is a class implementing a list of parse rules */ class ParseRule { friend class NonTerminal; ParseRule(const char* rule, const char* name, ParseRule* next) : _rule(rule), _next(next), _name(name) {} public: const char* rule() const { return _rule; } const char* name() const { return _name; } class iterator { public: iterator(const ParseRule* rules) { _cur = rules; } bool more() { return _cur != 0; } void next() { _cur = _cur->_next; } const char *rule() { return _cur->rule(); } const char *name() { return _cur->name(); } const ParseRule* operator*() { return _cur; } private: const ParseRule* _cur; }; private: const char* _rule; const char* _name; ParseRule* _next; }; /* NonTerminal is a class that implements a list of non-terminals */ class NonTerminal { public: NonTerminal(const char name, bool empty, NonTerminal* next) : _name(name), _empty(empty), _next(next), _rules(0) {} char name() const { return _name; } bool empty() const { return _empty; } class iterator { public: iterator(const NonTerminal* first) { _cur = first; } bool more() { return _cur != 0; } void next() { _cur = _cur->_next; } char name() const { return _cur->name(); } const NonTerminal* operator*() { return _cur; } ParseRule* rules() const { return _cur->_rules; } private: const NonTerminal* _cur; }; void addRule(const char* rule, const char* name) { _rules = new ParseRule(rule, name, _rules); } const ParseRule* rules() const { return _rules; } private: char _name; bool _empty; ParseRule* _rules; NonTerminal* _next; }; const NonTerminal* find(NonTerminal* nonTerminals, char name) { for (NonTerminal::iterator it(nonTerminals); it.more(); it.next()) if (it.name() == name) return *it; return 0; } /**** Abstract parse tree ****/ /* The abstract parse tree is represented by the class List where the children (if there are any) are stored in reverse order. */ class List : public RefCounter { public: List(const ParseRule* rule, List* children, List* prev) : RefCounter("List") { EnterCall enterCall("New List"); RefCounter::loc(this, "_rule"); _rule = rule; RefCounter::loc(this, "_children"); _children = children; RefCounter::loc(this, "_prev"); _prev = prev; } //const Tree* prev() const { return _prev; } void print(FILE *f) { if (_prev != 0) { _prev->print(f); fprintf(f, ","); } if (_rule != 0) { fprintf(f, "%s", _rule->name()); if (_children != 0) { fprintf(f, "("); _children->print(f); fprintf(f, ")"); } } else fprintf(f, ""); } private: const ParseRule *_rule; SmartPtr _children; SmartPtr _prev; }; /**** Parser state ****/ class ParsePointRule; class ParsePointNT : public RefCounter { public: ParsePointNT(const NonTerminal* nt) : RefCounter("ParsePointNT")/*, _nt(nt)*/ { EnterCall enterCall("New ParsePointNT"); RefCounter::loc(this, "_nt"); _nt = nt; } const NonTerminal* _nt; SmartPtrCollection _rule_uses; }; class ParsePointRule : public RefCounter { public: ParsePointRule(const ParseRule* parseRule, int pos, ParsePointNT* nt, List* prev_list) : RefCounter("ParsePointRule"), _parseRule(parseRule), _pos(pos)/*, _nt(nt), _prev_list(prev_list)*/ { EnterCall enterCall("New ParsePointRule"); RefCounter::loc(this, "_nt"); _nt = nt; RefCounter::loc(this, "_prev_list"); _prev_list = prev_list; } char val() { return _parseRule->rule()[_pos]; } const ParseRule* _parseRule; int _pos; SmartPtr _nt; SmartPtr _prev_list; }; class ParseConfiguration { public: ParseConfiguration(NonTerminal* nonTerminals) : _nonTerminals(nonTerminals) {} ParseConfiguration& operator=(const ParseConfiguration& rhs) { _nonTerminals = rhs._nonTerminals; _parsePointNonTerminals = rhs._parsePointNonTerminals; _parsePointRules = rhs._parsePointRules; return *this; } void addNonTerminal(const NonTerminal* nt, ParsePointRule* parsePointRule) { EnterCall call("addNonTerminal"); EnterCall::indent(); printf("non terminal = %c\n", nt->name()); for (SmartPtrCollection::iterator it(_parsePointNonTerminals); it.more(); it.next()) { ParsePointNT* ppnt = (*it); if (ppnt->_nt == nt) { // already included if (parsePointRule != 0) { ppnt->_rule_uses.add(parsePointRule); } return; } } ParsePointNT* ppnt = new ParsePointNT(nt); _parsePointNonTerminals.add(ppnt); if (parsePointRule != 0) { ppnt->_rule_uses.add(parsePointRule); } for (ParseRule::iterator it(nt->rules()); it.more(); it.next()) { List* prev_list = 0; addRule(*it, 0, ppnt, prev_list); } } bool addRule(const ParseRule* parseRule, int pos, ParsePointNT* ppnt, List* &prev_list) { for (; parseRule->rule()[pos] != '\0'; pos++) { ParsePointRule* pprule = new ParsePointRule(parseRule, pos, ppnt, prev_list); EnterCall::indent(); printf("new ParsePoint for rule %s at %d, name: %s\n", parseRule->rule(), pos, parseRule->name()); _parsePointRules.add(pprule); const NonTerminal* cnt = find(_nonTerminals, parseRule->rule()[pos]); if (cnt != 0) { addNonTerminal(cnt, pprule); if (pos == 0 && cnt == ppnt->_nt) pprule->_nt.isLoop(); } if (cnt == 0 || !cnt->empty()) return false; if (cnt != 0) prev_list = new List(0, 0, prev_list); } return true; } bool noParsePointRules() { return _parsePointRules.empty(); } void addStepParsePoint(const ParseRule* parseRule, int pos, ParsePointNT *ppnt, List* prev_list) { EnterCall call("addStepParsePoint"); EnterCall::indent(); printf("for %s at %d, name: %s\n", parseRule->rule(), pos, parseRule->name()); if (addRule(parseRule, pos, ppnt, prev_list)) { if (!ppnt->_rule_uses.empty()) { for (SmartPtrCollection::iterator it(ppnt->_rule_uses); it.more(); it.next()) { ParsePointRule *a_rule = (*it); List* new_list = new List(parseRule, prev_list, a_rule->_prev_list); addStepParsePoint(a_rule->_parseRule, a_rule->_pos + 1, a_rule->_nt, new_list); } } else { List* list = new List(parseRule, prev_list, 0); ParsePointRule* pprule = new ParsePointRule(parseRule, pos, ppnt, list); EnterCall::indent(); printf("new ParsePoint for rule %s at %d, name: %s\n", parseRule->rule(), pos, parseRule->name()); _parsePointRules.add(pprule); } } } void stepWith(char ch, ParseConfiguration& newParseConfiguration) { for (SmartPtrCollection::iterator it(_parsePointRules); it.more(); it.next()) { ParsePointRule* pprule = *it; if (pprule->val() == ch) { printf("Found match for rule %s at %d\n", pprule->_parseRule->rule(), pprule->_pos); newParseConfiguration.addStepParsePoint(pprule->_parseRule, pprule->_pos + 1, pprule->_nt, pprule->_prev_list); } } } void print(FILE* f) { for (SmartPtrCollection::iterator it(_parsePointRules); it.more(); it.next()) { ParsePointRule* pprule = *it; fprintf(f, " %c : ", pprule->_nt->_nt->name()); int i = 0; for (const char *s = pprule->_parseRule->rule(); *s != '\0'; s++, i++) { if (i == pprule->_pos) fprintf(f, "."); fprintf(f, "%c", *s); } if (i == pprule->_pos) fprintf(f, "."); fprintf(f, " (%s) ", pprule->_parseRule->name()); if (pprule->_prev_list != 0) pprule->_prev_list->print(f); fprintf(f, "\n"); } } private: NonTerminal* _nonTerminals; SmartPtrCollection _parsePointNonTerminals; SmartPtrCollection _parsePointRules; }; void parseInput(NonTerminal* nonTerminals, const char* line) { const NonTerminal* root_nt = find(nonTerminals, 'A'); if (root_nt == 0) { printf("Did not find non terminal A\n"); return; } ParseConfiguration curParseConfiguration(nonTerminals); curParseConfiguration.addNonTerminal(root_nt, 0); for (const char* s = line; *s != '\0'; ++s) { printf("State:\n"); curParseConfiguration.print(stdout); printf("Parsing: %s\n", s); ParseConfiguration newParseConfiguration(nonTerminals); if (curParseConfiguration.noParsePointRules()) { printf("Could not parse: %s\n", s); return; } curParseConfiguration.stepWith(*s, newParseConfiguration); printf("curParseConfiguration = newParseConfiguration;\n"); curParseConfiguration = newParseConfiguration; printf("teardown newParseConfiguration\n"); } curParseConfiguration.print(stdout); printf("teardown curParseConfiguration\n"); } int main(int argc, char *argv[]) { NonTerminal* nonTerminals = 0; nonTerminals = new NonTerminal('A', false, nonTerminals); nonTerminals->addRule("E!", "expr"); nonTerminals = new NonTerminal('E', false, nonTerminals); nonTerminals->addRule("E+E", "plus"); nonTerminals->addRule("Sx", "x"); nonTerminals = new NonTerminal('S', true, nonTerminals); nonTerminals->addRule("-", "sign"); parseInput(nonTerminals, "x+-x+x!"); printf("Still allocated: %ld\n", RefCounter::allocated()); return 0; }