/* ParParser Copyright (C) 2016 Frans Faase Example of a parallel parsing algorithm. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. GNU General Public License: http://www.iwriteiam.nl/GNU.txt Details: http://www.iwriteiam.nl/D1608.html#9b */ #include #include class EnterCall { public: EnterCall(const char *name) : _name(name) { printf("%*.*sEnter %s\n", _depth, _depth, "", _name); _depth++; if (_depth > 20) exit(-1); } ~EnterCall() { _depth--; printf("%*.*sLeave %s\n", _depth, _depth, "", _name); } static void indent() { printf("%*.*s", _depth, _depth, ""); } private: const char* _name; static long _depth; }; long EnterCall::_depth = 0; class RefCounter { public: RefCounter() : _refCounter(1) {} virtual ~RefCounter() {} void inc() { _refCounter++; } void dec() { if (--_refCounter == 0) delete this; } private: unsigned long _refCounter; }; template class SmartPtr { public: SmartPtr() : _ptr(0) {} SmartPtr(T* ptr) : _ptr(ptr) { if (_ptr != 0) _ptr->inc(); } SmartPtr(SmartPtr& ptr) : _ptr(ptr.val()) { if (_ptr != 0) _ptr->inc(); } ~SmartPtr() { if (_ptr != 0) _ptr->dec(); } operator T*() { return _ptr; } T* operator->() { return _ptr; } T& operator*() { return *_ptr; } SmartPtr& operator=(T* ptr) { if (ptr == _ptr) return *this; T* old_ptr = _ptr; _ptr = ptr; if (_ptr != 0) _ptr->inc(); if (old_ptr) old_ptr->dec(); return *this; } private: T* _ptr; }; class ParseRule { friend class NonTerminal; ParseRule(const char* rule, const char* name, ParseRule* next) : _rule(rule), _next(next), _name(name) {} public: const char* rule() const { return _rule; } const char* name() const { return _name; } class iterator { public: iterator(const ParseRule* first) { _cur = first; } bool more() { return _cur != 0; } void next() { _cur = _cur->_next; } const char *rule() { return _cur->rule(); } const char *name() { return _cur->name(); } const ParseRule* operator*() { return _cur; } private: const ParseRule* _cur; }; private: const char* _rule; const char* _name; ParseRule* _next; }; class NonTerminal { public: NonTerminal(const char name, bool empty, NonTerminal* next) : _name(name), _empty(empty), _next(next), _rules(0) {} char name() const { return _name; } bool empty() const { return _empty; } class iterator { public: iterator(const NonTerminal* first) { _cur = first; } bool more() { return _cur != 0; } void next() { _cur = _cur->_next; } char name() const { return _cur->name(); } const NonTerminal* operator*() { return _cur; } ParseRule* rules() const { return _cur->_rules; } private: const NonTerminal* _cur; }; class rulesIterator { public: rulesIterator(const NonTerminal* nt) { _cur = nt->_rules; } bool more() { return _cur != 0; } void next() { _cur = _cur->_next; } const ParseRule* operator*() { return _cur; } private: const ParseRule* _cur; }; void addRule(const char* rule, const char* name) { _rules = new ParseRule(rule, name, _rules); } private: char _name; bool _empty; ParseRule* _rules; NonTerminal* _next; }; const NonTerminal* find(NonTerminal* nonTerminals, char name) { for (NonTerminal::iterator it(nonTerminals); it.more(); it.next()) if (it.name() == name) return *it; return 0; } class List; class Tree : public RefCounter { public: Tree(const ParseRule* rule, List* children) : _rule(rule), _children(children) {} void print(FILE *f); private: const ParseRule *_rule; SmartPtr _children; }; class List : public RefCounter { public: List(Tree* tree, List* prev) : _tree(tree), _prev(prev) {} void print(FILE *f); private: SmartPtr _tree; SmartPtr _prev; }; void Tree::print(FILE *f) { fprintf(f, "%s", _rule->name()); if (_children != 0) { fprintf(f, "("); _children->print(f); fprintf(f, ")"); } } void List::print(FILE *f) { if (_prev != 0) { _prev->print(f); fprintf(f, ","); } if (_tree != 0) _tree->print(f); else fprintf(f, ""); } class ParsePointRule; class ParsePointNT : public RefCounter { //friend class ParseConfiguration; public: ParsePointNT(const NonTerminal* nt, ParsePointNT* next) : _nt(nt), _next_in_config(next) {} class iterator { public: iterator(ParsePointNT *nonTerminals) : _cur(nonTerminals) {} bool more() const { return _cur != 0; } void next() { _cur = _cur->_next_in_config; } ParsePointNT* operator*() const { return _cur; } private: ParsePointNT* _cur; }; const NonTerminal* _nt; SmartPtr _rule_uses; SmartPtr _next_in_config; }; class ParsePointRule : public RefCounter { public: ParsePointRule(const ParseRule* parseRule, int pos, ParsePointNT* nt, ParsePointRule* next, List* prev) : _parseRule(parseRule), _pos(pos), _nt(nt), _next_in_config(next), _prev(prev) {} class iterator { public: iterator(ParsePointRule *rule) : _cur(rule) {} bool more() const { return _cur != 0; } void next() { _cur = _cur->_next_in_config; } ParsePointRule* operator*() const { return _cur; } private: ParsePointRule* _cur; }; char val() { return _parseRule->rule()[_pos]; } const ParseRule* _parseRule; int _pos; SmartPtr _nt; SmartPtr _next_us_nt; SmartPtr _next_in_config; SmartPtr _prev; }; class ParseConfiguration { public: ParseConfiguration(NonTerminal* nonTerminals) : _nonTerminals(nonTerminals) {} void addNonTerminal(const NonTerminal* nt, ParsePointRule* parsePointRule) { EnterCall call("addNonTerminal"); EnterCall::indent(); printf("non terminal = %c\n", nt->name()); for (ParsePointNT::iterator it(_parsePointNonTerminals); it.more(); it.next()) { ParsePointNT* ppnt = (*it); if (ppnt->_nt == nt) { // already included if (parsePointRule != 0) { parsePointRule->_next_us_nt = ppnt->_rule_uses; ppnt->_rule_uses = parsePointRule; } return; } } ParsePointNT* ppnt = new ParsePointNT(nt, _parsePointNonTerminals); _parsePointNonTerminals = ppnt; if (parsePointRule != 0) { parsePointRule->_next_us_nt = ppnt->_rule_uses; ppnt->_rule_uses = parsePointRule; } for (NonTerminal::rulesIterator it(nt); it.more(); it.next()) { List* prev = 0; addRule(*it, 0, ppnt, prev); } } bool addRule(const ParseRule* parseRule, int pos, ParsePointNT* ppnt, List* &prev) { for (; parseRule->rule()[pos] != '\0'; pos++) { ParsePointRule* pprule = new ParsePointRule(parseRule, pos, ppnt, _parsePointRules, prev); EnterCall::indent(); printf("new ParsePoint for rule %s at %d, name: %s\n", parseRule->rule(), pos, parseRule->name()); _parsePointRules = pprule; const NonTerminal* cnt = find(_nonTerminals, parseRule->rule()[pos]); if (cnt != 0) addNonTerminal(cnt, pprule); if (cnt == 0 || !cnt->empty()) return false; if (cnt != 0) prev = new List(0, prev); } return true; } bool noParsePointRules() { return (ParsePointRule*)_parsePointRules == 0; } void addStepParsePoint(const ParseRule* parseRule, int pos, ParsePointNT *ppnt, List* prev) { EnterCall call("addStepParsePoint"); EnterCall::indent(); printf("for %s at %d, name: %s\n", parseRule->rule(), pos, parseRule->name()); if (addRule(parseRule, pos, ppnt, prev)) { Tree* new_tree = new Tree(parseRule, prev); if (ppnt->_rule_uses != 0) { for (ParsePointRule* a_rule = ppnt->_rule_uses; a_rule != 0; a_rule = a_rule->_next_us_nt) { List* new_list = new List(new_tree, a_rule->_prev); addStepParsePoint(a_rule->_parseRule, a_rule->_pos + 1, a_rule->_nt, new_list); } } else { List* list = new List(new_tree, 0); ParsePointRule* pprule = new ParsePointRule(parseRule, pos, ppnt, _parsePointRules, list); EnterCall::indent(); printf("new ParsePoint for rule %s at %d, name: %s\n", parseRule->rule(), pos, parseRule->name()); _parsePointRules = pprule; } } } void stepWith(char ch, ParseConfiguration& newParseConfiguration) { for (ParsePointRule::iterator it(_parsePointRules); it.more(); it.next()) { ParsePointRule* pprule = *it; if (pprule->val() == ch) { printf("Found match for rule %s at %d\n", pprule->_parseRule->rule(), pprule->_pos); newParseConfiguration.addStepParsePoint(pprule->_parseRule, pprule->_pos + 1, pprule->_nt, pprule->_prev); } } } void print(FILE* f) { for (ParsePointRule::iterator it(_parsePointRules); it.more(); it.next()) { ParsePointRule* pprule = *it; fprintf(f, " %c : ", pprule->_nt->_nt->name()); int i = 0; for (const char *s = pprule->_parseRule->rule(); *s != '\0'; s++, i++) { if (i == pprule->_pos) fprintf(f, "."); fprintf(f, "%c", *s); } if (i == pprule->_pos) fprintf(f, "."); fprintf(f, " (%s) ", pprule->_parseRule->name()); if (pprule->_prev != 0) pprule->_prev->print(f); fprintf(f, "\n"); } } private: NonTerminal* _nonTerminals; SmartPtr _parsePointNonTerminals; SmartPtr _parsePointRules; }; void parseInput(NonTerminal* nonTerminals, const char* line) { const NonTerminal* root_nt = find(nonTerminals, 'A'); if (root_nt == 0) { printf("Did not find non terminal A\n"); return; } ParseConfiguration curParseConfiguration(nonTerminals); curParseConfiguration.addNonTerminal(root_nt, 0); for (const char* s = line; *s != '\0'; ++s) { printf("State:\n"); curParseConfiguration.print(stdout); printf("Parsing: %s\n", s); ParseConfiguration newParseConfiguration(nonTerminals); if (curParseConfiguration.noParsePointRules()) { printf("Could not parse: *s\n", s); return; } curParseConfiguration.stepWith(*s, newParseConfiguration); curParseConfiguration = newParseConfiguration; } curParseConfiguration.print(stdout); } int main(int argc, char *argv[]) { NonTerminal* nonTerminals = 0; nonTerminals = new NonTerminal('A', false, nonTerminals); nonTerminals->addRule("E", "expr"); nonTerminals = new NonTerminal('E', false, nonTerminals); nonTerminals->addRule("Sx", "x"); nonTerminals->addRule("E+E", "plus"); nonTerminals = new NonTerminal('S', true, nonTerminals); nonTerminals->addRule("-", "sign"); parseInput(nonTerminals, "x+-x+x"); return 0; }