From b01052f96c107603a30663c47308cadf7c30d94d Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sat, 18 Jan 2020 23:13:56 +0100 Subject: Fix lex bnf --- minicc.cpp | 160 +++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 123 insertions(+), 37 deletions(-) (limited to 'minicc.cpp') diff --git a/minicc.cpp b/minicc.cpp index 94bae72..d147baa 100644 --- a/minicc.cpp +++ b/minicc.cpp @@ -33,16 +33,16 @@ std::vector GetPath(std::string Token, BNF ReverseBNF, std::string } auto Reverse(BNF bnf){ - std::map> result; + std::map> result; for (const auto& [from, to] : bnf) { for (const auto& list : to) { for (const auto& element : list) { auto i{result.find(element)}; if (i != result.end()) // already present - i->second.push_back(from); + i->second.insert(from); else // new element - result.emplace(element, std::vector{1, from}); + result.emplace(element, std::set{from}); } } } @@ -68,10 +68,11 @@ private: public: bool Valid(const std::string& Top) const { - // Start symbol on top + // A token is non empty if (node_num == 0) return false; + // Start symbol on top auto rootNode{nodes.find(root)}; if (rootNode == nodes.end()) throw std::runtime_error("Node not found: "s + std::to_string(root)); @@ -88,24 +89,98 @@ public: return true; } - // try to add node - bool Add(char c, const BNF& bnf, const std::map>& reverseBNF) { + bool AddFirstNode(char c, const BNF& bnf, const std::map>& reverseBNF) { + node_num ++; + root = node_num; + last = node_num; + std::string node_name(1, char(c)); + + auto reverseRule{reverseBNF.find(node_name)}; + if (reverseRule == reverseBNF.end()) + throw std::runtime_error("Reverse rule not found: "s + node_name); + + std::vector children; // default: no children for terminal + auto rule{bnf.find(node_name)}; + if (rule != bnf.end()) { // multiple variants! + bool hit{false}; + for (const auto& i : rule->second) { + if (i.size() > 0 && i[0] == node_name) { + if (hit) + throw std::runtime_error("Multiple matching rules found for "s + node_name); + children = i; + hit = true; + } + } + if (!hit) + throw std::runtime_error("No matching rule found for "s + node_name); + } + + nodes.emplace(root, TreeNode{0, std::vector{}, children, node_name}); + return true; + } + + // try to add character to tree + bool Add(char c, const BNF& bnf, const std::map>& reverseBNF) { if (nodes.empty()) { // first node - node_num ++; - root = node_num; - last = node_num; - std::string node_name{1, c}; - auto rule{reverseBNF.find(node_name)}; - if (rule == reverseBNF.end()) - throw std::runtime_error("Rule not found: "s + node_name); - nodes.emplace(root, TreeNode{0, std::vector{}, std::vector{}, node_name}); - return true; - } else { - // TODO + return AddFirstNode(c, bnf, reverseBNF); + } else { // at least one character is already present + // Traverse tree until partially filled node found + // or new node can be added + index_t current_index{last}; + + while (current_index != 0) { + TreeNode& node {nodes[current_index]}; + if (node.childs.size() < node.child_names.size()) { // partially filled node + throw std::runtime_error("TODO: partially filled node"); + } + current_index = node.parent; + } + throw std::runtime_error("TODO: need to add node"); + } return false; } + // add path to start symbol + void Resolve(const BNF& bnf, const std::map>& reverseBNF) { + if (nodes.empty()) // only handle non-empty trees + return; + + while (true) { + std::string& old_root_name { nodes[root].name }; // current root node name + + auto parents {reverseBNF.find(old_root_name)}; + if (parents != reverseBNF.end()) { // parents in bnf available + bool hit{false}; + for (auto& parent : parents->second) { + for (const auto& list : bnf.at(parent)) { + if (list.size() == 1 && list[0] == old_root_name) { + if (!hit) { + const std::string& new_root_name {parent}; + // Add new TreeNode in the direction to root: + // New root with 1 child (old root) + nodes.emplace(++node_num, + TreeNode{0, // parent + std::vector{root}, // child indices + std::vector{old_root_name}, // child names + new_root_name // name + }); + nodes[root].parent = node_num; + root = node_num; + // this->last stays + hit = true; + } else + throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); + } + } + } + if (!hit) + break; + } else + break; + } + } + }; class Lexer @@ -115,17 +190,18 @@ private: const BNF &bnf; const std::string& Top; - std::map> ReverseBNF; + std::map> ReverseBNF; // to be called on token end - void CheckCandidates(std::deque& candidates, std::string& token, std::vector& result) + void FinalizeCandidates(std::deque& candidates, std::string& token, std::vector& result) { if (candidates.empty()) { // skip if (!token.empty()) throw std::runtime_error("Expected empty token, got "s + token); } else { // check candidates bool valid{false}; - for (const auto& ct : candidates) { + for (auto& ct : candidates) { + ct.Resolve(bnf, ReverseBNF); if (ct.Valid(Top)) { if (valid) throw std::runtime_error("Found ambiguous token "s + token); @@ -141,6 +217,16 @@ private: } } + void AddCandidate(char c, std::deque& candidates) { + // new candidate: starting with c + auto element {ReverseBNF.find(std::string(1, c))}; + if (element != ReverseBNF.end()) { + Tree newTree; + if (newTree.Add(c, bnf, ReverseBNF)) + candidates.push_back(newTree); + } + } + public: Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} { @@ -156,9 +242,10 @@ public: for (size_t pos{0}; pos < s.size(); pos++) { char c{s[pos]}; + std::cout << "Char: |" << c << "|" << std::endl; if (Whitespace.find(c) != std::string::npos) { // found whitespace character // evaluate token up to now and skip whitespace - CheckCandidates(candidates, token, result); + FinalizeCandidates(candidates, token, result); } else { // no whitespace: try to add to tree std::deque EraseList; int i = 0; @@ -169,26 +256,21 @@ public: } } - if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token - token.push_back(c); + if (token.empty()) { + AddCandidate(c, candidates); + } else if (candidates.size() - EraseList.size() > 0) { // added to some candidates: Erase invalidated ones for (const auto& i : EraseList) candidates.erase(candidates.begin() + i); } else { // no candidates left: new tree - CheckCandidates(candidates, token, result); - } - - // new candidate: starting with c - auto element {ReverseBNF.find(std::string(1, c))}; - if (element != ReverseBNF.end()) { - Tree newTree; - if (newTree.Add(c, bnf, ReverseBNF)) - candidates.push_back(newTree); + FinalizeCandidates(candidates, token, result); + AddCandidate(c, candidates); } + token.push_back(c); } } // Final evaluation of last token - CheckCandidates(candidates, token, result); + FinalizeCandidates(candidates, token, result); return result; } @@ -232,9 +314,12 @@ TEST_F(Test, BNF) { {"identifier", {{"identifier-nondigit"}, {"identifier", "identifier-nondigit"}, {"identifier", "digit"}}}, - {"digit", {{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }}}, - {"identifier-nondigit", {{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", - "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "_"}}}, + {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, + {"identifier-nondigit", + {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, + {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, + {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, + {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, {"preprocessing-op-or-punc", {{";"}, {"="}}}, {"pp-number", {{"digit"}, @@ -256,10 +341,11 @@ TEST_F(Test, BNF) { Lexer lexer(LexBNF, LexTop); auto tokens = lexer.Lex(Code); - +#if 1 for (const auto& i: tokens) { std::cout << i << std::endl; } +#endif //auto Program = Compile(tokens, Top, bnf, Terminals); } -- cgit v1.2.3