-rw-r--r-- | grammer.cpp    | 74
-rw-r--r-- | grammer.h      |  2
-rw-r--r-- | test-lexer.cpp |  4
3 files changed, 59 insertions, 21 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 22b874e..3c7671b 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -74,14 +74,20 @@ void Compiler::DumpTree()
     std::cout << "= Dump =======================================" << std::endl;
     std::cout << "nodes.size()=" << nodes.size() << std::endl;
     if (nodes.size() > 0) {
-        std::cout << "--- Nodes ------------------------------------" << std::endl;
-        std::cout << "root_node_id=" << root_node_id << std::endl;
-        for (const auto& node : nodes) {
-            std::cout << node.type << "(" << node.node_id << "):";
-            for (const auto& child : node.child_ids) {
-                std::cout << " " << child;
+        if (0) {
+            std::cout << "--- Nodes ------------------------------------" << std::endl;
+            std::cout << "root_node_id=" << root_node_id << std::endl;
+            for (const auto& node : nodes) {
+                std::cout << node.type << "(" << node.node_id << "):";
+                for (const auto& child : node.child_ids) {
+                    std::cout << " ";
+                    if (ChildIdIsToken(child))
+                        std::cout << "t" << TokenIdFromChildId(child);
+                    else
+                        std::cout << child;
+                }
+                std::cout << std::endl;
             }
-            std::cout << std::endl;
         }
 
         std::cout << "--- Tree -------------------------------------" << std::endl;
@@ -133,6 +139,7 @@ std::vector<std::string>& Compiler::getNodeExpectedChilds(index_t node_id)
 // returns true if all childs are filled, recursively. Else false with to_fill as hint to node to fill
 bool Compiler::subTreeIsComplete(index_t node_id, index_t& to_fill)
 {
+    // recurse first to get first subtree to_fill
     for (const auto& i : nodes[node_id].child_ids) {
         if (!ChildIdIsToken(i)) { // consider subtrees
             if (!subTreeIsComplete(i, to_fill))
@@ -244,6 +251,39 @@ bool Compiler::AddRootNode()
     return true;
 }
 
+void Compiler::removeTokensUpTo(index_t token_id)
+{
+    removeTokensUpTo(token_id, root_node_id);
+}
+
+void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)
+{
+    // std::cout << "DEBUG " << token_id << " " << tokens_used << std::endl;
+    // token_id should be the new tokens_used
+
+    if (token_id < tokens_used) {
+        auto& node{nodes[node_id]};
+        auto& child_ids {node.child_ids};
+
+        // remove relevant tokens from end
+        while (token_id < tokens_used && child_ids.size() && ChildIdIsToken(child_ids.back()) && TokenIdFromChildId(child_ids.back()) >= token_id) {
+            std::cout << "Removing token " << TokenIdFromChildId(child_ids.back()) << std::endl;
+            child_ids.pop_back();
+            if (tokens_used > 0)
+                tokens_used--;
+            else
+                throw std::runtime_error("ICE: Removing non-existing token at "s + std::to_string(node_id) + " ("s + node.type + ")"s);
+        }
+
+        // recurse from back, to remove tokens from end
+        for (auto i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) {
+            if (!ChildIdIsToken(child_ids[i])) {
+                removeTokensUpTo(token_id, child_ids[i]);
+            }
+        }
+    }
+}
+
 // Go back one step: Remove Node or Token
 void Compiler::RemoveLastNode()
 {
@@ -254,35 +294,31 @@ void Compiler::RemoveLastNode()
         if (node.child_ids.empty()) { // No children -> now tree is empty
             clear();
         } else if (ChildIdIsToken(node.child_ids.back())) { // last token child: remove
-            tokens_used--;
-            node.child_ids.pop_back();
+            removeTokensUpTo(TokenIdFromChildId(node.child_ids.back()));
         } else if (node.child_ids.size() == 1) { // One child: removing possible
             if (!ChildIdIsToken(node.child_ids[0])) { // node: set new root
                 nodes[node.child_ids[0]].parent_node_id = node.child_ids[0];
                 root_node_id = node.child_ids[0];
             }
+            std::cout << "Removing root node " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
             nodes.pop_back();
-            std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
         } else {
             DumpTree();
-            throw std::runtime_error("Backtrack not possible: Root not empty"); // ICE
+            throw std::runtime_error("ICE: Backtrack not possible: Root not empty");
         }
     } else if (node.child_ids.empty()) { // No children -> remove leaf
         // We have a parent, otherwise we would have taken previous branch
         TreeNode& parent {nodes[node.parent_node_id]};
         if (parent.child_ids.empty() || parent.child_ids.back() != node_id)
-            throw std::runtime_error("Backtrack: Bad child nodes"); // ICE
+            throw std::runtime_error("ICE: Backtrack: Bad child nodes");
         parent.child_ids.pop_back();
+        std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
         nodes.pop_back();
     } else if (ChildIdIsToken(node.child_ids.back())) {
-        node.child_ids.pop_back();
-        if (tokens_used > 0)
-            tokens_used--;
-        else
-            throw std::runtime_error("Parse error at "s + std::to_string(node_id) + " ("s + node.type + ")"s);
+        removeTokensUpTo(TokenIdFromChildId(node.child_ids.back()));
     } else { // In the middle
-        throw std::runtime_error("Backtrack in the middle of the tree: "s + std::to_string(node_id) + " ("s + node.type + ")"s); // ICE
+        throw std::runtime_error("ICE: Backtrack in the middle of the tree: "s + std::to_string(node_id) + " ("s + node.type + ")"s);
    }
 
    DumpTree();
 }
@@ -293,7 +329,7 @@ void Compiler::ChangeNodeType()
     TreeNode& node {nodes.back()};
 
     if (node.alternatives.empty())
-        throw std::runtime_error("No alternatives found during Backtrack"); // ICE
+        throw std::runtime_error("ICE: No alternatives found during Backtrack");
 
     auto& [type, variant] {node.alternatives.front()};
 
diff --git a/grammer.h b/grammer.h
--- a/grammer.h
+++ b/grammer.h
@@ -55,6 +55,8 @@ private:
     size_t CommonPrefix(const std::vector<Token>& tokens, const std::vector<std::string>& types);
     void AddFirstNode();
     bool AddRootNode();
+    void removeTokensUpTo(index_t token_id);
+    void removeTokensUpTo(index_t token_id, index_t node_id);
     void RemoveLastNode();
     void ChangeNodeType();
     void TrackBack();
diff --git a/test-lexer.cpp b/test-lexer.cpp
index 2eeaef8..05633d0 100644
--- a/test-lexer.cpp
+++ b/test-lexer.cpp
@@ -61,7 +61,7 @@ TEST_F(Test, BNF) {
 
     // implicit?
     //std::set<std::string> Terminals{"identifier", "=", ";"};
-    std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ ; "};
+    std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ "};
     std::vector<Token> tokens_reference{
         {"identifier", "a", { 1, 1} },
         {"preprocessing-op-or-punc", "=", { 1, 3}},
@@ -78,7 +78,7 @@ TEST_F(Test, BNF) {
         {"pp-number", "1", { 1, 31}},
         {"preprocessing-op-or-punc", "=", { 1, 33}},
         {"identifier", "XYZ", { 1, 35}},
-        {"preprocessing-op-or-punc", ";", { 1, 39}},
+        //{"preprocessing-op-or-punc", ";", { 1, 39}},
     };
 
     Lex::Lexer lexer(LexBNF, LexTop);
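Note on the new reverse scan in removeTokensUpTo(index_t, index_t): child_ids.size() returns an unsigned type, so `auto i = child_ids.size() - 1` deduces an unsigned index, the condition `i >= 0` is always true, and `i--` wraps around once it passes zero; termination relies on `token_id < tokens_used` becoming false first. The following standalone sketch (illustrative only, not part of this commit; the std::vector<int> merely stands in for a node's child_ids) shows a reverse index loop that terminates on the index itself:

// reverse_scan_sketch.cpp -- hypothetical example, not from the repository.
// Demonstrates a reverse index loop over an unsigned size() without an
// `i >= 0` test, which can never be false for unsigned types.
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int> child_ids {10, 11, 12};   // stand-in for a node's child_ids

    // Test-and-decrement in the loop condition: i visits size()-1 down to 0,
    // then the loop stops cleanly.
    for (std::size_t i = child_ids.size(); i-- > 0; ) {
        std::cout << child_ids[i] << '\n';     // prints 12, 11, 10
    }

    // An empty vector is handled as well: the body never executes.
    std::vector<int> empty;
    for (std::size_t i = empty.size(); i-- > 0; ) {
        std::cout << empty[i] << '\n';
    }
    return 0;
}

The same pattern could be combined with the `token_id < tokens_used` guard used in the commit's loop; whether the difference matters in practice depends on whether that guard always fails before the index wraps, which the commit itself does not state.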