summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--grammer.cpp74
-rw-r--r--grammer.h2
-rw-r--r--test-lexer.cpp4
3 files changed, 59 insertions, 21 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 22b874e..3c7671b 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -74,14 +74,20 @@ void Compiler::DumpTree()
std::cout << "= Dump =======================================" << std::endl;
std::cout << "nodes.size()=" << nodes.size() << std::endl;
if (nodes.size() > 0) {
- std::cout << "--- Nodes ------------------------------------" << std::endl;
- std::cout << "root_node_id=" << root_node_id << std::endl;
- for (const auto& node : nodes) {
- std::cout << node.type << "(" << node.node_id << "):";
- for (const auto& child : node.child_ids) {
- std::cout << " " << child;
+ if (0) {
+ std::cout << "--- Nodes ------------------------------------" << std::endl;
+ std::cout << "root_node_id=" << root_node_id << std::endl;
+ for (const auto& node : nodes) {
+ std::cout << node.type << "(" << node.node_id << "):";
+ for (const auto& child : node.child_ids) {
+ std::cout << " ";
+ if (ChildIdIsToken(child))
+ std::cout << "t" << TokenIdFromChildId(child);
+ else
+ std::cout << child;
+ }
+ std::cout << std::endl;
}
- std::cout << std::endl;
}
std::cout << "--- Tree -------------------------------------" << std::endl;
@@ -133,6 +139,7 @@ std::vector<std::string>& Compiler::getNodeExpectedChilds(index_t node_id)
// returns true if all childs are filled, recursively. Else false with to_fill as hint to node to fill
bool Compiler::subTreeIsComplete(index_t node_id, index_t& to_fill)
{
+ // recurse first to get first subtree to_fill
for (const auto& i : nodes[node_id].child_ids) {
if (!ChildIdIsToken(i)) { // consider subtrees
if (!subTreeIsComplete(i, to_fill))
@@ -244,6 +251,39 @@ bool Compiler::AddRootNode()
return true;
}
+void Compiler::removeTokensUpTo(index_t token_id)
+{
+ removeTokensUpTo(token_id, root_node_id);
+}
+
+void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)
+{
+ // std::cout << "DEBUG " << token_id << " " << tokens_used << std::endl;
+ // token_id should be the new tokens_used
+
+ if (token_id < tokens_used) {
+ auto& node{nodes[node_id]};
+ auto& child_ids {node.child_ids};
+
+ // remove relevant tokens from end
+ while (token_id < tokens_used && child_ids.size() && ChildIdIsToken(child_ids.back()) && TokenIdFromChildId(child_ids.back()) >= token_id) {
+ std::cout << "Removing token " << TokenIdFromChildId(child_ids.back()) << std::endl;
+ child_ids.pop_back();
+ if (tokens_used > 0)
+ tokens_used--;
+ else
+ throw std::runtime_error("ICE: Removing non-existing token at "s + std::to_string(node_id) + " ("s + node.type + ")"s);
+ }
+
+ // recurse from back, to remove tokens from end
+ for (auto i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) {
+ if (!ChildIdIsToken(child_ids[i])) {
+ removeTokensUpTo(token_id, child_ids[i]);
+ }
+ }
+ }
+}
+
// Go back one step: Remove Node or Token
void Compiler::RemoveLastNode()
{
@@ -254,35 +294,31 @@ void Compiler::RemoveLastNode()
if (node.child_ids.empty()) { // No children -> now tree is empty
clear();
} else if (ChildIdIsToken(node.child_ids.back())) { // last token child: remove
- tokens_used--;
- node.child_ids.pop_back();
+ removeTokensUpTo(TokenIdFromChildId(node.child_ids.back()));
} else if (node.child_ids.size() == 1) { // One child: removing possible
if (!ChildIdIsToken(node.child_ids[0])) {
// node: set new root
nodes[node.child_ids[0]].parent_node_id = node.child_ids[0];
root_node_id = node.child_ids[0];
}
+ std::cout << "Removing root node " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
nodes.pop_back();
- std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
} else {
DumpTree();
- throw std::runtime_error("Backtrack not possible: Root not empty"); // ICE
+ throw std::runtime_error("ICE: Backtrack not possible: Root not empty");
}
} else if (node.child_ids.empty()) { // No children -> remove leaf
// We have a parent, otherwise we would have taken previous branch
TreeNode& parent {nodes[node.parent_node_id]};
if (parent.child_ids.empty() || parent.child_ids.back() != node_id)
- throw std::runtime_error("Backtrack: Bad child nodes"); // ICE
+ throw std::runtime_error("ICE: Backtrack: Bad child nodes");
parent.child_ids.pop_back();
+ std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
nodes.pop_back();
} else if (ChildIdIsToken(node.child_ids.back())) {
- node.child_ids.pop_back();
- if (tokens_used > 0)
- tokens_used--;
- else
- throw std::runtime_error("Parse error at "s + std::to_string(node_id) + " ("s + node.type + ")"s);
+ removeTokensUpTo(TokenIdFromChildId(node.child_ids.back()));
} else { // In the middle
- throw std::runtime_error("Backtrack in the middle of the tree: "s + std::to_string(node_id) + " ("s + node.type + ")"s); // ICE
+ throw std::runtime_error("ICE: Backtrack in the middle of the tree: "s + std::to_string(node_id) + " ("s + node.type + ")"s);
}
DumpTree();
}
@@ -293,7 +329,7 @@ void Compiler::ChangeNodeType()
TreeNode& node {nodes.back()};
if (node.alternatives.empty())
- throw std::runtime_error("No alternatives found during Backtrack"); // ICE
+ throw std::runtime_error("ICE: No alternatives found during Backtrack");
auto& [type, variant] {node.alternatives.front()};
diff --git a/grammer.h b/grammer.h
index 4f0d235..f6f9d95 100644
--- a/grammer.h
+++ b/grammer.h
@@ -55,6 +55,8 @@ private:
size_t CommonPrefix(const std::vector<Token>& tokens, const std::vector<std::string>& types);
void AddFirstNode();
bool AddRootNode();
+ void removeTokensUpTo(index_t token_id);
+ void removeTokensUpTo(index_t token_id, index_t node_id);
void RemoveLastNode();
void ChangeNodeType();
void TrackBack();
diff --git a/test-lexer.cpp b/test-lexer.cpp
index 2eeaef8..05633d0 100644
--- a/test-lexer.cpp
+++ b/test-lexer.cpp
@@ -61,7 +61,7 @@ TEST_F(Test, BNF) {
// implicit?
//std::set<std::string> Terminals{"identifier", "=", ";"};
- std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ ; "};
+ std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ "};
std::vector<Token> tokens_reference{
{"identifier", "a", { 1, 1} },
{"preprocessing-op-or-punc", "=", { 1, 3}},
@@ -78,7 +78,7 @@ TEST_F(Test, BNF) {
{"pp-number", "1", { 1, 31}},
{"preprocessing-op-or-punc", "=", { 1, 33}},
{"identifier", "XYZ", { 1, 35}},
- {"preprocessing-op-or-punc", ";", { 1, 39}},
+ //{"preprocessing-op-or-punc", ";", { 1, 39}},
};
Lex::Lexer lexer(LexBNF, LexTop);