From 9f28c5b7fb5d97d5b5def918978d4232e5dde31e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Thu, 12 Nov 2020 18:39:38 +0100 Subject: implement restore of head recursion --- grammer.cpp | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) (limited to 'grammer.cpp') diff --git a/grammer.cpp b/grammer.cpp index 3f3a0f1..4af1fd4 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -3,6 +3,7 @@ #include "debug.h" #include +#include #include using namespace Gram; @@ -71,7 +72,9 @@ void Compiler::DumpTree() auto [current_index, indent] {todo.front()}; todo.pop_front(); - std::string line(indent, ' '); + std::string line; + for (int i = 0; i < indent; i++) + line += "| "; if (ChildIdIsToken(current_index)) { index_t token_id {TokenIdFromChildId(current_index)}; line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type; @@ -387,6 +390,124 @@ Compiler::Compiler(BNF& bnf, const std::string& top) fillStartCache(); } +namespace { + + bool hasExtChild(index_t index, const std::vector& nodes) + { + if (index >= nodes.size()) + throw std::runtime_error("Node out of range at ext node detection: "s + std::to_string(index) + " vs. "s + std::to_string(nodes.size())); + + if (nodes[index].child_ids.size() < 1) // no childs + return false; + + int32_t child_id {nodes[index].child_ids.back()}; + + if (ChildIdIsToken(child_id)) // token can't be ext node + return false; + + if (child_id >= nodes.size()) + throw std::runtime_error("Child node out of range at ext node detection: "s + std::to_string(child_id) + " vs. "s + std::to_string(nodes.size())); + + return nodes[child_id].type.ends_with("-EXT"); + } + + // node to add depends on variant: + // std::string: node type + // returns: index of new node + index_t AddNode(std::vector& result, index_t parent, std::string type) + { + index_t node_pos{result.size()}; + + NodePosition pos{parent, 0}; + + if (result.size() > 0) { // i.e. we have a predecessor + pos.child_pos = result[parent].child_ids.size(); + result[parent].child_ids.push_back(node_pos); + } + + result.emplace_back(TreeNode{pos, node_pos, type, 0, {} }); + + return node_pos; + } + + // forward declaration + void visit(index_t nodes_index, const std::vector& nodes, index_t result_parent, std::vector& result); + + // copy child nodes, recursively + // nodes_index: node whose childs to copy to result + // skip: number of elements to skip at end + void copy_childs(index_t nodes_index, const std::vector& nodes, index_t result_parent, std::vector& result, int skip = 0) + { + for (int i = 0; i < int(nodes[nodes_index].child_ids.size()) - skip; i++) { + int32_t child_id{nodes[nodes_index].child_ids[i]}; + if (ChildIdIsNode(child_id)) + visit(child_id, nodes, result_parent, result); + else + result[result_parent].child_ids.push_back(child_id); + } + } + + // returns node to continue with + index_t visitExt(index_t nodes_index, const std::vector& nodes, index_t result_parent, std::vector& result) + { + std::string type{nodes[nodes_index].type}; + assert(type.ends_with("-EXT")); + type = type.substr(0, type.size() - 4); // remove "-EXT" from type + + if (hasExtChild(nodes_index, nodes)) { // ignore empty last "-EXT" nodes finally + // at this point, we have at least 1 last child ("-EXT"): visit it first + result_parent = visitExt(nodes[nodes_index].child_ids.back(), nodes, result_parent, result); + } + + size_t result_node_id{ AddNode(result, result_parent, type)}; // old parent is new child + + copy_childs(nodes_index, nodes, result_parent, result, 1); + + return result_node_id; + } + + // visit nodes recursively, adding to result + // index: index in nodes + void visit(index_t nodes_index, const std::vector& nodes, index_t result_parent, std::vector& result) + { + std::string type{nodes[nodes_index].type}; + + if (type.ends_with("-EXT") && nodes[nodes_index].child_ids.empty()) { + // ignore end of -EXT chain + } else if (hasExtChild(nodes_index, nodes)) { // resolve ext node + result_parent = visitExt(nodes[nodes_index].child_ids.back(), nodes, result_parent, result); + + if (!type.ends_with("-EXT")) // if first-in row, add childs here + copy_childs(nodes_index, nodes, result_parent, result, 1); + + } else { // normal node: just copy, recursively + + assert(!type.ends_with("-EXT")); + + result_parent = AddNode(result, result_parent, type); + copy_childs(nodes_index, nodes, result_parent, result); + } + } + + // At the start of compile() head recursion is removed. This leads to a + // different tree of nodes. This functions restores the head recursion in the + // nodes tree. + // Note: "variant" element of nodes will be all zero since it can't be + // restored easily + std::vector restoreHeadRecursion(const std::vector& nodes) + { + // traverse nodes (old), while reversing if tail recursion via -EXT is detected + + std::vector result; + + if (nodes.size() > 0) + visit(0, nodes, 0, result); + + return result; + } + +} // namespace + std::vector Compiler::compile(std::vector p_tokens) { clear(); @@ -401,6 +522,8 @@ std::vector Compiler::compile(std::vector p_tokens) if (!match(m_top, 0, tokens.size())) throw std::runtime_error("Compile error"); + nodes = restoreHeadRecursion(nodes); + DumpTree(); return nodes; -- cgit v1.2.3