summaryrefslogtreecommitdiffhomepage
path: root/grammer.cpp
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-11-12 18:39:38 +0100
committerRoland Reichwein <mail@reichwein.it>2020-11-12 18:39:38 +0100
commit9f28c5b7fb5d97d5b5def918978d4232e5dde31e (patch)
treecc03a4824e53531cca0b1a0fdc87a1278e42d493 /grammer.cpp
parent32e19781c554c83643fcab4c4f39a6a552c367f5 (diff)
implement restore of head recursion
Diffstat (limited to 'grammer.cpp')
-rw-r--r--grammer.cpp125
1 files changed, 124 insertions, 1 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 3f3a0f1..4af1fd4 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -3,6 +3,7 @@
#include "debug.h"
#include <algorithm>
+#include <cassert>
#include <limits>
using namespace Gram;
@@ -71,7 +72,9 @@ void Compiler::DumpTree()
auto [current_index, indent] {todo.front()};
todo.pop_front();
- std::string line(indent, ' ');
+ std::string line;
+ for (int i = 0; i < indent; i++)
+ line += "| ";
if (ChildIdIsToken(current_index)) {
index_t token_id {TokenIdFromChildId(current_index)};
line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type;
@@ -387,6 +390,124 @@ Compiler::Compiler(BNF& bnf, const std::string& top)
fillStartCache();
}
+namespace {
+
+ bool hasExtChild(index_t index, const std::vector<TreeNode>& nodes)
+ {
+ if (index >= nodes.size())
+ throw std::runtime_error("Node out of range at ext node detection: "s + std::to_string(index) + " vs. "s + std::to_string(nodes.size()));
+
+ if (nodes[index].child_ids.size() < 1) // no childs
+ return false;
+
+ int32_t child_id {nodes[index].child_ids.back()};
+
+ if (ChildIdIsToken(child_id)) // token can't be ext node
+ return false;
+
+ if (child_id >= nodes.size())
+ throw std::runtime_error("Child node out of range at ext node detection: "s + std::to_string(child_id) + " vs. "s + std::to_string(nodes.size()));
+
+ return nodes[child_id].type.ends_with("-EXT");
+ }
+
+ // node to add depends on variant:
+ // std::string: node type
+ // returns: index of new node
+ index_t AddNode(std::vector<TreeNode>& result, index_t parent, std::string type)
+ {
+ index_t node_pos{result.size()};
+
+ NodePosition pos{parent, 0};
+
+ if (result.size() > 0) { // i.e. we have a predecessor
+ pos.child_pos = result[parent].child_ids.size();
+ result[parent].child_ids.push_back(node_pos);
+ }
+
+ result.emplace_back(TreeNode{pos, node_pos, type, 0, {} });
+
+ return node_pos;
+ }
+
+ // forward declaration
+ void visit(index_t nodes_index, const std::vector<TreeNode>& nodes, index_t result_parent, std::vector<TreeNode>& result);
+
+ // copy child nodes, recursively
+ // nodes_index: node whose childs to copy to result
+ // skip: number of elements to skip at end
+ void copy_childs(index_t nodes_index, const std::vector<TreeNode>& nodes, index_t result_parent, std::vector<TreeNode>& result, int skip = 0)
+ {
+ for (int i = 0; i < int(nodes[nodes_index].child_ids.size()) - skip; i++) {
+ int32_t child_id{nodes[nodes_index].child_ids[i]};
+ if (ChildIdIsNode(child_id))
+ visit(child_id, nodes, result_parent, result);
+ else
+ result[result_parent].child_ids.push_back(child_id);
+ }
+ }
+
+ // returns node to continue with
+ index_t visitExt(index_t nodes_index, const std::vector<TreeNode>& nodes, index_t result_parent, std::vector<TreeNode>& result)
+ {
+ std::string type{nodes[nodes_index].type};
+ assert(type.ends_with("-EXT"));
+ type = type.substr(0, type.size() - 4); // remove "-EXT" from type
+
+ if (hasExtChild(nodes_index, nodes)) { // ignore empty last "-EXT" nodes finally
+ // at this point, we have at least 1 last child ("-EXT"): visit it first
+ result_parent = visitExt(nodes[nodes_index].child_ids.back(), nodes, result_parent, result);
+ }
+
+ size_t result_node_id{ AddNode(result, result_parent, type)}; // old parent is new child
+
+ copy_childs(nodes_index, nodes, result_parent, result, 1);
+
+ return result_node_id;
+ }
+
+ // visit nodes recursively, adding to result
+ // index: index in nodes
+ void visit(index_t nodes_index, const std::vector<TreeNode>& nodes, index_t result_parent, std::vector<TreeNode>& result)
+ {
+ std::string type{nodes[nodes_index].type};
+
+ if (type.ends_with("-EXT") && nodes[nodes_index].child_ids.empty()) {
+ // ignore end of -EXT chain
+ } else if (hasExtChild(nodes_index, nodes)) { // resolve ext node
+ result_parent = visitExt(nodes[nodes_index].child_ids.back(), nodes, result_parent, result);
+
+ if (!type.ends_with("-EXT")) // if first-in row, add childs here
+ copy_childs(nodes_index, nodes, result_parent, result, 1);
+
+ } else { // normal node: just copy, recursively
+
+ assert(!type.ends_with("-EXT"));
+
+ result_parent = AddNode(result, result_parent, type);
+ copy_childs(nodes_index, nodes, result_parent, result);
+ }
+ }
+
+ // At the start of compile() head recursion is removed. This leads to a
+ // different tree of nodes. This functions restores the head recursion in the
+ // nodes tree.
+ // Note: "variant" element of nodes will be all zero since it can't be
+ // restored easily
+ std::vector<TreeNode> restoreHeadRecursion(const std::vector<TreeNode>& nodes)
+ {
+ // traverse nodes (old), while reversing if tail recursion via -EXT is detected
+
+ std::vector<TreeNode> result;
+
+ if (nodes.size() > 0)
+ visit(0, nodes, 0, result);
+
+ return result;
+ }
+
+} // namespace
+
std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens)
{
clear();
@@ -401,6 +522,8 @@ std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens)
if (!match(m_top, 0, tokens.size()))
throw std::runtime_error("Compile error");
+ nodes = restoreHeadRecursion(nodes);
+
DumpTree();
return nodes;