diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-03-26 22:20:27 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-03-26 22:20:27 +0100 |
commit | 5467147d9470ee294ddab938098c3ef172222066 (patch) | |
tree | 1c01c90ff99ed0a4ee637ca6f99cf24be9ace62f | |
parent | 94b1f232ea51767fc30c9ed6b3e5912d776bda30 (diff) |
Prepared new Node Tree (WIP)
-rw-r--r-- | grammer.cpp | 123 | ||||
-rw-r--r-- | grammer.h | 27 |
2 files changed, 57 insertions, 93 deletions
diff --git a/grammer.cpp b/grammer.cpp index 97930b1..3cc88fa 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -16,7 +16,7 @@ void Debug(std::string s) void Compiler::clear() { nodes.clear(); - root_node_id = 0; + begin_pos = NodePosition{}; } std::string Compiler::GetTypeOfNode(index_t node_id) const @@ -26,39 +26,6 @@ std::string Compiler::GetTypeOfNode(index_t node_id) const return nodes[node_id].type; } -bool Compiler::IsRootNode(index_t node_id) const -{ - auto& node {nodes[node_id]}; - return node.parent_node_id == node.node_id; -} - -void Compiler::Validate() const -{ - // A program is non empty - if (nodes.size() == 0) - return; - //throw std::runtime_error("Program is empty"); - - // Consistency check for nodes - if (root_node_id >= nodes.size()) - throw std::runtime_error("Bad root node: "s + std::to_string(root_node_id) + " vs. "s + std::to_string(nodes.size())); - - // Start symbol on top - if (GetTypeOfNode(root_node_id) != m_top) - throw std::runtime_error("Root node not start symbol!"); - - // All nodes filled - for (const auto& node: nodes) { - if (node.child_ids.size() != bnf[node.type][node.variant].size()) - throw std::runtime_error("Node not filled: "s + node.type + "["s + std::to_string(node.variant) + "]"s); - } -} - -bool Compiler::rootIsStartSymbol() const -{ - return GetTypeOfNode(root_node_id) == m_top; -} - bool Gram::ChildIdIsToken(int32_t child_id) { return child_id < 0; @@ -81,7 +48,6 @@ void Compiler::DumpTree() if (nodes.size() > 0) { if (0) { Debug("--- Nodes ------------------------------------"); - Debug("root_node_id="s + std::to_string(root_node_id)); for (const auto& node : nodes) { std::string line{"("s + std::to_string(node.node_id) + "):"s}; for (const auto& child : node.child_ids) { @@ -96,7 +62,7 @@ void Compiler::DumpTree() } Debug("--- Tree -------------------------------------"); - std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(root_node_id), 0}}; // id, indent + std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(0), 0}}; // id, indent while (!todo.empty()) { auto [current_index, indent] {todo.front()}; todo.pop_front(); @@ -120,38 +86,41 @@ void Compiler::DumpTree() Debug("=============================================="); } -bool Compiler::treeIsComplete() const +index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition pos) { - return nodes.size() > 0 && rootIsStartSymbol(); + auto& list { bnf[type][variant]}; + index_t node_id{nodes.size()}; + if (nodes.size() > 0) + nodes[pos.node_id].child_ids[pos.child_pos] = node_id; + nodes.emplace_back(TreeNode{pos, node_id, type, variant, std::vector<int32_t>(size_t(list.size()), 0)}); + + Debug("AddNode(): "s + nodes[pos.node_id].type + "->"s + type + ": "s + std::to_string(node_id)); + DumpTree(); + + return node_id; } -index_t Compiler::AddNode(const std::string& child_type, index_t parent_index) +void Compiler::RemoveNode() { - TreeNode& parent {nodes[parent_index]}; - index_t index = nodes.size(); - parent.child_ids.push_back(index); - - index_t variant{}; - std::deque<std::pair<std::string, index_t>> alternatives; - - const auto& variants { bnf[child_type] }; - bool found{false}; - for (int i = 0; i < variants.size(); i++) { // variants - if (!found) { // use first match - variant = i; - found = true; - } else { // defer all others - alternatives.emplace_back(child_type, i); - } - } + auto& pos{nodes.back().pos}; - nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, std::vector<int32_t>{}}); - //root stays + nodes[pos.node_id].child_ids[pos.child_pos] = 0; + nodes.pop_back(); +} - Debug("AddNode(): "s + nodes[parent_index].type + "->"s + child_type + ": "s + std::to_string(index)); - DumpTree(); +Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos): m_compiler(compiler) +{ + m_compiler.AddNode(type, variant, pos); +} - return index; +Compiler::AddNodeGuard::~AddNodeGuard() +{ + m_compiler.RemoveNode(); +} + +void Compiler::IncNodePosition(NodePosition& pos) +{ + // TODO } size_t Compiler::minimumSymbolsNeeded(std::string symbol) @@ -197,16 +166,15 @@ size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list) /// begin, end: indexes in tokens list bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t end) { - // TODO: isTerminal() necessary here? - // match terminal symbols at start - while (begin < end && isTerminal(bnf, tokens[begin].type) && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) { + while (begin < end && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) { begin++; symbol_list.erase(symbol_list.begin()); + IncNodePosition(begin_pos); // TODO: guard? } // match terminal symbols at end - while (begin < end && isTerminal(bnf, tokens[end - 1].type) && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { + while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { end--; symbol_list.erase(symbol_list.end() - 1); } @@ -218,12 +186,14 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t return false; } - // now, symbol_list[begin .. end - 1] has size > 0 and contains non-terminal symbols at start and end + // now, symbol_list has size > 0 and contains non-terminal symbols at start and end // resolve first symbol auto it{bnf.find(symbol_list.front())}; if (it != bnf.end()) { - for (std::vector<std::string> list: it->second) { // iterate over alternatives + for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives + //AddNodeGuard guard(*this, symbol_list.front(), i, begin_pos); + std::vector<std::string> list {it->second[i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); if (minimumSymbolsNeeded(list) > end - begin) // stop recursion continue; @@ -272,29 +242,12 @@ std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> p if (tokens.size() == 0) throw std::runtime_error("No tokens!"); -#if 0 - // - // bottom-up algorithm - // - while (!treeIsComplete()) { - if (!FillTree()) { - TrackBack(); - } else if (!AddRootNode()) { - TrackBack(); - } else if (!FillTree()) { - TrackBack(); - } - } -#else // // top-down algorithm // if (!match(m_top, 0, tokens.size())) throw std::runtime_error("Compile error."); -#endif - - Validate(); - return std::pair<index_t, std::vector<TreeNode>>{root_node_id, nodes}; + return std::pair<index_t, std::vector<TreeNode>>{0, nodes}; } @@ -11,23 +11,27 @@ class GrammerTest; namespace Gram { +struct NodePosition { + index_t node_id{}; // 0-based + index_t child_pos{}; // 0-based +}; + // TreeNodes are only intermediate. Terminal symbols don't get of TreeNodes // token_id: index into token list // node_id: index into tree node list struct TreeNode { - index_t parent_node_id{}; // root if parent_node_id == node_id + NodePosition pos; // position of this node in tree index_t node_id{}; std::string type; index_t variant; // bnf[type][variant] - std::vector<int32_t> child_ids; // < 0: terminal: token_id; >= 0: non-terminal: node_id; fill token by token + std::vector<int32_t> child_ids; // < 0: terminal: token_id; > 0: non-terminal: node_id; = 0: unset }; class Compiler { private: // The result - index_t root_node_id{}; std::vector<TreeNode> nodes; // Input @@ -44,11 +48,18 @@ private: // Node specific std::string GetTypeOfNode(index_t node_id) const; - bool IsRootNode(index_t node_id) const; - bool rootIsStartSymbol() const; - bool treeIsComplete() const; - void Validate() const; - index_t AddNode(const std::string& child_type, index_t parent_index); + index_t AddNode(const std::string& type, index_t variant, NodePosition pos = {}); + void RemoveNode(); + // Adds Node and Removes it on scope exit (RAII) + class AddNodeGuard { + Compiler& m_compiler; + public: + AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos); + ~AddNodeGuard(); + }; + void IncNodePosition(NodePosition& pos); + + NodePosition begin_pos; // top-down algorithm std::unordered_map<std::string, size_t> m_min; // cache |