#include "grammer.h" #include using namespace Gram; void Compiler::clear() { nodes.clear(); root_node_id = 0; tokens_used = 0; } std::string Compiler::GetTypeOfNode(index_t node_id) const { return nodes[node_id].type; } bool Compiler::IsRootNode(index_t node_id) const { auto node& {nodes[node_id]}; return node.parent_node_id == node.node_id; } void Compiler::Validate() const { // A program is non empty if (nodes.size() == 0) throw std::runtime_error(""); // Consistency check for nodes if (root_node_id >= nodes.size()) throw std::runtime_error("Bad root node: "s + std::to_string(root_node_id) + " vs. "s + std::to_string(nodes.size())); // Start symbol on top if (GetTypeOfNode(root_node_id) != Top) throw std::runtime_error("Root node not start symbol!"); // All nodes filled for (const auto& node: nodes) { if (node.child_ids.size() != bnf[node.type][node.variant].size()) throw std::runtime_error("Node not filled: "s + node.type + "["s + std::to_string(node.variant) + "]"s); } } void Compiler::DumpTree() { std::cout << "=== Tree =====================================" << std::endl; for (const auto& i : nodes) { std::cout << i.type << std::endl; } } bool Compiler::RootIsStartSymbol() const { return GetTypeOfNode(root_node_id) == Top; } namespace { bool ChildIdIsToken(int32_t child_id) { return child_id < 0; } index_t TokenIdFromChildId(int32_t child_id) { return index_t(-child_id) - 1; } int32_t ChildIdFromTokenId(index_t token_id) { return -1 - int32_t(token_id); } } // namespace bool Compiler::AllTokensUsed() const { return tokens_used == tokens.size(); } bool Compiler::treeIsComplete() const { return RootIsStartSymbol() && AllTokensUsed(); } std::vector& Compiler::getNodeExpectedChilds(node_id) { std::string& type = nodes[node_id].type; index_t& variant = nodes[node_id].variant; return bnf[type][variant]; } // returns true if all childs are filled, recursively. Else false with to_fill as hint to node to fill bool Compiler::subTreeIsComplete(index_t node_id, index_t& to_fill) { for (const auto& i : nodes[node_id].child_ids) { if (!ChildIdIsToken(i)) { // consider subtrees if (!subTreeIsComplete(i, to_fill)) return false; // found incomplete subtree -> return it! } } if (nodes[node_id].child_ids.size() < getNodeExpectedChilds(node_id).size()) { to_fill = node_id; return false; } return true; } size_t Compiler::CommonPrefix(const std::vector& tokens, const std::vector& types) { auto [tokens_it, types_it] = std::mismatch(tokens.begin(), tokens.end(), types.begin(), types.end(), [](const Token& token, const std::string& s){ return token.type == s; }); return types_it - types.begin(); // a distance, 0 on no match } void Compiler::AddFirstNode() { root_node_id = 0; const std::string& child_type = tokens[0].type; auto it = ReverseBNF.find(child_type); if (it == ReverseBNF.end()) throw std::runtime_error("Illegal first token: "s + child_type + " ("s + tokens[0].value + ")"s); std::set& alternatives_set {it->second}; std::string node_type; index_t node_variant; std::deque> alternatives; // only for valid elements from alternatives_set std::vector child_ids; for (const auto& type : alternatives_set) { const auto& variants{bnf[type]}; for (int i = 0; i < variants.size(); i++) { const std::vector & variant{variants[i]}; size_t common{}; if ((common = CommonPrefix(tokens, variant)) > 0) { // match a common prefix if (node_type == "") { // no match yet node_type = type; node_variant = i; for (int token_id = 0; token_id < common; token_id++) child_ids.push_back(ChildIdFromTokenId(token_id)); } else alternatives.emplace_back(type, i); } } } if (node_type == "") // no matching type found throw std::runtime_error("Syntax error on first token: "s + child_type + " ("s + tokens[0].value + ")"s); nodes.emplace_back({0, 0, node_type, node_variant, alternatives, child_ids}); tokens_used = child_ids.size(); } bool Compiler::AddRootNode() { if (nodes.size() == 0) { AddFirstNode(); } else { const std::string& child_type = nodes[root_node_id].type; // starting at old root auto it = ReverseBNF.find(child_type); if (it == ReverseBNF.end()) // this one doesn't have a parent, maybe a start symbol to discard? return false; index_t old_root_node_id {root_node_id}; index_t new_root_node_id {nodes.size()}; std::set& alternatives_set {it->second}; std::string node_type; index_t node_variant; std::deque> alternatives; // only for valid elements from alternatives_set std::vector child_ids{size_t(1), old_root_node_id}; for (const auto& type : alternatives_set) { const auto& variants{bnf[type]}; for (int i = 0; i < variants.size(); i++) { const std::vector & variant{variants[i]}; if (child_type == variant[0]) { if (node_type == "") { node_type = type; node_variant = i; } else alternatives.emplace_back(type, i); // duplicates possible: variants of same type } } } if (node_type == "") // no matching type found (maybe backtrack possible?) return false; // now add! nodes[old_root_node_id].parent_node_id = new_root_node_id; root_node_id = new_root_node_id; nodes.emplace_back({root_node_id, root_node_id, node_type, node_variant, alternatives, child_ids}); // keep tokens_used as is } return true; } void Compiler::RemoveLastNode() { TreeNode& node {nodes.back()}; index_t node_id = node.node_id; if (node_id == root_node_id) { // No parent -> remove root if (node.child_ids().empty()) { // No children -> now empty clear(); } else if (node.child_ids().size() == 1) { // One child: removing possible root_node_id = node.child_ids()[0]; nodes.pop_back(); } else throw std::runtime_error("Backtrack not possible: Root not empty"); // ICE } else if (node.child_ids().empty()) { // No children -> remove leaf // We have a parent, otherwise we would have taken previous branch TreeNode& parent {nodes[node.parent_node_id]}; if (parent.child_ids().empty() || parent.child_ids().last() != node_id) throw std::runtime_error("Backtrack: Bad child nodes"); // ICE parent.childs_ids().pop_back(); nodes.pop_back(); } else { // In the middle throw std::runtime_error("Backtrack in the middle of the tree."); // ICE } } // Change type of last node according to alternatives void Compiler::ChangeNodeType() { TreeNode& node {nodes.back()}; index_t node_id = node.node_id; if (node.alternatives.empty()) throw std::runtime_error("No alternatives found during Backtrack"); // ICE auto& [type, variant] {node.alternatives.front()}; node.type = type; node.variant = variant; node.alternatives.pop_front(); } // throws if no further track back is possible: compile error void Compiler::TrackBack() { // Search backwards for alternatives: last node with alternatives (variant or alt. token) while (!nodes.empty() && nodes.last().alternatives.empty()) { RemoveLastNode(); } if (nodes.empty()) { throw std::runtime_error("Compile error: Invalid program."); } ChangeNodeType(); } // return shortest path with variants // via first-entry-in-bnf-rule // excluding lower (already exists) // including upper (already determined to be included) // breadth-first search std::map Compiler::traverse(lower, upper) { std::map visited; // node, child std::deque> todo{{lower, ""}}; // node, child while (!todo.empty()) { auto& [current_node, current_child] = todo.front(); todo.pop_front(); if (!visited[current_node]) { auto parents_it {reverseBNF.find(current_node)}; if (parents_it != reverseBNF.end()) { auto& parents {parents_it->second}; visited[current_node] = current_child; std::for_each(parents.begin(), parents.end(), [&](const auto&x) {todo.emplace_back(x, current_node);}); } } } return visited; } // returns list from lower (excluding) to upper (including) // returns empty list on fail std::vector Compiler::GetPath(std::string upper, std::string lower) { std::vector result; // traverse bnf from lower to upper std::map visited {traverse(lower, upper)}; auto current {upper}; while (current != lower) { auto& child = visited[current]; result.push_back(current); current = child; } return result; } index_t Compiler::AddNode(const std::string& name, const std::string& child_type, index_t parent_index) { TreeNode& parent {nodes[parent_index]}; index_t index = nodes.size(); parent.child_ids.push_back(index); index_t variant; std::deque alternatives; const auto& lists { bnf[parent.type] }; for (int i = 0; i < lists.size(); i++) { // variants if (lists[i].size() > 0 && lists[i][0] == child_type) variant = i; } nodes.emplace_back({parent_index, index, child_type, variant, alternatives, {}}); //root stays, tokens_used stays return index; } void Compiler::AddPath(const std::vector& path, index_t current_index) { for (int i = path.size() - 1; i > 0; i--) { std::string child_name = path[i - 1]; current_index = AddNode(path[i], child_name, current_index); } nodes.back().child_ids.emplace_back(ChildIdFromTokenId(tokens_used)); tokens_used++; } bool Compiler::FillTree() { if (nodes.size() == 0) // ignore empty tree, successfully return true; index_t to_fill; while (!subTreeIsComplete(root_node_id, to_fill)) { auto& node {nodes[to_fill]}; auto list = GetPath(bnf[node.type][node.variant][node.child_ids.size()], tokens[tokens_used]); if (list.size() > 0) { AddPath(list, to_fill); return true; } else { return false; } } } Compiler::Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} { } Tree Compiler::compile(std::vector Tokens) { clear(); tokens = Tokens; if (tokens.size() == 0) throw std::runtime_error("No tokens!"); while (!treeIsComplete()) { if (!FillTree()) TrackBack(); else if (!AddRootNode()) TrackBack(); else if (!FillTree()) TrackBack(); } Validate(); return tree; }