From 3057729f132d516dd9ed58c6964a495aa1c11c3d Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Fri, 27 Mar 2020 17:39:58 +0100 Subject: Top-down algo recognizes first C*+ program --- grammer.cpp | 117 ++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 22 deletions(-) (limited to 'grammer.cpp') diff --git a/grammer.cpp b/grammer.cpp index 3cc88fa..cd5b50f 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -15,8 +15,8 @@ void Debug(std::string s) void Compiler::clear() { + symbol_variants.clear(); nodes.clear(); - begin_pos = NodePosition{}; } std::string Compiler::GetTypeOfNode(index_t node_id) const @@ -26,11 +26,21 @@ std::string Compiler::GetTypeOfNode(index_t node_id) const return nodes[node_id].type; } +bool Gram::ChildIdIsEmpty(int32_t child_id) +{ + return child_id == 0; +} + bool Gram::ChildIdIsToken(int32_t child_id) { return child_id < 0; } +bool Gram::ChildIdIsNode(int32_t child_id) +{ + return child_id > 0; +} + index_t Gram::TokenIdFromChildId(int32_t child_id) { return index_t(-child_id) - 1; @@ -94,33 +104,53 @@ index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition nodes[pos.node_id].child_ids[pos.child_pos] = node_id; nodes.emplace_back(TreeNode{pos, node_id, type, variant, std::vector(size_t(list.size()), 0)}); - Debug("AddNode(): "s + nodes[pos.node_id].type + "->"s + type + ": "s + std::to_string(node_id)); - DumpTree(); - return node_id; } -void Compiler::RemoveNode() +Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, index_t variant): m_compiler(compiler) { - auto& pos{nodes.back().pos}; - - nodes[pos.node_id].child_ids[pos.child_pos] = 0; - nodes.pop_back(); -} - -Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos): m_compiler(compiler) -{ - m_compiler.AddNode(type, variant, pos); + m_compiler.symbol_variants.push_back(variant); } Compiler::AddNodeGuard::~AddNodeGuard() { - m_compiler.RemoveNode(); + m_compiler.symbol_variants.pop_back(); } void Compiler::IncNodePosition(NodePosition& pos) { - // TODO + if (nodes.size() == 0) // special case: empty tree + return; + if (pos.node_id >= nodes.size()) + throw std::runtime_error("ICE: NodePosition with node_id "s + std::to_string(pos.node_id) + " doesn't exist."s); + if (pos.child_pos >= nodes[pos.node_id].child_ids.size()) + throw std::runtime_error("ICE: NodePosition with child_pos "s + std::to_string(pos.child_pos) + " in node_id "s + std::to_string(pos.node_id) + " doesn't exist."s); + + int32_t child_id{nodes[pos.node_id].child_ids[pos.child_pos]}; + + if (ChildIdIsEmpty(child_id)) + throw std::runtime_error("ICE: NodePosition is empty"); + + // Actually, advance + if (ChildIdIsToken(child_id)) { + pos.child_pos++; + } else { + pos.node_id = child_id; + pos.child_pos = 0; + } + + // Go to parent if child ids completely traversed + while (pos.node_id > 0 && pos.child_pos >= nodes[pos.node_id].child_ids.size()) { + pos = nodes[pos.node_id].pos; + pos.child_pos++; + } + + // Advancing at root node for last child is allowed: Finished + if (pos.child_pos >= nodes[pos.node_id].child_ids.size()) + return; + + if (ChildIdIsNode(nodes[pos.node_id].child_ids[pos.child_pos])) + throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos)); } size_t Compiler::minimumSymbolsNeeded(std::string symbol) @@ -170,7 +200,6 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t while (begin < end && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) { begin++; symbol_list.erase(symbol_list.begin()); - IncNodePosition(begin_pos); // TODO: guard? } // match terminal symbols at end @@ -181,8 +210,11 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t // matching of empty lists if (symbol_list.size() == 0) { - if (begin == end) // only match real empty list + if (begin == end) { // only match real empty list + // this is the point of the final match + constructTree(); return true; + } return false; } @@ -192,7 +224,7 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t auto it{bnf.find(symbol_list.front())}; if (it != bnf.end()) { for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives - //AddNodeGuard guard(*this, symbol_list.front(), i, begin_pos); + AddNodeGuard guard(*this, i); std::vector list {it->second[i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); if (minimumSymbolsNeeded(list) > end - begin) // stop recursion @@ -214,6 +246,42 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end) return match(symbol_list, begin, end); } +void Compiler::constructTree() +{ + symbol_variants_it = symbol_variants.begin(); + + m_symbol_list = {m_top}; + m_symbol_list_pos = 0; + + NodePosition tree_pos; + + while (m_symbol_list_pos < m_symbol_list.size()) { + std::string symbol{m_symbol_list[m_symbol_list_pos]}; + + if (isTerminal(bnf, symbol)) { + // Advance terminal symbol + nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = ChildIdFromTokenId(m_symbol_list_pos); + IncNodePosition(tree_pos); + m_symbol_list_pos++; + } else { + // Replace non-terminal symbol + m_symbol_list.erase(m_symbol_list.begin() + m_symbol_list_pos); + std::vector list {bnf[symbol][*symbol_variants_it]}; + m_symbol_list.insert(m_symbol_list.begin() + m_symbol_list_pos, list.begin(), list.end()); + + index_t node_id {AddNode(symbol, *symbol_variants_it, tree_pos)}; + + if (node_id > 0) { + nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = node_id; + IncNodePosition(tree_pos); + } + + symbol_variants_it++; + } + } + +} + Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} { //std::cout << "DEBUG: " << m_top << std::endl; @@ -234,7 +302,7 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve minimumSymbolsNeeded("translation-unit"); } -std::pair> Compiler::compile(std::vector p_tokens) +std::vector Compiler::compile(std::vector p_tokens) { clear(); tokens = p_tokens; @@ -243,11 +311,16 @@ std::pair> Compiler::compile(std::vector p throw std::runtime_error("No tokens!"); // - // top-down algorithm + // top-down algorithm: + // + // 1. Match linear tokens list to bnf, building up list of used variants (symbol_variants) + // 2. Construct Node Tree from symbol_variants // if (!match(m_top, 0, tokens.size())) throw std::runtime_error("Compile error."); - return std::pair>{0, nodes}; + //DumpTree(); + + return nodes; } -- cgit v1.2.3