#include "grammer.h" #include #include using namespace Gram; bool debug{false}; void Debug(std::string s) { if (debug) std::cout << s << std::endl; } void Compiler::clear() { nodes.clear(); begin_pos = NodePosition{}; } std::string Compiler::GetTypeOfNode(index_t node_id) const { if (node_id >= nodes.size()) throw std::runtime_error("GetTypeOfNode(): node_id="s + std::to_string(node_id) + ", nodes.size()="s + std::to_string(nodes.size())); return nodes[node_id].type; } bool Gram::ChildIdIsToken(int32_t child_id) { return child_id < 0; } index_t Gram::TokenIdFromChildId(int32_t child_id) { return index_t(-child_id) - 1; } int32_t Gram::ChildIdFromTokenId(index_t token_id) { return -1 - int32_t(token_id); } void Compiler::DumpTree() { Debug("= Dump ======================================="); Debug("nodes.size()="s + std::to_string(nodes.size())); if (nodes.size() > 0) { if (0) { Debug("--- Nodes ------------------------------------"); for (const auto& node : nodes) { std::string line{"("s + std::to_string(node.node_id) + "):"s}; for (const auto& child : node.child_ids) { line += " "s; if (ChildIdIsToken(child)) line += "t"s + std::to_string(TokenIdFromChildId(child)); else line += std::to_string(child); } Debug(line); } } Debug("--- Tree -------------------------------------"); std::deque> todo{std::pair{static_cast(0), 0}}; // id, indent while (!todo.empty()) { auto [current_index, indent] {todo.front()}; todo.pop_front(); std::string line(indent, ' '); if (ChildIdIsToken(current_index)) { index_t token_id {TokenIdFromChildId(current_index)}; line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s; } else { auto& node {nodes[current_index]}; line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant); auto child_ids{node.child_ids}; for (int i = 0; i < child_ids.size(); i++) { todo.insert(todo.begin() + i, std::pair{child_ids[i], indent + 1}); } } Debug(line); } } Debug("=============================================="); } index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition pos) { auto& list { bnf[type][variant]}; index_t node_id{nodes.size()}; if (nodes.size() > 0) nodes[pos.node_id].child_ids[pos.child_pos] = node_id; nodes.emplace_back(TreeNode{pos, node_id, type, variant, std::vector(size_t(list.size()), 0)}); Debug("AddNode(): "s + nodes[pos.node_id].type + "->"s + type + ": "s + std::to_string(node_id)); DumpTree(); return node_id; } void Compiler::RemoveNode() { auto& pos{nodes.back().pos}; nodes[pos.node_id].child_ids[pos.child_pos] = 0; nodes.pop_back(); } Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos): m_compiler(compiler) { m_compiler.AddNode(type, variant, pos); } Compiler::AddNodeGuard::~AddNodeGuard() { m_compiler.RemoveNode(); } void Compiler::IncNodePosition(NodePosition& pos) { // TODO } size_t Compiler::minimumSymbolsNeeded(std::string symbol) { if (isTerminal(bnf, symbol)) { return 1; } else { auto it_min{m_min.find(symbol)}; if (it_min != m_min.end()) return it_min->second; m_min[symbol] = std::numeric_limits::max(); auto it{bnf.find(symbol)}; if (it != bnf.end()) { size_t minimum{std::numeric_limits::max()}; for (const auto& list: it->second) { minimum = std::min(minimum, minimumSymbolsNeeded(list)); } m_min[symbol] = minimum; return minimum; } else throw std::runtime_error("ICE: Symbol "s + symbol + " expected in BNF"s); } } size_t Compiler::minimumSymbolsNeeded(std::vector symbol_list) { size_t result{0}; for (const auto& symbol: symbol_list) { size_t min { minimumSymbolsNeeded(symbol) }; if (min == std::numeric_limits::max()) return min; result += min; } return result; } /// begin, end: indexes in tokens list bool Compiler::match(std::vector symbol_list, size_t begin, size_t end) { // match terminal symbols at start while (begin < end && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) { begin++; symbol_list.erase(symbol_list.begin()); IncNodePosition(begin_pos); // TODO: guard? } // match terminal symbols at end while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { end--; symbol_list.erase(symbol_list.end() - 1); } // matching of empty lists if (symbol_list.size() == 0) { if (begin == end) // only match real empty list return true; return false; } // now, symbol_list has size > 0 and contains non-terminal symbols at start and end // resolve first symbol auto it{bnf.find(symbol_list.front())}; if (it != bnf.end()) { for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives //AddNodeGuard guard(*this, symbol_list.front(), i, begin_pos); std::vector list {it->second[i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); if (minimumSymbolsNeeded(list) > end - begin) // stop recursion continue; // TODO: recurse last? if (match(list, begin, end)) return true; } } else return false; // terminal symbol not found in bnf, non-matching return false; // no match found } bool Compiler::match(std::string symbol, size_t begin, size_t end) { std::vector symbol_list{symbol}; return match(symbol_list, begin, end); } Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} { //std::cout << "DEBUG: " << m_top << std::endl; // // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements) // minimumSymbolsNeeded("translation-unit"); // remove bad marker elements auto it{m_min.begin()}; while (it != m_min.end()) { if (it->second == std::numeric_limits::max()) { it = m_min.erase(it); } else { ++it; } } minimumSymbolsNeeded("translation-unit"); } std::pair> Compiler::compile(std::vector p_tokens) { clear(); tokens = p_tokens; if (tokens.size() == 0) throw std::runtime_error("No tokens!"); // // top-down algorithm // if (!match(m_top, 0, tokens.size())) throw std::runtime_error("Compile error."); return std::pair>{0, nodes}; }