diff options
Diffstat (limited to 'grammer.cpp')
-rw-r--r-- | grammer.cpp | 136 |
1 files changed, 131 insertions, 5 deletions
diff --git a/grammer.cpp b/grammer.cpp index 5557b12..cc285b9 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -1,6 +1,7 @@ #include "grammer.h" #include <algorithm> +#include <limits> using namespace Gram; @@ -44,7 +45,7 @@ void Compiler::Validate() const throw std::runtime_error("Bad root node: "s + std::to_string(root_node_id) + " vs. "s + std::to_string(nodes.size())); // Start symbol on top - if (GetTypeOfNode(root_node_id) != Top) + if (GetTypeOfNode(root_node_id) != m_top) throw std::runtime_error("Root node not start symbol!"); // All nodes filled @@ -56,7 +57,7 @@ void Compiler::Validate() const bool Compiler::rootIsStartSymbol() const { - return GetTypeOfNode(root_node_id) == Top; + return GetTypeOfNode(root_node_id) == m_top; } bool Gram::ChildIdIsToken(int32_t child_id) @@ -493,18 +494,136 @@ bool Compiler::FillTree() return true; } -Compiler::Compiler(BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} +size_t Compiler::minimumSymbolsNeeded(std::string symbol) { + if (isTerminal(bnf, symbol)) { + return 1; + } else { + auto it_min{m_min.find(symbol)}; + if (it_min != m_min.end()) + return it_min->second; + m_min[symbol] = std::numeric_limits<size_t>::max(); + + auto it{bnf.find(symbol)}; + if (it != bnf.end()) { + size_t minimum{std::numeric_limits<size_t>::max()}; + + for (const auto& list: it->second) { + minimum = std::min(minimum, minimumSymbolsNeeded(list)); + } + + m_min[symbol] = minimum; + + return minimum; + } else + throw std::runtime_error("ICE: Symbol "s + symbol + " expected in BNF"s); + } +} + +size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list) +{ + size_t result{0}; + + for (const auto& symbol: symbol_list) { + size_t min { minimumSymbolsNeeded(symbol) }; + if (min == std::numeric_limits<size_t>::max()) + return min; + result += min; + } + + return result; +} + +/// begin, end: indexes in tokens list +bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t end) +{ + // TODO: isTerminal() necessary here? + + std::cout << "DEBUG: Matching:"; + for (auto symbol: symbol_list) { + std::cout << " |" << symbol << "|"; + } + std::cout << std::endl; + + // match terminal symbols at start + while (begin < end && isTerminal(bnf, tokens[begin].type) && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) { + begin++; + symbol_list.erase(symbol_list.begin()); + } + + // match terminal symbols at end + while (begin < end && isTerminal(bnf, tokens[end - 1].type) && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { + end--; + symbol_list.erase(symbol_list.end() - 1); + } + + // matching of empty lists + if (symbol_list.size() == 0) { + if (begin == end) // only match real empty list + return true; + return false; + } + + // now, symbol_list[begin .. end - 1] has size > 0 and contains non-terminal symbols at start and end + + // resolve first symbol + auto it{bnf.find(symbol_list.front())}; + if (it != bnf.end()) { + for (std::vector<std::string> list: it->second) { // iterate over alternatives + std::cout << "ALTERNATIVE for " << symbol_list.front() << " with " << list.size() << std::endl; + list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); + std::cout << "Min: " << minimumSymbolsNeeded(list) << ", end-begin: " << (end-begin) << std::endl; + if (minimumSymbolsNeeded(list) > end - begin) // stop recursion + continue; + + // TODO: recurse last? + if (match(list, begin, end)) + return true; + } + } else + return false; // terminal symbol not found in bnf, non-matching + + return false; // no match found +} + +bool Compiler::match(std::string symbol, size_t begin, size_t end) +{ + std::vector<std::string> symbol_list{symbol}; + return match(symbol_list, begin, end); +} + +Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} +{ + //std::cout << "DEBUG: " << m_top << std::endl; + + // + // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements) + // + minimumSymbolsNeeded("translation-unit"); + // remove bad marker elements + auto it{m_min.begin()}; + while (it != m_min.end()) { + if (it->second == std::numeric_limits<size_t>::max()) { + it = m_min.erase(it); + } else { + ++it; + } + } + minimumSymbolsNeeded("translation-unit"); } -std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> Tokens) +std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> p_tokens) { clear(); - tokens = Tokens; + tokens = p_tokens; if (tokens.size() == 0) throw std::runtime_error("No tokens!"); +#if 0 + // + // bottom-up algorithm + // while (!treeIsComplete()) { if (!FillTree()) { TrackBack(); @@ -514,6 +633,13 @@ std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> T TrackBack(); } } +#else + // + // top-down algorithm + // + if (!match(m_top, 0, tokens.size())) + throw std::runtime_error("Compile error."); +#endif Validate(); |