summaryrefslogtreecommitdiffhomepage
path: root/grammer.cpp
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-03-23 22:02:40 +0100
committerRoland Reichwein <mail@reichwein.it>2020-03-23 22:02:40 +0100
commitfce1c18103f208857af477ca47af9aa908bd69b6 (patch)
tree3d0affeca1328825e81be8950f19e956f39ab213 /grammer.cpp
parent365183e243d164185bca6ad9fa4e0d75664800f4 (diff)
Helper functions for grammer
Diffstat (limited to 'grammer.cpp')
-rw-r--r--grammer.cpp136
1 files changed, 131 insertions, 5 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 5557b12..cc285b9 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -1,6 +1,7 @@
#include "grammer.h"
#include <algorithm>
+#include <limits>
using namespace Gram;
@@ -44,7 +45,7 @@ void Compiler::Validate() const
throw std::runtime_error("Bad root node: "s + std::to_string(root_node_id) + " vs. "s + std::to_string(nodes.size()));
// Start symbol on top
- if (GetTypeOfNode(root_node_id) != Top)
+ if (GetTypeOfNode(root_node_id) != m_top)
throw std::runtime_error("Root node not start symbol!");
// All nodes filled
@@ -56,7 +57,7 @@ void Compiler::Validate() const
bool Compiler::rootIsStartSymbol() const
{
- return GetTypeOfNode(root_node_id) == Top;
+ return GetTypeOfNode(root_node_id) == m_top;
}
bool Gram::ChildIdIsToken(int32_t child_id)
@@ -493,18 +494,136 @@ bool Compiler::FillTree()
return true;
}
-Compiler::Compiler(BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)}
+size_t Compiler::minimumSymbolsNeeded(std::string symbol)
{
+ if (isTerminal(bnf, symbol)) {
+ return 1;
+ } else {
+ auto it_min{m_min.find(symbol)};
+ if (it_min != m_min.end())
+ return it_min->second;
+ m_min[symbol] = std::numeric_limits<size_t>::max();
+
+ auto it{bnf.find(symbol)};
+ if (it != bnf.end()) {
+ size_t minimum{std::numeric_limits<size_t>::max()};
+
+ for (const auto& list: it->second) {
+ minimum = std::min(minimum, minimumSymbolsNeeded(list));
+ }
+
+ m_min[symbol] = minimum;
+
+ return minimum;
+ } else
+ throw std::runtime_error("ICE: Symbol "s + symbol + " expected in BNF"s);
+ }
+}
+
+size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list)
+{
+ size_t result{0};
+
+ for (const auto& symbol: symbol_list) {
+ size_t min { minimumSymbolsNeeded(symbol) };
+ if (min == std::numeric_limits<size_t>::max())
+ return min;
+ result += min;
+ }
+
+ return result;
+}
+
+/// begin, end: indexes in tokens list
+bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t end)
+{
+ // TODO: isTerminal() necessary here?
+
+ std::cout << "DEBUG: Matching:";
+ for (auto symbol: symbol_list) {
+ std::cout << " |" << symbol << "|";
+ }
+ std::cout << std::endl;
+
+ // match terminal symbols at start
+ while (begin < end && isTerminal(bnf, tokens[begin].type) && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) {
+ begin++;
+ symbol_list.erase(symbol_list.begin());
+ }
+
+ // match terminal symbols at end
+ while (begin < end && isTerminal(bnf, tokens[end - 1].type) && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) {
+ end--;
+ symbol_list.erase(symbol_list.end() - 1);
+ }
+
+ // matching of empty lists
+ if (symbol_list.size() == 0) {
+ if (begin == end) // only match real empty list
+ return true;
+ return false;
+ }
+
+ // now, symbol_list[begin .. end - 1] has size > 0 and contains non-terminal symbols at start and end
+
+ // resolve first symbol
+ auto it{bnf.find(symbol_list.front())};
+ if (it != bnf.end()) {
+ for (std::vector<std::string> list: it->second) { // iterate over alternatives
+ std::cout << "ALTERNATIVE for " << symbol_list.front() << " with " << list.size() << std::endl;
+ list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
+ std::cout << "Min: " << minimumSymbolsNeeded(list) << ", end-begin: " << (end-begin) << std::endl;
+ if (minimumSymbolsNeeded(list) > end - begin) // stop recursion
+ continue;
+
+ // TODO: recurse last?
+ if (match(list, begin, end))
+ return true;
+ }
+ } else
+ return false; // terminal symbol not found in bnf, non-matching
+
+ return false; // no match found
+}
+
+bool Compiler::match(std::string symbol, size_t begin, size_t end)
+{
+ std::vector<std::string> symbol_list{symbol};
+ return match(symbol_list, begin, end);
+}
+
+Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)}
+{
+ //std::cout << "DEBUG: " << m_top << std::endl;
+
+ //
+ // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements)
+ //
+ minimumSymbolsNeeded("translation-unit");
+ // remove bad marker elements
+ auto it{m_min.begin()};
+ while (it != m_min.end()) {
+ if (it->second == std::numeric_limits<size_t>::max()) {
+ it = m_min.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ minimumSymbolsNeeded("translation-unit");
}
-std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> Tokens)
+std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> p_tokens)
{
clear();
- tokens = Tokens;
+ tokens = p_tokens;
if (tokens.size() == 0)
throw std::runtime_error("No tokens!");
+#if 0
+ //
+ // bottom-up algorithm
+ //
while (!treeIsComplete()) {
if (!FillTree()) {
TrackBack();
@@ -514,6 +633,13 @@ std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> T
TrackBack();
}
}
+#else
+ //
+ // top-down algorithm
+ //
+ if (!match(m_top, 0, tokens.size()))
+ throw std::runtime_error("Compile error.");
+#endif
Validate();