summaryrefslogtreecommitdiffhomepage
path: root/grammer.cpp
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-03-30 18:33:01 +0200
committerRoland Reichwein <mail@reichwein.it>2020-03-30 18:33:01 +0200
commit79fbc8bf495770e4a8b7c66c46acf07f4e47e568 (patch)
tree174c0f3c194013c39a73f6a50ed8866784388c07 /grammer.cpp
parent2eb2383387d16fc919c07e1a6b9211406576b893 (diff)
Speed up compile
Diffstat (limited to 'grammer.cpp')
-rw-r--r--grammer.cpp96
1 files changed, 83 insertions, 13 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 8b6f01b..9ceed12 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -147,7 +147,7 @@ void Compiler::IncNodePosition(NodePosition& pos)
throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos));
}
-size_t Compiler::minimumSymbolsNeeded(std::string symbol)
+size_t Compiler::minimumSymbolsNeeded(const std::string& symbol)
{
if (isTerminal(bnf, symbol)) {
return 1;
@@ -173,7 +173,7 @@ size_t Compiler::minimumSymbolsNeeded(std::string symbol)
}
}
-size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list)
+size_t Compiler::minimumSymbolsNeeded(const std::vector<std::string>& symbol_list)
{
size_t result{0};
@@ -196,12 +196,6 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
symbol_list.erase(symbol_list.begin());
}
- // match terminal symbols at end
- while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) {
- end--;
- symbol_list.erase(symbol_list.end() - 1);
- }
-
// matching of empty lists
if (symbol_list.size() == 0) {
if (begin == end) { // only match real empty list
@@ -218,6 +212,8 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
auto it{bnf.find(symbol_list.front())};
if (it != bnf.end()) {
for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives
+ if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization
+ continue;
AddNodeGuard guard(*this, i);
std::vector<std::string> list {it->second[i]};
list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
@@ -240,6 +236,74 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end)
return match(symbol_list, begin, end);
}
+bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const
+{
+ return m_start_cache.contains({non_terminal, variant, terminal});
+}
+
+void Compiler::fillStartCache()
+{
+ std::unordered_set<std::string> terminals {getTerminals(bnf)};
+
+ { // follow terminal symbols
+ for (const std::string& terminal: terminals) {
+ std::unordered_set<std::string> current_set{terminal};
+ std::unordered_set<std::string> done_set;
+ std::unordered_set<std::string> next_set;
+
+ do {
+ for (const auto& current_symbol: current_set) {
+ auto it {reversedPosFirst.find(current_symbol)};
+ if (it != reversedPosFirst.end()) {
+ std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second};
+
+ for (const auto& position: positions) {
+ m_start_cache.insert(std::tuple<std::string, size_t, std::string>{position.first, position.second, terminal});
+ if (done_set.find(position.first) == done_set.end()) {
+ next_set.insert(position.first);
+ done_set.insert(position.first);
+ }
+ }
+ }
+ }
+
+ current_set = next_set;
+ next_set.clear();
+ } while (!current_set.empty());
+ }
+ }
+
+ { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(bnf)};
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> done_set;
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> next_set;
+
+ do {
+ for (const auto& current_pos: current_set) {
+ for (const std::string& terminal: terminals) {
+ m_start_cache.insert(std::tuple<std::string, size_t, std::string>{current_pos.first, current_pos.second, terminal});
+ }
+
+ auto it {reversedPosFirst.find(current_pos.first)};
+ if (it != reversedPosFirst.end()) {
+ std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second};
+
+ for (const auto& position: positions) {
+ if (done_set.find(position) == done_set.end()) {
+ next_set.insert(position);
+ done_set.insert(position);
+ }
+ }
+ }
+
+ }
+
+ current_set = next_set;
+ next_set.clear();
+ } while (!current_set.empty());
+ }
+}
+
void Compiler::constructTree()
{
symbol_variants_it = symbol_variants.begin();
@@ -276,14 +340,17 @@ void Compiler::constructTree()
}
-Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)}
+Compiler::Compiler(BNF& bnf, const std::string& top)
+ : bnf(bnf)
+ , m_top(top)
+ //, ReverseBNF{Reverse(bnf)}
+ //, reversedFirst{reverseFirst(bnf)}
+ , reversedPosFirst{reversePosFirst(bnf)}
{
- //std::cout << "DEBUG: " << m_top << std::endl;
-
//
// prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements)
//
- minimumSymbolsNeeded("translation-unit");
+ (void) minimumSymbolsNeeded("translation-unit");
// remove bad marker elements
auto it{m_min.begin()};
while (it != m_min.end()) {
@@ -293,7 +360,10 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve
++it;
}
}
- minimumSymbolsNeeded("translation-unit");
+ (void) minimumSymbolsNeeded("translation-unit");
+
+ // fill other cache
+ fillStartCache();
}
std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens)