From 3c8e6cc25e414fed9dbaadef6fed9cc7efaf3068 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Tue, 31 Mar 2020 12:03:53 +0200 Subject: Optimize start lookup --- bnf.h | 1 + grammer.cpp | 35 ++++++++++++++++++++++------------- grammer.h | 3 +-- minicc.h | 9 +++++++++ 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/bnf.h b/bnf.h index 0430a2c..e5c62ed 100644 --- a/bnf.h +++ b/bnf.h @@ -30,3 +30,4 @@ struct PairHash { std::unordered_set, PairHash> getEmptyPositions(const BNF& bnf); std::unordered_map, PairHash>> reversePosFirst(const BNF& bnf); // like reverseFirstPos, but including variant + diff --git a/grammer.cpp b/grammer.cpp index 9ceed12..50617ce 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -209,13 +209,11 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t // now, symbol_list has size > 0 and contains non-terminal symbols at start and end // resolve first symbol - auto it{bnf.find(symbol_list.front())}; - if (it != bnf.end()) { - for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives - if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization - continue; + auto it = m_match_lut.find({symbol_list.front(), tokens[begin].type}); + if (it != m_match_lut.end()) { + for (size_t i: it->second) { AddNodeGuard guard(*this, i); - std::vector list {it->second[i]}; + std::vector list {bnf[symbol_list.front()][i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); if (minimumSymbolsNeeded(list) > end - begin) // stop recursion continue; @@ -236,11 +234,6 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end) return match(symbol_list, begin, end); } -bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const -{ - return m_start_cache.contains({non_terminal, variant, terminal}); -} - void Compiler::fillStartCache() { std::unordered_set terminals {getTerminals(bnf)}; @@ -258,7 +251,13 @@ void Compiler::fillStartCache() std::unordered_set, PairHash>& positions{it->second}; for (const auto& position: positions) { - m_start_cache.insert(std::tuple{position.first, position.second, terminal}); + auto it_lut {m_match_lut.find({position.first, terminal})}; + if (it_lut == m_match_lut.end()) { // add new list + m_match_lut[std::pair{position.first, terminal}] = std::vector{position.second}; + } else { // extend list + it_lut->second.emplace_back(position.second); + } + if (done_set.find(position.first) == done_set.end()) { next_set.insert(position.first); done_set.insert(position.first); @@ -281,7 +280,13 @@ void Compiler::fillStartCache() do { for (const auto& current_pos: current_set) { for (const std::string& terminal: terminals) { - m_start_cache.insert(std::tuple{current_pos.first, current_pos.second, terminal}); + auto it_lut {m_match_lut.find({current_pos.first, terminal})}; + if (it_lut == m_match_lut.end()) { // add new list + m_match_lut[std::pair{current_pos.first, terminal}] = std::vector{current_pos.second}; + } else { // extend list + it_lut->second.emplace_back(current_pos.second); + } + } auto it {reversedPosFirst.find(current_pos.first)}; @@ -302,6 +307,10 @@ void Compiler::fillStartCache() next_set.clear(); } while (!current_set.empty()); } + + for (auto& x: m_match_lut) { + std::sort(x.second.begin(), x.second.end()); + } } void Compiler::constructTree() diff --git a/grammer.h b/grammer.h index df7ff8d..9b72d3a 100644 --- a/grammer.h +++ b/grammer.h @@ -81,8 +81,7 @@ private: return h0 ^ (h1 << 1) ^ (h2 << 2); } }; - std::unordered_set, TupleHash> m_start_cache; - bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const; + std::unordered_map, std::vector, PairHashSS> m_match_lut; void fillStartCache(); void constructTree(); diff --git a/minicc.h b/minicc.h index d3135c6..92678a1 100644 --- a/minicc.h +++ b/minicc.h @@ -34,3 +34,12 @@ struct Token { // For printing via Google Test bool operator==(const Token &a, const Token &b); std::ostream& operator<<(std::ostream& os, const Token& token); + +struct PairHashSS { + size_t operator()(const std::pair& p) const noexcept + { + size_t h0 {std::hash{}(p.first)}; + size_t h1 {std::hash{}(p.second)}; + return h0 ^ (h1 << 1); + } +}; -- cgit v1.2.3