summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-03-31 12:03:53 +0200
committerRoland Reichwein <mail@reichwein.it>2020-03-31 12:03:53 +0200
commit3c8e6cc25e414fed9dbaadef6fed9cc7efaf3068 (patch)
treeaace36c9d6ac0302f188f02be55050c82617fbe3
parent79fbc8bf495770e4a8b7c66c46acf07f4e47e568 (diff)
Optimize start lookup
-rw-r--r--bnf.h1
-rw-r--r--grammer.cpp35
-rw-r--r--grammer.h3
-rw-r--r--minicc.h9
4 files changed, 33 insertions, 15 deletions
diff --git a/bnf.h b/bnf.h
index 0430a2c..e5c62ed 100644
--- a/bnf.h
+++ b/bnf.h
@@ -30,3 +30,4 @@ struct PairHash {
std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf);
std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf); // like reverseFirstPos, but including variant
+
diff --git a/grammer.cpp b/grammer.cpp
index 9ceed12..50617ce 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -209,13 +209,11 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
// now, symbol_list has size > 0 and contains non-terminal symbols at start and end
// resolve first symbol
- auto it{bnf.find(symbol_list.front())};
- if (it != bnf.end()) {
- for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives
- if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization
- continue;
+ auto it = m_match_lut.find({symbol_list.front(), tokens[begin].type});
+ if (it != m_match_lut.end()) {
+ for (size_t i: it->second) {
AddNodeGuard guard(*this, i);
- std::vector<std::string> list {it->second[i]};
+ std::vector<std::string> list {bnf[symbol_list.front()][i]};
list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
if (minimumSymbolsNeeded(list) > end - begin) // stop recursion
continue;
@@ -236,11 +234,6 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end)
return match(symbol_list, begin, end);
}
-bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const
-{
- return m_start_cache.contains({non_terminal, variant, terminal});
-}
-
void Compiler::fillStartCache()
{
std::unordered_set<std::string> terminals {getTerminals(bnf)};
@@ -258,7 +251,13 @@ void Compiler::fillStartCache()
std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second};
for (const auto& position: positions) {
- m_start_cache.insert(std::tuple<std::string, size_t, std::string>{position.first, position.second, terminal});
+ auto it_lut {m_match_lut.find({position.first, terminal})};
+ if (it_lut == m_match_lut.end()) { // add new list
+ m_match_lut[std::pair<std::string, std::string>{position.first, terminal}] = std::vector<size_t>{position.second};
+ } else { // extend list
+ it_lut->second.emplace_back(position.second);
+ }
+
if (done_set.find(position.first) == done_set.end()) {
next_set.insert(position.first);
done_set.insert(position.first);
@@ -281,7 +280,13 @@ void Compiler::fillStartCache()
do {
for (const auto& current_pos: current_set) {
for (const std::string& terminal: terminals) {
- m_start_cache.insert(std::tuple<std::string, size_t, std::string>{current_pos.first, current_pos.second, terminal});
+ auto it_lut {m_match_lut.find({current_pos.first, terminal})};
+ if (it_lut == m_match_lut.end()) { // add new list
+ m_match_lut[std::pair<std::string, std::string>{current_pos.first, terminal}] = std::vector<size_t>{current_pos.second};
+ } else { // extend list
+ it_lut->second.emplace_back(current_pos.second);
+ }
+
}
auto it {reversedPosFirst.find(current_pos.first)};
@@ -302,6 +307,10 @@ void Compiler::fillStartCache()
next_set.clear();
} while (!current_set.empty());
}
+
+ for (auto& x: m_match_lut) {
+ std::sort(x.second.begin(), x.second.end());
+ }
}
void Compiler::constructTree()
diff --git a/grammer.h b/grammer.h
index df7ff8d..9b72d3a 100644
--- a/grammer.h
+++ b/grammer.h
@@ -81,8 +81,7 @@ private:
return h0 ^ (h1 << 1) ^ (h2 << 2);
}
};
- std::unordered_set<std::tuple<std::string, size_t, std::string>, TupleHash> m_start_cache;
- bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const;
+ std::unordered_map<std::pair<std::string, std::string>, std::vector<size_t>, PairHashSS> m_match_lut;
void fillStartCache();
void constructTree();
diff --git a/minicc.h b/minicc.h
index d3135c6..92678a1 100644
--- a/minicc.h
+++ b/minicc.h
@@ -34,3 +34,12 @@ struct Token {
// For printing via Google Test
bool operator==(const Token &a, const Token &b);
std::ostream& operator<<(std::ostream& os, const Token& token);
+
+struct PairHashSS {
+ size_t operator()(const std::pair<std::string,std::string>& p) const noexcept
+ {
+ size_t h0 {std::hash<std::string>{}(p.first)};
+ size_t h1 {std::hash<std::string>{}(p.second)};
+ return h0 ^ (h1 << 1);
+ }
+};