diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-03-31 18:46:49 +0200 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-03-31 18:46:49 +0200 |
commit | cd6c436cb70c4323c7d14ebd74f89bb0914649f2 (patch) | |
tree | 1bdb2a8409b2b370bcaa5e1b6cf091c0c8e1f779 /grammer.cpp | |
parent | bed46a062c442656b1bc16d965e12405297029d3 (diff) |
Optimization: Replace head recursion by tail recursion in matching
Diffstat (limited to 'grammer.cpp')
-rw-r--r-- | grammer.cpp | 58 |
1 files changed, 34 insertions, 24 deletions
diff --git a/grammer.cpp b/grammer.cpp index 50617ce..d7afaef 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -92,7 +92,7 @@ void Compiler::DumpTree() index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition pos) { - auto& list { bnf[type][variant]}; + auto& list { m_bnf[type][variant]}; index_t node_id{nodes.size()}; if (nodes.size() > 0) nodes[pos.node_id].child_ids[pos.child_pos] = node_id; @@ -149,7 +149,7 @@ void Compiler::IncNodePosition(NodePosition& pos) size_t Compiler::minimumSymbolsNeeded(const std::string& symbol) { - if (isTerminal(bnf, symbol)) { + if (isTerminal(m_bnf, symbol)) { return 1; } else { auto it_min{m_min.find(symbol)}; @@ -157,8 +157,8 @@ size_t Compiler::minimumSymbolsNeeded(const std::string& symbol) return it_min->second; m_min[symbol] = std::numeric_limits<size_t>::max(); - auto it{bnf.find(symbol)}; - if (it != bnf.end()) { + auto it{m_bnf.find(symbol)}; + if (it != m_bnf.end()) { size_t minimum{std::numeric_limits<size_t>::max()}; for (const auto& list: it->second) { @@ -196,7 +196,7 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t symbol_list.erase(symbol_list.begin()); } - // matching of empty lists + // matching of empty list in non-terminals if (symbol_list.size() == 0) { if (begin == end) { // only match real empty list // this is the point of the final match @@ -206,6 +206,19 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t return false; } + // matching of empty list in terminals + if (begin == end) { + const auto& symbol{symbol_list.front()}; + auto it{m_empty_lut.find(symbol)}; + if (it == m_empty_lut.end()) // can't match empty tokens list with this nt-symbol + return false; + + AddNodeGuard guard(*this, it->second); + std::vector<std::string> list {m_bnf[symbol][it->second]}; + list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); + return match(list, begin, end); + } + // now, symbol_list has size > 0 and contains non-terminal symbols at start and end // resolve first symbol @@ -213,12 +226,11 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t if (it != m_match_lut.end()) { for (size_t i: it->second) { AddNodeGuard guard(*this, i); - std::vector<std::string> list {bnf[symbol_list.front()][i]}; + std::vector<std::string> list {m_bnf[symbol_list.front()][i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); if (minimumSymbolsNeeded(list) > end - begin) // stop recursion continue; - // TODO: recurse last? if (match(list, begin, end)) return true; } @@ -236,7 +248,7 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end) void Compiler::fillStartCache() { - std::unordered_set<std::string> terminals {getTerminals(bnf)}; + std::unordered_set<std::string> terminals {getTerminals(m_bnf)}; { // follow terminal symbols for (const std::string& terminal: terminals) { @@ -273,12 +285,13 @@ void Compiler::fillStartCache() } { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals - std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(bnf)}; + std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(m_bnf)}; std::unordered_set<std::pair<std::string, size_t>, PairHash> done_set; std::unordered_set<std::pair<std::string, size_t>, PairHash> next_set; do { for (const auto& current_pos: current_set) { + m_empty_lut[current_pos.first] = current_pos.second; for (const std::string& terminal: terminals) { auto it_lut {m_match_lut.find({current_pos.first, terminal})}; if (it_lut == m_match_lut.end()) { // add new list @@ -286,7 +299,6 @@ void Compiler::fillStartCache() } else { // extend list it_lut->second.emplace_back(current_pos.second); } - } auto it {reversedPosFirst.find(current_pos.first)}; @@ -317,24 +329,24 @@ void Compiler::constructTree() { symbol_variants_it = symbol_variants.begin(); - m_symbol_list = {m_top}; - m_symbol_list_pos = 0; + std::vector<std::string> symbol_list{m_top}; + index_t symbol_list_pos{0}; NodePosition tree_pos; - while (m_symbol_list_pos < m_symbol_list.size()) { - std::string symbol{m_symbol_list[m_symbol_list_pos]}; + while (symbol_list_pos < symbol_list.size()) { + std::string symbol{symbol_list[symbol_list_pos]}; - if (isTerminal(bnf, symbol)) { + if (isTerminal(m_bnf, symbol)) { // Advance terminal symbol - nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = ChildIdFromTokenId(m_symbol_list_pos); + nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = ChildIdFromTokenId(symbol_list_pos); IncNodePosition(tree_pos); - m_symbol_list_pos++; + symbol_list_pos++; } else { // Replace non-terminal symbol - m_symbol_list.erase(m_symbol_list.begin() + m_symbol_list_pos); - std::vector<std::string> list {bnf[symbol][*symbol_variants_it]}; - m_symbol_list.insert(m_symbol_list.begin() + m_symbol_list_pos, list.begin(), list.end()); + symbol_list.erase(symbol_list.begin() + symbol_list_pos); + std::vector<std::string> list {m_bnf[symbol][*symbol_variants_it]}; + symbol_list.insert(symbol_list.begin() + symbol_list_pos, list.begin(), list.end()); index_t node_id {AddNode(symbol, *symbol_variants_it, tree_pos)}; @@ -350,11 +362,9 @@ void Compiler::constructTree() } Compiler::Compiler(BNF& bnf, const std::string& top) - : bnf(bnf) + : m_bnf {removeHeadRecursion(bnf)} , m_top(top) - //, ReverseBNF{Reverse(bnf)} - //, reversedFirst{reverseFirst(bnf)} - , reversedPosFirst{reversePosFirst(bnf)} + , reversedPosFirst{reversePosFirst(m_bnf)} { // // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements) |