From 79fbc8bf495770e4a8b7c66c46acf07f4e47e568 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Mon, 30 Mar 2020 18:33:01 +0200 Subject: Speed up compile --- grammer.cpp | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 13 deletions(-) (limited to 'grammer.cpp') diff --git a/grammer.cpp b/grammer.cpp index 8b6f01b..9ceed12 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -147,7 +147,7 @@ void Compiler::IncNodePosition(NodePosition& pos) throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos)); } -size_t Compiler::minimumSymbolsNeeded(std::string symbol) +size_t Compiler::minimumSymbolsNeeded(const std::string& symbol) { if (isTerminal(bnf, symbol)) { return 1; @@ -173,7 +173,7 @@ size_t Compiler::minimumSymbolsNeeded(std::string symbol) } } -size_t Compiler::minimumSymbolsNeeded(std::vector symbol_list) +size_t Compiler::minimumSymbolsNeeded(const std::vector& symbol_list) { size_t result{0}; @@ -196,12 +196,6 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t symbol_list.erase(symbol_list.begin()); } - // match terminal symbols at end - while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { - end--; - symbol_list.erase(symbol_list.end() - 1); - } - // matching of empty lists if (symbol_list.size() == 0) { if (begin == end) { // only match real empty list @@ -218,6 +212,8 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t auto it{bnf.find(symbol_list.front())}; if (it != bnf.end()) { for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives + if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization + continue; AddNodeGuard guard(*this, i); std::vector list {it->second[i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); @@ -240,6 +236,74 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end) return match(symbol_list, begin, end); } +bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const +{ + return m_start_cache.contains({non_terminal, variant, terminal}); +} + +void Compiler::fillStartCache() +{ + std::unordered_set terminals {getTerminals(bnf)}; + + { // follow terminal symbols + for (const std::string& terminal: terminals) { + std::unordered_set current_set{terminal}; + std::unordered_set done_set; + std::unordered_set next_set; + + do { + for (const auto& current_symbol: current_set) { + auto it {reversedPosFirst.find(current_symbol)}; + if (it != reversedPosFirst.end()) { + std::unordered_set, PairHash>& positions{it->second}; + + for (const auto& position: positions) { + m_start_cache.insert(std::tuple{position.first, position.second, terminal}); + if (done_set.find(position.first) == done_set.end()) { + next_set.insert(position.first); + done_set.insert(position.first); + } + } + } + } + + current_set = next_set; + next_set.clear(); + } while (!current_set.empty()); + } + } + + { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals + std::unordered_set, PairHash> current_set {getEmptyPositions(bnf)}; + std::unordered_set, PairHash> done_set; + std::unordered_set, PairHash> next_set; + + do { + for (const auto& current_pos: current_set) { + for (const std::string& terminal: terminals) { + m_start_cache.insert(std::tuple{current_pos.first, current_pos.second, terminal}); + } + + auto it {reversedPosFirst.find(current_pos.first)}; + if (it != reversedPosFirst.end()) { + std::unordered_set, PairHash>& positions{it->second}; + + for (const auto& position: positions) { + if (done_set.find(position) == done_set.end()) { + next_set.insert(position); + done_set.insert(position); + } + } + } + + } + + current_set = next_set; + next_set.clear(); + } while (!current_set.empty()); + } +} + void Compiler::constructTree() { symbol_variants_it = symbol_variants.begin(); @@ -276,14 +340,17 @@ void Compiler::constructTree() } -Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} +Compiler::Compiler(BNF& bnf, const std::string& top) + : bnf(bnf) + , m_top(top) + //, ReverseBNF{Reverse(bnf)} + //, reversedFirst{reverseFirst(bnf)} + , reversedPosFirst{reversePosFirst(bnf)} { - //std::cout << "DEBUG: " << m_top << std::endl; - // // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements) // - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); // remove bad marker elements auto it{m_min.begin()}; while (it != m_min.end()) { @@ -293,7 +360,10 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve ++it; } } - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); + + // fill other cache + fillStartCache(); } std::vector Compiler::compile(std::vector p_tokens) -- cgit v1.2.3