From 79fbc8bf495770e4a8b7c66c46acf07f4e47e568 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Mon, 30 Mar 2020 18:33:01 +0200 Subject: Speed up compile --- Makefile | 2 +- bnf.cpp | 64 ++++++++++++++++++++++++++++++++++++---- bnf.h | 23 ++++++++++++--- cpp.cpp | 25 ++++++++++++---- cpp.h | 58 ++++++++++++++++++++---------------- grammer.cpp | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- grammer.h | 26 ++++++++++++---- test-cpp.cpp | 20 ++++++++----- 8 files changed, 247 insertions(+), 67 deletions(-) diff --git a/Makefile b/Makefile index 210e788..41ce102 100644 --- a/Makefile +++ b/Makefile @@ -3,10 +3,10 @@ PROJECTNAME=minicc CXX=clang++-10 #CXX=g++-9 +#CXXFLAGS=-O2 -DNDEBUG CXXFLAGS=-O0 -g -D_DEBUG # -fprofile-instr-generate -fcoverage-mapping # gcc:--coverage -#CXXFLAGS=-O2 -DNDEBUG CXXFLAGS+= -Wall -I. diff --git a/bnf.cpp b/bnf.cpp index 32e1175..04a6be9 100644 --- a/bnf.cpp +++ b/bnf.cpp @@ -1,8 +1,8 @@ #include "bnf.h" -std::unordered_map> Reverse(BNF bnf) +std::unordered_map> Reverse(const BNF& bnf) { - std::unordered_map> result; + std::unordered_map> result; for (const auto& [from, to] : bnf) { for (const auto& list : to) { @@ -11,7 +11,7 @@ std::unordered_map> Reverse(BNF bnf) if (i != result.end()) // already present i->second.insert(from); else // new element - result.emplace(element, std::set{from}); + result.emplace(element, std::unordered_set{from}); } } } @@ -19,9 +19,9 @@ std::unordered_map> Reverse(BNF bnf) return result; } -std::unordered_map> reverseFirst(BNF bnf) +std::unordered_map> reverseFirst(const BNF& bnf) { - std::unordered_map> result; + std::unordered_map> result; for (const auto& [from, to] : bnf) { for (const auto& list : to) { @@ -31,7 +31,7 @@ std::unordered_map> reverseFirst(BNF bnf) if (i != result.end()) // already present i->second.insert(from); else // new element - result.emplace(element, std::set{from}); + result.emplace(element, std::unordered_set{from}); } } } @@ -73,3 +73,55 @@ bool isTerminal(const BNF& bnf, const std::string& symbol) return bnf.find(symbol) == bnf.end(); } +std::unordered_set getTerminals(const BNF& bnf) +{ + std::unordered_set result; + + for (const auto& [from, to] : bnf) { + for (const auto& list : to) { + for (const auto& element : list) { + if (isTerminal(bnf, element)) + result.insert(element); + } + } + } + + return result; +} + +std::unordered_set, PairHash> getEmptyPositions(const BNF& bnf) +{ + std::unordered_set, PairHash> result; + + for (const auto& [from, to] : bnf) { + for (size_t i = 0; i < to.size(); i++) { + const auto& list{to[i]}; + if (list.size() == 0) + result.insert({from, i}); + } + } + + return result; +} + +std::unordered_map, PairHash>> reversePosFirst(const BNF& bnf) +{ + std::unordered_map, PairHash>> result; + + for (const auto& [from, to] : bnf) { + for (size_t i = 0; i < to.size(); i++) { + const auto& list{to[i]}; + if (list.size() > 0) { + const auto& element{list[0]}; + auto it{result.find(element)}; + if (it != result.end()) // already present + it->second.insert({from, i}); + else // new element + result.emplace(element, std::unordered_set, PairHash>{{from, i}}); + } + } + } + + return result; +} + diff --git a/bnf.h b/bnf.h index 148b6d1..0430a2c 100644 --- a/bnf.h +++ b/bnf.h @@ -1,17 +1,32 @@ #pragma once +#include "minicc.h" + #include -#include -#include #include +#include +#include #include #include using BNF = std::unordered_map>>; -std::unordered_map> Reverse(BNF bnf); // unused now, remove? -std::unordered_map> reverseFirst(BNF bnf); +std::unordered_map> Reverse(const BNF& bnf); // unused now, remove? +std::unordered_map> reverseFirst(const BNF& bnf); BNF SubBNF(const BNF& bnf, const std::string& top); bool isTerminal(const BNF& bnf, const std::string& symbol); +std::unordered_set getTerminals(const BNF& bnf); + +struct PairHash { + size_t operator()(const std::pair& p) const noexcept + { + size_t h0 {std::hash{}(p.first)}; + size_t h1 {std::hash{}(p.second)}; + return h0 ^ (h1 << 1); + } +}; +std::unordered_set, PairHash> getEmptyPositions(const BNF& bnf); + +std::unordered_map, PairHash>> reversePosFirst(const BNF& bnf); // like reverseFirstPos, but including variant diff --git a/cpp.cpp b/cpp.cpp index ea40661..27f8570 100644 --- a/cpp.cpp +++ b/cpp.cpp @@ -10,7 +10,9 @@ #include #include +#include #include +#include using namespace Gram; @@ -222,13 +224,26 @@ std::vector CPP::analysis(const std::vector& tokens) return compiler.compile(tokens); } +namespace { + + CPP::map_type map_translation_unit { + {"top-level-declaration-seq", [](){}} + }; + +} // anonymous namespace + +void CPP::traverse(index_t node_id, map_type& map) +{ + // TODO +} + // Phase 7.c: Translate -void CPP::translate(const std::vector& tree) +void CPP::translate() { - if (tree.size() == 0) + if (m_nodes.size() == 0) throw std::runtime_error("ICE: Tree is empty"); - //traverse(i, ); + traverse(0, map_translation_unit); } // Phase 8: Instantiate objects @@ -259,8 +274,8 @@ void CPP::compile(const std::string& code) concatenate_strings(); auto tokens = tokens_from_pptokens(pp_tokens); - auto nodes = analysis(tokens); - translate(nodes); + m_nodes = analysis(tokens); + translate(); instantiate(); diff --git a/cpp.h b/cpp.h index 571182a..dfe09ca 100644 --- a/cpp.h +++ b/cpp.h @@ -9,31 +9,37 @@ class CPP { public: -CPP(); -~CPP(); - -std::string valueOfNode(index_t node_index, const std::vector& Tree); - -// phases of translation, according to standard -void source_charset_map(); // phase 1 -void backslash_escape(); // phase 2 -std::vector preprocessing_tokenize(const std::string& s); // phase 3 -void preprocess(); // phase 4 -void execution_charset_map(); // phase 5 -void concatenate_strings(); // phase 6 -std::vector tokens_from_pptokens(const std::vector& pp_tokens); // phase 7.a -std::vector analysis(const std::vector&); // phase 7.b -void translate(const std::vector& tree); // phase 7.c -void instantiate(); // phase 8 -void link(); // phase 9 - -// all phases of translation -void compile(const std::string& code); - -std::vector getCode(); -std::vector getData(); - + CPP(); + ~CPP(); + + std::string valueOfNode(index_t node_index, const std::vector& Tree); + + // phases of translation, according to standard + void source_charset_map(); // phase 1 + void backslash_escape(); // phase 2 + std::vector preprocessing_tokenize(const std::string& s); // phase 3 + void preprocess(); // phase 4 + void execution_charset_map(); // phase 5 + void concatenate_strings(); // phase 6 + std::vector tokens_from_pptokens(const std::vector& pp_tokens); // phase 7.a + std::vector analysis(const std::vector&); // phase 7.b + void translate(); // phase 7.c + void instantiate(); // phase 8 + void link(); // phase 9 + + // all phases of translation + void compile(const std::string& code); + + std::vector getCode(); + std::vector getData(); + + typedef std::unordered_map> map_type; + private: - std::string m_code; - std::vector m_charTokens; + std::string m_code; // input / start + std::vector m_charTokens; // result of phase 3 + std::vector m_nodes; // result of phase 7.b + + void traverse(index_t node_id, map_type& map); }; + diff --git a/grammer.cpp b/grammer.cpp index 8b6f01b..9ceed12 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -147,7 +147,7 @@ void Compiler::IncNodePosition(NodePosition& pos) throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos)); } -size_t Compiler::minimumSymbolsNeeded(std::string symbol) +size_t Compiler::minimumSymbolsNeeded(const std::string& symbol) { if (isTerminal(bnf, symbol)) { return 1; @@ -173,7 +173,7 @@ size_t Compiler::minimumSymbolsNeeded(std::string symbol) } } -size_t Compiler::minimumSymbolsNeeded(std::vector symbol_list) +size_t Compiler::minimumSymbolsNeeded(const std::vector& symbol_list) { size_t result{0}; @@ -196,12 +196,6 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t symbol_list.erase(symbol_list.begin()); } - // match terminal symbols at end - while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { - end--; - symbol_list.erase(symbol_list.end() - 1); - } - // matching of empty lists if (symbol_list.size() == 0) { if (begin == end) { // only match real empty list @@ -218,6 +212,8 @@ bool Compiler::match(std::vector symbol_list, size_t begin, size_t auto it{bnf.find(symbol_list.front())}; if (it != bnf.end()) { for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives + if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization + continue; AddNodeGuard guard(*this, i); std::vector list {it->second[i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); @@ -240,6 +236,74 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end) return match(symbol_list, begin, end); } +bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const +{ + return m_start_cache.contains({non_terminal, variant, terminal}); +} + +void Compiler::fillStartCache() +{ + std::unordered_set terminals {getTerminals(bnf)}; + + { // follow terminal symbols + for (const std::string& terminal: terminals) { + std::unordered_set current_set{terminal}; + std::unordered_set done_set; + std::unordered_set next_set; + + do { + for (const auto& current_symbol: current_set) { + auto it {reversedPosFirst.find(current_symbol)}; + if (it != reversedPosFirst.end()) { + std::unordered_set, PairHash>& positions{it->second}; + + for (const auto& position: positions) { + m_start_cache.insert(std::tuple{position.first, position.second, terminal}); + if (done_set.find(position.first) == done_set.end()) { + next_set.insert(position.first); + done_set.insert(position.first); + } + } + } + } + + current_set = next_set; + next_set.clear(); + } while (!current_set.empty()); + } + } + + { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals + std::unordered_set, PairHash> current_set {getEmptyPositions(bnf)}; + std::unordered_set, PairHash> done_set; + std::unordered_set, PairHash> next_set; + + do { + for (const auto& current_pos: current_set) { + for (const std::string& terminal: terminals) { + m_start_cache.insert(std::tuple{current_pos.first, current_pos.second, terminal}); + } + + auto it {reversedPosFirst.find(current_pos.first)}; + if (it != reversedPosFirst.end()) { + std::unordered_set, PairHash>& positions{it->second}; + + for (const auto& position: positions) { + if (done_set.find(position) == done_set.end()) { + next_set.insert(position); + done_set.insert(position); + } + } + } + + } + + current_set = next_set; + next_set.clear(); + } while (!current_set.empty()); + } +} + void Compiler::constructTree() { symbol_variants_it = symbol_variants.begin(); @@ -276,14 +340,17 @@ void Compiler::constructTree() } -Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} +Compiler::Compiler(BNF& bnf, const std::string& top) + : bnf(bnf) + , m_top(top) + //, ReverseBNF{Reverse(bnf)} + //, reversedFirst{reverseFirst(bnf)} + , reversedPosFirst{reversePosFirst(bnf)} { - //std::cout << "DEBUG: " << m_top << std::endl; - // // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements) // - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); // remove bad marker elements auto it{m_min.begin()}; while (it != m_min.end()) { @@ -293,7 +360,10 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve ++it; } } - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); + + // fill other cache + fillStartCache(); } std::vector Compiler::compile(std::vector p_tokens) diff --git a/grammer.h b/grammer.h index 88cb7b7..df7ff8d 100644 --- a/grammer.h +++ b/grammer.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -41,8 +42,9 @@ private: BNF &bnf; // not const for access via operator[] const std::string m_top; - std::unordered_map> ReverseBNF; // possible parent types of a given type; unused now: remove? - std::unordered_map> reversedFirst; // possible parent types of first childs of a given type + //std::unordered_map> ReverseBNF; // possible parent types of a given type; unused now: remove? + //std::unordered_map> reversedFirst; // possible parent types of first childs of a given type + std::unordered_map, PairHash>> reversedPosFirst; // possible parent types of first childs of a given type std::deque symbol_variants; decltype(symbol_variants)::iterator symbol_variants_it; @@ -63,12 +65,26 @@ private: void IncNodePosition(NodePosition& pos); // top-down algorithm - std::unordered_map m_min; // cache - size_t minimumSymbolsNeeded(std::string symbol); - size_t minimumSymbolsNeeded(std::vector symbol_list); + std::unordered_map m_min; // cache for MinimumSymbolsNeeded + size_t minimumSymbolsNeeded(const std::string& symbol); + size_t minimumSymbolsNeeded(const std::vector& symbol_list); bool match(std::string symbol, size_t begin, size_t end); bool match(std::vector symbol_list, size_t begin, size_t end); + // start / end cache + struct TupleHash { + size_t operator()(const std::tuple& t) const noexcept + { + size_t h0 {std::hash{}(std::get<0>(t))}; + size_t h1 {std::hash{}(std::get<1>(t))}; + size_t h2 {std::hash{}(std::get<2>(t))}; + return h0 ^ (h1 << 1) ^ (h2 << 2); + } + }; + std::unordered_set, TupleHash> m_start_cache; + bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const; + void fillStartCache(); + void constructTree(); std::vector m_symbol_list; index_t m_symbol_list_pos{}; diff --git a/test-cpp.cpp b/test-cpp.cpp index aabc4f4..8a9b7cb 100644 --- a/test-cpp.cpp +++ b/test-cpp.cpp @@ -24,13 +24,12 @@ class CppTest: public ::testing::Test { protected: CppTest() { - debug = true; + //debug = true; } ~CppTest() { } }; -#if 1 TEST_F(CppTest, preprocessing_tokenize) { CPP cpp; auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }"); @@ -45,7 +44,6 @@ TEST_F(CppTest, preprocessing_tokenize) { ASSERT_EQ(nodes.size(), 44); } -#endif TEST_F(CppTest, preprocessing_tokenize_compile_error) { CPP cpp; @@ -65,8 +63,16 @@ TEST_F(CppTest, preprocessing_tokenize_compile_error) { FAIL() << "Exception expected"; } -#if 0 -TEST(Cpp, translate) { - CPP::translate(); +TEST(Cpp, compile) { + CPP cpp; + + cpp.compile("int main() { return 1 + 1; }"); } -#endif + +TEST(Cpp, compile_2_times) { + CPP cpp; + + cpp.compile("int main() { return (1 + 2) * 2; }"); + cpp.compile("int main() { return 1 + 2 * 2; }"); +} + -- cgit v1.2.3