diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | bnf.cpp | 64 | ||||
-rw-r--r-- | bnf.h | 23 | ||||
-rw-r--r-- | cpp.cpp | 25 | ||||
-rw-r--r-- | cpp.h | 58 | ||||
-rw-r--r-- | grammer.cpp | 96 | ||||
-rw-r--r-- | grammer.h | 26 | ||||
-rw-r--r-- | test-cpp.cpp | 20 |
8 files changed, 247 insertions, 67 deletions
@@ -3,10 +3,10 @@ PROJECTNAME=minicc CXX=clang++-10 #CXX=g++-9 +#CXXFLAGS=-O2 -DNDEBUG CXXFLAGS=-O0 -g -D_DEBUG # -fprofile-instr-generate -fcoverage-mapping # gcc:--coverage -#CXXFLAGS=-O2 -DNDEBUG CXXFLAGS+= -Wall -I. @@ -1,8 +1,8 @@ #include "bnf.h" -std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf) +std::unordered_map<std::string, std::unordered_set<std::string>> Reverse(const BNF& bnf) { - std::unordered_map<std::string, std::set<std::string>> result; + std::unordered_map<std::string, std::unordered_set<std::string>> result; for (const auto& [from, to] : bnf) { for (const auto& list : to) { @@ -11,7 +11,7 @@ std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf) if (i != result.end()) // already present i->second.insert(from); else // new element - result.emplace(element, std::set{from}); + result.emplace(element, std::unordered_set{from}); } } } @@ -19,9 +19,9 @@ std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf) return result; } -std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf) +std::unordered_map<std::string, std::unordered_set<std::string>> reverseFirst(const BNF& bnf) { - std::unordered_map<std::string, std::set<std::string>> result; + std::unordered_map<std::string, std::unordered_set<std::string>> result; for (const auto& [from, to] : bnf) { for (const auto& list : to) { @@ -31,7 +31,7 @@ std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf) if (i != result.end()) // already present i->second.insert(from); else // new element - result.emplace(element, std::set{from}); + result.emplace(element, std::unordered_set{from}); } } } @@ -73,3 +73,55 @@ bool isTerminal(const BNF& bnf, const std::string& symbol) return bnf.find(symbol) == bnf.end(); } +std::unordered_set<std::string> getTerminals(const BNF& bnf) +{ + std::unordered_set<std::string> result; + + for (const auto& [from, to] : bnf) { + for (const auto& list : to) { + for (const auto& element : list) { + if (isTerminal(bnf, element)) + result.insert(element); + } + } + } + + return result; +} + +std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf) +{ + std::unordered_set<std::pair<std::string, size_t>, PairHash> result; + + for (const auto& [from, to] : bnf) { + for (size_t i = 0; i < to.size(); i++) { + const auto& list{to[i]}; + if (list.size() == 0) + result.insert({from, i}); + } + } + + return result; +} + +std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf) +{ + std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> result; + + for (const auto& [from, to] : bnf) { + for (size_t i = 0; i < to.size(); i++) { + const auto& list{to[i]}; + if (list.size() > 0) { + const auto& element{list[0]}; + auto it{result.find(element)}; + if (it != result.end()) // already present + it->second.insert({from, i}); + else // new element + result.emplace(element, std::unordered_set<std::pair<std::string, index_t>, PairHash>{{from, i}}); + } + } + } + + return result; +} + @@ -1,17 +1,32 @@ #pragma once +#include "minicc.h" + #include <deque> -#include <unordered_map> -#include <set> #include <string> +#include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> using BNF = std::unordered_map<std::string, std::vector<std::vector<std::string>>>; -std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf); // unused now, remove? -std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf); +std::unordered_map<std::string, std::unordered_set<std::string>> Reverse(const BNF& bnf); // unused now, remove? +std::unordered_map<std::string, std::unordered_set<std::string>> reverseFirst(const BNF& bnf); BNF SubBNF(const BNF& bnf, const std::string& top); bool isTerminal(const BNF& bnf, const std::string& symbol); +std::unordered_set<std::string> getTerminals(const BNF& bnf); + +struct PairHash { + size_t operator()(const std::pair<std::string,size_t>& p) const noexcept + { + size_t h0 {std::hash<std::string>{}(p.first)}; + size_t h1 {std::hash<size_t >{}(p.second)}; + return h0 ^ (h1 << 1); + } +}; +std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf); + +std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf); // like reverseFirstPos, but including variant @@ -10,7 +10,9 @@ #include <gtest/gtest.h> #include <gmock/gmock.h> +#include <functional> #include <unordered_set> +#include <unordered_map> using namespace Gram; @@ -222,13 +224,26 @@ std::vector<Gram::TreeNode> CPP::analysis(const std::vector<Token>& tokens) return compiler.compile(tokens); } +namespace { + + CPP::map_type map_translation_unit { + {"top-level-declaration-seq", [](){}} + }; + +} // anonymous namespace + +void CPP::traverse(index_t node_id, map_type& map) +{ + // TODO +} + // Phase 7.c: Translate -void CPP::translate(const std::vector<Gram::TreeNode>& tree) +void CPP::translate() { - if (tree.size() == 0) + if (m_nodes.size() == 0) throw std::runtime_error("ICE: Tree is empty"); - //traverse(i, ); + traverse(0, map_translation_unit); } // Phase 8: Instantiate objects @@ -259,8 +274,8 @@ void CPP::compile(const std::string& code) concatenate_strings(); auto tokens = tokens_from_pptokens(pp_tokens); - auto nodes = analysis(tokens); - translate(nodes); + m_nodes = analysis(tokens); + translate(); instantiate(); @@ -9,31 +9,37 @@ class CPP { public: -CPP(); -~CPP(); - -std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree); - -// phases of translation, according to standard -void source_charset_map(); // phase 1 -void backslash_escape(); // phase 2 -std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3 -void preprocess(); // phase 4 -void execution_charset_map(); // phase 5 -void concatenate_strings(); // phase 6 -std::vector<Token> tokens_from_pptokens(const std::vector<Token>& pp_tokens); // phase 7.a -std::vector<Gram::TreeNode> analysis(const std::vector<Token>&); // phase 7.b -void translate(const std::vector<Gram::TreeNode>& tree); // phase 7.c -void instantiate(); // phase 8 -void link(); // phase 9 - -// all phases of translation -void compile(const std::string& code); - -std::vector<uint8_t> getCode(); -std::vector<uint8_t> getData(); - + CPP(); + ~CPP(); + + std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree); + + // phases of translation, according to standard + void source_charset_map(); // phase 1 + void backslash_escape(); // phase 2 + std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3 + void preprocess(); // phase 4 + void execution_charset_map(); // phase 5 + void concatenate_strings(); // phase 6 + std::vector<Token> tokens_from_pptokens(const std::vector<Token>& pp_tokens); // phase 7.a + std::vector<Gram::TreeNode> analysis(const std::vector<Token>&); // phase 7.b + void translate(); // phase 7.c + void instantiate(); // phase 8 + void link(); // phase 9 + + // all phases of translation + void compile(const std::string& code); + + std::vector<uint8_t> getCode(); + std::vector<uint8_t> getData(); + + typedef std::unordered_map<std::string, std::function<void()>> map_type; + private: - std::string m_code; - std::vector<Token> m_charTokens; + std::string m_code; // input / start + std::vector<Token> m_charTokens; // result of phase 3 + std::vector<Gram::TreeNode> m_nodes; // result of phase 7.b + + void traverse(index_t node_id, map_type& map); }; + diff --git a/grammer.cpp b/grammer.cpp index 8b6f01b..9ceed12 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -147,7 +147,7 @@ void Compiler::IncNodePosition(NodePosition& pos) throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos)); } -size_t Compiler::minimumSymbolsNeeded(std::string symbol) +size_t Compiler::minimumSymbolsNeeded(const std::string& symbol) { if (isTerminal(bnf, symbol)) { return 1; @@ -173,7 +173,7 @@ size_t Compiler::minimumSymbolsNeeded(std::string symbol) } } -size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list) +size_t Compiler::minimumSymbolsNeeded(const std::vector<std::string>& symbol_list) { size_t result{0}; @@ -196,12 +196,6 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t symbol_list.erase(symbol_list.begin()); } - // match terminal symbols at end - while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) { - end--; - symbol_list.erase(symbol_list.end() - 1); - } - // matching of empty lists if (symbol_list.size() == 0) { if (begin == end) { // only match real empty list @@ -218,6 +212,8 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t auto it{bnf.find(symbol_list.front())}; if (it != bnf.end()) { for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives + if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization + continue; AddNodeGuard guard(*this, i); std::vector<std::string> list {it->second[i]}; list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end()); @@ -240,6 +236,74 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end) return match(symbol_list, begin, end); } +bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const +{ + return m_start_cache.contains({non_terminal, variant, terminal}); +} + +void Compiler::fillStartCache() +{ + std::unordered_set<std::string> terminals {getTerminals(bnf)}; + + { // follow terminal symbols + for (const std::string& terminal: terminals) { + std::unordered_set<std::string> current_set{terminal}; + std::unordered_set<std::string> done_set; + std::unordered_set<std::string> next_set; + + do { + for (const auto& current_symbol: current_set) { + auto it {reversedPosFirst.find(current_symbol)}; + if (it != reversedPosFirst.end()) { + std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second}; + + for (const auto& position: positions) { + m_start_cache.insert(std::tuple<std::string, size_t, std::string>{position.first, position.second, terminal}); + if (done_set.find(position.first) == done_set.end()) { + next_set.insert(position.first); + done_set.insert(position.first); + } + } + } + } + + current_set = next_set; + next_set.clear(); + } while (!current_set.empty()); + } + } + + { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals + std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(bnf)}; + std::unordered_set<std::pair<std::string, size_t>, PairHash> done_set; + std::unordered_set<std::pair<std::string, size_t>, PairHash> next_set; + + do { + for (const auto& current_pos: current_set) { + for (const std::string& terminal: terminals) { + m_start_cache.insert(std::tuple<std::string, size_t, std::string>{current_pos.first, current_pos.second, terminal}); + } + + auto it {reversedPosFirst.find(current_pos.first)}; + if (it != reversedPosFirst.end()) { + std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second}; + + for (const auto& position: positions) { + if (done_set.find(position) == done_set.end()) { + next_set.insert(position); + done_set.insert(position); + } + } + } + + } + + current_set = next_set; + next_set.clear(); + } while (!current_set.empty()); + } +} + void Compiler::constructTree() { symbol_variants_it = symbol_variants.begin(); @@ -276,14 +340,17 @@ void Compiler::constructTree() } -Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} +Compiler::Compiler(BNF& bnf, const std::string& top) + : bnf(bnf) + , m_top(top) + //, ReverseBNF{Reverse(bnf)} + //, reversedFirst{reverseFirst(bnf)} + , reversedPosFirst{reversePosFirst(bnf)} { - //std::cout << "DEBUG: " << m_top << std::endl; - // // prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements) // - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); // remove bad marker elements auto it{m_min.begin()}; while (it != m_min.end()) { @@ -293,7 +360,10 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve ++it; } } - minimumSymbolsNeeded("translation-unit"); + (void) minimumSymbolsNeeded("translation-unit"); + + // fill other cache + fillStartCache(); } std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens) @@ -5,6 +5,7 @@ #include <cstdint> #include <deque> +#include <tuple> #include <unordered_set> #include <utility> @@ -41,8 +42,9 @@ private: BNF &bnf; // not const for access via operator[] const std::string m_top; - std::unordered_map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove? - std::unordered_map<std::string, std::set<std::string>> reversedFirst; // possible parent types of first childs of a given type + //std::unordered_map<std::string, std::unordered_set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove? + //std::unordered_map<std::string, std::unordered_set<std::string>> reversedFirst; // possible parent types of first childs of a given type + std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversedPosFirst; // possible parent types of first childs of a given type std::deque<index_t> symbol_variants; decltype(symbol_variants)::iterator symbol_variants_it; @@ -63,12 +65,26 @@ private: void IncNodePosition(NodePosition& pos); // top-down algorithm - std::unordered_map<std::string, size_t> m_min; // cache - size_t minimumSymbolsNeeded(std::string symbol); - size_t minimumSymbolsNeeded(std::vector<std::string> symbol_list); + std::unordered_map<std::string, size_t> m_min; // cache for MinimumSymbolsNeeded + size_t minimumSymbolsNeeded(const std::string& symbol); + size_t minimumSymbolsNeeded(const std::vector<std::string>& symbol_list); bool match(std::string symbol, size_t begin, size_t end); bool match(std::vector<std::string> symbol_list, size_t begin, size_t end); + // start / end cache + struct TupleHash { + size_t operator()(const std::tuple<std::string,size_t,std::string>& t) const noexcept + { + size_t h0 {std::hash<std::string>{}(std::get<0>(t))}; + size_t h1 {std::hash<size_t >{}(std::get<1>(t))}; + size_t h2 {std::hash<std::string>{}(std::get<2>(t))}; + return h0 ^ (h1 << 1) ^ (h2 << 2); + } + }; + std::unordered_set<std::tuple<std::string, size_t, std::string>, TupleHash> m_start_cache; + bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const; + void fillStartCache(); + void constructTree(); std::vector<std::string> m_symbol_list; index_t m_symbol_list_pos{}; diff --git a/test-cpp.cpp b/test-cpp.cpp index aabc4f4..8a9b7cb 100644 --- a/test-cpp.cpp +++ b/test-cpp.cpp @@ -24,13 +24,12 @@ class CppTest: public ::testing::Test { protected: CppTest() { - debug = true; + //debug = true; } ~CppTest() { } }; -#if 1 TEST_F(CppTest, preprocessing_tokenize) { CPP cpp; auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }"); @@ -45,7 +44,6 @@ TEST_F(CppTest, preprocessing_tokenize) { ASSERT_EQ(nodes.size(), 44); } -#endif TEST_F(CppTest, preprocessing_tokenize_compile_error) { CPP cpp; @@ -65,8 +63,16 @@ TEST_F(CppTest, preprocessing_tokenize_compile_error) { FAIL() << "Exception expected"; } -#if 0 -TEST(Cpp, translate) { - CPP::translate(); +TEST(Cpp, compile) { + CPP cpp; + + cpp.compile("int main() { return 1 + 1; }"); } -#endif + +TEST(Cpp, compile_2_times) { + CPP cpp; + + cpp.compile("int main() { return (1 + 2) * 2; }"); + cpp.compile("int main() { return 1 + 2 * 2; }"); +} + |