summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--bnf.cpp64
-rw-r--r--bnf.h23
-rw-r--r--cpp.cpp25
-rw-r--r--cpp.h58
-rw-r--r--grammer.cpp96
-rw-r--r--grammer.h26
-rw-r--r--test-cpp.cpp20
8 files changed, 247 insertions, 67 deletions
diff --git a/Makefile b/Makefile
index 210e788..41ce102 100644
--- a/Makefile
+++ b/Makefile
@@ -3,10 +3,10 @@ PROJECTNAME=minicc
CXX=clang++-10
#CXX=g++-9
+#CXXFLAGS=-O2 -DNDEBUG
CXXFLAGS=-O0 -g -D_DEBUG
# -fprofile-instr-generate -fcoverage-mapping
# gcc:--coverage
-#CXXFLAGS=-O2 -DNDEBUG
CXXFLAGS+= -Wall -I.
diff --git a/bnf.cpp b/bnf.cpp
index 32e1175..04a6be9 100644
--- a/bnf.cpp
+++ b/bnf.cpp
@@ -1,8 +1,8 @@
#include "bnf.h"
-std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf)
+std::unordered_map<std::string, std::unordered_set<std::string>> Reverse(const BNF& bnf)
{
- std::unordered_map<std::string, std::set<std::string>> result;
+ std::unordered_map<std::string, std::unordered_set<std::string>> result;
for (const auto& [from, to] : bnf) {
for (const auto& list : to) {
@@ -11,7 +11,7 @@ std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf)
if (i != result.end()) // already present
i->second.insert(from);
else // new element
- result.emplace(element, std::set{from});
+ result.emplace(element, std::unordered_set{from});
}
}
}
@@ -19,9 +19,9 @@ std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf)
return result;
}
-std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf)
+std::unordered_map<std::string, std::unordered_set<std::string>> reverseFirst(const BNF& bnf)
{
- std::unordered_map<std::string, std::set<std::string>> result;
+ std::unordered_map<std::string, std::unordered_set<std::string>> result;
for (const auto& [from, to] : bnf) {
for (const auto& list : to) {
@@ -31,7 +31,7 @@ std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf)
if (i != result.end()) // already present
i->second.insert(from);
else // new element
- result.emplace(element, std::set{from});
+ result.emplace(element, std::unordered_set{from});
}
}
}
@@ -73,3 +73,55 @@ bool isTerminal(const BNF& bnf, const std::string& symbol)
return bnf.find(symbol) == bnf.end();
}
+std::unordered_set<std::string> getTerminals(const BNF& bnf)
+{
+ std::unordered_set<std::string> result;
+
+ for (const auto& [from, to] : bnf) {
+ for (const auto& list : to) {
+ for (const auto& element : list) {
+ if (isTerminal(bnf, element))
+ result.insert(element);
+ }
+ }
+ }
+
+ return result;
+}
+
+std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf)
+{
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> result;
+
+ for (const auto& [from, to] : bnf) {
+ for (size_t i = 0; i < to.size(); i++) {
+ const auto& list{to[i]};
+ if (list.size() == 0)
+ result.insert({from, i});
+ }
+ }
+
+ return result;
+}
+
+std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf)
+{
+ std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> result;
+
+ for (const auto& [from, to] : bnf) {
+ for (size_t i = 0; i < to.size(); i++) {
+ const auto& list{to[i]};
+ if (list.size() > 0) {
+ const auto& element{list[0]};
+ auto it{result.find(element)};
+ if (it != result.end()) // already present
+ it->second.insert({from, i});
+ else // new element
+ result.emplace(element, std::unordered_set<std::pair<std::string, index_t>, PairHash>{{from, i}});
+ }
+ }
+ }
+
+ return result;
+}
+
diff --git a/bnf.h b/bnf.h
index 148b6d1..0430a2c 100644
--- a/bnf.h
+++ b/bnf.h
@@ -1,17 +1,32 @@
#pragma once
+#include "minicc.h"
+
#include <deque>
-#include <unordered_map>
-#include <set>
#include <string>
+#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
using BNF = std::unordered_map<std::string, std::vector<std::vector<std::string>>>;
-std::unordered_map<std::string, std::set<std::string>> Reverse(BNF bnf); // unused now, remove?
-std::unordered_map<std::string, std::set<std::string>> reverseFirst(BNF bnf);
+std::unordered_map<std::string, std::unordered_set<std::string>> Reverse(const BNF& bnf); // unused now, remove?
+std::unordered_map<std::string, std::unordered_set<std::string>> reverseFirst(const BNF& bnf);
BNF SubBNF(const BNF& bnf, const std::string& top);
bool isTerminal(const BNF& bnf, const std::string& symbol);
+std::unordered_set<std::string> getTerminals(const BNF& bnf);
+
+struct PairHash {
+ size_t operator()(const std::pair<std::string,size_t>& p) const noexcept
+ {
+ size_t h0 {std::hash<std::string>{}(p.first)};
+ size_t h1 {std::hash<size_t >{}(p.second)};
+ return h0 ^ (h1 << 1);
+ }
+};
+std::unordered_set<std::pair<std::string, size_t>, PairHash> getEmptyPositions(const BNF& bnf);
+
+std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversePosFirst(const BNF& bnf); // like reverseFirstPos, but including variant
diff --git a/cpp.cpp b/cpp.cpp
index ea40661..27f8570 100644
--- a/cpp.cpp
+++ b/cpp.cpp
@@ -10,7 +10,9 @@
#include <gtest/gtest.h>
#include <gmock/gmock.h>
+#include <functional>
#include <unordered_set>
+#include <unordered_map>
using namespace Gram;
@@ -222,13 +224,26 @@ std::vector<Gram::TreeNode> CPP::analysis(const std::vector<Token>& tokens)
return compiler.compile(tokens);
}
+namespace {
+
+ CPP::map_type map_translation_unit {
+ {"top-level-declaration-seq", [](){}}
+ };
+
+} // anonymous namespace
+
+void CPP::traverse(index_t node_id, map_type& map)
+{
+ // TODO
+}
+
// Phase 7.c: Translate
-void CPP::translate(const std::vector<Gram::TreeNode>& tree)
+void CPP::translate()
{
- if (tree.size() == 0)
+ if (m_nodes.size() == 0)
throw std::runtime_error("ICE: Tree is empty");
- //traverse(i, );
+ traverse(0, map_translation_unit);
}
// Phase 8: Instantiate objects
@@ -259,8 +274,8 @@ void CPP::compile(const std::string& code)
concatenate_strings();
auto tokens = tokens_from_pptokens(pp_tokens);
- auto nodes = analysis(tokens);
- translate(nodes);
+ m_nodes = analysis(tokens);
+ translate();
instantiate();
diff --git a/cpp.h b/cpp.h
index 571182a..dfe09ca 100644
--- a/cpp.h
+++ b/cpp.h
@@ -9,31 +9,37 @@ class CPP {
public:
-CPP();
-~CPP();
-
-std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree);
-
-// phases of translation, according to standard
-void source_charset_map(); // phase 1
-void backslash_escape(); // phase 2
-std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3
-void preprocess(); // phase 4
-void execution_charset_map(); // phase 5
-void concatenate_strings(); // phase 6
-std::vector<Token> tokens_from_pptokens(const std::vector<Token>& pp_tokens); // phase 7.a
-std::vector<Gram::TreeNode> analysis(const std::vector<Token>&); // phase 7.b
-void translate(const std::vector<Gram::TreeNode>& tree); // phase 7.c
-void instantiate(); // phase 8
-void link(); // phase 9
-
-// all phases of translation
-void compile(const std::string& code);
-
-std::vector<uint8_t> getCode();
-std::vector<uint8_t> getData();
-
+ CPP();
+ ~CPP();
+
+ std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& Tree);
+
+ // phases of translation, according to standard
+ void source_charset_map(); // phase 1
+ void backslash_escape(); // phase 2
+ std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3
+ void preprocess(); // phase 4
+ void execution_charset_map(); // phase 5
+ void concatenate_strings(); // phase 6
+ std::vector<Token> tokens_from_pptokens(const std::vector<Token>& pp_tokens); // phase 7.a
+ std::vector<Gram::TreeNode> analysis(const std::vector<Token>&); // phase 7.b
+ void translate(); // phase 7.c
+ void instantiate(); // phase 8
+ void link(); // phase 9
+
+ // all phases of translation
+ void compile(const std::string& code);
+
+ std::vector<uint8_t> getCode();
+ std::vector<uint8_t> getData();
+
+ typedef std::unordered_map<std::string, std::function<void()>> map_type;
+
private:
- std::string m_code;
- std::vector<Token> m_charTokens;
+ std::string m_code; // input / start
+ std::vector<Token> m_charTokens; // result of phase 3
+ std::vector<Gram::TreeNode> m_nodes; // result of phase 7.b
+
+ void traverse(index_t node_id, map_type& map);
};
+
diff --git a/grammer.cpp b/grammer.cpp
index 8b6f01b..9ceed12 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -147,7 +147,7 @@ void Compiler::IncNodePosition(NodePosition& pos)
throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos));
}
-size_t Compiler::minimumSymbolsNeeded(std::string symbol)
+size_t Compiler::minimumSymbolsNeeded(const std::string& symbol)
{
if (isTerminal(bnf, symbol)) {
return 1;
@@ -173,7 +173,7 @@ size_t Compiler::minimumSymbolsNeeded(std::string symbol)
}
}
-size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list)
+size_t Compiler::minimumSymbolsNeeded(const std::vector<std::string>& symbol_list)
{
size_t result{0};
@@ -196,12 +196,6 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
symbol_list.erase(symbol_list.begin());
}
- // match terminal symbols at end
- while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) {
- end--;
- symbol_list.erase(symbol_list.end() - 1);
- }
-
// matching of empty lists
if (symbol_list.size() == 0) {
if (begin == end) { // only match real empty list
@@ -218,6 +212,8 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
auto it{bnf.find(symbol_list.front())};
if (it != bnf.end()) {
for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives
+ if (!canStartWith(symbol_list.front(), i, tokens[begin].type)) // optimization
+ continue;
AddNodeGuard guard(*this, i);
std::vector<std::string> list {it->second[i]};
list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
@@ -240,6 +236,74 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end)
return match(symbol_list, begin, end);
}
+bool Compiler::canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const
+{
+ return m_start_cache.contains({non_terminal, variant, terminal});
+}
+
+void Compiler::fillStartCache()
+{
+ std::unordered_set<std::string> terminals {getTerminals(bnf)};
+
+ { // follow terminal symbols
+ for (const std::string& terminal: terminals) {
+ std::unordered_set<std::string> current_set{terminal};
+ std::unordered_set<std::string> done_set;
+ std::unordered_set<std::string> next_set;
+
+ do {
+ for (const auto& current_symbol: current_set) {
+ auto it {reversedPosFirst.find(current_symbol)};
+ if (it != reversedPosFirst.end()) {
+ std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second};
+
+ for (const auto& position: positions) {
+ m_start_cache.insert(std::tuple<std::string, size_t, std::string>{position.first, position.second, terminal});
+ if (done_set.find(position.first) == done_set.end()) {
+ next_set.insert(position.first);
+ done_set.insert(position.first);
+ }
+ }
+ }
+ }
+
+ current_set = next_set;
+ next_set.clear();
+ } while (!current_set.empty());
+ }
+ }
+
+ { // follow empty non-terminal symbols, and combine all found non-terminals with all terminals
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(bnf)};
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> done_set;
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> next_set;
+
+ do {
+ for (const auto& current_pos: current_set) {
+ for (const std::string& terminal: terminals) {
+ m_start_cache.insert(std::tuple<std::string, size_t, std::string>{current_pos.first, current_pos.second, terminal});
+ }
+
+ auto it {reversedPosFirst.find(current_pos.first)};
+ if (it != reversedPosFirst.end()) {
+ std::unordered_set<std::pair<std::string, index_t>, PairHash>& positions{it->second};
+
+ for (const auto& position: positions) {
+ if (done_set.find(position) == done_set.end()) {
+ next_set.insert(position);
+ done_set.insert(position);
+ }
+ }
+ }
+
+ }
+
+ current_set = next_set;
+ next_set.clear();
+ } while (!current_set.empty());
+ }
+}
+
void Compiler::constructTree()
{
symbol_variants_it = symbol_variants.begin();
@@ -276,14 +340,17 @@ void Compiler::constructTree()
}
-Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)}
+Compiler::Compiler(BNF& bnf, const std::string& top)
+ : bnf(bnf)
+ , m_top(top)
+ //, ReverseBNF{Reverse(bnf)}
+ //, reversedFirst{reverseFirst(bnf)}
+ , reversedPosFirst{reversePosFirst(bnf)}
{
- //std::cout << "DEBUG: " << m_top << std::endl;
-
//
// prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements)
//
- minimumSymbolsNeeded("translation-unit");
+ (void) minimumSymbolsNeeded("translation-unit");
// remove bad marker elements
auto it{m_min.begin()};
while (it != m_min.end()) {
@@ -293,7 +360,10 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve
++it;
}
}
- minimumSymbolsNeeded("translation-unit");
+ (void) minimumSymbolsNeeded("translation-unit");
+
+ // fill other cache
+ fillStartCache();
}
std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens)
diff --git a/grammer.h b/grammer.h
index 88cb7b7..df7ff8d 100644
--- a/grammer.h
+++ b/grammer.h
@@ -5,6 +5,7 @@
#include <cstdint>
#include <deque>
+#include <tuple>
#include <unordered_set>
#include <utility>
@@ -41,8 +42,9 @@ private:
BNF &bnf; // not const for access via operator[]
const std::string m_top;
- std::unordered_map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove?
- std::unordered_map<std::string, std::set<std::string>> reversedFirst; // possible parent types of first childs of a given type
+ //std::unordered_map<std::string, std::unordered_set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove?
+ //std::unordered_map<std::string, std::unordered_set<std::string>> reversedFirst; // possible parent types of first childs of a given type
+ std::unordered_map<std::string, std::unordered_set<std::pair<std::string, index_t>, PairHash>> reversedPosFirst; // possible parent types of first childs of a given type
std::deque<index_t> symbol_variants;
decltype(symbol_variants)::iterator symbol_variants_it;
@@ -63,12 +65,26 @@ private:
void IncNodePosition(NodePosition& pos);
// top-down algorithm
- std::unordered_map<std::string, size_t> m_min; // cache
- size_t minimumSymbolsNeeded(std::string symbol);
- size_t minimumSymbolsNeeded(std::vector<std::string> symbol_list);
+ std::unordered_map<std::string, size_t> m_min; // cache for MinimumSymbolsNeeded
+ size_t minimumSymbolsNeeded(const std::string& symbol);
+ size_t minimumSymbolsNeeded(const std::vector<std::string>& symbol_list);
bool match(std::string symbol, size_t begin, size_t end);
bool match(std::vector<std::string> symbol_list, size_t begin, size_t end);
+ // start / end cache
+ struct TupleHash {
+ size_t operator()(const std::tuple<std::string,size_t,std::string>& t) const noexcept
+ {
+ size_t h0 {std::hash<std::string>{}(std::get<0>(t))};
+ size_t h1 {std::hash<size_t >{}(std::get<1>(t))};
+ size_t h2 {std::hash<std::string>{}(std::get<2>(t))};
+ return h0 ^ (h1 << 1) ^ (h2 << 2);
+ }
+ };
+ std::unordered_set<std::tuple<std::string, size_t, std::string>, TupleHash> m_start_cache;
+ bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const;
+ void fillStartCache();
+
void constructTree();
std::vector<std::string> m_symbol_list;
index_t m_symbol_list_pos{};
diff --git a/test-cpp.cpp b/test-cpp.cpp
index aabc4f4..8a9b7cb 100644
--- a/test-cpp.cpp
+++ b/test-cpp.cpp
@@ -24,13 +24,12 @@ class CppTest: public ::testing::Test
{
protected:
CppTest() {
- debug = true;
+ //debug = true;
}
~CppTest() {
}
};
-#if 1
TEST_F(CppTest, preprocessing_tokenize) {
CPP cpp;
auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }");
@@ -45,7 +44,6 @@ TEST_F(CppTest, preprocessing_tokenize) {
ASSERT_EQ(nodes.size(), 44);
}
-#endif
TEST_F(CppTest, preprocessing_tokenize_compile_error) {
CPP cpp;
@@ -65,8 +63,16 @@ TEST_F(CppTest, preprocessing_tokenize_compile_error) {
FAIL() << "Exception expected";
}
-#if 0
-TEST(Cpp, translate) {
- CPP::translate();
+TEST(Cpp, compile) {
+ CPP cpp;
+
+ cpp.compile("int main() { return 1 + 1; }");
}
-#endif
+
+TEST(Cpp, compile_2_times) {
+ CPP cpp;
+
+ cpp.compile("int main() { return (1 + 2) * 2; }");
+ cpp.compile("int main() { return 1 + 2 * 2; }");
+}
+