summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-02-16 16:33:19 +0100
committerRoland Reichwein <mail@reichwein.it>2020-02-16 16:33:19 +0100
commit2f4118526972f7f3d5147342bc65909fcc82b6c7 (patch)
tree3514b840da299d7203ce1bbcbf81d36be1108260
parent0aaa6440a23e1dedf4a907fa46f979ea9d248e99 (diff)
clang++-9, integrate cppbnf
-rw-r--r--Makefile13
-rw-r--r--bnf.cpp31
-rw-r--r--bnf.h2
-rwxr-xr-xcppbnf.cpp51
-rw-r--r--grammer.cpp59
5 files changed, 100 insertions, 56 deletions
diff --git a/Makefile b/Makefile
index b18defe..1c2db6c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
PROJECTNAME=minicc
-CXX=clang++-8
+CXX=clang++-9
#CXX=g++-8
-#CXXFLAGS=-O0 -D_DEBUG
+CXXFLAGS=-O0 -D_DEBUG
# -fprofile-instr-generate -fcoverage-mapping
# gcc:--coverage
-CXXFLAGS=-O2 -DNDEBUG
+#CXXFLAGS=-O2 -DNDEBUG
-CXXFLAGS+= -Wall -std=c++17 -I. -Ilib
+CXXFLAGS+= -Wall -std=c++17 -I.
-ifeq ($(CXX),clang++-8)
+ifeq ($(CXX),clang++-9)
# currently broken:
# ld.lld-8: error: undefined symbol: boost::re_detail_106700::cpp_regex_traits_implementation<char>::transform(char const*, char const*) const
#CXXFLAGS+= -stdlib=libc++
@@ -28,7 +28,7 @@ LIBS=\
-lboost_regex \
-lpthread
-ifeq ($(CXX),clang++-8)
+ifeq ($(CXX),clang++-9)
LIBS+= \
-fuse-ld=lld-8 \
-lstdc++ \
@@ -48,6 +48,7 @@ SRC=\
grammer.cpp \
lexer.cpp \
minicc.cpp \
+ cppbnf.cpp \
test-lexer.cpp \
googletest/src/gtest-all.cpp \
googlemock/src/gmock-all.cpp
diff --git a/bnf.cpp b/bnf.cpp
index 1efb459..f240f29 100644
--- a/bnf.cpp
+++ b/bnf.cpp
@@ -1,6 +1,7 @@
#include "bnf.h"
-std::map<std::string, std::set<std::string>> Reverse(BNF bnf){
+std::map<std::string, std::set<std::string>> Reverse(BNF bnf)
+{
std::map<std::string, std::set<std::string>> result;
for (const auto& [from, to] : bnf) {
@@ -18,4 +19,32 @@ std::map<std::string, std::set<std::string>> Reverse(BNF bnf){
return result;
}
+BNF SubBNF(const BNF& bnf, const std::string& top)
+{
+ BNF result;
+
+ std::vector<std::string> todo{top};
+
+ while (!todo.empty()) {
+ std::string current{todo.back()};
+ todo.pop_back();
+
+ auto it = bnf.find(current);
+ if (it != bnf.end()) {
+ // add current value
+ result[it->first] = it->second;
+
+ // add sub-tree values if not present yet, but available in original bnf
+ for (auto& variant: it->second) {
+ for (auto& child: variant) {
+ if (result.find(child) == result.end() && bnf.find(child) != bnf.end()) {
+ todo.push_back(child);
+ }
+ }
+ }
+ }
+ }
+
+ return result;
+}
diff --git a/bnf.h b/bnf.h
index 959e1f8..56aacb0 100644
--- a/bnf.h
+++ b/bnf.h
@@ -12,3 +12,5 @@ using namespace std::string_literals;
using BNF = std::map<std::string, std::vector<std::vector<std::string>>>;
std::map<std::string, std::set<std::string>> Reverse(BNF bnf);
+
+BNF SubBNF(const BNF& bnf, const std::string& top);
diff --git a/cppbnf.cpp b/cppbnf.cpp
index 5266fa0..0b6e58f 100755
--- a/cppbnf.cpp
+++ b/cppbnf.cpp
@@ -21,7 +21,7 @@ namespace {
return result;
}
- std::unordered_map<std::string, std::unordered_set<std::string>> reverseBNF(const T_BNF& bnf)
+ std::unordered_map<std::string, std::unordered_set<std::string>> reverseBNF(const BNF& bnf)
{
std::unordered_map<std::string, std::unordered_set<std::string>> result;
for (const auto& [symbol, lists] : bnf) {
@@ -39,12 +39,12 @@ namespace {
return result;
}
- bool isTerminal(const std::string& symbol, const T_BNF& bnf)
+ bool isTerminal(const std::string& symbol, const BNF& bnf)
{
return bnf.find(symbol) == bnf.end();
}
- size_t numberOfStartSymbols(const T_BNF& bnf)
+ size_t numberOfStartSymbols(const BNF& bnf)
{
// exactly 1 start symbol
std::vector<std::string> startSymbols;
@@ -64,7 +64,7 @@ namespace {
return startSymbols.size();
}
- bool symbolsValid(const T_BNF& bnf)
+ bool symbolsValid(const BNF& bnf)
{
for (const auto& [symbol, lists] : bnf) {
if (boost::contains(symbol, " ")) {
@@ -74,14 +74,19 @@ namespace {
for (const auto& list : lists) {
for (const auto& i : list) {
- if (i.size() == 1 && !isTerminal(i, bnf)) {
- std::cerr << "Warning: Non-Terminal symbol " << i << " in " << symbol << " is too long." << std::endl;
- return false;
- }
- if (boost::contains(i, " ")) {
- std::cerr << "Warning: Symbol " << i << " in " << symbol << " contains space." << std::endl;
- return false;
+ if (!isTerminal(i, bnf)) {
+ // every non-terminal symbol must be longer than 1 char
+ if (i.size() == 1) {
+ std::cerr << "Warning: Non-Terminal symbol " << i << " in " << symbol << " is too short." << std::endl;
+ return false;
+ }
+
+ // non-terminal symbols must not contain space
+ if (boost::contains(i, " ")) {
+ std::cerr << "Warning: Symbol " << i << " in " << symbol << " contains space." << std::endl;
+ return false;
+ }
}
}
}
@@ -92,11 +97,11 @@ namespace {
// returns 1 if exactly one start symbol and
// all nodes size > 1, except terminal symbols
- bool valid(const T_BNF& bnf) {
+ bool valid(const BNF& bnf) {
return numberOfStartSymbols(bnf) == 1 && symbolsValid(bnf);
}
- bool validLex(const T_BNF& bnf) {
+ bool validLex(const BNF& bnf) {
// all terminal symbols exactly one character
for (const auto& [symbol, lists] : bnf) {
@@ -174,7 +179,7 @@ namespace {
}
}
- T_BNF& normalizeBNF(T_BNF& bnf)
+ BNF& normalizeBNF(BNF& bnf)
{
// resolve OPTIONAL symbols
for (auto& [symbol, lists] : bnf) {
@@ -184,7 +189,7 @@ namespace {
return bnf;
}
- T_BNF& normalizeLexBNF(T_BNF& bnf)
+ BNF& normalizeLexBNF(BNF& bnf)
{
normalizeBNF(bnf);
@@ -206,9 +211,9 @@ namespace {
} // namespace
-T_BNF GetCppBNFLex()
+BNF GetCppBNFLex()
{
- T_BNF bnf{
+ BNF bnf{
// [gram.lex]
{"hex-quad", {{"hexadecimal-digit", "hexadecimal-digit", "hexadecimal-digit", "hexadecimal-digit"}}},
@@ -520,9 +525,9 @@ T_BNF GetCppBNFLex()
return normalizeLexBNF(bnf);
}
-T_BNF GetCppBNFGram()
+BNF GetCppBNFGram()
{
- T_BNF bnf{
+ BNF bnf{
// [gram.key]
{"typedef-name", {{"identifier"}, {"simple-template-id"}}},
@@ -1919,15 +1924,15 @@ T_BNF GetCppBNFGram()
return normalizeBNF(bnf);
}
-TEST(Lex, Test2) {
- auto bnf = GetCppBNFLex();
+TEST(CppBnf, LexicalBnf) {
+ auto bnf = SubBNF(GetCppBNFLex(), "preprocessing-token");
EXPECT_TRUE(valid(bnf));
EXPECT_TRUE(validLex(bnf));
}
-TEST(Gram, Test3) {
- auto bnf = GetCppBNFGram();
+TEST(CppBnf, GrammarBnf) {
+ auto bnf = SubBNF(GetCppBNFGram(), "translation-unit");
EXPECT_TRUE(valid(bnf));
}
diff --git a/grammer.cpp b/grammer.cpp
index 3c7671b..808c49e 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -4,7 +4,16 @@
using namespace Gram;
-void Compiler::clear() {
+static bool debug{false};
+
+void Debug(std::string s)
+{
+ if (debug)
+ std::cout << s << std::endl;
+}
+
+void Compiler::clear()
+{
nodes.clear();
root_node_id = 0;
@@ -71,52 +80,51 @@ namespace {
void Compiler::DumpTree()
{
- std::cout << "= Dump =======================================" << std::endl;
- std::cout << "nodes.size()=" << nodes.size() << std::endl;
+ Debug("= Dump =======================================");
+ Debug("nodes.size()="s + std::to_string(nodes.size()));
if (nodes.size() > 0) {
if (0) {
- std::cout << "--- Nodes ------------------------------------" << std::endl;
- std::cout << "root_node_id=" << root_node_id << std::endl;
+ Debug("--- Nodes ------------------------------------");
+ Debug("root_node_id="s + std::to_string(root_node_id));
for (const auto& node : nodes) {
- std::cout << node.type << "(" << node.node_id << "):";
+ std::string line{"("s + std::to_string(node.node_id) + "):"s};
for (const auto& child : node.child_ids) {
- std::cout << " ";
+ line += " "s;
if (ChildIdIsToken(child))
- std::cout << "t" << TokenIdFromChildId(child);
+ line += "t"s + std::to_string(TokenIdFromChildId(child));
else
- std::cout << child;
+ line += std::to_string(child);
}
- std::cout << std::endl;
+ Debug(line);
}
}
- std::cout << "--- Tree -------------------------------------" << std::endl;
+ Debug("--- Tree -------------------------------------");
std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(root_node_id), 0}}; // id, indent
while (!todo.empty()) {
auto [current_index, indent] {todo.front()};
todo.pop_front();
- std::cout << std::string(indent, ' ');
+ std::string line(indent, ' ');
if (ChildIdIsToken(current_index)) {
index_t token_id {TokenIdFromChildId(current_index)};
- std::cout << "Token(" << token_id << "): " << tokens[token_id].type << "(" << tokens[token_id].value << ")";
+ line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s;
} else {
auto& node {nodes[current_index]};
- std::cout << "Node(" << current_index << "): " << node.type << "/" << node.variant;
+ line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant);
auto child_ids{node.child_ids};
for (int i = 0; i < child_ids.size(); i++) {
todo.insert(todo.begin() + i, std::pair<int32_t, size_t>{child_ids[i], indent + 1});
}
if (node.alternatives.size()) {
- std::cout << ", " << node.alternatives.size() << " alternatives available";
+ line += ", "s + std::to_string(node.alternatives.size()) + " alternatives available"s;
}
}
- std::cout << std::endl;
-
+ Debug(line);
}
}
- std::cout << "==============================================" << std::endl;
+ Debug("==============================================");
}
bool Compiler::AllTokensUsed() const
@@ -240,7 +248,7 @@ bool Compiler::AddRootNode()
return false;
// now add!
- std::cout << "AddRootNode(): Adding " << node_type << std::endl;
+ Debug("AddRootNode(): Adding "s + node_type);
nodes[old_root_node_id].parent_node_id = new_root_node_id;
root_node_id = new_root_node_id;
nodes.emplace_back(TreeNode{root_node_id, root_node_id, node_type, node_variant, alternatives, child_ids});
@@ -258,7 +266,6 @@ void Compiler::removeTokensUpTo(index_t token_id)
void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)
{
- // std::cout << "DEBUG " << token_id << " " << tokens_used << std::endl;
// token_id should be the new tokens_used
if (token_id < tokens_used) {
@@ -267,7 +274,7 @@ void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)
// remove relevant tokens from end
while (token_id < tokens_used && child_ids.size() && ChildIdIsToken(child_ids.back()) && TokenIdFromChildId(child_ids.back()) >= token_id) {
- std::cout << "Removing token " << TokenIdFromChildId(child_ids.back()) << std::endl;
+ Debug("Removing token "s + std::to_string(TokenIdFromChildId(child_ids.back())));
child_ids.pop_back();
if (tokens_used > 0)
tokens_used--;
@@ -301,7 +308,7 @@ void Compiler::RemoveLastNode()
nodes[node.child_ids[0]].parent_node_id = node.child_ids[0];
root_node_id = node.child_ids[0];
}
- std::cout << "Removing root node " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
+ Debug("Removing root node "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s);
nodes.pop_back();
} else {
DumpTree();
@@ -313,7 +320,7 @@ void Compiler::RemoveLastNode()
if (parent.child_ids.empty() || parent.child_ids.back() != node_id)
throw std::runtime_error("ICE: Backtrack: Bad child nodes");
parent.child_ids.pop_back();
- std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl;
+ Debug("Removing "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s);
nodes.pop_back();
} else if (ChildIdIsToken(node.child_ids.back())) {
removeTokensUpTo(TokenIdFromChildId(node.child_ids.back()));
@@ -435,7 +442,7 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index)
nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, alternatives, std::vector<int32_t>{}});
//root stays, tokens_used stays
- std::cout << "AddNode(): " << parent.type << "->" << child_type << ": "<< index << std::endl;
+ Debug("AddNode(): "s + parent.type + "->"s + child_type + ": "s + std::to_string(index));
DumpTree();
return index;
@@ -450,7 +457,7 @@ void Compiler::AddPath(const std::vector<std::string>& path, index_t current_ind
nodes.back().child_ids.emplace_back(ChildIdFromTokenId(tokens_used));
tokens_used++;
- std::cout << "AddPath(): token " << tokens.back().type << std::endl;
+ Debug("AddPath(): token "s + tokens.back().type);
DumpTree();
}
@@ -470,7 +477,7 @@ bool Compiler::FillTree()
if (next_child == tokens[tokens_used].type) { // add token directly
node.child_ids.push_back(ChildIdFromTokenId(tokens_used));
tokens_used++;
- std::cout << "tokens_used++: " << tokens_used << std::endl;
+ Debug("tokens_used++: "s + std::to_string(tokens_used));
DumpTree();
} else { // add inner nodes
auto list = GetPath(next_child, tokens[tokens_used].type);