diff options
-rw-r--r-- | Makefile | 13 | ||||
-rw-r--r-- | bnf.cpp | 31 | ||||
-rw-r--r-- | bnf.h | 2 | ||||
-rwxr-xr-x | cppbnf.cpp | 51 | ||||
-rw-r--r-- | grammer.cpp | 59 |
5 files changed, 100 insertions, 56 deletions
@@ -1,16 +1,16 @@ PROJECTNAME=minicc -CXX=clang++-8 +CXX=clang++-9 #CXX=g++-8 -#CXXFLAGS=-O0 -D_DEBUG +CXXFLAGS=-O0 -D_DEBUG # -fprofile-instr-generate -fcoverage-mapping # gcc:--coverage -CXXFLAGS=-O2 -DNDEBUG +#CXXFLAGS=-O2 -DNDEBUG -CXXFLAGS+= -Wall -std=c++17 -I. -Ilib +CXXFLAGS+= -Wall -std=c++17 -I. -ifeq ($(CXX),clang++-8) +ifeq ($(CXX),clang++-9) # currently broken: # ld.lld-8: error: undefined symbol: boost::re_detail_106700::cpp_regex_traits_implementation<char>::transform(char const*, char const*) const #CXXFLAGS+= -stdlib=libc++ @@ -28,7 +28,7 @@ LIBS=\ -lboost_regex \ -lpthread -ifeq ($(CXX),clang++-8) +ifeq ($(CXX),clang++-9) LIBS+= \ -fuse-ld=lld-8 \ -lstdc++ \ @@ -48,6 +48,7 @@ SRC=\ grammer.cpp \ lexer.cpp \ minicc.cpp \ + cppbnf.cpp \ test-lexer.cpp \ googletest/src/gtest-all.cpp \ googlemock/src/gmock-all.cpp @@ -1,6 +1,7 @@ #include "bnf.h" -std::map<std::string, std::set<std::string>> Reverse(BNF bnf){ +std::map<std::string, std::set<std::string>> Reverse(BNF bnf) +{ std::map<std::string, std::set<std::string>> result; for (const auto& [from, to] : bnf) { @@ -18,4 +19,32 @@ std::map<std::string, std::set<std::string>> Reverse(BNF bnf){ return result; } +BNF SubBNF(const BNF& bnf, const std::string& top) +{ + BNF result; + + std::vector<std::string> todo{top}; + + while (!todo.empty()) { + std::string current{todo.back()}; + todo.pop_back(); + + auto it = bnf.find(current); + if (it != bnf.end()) { + // add current value + result[it->first] = it->second; + + // add sub-tree values if not present yet, but available in original bnf + for (auto& variant: it->second) { + for (auto& child: variant) { + if (result.find(child) == result.end() && bnf.find(child) != bnf.end()) { + todo.push_back(child); + } + } + } + } + } + + return result; +} @@ -12,3 +12,5 @@ using namespace std::string_literals; using BNF = std::map<std::string, std::vector<std::vector<std::string>>>; std::map<std::string, std::set<std::string>> Reverse(BNF bnf); + +BNF SubBNF(const BNF& bnf, const std::string& top); @@ -21,7 +21,7 @@ namespace { return result; } - std::unordered_map<std::string, std::unordered_set<std::string>> reverseBNF(const T_BNF& bnf) + std::unordered_map<std::string, std::unordered_set<std::string>> reverseBNF(const BNF& bnf) { std::unordered_map<std::string, std::unordered_set<std::string>> result; for (const auto& [symbol, lists] : bnf) { @@ -39,12 +39,12 @@ namespace { return result; } - bool isTerminal(const std::string& symbol, const T_BNF& bnf) + bool isTerminal(const std::string& symbol, const BNF& bnf) { return bnf.find(symbol) == bnf.end(); } - size_t numberOfStartSymbols(const T_BNF& bnf) + size_t numberOfStartSymbols(const BNF& bnf) { // exactly 1 start symbol std::vector<std::string> startSymbols; @@ -64,7 +64,7 @@ namespace { return startSymbols.size(); } - bool symbolsValid(const T_BNF& bnf) + bool symbolsValid(const BNF& bnf) { for (const auto& [symbol, lists] : bnf) { if (boost::contains(symbol, " ")) { @@ -74,14 +74,19 @@ namespace { for (const auto& list : lists) { for (const auto& i : list) { - if (i.size() == 1 && !isTerminal(i, bnf)) { - std::cerr << "Warning: Non-Terminal symbol " << i << " in " << symbol << " is too long." << std::endl; - return false; - } - if (boost::contains(i, " ")) { - std::cerr << "Warning: Symbol " << i << " in " << symbol << " contains space." << std::endl; - return false; + if (!isTerminal(i, bnf)) { + // every non-terminal symbol must be longer that 1 char + if (i.size() == 1) { + std::cerr << "Warning: Non-Terminal symbol " << i << " in " << symbol << " is too short." << std::endl; + return false; + } + + // non-terminal symbols must not contain space + if (boost::contains(i, " ")) { + std::cerr << "Warning: Symbol " << i << " in " << symbol << " contains space." << std::endl; + return false; + } } } } @@ -92,11 +97,11 @@ namespace { // returns 1 if exactly one start symbol and // all nodes size > 1, except terminal symbols - bool valid(const T_BNF& bnf) { + bool valid(const BNF& bnf) { return numberOfStartSymbols(bnf) == 1 && symbolsValid(bnf); } - bool validLex(const T_BNF& bnf) { + bool validLex(const BNF& bnf) { // all terminal symbols exactly one character for (const auto& [symbol, lists] : bnf) { @@ -174,7 +179,7 @@ namespace { } } - T_BNF& normalizeBNF(T_BNF& bnf) + BNF& normalizeBNF(BNF& bnf) { // resolve OPTIONAL symbols for (auto& [symbol, lists] : bnf) { @@ -184,7 +189,7 @@ namespace { return bnf; } - T_BNF& normalizeLexBNF(T_BNF& bnf) + BNF& normalizeLexBNF(BNF& bnf) { normalizeBNF(bnf); @@ -206,9 +211,9 @@ namespace { } // namespace -T_BNF GetCppBNFLex() +BNF GetCppBNFLex() { - T_BNF bnf{ + BNF bnf{ // [gram.lex] {"hex-quad", {{"hexadecimal-digit", "hexadecimal-digit", "hexadecimal-digit", "hexadecimal-digit"}}}, @@ -520,9 +525,9 @@ T_BNF GetCppBNFLex() return normalizeLexBNF(bnf); } -T_BNF GetCppBNFGram() +BNF GetCppBNFGram() { - T_BNF bnf{ + BNF bnf{ // [gram.key] {"typedef-name", {{"identifier"}, {"simple-template-id"}}}, @@ -1919,15 +1924,15 @@ T_BNF GetCppBNFGram() return normalizeBNF(bnf); } -TEST(Lex, Test2) { - auto bnf = GetCppBNFLex(); +TEST(CppBnf, LexicalBnf) { + auto bnf = SubBNF(GetCppBNFLex(), "preprocessing-token"); EXPECT_TRUE(valid(bnf)); EXPECT_TRUE(validLex(bnf)); } -TEST(Gram, Test3) { - auto bnf = GetCppBNFGram(); +TEST(CppBnf, GrammarBnf) { + auto bnf = SubBNF(GetCppBNFGram(), "translation-unit"); EXPECT_TRUE(valid(bnf)); } diff --git a/grammer.cpp b/grammer.cpp index 3c7671b..808c49e 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -4,7 +4,16 @@ using namespace Gram; -void Compiler::clear() { +static bool debug{false}; + +void Debug(std::string s) +{ + if (debug) + std::cout << s << std::endl; +} + +void Compiler::clear() +{ nodes.clear(); root_node_id = 0; @@ -71,52 +80,51 @@ namespace { void Compiler::DumpTree() { - std::cout << "= Dump =======================================" << std::endl; - std::cout << "nodes.size()=" << nodes.size() << std::endl; + Debug("= Dump ======================================="); + Debug("nodes.size()="s + std::to_string(nodes.size())); if (nodes.size() > 0) { if (0) { - std::cout << "--- Nodes ------------------------------------" << std::endl; - std::cout << "root_node_id=" << root_node_id << std::endl; + Debug("--- Nodes ------------------------------------"); + Debug("root_node_id="s + std::to_string(root_node_id)); for (const auto& node : nodes) { - std::cout << node.type << "(" << node.node_id << "):"; + std::string line{"("s + std::to_string(node.node_id) + "):"s}; for (const auto& child : node.child_ids) { - std::cout << " "; + line += " "s; if (ChildIdIsToken(child)) - std::cout << "t" << TokenIdFromChildId(child); + line += "t"s + std::to_string(TokenIdFromChildId(child)); else - std::cout << child; + line += std::to_string(child); } - std::cout << std::endl; + Debug(line); } } - std::cout << "--- Tree -------------------------------------" << std::endl; + Debug("--- Tree -------------------------------------"); std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(root_node_id), 0}}; // id, indent while (!todo.empty()) { auto [current_index, indent] {todo.front()}; todo.pop_front(); - std::cout << std::string(indent, ' '); + std::string line(indent, ' '); if (ChildIdIsToken(current_index)) { index_t token_id {TokenIdFromChildId(current_index)}; - std::cout << "Token(" << token_id << "): " << tokens[token_id].type << "(" << tokens[token_id].value << ")"; + line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s; } else { auto& node {nodes[current_index]}; - std::cout << "Node(" << current_index << "): " << node.type << "/" << node.variant; + line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant); auto child_ids{node.child_ids}; for (int i = 0; i < child_ids.size(); i++) { todo.insert(todo.begin() + i, std::pair<int32_t, size_t>{child_ids[i], indent + 1}); } if (node.alternatives.size()) { - std::cout << ", " << node.alternatives.size() << " alternatives available"; + line += ", "s + std::to_string(node.alternatives.size()) + " alternatives available"s; } } - std::cout << std::endl; - + Debug(line); } } - std::cout << "==============================================" << std::endl; + Debug("=============================================="); } bool Compiler::AllTokensUsed() const @@ -240,7 +248,7 @@ bool Compiler::AddRootNode() return false; // now add! - std::cout << "AddRootNode(): Adding " << node_type << std::endl; + Debug("AddRootNode(): Adding "s + node_type); nodes[old_root_node_id].parent_node_id = new_root_node_id; root_node_id = new_root_node_id; nodes.emplace_back(TreeNode{root_node_id, root_node_id, node_type, node_variant, alternatives, child_ids}); @@ -258,7 +266,6 @@ void Compiler::removeTokensUpTo(index_t token_id) void Compiler::removeTokensUpTo(index_t token_id, index_t node_id) { - // std::cout << "DEBUG " << token_id << " " << tokens_used << std::endl; // token_id should be the new tokens_used if (token_id < tokens_used) { @@ -267,7 +274,7 @@ void Compiler::removeTokensUpTo(index_t token_id, index_t node_id) // remove relevant tokens from end while (token_id < tokens_used && child_ids.size() && ChildIdIsToken(child_ids.back()) && TokenIdFromChildId(child_ids.back()) >= token_id) { - std::cout << "Removing token " << TokenIdFromChildId(child_ids.back()) << std::endl; + Debug("Removing token "s + std::to_string(TokenIdFromChildId(child_ids.back()))); child_ids.pop_back(); if (tokens_used > 0) tokens_used--; @@ -301,7 +308,7 @@ void Compiler::RemoveLastNode() nodes[node.child_ids[0]].parent_node_id = node.child_ids[0]; root_node_id = node.child_ids[0]; } - std::cout << "Removing root node " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl; + Debug("Removing root node "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s); nodes.pop_back(); } else { DumpTree(); @@ -313,7 +320,7 @@ void Compiler::RemoveLastNode() if (parent.child_ids.empty() || parent.child_ids.back() != node_id) throw std::runtime_error("ICE: Backtrack: Bad child nodes"); parent.child_ids.pop_back(); - std::cout << "Removing " << nodes.back().type << "(" << nodes.back().node_id << ")" << std::endl; + Debug("Removing "s + nodes.back().type + "("s + std::to_string(nodes.back().node_id) + ")"s); nodes.pop_back(); } else if (ChildIdIsToken(node.child_ids.back())) { removeTokensUpTo(TokenIdFromChildId(node.child_ids.back())); @@ -435,7 +442,7 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index) nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, alternatives, std::vector<int32_t>{}}); //root stays, tokens_used stays - std::cout << "AddNode(): " << parent.type << "->" << child_type << ": "<< index << std::endl; + Debug("AddNode(): "s + parent.type + "->"s + child_type + ": "s + std::to_string(index)); DumpTree(); return index; @@ -450,7 +457,7 @@ void Compiler::AddPath(const std::vector<std::string>& path, index_t current_ind nodes.back().child_ids.emplace_back(ChildIdFromTokenId(tokens_used)); tokens_used++; - std::cout << "AddPath(): token " << tokens.back().type << std::endl; + Debug("AddPath(): token "s + tokens.back().type); DumpTree(); } @@ -470,7 +477,7 @@ bool Compiler::FillTree() if (next_child == tokens[tokens_used].type) { // add token directly node.child_ids.push_back(ChildIdFromTokenId(tokens_used)); tokens_used++; - std::cout << "tokens_used++: " << tokens_used << std::endl; + Debug("tokens_used++: "s + std::to_string(tokens_used)); DumpTree(); } else { // add inner nodes auto list = GetPath(next_child, tokens[tokens_used].type); |