diff options
-rw-r--r-- | bnf.cpp | 20 | ||||
-rw-r--r-- | bnf.h | 3 | ||||
-rw-r--r-- | cpp.cpp | 10 | ||||
-rw-r--r-- | grammer.cpp | 19 | ||||
-rw-r--r-- | grammer.h | 4 | ||||
-rw-r--r-- | test-lexer.cpp | 4 |
6 files changed, 47 insertions, 13 deletions
@@ -19,6 +19,26 @@ std::map<std::string, std::set<std::string>> Reverse(BNF bnf) return result; } +std::map<std::string, std::set<std::string>> reverseFirst(BNF bnf) +{ + std::map<std::string, std::set<std::string>> result; + + for (const auto& [from, to] : bnf) { + for (const auto& list : to) { + if (list.size() > 0) { + const auto& element{list[0]}; + auto i{result.find(element)}; + if (i != result.end()) // already present + i->second.insert(from); + else // new element + result.emplace(element, std::set{from}); + } + } + } + + return result; +} + BNF SubBNF(const BNF& bnf, const std::string& top) { BNF result; @@ -11,6 +11,7 @@ using namespace std::string_literals; using BNF = std::map<std::string, std::vector<std::vector<std::string>>>; -std::map<std::string, std::set<std::string>> Reverse(BNF bnf); +std::map<std::string, std::set<std::string>> Reverse(BNF bnf); // unused now, remove? +std::map<std::string, std::set<std::string>> reverseFirst(BNF bnf); BNF SubBNF(const BNF& bnf, const std::string& top); @@ -82,6 +82,7 @@ std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std: {" "}, {"\t"}, {"\n"}, {"\r"} }; Gram::Compiler compiler(bnf, "file"); + debug = true; std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens); debug = true; @@ -246,12 +247,21 @@ protected: } }; +#if 0 TEST_F(CppTest, preprocessing_tokenize) { CPP cpp; auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }"); cpp.tokens_from_pptokens(ppTree); } +#endif + +TEST_F(CppTest, preprocessing_tokenize2) { + CPP cpp; + auto ppTree = cpp.preprocessing_tokenize("in ma"); + + cpp.tokens_from_pptokens(ppTree); +} #if 0 TEST(Cpp, translate) { diff --git a/grammer.cpp b/grammer.cpp index 8243fa8..be01adc 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -170,8 +170,8 @@ void Compiler::AddFirstNode() root_node_id = 0; const std::string& child_type = tokens[0].type; - auto it = ReverseBNF.find(child_type); - if (it == ReverseBNF.end()) + auto it = reversedFirst.find(child_type); + if (it == reversedFirst.end()) throw std::runtime_error("Illegal first token: "s + child_type + " ("s + tokens[0].value + ")"s); std::set<std::string>& alternatives_set {it->second}; @@ -212,8 +212,8 @@ bool Compiler::AddRootNode() AddFirstNode(); } else { const std::string& child_type = nodes[root_node_id].type; // starting at old root - auto it = ReverseBNF.find(child_type); - if (it == ReverseBNF.end()) // this one doesn't have a parent, maybe a start symbol to discard? + auto it = reversedFirst.find(child_type); + if (it == reversedFirst.end()) // this one doesn't have a parent, maybe a start symbol to discard? return false; index_t old_root_node_id {root_node_id}; @@ -260,6 +260,7 @@ void Compiler::removeTokensUpTo(index_t token_id) removeTokensUpTo(token_id, root_node_id); } +// operate on node_id void Compiler::removeTokensUpTo(index_t token_id, index_t node_id) { // token_id should be the new tokens_used @@ -279,7 +280,7 @@ void Compiler::removeTokensUpTo(index_t token_id, index_t node_id) } // recurse from back, to remove tokens from end - for (auto i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) { + for (int i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) { if (!ChildIdIsToken(child_ids[i])) { removeTokensUpTo(token_id, child_ids[i]); } @@ -376,9 +377,9 @@ std::map<std::string, std::string> Compiler::traverse(const std::string& lower, auto it {visited.find(current_node)}; if (it == visited.end()) { // not visited, yet: visit now - auto parents_it {ReverseBNF.find(current_node)}; + auto parents_it {reversedFirst.find(current_node)}; - if (parents_it != ReverseBNF.end()) { + if (parents_it != reversedFirst.end()) { auto& parents {parents_it->second}; visited[current_node] = current_child; @@ -441,7 +442,7 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index) nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, alternatives, std::vector<int32_t>{}}); //root stays, tokens_used stays - Debug("AddNode(): "s + parent.type + "->"s + child_type + ": "s + std::to_string(index)); + Debug("AddNode(): "s + nodes[parent_index].type + "->"s + child_type + ": "s + std::to_string(index)); DumpTree(); return index; @@ -490,7 +491,7 @@ bool Compiler::FillTree() return true; } -Compiler::Compiler(BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} +Compiler::Compiler(BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)} { } @@ -38,8 +38,8 @@ private: BNF &bnf; // not const for access via operator[] const std::string& Top; - std::map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type - + std::map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove? + std::map<std::string, std::set<std::string>> reversedFirst; // possible parent types of first childs of a given type // Tree specific void clear(); diff --git a/test-lexer.cpp b/test-lexer.cpp index 79b9930..735f670 100644 --- a/test-lexer.cpp +++ b/test-lexer.cpp @@ -64,7 +64,7 @@ TEST_F(Test, BNF) { // implicit? //std::set<std::string> Terminals{"identifier", "=", ";"}; - std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ ; "}; + std::string Code{"a = bc ; c = 123 ; esd = Ff ; "};//1 = XYZ ; "}; std::vector<Token> tokens_reference{ {"identifier", "a", { 1, 1} }, {"preprocessing-op-or-punc", "=", { 1, 3}}, @@ -78,10 +78,12 @@ TEST_F(Test, BNF) { {"preprocessing-op-or-punc", "=", { 1, 24}}, {"identifier", "Ff", { 1, 26}}, {"preprocessing-op-or-punc", ";", { 1, 29}}, +#if 0 {"pp-number", "1", { 1, 31}}, {"preprocessing-op-or-punc", "=", { 1, 33}}, {"identifier", "XYZ", { 1, 35}}, {"preprocessing-op-or-punc", ";", { 1, 39}}, +#endif }; Lex::Lexer lexer(LexBNF, LexTop); |