#include #include "gmock/gmock.h" #include "gtest/gtest.h" #include #include #include #include #include #include using namespace std::string_literals; using BNF = std::map>>; using Terminals = std::set; using ProgramNode = std::deque; using PathElement = std::pair; // Name, Index std::vector split(std::string s) { std::vector result; boost::algorithm::split(result, s, boost::algorithm::is_any_of(s), boost::algorithm::token_compress_on); while (result.size() > 0 && result.back() == ""s) result.pop_back(); return result; } std::vector GetPath(std::string Token, BNF ReverseBNF, std::string Top, Terminals terminals = {}, std::vector PreviousPath = {}) { throw std::runtime_error("Compile error"); return {}; // TODO } auto Reverse(BNF bnf){ std::map> result; for (const auto& [from, to] : bnf) { for (const auto& list : to) { for (const auto& element : list) { auto i{result.find(element)}; if (i != result.end()) // already present i->second.push_back(from); else // new element result.emplace(element, std::vector{1, from}); } } } return result; } using index_t = size_t; struct TreeNode { index_t parent; std::vector childs; // fill char by char std::vector child_names; // fill always std::string name; }; class Tree { private: std::map nodes; // index 0 = non existing; index starting at 1 index_t node_num{}; index_t root{}; index_t last{}; public: bool Valid(const std::string& Top) const { // Start symbol on top if (node_num == 0) return false; auto rootNode{nodes.find(root)}; if (rootNode == nodes.end()) throw std::runtime_error("Node not found: "s + std::to_string(root)); if (rootNode->second.name != Top) return false; // All nodes filled (implies all leaves are terminal) for (const auto& [index, node]: nodes) { if (node.childs.size() < node.child_names.size()) return false; // node not filled } return true; } // try to add node bool Add(char c, const BNF& bnf, const std::map>& reverseBNF) { if (nodes.empty()) { // first node node_num ++; root = node_num; last = node_num; std::string node_name{1, c}; auto rule{reverseBNF.find(node_name)}; if (rule == reverseBNF.end()) throw std::runtime_error("Rule not found: "s + node_name); nodes.emplace(root, TreeNode{0, std::vector{}, std::vector{}, node_name}); return true; } else { // TODO } return false; } }; class Lexer { private: const BNF &bnf; const std::string& Top; std::map> ReverseBNF; // to be called on token end void CheckCandidates(std::deque& candidates, std::string& token, std::vector& result) { if (candidates.empty()) { // skip if (!token.empty()) throw std::runtime_error("Expected empty token, got "s + token); } else { // check candidates bool valid{false}; for (const auto& ct : candidates) { if (ct.Valid(Top)) { if (valid) throw std::runtime_error("Found ambiguous token "s + token); result.push_back(token); token.clear(); valid = true; } } if (!valid) throw std::runtime_error("Invalid token: "s + token); candidates.clear(); } } public: Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} { } std::vector Lex(const std::string& s) { std::vector result; std::string token; std::string Whitespace{"\t \n\r"}; std::deque candidates; for (size_t pos{0}; pos < s.size(); pos++) { char c{s[pos]}; if (Whitespace.find(c) != std::string::npos) { // found whitespace character // evaluate token up to now and skip whitespace CheckCandidates(candidates, token, result); } else { // no whitespace: try to add to tree std::deque EraseList; int i = 0; for (auto ct = candidates.begin(); ct != candidates.end(); ct++, i++) { if (!ct->Add(c, bnf, ReverseBNF)) { // either add char or delete candidate // push to front to get reversed order EraseList.push_front(i); // no candidate anymore } } if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token token.push_back(c); for (const auto& i : EraseList) candidates.erase(candidates.begin() + i); } else { // no candidates left: new tree CheckCandidates(candidates, token, result); } // new candidate: starting with c auto element {ReverseBNF.find(std::string(1, c))}; if (element != ReverseBNF.end()) { Tree newTree; if (newTree.Add(c, bnf, ReverseBNF)) candidates.push_back(newTree); } } } // Final evaluation of last token CheckCandidates(candidates, token, result); return result; } }; ProgramNode Compile(std::vector Tokens, std::string Top, BNF bnf, Terminals terminals) { BNF ReverseBNF;//{ Reverse(bnf)}; if (Tokens.size()){ std::string Token = Tokens[0]; auto Path = GetPath(Token, ReverseBNF, Top, terminals); if (Path.size()) { size_t Index{1}; while (Index < Tokens.size()) { Path = GetPath(Token, ReverseBNF, Top, terminals, Path); Index++; } } else throw std::runtime_error("Invalid token: "s + Token); } else throw std::runtime_error("No tokens!"); return {}; } class Test: public ::testing::Test { protected: Test(){} ~Test() override {} }; TEST_F(Test, BNF) { std::string LexTop{"preprocessing-token"}; BNF LexBNF{ {"preprocessing-token", {{"identifier"}, {"preprocessing-op-or-punc"}, {"pp-number"}}}, {"identifier", {{"identifier-nondigit"}, {"identifier", "identifier-nondigit"}, {"identifier", "digit"}}}, {"digit", {{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }}}, {"identifier-nondigit", {{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "_"}}}, {"preprocessing-op-or-punc", {{";"}, {"="}}}, {"pp-number", {{"digit"}, {"pp-number", "digit"}}} }; std::string Top{"program"}; BNF bnf{ {"program", {{"statement-list"}}}, {"statement-list", {{"statement", "statement-list"}, {}, }}, {"statement", {{"assigmnent", ";"}}}, {"assignment", {{"identifier", "=", "identifier"}}} }; std::set Terminals{"identifier", "=", ";"}; std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; Lexer lexer(LexBNF, LexTop); auto tokens = lexer.Lex(Code); for (const auto& i: tokens) { std::cout << i << std::endl; } //auto Program = Compile(tokens, Top, bnf, Terminals); } int main(int argc, char* argv[]) { ::testing::InitGoogleMock(&argc, argv); return RUN_ALL_TESTS(); }