From e172ed9f799501c234c8da18cef829244473f1d7 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Thu, 23 Jan 2020 21:53:57 +0100 Subject: Added grammer --- bnf.h | 1 - grammer.cpp | 255 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- grammer.h | 37 ++++++++- lexer.cpp | 8 ++ lexer.h | 2 + test-lexer.cpp | 19 +++-- 6 files changed, 307 insertions(+), 15 deletions(-) diff --git a/bnf.h b/bnf.h index 8aa2684..79d3c05 100644 --- a/bnf.h +++ b/bnf.h @@ -11,7 +11,6 @@ using namespace std::string_literals; using BNF = std::map>>; using Terminals = std::set; -using ProgramNode = std::deque; using PathElement = std::pair; // Name, Index std::map> Reverse(BNF bnf); diff --git a/grammer.cpp b/grammer.cpp index d275da6..796c099 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -2,15 +2,264 @@ using namespace Gram; -Compiler::Compiler(const BNF& bnf, const std::string& Top): m_bnf(bnf), m_Top(Top), ReverseBNF{Reverse(bnf)} +void Tree::clear() { + nodes.clear(); + root = 0; + last = 0; + node_num = 0; +} + +bool Tree::Valid(const std::string& Top) const { + // A program is non empty + if (node_num == 0) + return false; + + // Start symbol on top + auto rootNode{nodes.find(root)}; + if (rootNode == nodes.end()) + throw std::runtime_error("Node not found: "s + std::to_string(root)); + + if (rootNode->second.token.type != Top) + return false; + + // All nodes filled (implies all leaves are terminal) + for (const auto& [index, node]: nodes) { + if (node.childs.size() < node.child_types.size()) + return false; // node not filled + } + + return true; +} + +bool Tree::AddFirstNode(const Token& token, const BNF& bnf, const std::map>& reverseBNF) { + node_num ++; + root = node_num; + last = node_num; + + auto reverseRule{reverseBNF.find(token.type)}; + if (reverseRule == reverseBNF.end()) + throw std::runtime_error("Reverse rule not found for "s + token.type); + + auto rule{bnf.find(token.type)}; + if (rule != bnf.end()) { // multiple variants! + throw std::runtime_error("BNF rule for terminal symbol "s + token.type + " found."s); + } + nodes.emplace(root, TreeNode{0, std::vector{}, std::vector{}, token}); + return true; +} + +std::vector Tree::getParentTreeNode(const BNF& bnf, const std::map>& reverseBNF) { + std::vector result; // default: empty + + auto& root_name {nodes[root].token.type}; + auto bnfParents {reverseBNF.find(root_name)}; + if (bnfParents == reverseBNF.end()) + return result; + + for (const auto& parent_node_name : bnfParents->second) { + auto lists {bnf.at(parent_node_name)}; + for (const auto& list : lists) { + if (list.size() > 0 && list[0] == root_name) { + TreeNode node{0, std::vector{root}, list, Token{parent_node_name}}; + result.push_back(node); + } + } + } + + return result; +} + +index_t Tree::GetLast() { + index_t result {root}; + + while(result != 0 && nodes[result].childs.size() >= 2) { + result = nodes[result].childs[nodes[result].childs.size() - 1]; + } + + return result; +} + +void Tree::AddRootNode(const TreeNode& newRootNode) { + node_num++; + nodes[node_num] = newRootNode; + root = node_num; + last = node_num; +} + +void Tree::RemoveRootNode() { + root = nodes[root].childs[0]; + nodes.erase(node_num); + node_num--; + last = GetLast(); +} + +// Path from leaf to root +std::vector Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map>& reverseBNF) { + std::vector result; + + while (a != b) { + auto parents {reverseBNF.find(a)}; + if (parents == reverseBNF.end()) + return {}; + + bool hit{false}; + for (const auto& parent : parents->second) { + for (const auto& list : bnf.at(parent)) { + if (list.size() > 0 && list[0] == a) { + if (!hit) { + result.push_back(a); + a = parent; + hit = true; + } else + throw std::runtime_error("Double match for "s + parent + "/"s + a); + } + } + } + } + if (a == b) { + result.push_back(a); + } + return result; +} + +index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map>& reverseBNF) { + TreeNode& parent {nodes[parent_index]}; + node_num++; + index_t index = node_num; + parent.childs.push_back(index); + std::vector child_types; + auto rule {bnf.find(name)}; + if (rule != bnf.end()) { + for (auto& list : rule->second) { + if (list.size() > 0 && list[0] == child_name) + child_types = list; + } + } + nodes.emplace(index, TreeNode{parent_index, {}, child_types, Token{name}}); + //root stays + last = GetLast(); + + return index; } -ProgramNode Compiler::compile(std::vector Tokens) +void Tree::AddPath(const std::vector& path, index_t current_index, const BNF& bnf, const std::map>& reverseBNF) { + for (int i = path.size() - 1; i >= 0; i--) { + std::string child_name; + if (i > 0) + child_name = path[i - 1]; + current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); + } +} + +// try to add Node to tree +bool Tree::Add(const Token& token, const BNF& bnf, const std::map>& reverseBNF) { + if (nodes.empty()) { // first node + return AddFirstNode(token, bnf, reverseBNF); + } else { // at least one character is already present + // Traverse tree until partially filled node found + // or new node can be added + index_t current_index{last}; + + while (current_index != 0) { + TreeNode& node {nodes[current_index]}; + if (node.childs.size() < node.child_types.size()) { // partially filled node + std::vector list = GetPath(token.type, node.child_types[node.childs.size()], bnf, reverseBNF); + if (list.size() > 0) { + AddPath(list, current_index, bnf, reverseBNF); + return true; + } else { + return false; // The path a->b is not available via bnf + } + } + current_index = node.parent; + } + + // Add node at root + + std::vector parent_nodes = getParentTreeNode(bnf, reverseBNF); + if (parent_nodes.size() == 0) + throw std::runtime_error("Couldn't add new parent node for "s + nodes[root].token.type); + + for (const auto &i : parent_nodes) { + AddRootNode(i); + if (Add(token, bnf, reverseBNF)) + return true; + RemoveRootNode(); + } + + } + return false; +} + +// add path to start symbol +void Tree::Resolve(const BNF& bnf, const std::map>& reverseBNF) { + if (nodes.empty()) // only handle non-empty trees + return; + + while (true) { + std::string& old_root_name { nodes[root].token.type }; // current root node type + + auto parents {reverseBNF.find(old_root_name)}; + if (parents != reverseBNF.end()) { // parents in bnf available + bool hit{false}; + for (auto& parent : parents->second) { + for (const auto& list : bnf.at(parent)) { + if (list.size() == 1 && list[0] == old_root_name) { + if (!hit) { + // Add new TreeNode in the direction to root: + // New root with 1 child (old root) + nodes.emplace(++node_num, + TreeNode{0, // parent + std::vector{root}, // child indices + std::vector{old_root_name}, // child types + Token{parent} + }); + nodes[root].parent = node_num; + root = node_num; + // this->last stays + hit = true; + } else + throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); + } + } + } + if (!hit) + break; + } else + break; + } +} + +void Tree::Dump() +{ + for (const auto& i : nodes) { + std::cout << i.second.token.type << " (" << i.second.token.value << ")" << std::endl; + } +} + +Compiler::Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} +{ +} + +Tree Compiler::compile(std::vector Tokens) { if (Tokens.size()){ } else throw std::runtime_error("No tokens!"); - return {}; + Tree tree; + + for (const Token& i : Tokens) { + if (!tree.Add(i, bnf, ReverseBNF)) { + //tree.Dump(); + throw std::runtime_error("Compile error: Invalid token "s + i.type); + } + } + + tree.Resolve(bnf, ReverseBNF); + if (!tree.Valid(Top)) + throw std::runtime_error("Compile error: Program incomplete"); + + return tree; } diff --git a/grammer.h b/grammer.h index 01ea681..87aa1ad 100644 --- a/grammer.h +++ b/grammer.h @@ -5,18 +5,49 @@ namespace Gram { +struct TreeNode { + index_t parent{}; + std::vector childs; // fill Token by Token + std::vector child_types; // fill always + Token token; +}; + +class Tree { +private: + std::map nodes; // index 0 = non existing; index starting at 1 + index_t node_num{}; + index_t root{}; + index_t last{}; + +public: + void clear(); + bool Valid(const std::string& Top) const; + bool AddFirstNode(const Token& token, const BNF& bnf, const std::map>& reverseBNF); + std::vector getParentTreeNode(const BNF& bnf, const std::map>& reverseBNF); + index_t GetLast(); + void AddRootNode(const TreeNode& newRootNode); + void RemoveRootNode(); + std::vector GetPath(std::string a, std::string b, const BNF& bnf, const std::map>& reverseBNF); + index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map>& reverseBNF); + void AddPath(const std::vector& path, index_t current_index, const BNF& bnf, const std::map>& reverseBNF); + bool Add(const Token& token, const BNF& bnf, const std::map>& reverseBNF); + void Resolve(const BNF& bnf, const std::map>& reverseBNF); + + void Dump(); +}; + class Compiler { private: - const BNF &m_bnf; - const std::string& m_Top; + const BNF &bnf; + const std::string& Top; std::map> ReverseBNF; public: Compiler(const BNF& bnf, const std::string& Top); - ProgramNode compile(std::vector Tokens); + Tree compile(std::vector Tokens); }; } // namespace Gram diff --git a/lexer.cpp b/lexer.cpp index 3b26c52..7717b85 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -292,3 +292,11 @@ std::vector Lexer::Lex(const std::string& s) return result; } +// C++: Preprocessor Tokens To Tokens +void Lexer::PreprocessorTokensToTokens(std::vector& tokens) +{ + for (auto& i : tokens) { + if (i.type == "preprocessing-op-or-punc") + i.type = i.value; + } +} diff --git a/lexer.h b/lexer.h index c6576d0..b46808b 100644 --- a/lexer.h +++ b/lexer.h @@ -53,6 +53,8 @@ public: Lexer(const BNF& bnf, const std::string& Top); std::vector Lex(const std::string& s); + static void PreprocessorTokensToTokens(std::vector& tokens); + }; } // namespace Lex diff --git a/test-lexer.cpp b/test-lexer.cpp index 4942013..4f68fc6 100644 --- a/test-lexer.cpp +++ b/test-lexer.cpp @@ -48,10 +48,11 @@ TEST_F(Test, BNF) { std::string Top{"program"}; BNF bnf{ {"program", {{"statement-list"}}}, - {"statement-list", {{"statement", "statement-list"}, - {}, }}, - {"statement", {{"assigmnent", ";"}}}, - {"assignment", {{"identifier", "=", "identifier"}}} + {"statement-list", {{"statement-list", "statement"}, + {"statement"}, }}, + {"statement", {{"assignment", ";"}}}, + {"assignment", {{"identifier", "=", "identifier"}, + {"identifier", "=", "pp-number"}}} }; // implicit? @@ -80,15 +81,17 @@ TEST_F(Test, BNF) { auto tokens = lexer.Lex(Code); ASSERT_EQ(tokens, tokens_reference); -#if 0 +#if 1 for (const auto& i: tokens) { - std::cout << i.value << std::endl; + std::cout << i.type << ": " << i.value << std::endl; } #endif + + Lex::Lexer::PreprocessorTokensToTokens(tokens); Gram::Compiler compiler(bnf, Top); - auto Program = compiler.compile(tokens); + auto Tree = compiler.compile(tokens); - //ASSERT_EQ(Program, Program_reference); + ASSERT_TRUE(Tree.Valid(Top)); } int main(int argc, char* argv[]) { -- cgit v1.2.3