summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-01-23 21:53:57 +0100
committerRoland Reichwein <mail@reichwein.it>2020-01-23 21:53:57 +0100
commite172ed9f799501c234c8da18cef829244473f1d7 (patch)
tree2c36c8adef69bd7c1b61efc1fe91188ab61ca6d8
parentf7cdbb6635d62a0347be579cb8dd6badec22956d (diff)
Added grammer
-rw-r--r--bnf.h1
-rw-r--r--grammer.cpp255
-rw-r--r--grammer.h37
-rw-r--r--lexer.cpp8
-rw-r--r--lexer.h2
-rw-r--r--test-lexer.cpp19
6 files changed, 307 insertions, 15 deletions
diff --git a/bnf.h b/bnf.h
index 8aa2684..79d3c05 100644
--- a/bnf.h
+++ b/bnf.h
@@ -11,7 +11,6 @@ using namespace std::string_literals;
using BNF = std::map<std::string, std::vector<std::vector<std::string>>>;
using Terminals = std::set<std::string>;
-using ProgramNode = std::deque<std::string>;
using PathElement = std::pair<std::string, size_t>; // Name, Index
std::map<std::string, std::set<std::string>> Reverse(BNF bnf);
diff --git a/grammer.cpp b/grammer.cpp
index d275da6..796c099 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -2,15 +2,264 @@
using namespace Gram;
-Compiler::Compiler(const BNF& bnf, const std::string& Top): m_bnf(bnf), m_Top(Top), ReverseBNF{Reverse(bnf)}
+void Tree::clear() {
+ nodes.clear();
+ root = 0;
+ last = 0;
+ node_num = 0;
+}
+
+bool Tree::Valid(const std::string& Top) const {
+ // A program is non empty
+ if (node_num == 0)
+ return false;
+
+ // Start symbol on top
+ auto rootNode{nodes.find(root)};
+ if (rootNode == nodes.end())
+ throw std::runtime_error("Node not found: "s + std::to_string(root));
+
+ if (rootNode->second.token.type != Top)
+ return false;
+
+ // All nodes filled (implies all leaves are terminal)
+ for (const auto& [index, node]: nodes) {
+ if (node.childs.size() < node.child_types.size())
+ return false; // node not filled
+ }
+
+ return true;
+}
+
+bool Tree::AddFirstNode(const Token& token, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ node_num ++;
+ root = node_num;
+ last = node_num;
+
+ auto reverseRule{reverseBNF.find(token.type)};
+ if (reverseRule == reverseBNF.end())
+ throw std::runtime_error("Reverse rule not found for "s + token.type);
+
+ auto rule{bnf.find(token.type)};
+ if (rule != bnf.end()) { // multiple variants!
+ throw std::runtime_error("BNF rule for terminal symbol "s + token.type + " found."s);
+ }
+ nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, token});
+ return true;
+}
+
+std::vector<TreeNode> Tree::getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ std::vector<TreeNode> result; // default: empty
+
+ auto& root_name {nodes[root].token.type};
+ auto bnfParents {reverseBNF.find(root_name)};
+ if (bnfParents == reverseBNF.end())
+ return result;
+
+ for (const auto& parent_node_name : bnfParents->second) {
+ auto lists {bnf.at(parent_node_name)};
+ for (const auto& list : lists) {
+ if (list.size() > 0 && list[0] == root_name) {
+ TreeNode node{0, std::vector<index_t>{root}, list, Token{parent_node_name}};
+ result.push_back(node);
+ }
+ }
+ }
+
+ return result;
+}
+
+index_t Tree::GetLast() {
+ index_t result {root};
+
+ while(result != 0 && nodes[result].childs.size() >= 2) {
+ result = nodes[result].childs[nodes[result].childs.size() - 1];
+ }
+
+ return result;
+}
+
+void Tree::AddRootNode(const TreeNode& newRootNode) {
+ node_num++;
+ nodes[node_num] = newRootNode;
+ root = node_num;
+ last = node_num;
+}
+
+void Tree::RemoveRootNode() {
+ root = nodes[root].childs[0];
+ nodes.erase(node_num);
+ node_num--;
+ last = GetLast();
+}
+
+// Path from leaf to root
+std::vector<std::string> Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ std::vector<std::string> result;
+
+ while (a != b) {
+ auto parents {reverseBNF.find(a)};
+ if (parents == reverseBNF.end())
+ return {};
+
+ bool hit{false};
+ for (const auto& parent : parents->second) {
+ for (const auto& list : bnf.at(parent)) {
+ if (list.size() > 0 && list[0] == a) {
+ if (!hit) {
+ result.push_back(a);
+ a = parent;
+ hit = true;
+ } else
+ throw std::runtime_error("Double match for "s + parent + "/"s + a);
+ }
+ }
+ }
+ }
+ if (a == b) {
+ result.push_back(a);
+ }
+ return result;
+}
+
+index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF)
{
+ TreeNode& parent {nodes[parent_index]};
+ node_num++;
+ index_t index = node_num;
+ parent.childs.push_back(index);
+ std::vector<std::string> child_types;
+ auto rule {bnf.find(name)};
+ if (rule != bnf.end()) {
+ for (auto& list : rule->second) {
+ if (list.size() > 0 && list[0] == child_name)
+ child_types = list;
+ }
+ }
+ nodes.emplace(index, TreeNode{parent_index, {}, child_types, Token{name}});
+ //root stays
+ last = GetLast();
+
+ return index;
}
-ProgramNode Compiler::compile(std::vector<Token> Tokens)
+void Tree::AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ for (int i = path.size() - 1; i >= 0; i--) {
+ std::string child_name;
+ if (i > 0)
+ child_name = path[i - 1];
+ current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF);
+ }
+}
+
+// try to add Node to tree
+bool Tree::Add(const Token& token, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ if (nodes.empty()) { // first node
+ return AddFirstNode(token, bnf, reverseBNF);
+ } else { // at least one character is already present
+ // Traverse tree until partially filled node found
+ // or new node can be added
+ index_t current_index{last};
+
+ while (current_index != 0) {
+ TreeNode& node {nodes[current_index]};
+ if (node.childs.size() < node.child_types.size()) { // partially filled node
+ std::vector<std::string> list = GetPath(token.type, node.child_types[node.childs.size()], bnf, reverseBNF);
+ if (list.size() > 0) {
+ AddPath(list, current_index, bnf, reverseBNF);
+ return true;
+ } else {
+ return false; // The path a->b is not available via bnf
+ }
+ }
+ current_index = node.parent;
+ }
+
+ // Add node at root
+
+ std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF);
+ if (parent_nodes.size() == 0)
+ throw std::runtime_error("Couldn't add new parent node for "s + nodes[root].token.type);
+
+ for (const auto &i : parent_nodes) {
+ AddRootNode(i);
+ if (Add(token, bnf, reverseBNF))
+ return true;
+ RemoveRootNode();
+ }
+
+ }
+ return false;
+}
+
+// add path to start symbol
+void Tree::Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ if (nodes.empty()) // only handle non-empty trees
+ return;
+
+ while (true) {
+ std::string& old_root_name { nodes[root].token.type }; // current root node type
+
+ auto parents {reverseBNF.find(old_root_name)};
+ if (parents != reverseBNF.end()) { // parents in bnf available
+ bool hit{false};
+ for (auto& parent : parents->second) {
+ for (const auto& list : bnf.at(parent)) {
+ if (list.size() == 1 && list[0] == old_root_name) {
+ if (!hit) {
+ // Add new TreeNode in the direction to root:
+ // New root with 1 child (old root)
+ nodes.emplace(++node_num,
+ TreeNode{0, // parent
+ std::vector<index_t>{root}, // child indices
+ std::vector<std::string>{old_root_name}, // child types
+ Token{parent}
+ });
+ nodes[root].parent = node_num;
+ root = node_num;
+ // this->last stays
+ hit = true;
+ } else
+ throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name);
+ }
+ }
+ }
+ if (!hit)
+ break;
+ } else
+ break;
+ }
+}
+
+void Tree::Dump()
+{
+ for (const auto& i : nodes) {
+ std::cout << i.second.token.type << " (" << i.second.token.value << ")" << std::endl;
+ }
+}
+
+Compiler::Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
+{
+}
+
+Tree Compiler::compile(std::vector<Token> Tokens)
{
if (Tokens.size()){
} else
throw std::runtime_error("No tokens!");
- return {};
+ Tree tree;
+
+ for (const Token& i : Tokens) {
+ if (!tree.Add(i, bnf, ReverseBNF)) {
+ //tree.Dump();
+ throw std::runtime_error("Compile error: Invalid token "s + i.type);
+ }
+ }
+
+ tree.Resolve(bnf, ReverseBNF);
+ if (!tree.Valid(Top))
+ throw std::runtime_error("Compile error: Program incomplete");
+
+ return tree;
}
diff --git a/grammer.h b/grammer.h
index 01ea681..87aa1ad 100644
--- a/grammer.h
+++ b/grammer.h
@@ -5,18 +5,49 @@
namespace Gram {
+struct TreeNode {
+ index_t parent{};
+ std::vector<index_t> childs; // fill Token by Token
+ std::vector<std::string> child_types; // fill always
+ Token token;
+};
+
+class Tree {
+private:
+ std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1
+ index_t node_num{};
+ index_t root{};
+ index_t last{};
+
+public:
+ void clear();
+ bool Valid(const std::string& Top) const;
+ bool AddFirstNode(const Token& token, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ index_t GetLast();
+ void AddRootNode(const TreeNode& newRootNode);
+ void RemoveRootNode();
+ std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ bool Add(const Token& token, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+
+ void Dump();
+};
+
class Compiler
{
private:
- const BNF &m_bnf;
- const std::string& m_Top;
+ const BNF &bnf;
+ const std::string& Top;
std::map<std::string, std::set<std::string>> ReverseBNF;
public:
Compiler(const BNF& bnf, const std::string& Top);
- ProgramNode compile(std::vector<Token> Tokens);
+ Tree compile(std::vector<Token> Tokens);
};
} // namespace Gram
diff --git a/lexer.cpp b/lexer.cpp
index 3b26c52..7717b85 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -292,3 +292,11 @@ std::vector<Token> Lexer::Lex(const std::string& s)
return result;
}
+// C++: Preprocessor Tokens To Tokens
+void Lexer::PreprocessorTokensToTokens(std::vector<Token>& tokens)
+{
+ for (auto& i : tokens) {
+ if (i.type == "preprocessing-op-or-punc")
+ i.type = i.value;
+ }
+}
diff --git a/lexer.h b/lexer.h
index c6576d0..b46808b 100644
--- a/lexer.h
+++ b/lexer.h
@@ -53,6 +53,8 @@ public:
Lexer(const BNF& bnf, const std::string& Top);
std::vector<Token> Lex(const std::string& s);
+ static void PreprocessorTokensToTokens(std::vector<Token>& tokens);
+
};
} // namespace Lex
diff --git a/test-lexer.cpp b/test-lexer.cpp
index 4942013..4f68fc6 100644
--- a/test-lexer.cpp
+++ b/test-lexer.cpp
@@ -48,10 +48,11 @@ TEST_F(Test, BNF) {
std::string Top{"program"};
BNF bnf{
{"program", {{"statement-list"}}},
- {"statement-list", {{"statement", "statement-list"},
- {}, }},
- {"statement", {{"assigmnent", ";"}}},
- {"assignment", {{"identifier", "=", "identifier"}}}
+ {"statement-list", {{"statement-list", "statement"},
+ {"statement"}, }},
+ {"statement", {{"assignment", ";"}}},
+ {"assignment", {{"identifier", "=", "identifier"},
+ {"identifier", "=", "pp-number"}}}
};
// implicit?
@@ -80,15 +81,17 @@ TEST_F(Test, BNF) {
auto tokens = lexer.Lex(Code);
ASSERT_EQ(tokens, tokens_reference);
-#if 0
+#if 1
for (const auto& i: tokens) {
- std::cout << i.value << std::endl;
+ std::cout << i.type << ": " << i.value << std::endl;
}
#endif
+
+ Lex::Lexer::PreprocessorTokensToTokens(tokens);
Gram::Compiler compiler(bnf, Top);
- auto Program = compiler.compile(tokens);
+ auto Tree = compiler.compile(tokens);
- //ASSERT_EQ(Program, Program_reference);
+ ASSERT_TRUE(Tree.Valid(Top));
}
int main(int argc, char* argv[]) {