From 5d3d2130e2c1c3793d17b12dc7bcc0ee06795b9e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 19 Jan 2020 17:50:06 +0100 Subject: First working lexer --- minicc.cpp | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 128 insertions(+), 21 deletions(-) diff --git a/minicc.cpp b/minicc.cpp index d147baa..b392195 100644 --- a/minicc.cpp +++ b/minicc.cpp @@ -3,6 +3,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include #include #include @@ -26,12 +27,6 @@ std::vector split(std::string s) return result; } -std::vector GetPath(std::string Token, BNF ReverseBNF, std::string Top, Terminals terminals = {}, std::vector PreviousPath = {}) -{ - throw std::runtime_error("Compile error"); - return {}; // TODO -} - auto Reverse(BNF bnf){ std::map> result; @@ -53,7 +48,7 @@ auto Reverse(BNF bnf){ using index_t = size_t; struct TreeNode { - index_t parent; + index_t parent{}; std::vector childs; // fill char by char std::vector child_names; // fill always std::string name; @@ -97,26 +92,118 @@ public: auto reverseRule{reverseBNF.find(node_name)}; if (reverseRule == reverseBNF.end()) - throw std::runtime_error("Reverse rule not found: "s + node_name); + throw std::runtime_error("Reverse rule not found for "s + node_name); - std::vector children; // default: no children for terminal auto rule{bnf.find(node_name)}; if (rule != bnf.end()) { // multiple variants! + throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); + } + nodes.emplace(root, TreeNode{0, std::vector{}, std::vector{}, node_name}); + return true; + } + + std::vector getParentTreeNode(const BNF& bnf, const std::map>& reverseBNF) { + std::vector result; // default: empty + + auto& root_name {nodes[root].name}; + auto bnfParents {reverseBNF.find(root_name)}; + if (bnfParents == reverseBNF.end()) + return result; + + for (const auto& parent_node_name : bnfParents->second) { + auto lists {bnf.at(parent_node_name)}; + for (const auto& list : lists) { + if (list.size() > 0 && list[0] == root_name) { + TreeNode node{0, std::vector{root}, list, parent_node_name}; + result.push_back(node); + } + } + } + + return result; + } + + index_t GetLast() { + index_t result {root}; + + while(result != 0 && nodes[result].childs.size() >= 2) { + result = nodes[result].childs[nodes[result].childs.size() - 1]; + } + + return result; + } + + void AddRootNode(const TreeNode& newRootNode) { + node_num++; + nodes[node_num] = newRootNode; + root = node_num; + last = node_num; + } + + void RemoveRootNode() { + root = nodes[root].childs[0]; + nodes.erase(node_num); + node_num--; + last = GetLast(); + } + + // Path from leaf to root + std::vector GetPath(std::string a, std::string b, const BNF& bnf, const std::map>& reverseBNF) { + std::vector result; + + while (a != b) { + auto parents {reverseBNF.find(a)}; + if (parents == reverseBNF.end()) + return {}; + bool hit{false}; - for (const auto& i : rule->second) { - if (i.size() > 0 && i[0] == node_name) { - if (hit) - throw std::runtime_error("Multiple matching rules found for "s + node_name); - children = i; - hit = true; + for (const auto& parent : parents->second) { + for (const auto& list : bnf.at(parent)) { + if (list.size() > 0 && list[0] == a) { + if (!hit) { + result.push_back(a); + a = parent; + hit = true; + } else + throw std::runtime_error("Double match for "s + parent + "/"s + a); + } } } - if (!hit) - throw std::runtime_error("No matching rule found for "s + node_name); } + if (a == b) { + result.push_back(a); + } + return result; + } - nodes.emplace(root, TreeNode{0, std::vector{}, children, node_name}); - return true; + index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map>& reverseBNF) + { + TreeNode& parent {nodes[parent_index]}; + node_num++; + index_t index = node_num; + parent.childs.push_back(index); + std::vector child_names; + auto rule {bnf.find(name)}; + if (rule != bnf.end()) { + for (auto& list : rule->second) { + if (list.size() > 0 && list[0] == child_name) + child_names = list; + } + } + nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); + //root stays + last = GetLast(); + + return index; + } + + void AddPath(const std::vector& path, index_t current_index, const BNF& bnf, const std::map>& reverseBNF) { + for (int i = path.size() - 1; i >= 0; i--) { + std::string child_name; + if (i > 0) + child_name = path[i - 1]; + current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); + } } // try to add character to tree @@ -131,11 +218,29 @@ public: while (current_index != 0) { TreeNode& node {nodes[current_index]}; if (node.childs.size() < node.child_names.size()) { // partially filled node - throw std::runtime_error("TODO: partially filled node"); + std::vector list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); + if (list.size() > 0) { + AddPath(list, current_index, bnf, reverseBNF); + return true; + } else { + return false; // The path a->b is not available via bnf + } } current_index = node.parent; } - throw std::runtime_error("TODO: need to add node"); + + // Add node at root + + std::vector parent_nodes = getParentTreeNode(bnf, reverseBNF); + if (parent_nodes.size() == 0) + throw std::runtime_error("Couldn't add new parent node."); + + for (const auto &i : parent_nodes) { + AddRootNode(i); + if (Add(c, bnf, reverseBNF)) + return true; + RemoveRootNode(); + } } return false; @@ -283,6 +388,7 @@ ProgramNode Compile(std::vector Tokens, std::string Top, BNF bnf, T if (Tokens.size()){ std::string Token = Tokens[0]; +#if 0 auto Path = GetPath(Token, ReverseBNF, Top, terminals); if (Path.size()) { size_t Index{1}; @@ -292,6 +398,7 @@ ProgramNode Compile(std::vector Tokens, std::string Top, BNF bnf, T } } else throw std::runtime_error("Invalid token: "s + Token); +#endif } else throw std::runtime_error("No tokens!"); -- cgit v1.2.3