diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-01-21 22:49:30 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-01-21 22:49:30 +0100 |
commit | f4b2027868c9733bbbbcb4c5ec6d5462a8447e5d (patch) | |
tree | 40ce459f1a501d6c88936c78f6dbcbb8aadd04ca /minicc.cpp | |
parent | 08997620fd617b580c1adbcb03c90cf621aa7069 (diff) |
Separate to cpp files
Diffstat (limited to 'minicc.cpp')
-rw-r--r-- | minicc.cpp | 471 |
1 files changed, 5 insertions, 466 deletions
@@ -1,7 +1,9 @@ -#include <boost/algorithm/string.hpp> +#include "bnf.h" +#include "lexer.h" +#include "grammer.h" +#include "minicc.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" +#include <boost/algorithm/string.hpp> #include <algorithm> #include <cctype> @@ -12,13 +14,6 @@ #include <utility> #include <vector> -using namespace std::string_literals; - -using BNF = std::map<std::string, std::vector<std::vector<std::string>>>; -using Terminals = std::set<std::string>; -using ProgramNode = std::deque<std::string>; -using PathElement = std::pair<std::string, size_t>; // Name, Index - std::vector<std::string> split(std::string s) { std::vector<std::string> result; @@ -28,297 +23,11 @@ std::vector<std::string> split(std::string s) return result; } -auto Reverse(BNF bnf){ - std::map<std::string, std::set<std::string>> result; - - for (const auto& [from, to] : bnf) { - for (const auto& list : to) { - for (const auto& element : list) { - auto i{result.find(element)}; - if (i != result.end()) // already present - i->second.insert(from); - else // new element - result.emplace(element, std::set{from}); - } - } - } - - return result; -} - -using index_t = size_t; - -struct TreeNode { - index_t parent{}; - std::vector<index_t> childs; // fill char by char - std::vector<std::string> child_names; // fill always - std::string name; -}; - -class Tree { -private: - std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1 - index_t node_num{}; - index_t root{}; - index_t last{}; - -public: - void clear() { - nodes.clear(); - root = 0; - last = 0; - node_num = 0; - } - - // Type of lexical token - std::string GetType() { - if (node_num > 0 && nodes[root].child_names.size() == 1) - return nodes[root].child_names[0]; - return ""; - } - - bool Valid(const std::string& Top) const { - // A token is non empty - if (node_num == 0) - return false; - - // Start symbol on top - auto rootNode{nodes.find(root)}; - if (rootNode == nodes.end()) - throw std::runtime_error("Node not found: "s + std::to_string(root)); - - if (rootNode->second.name != Top) - return false; - - // All nodes filled (implies all leaves are terminal) - for (const auto& [index, node]: nodes) { - if (node.childs.size() < node.child_names.size()) - return false; // node not filled - } - - return true; - } - - bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - node_num ++; - root = node_num; - last = node_num; - std::string node_name(1, char(c)); - - auto reverseRule{reverseBNF.find(node_name)}; - if (reverseRule == reverseBNF.end()) - throw std::runtime_error("Reverse rule not found for "s + node_name); - - auto rule{bnf.find(node_name)}; - if (rule != bnf.end()) { // multiple variants! - throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); - } - nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name}); - return true; - } - - std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - std::vector<TreeNode> result; // default: empty - - auto& root_name {nodes[root].name}; - auto bnfParents {reverseBNF.find(root_name)}; - if (bnfParents == reverseBNF.end()) - return result; - - for (const auto& parent_node_name : bnfParents->second) { - auto lists {bnf.at(parent_node_name)}; - for (const auto& list : lists) { - if (list.size() > 0 && list[0] == root_name) { - TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name}; - result.push_back(node); - } - } - } - - return result; - } - - index_t GetLast() { - index_t result {root}; - - while(result != 0 && nodes[result].childs.size() >= 2) { - result = nodes[result].childs[nodes[result].childs.size() - 1]; - } - - return result; - } - - void AddRootNode(const TreeNode& newRootNode) { - node_num++; - nodes[node_num] = newRootNode; - root = node_num; - last = node_num; - } - - void RemoveRootNode() { - root = nodes[root].childs[0]; - nodes.erase(node_num); - node_num--; - last = GetLast(); - } - - // Path from leaf to root - std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - std::vector<std::string> result; - - while (a != b) { - auto parents {reverseBNF.find(a)}; - if (parents == reverseBNF.end()) - return {}; - - bool hit{false}; - for (const auto& parent : parents->second) { - for (const auto& list : bnf.at(parent)) { - if (list.size() > 0 && list[0] == a) { - if (!hit) { - result.push_back(a); - a = parent; - hit = true; - } else - throw std::runtime_error("Double match for "s + parent + "/"s + a); - } - } - } - } - if (a == b) { - result.push_back(a); - } - return result; - } - - index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) - { - TreeNode& parent {nodes[parent_index]}; - node_num++; - index_t index = node_num; - parent.childs.push_back(index); - std::vector<std::string> child_names; - auto rule {bnf.find(name)}; - if (rule != bnf.end()) { - for (auto& list : rule->second) { - if (list.size() > 0 && list[0] == child_name) - child_names = list; - } - } - nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); - //root stays - last = GetLast(); - - return index; - } - - void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - for (int i = path.size() - 1; i >= 0; i--) { - std::string child_name; - if (i > 0) - child_name = path[i - 1]; - current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); - } - } - - // try to add character to tree - bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - if (nodes.empty()) { // first node - return AddFirstNode(c, bnf, reverseBNF); - } else { // at least one character is already present - // Traverse tree until partially filled node found - // or new node can be added - index_t current_index{last}; - - while (current_index != 0) { - TreeNode& node {nodes[current_index]}; - if (node.childs.size() < node.child_names.size()) { // partially filled node - std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); - if (list.size() > 0) { - AddPath(list, current_index, bnf, reverseBNF); - return true; - } else { - return false; // The path a->b is not available via bnf - } - } - current_index = node.parent; - } - - // Add node at root - - std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF); - if (parent_nodes.size() == 0) - throw std::runtime_error("Couldn't add new parent node."); - - for (const auto &i : parent_nodes) { - AddRootNode(i); - if (Add(c, bnf, reverseBNF)) - return true; - RemoveRootNode(); - } - - } - return false; - } - - // add path to start symbol - void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) { - if (nodes.empty()) // only handle non-empty trees - return; - - while (true) { - std::string& old_root_name { nodes[root].name }; // current root node name - - auto parents {reverseBNF.find(old_root_name)}; - if (parents != reverseBNF.end()) { // parents in bnf available - bool hit{false}; - for (auto& parent : parents->second) { - for (const auto& list : bnf.at(parent)) { - if (list.size() == 1 && list[0] == old_root_name) { - if (!hit) { - const std::string& new_root_name {parent}; - // Add new TreeNode in the direction to root: - // New root with 1 child (old root) - nodes.emplace(++node_num, - TreeNode{0, // parent - std::vector<index_t>{root}, // child indices - std::vector<std::string>{old_root_name}, // child names - new_root_name // name - }); - nodes[root].parent = node_num; - root = node_num; - // this->last stays - hit = true; - } else - throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); - } - } - } - if (!hit) - break; - } else - break; - } - } - -}; - -struct Location { - size_t line; - size_t column; -}; - bool operator==(const Location &a, const Location &b) { return (a.line == b.line && a.column == b.column); } -struct Token { - std::string type; - std::string value; - Location location; -}; - bool operator==(const Token &a, const Token &b) { return (a.type == b.type && a.value == b.value && a.location == b.location); @@ -328,173 +37,3 @@ std::ostream& operator<<(std::ostream& os, const Token& token) { return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")"; } -class Lexer -{ - -private: - const BNF &bnf; - const std::string& Top; - - Location location{1, 0}; - - std::map<std::string, std::set<std::string>> ReverseBNF; - - // to be called on token end - void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result) - { - tree.Resolve(bnf, ReverseBNF); - if (tree.Valid(Top)) { - result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); - token.clear(); - } - tree.clear(); - } - -public: - Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} - { - } - - std::vector<Token> Lex(const std::string& s) - { - std::vector<Token> result; - std::string token; - - std::string Whitespace{"\t \n\r"}; - Tree tree; - - for (size_t pos{0}; pos < s.size(); pos++) { - char c{s[pos]}; - if (c == '\n') { - location.column = 0; - location.line++; - } else if (std::isprint(c)) { - location.column++; - } - - //std::cout << "Char: |" << c << "|" << std::endl; - if (Whitespace.find(c) != std::string::npos) { // found whitespace character - // evaluate token up to now and skip whitespace - FinalizeTree(tree, token, result); - } else { // no whitespace: try to add to tree - if (!tree.Add(c, bnf, ReverseBNF)) { - FinalizeTree(tree, token, result); - if (!tree.Add(c, bnf, ReverseBNF)) - throw std::runtime_error("Parse error"); - } - - token.push_back(c); - } - } - - // Final evaluation of last token - FinalizeTree(tree, token, result); - - return result; - } - -}; - -class Compiler -{ - -private: - const BNF &bnf; - const std::string& Top; - - std::map<std::string, std::set<std::string>> ReverseBNF; - -public: - Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} - { - } - - ProgramNode compile(std::vector<Token> Tokens) - { - if (Tokens.size()){ - } else - throw std::runtime_error("No tokens!"); - - return {}; - } -}; - -class Test: public ::testing::Test { -protected: - Test(){} - ~Test() override {} -}; - -TEST_F(Test, BNF) { - std::string LexTop{"preprocessing-token"}; - BNF LexBNF{ - {"preprocessing-token", {{"identifier"}, - {"preprocessing-op-or-punc"}, - {"pp-number"}}}, - - {"identifier", {{"identifier-nondigit"}, - {"identifier", "identifier-nondigit"}, - {"identifier", "digit"}}}, - {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, - {"identifier-nondigit", - {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, - {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, - {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, - {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, - {"preprocessing-op-or-punc", {{";"}, - {"="}}}, - {"pp-number", {{"digit"}, - {"pp-number", "digit"}}} - }; - - std::string Top{"program"}; - BNF bnf{ - {"program", {{"statement-list"}}}, - {"statement-list", {{"statement", "statement-list"}, - {}, }}, - {"statement", {{"assigmnent", ";"}}}, - {"assignment", {{"identifier", "=", "identifier"}}} - }; - - // implicit? - //std::set<std::string> Terminals{"identifier", "=", ";"}; - - std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; - std::vector<Token> tokens_reference{ - {"identifier", "a", { 1, 1} }, - {"preprocessing-op-or-punc", "=", { 1, 3}}, - {"identifier", "bc", { 1, 5}}, - {"preprocessing-op-or-punc", ";", { 1, 8}}, - {"identifier", "c", { 1, 10}}, - {"preprocessing-op-or-punc", "=", { 1, 12}}, - {"pp-number", "123", { 1, 14}}, - {"preprocessing-op-or-punc", ";", { 1, 18}}, - {"identifier", "esd", { 1, 20}}, - {"preprocessing-op-or-punc", "=", { 1, 24}}, - {"identifier", "Ff", { 1, 26}}, - {"preprocessing-op-or-punc", ";", { 1, 29}}, - {"pp-number", "1", { 1, 31}}, - {"preprocessing-op-or-punc", "=", { 1, 33}}, - {"identifier", "XYZ", { 1, 34}}, - }; - - Lexer lexer(LexBNF, LexTop); - auto tokens = lexer.Lex(Code); - - ASSERT_EQ(tokens, tokens_reference); -#if 0 - for (const auto& i: tokens) { - std::cout << i.value << std::endl; - } -#endif - Compiler compiler(bnf, Top); - auto Program = compiler.compile(tokens); - - //ASSERT_EQ(Program, Program_reference); -} - -int main(int argc, char* argv[]) { - ::testing::InitGoogleMock(&argc, argv); - return RUN_ALL_TESTS(); -} - |