From f4b2027868c9733bbbbcb4c5ec6d5462a8447e5d Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Tue, 21 Jan 2020 22:49:30 +0100 Subject: Separate to cpp files --- Makefile | 4 + bnf.cpp | 21 +++ bnf.h | 17 +++ grammer.cpp | 14 ++ grammer.h | 19 +++ lexer.cpp | 292 +++++++++++++++++++++++++++++++++++ lexer.h | 56 +++++++ minicc.cpp | 471 +-------------------------------------------------------- minicc.h | 26 ++++ test-lexer.cpp | 97 ++++++++++++ 10 files changed, 551 insertions(+), 466 deletions(-) create mode 100644 bnf.cpp create mode 100644 bnf.h create mode 100644 grammer.cpp create mode 100644 grammer.h create mode 100644 lexer.cpp create mode 100644 lexer.h create mode 100644 minicc.h create mode 100644 test-lexer.cpp diff --git a/Makefile b/Makefile index af47b09..547e905 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,10 @@ endif SRC=\ minicc.cpp \ + lexer.cpp \ + grammer.cpp \ + bnf.cpp \ + test-lexer.cpp \ googletest/src/gtest-all.cpp \ googlemock/src/gmock-all.cpp diff --git a/bnf.cpp b/bnf.cpp new file mode 100644 index 0000000..1efb459 --- /dev/null +++ b/bnf.cpp @@ -0,0 +1,21 @@ +#include "bnf.h" + +std::map> Reverse(BNF bnf){ + std::map> result; + + for (const auto& [from, to] : bnf) { + for (const auto& list : to) { + for (const auto& element : list) { + auto i{result.find(element)}; + if (i != result.end()) // already present + i->second.insert(from); + else // new element + result.emplace(element, std::set{from}); + } + } + } + + return result; +} + + diff --git a/bnf.h b/bnf.h new file mode 100644 index 0000000..8aa2684 --- /dev/null +++ b/bnf.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +using namespace std::string_literals; + +using BNF = std::map>>; +using Terminals = std::set; +using ProgramNode = std::deque; +using PathElement = std::pair; // Name, Index + +std::map> Reverse(BNF bnf); diff --git a/grammer.cpp b/grammer.cpp new file mode 100644 index 0000000..a44104c --- /dev/null +++ b/grammer.cpp @@ -0,0 +1,14 @@ +#include "grammer.h" + +Compiler::Compiler(const BNF& bnf, const std::string& Top): m_bnf(bnf), m_Top(Top), ReverseBNF{Reverse(bnf)} +{ +} + +ProgramNode Compiler::compile(std::vector Tokens) +{ + if (Tokens.size()){ + } else + throw std::runtime_error("No tokens!"); + + return {}; +} diff --git a/grammer.h b/grammer.h new file mode 100644 index 0000000..5e76c60 --- /dev/null +++ b/grammer.h @@ -0,0 +1,19 @@ +#pragma once + +#include "bnf.h" +#include "minicc.h" + +class Compiler +{ + +private: + const BNF &m_bnf; + const std::string& m_Top; + + std::map> ReverseBNF; + +public: + Compiler(const BNF& bnf, const std::string& Top); + ProgramNode compile(std::vector Tokens); +}; + diff --git a/lexer.cpp b/lexer.cpp new file mode 100644 index 0000000..46a38bd --- /dev/null +++ b/lexer.cpp @@ -0,0 +1,292 @@ +#include "lexer.h" + +void Tree::clear() { + nodes.clear(); + root = 0; + last = 0; + node_num = 0; +} + +// Type of lexical token +std::string Tree::GetType() { + if (node_num > 0 && nodes[root].child_names.size() == 1) + return nodes[root].child_names[0]; + return ""; +} + +bool Tree::Valid(const std::string& Top) const { + // A token is non empty + if (node_num == 0) + return false; + + // Start symbol on top + auto rootNode{nodes.find(root)}; + if (rootNode == nodes.end()) + throw std::runtime_error("Node not found: "s + std::to_string(root)); + + if (rootNode->second.name != Top) + return false; + + // All nodes filled (implies all leaves are terminal) + for (const auto& [index, node]: nodes) { + if (node.childs.size() < node.child_names.size()) + return false; // node not filled + } + + return true; +} + +bool Tree::AddFirstNode(char c, const BNF& bnf, const std::map>& reverseBNF) { + node_num ++; + root = node_num; + last = node_num; + std::string node_name(1, char(c)); + + auto reverseRule{reverseBNF.find(node_name)}; + if (reverseRule == reverseBNF.end()) + throw std::runtime_error("Reverse rule not found for "s + node_name); + + auto rule{bnf.find(node_name)}; + if (rule != bnf.end()) { // multiple variants! + throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); + } + nodes.emplace(root, TreeNode{0, std::vector{}, std::vector{}, node_name}); + return true; +} + +std::vector Tree::getParentTreeNode(const BNF& bnf, const std::map>& reverseBNF) { + std::vector result; // default: empty + + auto& root_name {nodes[root].name}; + auto bnfParents {reverseBNF.find(root_name)}; + if (bnfParents == reverseBNF.end()) + return result; + + for (const auto& parent_node_name : bnfParents->second) { + auto lists {bnf.at(parent_node_name)}; + for (const auto& list : lists) { + if (list.size() > 0 && list[0] == root_name) { + TreeNode node{0, std::vector{root}, list, parent_node_name}; + result.push_back(node); + } + } + } + + return result; +} + +index_t Tree::GetLast() { + index_t result {root}; + + while(result != 0 && nodes[result].childs.size() >= 2) { + result = nodes[result].childs[nodes[result].childs.size() - 1]; + } + + return result; +} + +void Tree::AddRootNode(const TreeNode& newRootNode) { + node_num++; + nodes[node_num] = newRootNode; + root = node_num; + last = node_num; +} + +void Tree::RemoveRootNode() { + root = nodes[root].childs[0]; + nodes.erase(node_num); + node_num--; + last = GetLast(); +} + +// Path from leaf to root +std::vector Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map>& reverseBNF) { + std::vector result; + + while (a != b) { + auto parents {reverseBNF.find(a)}; + if (parents == reverseBNF.end()) + return {}; + + bool hit{false}; + for (const auto& parent : parents->second) { + for (const auto& list : bnf.at(parent)) { + if (list.size() > 0 && list[0] == a) { + if (!hit) { + result.push_back(a); + a = parent; + hit = true; + } else + throw std::runtime_error("Double match for "s + parent + "/"s + a); + } + } + } + } + if (a == b) { + result.push_back(a); + } + return result; +} + +index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map>& reverseBNF) +{ + TreeNode& parent {nodes[parent_index]}; + node_num++; + index_t index = node_num; + parent.childs.push_back(index); + std::vector child_names; + auto rule {bnf.find(name)}; + if (rule != bnf.end()) { + for (auto& list : rule->second) { + if (list.size() > 0 && list[0] == child_name) + child_names = list; + } + } + nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); + //root stays + last = GetLast(); + + return index; +} + +void Tree::AddPath(const std::vector& path, index_t current_index, const BNF& bnf, const std::map>& reverseBNF) { + for (int i = path.size() - 1; i >= 0; i--) { + std::string child_name; + if (i > 0) + child_name = path[i - 1]; + current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); + } +} + +// try to add character to tree +bool Tree::Add(char c, const BNF& bnf, const std::map>& reverseBNF) { + if (nodes.empty()) { // first node + return AddFirstNode(c, bnf, reverseBNF); + } else { // at least one character is already present + // Traverse tree until partially filled node found + // or new node can be added + index_t current_index{last}; + + while (current_index != 0) { + TreeNode& node {nodes[current_index]}; + if (node.childs.size() < node.child_names.size()) { // partially filled node + std::vector list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); + if (list.size() > 0) { + AddPath(list, current_index, bnf, reverseBNF); + return true; + } else { + return false; // The path a->b is not available via bnf + } + } + current_index = node.parent; + } + + // Add node at root + + std::vector parent_nodes = getParentTreeNode(bnf, reverseBNF); + if (parent_nodes.size() == 0) + throw std::runtime_error("Couldn't add new parent node."); + + for (const auto &i : parent_nodes) { + AddRootNode(i); + if (Add(c, bnf, reverseBNF)) + return true; + RemoveRootNode(); + } + + } + return false; +} + +// add path to start symbol +void Tree::Resolve(const BNF& bnf, const std::map>& reverseBNF) { + if (nodes.empty()) // only handle non-empty trees + return; + + while (true) { + std::string& old_root_name { nodes[root].name }; // current root node name + + auto parents {reverseBNF.find(old_root_name)}; + if (parents != reverseBNF.end()) { // parents in bnf available + bool hit{false}; + for (auto& parent : parents->second) { + for (const auto& list : bnf.at(parent)) { + if (list.size() == 1 && list[0] == old_root_name) { + if (!hit) { + const std::string& new_root_name {parent}; + // Add new TreeNode in the direction to root: + // New root with 1 child (old root) + nodes.emplace(++node_num, + TreeNode{0, // parent + std::vector{root}, // child indices + std::vector{old_root_name}, // child names + new_root_name // name + }); + nodes[root].parent = node_num; + root = node_num; + // this->last stays + hit = true; + } else + throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); + } + } + } + if (!hit) + break; + } else + break; + } +} + +void Lexer::FinalizeTree(Tree& tree, std::string& token, std::vector& result) +{ + tree.Resolve(bnf, ReverseBNF); + if (tree.Valid(Top)) { + result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); + token.clear(); + } + tree.clear(); +} + +Lexer::Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} +{ +} + +std::vector Lexer::Lex(const std::string& s) +{ + std::vector result; + std::string token; + + std::string Whitespace{"\t \n\r"}; + Tree tree; + + for (size_t pos{0}; pos < s.size(); pos++) { + char c{s[pos]}; + if (c == '\n') { + location.column = 0; + location.line++; + } else if (std::isprint(c)) { + location.column++; + } + + //std::cout << "Char: |" << c << "|" << std::endl; + if (Whitespace.find(c) != std::string::npos) { // found whitespace character + // evaluate token up to now and skip whitespace + FinalizeTree(tree, token, result); + } else { // no whitespace: try to add to tree + if (!tree.Add(c, bnf, ReverseBNF)) { + FinalizeTree(tree, token, result); + if (!tree.Add(c, bnf, ReverseBNF)) + throw std::runtime_error("Parse error"); + } + + token.push_back(c); + } + } + + // Final evaluation of last token + FinalizeTree(tree, token, result); + + return result; +} + diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..c571294 --- /dev/null +++ b/lexer.h @@ -0,0 +1,56 @@ +#pragma once + +#include "minicc.h" +#include "bnf.h" + +struct TreeNode { + index_t parent{}; + std::vector childs; // fill char by char + std::vector child_names; // fill always + std::string name; +}; + +class Tree { +private: + std::map nodes; // index 0 = non existing; index starting at 1 + index_t node_num{}; + index_t root{}; + index_t last{}; + +public: + void clear(); + std::string GetType(); + bool Valid(const std::string& Top) const; + bool AddFirstNode(char c, const BNF& bnf, const std::map>& reverseBNF); + std::vector getParentTreeNode(const BNF& bnf, const std::map>& reverseBNF); + index_t GetLast(); + void AddRootNode(const TreeNode& newRootNode); + void RemoveRootNode(); + std::vector GetPath(std::string a, std::string b, const BNF& bnf, const std::map>& reverseBNF); + index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map>& reverseBNF); + void AddPath(const std::vector& path, index_t current_index, const BNF& bnf, const std::map>& reverseBNF); + bool Add(char c, const BNF& bnf, const std::map>& reverseBNF); + void Resolve(const BNF& bnf, const std::map>& reverseBNF); +}; + +class Lexer +{ + +private: + const BNF &bnf; + const std::string& Top; + + Location location{1, 0}; + + std::map> ReverseBNF; + + // to be called on token end + void FinalizeTree(Tree& tree, std::string& token, std::vector& result); + +public: + Lexer(const BNF& bnf, const std::string& Top); + std::vector Lex(const std::string& s); + +}; + + diff --git a/minicc.cpp b/minicc.cpp index 0562491..48d2a15 100644 --- a/minicc.cpp +++ b/minicc.cpp @@ -1,7 +1,9 @@ -#include +#include "bnf.h" +#include "lexer.h" +#include "grammer.h" +#include "minicc.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" +#include #include #include @@ -12,13 +14,6 @@ #include #include -using namespace std::string_literals; - -using BNF = std::map>>; -using Terminals = std::set; -using ProgramNode = std::deque; -using PathElement = std::pair; // Name, Index - std::vector split(std::string s) { std::vector result; @@ -28,297 +23,11 @@ std::vector split(std::string s) return result; } -auto Reverse(BNF bnf){ - std::map> result; - - for (const auto& [from, to] : bnf) { - for (const auto& list : to) { - for (const auto& element : list) { - auto i{result.find(element)}; - if (i != result.end()) // already present - i->second.insert(from); - else // new element - result.emplace(element, std::set{from}); - } - } - } - - return result; -} - -using index_t = size_t; - -struct TreeNode { - index_t parent{}; - std::vector childs; // fill char by char - std::vector child_names; // fill always - std::string name; -}; - -class Tree { -private: - std::map nodes; // index 0 = non existing; index starting at 1 - index_t node_num{}; - index_t root{}; - index_t last{}; - -public: - void clear() { - nodes.clear(); - root = 0; - last = 0; - node_num = 0; - } - - // Type of lexical token - std::string GetType() { - if (node_num > 0 && nodes[root].child_names.size() == 1) - return nodes[root].child_names[0]; - return ""; - } - - bool Valid(const std::string& Top) const { - // A token is non empty - if (node_num == 0) - return false; - - // Start symbol on top - auto rootNode{nodes.find(root)}; - if (rootNode == nodes.end()) - throw std::runtime_error("Node not found: "s + std::to_string(root)); - - if (rootNode->second.name != Top) - return false; - - // All nodes filled (implies all leaves are terminal) - for (const auto& [index, node]: nodes) { - if (node.childs.size() < node.child_names.size()) - return false; // node not filled - } - - return true; - } - - bool AddFirstNode(char c, const BNF& bnf, const std::map>& reverseBNF) { - node_num ++; - root = node_num; - last = node_num; - std::string node_name(1, char(c)); - - auto reverseRule{reverseBNF.find(node_name)}; - if (reverseRule == reverseBNF.end()) - throw std::runtime_error("Reverse rule not found for "s + node_name); - - auto rule{bnf.find(node_name)}; - if (rule != bnf.end()) { // multiple variants! - throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s); - } - nodes.emplace(root, TreeNode{0, std::vector{}, std::vector{}, node_name}); - return true; - } - - std::vector getParentTreeNode(const BNF& bnf, const std::map>& reverseBNF) { - std::vector result; // default: empty - - auto& root_name {nodes[root].name}; - auto bnfParents {reverseBNF.find(root_name)}; - if (bnfParents == reverseBNF.end()) - return result; - - for (const auto& parent_node_name : bnfParents->second) { - auto lists {bnf.at(parent_node_name)}; - for (const auto& list : lists) { - if (list.size() > 0 && list[0] == root_name) { - TreeNode node{0, std::vector{root}, list, parent_node_name}; - result.push_back(node); - } - } - } - - return result; - } - - index_t GetLast() { - index_t result {root}; - - while(result != 0 && nodes[result].childs.size() >= 2) { - result = nodes[result].childs[nodes[result].childs.size() - 1]; - } - - return result; - } - - void AddRootNode(const TreeNode& newRootNode) { - node_num++; - nodes[node_num] = newRootNode; - root = node_num; - last = node_num; - } - - void RemoveRootNode() { - root = nodes[root].childs[0]; - nodes.erase(node_num); - node_num--; - last = GetLast(); - } - - // Path from leaf to root - std::vector GetPath(std::string a, std::string b, const BNF& bnf, const std::map>& reverseBNF) { - std::vector result; - - while (a != b) { - auto parents {reverseBNF.find(a)}; - if (parents == reverseBNF.end()) - return {}; - - bool hit{false}; - for (const auto& parent : parents->second) { - for (const auto& list : bnf.at(parent)) { - if (list.size() > 0 && list[0] == a) { - if (!hit) { - result.push_back(a); - a = parent; - hit = true; - } else - throw std::runtime_error("Double match for "s + parent + "/"s + a); - } - } - } - } - if (a == b) { - result.push_back(a); - } - return result; - } - - index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map>& reverseBNF) - { - TreeNode& parent {nodes[parent_index]}; - node_num++; - index_t index = node_num; - parent.childs.push_back(index); - std::vector child_names; - auto rule {bnf.find(name)}; - if (rule != bnf.end()) { - for (auto& list : rule->second) { - if (list.size() > 0 && list[0] == child_name) - child_names = list; - } - } - nodes.emplace(index, TreeNode{parent_index, {}, child_names, name}); - //root stays - last = GetLast(); - - return index; - } - - void AddPath(const std::vector& path, index_t current_index, const BNF& bnf, const std::map>& reverseBNF) { - for (int i = path.size() - 1; i >= 0; i--) { - std::string child_name; - if (i > 0) - child_name = path[i - 1]; - current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF); - } - } - - // try to add character to tree - bool Add(char c, const BNF& bnf, const std::map>& reverseBNF) { - if (nodes.empty()) { // first node - return AddFirstNode(c, bnf, reverseBNF); - } else { // at least one character is already present - // Traverse tree until partially filled node found - // or new node can be added - index_t current_index{last}; - - while (current_index != 0) { - TreeNode& node {nodes[current_index]}; - if (node.childs.size() < node.child_names.size()) { // partially filled node - std::vector list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF); - if (list.size() > 0) { - AddPath(list, current_index, bnf, reverseBNF); - return true; - } else { - return false; // The path a->b is not available via bnf - } - } - current_index = node.parent; - } - - // Add node at root - - std::vector parent_nodes = getParentTreeNode(bnf, reverseBNF); - if (parent_nodes.size() == 0) - throw std::runtime_error("Couldn't add new parent node."); - - for (const auto &i : parent_nodes) { - AddRootNode(i); - if (Add(c, bnf, reverseBNF)) - return true; - RemoveRootNode(); - } - - } - return false; - } - - // add path to start symbol - void Resolve(const BNF& bnf, const std::map>& reverseBNF) { - if (nodes.empty()) // only handle non-empty trees - return; - - while (true) { - std::string& old_root_name { nodes[root].name }; // current root node name - - auto parents {reverseBNF.find(old_root_name)}; - if (parents != reverseBNF.end()) { // parents in bnf available - bool hit{false}; - for (auto& parent : parents->second) { - for (const auto& list : bnf.at(parent)) { - if (list.size() == 1 && list[0] == old_root_name) { - if (!hit) { - const std::string& new_root_name {parent}; - // Add new TreeNode in the direction to root: - // New root with 1 child (old root) - nodes.emplace(++node_num, - TreeNode{0, // parent - std::vector{root}, // child indices - std::vector{old_root_name}, // child names - new_root_name // name - }); - nodes[root].parent = node_num; - root = node_num; - // this->last stays - hit = true; - } else - throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name); - } - } - } - if (!hit) - break; - } else - break; - } - } - -}; - -struct Location { - size_t line; - size_t column; -}; - bool operator==(const Location &a, const Location &b) { return (a.line == b.line && a.column == b.column); } -struct Token { - std::string type; - std::string value; - Location location; -}; - bool operator==(const Token &a, const Token &b) { return (a.type == b.type && a.value == b.value && a.location == b.location); @@ -328,173 +37,3 @@ std::ostream& operator<<(std::ostream& os, const Token& token) { return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")"; } -class Lexer -{ - -private: - const BNF &bnf; - const std::string& Top; - - Location location{1, 0}; - - std::map> ReverseBNF; - - // to be called on token end - void FinalizeTree(Tree& tree, std::string& token, std::vector& result) - { - tree.Resolve(bnf, ReverseBNF); - if (tree.Valid(Top)) { - result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); - token.clear(); - } - tree.clear(); - } - -public: - Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} - { - } - - std::vector Lex(const std::string& s) - { - std::vector result; - std::string token; - - std::string Whitespace{"\t \n\r"}; - Tree tree; - - for (size_t pos{0}; pos < s.size(); pos++) { - char c{s[pos]}; - if (c == '\n') { - location.column = 0; - location.line++; - } else if (std::isprint(c)) { - location.column++; - } - - //std::cout << "Char: |" << c << "|" << std::endl; - if (Whitespace.find(c) != std::string::npos) { // found whitespace character - // evaluate token up to now and skip whitespace - FinalizeTree(tree, token, result); - } else { // no whitespace: try to add to tree - if (!tree.Add(c, bnf, ReverseBNF)) { - FinalizeTree(tree, token, result); - if (!tree.Add(c, bnf, ReverseBNF)) - throw std::runtime_error("Parse error"); - } - - token.push_back(c); - } - } - - // Final evaluation of last token - FinalizeTree(tree, token, result); - - return result; - } - -}; - -class Compiler -{ - -private: - const BNF &bnf; - const std::string& Top; - - std::map> ReverseBNF; - -public: - Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)} - { - } - - ProgramNode compile(std::vector Tokens) - { - if (Tokens.size()){ - } else - throw std::runtime_error("No tokens!"); - - return {}; - } -}; - -class Test: public ::testing::Test { -protected: - Test(){} - ~Test() override {} -}; - -TEST_F(Test, BNF) { - std::string LexTop{"preprocessing-token"}; - BNF LexBNF{ - {"preprocessing-token", {{"identifier"}, - {"preprocessing-op-or-punc"}, - {"pp-number"}}}, - - {"identifier", {{"identifier-nondigit"}, - {"identifier", "identifier-nondigit"}, - {"identifier", "digit"}}}, - {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, - {"identifier-nondigit", - {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, - {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, - {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, - {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, - {"preprocessing-op-or-punc", {{";"}, - {"="}}}, - {"pp-number", {{"digit"}, - {"pp-number", "digit"}}} - }; - - std::string Top{"program"}; - BNF bnf{ - {"program", {{"statement-list"}}}, - {"statement-list", {{"statement", "statement-list"}, - {}, }}, - {"statement", {{"assigmnent", ";"}}}, - {"assignment", {{"identifier", "=", "identifier"}}} - }; - - // implicit? - //std::set Terminals{"identifier", "=", ";"}; - - std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; - std::vector tokens_reference{ - {"identifier", "a", { 1, 1} }, - {"preprocessing-op-or-punc", "=", { 1, 3}}, - {"identifier", "bc", { 1, 5}}, - {"preprocessing-op-or-punc", ";", { 1, 8}}, - {"identifier", "c", { 1, 10}}, - {"preprocessing-op-or-punc", "=", { 1, 12}}, - {"pp-number", "123", { 1, 14}}, - {"preprocessing-op-or-punc", ";", { 1, 18}}, - {"identifier", "esd", { 1, 20}}, - {"preprocessing-op-or-punc", "=", { 1, 24}}, - {"identifier", "Ff", { 1, 26}}, - {"preprocessing-op-or-punc", ";", { 1, 29}}, - {"pp-number", "1", { 1, 31}}, - {"preprocessing-op-or-punc", "=", { 1, 33}}, - {"identifier", "XYZ", { 1, 34}}, - }; - - Lexer lexer(LexBNF, LexTop); - auto tokens = lexer.Lex(Code); - - ASSERT_EQ(tokens, tokens_reference); -#if 0 - for (const auto& i: tokens) { - std::cout << i.value << std::endl; - } -#endif - Compiler compiler(bnf, Top); - auto Program = compiler.compile(tokens); - - //ASSERT_EQ(Program, Program_reference); -} - -int main(int argc, char* argv[]) { - ::testing::InitGoogleMock(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/minicc.h b/minicc.h new file mode 100644 index 0000000..0500b8c --- /dev/null +++ b/minicc.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include +#include + +using index_t = size_t; + +std::vector split(std::string s); + +struct Location { + size_t line; + size_t column; +}; + +bool operator==(const Location &a, const Location &b); + +struct Token { + std::string type; + std::string value; + Location location; +}; + +bool operator==(const Token &a, const Token &b); +std::ostream& operator<<(std::ostream& os, const Token& token); diff --git a/test-lexer.cpp b/test-lexer.cpp new file mode 100644 index 0000000..39db3ec --- /dev/null +++ b/test-lexer.cpp @@ -0,0 +1,97 @@ +#include "bnf.h" +#include "lexer.h" +#include "grammer.h" +#include "minicc.h" + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +class Test: public ::testing::Test { +protected: + Test(){} + ~Test() override {} +}; + +TEST_F(Test, BNF) { + std::string LexTop{"preprocessing-token"}; + BNF LexBNF{ + {"preprocessing-token", {{"identifier"}, + {"preprocessing-op-or-punc"}, + {"pp-number"}}}, + + {"identifier", {{"identifier-nondigit"}, + {"identifier", "identifier-nondigit"}, + {"identifier", "digit"}}}, + {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }}, + {"identifier-nondigit", + {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"}, + {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"}, + {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, + {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}}, + {"preprocessing-op-or-punc", {{";"}, + {"="}}}, + {"pp-number", {{"digit"}, + {"pp-number", "digit"}}} + }; + + std::string Top{"program"}; + BNF bnf{ + {"program", {{"statement-list"}}}, + {"statement-list", {{"statement", "statement-list"}, + {}, }}, + {"statement", {{"assigmnent", ";"}}}, + {"assignment", {{"identifier", "=", "identifier"}}} + }; + + // implicit? + //std::set Terminals{"identifier", "=", ";"}; + + std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; + std::vector tokens_reference{ + {"identifier", "a", { 1, 1} }, + {"preprocessing-op-or-punc", "=", { 1, 3}}, + {"identifier", "bc", { 1, 5}}, + {"preprocessing-op-or-punc", ";", { 1, 8}}, + {"identifier", "c", { 1, 10}}, + {"preprocessing-op-or-punc", "=", { 1, 12}}, + {"pp-number", "123", { 1, 14}}, + {"preprocessing-op-or-punc", ";", { 1, 18}}, + {"identifier", "esd", { 1, 20}}, + {"preprocessing-op-or-punc", "=", { 1, 24}}, + {"identifier", "Ff", { 1, 26}}, + {"preprocessing-op-or-punc", ";", { 1, 29}}, + {"pp-number", "1", { 1, 31}}, + {"preprocessing-op-or-punc", "=", { 1, 33}}, + {"identifier", "XYZ", { 1, 34}}, + }; + + Lexer lexer(LexBNF, LexTop); + auto tokens = lexer.Lex(Code); + + ASSERT_EQ(tokens, tokens_reference); +#if 0 + for (const auto& i: tokens) { + std::cout << i.value << std::endl; + } +#endif + Compiler compiler(bnf, Top); + auto Program = compiler.compile(tokens); + + //ASSERT_EQ(Program, Program_reference); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleMock(&argc, argv); + return RUN_ALL_TESTS(); +} -- cgit v1.2.3