summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-01-21 22:49:30 +0100
committerRoland Reichwein <mail@reichwein.it>2020-01-21 22:49:30 +0100
commitf4b2027868c9733bbbbcb4c5ec6d5462a8447e5d (patch)
tree40ce459f1a501d6c88936c78f6dbcbb8aadd04ca
parent08997620fd617b580c1adbcb03c90cf621aa7069 (diff)
Separate to cpp files
-rw-r--r--Makefile4
-rw-r--r--bnf.cpp21
-rw-r--r--bnf.h17
-rw-r--r--grammer.cpp14
-rw-r--r--grammer.h19
-rw-r--r--lexer.cpp292
-rw-r--r--lexer.h56
-rw-r--r--minicc.cpp471
-rw-r--r--minicc.h26
-rw-r--r--test-lexer.cpp97
10 files changed, 551 insertions, 466 deletions
diff --git a/Makefile b/Makefile
index af47b09..547e905 100644
--- a/Makefile
+++ b/Makefile
@@ -44,6 +44,10 @@ endif
SRC=\
minicc.cpp \
+ lexer.cpp \
+ grammer.cpp \
+ bnf.cpp \
+ test-lexer.cpp \
googletest/src/gtest-all.cpp \
googlemock/src/gmock-all.cpp
diff --git a/bnf.cpp b/bnf.cpp
new file mode 100644
index 0000000..1efb459
--- /dev/null
+++ b/bnf.cpp
@@ -0,0 +1,21 @@
+#include "bnf.h"
+
+std::map<std::string, std::set<std::string>> Reverse(BNF bnf){
+ std::map<std::string, std::set<std::string>> result;
+
+ for (const auto& [from, to] : bnf) {
+ for (const auto& list : to) {
+ for (const auto& element : list) {
+ auto i{result.find(element)};
+ if (i != result.end()) // already present
+ i->second.insert(from);
+ else // new element
+ result.emplace(element, std::set{from});
+ }
+ }
+ }
+
+ return result;
+}
+
+
diff --git a/bnf.h b/bnf.h
new file mode 100644
index 0000000..8aa2684
--- /dev/null
+++ b/bnf.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <deque>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace std::string_literals;
+
+using BNF = std::map<std::string, std::vector<std::vector<std::string>>>;
+using Terminals = std::set<std::string>;
+using ProgramNode = std::deque<std::string>;
+using PathElement = std::pair<std::string, size_t>; // Name, Index
+
+std::map<std::string, std::set<std::string>> Reverse(BNF bnf);
diff --git a/grammer.cpp b/grammer.cpp
new file mode 100644
index 0000000..a44104c
--- /dev/null
+++ b/grammer.cpp
@@ -0,0 +1,14 @@
+#include "grammer.h"
+
+Compiler::Compiler(const BNF& bnf, const std::string& Top): m_bnf(bnf), m_Top(Top), ReverseBNF{Reverse(bnf)}
+{
+}
+
+ProgramNode Compiler::compile(std::vector<Token> Tokens)
+{
+ if (Tokens.size()){
+ } else
+ throw std::runtime_error("No tokens!");
+
+ return {};
+}
diff --git a/grammer.h b/grammer.h
new file mode 100644
index 0000000..5e76c60
--- /dev/null
+++ b/grammer.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "bnf.h"
+#include "minicc.h"
+
+class Compiler
+{
+
+private:
+ const BNF &m_bnf;
+ const std::string& m_Top;
+
+ std::map<std::string, std::set<std::string>> ReverseBNF;
+
+public:
+ Compiler(const BNF& bnf, const std::string& Top);
+ ProgramNode compile(std::vector<Token> Tokens);
+};
+
diff --git a/lexer.cpp b/lexer.cpp
new file mode 100644
index 0000000..46a38bd
--- /dev/null
+++ b/lexer.cpp
@@ -0,0 +1,292 @@
+#include "lexer.h"
+
+void Tree::clear() {
+ nodes.clear();
+ root = 0;
+ last = 0;
+ node_num = 0;
+}
+
+// Type of lexical token
+std::string Tree::GetType() {
+ if (node_num > 0 && nodes[root].child_names.size() == 1)
+ return nodes[root].child_names[0];
+ return "";
+}
+
+bool Tree::Valid(const std::string& Top) const {
+ // A token is non empty
+ if (node_num == 0)
+ return false;
+
+ // Start symbol on top
+ auto rootNode{nodes.find(root)};
+ if (rootNode == nodes.end())
+ throw std::runtime_error("Node not found: "s + std::to_string(root));
+
+ if (rootNode->second.name != Top)
+ return false;
+
+ // All nodes filled (implies all leaves are terminal)
+ for (const auto& [index, node]: nodes) {
+ if (node.childs.size() < node.child_names.size())
+ return false; // node not filled
+ }
+
+ return true;
+}
+
+bool Tree::AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ node_num ++;
+ root = node_num;
+ last = node_num;
+ std::string node_name(1, char(c));
+
+ auto reverseRule{reverseBNF.find(node_name)};
+ if (reverseRule == reverseBNF.end())
+ throw std::runtime_error("Reverse rule not found for "s + node_name);
+
+ auto rule{bnf.find(node_name)};
+ if (rule != bnf.end()) { // multiple variants!
+ throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s);
+ }
+ nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name});
+ return true;
+}
+
+std::vector<TreeNode> Tree::getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ std::vector<TreeNode> result; // default: empty
+
+ auto& root_name {nodes[root].name};
+ auto bnfParents {reverseBNF.find(root_name)};
+ if (bnfParents == reverseBNF.end())
+ return result;
+
+ for (const auto& parent_node_name : bnfParents->second) {
+ auto lists {bnf.at(parent_node_name)};
+ for (const auto& list : lists) {
+ if (list.size() > 0 && list[0] == root_name) {
+ TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name};
+ result.push_back(node);
+ }
+ }
+ }
+
+ return result;
+}
+
+index_t Tree::GetLast() {
+ index_t result {root};
+
+ while(result != 0 && nodes[result].childs.size() >= 2) {
+ result = nodes[result].childs[nodes[result].childs.size() - 1];
+ }
+
+ return result;
+}
+
+void Tree::AddRootNode(const TreeNode& newRootNode) {
+ node_num++;
+ nodes[node_num] = newRootNode;
+ root = node_num;
+ last = node_num;
+}
+
+void Tree::RemoveRootNode() {
+ root = nodes[root].childs[0];
+ nodes.erase(node_num);
+ node_num--;
+ last = GetLast();
+}
+
+// Path from leaf to root
+std::vector<std::string> Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ std::vector<std::string> result;
+
+ while (a != b) {
+ auto parents {reverseBNF.find(a)};
+ if (parents == reverseBNF.end())
+ return {};
+
+ bool hit{false};
+ for (const auto& parent : parents->second) {
+ for (const auto& list : bnf.at(parent)) {
+ if (list.size() > 0 && list[0] == a) {
+ if (!hit) {
+ result.push_back(a);
+ a = parent;
+ hit = true;
+ } else
+ throw std::runtime_error("Double match for "s + parent + "/"s + a);
+ }
+ }
+ }
+ }
+ if (a == b) {
+ result.push_back(a);
+ }
+ return result;
+}
+
+index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF)
+{
+ TreeNode& parent {nodes[parent_index]};
+ node_num++;
+ index_t index = node_num;
+ parent.childs.push_back(index);
+ std::vector<std::string> child_names;
+ auto rule {bnf.find(name)};
+ if (rule != bnf.end()) {
+ for (auto& list : rule->second) {
+ if (list.size() > 0 && list[0] == child_name)
+ child_names = list;
+ }
+ }
+ nodes.emplace(index, TreeNode{parent_index, {}, child_names, name});
+ //root stays
+ last = GetLast();
+
+ return index;
+}
+
+void Tree::AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ for (int i = path.size() - 1; i >= 0; i--) {
+ std::string child_name;
+ if (i > 0)
+ child_name = path[i - 1];
+ current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF);
+ }
+}
+
+// try to add character to tree
+bool Tree::Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ if (nodes.empty()) { // first node
+ return AddFirstNode(c, bnf, reverseBNF);
+ } else { // at least one character is already present
+ // Traverse tree until partially filled node found
+ // or new node can be added
+ index_t current_index{last};
+
+ while (current_index != 0) {
+ TreeNode& node {nodes[current_index]};
+ if (node.childs.size() < node.child_names.size()) { // partially filled node
+ std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF);
+ if (list.size() > 0) {
+ AddPath(list, current_index, bnf, reverseBNF);
+ return true;
+ } else {
+ return false; // The path a->b is not available via bnf
+ }
+ }
+ current_index = node.parent;
+ }
+
+ // Add node at root
+
+ std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF);
+ if (parent_nodes.size() == 0)
+ throw std::runtime_error("Couldn't add new parent node.");
+
+ for (const auto &i : parent_nodes) {
+ AddRootNode(i);
+ if (Add(c, bnf, reverseBNF))
+ return true;
+ RemoveRootNode();
+ }
+
+ }
+ return false;
+}
+
+// add path to start symbol
+void Tree::Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ if (nodes.empty()) // only handle non-empty trees
+ return;
+
+ while (true) {
+ std::string& old_root_name { nodes[root].name }; // current root node name
+
+ auto parents {reverseBNF.find(old_root_name)};
+ if (parents != reverseBNF.end()) { // parents in bnf available
+ bool hit{false};
+ for (auto& parent : parents->second) {
+ for (const auto& list : bnf.at(parent)) {
+ if (list.size() == 1 && list[0] == old_root_name) {
+ if (!hit) {
+ const std::string& new_root_name {parent};
+ // Add new TreeNode in the direction to root:
+ // New root with 1 child (old root)
+ nodes.emplace(++node_num,
+ TreeNode{0, // parent
+ std::vector<index_t>{root}, // child indices
+ std::vector<std::string>{old_root_name}, // child names
+ new_root_name // name
+ });
+ nodes[root].parent = node_num;
+ root = node_num;
+ // this->last stays
+ hit = true;
+ } else
+ throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name);
+ }
+ }
+ }
+ if (!hit)
+ break;
+ } else
+ break;
+ }
+}
+
+void Lexer::FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result)
+{
+ tree.Resolve(bnf, ReverseBNF);
+ if (tree.Valid(Top)) {
+ result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}});
+ token.clear();
+ }
+ tree.clear();
+}
+
+Lexer::Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
+{
+}
+
+std::vector<Token> Lexer::Lex(const std::string& s)
+{
+ std::vector<Token> result;
+ std::string token;
+
+ std::string Whitespace{"\t \n\r"};
+ Tree tree;
+
+ for (size_t pos{0}; pos < s.size(); pos++) {
+ char c{s[pos]};
+ if (c == '\n') {
+ location.column = 0;
+ location.line++;
+ } else if (std::isprint(c)) {
+ location.column++;
+ }
+
+ //std::cout << "Char: |" << c << "|" << std::endl;
+ if (Whitespace.find(c) != std::string::npos) { // found whitespace character
+ // evaluate token up to now and skip whitespace
+ FinalizeTree(tree, token, result);
+ } else { // no whitespace: try to add to tree
+ if (!tree.Add(c, bnf, ReverseBNF)) {
+ FinalizeTree(tree, token, result);
+ if (!tree.Add(c, bnf, ReverseBNF))
+ throw std::runtime_error("Parse error");
+ }
+
+ token.push_back(c);
+ }
+ }
+
+ // Final evaluation of last token
+ FinalizeTree(tree, token, result);
+
+ return result;
+}
+
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..c571294
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "minicc.h"
+#include "bnf.h"
+
+struct TreeNode {
+ index_t parent{};
+ std::vector<index_t> childs; // fill char by char
+ std::vector<std::string> child_names; // fill always
+ std::string name;
+};
+
+class Tree {
+private:
+ std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1
+ index_t node_num{};
+ index_t root{};
+ index_t last{};
+
+public:
+ void clear();
+ std::string GetType();
+ bool Valid(const std::string& Top) const;
+ bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ index_t GetLast();
+ void AddRootNode(const TreeNode& newRootNode);
+ void RemoveRootNode();
+ std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+ void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF);
+};
+
+class Lexer
+{
+
+private:
+ const BNF &bnf;
+ const std::string& Top;
+
+ Location location{1, 0};
+
+ std::map<std::string, std::set<std::string>> ReverseBNF;
+
+ // to be called on token end
+ void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result);
+
+public:
+ Lexer(const BNF& bnf, const std::string& Top);
+ std::vector<Token> Lex(const std::string& s);
+
+};
+
+
diff --git a/minicc.cpp b/minicc.cpp
index 0562491..48d2a15 100644
--- a/minicc.cpp
+++ b/minicc.cpp
@@ -1,7 +1,9 @@
-#include <boost/algorithm/string.hpp>
+#include "bnf.h"
+#include "lexer.h"
+#include "grammer.h"
+#include "minicc.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
+#include <boost/algorithm/string.hpp>
#include <algorithm>
#include <cctype>
@@ -12,13 +14,6 @@
#include <utility>
#include <vector>
-using namespace std::string_literals;
-
-using BNF = std::map<std::string, std::vector<std::vector<std::string>>>;
-using Terminals = std::set<std::string>;
-using ProgramNode = std::deque<std::string>;
-using PathElement = std::pair<std::string, size_t>; // Name, Index
-
std::vector<std::string> split(std::string s)
{
std::vector<std::string> result;
@@ -28,297 +23,11 @@ std::vector<std::string> split(std::string s)
return result;
}
-auto Reverse(BNF bnf){
- std::map<std::string, std::set<std::string>> result;
-
- for (const auto& [from, to] : bnf) {
- for (const auto& list : to) {
- for (const auto& element : list) {
- auto i{result.find(element)};
- if (i != result.end()) // already present
- i->second.insert(from);
- else // new element
- result.emplace(element, std::set{from});
- }
- }
- }
-
- return result;
-}
-
-using index_t = size_t;
-
-struct TreeNode {
- index_t parent{};
- std::vector<index_t> childs; // fill char by char
- std::vector<std::string> child_names; // fill always
- std::string name;
-};
-
-class Tree {
-private:
- std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1
- index_t node_num{};
- index_t root{};
- index_t last{};
-
-public:
- void clear() {
- nodes.clear();
- root = 0;
- last = 0;
- node_num = 0;
- }
-
- // Type of lexical token
- std::string GetType() {
- if (node_num > 0 && nodes[root].child_names.size() == 1)
- return nodes[root].child_names[0];
- return "";
- }
-
- bool Valid(const std::string& Top) const {
- // A token is non empty
- if (node_num == 0)
- return false;
-
- // Start symbol on top
- auto rootNode{nodes.find(root)};
- if (rootNode == nodes.end())
- throw std::runtime_error("Node not found: "s + std::to_string(root));
-
- if (rootNode->second.name != Top)
- return false;
-
- // All nodes filled (implies all leaves are terminal)
- for (const auto& [index, node]: nodes) {
- if (node.childs.size() < node.child_names.size())
- return false; // node not filled
- }
-
- return true;
- }
-
- bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- node_num ++;
- root = node_num;
- last = node_num;
- std::string node_name(1, char(c));
-
- auto reverseRule{reverseBNF.find(node_name)};
- if (reverseRule == reverseBNF.end())
- throw std::runtime_error("Reverse rule not found for "s + node_name);
-
- auto rule{bnf.find(node_name)};
- if (rule != bnf.end()) { // multiple variants!
- throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s);
- }
- nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name});
- return true;
- }
-
- std::vector<TreeNode> getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- std::vector<TreeNode> result; // default: empty
-
- auto& root_name {nodes[root].name};
- auto bnfParents {reverseBNF.find(root_name)};
- if (bnfParents == reverseBNF.end())
- return result;
-
- for (const auto& parent_node_name : bnfParents->second) {
- auto lists {bnf.at(parent_node_name)};
- for (const auto& list : lists) {
- if (list.size() > 0 && list[0] == root_name) {
- TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name};
- result.push_back(node);
- }
- }
- }
-
- return result;
- }
-
- index_t GetLast() {
- index_t result {root};
-
- while(result != 0 && nodes[result].childs.size() >= 2) {
- result = nodes[result].childs[nodes[result].childs.size() - 1];
- }
-
- return result;
- }
-
- void AddRootNode(const TreeNode& newRootNode) {
- node_num++;
- nodes[node_num] = newRootNode;
- root = node_num;
- last = node_num;
- }
-
- void RemoveRootNode() {
- root = nodes[root].childs[0];
- nodes.erase(node_num);
- node_num--;
- last = GetLast();
- }
-
- // Path from leaf to root
- std::vector<std::string> GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- std::vector<std::string> result;
-
- while (a != b) {
- auto parents {reverseBNF.find(a)};
- if (parents == reverseBNF.end())
- return {};
-
- bool hit{false};
- for (const auto& parent : parents->second) {
- for (const auto& list : bnf.at(parent)) {
- if (list.size() > 0 && list[0] == a) {
- if (!hit) {
- result.push_back(a);
- a = parent;
- hit = true;
- } else
- throw std::runtime_error("Double match for "s + parent + "/"s + a);
- }
- }
- }
- }
- if (a == b) {
- result.push_back(a);
- }
- return result;
- }
-
- index_t AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF)
- {
- TreeNode& parent {nodes[parent_index]};
- node_num++;
- index_t index = node_num;
- parent.childs.push_back(index);
- std::vector<std::string> child_names;
- auto rule {bnf.find(name)};
- if (rule != bnf.end()) {
- for (auto& list : rule->second) {
- if (list.size() > 0 && list[0] == child_name)
- child_names = list;
- }
- }
- nodes.emplace(index, TreeNode{parent_index, {}, child_names, name});
- //root stays
- last = GetLast();
-
- return index;
- }
-
- void AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- for (int i = path.size() - 1; i >= 0; i--) {
- std::string child_name;
- if (i > 0)
- child_name = path[i - 1];
- current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF);
- }
- }
-
- // try to add character to tree
- bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- if (nodes.empty()) { // first node
- return AddFirstNode(c, bnf, reverseBNF);
- } else { // at least one character is already present
- // Traverse tree until partially filled node found
- // or new node can be added
- index_t current_index{last};
-
- while (current_index != 0) {
- TreeNode& node {nodes[current_index]};
- if (node.childs.size() < node.child_names.size()) { // partially filled node
- std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF);
- if (list.size() > 0) {
- AddPath(list, current_index, bnf, reverseBNF);
- return true;
- } else {
- return false; // The path a->b is not available via bnf
- }
- }
- current_index = node.parent;
- }
-
- // Add node at root
-
- std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF);
- if (parent_nodes.size() == 0)
- throw std::runtime_error("Couldn't add new parent node.");
-
- for (const auto &i : parent_nodes) {
- AddRootNode(i);
- if (Add(c, bnf, reverseBNF))
- return true;
- RemoveRootNode();
- }
-
- }
- return false;
- }
-
- // add path to start symbol
- void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- if (nodes.empty()) // only handle non-empty trees
- return;
-
- while (true) {
- std::string& old_root_name { nodes[root].name }; // current root node name
-
- auto parents {reverseBNF.find(old_root_name)};
- if (parents != reverseBNF.end()) { // parents in bnf available
- bool hit{false};
- for (auto& parent : parents->second) {
- for (const auto& list : bnf.at(parent)) {
- if (list.size() == 1 && list[0] == old_root_name) {
- if (!hit) {
- const std::string& new_root_name {parent};
- // Add new TreeNode in the direction to root:
- // New root with 1 child (old root)
- nodes.emplace(++node_num,
- TreeNode{0, // parent
- std::vector<index_t>{root}, // child indices
- std::vector<std::string>{old_root_name}, // child names
- new_root_name // name
- });
- nodes[root].parent = node_num;
- root = node_num;
- // this->last stays
- hit = true;
- } else
- throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name);
- }
- }
- }
- if (!hit)
- break;
- } else
- break;
- }
- }
-
-};
-
-struct Location {
- size_t line;
- size_t column;
-};
-
bool operator==(const Location &a, const Location &b)
{
return (a.line == b.line && a.column == b.column);
}
-struct Token {
- std::string type;
- std::string value;
- Location location;
-};
-
bool operator==(const Token &a, const Token &b)
{
return (a.type == b.type && a.value == b.value && a.location == b.location);
@@ -328,173 +37,3 @@ std::ostream& operator<<(std::ostream& os, const Token& token) {
return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")";
}
-class Lexer
-{
-
-private:
- const BNF &bnf;
- const std::string& Top;
-
- Location location{1, 0};
-
- std::map<std::string, std::set<std::string>> ReverseBNF;
-
- // to be called on token end
- void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result)
- {
- tree.Resolve(bnf, ReverseBNF);
- if (tree.Valid(Top)) {
- result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}});
- token.clear();
- }
- tree.clear();
- }
-
-public:
- Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
- {
- }
-
- std::vector<Token> Lex(const std::string& s)
- {
- std::vector<Token> result;
- std::string token;
-
- std::string Whitespace{"\t \n\r"};
- Tree tree;
-
- for (size_t pos{0}; pos < s.size(); pos++) {
- char c{s[pos]};
- if (c == '\n') {
- location.column = 0;
- location.line++;
- } else if (std::isprint(c)) {
- location.column++;
- }
-
- //std::cout << "Char: |" << c << "|" << std::endl;
- if (Whitespace.find(c) != std::string::npos) { // found whitespace character
- // evaluate token up to now and skip whitespace
- FinalizeTree(tree, token, result);
- } else { // no whitespace: try to add to tree
- if (!tree.Add(c, bnf, ReverseBNF)) {
- FinalizeTree(tree, token, result);
- if (!tree.Add(c, bnf, ReverseBNF))
- throw std::runtime_error("Parse error");
- }
-
- token.push_back(c);
- }
- }
-
- // Final evaluation of last token
- FinalizeTree(tree, token, result);
-
- return result;
- }
-
-};
-
-class Compiler
-{
-
-private:
- const BNF &bnf;
- const std::string& Top;
-
- std::map<std::string, std::set<std::string>> ReverseBNF;
-
-public:
- Compiler(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
- {
- }
-
- ProgramNode compile(std::vector<Token> Tokens)
- {
- if (Tokens.size()){
- } else
- throw std::runtime_error("No tokens!");
-
- return {};
- }
-};
-
-class Test: public ::testing::Test {
-protected:
- Test(){}
- ~Test() override {}
-};
-
-TEST_F(Test, BNF) {
- std::string LexTop{"preprocessing-token"};
- BNF LexBNF{
- {"preprocessing-token", {{"identifier"},
- {"preprocessing-op-or-punc"},
- {"pp-number"}}},
-
- {"identifier", {{"identifier-nondigit"},
- {"identifier", "identifier-nondigit"},
- {"identifier", "digit"}}},
- {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }},
- {"identifier-nondigit",
- {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"},
- {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"},
- {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"},
- {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}},
- {"preprocessing-op-or-punc", {{";"},
- {"="}}},
- {"pp-number", {{"digit"},
- {"pp-number", "digit"}}}
- };
-
- std::string Top{"program"};
- BNF bnf{
- {"program", {{"statement-list"}}},
- {"statement-list", {{"statement", "statement-list"},
- {}, }},
- {"statement", {{"assigmnent", ";"}}},
- {"assignment", {{"identifier", "=", "identifier"}}}
- };
-
- // implicit?
- //std::set<std::string> Terminals{"identifier", "=", ";"};
-
- std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"};
- std::vector<Token> tokens_reference{
- {"identifier", "a", { 1, 1} },
- {"preprocessing-op-or-punc", "=", { 1, 3}},
- {"identifier", "bc", { 1, 5}},
- {"preprocessing-op-or-punc", ";", { 1, 8}},
- {"identifier", "c", { 1, 10}},
- {"preprocessing-op-or-punc", "=", { 1, 12}},
- {"pp-number", "123", { 1, 14}},
- {"preprocessing-op-or-punc", ";", { 1, 18}},
- {"identifier", "esd", { 1, 20}},
- {"preprocessing-op-or-punc", "=", { 1, 24}},
- {"identifier", "Ff", { 1, 26}},
- {"preprocessing-op-or-punc", ";", { 1, 29}},
- {"pp-number", "1", { 1, 31}},
- {"preprocessing-op-or-punc", "=", { 1, 33}},
- {"identifier", "XYZ", { 1, 34}},
- };
-
- Lexer lexer(LexBNF, LexTop);
- auto tokens = lexer.Lex(Code);
-
- ASSERT_EQ(tokens, tokens_reference);
-#if 0
- for (const auto& i: tokens) {
- std::cout << i.value << std::endl;
- }
-#endif
- Compiler compiler(bnf, Top);
- auto Program = compiler.compile(tokens);
-
- //ASSERT_EQ(Program, Program_reference);
-}
-
-int main(int argc, char* argv[]) {
- ::testing::InitGoogleMock(&argc, argv);
- return RUN_ALL_TESTS();
-}
-
diff --git a/minicc.h b/minicc.h
new file mode 100644
index 0000000..0500b8c
--- /dev/null
+++ b/minicc.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <string>
+#include <iostream>
+
+using index_t = size_t;
+
+std::vector<std::string> split(std::string s);
+
+struct Location {
+ size_t line;
+ size_t column;
+};
+
+bool operator==(const Location &a, const Location &b);
+
+struct Token {
+ std::string type;
+ std::string value;
+ Location location;
+};
+
+bool operator==(const Token &a, const Token &b);
+std::ostream& operator<<(std::ostream& os, const Token& token);
diff --git a/test-lexer.cpp b/test-lexer.cpp
new file mode 100644
index 0000000..39db3ec
--- /dev/null
+++ b/test-lexer.cpp
@@ -0,0 +1,97 @@
+#include "bnf.h"
+#include "lexer.h"
+#include "grammer.h"
+#include "minicc.h"
+
+#include <boost/algorithm/string.hpp>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <cctype>
+#include <deque>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+class Test: public ::testing::Test {
+protected:
+ Test(){}
+ ~Test() override {}
+};
+
+TEST_F(Test, BNF) {
+ std::string LexTop{"preprocessing-token"};
+ BNF LexBNF{
+ {"preprocessing-token", {{"identifier"},
+ {"preprocessing-op-or-punc"},
+ {"pp-number"}}},
+
+ {"identifier", {{"identifier-nondigit"},
+ {"identifier", "identifier-nondigit"},
+ {"identifier", "digit"}}},
+ {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }},
+ {"identifier-nondigit",
+ {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"},
+ {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"},
+ {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"},
+ {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}},
+ {"preprocessing-op-or-punc", {{";"},
+ {"="}}},
+ {"pp-number", {{"digit"},
+ {"pp-number", "digit"}}}
+ };
+
+ std::string Top{"program"};
+ BNF bnf{
+ {"program", {{"statement-list"}}},
+ {"statement-list", {{"statement", "statement-list"},
+ {}, }},
+ {"statement", {{"assigmnent", ";"}}},
+ {"assignment", {{"identifier", "=", "identifier"}}}
+ };
+
+ // implicit?
+ //std::set<std::string> Terminals{"identifier", "=", ";"};
+
+ std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"};
+ std::vector<Token> tokens_reference{
+ {"identifier", "a", { 1, 1} },
+ {"preprocessing-op-or-punc", "=", { 1, 3}},
+ {"identifier", "bc", { 1, 5}},
+ {"preprocessing-op-or-punc", ";", { 1, 8}},
+ {"identifier", "c", { 1, 10}},
+ {"preprocessing-op-or-punc", "=", { 1, 12}},
+ {"pp-number", "123", { 1, 14}},
+ {"preprocessing-op-or-punc", ";", { 1, 18}},
+ {"identifier", "esd", { 1, 20}},
+ {"preprocessing-op-or-punc", "=", { 1, 24}},
+ {"identifier", "Ff", { 1, 26}},
+ {"preprocessing-op-or-punc", ";", { 1, 29}},
+ {"pp-number", "1", { 1, 31}},
+ {"preprocessing-op-or-punc", "=", { 1, 33}},
+ {"identifier", "XYZ", { 1, 34}},
+ };
+
+ Lexer lexer(LexBNF, LexTop);
+ auto tokens = lexer.Lex(Code);
+
+ ASSERT_EQ(tokens, tokens_reference);
+#if 0
+ for (const auto& i: tokens) {
+ std::cout << i.value << std::endl;
+ }
+#endif
+ Compiler compiler(bnf, Top);
+ auto Program = compiler.compile(tokens);
+
+ //ASSERT_EQ(Program, Program_reference);
+}
+
+int main(int argc, char* argv[]) {
+ ::testing::InitGoogleMock(&argc, argv);
+ return RUN_ALL_TESTS();
+}