summaryrefslogtreecommitdiffhomepage
path: root/lexer.cpp
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-03-14 15:24:23 +0100
committerRoland Reichwein <mail@reichwein.it>2020-03-14 15:24:23 +0100
commit15a56fcb5f29b8507298144a835a819de652e788 (patch)
tree1527d3d5996389f00f9f0afa2a4fa1122a794830 /lexer.cpp
parent6fcfe0a5cf8f53658e50e346076768eec229e695 (diff)
Prepare DFA
Diffstat (limited to 'lexer.cpp')
-rw-r--r--lexer.cpp283
1 files changed, 1 insertions, 282 deletions
diff --git a/lexer.cpp b/lexer.cpp
index 63330c1..4a458c3 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -2,294 +2,13 @@
using namespace Lex;
-void Tree::clear() {
- nodes.clear();
- root = 0;
- last = 0;
- node_num = 0;
-}
-
-// Type of lexical token
-std::string Tree::GetType() {
- if (node_num > 0 && nodes[root].child_names.size() == 1)
- return nodes[root].child_names[0];
- return "";
-}
-
-bool Tree::Valid(const std::string& Top) const {
- // A token is non empty
- if (node_num == 0)
- return false;
-
- // Start symbol on top
- auto rootNode{nodes.find(root)};
- if (rootNode == nodes.end())
- throw std::runtime_error("Node not found: "s + std::to_string(root));
-
- if (rootNode->second.name != Top)
- return false;
-
- // All nodes filled (implies all leaves are terminal)
- for (const auto& [index, node]: nodes) {
- if (node.childs.size() < node.child_names.size())
- return false; // node not filled
- }
-
- return true;
-}
-
-bool Tree::AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- node_num ++;
- root = node_num;
- last = node_num;
- std::string node_name(1, char(c));
-
- auto reverseRule{reverseBNF.find(node_name)};
- if (reverseRule == reverseBNF.end())
- throw std::runtime_error("Reverse rule not found for "s + node_name);
-
- auto rule{bnf.find(node_name)};
- if (rule != bnf.end()) { // multiple variants!
- throw std::runtime_error("BNF rule for terminal symbol "s + node_name + " found."s);
- }
- nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name});
- return true;
-}
-
-std::vector<TreeNode> Tree::getParentTreeNode(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- std::vector<TreeNode> result; // default: empty
-
- auto& root_name {nodes[root].name};
- auto bnfParents {reverseBNF.find(root_name)};
- if (bnfParents == reverseBNF.end())
- return result;
-
- for (const auto& parent_node_name : bnfParents->second) {
- auto lists {bnf.at(parent_node_name)};
- for (const auto& list : lists) {
- if (list.size() > 0 && list[0] == root_name) {
- TreeNode node{0, std::vector<index_t>{root}, list, parent_node_name};
- result.push_back(node);
- }
- }
- }
-
- return result;
-}
-
-index_t Tree::GetLast() {
- index_t result {root};
-
- while(result != 0 && nodes[result].childs.size() >= 2) {
- result = nodes[result].childs[nodes[result].childs.size() - 1];
- }
-
- return result;
-}
-
-void Tree::AddRootNode(const TreeNode& newRootNode) {
- node_num++;
- nodes[node_num] = newRootNode;
- root = node_num;
- last = node_num;
-}
-
-void Tree::RemoveRootNode() {
- root = nodes[root].childs[0];
- nodes.erase(node_num);
- node_num--;
- last = GetLast();
-}
-
-// Path from leaf to root
-std::vector<std::string> Tree::GetPath(std::string a, std::string b, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- std::vector<std::string> result;
-
- while (a != b) {
- auto parents {reverseBNF.find(a)};
- if (parents == reverseBNF.end())
- return {};
-
- bool hit{false};
- for (const auto& parent : parents->second) {
- for (const auto& list : bnf.at(parent)) {
- if (list.size() > 0 && list[0] == a) {
- if (!hit) {
- result.push_back(a);
- a = parent;
- hit = true;
- } else
- throw std::runtime_error("Double match for "s + parent + "/"s + a);
- }
- }
- }
- }
- if (a == b) {
- result.push_back(a);
- }
- return result;
-}
-
-index_t Tree::AddNode(const std::string& name, const std::string& child_name, index_t parent_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF)
-{
- TreeNode& parent {nodes[parent_index]};
- node_num++;
- index_t index = node_num;
- parent.childs.push_back(index);
- std::vector<std::string> child_names;
- auto rule {bnf.find(name)};
- if (rule != bnf.end()) {
- for (auto& list : rule->second) {
- if (list.size() > 0 && list[0] == child_name)
- child_names = list;
- }
- }
- nodes.emplace(index, TreeNode{parent_index, {}, child_names, name});
- //root stays
- last = GetLast();
-
- return index;
-}
-
-void Tree::AddPath(const std::vector<std::string>& path, index_t current_index, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- for (int i = path.size() - 1; i >= 0; i--) {
- std::string child_name;
- if (i > 0)
- child_name = path[i - 1];
- current_index = AddNode(path[i], child_name, current_index, bnf, reverseBNF);
- }
-}
-
-// try to add character to tree
-bool Tree::Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- if (nodes.empty()) { // first node
- return AddFirstNode(c, bnf, reverseBNF);
- } else { // at least one character is already present
- // Traverse tree until partially filled node found
- // or new node can be added
- index_t current_index{last};
-
- while (current_index != 0) {
- TreeNode& node {nodes[current_index]};
- if (node.childs.size() < node.child_names.size()) { // partially filled node
- std::vector<std::string> list = GetPath(std::string(1, c), node.child_names[node.childs.size()], bnf, reverseBNF);
- if (list.size() > 0) {
- AddPath(list, current_index, bnf, reverseBNF);
- return true;
- } else {
- return false; // The path a->b is not available via bnf
- }
- }
- current_index = node.parent;
- }
-
- // Add node at root
-
- std::vector<TreeNode> parent_nodes = getParentTreeNode(bnf, reverseBNF);
- if (parent_nodes.size() == 0)
- throw std::runtime_error("Couldn't add new parent node.");
-
- for (const auto &i : parent_nodes) {
- AddRootNode(i);
- if (Add(c, bnf, reverseBNF))
- return true;
- RemoveRootNode();
- }
-
- }
- return false;
-}
-
-// add path to start symbol
-void Tree::Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
- if (nodes.empty()) // only handle non-empty trees
- return;
-
- while (true) {
- std::string& old_root_name { nodes[root].name }; // current root node name
-
- auto parents {reverseBNF.find(old_root_name)};
- if (parents != reverseBNF.end()) { // parents in bnf available
- bool hit{false};
- for (auto& parent : parents->second) {
- for (const auto& list : bnf.at(parent)) {
- if (list.size() == 1 && list[0] == old_root_name) {
- if (!hit) {
- const std::string& new_root_name {parent};
- // Add new TreeNode in the direction to root:
- // New root with 1 child (old root)
- nodes.emplace(++node_num,
- TreeNode{0, // parent
- std::vector<index_t>{root}, // child indices
- std::vector<std::string>{old_root_name}, // child names
- new_root_name // name
- });
- nodes[root].parent = node_num;
- root = node_num;
- // this->last stays
- hit = true;
- } else
- throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name);
- }
- }
- }
- if (!hit)
- break;
- } else
- break;
- }
-}
-
-void Lexer::FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result)
-{
- tree.Resolve(bnf, ReverseBNF);
- if (tree.Valid(Top)) {
- result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}});
- token.clear();
- }
- tree.clear();
-}
-
-Lexer::Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
+Lexer::Lexer(const BNF& bnf, const std::string& Top)
{
}
std::vector<Token> Lexer::Lex(const std::string& s)
{
- location = {1, 0};
-
std::vector<Token> result;
- std::string token;
-
- std::string Whitespace{"\t \n\r"};
- Tree tree;
-
- for (size_t pos{0}; pos < s.size(); pos++) {
- char c{s[pos]};
- if (c == '\n') {
- location.column = 0;
- location.line++;
- } else if (std::isprint(c)) {
- location.column++;
- }
-
- //std::cout << "Char: |" << c << "|" << std::endl;
- if (Whitespace.find(c) != std::string::npos) { // found whitespace character
- // evaluate token up to now and skip whitespace
- FinalizeTree(tree, token, result);
- } else { // no whitespace: try to add to tree
- if (!tree.Add(c, bnf, ReverseBNF)) {
- FinalizeTree(tree, token, result);
- if (!tree.Add(c, bnf, ReverseBNF))
- throw std::runtime_error("Parse error");
- }
-
- token.push_back(c);
- }
- }
-
- // Final evaluation of last token
- FinalizeTree(tree, token, result);
return result;
}