summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-01-18 23:13:56 +0100
committerRoland Reichwein <mail@reichwein.it>2020-01-18 23:13:56 +0100
commitb01052f96c107603a30663c47308cadf7c30d94d (patch)
tree5002e4136bde94b7674898f37d76cd0abb01ffa3
parent766b70f8b85197de38a9fd629641ca9f70cc2340 (diff)
Fix lex bnf
-rw-r--r--minicc.cpp160
1 files changed, 123 insertions, 37 deletions
diff --git a/minicc.cpp b/minicc.cpp
index 94bae72..d147baa 100644
--- a/minicc.cpp
+++ b/minicc.cpp
@@ -33,16 +33,16 @@ std::vector<PathElement> GetPath(std::string Token, BNF ReverseBNF, std::string
}
auto Reverse(BNF bnf){
- std::map<std::string, std::vector<std::string>> result;
+ std::map<std::string, std::set<std::string>> result;
for (const auto& [from, to] : bnf) {
for (const auto& list : to) {
for (const auto& element : list) {
auto i{result.find(element)};
if (i != result.end()) // already present
- i->second.push_back(from);
+ i->second.insert(from);
else // new element
- result.emplace(element, std::vector{1, from});
+ result.emplace(element, std::set{from});
}
}
}
@@ -68,10 +68,11 @@ private:
public:
bool Valid(const std::string& Top) const {
- // Start symbol on top
+ // A token is non empty
if (node_num == 0)
return false;
+ // Start symbol on top
auto rootNode{nodes.find(root)};
if (rootNode == nodes.end())
throw std::runtime_error("Node not found: "s + std::to_string(root));
@@ -88,24 +89,98 @@ public:
return true;
}
- // try to add node
- bool Add(char c, const BNF& bnf, const std::map<std::string, std::vector<std::string>>& reverseBNF) {
+ bool AddFirstNode(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ node_num ++;
+ root = node_num;
+ last = node_num;
+ std::string node_name(1, char(c));
+
+ auto reverseRule{reverseBNF.find(node_name)};
+ if (reverseRule == reverseBNF.end())
+ throw std::runtime_error("Reverse rule not found: "s + node_name);
+
+ std::vector<std::string> children; // default: no children for terminal
+ auto rule{bnf.find(node_name)};
+ if (rule != bnf.end()) { // multiple variants!
+ bool hit{false};
+ for (const auto& i : rule->second) {
+ if (i.size() > 0 && i[0] == node_name) {
+ if (hit)
+ throw std::runtime_error("Multiple matching rules found for "s + node_name);
+ children = i;
+ hit = true;
+ }
+ }
+ if (!hit)
+ throw std::runtime_error("No matching rule found for "s + node_name);
+ }
+
+ nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, children, node_name});
+ return true;
+ }
+
+ // try to add character to tree
+ bool Add(char c, const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
if (nodes.empty()) { // first node
- node_num ++;
- root = node_num;
- last = node_num;
- std::string node_name{1, c};
- auto rule{reverseBNF.find(node_name)};
- if (rule == reverseBNF.end())
- throw std::runtime_error("Rule not found: "s + node_name);
- nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name});
- return true;
- } else {
- // TODO
+ return AddFirstNode(c, bnf, reverseBNF);
+ } else { // at least one character is already present
+ // Traverse tree until partially filled node found
+ // or new node can be added
+ index_t current_index{last};
+
+ while (current_index != 0) {
+ TreeNode& node {nodes[current_index]};
+ if (node.childs.size() < node.child_names.size()) { // partially filled node
+ throw std::runtime_error("TODO: partially filled node");
+ }
+ current_index = node.parent;
+ }
+ throw std::runtime_error("TODO: need to add node");
+
}
return false;
}
+ // add path to start symbol
+ void Resolve(const BNF& bnf, const std::map<std::string, std::set<std::string>>& reverseBNF) {
+ if (nodes.empty()) // only handle non-empty trees
+ return;
+
+ while (true) {
+ std::string& old_root_name { nodes[root].name }; // current root node name
+
+ auto parents {reverseBNF.find(old_root_name)};
+ if (parents != reverseBNF.end()) { // parents in bnf available
+ bool hit{false};
+ for (auto& parent : parents->second) {
+ for (const auto& list : bnf.at(parent)) {
+ if (list.size() == 1 && list[0] == old_root_name) {
+ if (!hit) {
+ const std::string& new_root_name {parent};
+ // Add new TreeNode in the direction to root:
+ // New root with 1 child (old root)
+ nodes.emplace(++node_num,
+ TreeNode{0, // parent
+ std::vector<index_t>{root}, // child indices
+ std::vector<std::string>{old_root_name}, // child names
+ new_root_name // name
+ });
+ nodes[root].parent = node_num;
+ root = node_num;
+ // this->last stays
+ hit = true;
+ } else
+ throw std::runtime_error("Error: Multiple resolve nodes for "s + old_root_name);
+ }
+ }
+ }
+ if (!hit)
+ break;
+ } else
+ break;
+ }
+ }
+
};
class Lexer
@@ -115,17 +190,18 @@ private:
const BNF &bnf;
const std::string& Top;
- std::map<std::string, std::vector<std::string>> ReverseBNF;
+ std::map<std::string, std::set<std::string>> ReverseBNF;
// to be called on token end
- void CheckCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result)
+ void FinalizeCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result)
{
if (candidates.empty()) { // skip
if (!token.empty())
throw std::runtime_error("Expected empty token, got "s + token);
} else { // check candidates
bool valid{false};
- for (const auto& ct : candidates) {
+ for (auto& ct : candidates) {
+ ct.Resolve(bnf, ReverseBNF);
if (ct.Valid(Top)) {
if (valid)
throw std::runtime_error("Found ambiguous token "s + token);
@@ -141,6 +217,16 @@ private:
}
}
+ void AddCandidate(char c, std::deque<Tree>& candidates) {
+ // new candidate: starting with c
+ auto element {ReverseBNF.find(std::string(1, c))};
+ if (element != ReverseBNF.end()) {
+ Tree newTree;
+ if (newTree.Add(c, bnf, ReverseBNF))
+ candidates.push_back(newTree);
+ }
+ }
+
public:
Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
{
@@ -156,9 +242,10 @@ public:
for (size_t pos{0}; pos < s.size(); pos++) {
char c{s[pos]};
+ std::cout << "Char: |" << c << "|" << std::endl;
if (Whitespace.find(c) != std::string::npos) { // found whitespace character
// evaluate token up to now and skip whitespace
- CheckCandidates(candidates, token, result);
+ FinalizeCandidates(candidates, token, result);
} else { // no whitespace: try to add to tree
std::deque<index_t> EraseList;
int i = 0;
@@ -169,26 +256,21 @@ public:
}
}
- if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token
- token.push_back(c);
+ if (token.empty()) {
+ AddCandidate(c, candidates);
+ } else if (candidates.size() - EraseList.size() > 0) { // added to some candidates: Erase invalidated ones
for (const auto& i : EraseList)
candidates.erase(candidates.begin() + i);
} else { // no candidates left: new tree
- CheckCandidates(candidates, token, result);
- }
-
- // new candidate: starting with c
- auto element {ReverseBNF.find(std::string(1, c))};
- if (element != ReverseBNF.end()) {
- Tree newTree;
- if (newTree.Add(c, bnf, ReverseBNF))
- candidates.push_back(newTree);
+ FinalizeCandidates(candidates, token, result);
+ AddCandidate(c, candidates);
}
+ token.push_back(c);
}
}
// Final evaluation of last token
- CheckCandidates(candidates, token, result);
+ FinalizeCandidates(candidates, token, result);
return result;
}
@@ -232,9 +314,12 @@ TEST_F(Test, BNF) {
{"identifier", {{"identifier-nondigit"},
{"identifier", "identifier-nondigit"},
{"identifier", "digit"}}},
- {"digit", {{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }}},
- {"identifier-nondigit", {{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
- "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "_"}}},
+ {"digit", {{"0"}, {"1"}, {"2"}, {"3"}, {"4"}, {"5"}, {"6"}, {"7"}, {"8"}, {"9"} }},
+ {"identifier-nondigit",
+ {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}, {"j"}, {"k"}, {"l"}, {"m"},
+ {"n"}, {"o"}, {"p"}, {"q"}, {"r"}, {"s"}, {"t"}, {"u"}, {"v"}, {"w"}, {"x"}, {"y"}, {"z"},
+ {"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"},
+ {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}, {"_"}}},
{"preprocessing-op-or-punc", {{";"},
{"="}}},
{"pp-number", {{"digit"},
@@ -256,10 +341,11 @@ TEST_F(Test, BNF) {
Lexer lexer(LexBNF, LexTop);
auto tokens = lexer.Lex(Code);
-
+#if 1
for (const auto& i: tokens) {
std::cout << i << std::endl;
}
+#endif
//auto Program = Compile(tokens, Top, bnf, Terminals);
}