summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-01-18 15:36:42 +0100
committerRoland Reichwein <mail@reichwein.it>2020-01-18 15:36:42 +0100
commit766b70f8b85197de38a9fd629641ca9f70cc2340 (patch)
treef62ae2a98487af55f32cceeeda7b261f24955be3
parent39716aa1907e975f6e37adafe42d72d643223a98 (diff)
Lexer (WIP)
-rw-r--r--TODO1
-rw-r--r--minicc.cpp209
2 files changed, 137 insertions, 73 deletions
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..751699a
--- /dev/null
+++ b/TODO
@@ -0,0 +1 @@
+Locations in source: File:line
diff --git a/minicc.cpp b/minicc.cpp
index b609477..94bae72 100644
--- a/minicc.cpp
+++ b/minicc.cpp
@@ -5,6 +5,7 @@
#include <deque>
#include <map>
+#include <memory>
#include <string>
#include <utility>
#include <vector>
@@ -31,115 +32,172 @@ std::vector<PathElement> GetPath(std::string Token, BNF ReverseBNF, std::string
return {}; // TODO
}
-BNF Reverse(BNF bnf){
- return {}; // TODO
+auto Reverse(BNF bnf){
+ std::map<std::string, std::vector<std::string>> result;
+
+ for (const auto& [from, to] : bnf) {
+ for (const auto& list : to) {
+ for (const auto& element : list) {
+ auto i{result.find(element)};
+ if (i != result.end()) // already present
+ i->second.push_back(from);
+ else // new element
+ result.emplace(element, std::vector{1, from});
+ }
+ }
+ }
+
+ return result;
}
using index_t = size_t;
struct TreeNode {
index_t parent;
- std::vector<index_t> childs;
+ std::vector<index_t> childs; // fill char by char
+ std::vector<std::string> child_names; // fill always
std::string name;
};
-struct Tree {
+class Tree {
+private:
std::map<index_t, TreeNode> nodes; // index 0 = non existing; index starting at 1
index_t node_num{};
index_t root{};
index_t last{};
- bool Valid() const {
- // Start symbol?
- // All terminal symbols?
- return true; // TODO
+public:
+ bool Valid(const std::string& Top) const {
+ // Start symbol on top
+ if (node_num == 0)
+ return false;
+
+ auto rootNode{nodes.find(root)};
+ if (rootNode == nodes.end())
+ throw std::runtime_error("Node not found: "s + std::to_string(root));
+
+ if (rootNode->second.name != Top)
+ return false;
+
+ // All nodes filled (implies all leaves are terminal)
+ for (const auto& [index, node]: nodes) {
+ if (node.childs.size() < node.child_names.size())
+ return false; // node not filled
+ }
+
+ return true;
}
- bool Add(char c) {
+ // try to add node
+ bool Add(char c, const BNF& bnf, const std::map<std::string, std::vector<std::string>>& reverseBNF) {
+ if (nodes.empty()) { // first node
+ node_num ++;
+ root = node_num;
+ last = node_num;
+ std::string node_name{1, c};
+ auto rule{reverseBNF.find(node_name)};
+ if (rule == reverseBNF.end())
+ throw std::runtime_error("Rule not found: "s + node_name);
+ nodes.emplace(root, TreeNode{0, std::vector<index_t>{}, std::vector<std::string>{}, node_name});
+ return true;
+ } else {
// TODO
- //node_num ++;
- //nodes.emplace(node_num, {});
- return false;
}
+ return false;
+ }
};
-void CheckCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result)
+class Lexer
{
- bool valid{false};
- for (const auto& ct : candidates) {
- if (ct.Valid()) {
- if (valid)
- throw std::runtime_error("Found ambiguous token "s + token);
- result.push_back(token);
- token.clear();
- valid = true;
+
+private:
+ const BNF &bnf;
+ const std::string& Top;
+
+ std::map<std::string, std::vector<std::string>> ReverseBNF;
+
+ // to be called on token end
+ void CheckCandidates(std::deque<Tree>& candidates, std::string& token, std::vector<std::string>& result)
+ {
+ if (candidates.empty()) { // skip
+ if (!token.empty())
+ throw std::runtime_error("Expected empty token, got "s + token);
+ } else { // check candidates
+ bool valid{false};
+ for (const auto& ct : candidates) {
+ if (ct.Valid(Top)) {
+ if (valid)
+ throw std::runtime_error("Found ambiguous token "s + token);
+ result.push_back(token);
+ token.clear();
+ valid = true;
+ }
+ }
+ if (!valid)
+ throw std::runtime_error("Invalid token: "s + token);
+
+ candidates.clear();
}
}
- if (!valid)
- throw std::runtime_error("Invalid token: "s + token);
- candidates.clear();
-}
-
-std::vector<std::string> Lex(std::string s, std::string Top, BNF bnf)
-{
- std::vector<std::string> result;
- std::string token;
+public:
+ Lexer(const BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
+ {
+ }
- BNF ReverseBNF{ Reverse(bnf)};
+ std::vector<std::string> Lex(const std::string& s)
+ {
+ std::vector<std::string> result;
+ std::string token;
- std::string Whitespace{"\t \n\r"};
- std::deque<Tree> candidates;
+ std::string Whitespace{"\t \n\r"};
+ std::deque<Tree> candidates;
- for (size_t pos{0}; pos < s.size(); pos++) {
- char c{s[pos]};
- if (Whitespace.find(c) != std::string::npos) { // found whitespace character
- if (candidates.empty()) { // skip
- if (!token.empty())
- throw std::runtime_error("Expected empty token, got "s + token);
- } else { // check candidates
+ for (size_t pos{0}; pos < s.size(); pos++) {
+ char c{s[pos]};
+ if (Whitespace.find(c) != std::string::npos) { // found whitespace character
+ // evaluate token up to now and skip whitespace
CheckCandidates(candidates, token, result);
- }
- } else { // no whitespace: try to add to tree
- std::deque<index_t> EraseList;
- int i = 0;
- for (auto ct = candidates.begin(); ct != candidates.end(); ct++, i++) {
- if (!ct->Add(c)) {
- EraseList.push_front(i); // no candidate anymore
+ } else { // no whitespace: try to add to tree
+ std::deque<index_t> EraseList;
+ int i = 0;
+ for (auto ct = candidates.begin(); ct != candidates.end(); ct++, i++) {
+ if (!ct->Add(c, bnf, ReverseBNF)) { // either add char or delete candidate
+ // push to front to get reversed order
+ EraseList.push_front(i); // no candidate anymore
+ }
+ }
+
+ if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token
+ token.push_back(c);
+ for (const auto& i : EraseList)
+ candidates.erase(candidates.begin() + i);
+ } else { // no candidates left: new tree
+ CheckCandidates(candidates, token, result);
+ }
+
+ // new candidate: starting with c
+ auto element {ReverseBNF.find(std::string(1, c))};
+ if (element != ReverseBNF.end()) {
+ Tree newTree;
+ if (newTree.Add(c, bnf, ReverseBNF))
+ candidates.push_back(newTree);
}
- }
- // new candidate: starting with c
- auto element {ReverseBNF.find(std::string(1, c))};
- if (element != ReverseBNF.end()) {
- candidates.emplace_back();
- candidates.back().Add(c);
- }
-
- if (candidates.size() - EraseList.size() > 0) { // Added to some candidates: Continue growing token
- token.push_back(c);
- for (const auto& i : EraseList)
- candidates.erase(candidates.begin() + i);
- } else { // no candidates left: new tree
- CheckCandidates(candidates, token, result);
}
}
- }
- // Final evaluation
- if (candidates.empty()) { // skip
- if (!token.empty())
- throw std::runtime_error("Expected empty token at end of input, got "s + token);
- } else { // check candidates
+ // Final evaluation of last token
CheckCandidates(candidates, token, result);
+
+ return result;
}
- return result;
-}
+};
ProgramNode Compile(std::vector<std::string> Tokens, std::string Top, BNF bnf, Terminals terminals)
{
- BNF ReverseBNF{ Reverse(bnf)};
+ BNF ReverseBNF;//{ Reverse(bnf)};
if (Tokens.size()){
std::string Token = Tokens[0];
@@ -194,10 +252,15 @@ TEST_F(Test, BNF) {
std::set<std::string> Terminals{"identifier", "=", ";"};
- std::string Code{"a = b ; c = d ; e = f ;"};
+ std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"};
- auto tokens = Lex(Code, LexTop, LexBNF);
- auto Program = Compile(tokens, Top, bnf, Terminals);
+ Lexer lexer(LexBNF, LexTop);
+ auto tokens = lexer.Lex(Code);
+
+ for (const auto& i: tokens) {
+ std::cout << i << std::endl;
+ }
+ //auto Program = Compile(tokens, Top, bnf, Terminals);
}
int main(int argc, char* argv[]) {