From c413fee0cde65e379f82afffd8d701f663aeb0be Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Wed, 15 Jan 2020 22:44:41 +0100 Subject: Lex (WIP) --- minicc.cpp | 51 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'minicc.cpp') diff --git a/minicc.cpp b/minicc.cpp index 07090a8..f0f4e4c 100644 --- a/minicc.cpp +++ b/minicc.cpp @@ -35,6 +35,20 @@ BNF Reverse(BNF bnf){ return {}; // TODO } +using index_t = size_t; + +struct TreeNode { + index_t parent; + std::vector childs; + std::string name; +}; + +using Tree = std::map; + +bool ValidTree(const Tree& tree) { + return true; // TODO +} + std::vector Lex(std::string s, std::string Top, BNF bnf) { std::vector result; @@ -43,13 +57,39 @@ std::vector Lex(std::string s, std::string Top, BNF bnf) BNF ReverseBNF{ Reverse(bnf)}; std::string Whitespace{"\t \n\r"}; + std::deque candidates; for (size_t pos{0}; pos < s.size(); pos++) { char c{s[pos]}; - if (token.empty() and Whitespace.find(c) != std::string::npos) - continue; // skip whitespace between tokens - - auto Path = GetPath(std::string{1, c}, ReverseBNF, Top); + if (Whitespace.find(c) != std::string::npos) { + if (candidates.empty()) { // skip + if (!token.empty()) + throw std::runtime_error("Expected empty token, got "s + token); + } else { // check candidates + bool valid{false}; + for (const auto& ct : candidates) { + if (ValidTree(ct)) { + result.push_back(token); + token.clear(); + valid = true; + } + } + if (!valid) + throw std::runtime_error("Invalid token: "s + token); + + candidates.clear(); + } + } else { // no whitespace: try to add to tree + for (const auto& ct : candidates) { + if (!TryAdd(cti, c)) { + candidates.erase(i); // no candidate anymore + } + } + if (candidates.empty()) { // no candidates anymore + } else { + } + //token.push_back(c); + } } return result; @@ -95,7 +135,8 @@ TEST_F(Test, BNF) { {"digit", {{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }}}, {"identifier-nondigit", {{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "_"}}}, - {"preprocessing-op-or-punc", {{";"}}}, + {"preprocessing-op-or-punc", {{";"}, + {"="}}}, {"pp-number", {{"digit"}, {"pp-number", "digit"}}} }; -- cgit v1.2.3