Diffstat (limited to 'minicc.cpp')
-rw-r--r--  minicc.cpp  93
1 file changed, 71 insertions, 22 deletions
diff --git a/minicc.cpp b/minicc.cpp
index 37fc974..f548181 100644
--- a/minicc.cpp
+++ b/minicc.cpp
@@ -4,6 +4,7 @@
#include "gtest/gtest.h"
#include <algorithm>
+#include <cctype>
#include <deque>
#include <map>
#include <memory>
@@ -69,6 +70,13 @@ public:
node_num = 0;
}
+ // Type of lexical token
+ std::string GetType() {
+ if (node_num > 0 && nodes[root].child_names.size() == 1)
+ return nodes[root].child_names[0];
+ return "";
+ }
+
bool Valid(const std::string& Top) const {
// A token is non empty
if (node_num == 0)
@@ -295,6 +303,31 @@ public:
};
+struct Location {
+ size_t line;
+ size_t column;
+};
+
+bool operator==(const Location &a, const Location &b)
+{
+ return (a.line == b.line && a.column == b.column);
+}
+
+struct Token {
+ std::string type;
+ std::string value;
+ Location location;
+};
+
+bool operator==(const Token &a, const Token &b)
+{
+ return (a.type == b.type && a.value == b.value && a.location == b.location);
+}
+
+std::ostream& operator<<(std::ostream& os, const Token& token) {
+ return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")";
+}
+
class Lexer
{
@@ -302,14 +335,16 @@ private:
const BNF &bnf;
const std::string& Top;
+ Location location{1, 0};
+
std::map<std::string, std::set<std::string>> ReverseBNF;
// to be called on token end
- void FinalizeTree(Tree& tree, std::string& token, std::vector<std::string>& result)
+ void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result)
{
tree.Resolve(bnf, ReverseBNF);
if (tree.Valid(Top)) {
- result.push_back(token);
+ result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}});
token.clear();
}
tree.clear();
@@ -320,9 +355,9 @@ public:
{
}
- std::vector<std::string> Lex(const std::string& s)
+ std::vector<Token> Lex(const std::string& s)
{
- std::vector<std::string> result;
+ std::vector<Token> result;
std::string token;
std::string Whitespace{"\t \n\r"};
@@ -330,7 +365,14 @@ public:
for (size_t pos{0}; pos < s.size(); pos++) {
char c{s[pos]};
- std::cout << "Char: |" << c << "|" << std::endl;
+ if (c == '\n') {
+ location.column = 0;
+ location.line++;
+ } else if (std::isprint(c)) {
+ location.column++;
+ }
+
+ //std::cout << "Char: |" << c << "|" << std::endl;
if (Whitespace.find(c) != std::string::npos) { // found whitespace character
// evaluate token up to now and skip whitespace
FinalizeTree(tree, token, result);
@@ -353,23 +395,11 @@ public:
};
-ProgramNode Compile(std::vector<std::string> Tokens, std::string Top, BNF bnf, Terminals terminals)
+ProgramNode Compile(std::vector<Token> Tokens, std::string Top, BNF bnf, Terminals terminals)
{
- BNF ReverseBNF;//{ Reverse(bnf)};
+ std::map<std::string, std::set<std::string>> ReverseBNF{ Reverse(bnf)};
if (Tokens.size()){
- std::string Token = Tokens[0];
-#if 0
- auto Path = GetPath(Token, ReverseBNF, Top, terminals);
- if (Path.size()) {
- size_t Index{1};
- while (Index < Tokens.size()) {
- Path = GetPath(Token, ReverseBNF, Top, terminals, Path);
- Index++;
- }
- } else
- throw std::runtime_error("Invalid token: "s + Token);
-#endif
} else
throw std::runtime_error("No tokens!");
@@ -416,15 +446,34 @@ TEST_F(Test, BNF) {
std::set<std::string> Terminals{"identifier", "=", ";"};
std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"};
+ std::vector<Token> tokens_reference{
+ {"identifier", "a", { 1, 1} },
+ {"preprocessing-op-or-punc", "=", { 1, 3}},
+ {"identifier", "bc", { 1, 5}},
+ {"preprocessing-op-or-punc", ";", { 1, 8}},
+ {"identifier", "c", { 1, 10}},
+ {"preprocessing-op-or-punc", "=", { 1, 12}},
+ {"pp-number", "123", { 1, 14}},
+ {"preprocessing-op-or-punc", ";", { 1, 18}},
+ {"identifier", "esd", { 1, 20}},
+ {"preprocessing-op-or-punc", "=", { 1, 24}},
+ {"identifier", "Ff", { 1, 26}},
+ {"preprocessing-op-or-punc", ";", { 1, 29}},
+ {"pp-number", "1", { 1, 31}},
+ {"preprocessing-op-or-punc", "=", { 1, 33}},
+ {"identifier", "XYZ", { 1, 34}},
+ };
Lexer lexer(LexBNF, LexTop);
auto tokens = lexer.Lex(Code);
-#if 1
+
+ ASSERT_EQ(tokens, tokens_reference);
+#if 0
for (const auto& i: tokens) {
- std::cout << i << std::endl;
+ std::cout << i.value << std::endl;
}
#endif
- //auto Program = Compile(tokens, Top, bnf, Terminals);
+ auto Program = Compile(tokens, Top, bnf, Terminals);
}
int main(int argc, char* argv[]) {
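
Below is a small standalone sketch, not part of the commit above, exercising the Token and Location types this diff introduces. The struct definitions and operators are copied from the added lines; the main() driver and its sample values are illustrative assumptions only.

// Sketch only: Token/Location as added in this commit, plus a tiny driver.
#include <cstddef>
#include <iostream>
#include <string>

struct Location {
    std::size_t line;
    std::size_t column;
};

bool operator==(const Location& a, const Location& b)
{
    return a.line == b.line && a.column == b.column;
}

struct Token {
    std::string type;
    std::string value;
    Location location;
};

bool operator==(const Token& a, const Token& b)
{
    return a.type == b.type && a.value == b.value && a.location == b.location;
}

std::ostream& operator<<(std::ostream& os, const Token& token)
{
    // Same format as the commit: "type: value(line:column)"
    return os << token.type << ": " << token.value
              << "(" << token.location.line << ":" << token.location.column << ")";
}

int main()
{
    // Hypothetical tokens mirroring the first entry of tokens_reference in the test.
    Token a{"identifier", "a", {1, 1}};
    Token b{"identifier", "a", {1, 1}};
    std::cout << a << " equal: " << std::boolalpha << (a == b) << std::endl;
    // prints: identifier: a(1:1) equal: true
}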