diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-01-19 19:58:33 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-01-19 19:58:33 +0100 |
commit | c6fe8cccc49a70af8b8367fcfe19ff8a05f16d7a (patch) | |
tree | 5e6c95f3e679168a0899dd86426d2f7c87dbad3c /minicc.cpp | |
parent | 4e6fe47af1964c3efe50d0e244331d0dd4df51fc (diff) |
Added Locations
Diffstat (limited to 'minicc.cpp')
-rw-r--r-- | minicc.cpp | 93 |
1 files changed, 71 insertions, 22 deletions
@@ -4,6 +4,7 @@ #include "gtest/gtest.h" #include <algorithm> +#include <cctype> #include <deque> #include <map> #include <memory> @@ -69,6 +70,13 @@ public: node_num = 0; } + // Type of lexical token + std::string GetType() { + if (node_num > 0 && nodes[root].child_names.size() == 1) + return nodes[root].child_names[0]; + return ""; + } + bool Valid(const std::string& Top) const { // A token is non empty if (node_num == 0) @@ -295,6 +303,31 @@ public: }; +struct Location { + size_t line; + size_t column; +}; + +bool operator==(const Location &a, const Location &b) +{ + return (a.line == b.line && a.column == b.column); +} + +struct Token { + std::string type; + std::string value; + Location location; +}; + +bool operator==(const Token &a, const Token &b) +{ + return (a.type == b.type && a.value == b.value && a.location == b.location); +} + +std::ostream& operator<<(std::ostream& os, const Token& token) { + return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")"; +} + class Lexer { @@ -302,14 +335,16 @@ private: const BNF &bnf; const std::string& Top; + Location location{1, 0}; + std::map<std::string, std::set<std::string>> ReverseBNF; // to be called on token end - void FinalizeTree(Tree& tree, std::string& token, std::vector<std::string>& result) + void FinalizeTree(Tree& tree, std::string& token, std::vector<Token>& result) { tree.Resolve(bnf, ReverseBNF); if (tree.Valid(Top)) { - result.push_back(token); + result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); token.clear(); } tree.clear(); @@ -320,9 +355,9 @@ public: { } - std::vector<std::string> Lex(const std::string& s) + std::vector<Token> Lex(const std::string& s) { - std::vector<std::string> result; + std::vector<Token> result; std::string token; std::string Whitespace{"\t \n\r"}; @@ -330,7 +365,14 @@ public: for (size_t pos{0}; pos < s.size(); pos++) { char c{s[pos]}; - std::cout << "Char: |" << c << "|" << std::endl; + if (c == '\n') { + location.column = 0; + location.line++; + } else if (std::isprint(c)) { + location.column++; + } + + //std::cout << "Char: |" << c << "|" << std::endl; if (Whitespace.find(c) != std::string::npos) { // found whitespace character // evaluate token up to now and skip whitespace FinalizeTree(tree, token, result); @@ -353,23 +395,11 @@ public: }; -ProgramNode Compile(std::vector<std::string> Tokens, std::string Top, BNF bnf, Terminals terminals) +ProgramNode Compile(std::vector<Token> Tokens, std::string Top, BNF bnf, Terminals terminals) { - BNF ReverseBNF;//{ Reverse(bnf)}; + std::map<std::string, std::set<std::string>> ReverseBNF{ Reverse(bnf)}; if (Tokens.size()){ - std::string Token = Tokens[0]; -#if 0 - auto Path = GetPath(Token, ReverseBNF, Top, terminals); - if (Path.size()) { - size_t Index{1}; - while (Index < Tokens.size()) { - Path = GetPath(Token, ReverseBNF, Top, terminals, Path); - Index++; - } - } else - throw std::runtime_error("Invalid token: "s + Token); -#endif } else throw std::runtime_error("No tokens!"); @@ -416,15 +446,34 @@ TEST_F(Test, BNF) { std::set<std::string> Terminals{"identifier", "=", ";"}; std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; + std::vector<Token> tokens_reference{ + {"identifier", "a", { 1, 1} }, + {"preprocessing-op-or-punc", "=", { 1, 3}}, + {"identifier", "bc", { 1, 5}}, + {"preprocessing-op-or-punc", ";", { 1, 8}}, + {"identifier", "c", { 1, 10}}, + {"preprocessing-op-or-punc", "=", { 1, 12}}, + {"pp-number", "123", { 1, 14}}, + {"preprocessing-op-or-punc", ";", { 1, 18}}, + {"identifier", "esd", { 1, 20}}, + {"preprocessing-op-or-punc", "=", { 1, 24}}, + {"identifier", "Ff", { 1, 26}}, + {"preprocessing-op-or-punc", ";", { 1, 29}}, + {"pp-number", "1", { 1, 31}}, + {"preprocessing-op-or-punc", "=", { 1, 33}}, + {"identifier", "XYZ", { 1, 34}}, + }; Lexer lexer(LexBNF, LexTop); auto tokens = lexer.Lex(Code); -#if 1 + + ASSERT_EQ(tokens, tokens_reference); +#if 0 for (const auto& i: tokens) { - std::cout << i << std::endl; + std::cout << i.value << std::endl; } #endif - //auto Program = Compile(tokens, Top, bnf, Terminals); + auto Program = Compile(tokens, Top, bnf, Terminals); } int main(int argc, char* argv[]) { |