From c6fe8cccc49a70af8b8367fcfe19ff8a05f16d7a Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 19 Jan 2020 19:58:33 +0100 Subject: Added Locations --- minicc.cpp | 93 +++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 22 deletions(-) (limited to 'minicc.cpp') diff --git a/minicc.cpp b/minicc.cpp index 37fc974..f548181 100644 --- a/minicc.cpp +++ b/minicc.cpp @@ -4,6 +4,7 @@ #include "gtest/gtest.h" #include +#include #include #include #include @@ -69,6 +70,13 @@ public: node_num = 0; } + // Type of lexical token + std::string GetType() { + if (node_num > 0 && nodes[root].child_names.size() == 1) + return nodes[root].child_names[0]; + return ""; + } + bool Valid(const std::string& Top) const { // A token is non empty if (node_num == 0) @@ -295,6 +303,31 @@ public: }; +struct Location { + size_t line; + size_t column; +}; + +bool operator==(const Location &a, const Location &b) +{ + return (a.line == b.line && a.column == b.column); +} + +struct Token { + std::string type; + std::string value; + Location location; +}; + +bool operator==(const Token &a, const Token &b) +{ + return (a.type == b.type && a.value == b.value && a.location == b.location); +} + +std::ostream& operator<<(std::ostream& os, const Token& token) { + return os << token.type << ": " << token.value << "(" << token.location.line << ":" << token.location.column << ")"; +} + class Lexer { @@ -302,14 +335,16 @@ private: const BNF &bnf; const std::string& Top; + Location location{1, 0}; + std::map> ReverseBNF; // to be called on token end - void FinalizeTree(Tree& tree, std::string& token, std::vector& result) + void FinalizeTree(Tree& tree, std::string& token, std::vector& result) { tree.Resolve(bnf, ReverseBNF); if (tree.Valid(Top)) { - result.push_back(token); + result.emplace_back(Token{tree.GetType(), token, Location{location.line, location.column - token.size()}}); token.clear(); } tree.clear(); @@ -320,9 +355,9 @@ public: { } - std::vector Lex(const std::string& s) + std::vector Lex(const std::string& s) { - std::vector result; + std::vector result; std::string token; std::string Whitespace{"\t \n\r"}; @@ -330,7 +365,14 @@ public: for (size_t pos{0}; pos < s.size(); pos++) { char c{s[pos]}; - std::cout << "Char: |" << c << "|" << std::endl; + if (c == '\n') { + location.column = 0; + location.line++; + } else if (std::isprint(c)) { + location.column++; + } + + //std::cout << "Char: |" << c << "|" << std::endl; if (Whitespace.find(c) != std::string::npos) { // found whitespace character // evaluate token up to now and skip whitespace FinalizeTree(tree, token, result); @@ -353,23 +395,11 @@ public: }; -ProgramNode Compile(std::vector Tokens, std::string Top, BNF bnf, Terminals terminals) +ProgramNode Compile(std::vector Tokens, std::string Top, BNF bnf, Terminals terminals) { - BNF ReverseBNF;//{ Reverse(bnf)}; + std::map> ReverseBNF{ Reverse(bnf)}; if (Tokens.size()){ - std::string Token = Tokens[0]; -#if 0 - auto Path = GetPath(Token, ReverseBNF, Top, terminals); - if (Path.size()) { - size_t Index{1}; - while (Index < Tokens.size()) { - Path = GetPath(Token, ReverseBNF, Top, terminals, Path); - Index++; - } - } else - throw std::runtime_error("Invalid token: "s + Token); -#endif } else throw std::runtime_error("No tokens!"); @@ -416,15 +446,34 @@ TEST_F(Test, BNF) { std::set Terminals{"identifier", "=", ";"}; std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ"}; + std::vector tokens_reference{ + {"identifier", "a", { 1, 1} }, + {"preprocessing-op-or-punc", "=", { 1, 3}}, + {"identifier", "bc", { 1, 5}}, + {"preprocessing-op-or-punc", ";", { 1, 8}}, + {"identifier", "c", { 1, 10}}, + {"preprocessing-op-or-punc", "=", { 1, 12}}, + {"pp-number", "123", { 1, 14}}, + {"preprocessing-op-or-punc", ";", { 1, 18}}, + {"identifier", "esd", { 1, 20}}, + {"preprocessing-op-or-punc", "=", { 1, 24}}, + {"identifier", "Ff", { 1, 26}}, + {"preprocessing-op-or-punc", ";", { 1, 29}}, + {"pp-number", "1", { 1, 31}}, + {"preprocessing-op-or-punc", "=", { 1, 33}}, + {"identifier", "XYZ", { 1, 34}}, + }; Lexer lexer(LexBNF, LexTop); auto tokens = lexer.Lex(Code); -#if 1 + + ASSERT_EQ(tokens, tokens_reference); +#if 0 for (const auto& i: tokens) { - std::cout << i << std::endl; + std::cout << i.value << std::endl; } #endif - //auto Program = Compile(tokens, Top, bnf, Terminals); + auto Program = Compile(tokens, Top, bnf, Terminals); } int main(int argc, char* argv[]) { -- cgit v1.2.3