diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-03-21 15:38:05 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-03-21 15:38:05 +0100 |
commit | 3cc139bce0283018473d4906ee2ea5f40f771255 (patch) | |
tree | 658956ab52b8892d419a2ba905f4c41f21f6d926 | |
parent | 74350b52fee9f576a1cc71d99cfd4ebdf5a73e0f (diff) |
Add lexer to CPP
-rw-r--r-- | cpp.cpp | 120 | ||||
-rw-r--r-- | cpp.h | 4 |
2 files changed, 39 insertions, 85 deletions
@@ -2,9 +2,10 @@ #include "bnf.h" #include "cppbnf.h" +#include "debug.h" +#include "lexer.h" #include "grammer.h" #include "minicc.h" -#include "debug.h" #include <gtest/gtest.h> #include <gmock/gmock.h> @@ -29,66 +30,14 @@ void CPP::backslash_escape() // TODO } -namespace { - -std::vector<Token> sourceToCharTokens(const std::string& code) -{ - std::vector<Token> result; - - Location location{1, 1}; - - for (char c: code) { - if (c == '\n') { - location.column = 1; - location.line++; - } else if (std::isprint(c)) { - location.column++; - } - - result.emplace_back(Token{std::string(1, c), std::string(1, c), location}); - } - return result; -} - -} - // Phase 3: Parse preprocessing tokens -std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std::string& s) +std::vector<Token> CPP::preprocessing_tokenize(const std::string& s) { - m_code = s; - - m_charTokens = sourceToCharTokens(s); - auto bnf{SubBNF(GetCppBNFLex(), "preprocessing-token")}; + + Lex::Lexer lexer(bnf, "preprocessing-token"); - // add to bnf to match whole file - bnf["file"] = { - {"preprocessing-token-list"}, - {"whitespace-list", "preprocessing-token-list"} - }; - bnf["preprocessing-token-list"] = { - {"preprocessing-token-padded"}, - {"preprocessing-token-list", "preprocessing-token-padded"} - }; - bnf["preprocessing-token-padded"] = { - {"preprocessing-token"}, - {"preprocessing-token", "whitespace-list"} - }; - bnf["whitespace-list"] = { - {"whitespace-char"}, - {"whitespace-list", "whitespace-char" } - }; - bnf["whitespace-char"] = { - {" "}, {"\t"}, {"\n"}, {"\r"} - }; - Gram::Compiler compiler(bnf, "file"); - debug = true; - std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens); - - debug = true; - compiler.DumpTree(); - - return Tree; + return lexer.Lex(s); } // Phase 4: Preprocessing @@ -137,7 +86,7 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr }; namespace { - std::unordered_set<std::string> pp_types{ + std::unordered_set<std::string> pp_types { "identifier", "pp-number", "character-literal", @@ -146,10 +95,16 @@ namespace { "user-defined-string-literal", "preprocessing-op-or-punc" }; + + std::unordered_set<std::string> keywords { + "alignas", + "alignof", + // ... Keywords table, p.15 + }; } // Phase 7.a: Create tokens from preprocessing tokens -std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<TreeNode>> Tree) +std::vector<Token> CPP::tokens_from_pptokens(std::vector<Token> pp_tokens) { std::vector<Token> result; @@ -161,28 +116,23 @@ std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<Tree // "user-defined-string-literal" -> "literal" + value // "preprocessing-op-or-punc" -> value+value (operator,punctuator) - // TODO: traverse Tree, creating Token list - std::vector<index_t> todo(1, index_t(Tree.first)); - - while (!todo.empty()) { - index_t current_index = todo.back(); - todo.pop_back(); - - TreeNode &node{Tree.second[current_index]}; - - // visit node - if (pp_types.find(node.type) != pp_types.end()) { // TODO - std::cout << node.type << ": " << valueOfNode(current_index, Tree.second) << std::endl; - } else { // only traverse further if not handled - - // iterate backwards in childs, to get depth-first search in tree, from the beginning - std::for_each(node.child_ids.rbegin(), node.child_ids.rend(), [&](int32_t child){ - if (!ChildIdIsToken(child)) - todo.push_back(child); - }); - } + for (auto& token: pp_tokens) { + if (pp_types.find(token.type) != pp_types.end()) { + if (token.type == "identifier") { +#if 0 + if (keywords.find(token.value) != keywords.end()) + result.emplace_back("keyword", token.value); + else +#endif + result.emplace_back(Token{"identifier"s, token.value}); + } + else if (token.type == "preprocessing-op-or-punc") + result.emplace_back(Token{token.value, token.value}); + else + result.emplace_back(Token{"literal", token.value}); + } else + throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s); } - return result; } @@ -238,12 +188,16 @@ protected: } }; -#if 0 +#if 1 TEST_F(CppTest, preprocessing_tokenize) { CPP cpp; - auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }"); + auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }"); - cpp.tokens_from_pptokens(ppTree); + ASSERT_EQ(pp_tokens.size(), 9); + + auto tokens = cpp.tokens_from_pptokens(pp_tokens); + + ASSERT_EQ(tokens.size(), 9); } #endif @@ -17,11 +17,11 @@ std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& T // phases of translation, according to standard void source_charset_map(); // phase 1 void backslash_escape(); // phase 2 -std::pair<index_t, std::vector<Gram::TreeNode>> preprocessing_tokenize(const std::string& s); // phase 3 +std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3 void preprocess(); // phase 4 void execution_charset_map(); // phase 5 void concatenate_strings(); // phase 6 -std::vector<Token> tokens_from_pptokens(std::pair<index_t, std::vector<Gram::TreeNode>> Tree); // phase 7.a +std::vector<Token> tokens_from_pptokens(std::vector<Token> pp_tokens); // phase 7.a std::pair<index_t, std::vector<Gram::TreeNode>> analysis(std::vector<Token>); // phase 7.b void translate(); // phase 7.c void instantiate(); // phase 8 |