summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it>, 2020-03-21 15:38:05 +0100
committer: Roland Reichwein <mail@reichwein.it>, 2020-03-21 15:38:05 +0100
commit: 3cc139bce0283018473d4906ee2ea5f40f771255 (patch)
tree: 658956ab52b8892d419a2ba905f4c41f21f6d926
parent: 74350b52fee9f576a1cc71d99cfd4ebdf5a73e0f (diff)
Add lexer to CPP
-rw-r--r-- cpp.cpp 120
-rw-r--r-- cpp.h 4
2 files changed, 39 insertions, 85 deletions
diff --git a/cpp.cpp b/cpp.cpp
index 3d8c20d..bbb6e27 100644
--- a/cpp.cpp
+++ b/cpp.cpp
@@ -2,9 +2,10 @@
#include "bnf.h"
#include "cppbnf.h"
+#include "debug.h"
+#include "lexer.h"
#include "grammer.h"
#include "minicc.h"
-#include "debug.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
@@ -29,66 +30,14 @@ void CPP::backslash_escape()
// TODO
}
-namespace {
-
-std::vector<Token> sourceToCharTokens(const std::string& code)
-{
- std::vector<Token> result;
-
- Location location{1, 1};
-
- for (char c: code) {
- if (c == '\n') {
- location.column = 1;
- location.line++;
- } else if (std::isprint(c)) {
- location.column++;
- }
-
- result.emplace_back(Token{std::string(1, c), std::string(1, c), location});
- }
- return result;
-}
-
-}
-
// Phase 3: Parse preprocessing tokens
-std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std::string& s)
+std::vector<Token> CPP::preprocessing_tokenize(const std::string& s)
{
- m_code = s;
-
- m_charTokens = sourceToCharTokens(s);
-
auto bnf{SubBNF(GetCppBNFLex(), "preprocessing-token")};
+
+ Lex::Lexer lexer(bnf, "preprocessing-token");
- // add to bnf to match whole file
- bnf["file"] = {
- {"preprocessing-token-list"},
- {"whitespace-list", "preprocessing-token-list"}
- };
- bnf["preprocessing-token-list"] = {
- {"preprocessing-token-padded"},
- {"preprocessing-token-list", "preprocessing-token-padded"}
- };
- bnf["preprocessing-token-padded"] = {
- {"preprocessing-token"},
- {"preprocessing-token", "whitespace-list"}
- };
- bnf["whitespace-list"] = {
- {"whitespace-char"},
- {"whitespace-list", "whitespace-char" }
- };
- bnf["whitespace-char"] = {
- {" "}, {"\t"}, {"\n"}, {"\r"}
- };
- Gram::Compiler compiler(bnf, "file");
- debug = true;
- std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens);
-
- debug = true;
- compiler.DumpTree();
-
- return Tree;
+ return lexer.Lex(s);
}
// Phase 4: Preprocessing
@@ -137,7 +86,7 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr
};
namespace {
- std::unordered_set<std::string> pp_types{
+ std::unordered_set<std::string> pp_types {
"identifier",
"pp-number",
"character-literal",
@@ -146,10 +95,16 @@ namespace {
"user-defined-string-literal",
"preprocessing-op-or-punc"
};
+
+ std::unordered_set<std::string> keywords {
+ "alignas",
+ "alignof",
+ // ... Keywords table, p.15
+ };
}
// Phase 7.a: Create tokens from preprocessing tokens
-std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<TreeNode>> Tree)
+std::vector<Token> CPP::tokens_from_pptokens(std::vector<Token> pp_tokens)
{
std::vector<Token> result;
@@ -161,28 +116,23 @@ std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<Tree
// "user-defined-string-literal" -> "literal" + value
// "preprocessing-op-or-punc" -> value+value (operator,punctuator)
- // TODO: traverse Tree, creating Token list
- std::vector<index_t> todo(1, index_t(Tree.first));
-
- while (!todo.empty()) {
- index_t current_index = todo.back();
- todo.pop_back();
-
- TreeNode &node{Tree.second[current_index]};
-
- // visit node
- if (pp_types.find(node.type) != pp_types.end()) { // TODO
- std::cout << node.type << ": " << valueOfNode(current_index, Tree.second) << std::endl;
- } else { // only traverse further if not handled
-
- // iterate backwards in childs, to get depth-first search in tree, from the beginning
- std::for_each(node.child_ids.rbegin(), node.child_ids.rend(), [&](int32_t child){
- if (!ChildIdIsToken(child))
- todo.push_back(child);
- });
- }
+ for (auto& token: pp_tokens) {
+ if (pp_types.find(token.type) != pp_types.end()) {
+ if (token.type == "identifier") {
+#if 0
+ if (keywords.find(token.value) != keywords.end())
+ result.emplace_back("keyword", token.value);
+ else
+#endif
+ result.emplace_back(Token{"identifier"s, token.value});
+ }
+ else if (token.type == "preprocessing-op-or-punc")
+ result.emplace_back(Token{token.value, token.value});
+ else
+ result.emplace_back(Token{"literal", token.value});
+ } else
+ throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s);
}
-
return result;
}
@@ -238,12 +188,16 @@ protected:
}
};
-#if 0
+#if 1
TEST_F(CppTest, preprocessing_tokenize) {
CPP cpp;
- auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }");
+ auto pp_tokens = cpp.preprocessing_tokenize("int main() { return 1; }");
- cpp.tokens_from_pptokens(ppTree);
+ ASSERT_EQ(pp_tokens.size(), 9);
+
+ auto tokens = cpp.tokens_from_pptokens(pp_tokens);
+
+ ASSERT_EQ(tokens.size(), 9);
}
#endif
diff --git a/cpp.h b/cpp.h
index 724e08b..282c83d 100644
--- a/cpp.h
+++ b/cpp.h
@@ -17,11 +17,11 @@ std::string valueOfNode(index_t node_index, const std::vector<Gram::TreeNode>& T
// phases of translation, according to standard
void source_charset_map(); // phase 1
void backslash_escape(); // phase 2
-std::pair<index_t, std::vector<Gram::TreeNode>> preprocessing_tokenize(const std::string& s); // phase 3
+std::vector<Token> preprocessing_tokenize(const std::string& s); // phase 3
void preprocess(); // phase 4
void execution_charset_map(); // phase 5
void concatenate_strings(); // phase 6
-std::vector<Token> tokens_from_pptokens(std::pair<index_t, std::vector<Gram::TreeNode>> Tree); // phase 7.a
+std::vector<Token> tokens_from_pptokens(std::vector<Token> pp_tokens); // phase 7.a
std::pair<index_t, std::vector<Gram::TreeNode>> analysis(std::vector<Token>); // phase 7.b
void translate(); // phase 7.c
void instantiate(); // phase 8