diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-02-20 23:26:57 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-02-20 23:26:57 +0100 |
commit | 10c2b7f9b6676dafd62d0eeda507b5ee5c6db216 (patch) | |
tree | 705885f41d9224e8678578e99db9f80af8136e94 /cpp.cpp | |
parent | ba8520d3435c75c2568c05f1333966a4c1a4d69b (diff) |
Grammar applied to lex (WIP)
Diffstat (limited to 'cpp.cpp')
-rw-r--r-- | cpp.cpp | 121 |
1 files changed, 113 insertions, 8 deletions
@@ -9,6 +9,14 @@ #include <gtest/gtest.h> #include <gmock/gmock.h> +#include <unordered_set> + +using namespace Gram; + +CPP::CPP(){} + +CPP::~CPP(){} + // Phase 1: Map physical character set to basic source character set void CPP::source_charset_map() { @@ -45,9 +53,11 @@ std::vector<Token> sourceToCharTokens(const std::string& code) } // Phase 3: Parse preprocessing tokens -void CPP::preprocessing_tokenize(const std::string& s) +std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std::string& s) { - auto charTokens {sourceToCharTokens(s)}; + m_code = s; + + m_charTokens = sourceToCharTokens(s); auto bnf{SubBNF(GetCppBNFLex(), "preprocessing-token")}; @@ -72,8 +82,12 @@ void CPP::preprocessing_tokenize(const std::string& s) {" "}, {"\t"}, {"\n"}, {"\r"} }; Gram::Compiler compiler(bnf, "file"); + std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens); + debug = true; - auto Tree = compiler.compile(charTokens); + compiler.DumpTree(); + + return Tree; } // Phase 4: Preprocessing @@ -94,12 +108,84 @@ void CPP::concatenate_strings() // TODO } -// Phase 7: Create tokens from preprocessing tokens -void CPP::tokens_from_pptokens() +std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tree) { - // TODO + std::string result; + + std::vector<int32_t> todo(1, int32_t(node_index)); + + while (!todo.empty()) { + int32_t current_index = todo.back(); + todo.pop_back(); + + // visit node if token + if (ChildIdIsToken(current_index)) { + result += m_code[TokenIdFromChildId(current_index)]; + } else { + + const TreeNode &node{Tree[current_index]}; + + // iterate backwards in childs, to get depth-first search in tree, from the beginning + std::for_each(node.child_ids.rbegin(), node.child_ids.rend(), [&](int32_t child){ + todo.push_back(child); + }); + } + } + + return result; +}; + +namespace { + std::unordered_set<std::string> pp_types{ + "identifier", + "pp-number", + "character-literal", + "user-defined-character-literal", + "string-literal", + "user-defined-string-literal", + "preprocessing-op-or-punc" + }; } +// Phase 7.a: Create tokens from preprocessing tokens +std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<TreeNode>> Tree) +{ + std::vector<Token> result; + + // "identifier" + value -> "identifier" + value, except identifiers from table 5.11, p.14 -> keyword as value, value + // "pp-number" + value -> "literal" + value + // "character-literal" -> "literal" + value + // "user-defined-character-literal" -> "literal" + value + // "string-literal" -> "literal" + value + // "user-defined-string-literal" -> "literal" + value + // "preprocessing-op-or-punc" -> value+value (operator,punctuator) + + // TODO: traverse Tree, creating Token list + std::vector<index_t> todo(1, index_t(Tree.first)); + + while (!todo.empty()) { + index_t current_index = todo.back(); + todo.pop_back(); + + TreeNode &node{Tree.second[current_index]}; + + // visit node + if (pp_types.find(node.type) != pp_types.end()) { // TODO + std::cout << node.type << ": " << valueOfNode(current_index, Tree.second) << std::endl; + } else { // only traverse further if not handled + + // iterate backwards in childs, to get depth-first search in tree, from the beginning + std::for_each(node.child_ids.rbegin(), node.child_ids.rend(), [&](int32_t child){ + if (!ChildIdIsToken(child)) + todo.push_back(child); + }); + } + } + + return result; +} + +// TODO: remove in favor of tokens_from_pptokens() void CPP::PreprocessorTokensToTokens(std::vector<Token>& tokens) { for (auto& i : tokens) { @@ -108,6 +194,18 @@ void CPP::PreprocessorTokensToTokens(std::vector<Token>& tokens) } } +// Phase 7.b: Grammar Analysis +std::pair<index_t, std::vector<Gram::TreeNode>> analysis(std::vector<Token>) +{ + return {0 , {}}; +} + +// Phase 7.c: Translate +void CPP::translate() +{ + // TODO +} + // Phase 8: Instantiate objects void CPP::instantiate() { @@ -123,6 +221,7 @@ void CPP::link() // phases of translation, according to standard void CPP::translate(const std::string& code) { +#if 0 // fix signatures! source_charset_map(); backslash_escape(); preprocessing_tokenize(code); @@ -130,22 +229,28 @@ void CPP::translate(const std::string& code) execution_charset_map(); concatenate_strings(); tokens_from_pptokens(); + analysis(); + translate(); instantiate(); link(); +#endif } class CppTest: public ::testing::Test { protected: CppTest() { - debug = false; + //debug = true; } ~CppTest() { } }; TEST_F(CppTest, preprocessing_tokenize) { - CPP::preprocessing_tokenize("int main() { return 1; }"); + CPP cpp; + auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }"); + + cpp.tokens_from_pptokens(ppTree); } #if 0 |