diff options
author | Roland Reichwein <mail@reichwein.it> | 2020-10-31 18:10:58 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2020-10-31 18:10:58 +0100 |
commit | 8256280b348b4b53fff35c9101ced0a8dfb2c58e (patch) | |
tree | e745b79e98c853cf1891372e8b2a926a5d81fff5 | |
parent | ce77838c4f32b9dc237f0c4b17d1f1e1741254d4 (diff) |
CPP::translate() (WIP), documentation, bugfixing
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | TODO | 10 | ||||
-rw-r--r-- | cpp.cpp | 50 | ||||
-rw-r--r-- | cpp.h | 10 | ||||
-rw-r--r-- | grammer.cpp | 4 | ||||
-rw-r--r-- | grammer.h | 4 | ||||
-rw-r--r-- | minicc.cpp | 8 | ||||
-rw-r--r-- | minicc.h | 8 |
8 files changed, 57 insertions, 39 deletions
@@ -89,7 +89,7 @@ TESTSRC=\ SRC=$(PROGSRC) mcc.cpp all: test-$(PROJECTNAME) mcc - ./test-$(PROJECTNAME) + ./test-$(PROJECTNAME) # --gtest_filter='*preprocessing_tokenize*' # testsuite ---------------------------------------------- test-$(PROJECTNAME): $(TESTSRC:.cpp=.o) @@ -1,9 +1 @@ - - Token() = default; - Token(const std::string& s) { type = s; } // Assign type via "=" from string - -start symbol implicitly from bnf -validate bnf: no empty types or values, or empty target lists - -map -> unordered_map -set -> unordered_set +Update cppbnf.cpp to n4860 @@ -11,6 +11,7 @@ #include <gmock/gmock.h> #include <functional> +#include <optional> #include <unordered_set> #include <unordered_map> #include <filesystem> @@ -19,7 +20,16 @@ using namespace Gram; namespace fs = std::filesystem; -CPP::CPP(){} +CPP::CPP(): map_translation_unit ({ + {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", + [&](fs::path& path, index_t node_id) + { + //std::cout << "DEBUG: " << path << ", " << node_id << ", " << valueOfNode(node_id, m_nodes) << ", " << m_nodes[node_id].node_id << ", " << m_nodes[node_id].pos.node_id << std::endl; + } + }, +}) +{ +} CPP::~CPP(){} @@ -66,6 +76,8 @@ void CPP::concatenate_strings() std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tree) { std::string result; + std::optional<size_t> pos0; + index_t last_index; std::vector<int32_t> todo(1, int32_t(node_index)); @@ -75,7 +87,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr // visit node if token if (ChildIdIsToken(current_index)) { - result += m_code[TokenIdFromChildId(current_index)]; + if (!pos0) { + pos0 = m_tokens[TokenIdFromChildId(current_index)].location.pos; + } + last_index = TokenIdFromChildId(current_index); } else { const TreeNode &node{Tree[current_index]}; @@ -87,7 +102,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr } } - return result; + if (!pos0) + throw std::runtime_error("ICE: Node value not available"); + + return m_code.substr(*pos0, m_tokens[last_index].location.pos - *pos0) + m_tokens[last_index].value; }; namespace { @@ -203,16 +221,16 @@ std::vector<Token> CPP::tokens_from_pptokens(const std::vector<Token>& pp_tokens if (pp_types.find(token.type) != pp_types.end()) { if (token.type == "identifier") { if (keywords.find(token.value) != keywords.end()) - result.emplace_back(Token{token.value, token.value}); + result.emplace_back(Token{token.value, token.value, token.location}); else - result.emplace_back(Token{"identifier"s, token.value}); + result.emplace_back(Token{"identifier"s, token.value, token.location}); } else if (token.type == "preprocessing-op-or-punc") - result.emplace_back(Token{token.value, token.value}); + result.emplace_back(Token{token.value, token.value, token.location}); else - result.emplace_back(Token{"literal", token.value}); + result.emplace_back(Token{"literal", token.value, token.location}); } else - throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s); + throw std::runtime_error("Unhandled preprocessing token: "s + token.toString()); } return result; } @@ -227,14 +245,6 @@ std::vector<Gram::TreeNode> CPP::analysis(const std::vector<Token>& tokens) return compiler.compile(tokens); } -namespace { - - CPP::map_type map_translation_unit { - {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", [](){}}, - }; - -} // anonymous namespace - void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path) { fs::path current_path{parent_path / m_nodes[node_id].type}; @@ -242,7 +252,7 @@ void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path) // execute callbacks auto it{map.find(current_path.generic_string())}; if (it != map.end()) { - std::cout << "DEBUG: Found " << current_path << std::endl; + it->second(current_path, node_id); } // recurse tree @@ -277,6 +287,8 @@ void CPP::link() // phases of translation, according to standard void CPP::compile(const std::string& code) { + m_code = code; + source_charset_map(); // phase 1 backslash_escape(); // phase 2 @@ -289,8 +301,8 @@ void CPP::compile(const std::string& code) concatenate_strings(); // phase 6 - auto tokens = tokens_from_pptokens(pp_tokens); // phase 7a - m_nodes = analysis(tokens); // phase 7b + m_tokens = tokens_from_pptokens(pp_tokens); // phase 7a + m_nodes = analysis(m_tokens); // phase 7b translate(); // phase 7c instantiate(); // phase 8 @@ -33,13 +33,15 @@ public: std::vector<uint8_t> getCode(); std::vector<uint8_t> getData(); - typedef std::unordered_map<std::string, std::function<void()>> map_type; - private: - std::string m_code; // input / start - std::vector<Token> m_charTokens; // result of phase 3 + typedef std::unordered_map<std::string, std::function<void(fs::path&, index_t)>> map_type; + + std::string m_code; // input from compile() + std::vector<Token> m_tokens; // result of phase 7.a std::vector<Gram::TreeNode> m_nodes; // result of phase 7.b void traverse(index_t node_id, map_type& map, fs::path parent_path = "/"); + + CPP::map_type map_translation_unit; }; diff --git a/grammer.cpp b/grammer.cpp index d7afaef..31a4bbf 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -74,7 +74,9 @@ void Compiler::DumpTree() std::string line(indent, ' '); if (ChildIdIsToken(current_index)) { index_t token_id {TokenIdFromChildId(current_index)}; - line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s; + line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type; + if (tokens[token_id].value != tokens[token_id].type) + line += "("s + tokens[token_id].value + ")"s; } else { auto& node {nodes[current_index]}; line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant); @@ -22,8 +22,8 @@ struct NodePosition { // token_id: index into token list // node_id: index into tree node list struct TreeNode { - NodePosition pos; // position of this node in tree - index_t node_id{}; + NodePosition pos; // position of this node in tree (i.e. parent node_id + child_pos in parent) + index_t node_id{}; // this node's id std::string type; index_t variant; // bnf[type][variant] @@ -50,7 +50,13 @@ void Location::advance(bool newline) } } -std::string Location::toString() +std::string Location::toString() const { return std::to_string(line) + ":"s + std::to_string(column); } + +std::string Token::toString() const +{ + return location.toString() + ": "s + value + " ("s + type + ")"s; +} + @@ -1,3 +1,5 @@ +// Common definitions + #pragma once #include <cstdlib> @@ -19,8 +21,8 @@ struct Location { size_t column{1}; size_t pos{0}; - void advance(bool newline = false); - std::string toString(); + void advance(bool newline = false); ///< advance 1 char + std::string toString() const; }; bool operator==(const Location &a, const Location &b); @@ -29,6 +31,8 @@ struct Token { std::string type; std::string value; Location location; + + std::string toString() const; }; // For printing via Google Test |