From 8256280b348b4b53fff35c9101ced0a8dfb2c58e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sat, 31 Oct 2020 18:10:58 +0100 Subject: CPP::translate() (WIP), documentation, bugfixing --- Makefile | 2 +- TODO | 10 +--------- cpp.cpp | 50 +++++++++++++++++++++++++++++++------------------- cpp.h | 10 ++++++---- grammer.cpp | 4 +++- grammer.h | 4 ++-- minicc.cpp | 8 +++++++- minicc.h | 8 ++++++-- 8 files changed, 57 insertions(+), 39 deletions(-) diff --git a/Makefile b/Makefile index 237548b..a600db3 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ TESTSRC=\ SRC=$(PROGSRC) mcc.cpp all: test-$(PROJECTNAME) mcc - ./test-$(PROJECTNAME) + ./test-$(PROJECTNAME) # --gtest_filter='*preprocessing_tokenize*' # testsuite ---------------------------------------------- test-$(PROJECTNAME): $(TESTSRC:.cpp=.o) diff --git a/TODO b/TODO index 58b4f2b..faafda7 100644 --- a/TODO +++ b/TODO @@ -1,9 +1 @@ - - Token() = default; - Token(const std::string& s) { type = s; } // Assign type via "=" from string - -start symbol implicitly from bnf -validate bnf: no empty types or values, or empty target lists - -map -> unordered_map -set -> unordered_set +Update cppbnf.cpp to n4860 diff --git a/cpp.cpp b/cpp.cpp index 6e8a28e..563ba4c 100644 --- a/cpp.cpp +++ b/cpp.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -19,7 +20,16 @@ using namespace Gram; namespace fs = std::filesystem; -CPP::CPP(){} +CPP::CPP(): map_translation_unit ({ + {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", + [&](fs::path& path, index_t node_id) + { + //std::cout << "DEBUG: " << path << ", " << node_id << ", " << valueOfNode(node_id, m_nodes) << ", " << m_nodes[node_id].node_id << ", " << m_nodes[node_id].pos.node_id << std::endl; + } + }, +}) +{ +} CPP::~CPP(){} @@ -66,6 +76,8 @@ void CPP::concatenate_strings() std::string CPP::valueOfNode(index_t node_index, const std::vector& Tree) { std::string result; + std::optional pos0; + index_t last_index; std::vector todo(1, int32_t(node_index)); @@ -75,7 +87,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector& Tr // visit node if token if (ChildIdIsToken(current_index)) { - result += m_code[TokenIdFromChildId(current_index)]; + if (!pos0) { + pos0 = m_tokens[TokenIdFromChildId(current_index)].location.pos; + } + last_index = TokenIdFromChildId(current_index); } else { const TreeNode &node{Tree[current_index]}; @@ -87,7 +102,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector& Tr } } - return result; + if (!pos0) + throw std::runtime_error("ICE: Node value not available"); + + return m_code.substr(*pos0, m_tokens[last_index].location.pos - *pos0) + m_tokens[last_index].value; }; namespace { @@ -203,16 +221,16 @@ std::vector CPP::tokens_from_pptokens(const std::vector& pp_tokens if (pp_types.find(token.type) != pp_types.end()) { if (token.type == "identifier") { if (keywords.find(token.value) != keywords.end()) - result.emplace_back(Token{token.value, token.value}); + result.emplace_back(Token{token.value, token.value, token.location}); else - result.emplace_back(Token{"identifier"s, token.value}); + result.emplace_back(Token{"identifier"s, token.value, token.location}); } else if (token.type == "preprocessing-op-or-punc") - result.emplace_back(Token{token.value, token.value}); + result.emplace_back(Token{token.value, token.value, token.location}); else - result.emplace_back(Token{"literal", token.value}); + result.emplace_back(Token{"literal", token.value, token.location}); } else - throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s); + throw std::runtime_error("Unhandled preprocessing token: "s + token.toString()); } return result; } @@ -227,14 +245,6 @@ std::vector CPP::analysis(const std::vector& tokens) return compiler.compile(tokens); } -namespace { - - CPP::map_type map_translation_unit { - {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", [](){}}, - }; - -} // anonymous namespace - void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path) { fs::path current_path{parent_path / m_nodes[node_id].type}; @@ -242,7 +252,7 @@ void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path) // execute callbacks auto it{map.find(current_path.generic_string())}; if (it != map.end()) { - std::cout << "DEBUG: Found " << current_path << std::endl; + it->second(current_path, node_id); } // recurse tree @@ -277,6 +287,8 @@ void CPP::link() // phases of translation, according to standard void CPP::compile(const std::string& code) { + m_code = code; + source_charset_map(); // phase 1 backslash_escape(); // phase 2 @@ -289,8 +301,8 @@ void CPP::compile(const std::string& code) concatenate_strings(); // phase 6 - auto tokens = tokens_from_pptokens(pp_tokens); // phase 7a - m_nodes = analysis(tokens); // phase 7b + m_tokens = tokens_from_pptokens(pp_tokens); // phase 7a + m_nodes = analysis(m_tokens); // phase 7b translate(); // phase 7c instantiate(); // phase 8 diff --git a/cpp.h b/cpp.h index 3d82751..6db7146 100644 --- a/cpp.h +++ b/cpp.h @@ -33,13 +33,15 @@ public: std::vector getCode(); std::vector getData(); - typedef std::unordered_map> map_type; - private: - std::string m_code; // input / start - std::vector m_charTokens; // result of phase 3 + typedef std::unordered_map> map_type; + + std::string m_code; // input from compile() + std::vector m_tokens; // result of phase 7.a std::vector m_nodes; // result of phase 7.b void traverse(index_t node_id, map_type& map, fs::path parent_path = "/"); + + CPP::map_type map_translation_unit; }; diff --git a/grammer.cpp b/grammer.cpp index d7afaef..31a4bbf 100644 --- a/grammer.cpp +++ b/grammer.cpp @@ -74,7 +74,9 @@ void Compiler::DumpTree() std::string line(indent, ' '); if (ChildIdIsToken(current_index)) { index_t token_id {TokenIdFromChildId(current_index)}; - line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s; + line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type; + if (tokens[token_id].value != tokens[token_id].type) + line += "("s + tokens[token_id].value + ")"s; } else { auto& node {nodes[current_index]}; line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant); diff --git a/grammer.h b/grammer.h index a8a3356..e179a9e 100644 --- a/grammer.h +++ b/grammer.h @@ -22,8 +22,8 @@ struct NodePosition { // token_id: index into token list // node_id: index into tree node list struct TreeNode { - NodePosition pos; // position of this node in tree - index_t node_id{}; + NodePosition pos; // position of this node in tree (i.e. parent node_id + child_pos in parent) + index_t node_id{}; // this node's id std::string type; index_t variant; // bnf[type][variant] diff --git a/minicc.cpp b/minicc.cpp index d180517..88cb6ab 100644 --- a/minicc.cpp +++ b/minicc.cpp @@ -50,7 +50,13 @@ void Location::advance(bool newline) } } -std::string Location::toString() +std::string Location::toString() const { return std::to_string(line) + ":"s + std::to_string(column); } + +std::string Token::toString() const +{ + return location.toString() + ": "s + value + " ("s + type + ")"s; +} + diff --git a/minicc.h b/minicc.h index 92678a1..28b494a 100644 --- a/minicc.h +++ b/minicc.h @@ -1,3 +1,5 @@ +// Common definitions + #pragma once #include @@ -19,8 +21,8 @@ struct Location { size_t column{1}; size_t pos{0}; - void advance(bool newline = false); - std::string toString(); + void advance(bool newline = false); ///< advance 1 char + std::string toString() const; }; bool operator==(const Location &a, const Location &b); @@ -29,6 +31,8 @@ struct Token { std::string type; std::string value; Location location; + + std::string toString() const; }; // For printing via Google Test -- cgit v1.2.3