#include "cpp.h" #include "asm/intel64/encode.h" #include "asm/operators.h" #include "bnf.h" #include "cppbnf.h" #include "debug.h" #include "flowgraph/node.h" #include "lexer.h" #include "grammer.h" #include "minicc.h" #include #include #include #include #include #include using namespace Gram; namespace fs = std::filesystem; CPP::CPP() { node_eval_map = getNodeEvalMap(); } CPP::~CPP(){} // Phase 1: Map physical character set to basic source character set void CPP::source_charset_map() { // TODO } // Phase 2: Escape backslashed line endings void CPP::backslash_escape() { // TODO } // Phase 3: Parse preprocessing tokens, TODO: discard comments std::vector CPP::preprocessing_tokenize(const std::string& s) { auto bnf{SubBNF(CPPBNF::GetCppBNFLex(), "preprocessing-token")}; Lex::Lexer lexer(bnf, "preprocessing-token"); return lexer.Lex(s); } // Phase 4: Preprocessing void CPP::preprocess() { // TODO } // Phase 5: Map chars and strings to execution charset void CPP::execution_charset_map() { // TODO } // Phase 6: Concatenate adjacent string literals void CPP::concatenate_strings() { // TODO } std::string CPP::valueOfNode(index_t node_index) const { std::string result; std::optional pos0; index_t last_index; std::vector todo(1, int32_t(node_index)); while (!todo.empty()) { int32_t current_index = todo.back(); todo.pop_back(); // visit node if token if (ChildIdIsToken(current_index)) { if (!pos0) { pos0 = m_tokens[TokenIdFromChildId(current_index)].location.pos; } last_index = TokenIdFromChildId(current_index); } else { const TreeNode &node{m_nodes[current_index]}; // iterate backwards in childs, to get depth-first search in tree, from the beginning std::for_each(node.child_ids.rbegin(), node.child_ids.rend(), [&](int32_t child){ todo.push_back(child); }); } } if (!pos0) throw std::runtime_error("ICE: Node value not available"); return m_source.substr(*pos0, m_tokens[last_index].location.pos - *pos0) + m_tokens[last_index].value; }; std::string CPP::typeOfNode(index_t node_id) const { if (node_id >= m_nodes.size()) throw std::runtime_error("CPP::typeOfNode(): node_id="s + std::to_string(node_id) + ", m_nodes.size()="s + std::to_string(m_nodes.size())); return m_nodes[node_id].type; } namespace { std::unordered_set pp_types { "identifier", "pp-number", "character-literal", "user-defined-character-literal", "string-literal", "user-defined-string-literal", "preprocessing-op-or-punc" }; std::unordered_set keywords { "alignas", "alignof", "asm", "auto", "bool", "break", "case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "concept", "const", "consteval", "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "nullptr", "operator", "private", "protected", "public", "register", "reinterpret_cast", "requires", "return", "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "template", "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual", "void", "volatile", "wchar_t", "while", }; } // Phase 7.a: Create tokens from preprocessing tokens std::vector CPP::tokens_from_pptokens(const std::vector& pp_tokens) { std::vector result; // "identifier" + value -> "identifier" + value, except identifiers from table 5.11, p.14 -> keyword as value, value // "pp-number" + value -> "literal" + value // "character-literal" -> "literal" + value // "user-defined-character-literal" -> "literal" + value // "string-literal" -> "literal" + value // "user-defined-string-literal" -> "literal" + value // "preprocessing-op-or-punc" -> value+value (operator,punctuator) for (auto& token: pp_tokens) { if (pp_types.find(token.type) != pp_types.end()) { if (token.type == "identifier") { if (keywords.find(token.value) != keywords.end()) result.emplace_back(Token{token.value, token.value, token.location}); else result.emplace_back(Token{"identifier"s, token.value, token.location}); } else if (token.type == "preprocessing-op-or-punc") result.emplace_back(Token{token.value, token.value, token.location}); else result.emplace_back(Token{"literal", token.value, token.location}); } else throw std::runtime_error("Unhandled preprocessing token: "s + token.toString()); } return result; } // Phase 7.b: Grammar Analysis std::vector CPP::analysis(const std::vector& tokens) { auto bnf = SubBNF(CPPBNF::GetCppBNFGram(), "translation-unit"); Gram::Compiler compiler(bnf, "translation-unit"); return compiler.compile(tokens); } std::string CPP::locationOfNode(index_t node_index) const { if (node_index >= m_nodes.size()) throw std::runtime_error("ICE: locationOfNode(): Bad node_index "s + std::to_string(node_index) + " vs. "s + std::to_string(m_nodes.size())); for (const auto& child_id: m_nodes[node_index].child_ids) { if (ChildIdIsNode(child_id)) { std::string location{locationOfNode(child_id)}; if (location.size() > 0) return location; } else { // ChildIdIsToken return m_tokens[TokenIdFromChildId(child_id)].location.toString(); } } return ""; // not found } void CPP::compileError(index_t node_id, const std::string& msg) const { std::string location{locationOfNode(node_id)}; if (location.size() == 0) location = "Node "s + std::to_string(node_id) + "doesn't have any token"; throw std::runtime_error("Compile error: "s + location + ": "s + msg); } std::string CPP::typeOfChild(int32_t child_id) const { if (ChildIdIsToken(child_id)) { index_t token_id {TokenIdFromChildId(child_id)}; return m_tokens[token_id].type; } else { // ChildIdIsNode return m_nodes[child_id].type; } } bool CPP::childTypesOfNodeMatch(index_t node_id, const std::vector& pattern) const { const std::vector& child_ids{m_nodes[node_id].child_ids}; if (child_ids.size() != pattern.size()) return false; // mismatch for (size_t i = 0; i < child_ids.size(); i++) { if (pattern[i] != "" && typeOfChild(child_ids[i]) != pattern[i]) return false; // mismatch } return true; // match } bool CPP::childTypesOfChildMatch(index_t index, index_t child_index, const std::vector& pattern) const { if (index >= m_nodes.size()) return false; if (child_index >= m_nodes[index].child_ids.size()) return false; if (!ChildIdIsNode(m_nodes[index].child_ids[child_index])) return false; return childTypesOfNodeMatch(m_nodes[index].child_ids[child_index], pattern); } // Get value for specified node, at bnf rule index child_index std::any CPP::getValue(index_t node_id, index_t child_index) { size_t num_values_on_top {m_nodes[node_id].child_ids.size()}; if (num_values_on_top > mValues.size()) throw std::runtime_error("ICE: Expected at least "s + std::to_string(num_values_on_top) + " but only have "s + std::to_string(mValues.size()) + " in total"s); if (child_index >= num_values_on_top) throw std::runtime_error("ICE: Requested value at index "s + std::to_string(child_index) + " but only have "s + std::to_string(num_values_on_top) + " values on top"s); return *(mValues.end() - num_values_on_top + child_index); } std::string CPP::getType(index_t node_id, index_t child_index) { if (node_id >= m_nodes.size()) throw std::runtime_error("ICE: Requested node at index "s + std::to_string(node_id) + " but only have "s + std::to_string(m_nodes.size()) + " nodes"s); if (child_index >= m_nodes[node_id].child_ids.size()) throw std::runtime_error("ICE: "s + m_nodes[node_id].type + ": Requested child at index "s + std::to_string(child_index) + " but only have "s + std::to_string(m_nodes[node_id].child_ids.size()) + " childs"s); int32_t child_id {m_nodes[node_id].child_ids[child_index]}; return typeOfChild(child_id); } std::string CPP::ruleString(index_t node_id) { if (node_id >= m_nodes.size()) throw std::runtime_error("ICE: Requested node at index "s + std::to_string(node_id) + " but only have "s + std::to_string(m_nodes.size()) + " nodes"s); std::string result{m_nodes[node_id].type + ":"}; for (const auto child_id: m_nodes[node_id].child_ids) { result += " " + typeOfChild(child_id); } return result; } namespace { // Promote last op of specified graph if appropriate (i.e. if not yet big enough) void promoteLastOp(FlowGraph::Graph& graph, FlowGraph::DataType targetType) { FlowGraph::DataType sourceType {graph.lastOp()->destination().type()}; if (targetType != sourceType) { FlowGraph::Data temporary {FlowGraph::MakeTemporary(graph.scope(), targetType)}; std::shared_ptr node {std::make_shared(FlowGraph::UnaryOperationType::Resize, temporary, graph.lastOp()->destination())}; graph.append(node); } } // Promote last ops of specified graphs until width is equal void promoteLastOps(FlowGraph::Graph& graph0, FlowGraph::Graph& graph1) { // Find target type FlowGraph::DataType targetType = FlowGraph::DataType::Int; if (graph0.lastOp()->destination().type() == FlowGraph::DataType::LongLong || graph1.lastOp()->destination().type() == FlowGraph::DataType::LongLong) targetType = FlowGraph::DataType::LongLong; // TODO: Support unsigned variants // promote both graph results to target types promoteLastOp(graph0, targetType); promoteLastOp(graph1, targetType); } std::unordered_map binaryOperationsMap { {"+", FlowGraph::BinaryOperationType::Add}, {"-", FlowGraph::BinaryOperationType::Subtract}, {"*", FlowGraph::BinaryOperationType::Multiply}, {"/", FlowGraph::BinaryOperationType::Divide}, {"%", FlowGraph::BinaryOperationType::Modulo}, {"<<", FlowGraph::BinaryOperationType::ShiftLeft}, {">>", FlowGraph::BinaryOperationType::ShiftRight}, }; } // namespace FlowGraph::Graph CPP::BinaryOperation(index_t index) { if (getValue(index, 0).type() != typeid(FlowGraph::Graph)) throw std::runtime_error("ICE: Bad data type for argument 1: "s + demangle(getValue(index, 0).type())); if (getValue(index, 2).type() != typeid(FlowGraph::Graph)) throw std::runtime_error("ICE: Bad data type for argument 3: "s + demangle(getValue(index, 2).type())); FlowGraph::Graph value0{std::any_cast(getValue(index, 0))}; FlowGraph::Graph value1{std::any_cast(getValue(index, 2))}; promoteLastOps(value0, value1); std::shared_ptr lastOp0{value0.lastOp()}; std::shared_ptr lastOp1{value1.lastOp()}; FlowGraph::Graph result{value0}; result.append(value1); FlowGraph::Data destination{FlowGraph::MakeTemporary(result.scope(), lastOp0->destination().type())}; std::string typeString{getType(index, 1)}; auto it{binaryOperationsMap.find(typeString)}; FlowGraph::BinaryOperationType type{}; if (it != binaryOperationsMap.end()) { type = it->second; } else { throw std::runtime_error("ICE: Unknown operation: "s + typeString); } std::shared_ptr node {std::make_shared(type, destination, lastOp0->destination(), lastOp1->destination())}; result.append(node); return result; } std::unordered_map> CPP::getNodeEvalMap() { return { { "primary-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"literal"})) return getValue(index, 0); if (childTypesOfNodeMatch(index, {"(", "expression", ")"})) return getValue(index, 1); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "postfix-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"primary-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "unary-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"postfix-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "cast-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"unary-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "pm-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"cast-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "multiplicative-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"multiplicative-expression", "", "pm-expression"})) { return BinaryOperation(index); } if (childTypesOfNodeMatch(index, {"pm-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); } }, { "additive-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"additive-expression", "", "multiplicative-expression"})) { return BinaryOperation(index); } if (childTypesOfNodeMatch(index, {"multiplicative-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); } }, { "shift-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"shift-expression", "", "additive-expression"})) { return BinaryOperation(index); } if (childTypesOfNodeMatch(index, {"additive-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "compare-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"shift-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "relational-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"compare-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "equality-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"relational-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "and-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"equality-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "exclusive-or-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"and-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "inclusive-or-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"exclusive-or-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "logical-and-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"inclusive-or-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "logical-or-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"logical-and-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "conditional-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"logical-or-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "assignment-expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"conditional-expression"})) return getValue(index, 0); throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, { "expression", [&](index_t index) -> std::any { if (childTypesOfNodeMatch(index, {"assignment-expression"})) { if (getValue(index, 0).type() == typeid(FlowGraph::Graph)) { FlowGraph::Graph graph {std::any_cast(getValue(index, 0))}; mCPPContext.graph = graph; return graph; } else { throw std::runtime_error("ICE: expression: Unsupported argument type: "s + demangle(getValue(index, 0).type())); } } throw std::runtime_error("ICE: Unsupported childs: "s + ruleString(index)); // TODO } }, }; } // precondition: stack contains child values c1, ..., cn on top -> to be popped // postcondition: stack contains value on top -> to be pushed void CPP::getValueOfNode(index_t index) { size_t num_childs {m_nodes[index].child_ids.size()}; if (mValues.size() < num_childs) throw std::runtime_error("ICE: Expected num_childs elements on values stack at "s + locationOfNode(index)); auto function_it{node_eval_map.find(m_nodes[index].type)}; std::any result; if (function_it != node_eval_map.end()) { result = function_it->second(index); } else { //TODO: throw std::runtime_error("ICE: Node type not implemented: "s + m_nodes[index].type); } mValues.resize(mValues.size() - num_childs); mValues.push_back(result); } // pushes result onto stack void CPP::getValueOfToken(index_t index) { // TODO: also support the other tokens ... if (m_tokens[index].type == "literal") { // TODO: also support other types, different from Int FlowGraph::Data data{FlowGraph::MakeConstantInt(stoi(m_tokens[index].value))}; FlowGraph::Graph graph{{std::make_shared(data)}}; mValues.push_back(graph); } else { //throw std::runtime_error("ICE: Unsupported token: "s + m_tokens[index].type); mValues.push_back(std::any{}); } } void CPP::visitRecursive(index_t node_id) { const auto& childs {m_nodes[node_id].child_ids}; for (const auto child: childs) { if (ChildIdIsNode(child)) { visitRecursive(child); } else { getValueOfToken(TokenIdFromChildId(child)); } } getValueOfNode(node_id); } // Phase 7.c: Translate void CPP::translate() { if (m_nodes.size() == 0) throw std::runtime_error("ICE: Tree is empty"); // TODO: this could be implemented iteratively, via a stack? visitRecursive(0); } // Phase 8: Instantiate objects void CPP::instantiate() { // TODO: template instantiation Asm::toMachineCode(mCPPContext.graph, mSegment); } // Phase 9: Link libraries void CPP::link() { // TODO // mSegment -> elf mCode = mSegment.getCode(); } // phases of translation, according to standard void CPP::compile(const std::string& source) { m_source = source; source_charset_map(); // phase 1 backslash_escape(); // phase 2 auto pp_tokens = preprocessing_tokenize(m_source); // phase 3 preprocess(); // phase 4 execution_charset_map(); // phase 5 concatenate_strings(); // phase 6 m_tokens = tokens_from_pptokens(pp_tokens); // phase 7a m_nodes = analysis(m_tokens); // phase 7b translate(); // phase 7c instantiate(); // phase 8 link(); // phase 9 } std::vector CPP::getCode() { return mCode; } std::vector CPP::getData() { return {}; // TODO }