// Skeleton of a C++ front end organised along the translation phases of the
// C++ standard, plus GoogleTest cases exercising the implemented parts.

#include "cpp.h"

#include "bnf.h"
#include "cppbnf.h"
#include "grammer.h"
#include "minicc.h"
#include "debug.h"

#include <gtest/gtest.h>

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

using namespace Gram;

CPP::CPP(){}

CPP::~CPP(){}

// Phase 1: Map physical character set to basic source character set
void CPP::source_charset_map()
{
 // TODO
}

// Phase 2: Escape backslashed line endings
void CPP::backslash_escape()
{
 // TODO
}

namespace {

// Turns every character of `code` into one Token whose type and value are both
// that single character, tagged with the source location of the character.
//
// Locations start at {1, 1}. A token is stored with the location of the
// character itself; only afterwards is the location advanced for the next
// character: '\n' moves to column 1 of the next line, any other printable
// character moves one column to the right, non-printable characters leave
// the location unchanged.
std::vector<Token> sourceToCharTokens(const std::string& code)
{
 std::vector<Token> result;
 result.reserve(code.size()); // exactly one token per input character

 Location location{1, 1};

 for (const char c : code) {
  // Record the token before advancing, so that the first character is
  // reported at the initial location and a newline on the line it ends.
  result.emplace_back(Token{std::string(1, c), std::string(1, c), location});

  if (c == '\n') {
   location.column = 1;
   location.line++;
  } else if (std::isprint(static_cast<unsigned char>(c))) {
   // The cast is required: passing a negative char to std::isprint is
   // undefined behaviour.
   location.column++;
  }
 }

 return result;
}

} // namespace

// Phase 3: Parse preprocessing tokens
//
// Stores `s` in m_code and its per-character tokens in m_charTokens, extends
// the "preprocessing-token" grammar so that it matches a whole file, and runs
// the grammar compiler on the character tokens.
//
// Returns the result of Gram::Compiler::compile(); tokens_from_pptokens() uses
// `first` as the index of the start node and `second` as the node list.
std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std::string& s)
{
 m_code = s;

 m_charTokens = sourceToCharTokens(s);

 // presumably the part of the lexical C++ BNF reachable from
 // "preprocessing-token" - confirm against SubBNF()
 auto bnf{SubBNF(GetCppBNFLex(), "preprocessing-token")};

 // add to bnf to match whole file
 bnf["file"] = {
  {"preprocessing-token-list"},
  {"whitespace-list", "preprocessing-token-list"}
 };
 bnf["preprocessing-token-list"] = {
  {"preprocessing-token-padded"},
  {"preprocessing-token-list", "preprocessing-token-padded"}
 };
 bnf["preprocessing-token-padded"] = {
  {"preprocessing-token"},
  {"preprocessing-token", "whitespace-list"}
 };
 bnf["whitespace-list"] = {
  {"whitespace-char"},
  {"whitespace-list", "whitespace-char" }
 };
 bnf["whitespace-char"] = {
  {" "}, {"\t"}, {"\n"}, {"\r"}
 };

 Gram::Compiler compiler(bnf, "file");

 // NOTE(review): debug is switched on before compile() and assigned true
 // again afterwards; the second assignment may have been meant to switch
 // it off again - confirm the intent.
 debug = true;
 std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens);

 debug = true;
 compiler.DumpTree();

 return Tree;
}

// Phase 4: Preprocessing
void CPP::preprocess()
{
 // TODO
}

// Phase 5: Map chars and strings to execution charset
void CPP::execution_charset_map()
{
 // TODO
}

// Phase 6: Concatenate adjacent string literals
void CPP::concatenate_strings()
{
 // TODO
}

// Returns the source text covered by the node `node_index` of `Tree`: the
// characters of m_code belonging to all token children below that node,
// concatenated from left to right.
std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tree)
{
 std::string result;

 // Explicit stack for an iterative depth-first traversal. Entries are child
 // ids, which denote either a token or another node of Tree.
 std::vector<int32_t> todo{static_cast<int32_t>(node_index)};

 while (!todo.empty()) {
  const int32_t current_index = todo.back();
  todo.pop_back();

  if (ChildIdIsToken(current_index)) {
   // token: append the character of the source it refers to
   result += m_code[TokenIdFromChildId(current_index)];
  } else {
   const TreeNode& node{Tree[current_index]};

   // Push the children in reverse order, so that the leftmost child is
   // popped first and the text is assembled from the beginning.
   todo.insert(todo.end(), node.child_ids.rbegin(), node.child_ids.rend());
  }
 }

 return result;
}

namespace {

// Node types of the preprocessing token tree that are turned into tokens by
// CPP::tokens_from_pptokens().
std::unordered_set<std::string> pp_types{
 "identifier",
 "pp-number",
 "character-literal",
 "user-defined-character-literal",
 "string-literal",
 "user-defined-string-literal",
 "preprocessing-op-or-punc"
};

} // namespace

// Phase 7.a: Create tokens from preprocessing tokens
//
// Walks the tree returned by preprocessing_tokenize() depth-first, starting at
// node Tree.first. Currently every node whose type is listed in pp_types is
// only printed to std::cout as "<type>: <text>"; the returned token list is
// still empty (see TODOs). A tree without nodes yields an empty list.
std::vector<Token> CPP::tokens_from_pptokens(std::pair<index_t, std::vector<TreeNode>> Tree)
{
 std::vector<Token> result;

 // "identifier" + value -> "identifier" + value, except identifiers from table 5.11, p.14 -> keyword as value, value
 // "pp-number" + value -> "literal" + value
 // "character-literal" -> "literal" + value
 // "user-defined-character-literal" -> "literal" + value
 // "string-literal" -> "literal" + value
 // "user-defined-string-literal" -> "literal" + value
 // "preprocessing-op-or-punc" -> value+value (operator,punctuator)

 // An empty node list has no start node that could be visited.
 if (Tree.second.empty())
  return result;

 // TODO: traverse Tree, creating Token list
 std::vector<index_t> todo(1, index_t(Tree.first));

 while (!todo.empty()) {
  const index_t current_index = todo.back();
  todo.pop_back();

  const TreeNode& node{Tree.second[current_index]};

  // visit node
  if (pp_types.find(node.type) != pp_types.end()) {
   // TODO
   std::cout << node.type << ": " << valueOfNode(current_index, Tree.second) << std::endl;
  } else {
   // Only descend further if the node was not handled above. Children
   // are pushed in reverse order, so that the leftmost one is visited
   // first; token children are skipped, only nodes are queued.
   for (auto it = node.child_ids.rbegin(); it != node.child_ids.rend(); ++it) {
    const int32_t child = *it;
    if (!ChildIdIsToken(child))
     todo.push_back(child);
   }
  }
 }

 return result;
}

// Replaces the type of every "preprocessing-op-or-punc" token by the value of
// that token; all other tokens are left untouched.
// TODO: remove in favor of tokens_from_pptokens()
void CPP::PreprocessorTokensToTokens(std::vector<Token>& tokens)
{
 for (auto& token : tokens) {
  if (token.type == "preprocessing-op-or-punc")
   token.type = token.value;
 }
}

// Phase 7.b: Grammar Analysis
//
// Placeholder: ignores its argument and returns start index 0 with an empty
// node list.
// NOTE(review): unlike the other phases this is defined as a free function,
// not as CPP::analysis - confirm against the declaration in cpp.h.
std::pair<index_t, std::vector<TreeNode>> analysis(std::vector<Token>)
{
 return {0 , {}};
}

// Phase 7.c: Translate
void CPP::translate()
{
 // TODO
}

// Phase 8: Instantiate objects
void CPP::instantiate()
{
 // TODO
}

// Phase 9: Link libraries
void CPP::link()
{
 // TODO
}

// phases of translation, according to standard
//
// The intended call sequence is disabled until the signatures of the phases
// fit together, so this function currently does nothing with `code`.
void CPP::translate(const std::string& code)
{
#if 0 // fix signatures!
 source_charset_map();
 backslash_escape();
 preprocessing_tokenize(code);
 preprocess();
 execution_charset_map();
 concatenate_strings();
 tokens_from_pptokens();
 analysis();
 translate();
 instantiate();
 link();
#endif
}

// Test fixture for the CPP tests; it holds no state.
class CppTest: public ::testing::Test
{
protected:
 CppTest() {
  //debug = true;
 }
 ~CppTest() {
 }
};

#if 0
TEST_F(CppTest, preprocessing_tokenize) {
 CPP cpp;
 auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }");

 cpp.tokens_from_pptokens(ppTree);
}
#endif

// Tokenizes a short input and feeds the resulting tree to
// tokens_from_pptokens(); there are no assertions.
TEST_F(CppTest, preprocessing_tokenize2) {
 CPP cpp;
 auto ppTree = cpp.preprocessing_tokenize("in ma");

 cpp.tokens_from_pptokens(ppTree);
}

#if 0
TEST(Cpp, translate) {
 CPP::translate();
}
#endif