summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2020-10-31 18:10:58 +0100
committer Roland Reichwein <mail@reichwein.it> 2020-10-31 18:10:58 +0100
commit 8256280b348b4b53fff35c9101ced0a8dfb2c58e (patch)
tree e745b79e98c853cf1891372e8b2a926a5d81fff5
parent ce77838c4f32b9dc237f0c4b17d1f1e1741254d4 (diff)
CPP::translate() (WIP), documentation, bugfixing
-rw-r--r-- Makefile 2
-rw-r--r-- TODO 10
-rw-r--r-- cpp.cpp 50
-rw-r--r-- cpp.h 10
-rw-r--r-- grammer.cpp 4
-rw-r--r-- grammer.h 4
-rw-r--r-- minicc.cpp 8
-rw-r--r-- minicc.h 8
8 files changed, 57 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index 237548b..a600db3 100644
--- a/Makefile
+++ b/Makefile
@@ -89,7 +89,7 @@ TESTSRC=\
SRC=$(PROGSRC) mcc.cpp
all: test-$(PROJECTNAME) mcc
- ./test-$(PROJECTNAME)
+ ./test-$(PROJECTNAME) # --gtest_filter='*preprocessing_tokenize*'
# testsuite ----------------------------------------------
test-$(PROJECTNAME): $(TESTSRC:.cpp=.o)
diff --git a/TODO b/TODO
index 58b4f2b..faafda7 100644
--- a/TODO
+++ b/TODO
@@ -1,9 +1 @@
-
- Token() = default;
- Token(const std::string& s) { type = s; } // Assign type via "=" from string
-
-start symbol implicitly from bnf
-validate bnf: no empty types or values, or empty target lists
-
-map -> unordered_map
-set -> unordered_set
+Update cppbnf.cpp to n4860
diff --git a/cpp.cpp b/cpp.cpp
index 6e8a28e..563ba4c 100644
--- a/cpp.cpp
+++ b/cpp.cpp
@@ -11,6 +11,7 @@
#include <gmock/gmock.h>
#include <functional>
+#include <optional>
#include <unordered_set>
#include <unordered_map>
#include <filesystem>
@@ -19,7 +20,16 @@ using namespace Gram;
namespace fs = std::filesystem;
-CPP::CPP(){}
+CPP::CPP(): map_translation_unit ({
+ {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition",
+ [&](fs::path& path, index_t node_id)
+ {
+ //std::cout << "DEBUG: " << path << ", " << node_id << ", " << valueOfNode(node_id, m_nodes) << ", " << m_nodes[node_id].node_id << ", " << m_nodes[node_id].pos.node_id << std::endl;
+ }
+ },
+})
+{
+}
CPP::~CPP(){}
@@ -66,6 +76,8 @@ void CPP::concatenate_strings()
std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tree)
{
std::string result;
+ std::optional<size_t> pos0;
+ index_t last_index;
std::vector<int32_t> todo(1, int32_t(node_index));
@@ -75,7 +87,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr
// visit node if token
if (ChildIdIsToken(current_index)) {
- result += m_code[TokenIdFromChildId(current_index)];
+ if (!pos0) {
+ pos0 = m_tokens[TokenIdFromChildId(current_index)].location.pos;
+ }
+ last_index = TokenIdFromChildId(current_index);
} else {
const TreeNode &node{Tree[current_index]};
@@ -87,7 +102,10 @@ std::string CPP::valueOfNode(index_t node_index, const std::vector<TreeNode>& Tr
}
}
- return result;
+ if (!pos0)
+ throw std::runtime_error("ICE: Node value not available");
+
+ return m_code.substr(*pos0, m_tokens[last_index].location.pos - *pos0) + m_tokens[last_index].value;
};
namespace {
@@ -203,16 +221,16 @@ std::vector<Token> CPP::tokens_from_pptokens(const std::vector<Token>& pp_tokens
if (pp_types.find(token.type) != pp_types.end()) {
if (token.type == "identifier") {
if (keywords.find(token.value) != keywords.end())
- result.emplace_back(Token{token.value, token.value});
+ result.emplace_back(Token{token.value, token.value, token.location});
else
- result.emplace_back(Token{"identifier"s, token.value});
+ result.emplace_back(Token{"identifier"s, token.value, token.location});
}
else if (token.type == "preprocessing-op-or-punc")
- result.emplace_back(Token{token.value, token.value});
+ result.emplace_back(Token{token.value, token.value, token.location});
else
- result.emplace_back(Token{"literal", token.value});
+ result.emplace_back(Token{"literal", token.value, token.location});
} else
- throw std::runtime_error("Unhandled preprocessing token: "s + token.value + " ("s + token.type + ")"s);
+ throw std::runtime_error("Unhandled preprocessing token: "s + token.toString());
}
return result;
}
@@ -227,14 +245,6 @@ std::vector<Gram::TreeNode> CPP::analysis(const std::vector<Token>& tokens)
return compiler.compile(tokens);
}
-namespace {
-
- CPP::map_type map_translation_unit {
- {"/translation-unit/top-level-declaration-seq/top-level-declaration/declaration/function-definition", [](){}},
- };
-
-} // anonymous namespace
-
void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path)
{
fs::path current_path{parent_path / m_nodes[node_id].type};
@@ -242,7 +252,7 @@ void CPP::traverse(index_t node_id, map_type& map, fs::path parent_path)
// execute callbacks
auto it{map.find(current_path.generic_string())};
if (it != map.end()) {
- std::cout << "DEBUG: Found " << current_path << std::endl;
+ it->second(current_path, node_id);
}
// recurse tree
@@ -277,6 +287,8 @@ void CPP::link()
// phases of translation, according to standard
void CPP::compile(const std::string& code)
{
+ m_code = code;
+
source_charset_map(); // phase 1
backslash_escape(); // phase 2
@@ -289,8 +301,8 @@ void CPP::compile(const std::string& code)
concatenate_strings(); // phase 6
- auto tokens = tokens_from_pptokens(pp_tokens); // phase 7a
- m_nodes = analysis(tokens); // phase 7b
+ m_tokens = tokens_from_pptokens(pp_tokens); // phase 7a
+ m_nodes = analysis(m_tokens); // phase 7b
translate(); // phase 7c
instantiate(); // phase 8
diff --git a/cpp.h b/cpp.h
index 3d82751..6db7146 100644
--- a/cpp.h
+++ b/cpp.h
@@ -33,13 +33,15 @@ public:
std::vector<uint8_t> getCode();
std::vector<uint8_t> getData();
- typedef std::unordered_map<std::string, std::function<void()>> map_type;
-
private:
- std::string m_code; // input / start
- std::vector<Token> m_charTokens; // result of phase 3
+ typedef std::unordered_map<std::string, std::function<void(fs::path&, index_t)>> map_type;
+
+ std::string m_code; // input from compile()
+ std::vector<Token> m_tokens; // result of phase 7.a
std::vector<Gram::TreeNode> m_nodes; // result of phase 7.b
void traverse(index_t node_id, map_type& map, fs::path parent_path = "/");
+
+ CPP::map_type map_translation_unit;
};
diff --git a/grammer.cpp b/grammer.cpp
index d7afaef..31a4bbf 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -74,7 +74,9 @@ void Compiler::DumpTree()
std::string line(indent, ' ');
if (ChildIdIsToken(current_index)) {
index_t token_id {TokenIdFromChildId(current_index)};
- line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type + "("s + tokens[token_id].value + ")"s;
+ line += "Token("s + std::to_string(token_id) + "): "s + tokens[token_id].type;
+ if (tokens[token_id].value != tokens[token_id].type)
+ line += "("s + tokens[token_id].value + ")"s;
} else {
auto& node {nodes[current_index]};
line += "Node("s + std::to_string(current_index) + "): "s + node.type + "/" + std::to_string(node.variant);
diff --git a/grammer.h b/grammer.h
index a8a3356..e179a9e 100644
--- a/grammer.h
+++ b/grammer.h
@@ -22,8 +22,8 @@ struct NodePosition {
// token_id: index into token list
// node_id: index into tree node list
struct TreeNode {
- NodePosition pos; // position of this node in tree
- index_t node_id{};
+ NodePosition pos; // position of this node in tree (i.e. parent node_id + child_pos in parent)
+ index_t node_id{}; // this node's id
std::string type;
index_t variant; // bnf[type][variant]
diff --git a/minicc.cpp b/minicc.cpp
index d180517..88cb6ab 100644
--- a/minicc.cpp
+++ b/minicc.cpp
@@ -50,7 +50,13 @@ void Location::advance(bool newline)
}
}
-std::string Location::toString()
+std::string Location::toString() const
{
return std::to_string(line) + ":"s + std::to_string(column);
}
+
+std::string Token::toString() const
+{
+ return location.toString() + ": "s + value + " ("s + type + ")"s;
+}
+
diff --git a/minicc.h b/minicc.h
index 92678a1..28b494a 100644
--- a/minicc.h
+++ b/minicc.h
@@ -1,3 +1,5 @@
+// Common definitions
+
#pragma once
#include <cstdlib>
@@ -19,8 +21,8 @@ struct Location {
size_t column{1};
size_t pos{0};
- void advance(bool newline = false);
- std::string toString();
+ void advance(bool newline = false); ///< advance 1 char
+ std::string toString() const;
};
bool operator==(const Location &a, const Location &b);
@@ -29,6 +31,8 @@ struct Token {
std::string type;
std::string value;
Location location;
+
+ std::string toString() const;
};
// For printing via Google Test