summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--grammer.cpp123
-rw-r--r--grammer.h27
2 files changed, 57 insertions, 93 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 97930b1..3cc88fa 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -16,7 +16,7 @@ void Debug(std::string s)
void Compiler::clear()
{
nodes.clear();
- root_node_id = 0;
+ begin_pos = NodePosition{};
}
std::string Compiler::GetTypeOfNode(index_t node_id) const
@@ -26,39 +26,6 @@ std::string Compiler::GetTypeOfNode(index_t node_id) const
return nodes[node_id].type;
}
-bool Compiler::IsRootNode(index_t node_id) const
-{
- auto& node {nodes[node_id]};
- return node.parent_node_id == node.node_id;
-}
-
-void Compiler::Validate() const
-{
- // A program is non empty
- if (nodes.size() == 0)
- return;
- //throw std::runtime_error("Program is empty");
-
- // Consistency check for nodes
- if (root_node_id >= nodes.size())
- throw std::runtime_error("Bad root node: "s + std::to_string(root_node_id) + " vs. "s + std::to_string(nodes.size()));
-
- // Start symbol on top
- if (GetTypeOfNode(root_node_id) != m_top)
- throw std::runtime_error("Root node not start symbol!");
-
- // All nodes filled
- for (const auto& node: nodes) {
- if (node.child_ids.size() != bnf[node.type][node.variant].size())
- throw std::runtime_error("Node not filled: "s + node.type + "["s + std::to_string(node.variant) + "]"s);
- }
-}
-
-bool Compiler::rootIsStartSymbol() const
-{
- return GetTypeOfNode(root_node_id) == m_top;
-}
-
bool Gram::ChildIdIsToken(int32_t child_id)
{
return child_id < 0;
@@ -81,7 +48,6 @@ void Compiler::DumpTree()
if (nodes.size() > 0) {
if (0) {
Debug("--- Nodes ------------------------------------");
- Debug("root_node_id="s + std::to_string(root_node_id));
for (const auto& node : nodes) {
std::string line{"("s + std::to_string(node.node_id) + "):"s};
for (const auto& child : node.child_ids) {
@@ -96,7 +62,7 @@ void Compiler::DumpTree()
}
Debug("--- Tree -------------------------------------");
- std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(root_node_id), 0}}; // id, indent
+ std::deque<std::pair<int32_t, size_t>> todo{std::pair<int32_t, size_t>{static_cast<int32_t>(0), 0}}; // id, indent
while (!todo.empty()) {
auto [current_index, indent] {todo.front()};
todo.pop_front();
@@ -120,38 +86,41 @@ void Compiler::DumpTree()
Debug("==============================================");
}
-bool Compiler::treeIsComplete() const
+index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition pos)
{
- return nodes.size() > 0 && rootIsStartSymbol();
+ auto& list { bnf[type][variant]};
+ index_t node_id{nodes.size()};
+ if (nodes.size() > 0)
+ nodes[pos.node_id].child_ids[pos.child_pos] = node_id;
+ nodes.emplace_back(TreeNode{pos, node_id, type, variant, std::vector<int32_t>(size_t(list.size()), 0)});
+
+ Debug("AddNode(): "s + nodes[pos.node_id].type + "->"s + type + ": "s + std::to_string(node_id));
+ DumpTree();
+
+ return node_id;
}
-index_t Compiler::AddNode(const std::string& child_type, index_t parent_index)
+void Compiler::RemoveNode()
{
- TreeNode& parent {nodes[parent_index]};
- index_t index = nodes.size();
- parent.child_ids.push_back(index);
-
- index_t variant{};
- std::deque<std::pair<std::string, index_t>> alternatives;
-
- const auto& variants { bnf[child_type] };
- bool found{false};
- for (int i = 0; i < variants.size(); i++) { // variants
- if (!found) { // use first match
- variant = i;
- found = true;
- } else { // defer all others
- alternatives.emplace_back(child_type, i);
- }
- }
+ auto& pos{nodes.back().pos};
- nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, std::vector<int32_t>{}});
- //root stays
+ nodes[pos.node_id].child_ids[pos.child_pos] = 0;
+ nodes.pop_back();
+}
- Debug("AddNode(): "s + nodes[parent_index].type + "->"s + child_type + ": "s + std::to_string(index));
- DumpTree();
+Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos): m_compiler(compiler)
+{
+ m_compiler.AddNode(type, variant, pos);
+}
- return index;
+Compiler::AddNodeGuard::~AddNodeGuard()
+{
+ m_compiler.RemoveNode();
+}
+
+void Compiler::IncNodePosition(NodePosition& pos)
+{
+ // TODO
}
size_t Compiler::minimumSymbolsNeeded(std::string symbol)
@@ -197,16 +166,15 @@ size_t Compiler::minimumSymbolsNeeded(std::vector<std::string> symbol_list)
/// begin, end: indexes in tokens list
bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t end)
{
- // TODO: isTerminal() necessary here?
-
// match terminal symbols at start
- while (begin < end && isTerminal(bnf, tokens[begin].type) && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) {
+ while (begin < end && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) {
begin++;
symbol_list.erase(symbol_list.begin());
+ IncNodePosition(begin_pos); // TODO: guard?
}
// match terminal symbols at end
- while (begin < end && isTerminal(bnf, tokens[end - 1].type) && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) {
+ while (begin < end && symbol_list.size() > 0 && symbol_list.back() == tokens[end - 1].type) {
end--;
symbol_list.erase(symbol_list.end() - 1);
}
@@ -218,12 +186,14 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
return false;
}
- // now, symbol_list[begin .. end - 1] has size > 0 and contains non-terminal symbols at start and end
+ // now, symbol_list has size > 0 and contains non-terminal symbols at start and end
// resolve first symbol
auto it{bnf.find(symbol_list.front())};
if (it != bnf.end()) {
- for (std::vector<std::string> list: it->second) { // iterate over alternatives
+ for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives
+ //AddNodeGuard guard(*this, symbol_list.front(), i, begin_pos);
+ std::vector<std::string> list {it->second[i]};
list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
if (minimumSymbolsNeeded(list) > end - begin) // stop recursion
continue;
@@ -272,29 +242,12 @@ std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> p
if (tokens.size() == 0)
throw std::runtime_error("No tokens!");
-#if 0
- //
- // bottom-up algorithm
- //
- while (!treeIsComplete()) {
- if (!FillTree()) {
- TrackBack();
- } else if (!AddRootNode()) {
- TrackBack();
- } else if (!FillTree()) {
- TrackBack();
- }
- }
-#else
//
// top-down algorithm
//
if (!match(m_top, 0, tokens.size()))
throw std::runtime_error("Compile error.");
-#endif
-
- Validate();
- return std::pair<index_t, std::vector<TreeNode>>{root_node_id, nodes};
+ return std::pair<index_t, std::vector<TreeNode>>{0, nodes};
}
diff --git a/grammer.h b/grammer.h
index d70f675..39e6884 100644
--- a/grammer.h
+++ b/grammer.h
@@ -11,23 +11,27 @@ class GrammerTest;
namespace Gram {
+struct NodePosition {
+ index_t node_id{}; // 0-based
+ index_t child_pos{}; // 0-based
+};
+
// TreeNodes are only intermediate. Terminal symbols don't get of TreeNodes
// token_id: index into token list
// node_id: index into tree node list
struct TreeNode {
- index_t parent_node_id{}; // root if parent_node_id == node_id
+ NodePosition pos; // position of this node in tree
index_t node_id{};
std::string type;
index_t variant; // bnf[type][variant]
- std::vector<int32_t> child_ids; // < 0: terminal: token_id; >= 0: non-terminal: node_id; fill token by token
+ std::vector<int32_t> child_ids; // < 0: terminal: token_id; > 0: non-terminal: node_id; = 0: unset
};
class Compiler
{
private:
// The result
- index_t root_node_id{};
std::vector<TreeNode> nodes;
// Input
@@ -44,11 +48,18 @@ private:
// Node specific
std::string GetTypeOfNode(index_t node_id) const;
- bool IsRootNode(index_t node_id) const;
- bool rootIsStartSymbol() const;
- bool treeIsComplete() const;
- void Validate() const;
- index_t AddNode(const std::string& child_type, index_t parent_index);
+ index_t AddNode(const std::string& type, index_t variant, NodePosition pos = {});
+ void RemoveNode();
+ // Adds Node and Removes it on scope exit (RAII)
+ class AddNodeGuard {
+ Compiler& m_compiler;
+ public:
+ AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos);
+ ~AddNodeGuard();
+ };
+ void IncNodePosition(NodePosition& pos);
+
+ NodePosition begin_pos;
// top-down algorithm
std::unordered_map<std::string, size_t> m_min; // cache