summaryrefslogtreecommitdiffhomepage
path: root/grammer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'grammer.cpp')
-rw-r--r--grammer.cpp117
1 files changed, 95 insertions, 22 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 3cc88fa..cd5b50f 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -15,8 +15,8 @@ void Debug(std::string s)
void Compiler::clear()
{
+ symbol_variants.clear();
nodes.clear();
- begin_pos = NodePosition{};
}
std::string Compiler::GetTypeOfNode(index_t node_id) const
@@ -26,11 +26,21 @@ std::string Compiler::GetTypeOfNode(index_t node_id) const
return nodes[node_id].type;
}
+bool Gram::ChildIdIsEmpty(int32_t child_id)
+{
+ return child_id == 0;
+}
+
bool Gram::ChildIdIsToken(int32_t child_id)
{
return child_id < 0;
}
+bool Gram::ChildIdIsNode(int32_t child_id)
+{
+ return child_id > 0;
+}
+
index_t Gram::TokenIdFromChildId(int32_t child_id)
{
return index_t(-child_id) - 1;
@@ -94,33 +104,53 @@ index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition
nodes[pos.node_id].child_ids[pos.child_pos] = node_id;
nodes.emplace_back(TreeNode{pos, node_id, type, variant, std::vector<int32_t>(size_t(list.size()), 0)});
- Debug("AddNode(): "s + nodes[pos.node_id].type + "->"s + type + ": "s + std::to_string(node_id));
- DumpTree();
-
return node_id;
}
-void Compiler::RemoveNode()
+Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, index_t variant): m_compiler(compiler)
{
- auto& pos{nodes.back().pos};
-
- nodes[pos.node_id].child_ids[pos.child_pos] = 0;
- nodes.pop_back();
-}
-
-Compiler::AddNodeGuard::AddNodeGuard(Compiler& compiler, const std::string& type, index_t variant, NodePosition pos): m_compiler(compiler)
-{
- m_compiler.AddNode(type, variant, pos);
+ m_compiler.symbol_variants.push_back(variant);
}
Compiler::AddNodeGuard::~AddNodeGuard()
{
- m_compiler.RemoveNode();
+ m_compiler.symbol_variants.pop_back();
}
void Compiler::IncNodePosition(NodePosition& pos)
{
- // TODO
+ if (nodes.size() == 0) // special case: empty tree
+ return;
+ if (pos.node_id >= nodes.size())
+ throw std::runtime_error("ICE: NodePosition with node_id "s + std::to_string(pos.node_id) + " doesn't exist."s);
+ if (pos.child_pos >= nodes[pos.node_id].child_ids.size())
+ throw std::runtime_error("ICE: NodePosition with child_pos "s + std::to_string(pos.child_pos) + " in node_id "s + std::to_string(pos.node_id) + " doesn't exist."s);
+
+ int32_t child_id{nodes[pos.node_id].child_ids[pos.child_pos]};
+
+ if (ChildIdIsEmpty(child_id))
+ throw std::runtime_error("ICE: NodePosition is empty");
+
+ // Actually, advance
+ if (ChildIdIsToken(child_id)) {
+ pos.child_pos++;
+ } else {
+ pos.node_id = child_id;
+ pos.child_pos = 0;
+ }
+
+ // Go to parent if child ids completely traversed
+ while (pos.node_id > 0 && pos.child_pos >= nodes[pos.node_id].child_ids.size()) {
+ pos = nodes[pos.node_id].pos;
+ pos.child_pos++;
+ }
+
+ // Advancing at root node for last child is allowed: Finished
+ if (pos.child_pos >= nodes[pos.node_id].child_ids.size())
+ return;
+
+ if (ChildIdIsNode(nodes[pos.node_id].child_ids[pos.child_pos]))
+ throw std::runtime_error("ICE: No node expected at "s + std::to_string(pos.node_id) + "/"s + std::to_string(pos.child_pos));
}
size_t Compiler::minimumSymbolsNeeded(std::string symbol)
@@ -170,7 +200,6 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
while (begin < end && symbol_list.size() > 0 && symbol_list.front() == tokens[begin].type) {
begin++;
symbol_list.erase(symbol_list.begin());
- IncNodePosition(begin_pos); // TODO: guard?
}
// match terminal symbols at end
@@ -181,8 +210,11 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
// matching of empty lists
if (symbol_list.size() == 0) {
- if (begin == end) // only match real empty list
+ if (begin == end) { // only match real empty list
+ // this is the point of the final match
+ constructTree();
return true;
+ }
return false;
}
@@ -192,7 +224,7 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
auto it{bnf.find(symbol_list.front())};
if (it != bnf.end()) {
for (size_t i = 0; i < it->second.size(); i++) { // iterate over alternatives
- //AddNodeGuard guard(*this, symbol_list.front(), i, begin_pos);
+ AddNodeGuard guard(*this, i);
std::vector<std::string> list {it->second[i]};
list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
if (minimumSymbolsNeeded(list) > end - begin) // stop recursion
@@ -214,6 +246,42 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end)
return match(symbol_list, begin, end);
}
+void Compiler::constructTree()
+{
+ symbol_variants_it = symbol_variants.begin();
+
+ m_symbol_list = {m_top};
+ m_symbol_list_pos = 0;
+
+ NodePosition tree_pos;
+
+ while (m_symbol_list_pos < m_symbol_list.size()) {
+ std::string symbol{m_symbol_list[m_symbol_list_pos]};
+
+ if (isTerminal(bnf, symbol)) {
+ // Advance terminal symbol
+ nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = ChildIdFromTokenId(m_symbol_list_pos);
+ IncNodePosition(tree_pos);
+ m_symbol_list_pos++;
+ } else {
+ // Replace non-terminal symbol
+ m_symbol_list.erase(m_symbol_list.begin() + m_symbol_list_pos);
+ std::vector<std::string> list {bnf[symbol][*symbol_variants_it]};
+ m_symbol_list.insert(m_symbol_list.begin() + m_symbol_list_pos, list.begin(), list.end());
+
+ index_t node_id {AddNode(symbol, *symbol_variants_it, tree_pos)};
+
+ if (node_id > 0) {
+ nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = node_id;
+ IncNodePosition(tree_pos);
+ }
+
+ symbol_variants_it++;
+ }
+ }
+
+}
+
Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)}
{
//std::cout << "DEBUG: " << m_top << std::endl;
@@ -234,7 +302,7 @@ Compiler::Compiler(BNF& bnf, const std::string& top): bnf(bnf), m_top(top), Reve
minimumSymbolsNeeded("translation-unit");
}
-std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> p_tokens)
+std::vector<TreeNode> Compiler::compile(std::vector<Token> p_tokens)
{
clear();
tokens = p_tokens;
@@ -243,11 +311,16 @@ std::pair<index_t, std::vector<TreeNode>> Compiler::compile(std::vector<Token> p
throw std::runtime_error("No tokens!");
//
- // top-down algorithm
+ // top-down algorithm:
+ //
+ // 1. Match linear tokens list to bnf, building up list of used variants (symbol_variants)
+ // 2. Construct Node Tree from symbol_variants
//
if (!match(m_top, 0, tokens.size()))
throw std::runtime_error("Compile error.");
- return std::pair<index_t, std::vector<TreeNode>>{0, nodes};
+ //DumpTree();
+
+ return nodes;
}