summaryrefslogtreecommitdiffhomepage
path: root/grammer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'grammer.cpp')
-rw-r--r--grammer.cpp58
1 files changed, 34 insertions, 24 deletions
diff --git a/grammer.cpp b/grammer.cpp
index 50617ce..d7afaef 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -92,7 +92,7 @@ void Compiler::DumpTree()
index_t Compiler::AddNode(const std::string& type, index_t variant, NodePosition pos)
{
- auto& list { bnf[type][variant]};
+ auto& list { m_bnf[type][variant]};
index_t node_id{nodes.size()};
if (nodes.size() > 0)
nodes[pos.node_id].child_ids[pos.child_pos] = node_id;
@@ -149,7 +149,7 @@ void Compiler::IncNodePosition(NodePosition& pos)
size_t Compiler::minimumSymbolsNeeded(const std::string& symbol)
{
- if (isTerminal(bnf, symbol)) {
+ if (isTerminal(m_bnf, symbol)) {
return 1;
} else {
auto it_min{m_min.find(symbol)};
@@ -157,8 +157,8 @@ size_t Compiler::minimumSymbolsNeeded(const std::string& symbol)
return it_min->second;
m_min[symbol] = std::numeric_limits<size_t>::max();
- auto it{bnf.find(symbol)};
- if (it != bnf.end()) {
+ auto it{m_bnf.find(symbol)};
+ if (it != m_bnf.end()) {
size_t minimum{std::numeric_limits<size_t>::max()};
for (const auto& list: it->second) {
@@ -196,7 +196,7 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
symbol_list.erase(symbol_list.begin());
}
- // matching of empty lists
+ // matching of empty list in non-terminals
if (symbol_list.size() == 0) {
if (begin == end) { // only match real empty list
// this is the point of the final match
@@ -206,6 +206,19 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
return false;
}
+ // matching of empty list in terminals
+ if (begin == end) {
+ const auto& symbol{symbol_list.front()};
+ auto it{m_empty_lut.find(symbol)};
+ if (it == m_empty_lut.end()) // can't match empty tokens list with this nt-symbol
+ return false;
+
+ AddNodeGuard guard(*this, it->second);
+ std::vector<std::string> list {m_bnf[symbol][it->second]};
+ list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
+ return match(list, begin, end);
+ }
+
// now, symbol_list has size > 0 and contains non-terminal symbols at start and end
// resolve first symbol
@@ -213,12 +226,11 @@ bool Compiler::match(std::vector<std::string> symbol_list, size_t begin, size_t
if (it != m_match_lut.end()) {
for (size_t i: it->second) {
AddNodeGuard guard(*this, i);
- std::vector<std::string> list {bnf[symbol_list.front()][i]};
+ std::vector<std::string> list {m_bnf[symbol_list.front()][i]};
list.insert(list.end(), symbol_list.begin() + 1, symbol_list.end());
if (minimumSymbolsNeeded(list) > end - begin) // stop recursion
continue;
- // TODO: recurse last?
if (match(list, begin, end))
return true;
}
@@ -236,7 +248,7 @@ bool Compiler::match(std::string symbol, size_t begin, size_t end)
void Compiler::fillStartCache()
{
- std::unordered_set<std::string> terminals {getTerminals(bnf)};
+ std::unordered_set<std::string> terminals {getTerminals(m_bnf)};
{ // follow terminal symbols
for (const std::string& terminal: terminals) {
@@ -273,12 +285,13 @@ void Compiler::fillStartCache()
}
{ // follow empty non-terminal symbols, and combine all found non-terminals with all terminals
- std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(bnf)};
+ std::unordered_set<std::pair<std::string, size_t>, PairHash> current_set {getEmptyPositions(m_bnf)};
std::unordered_set<std::pair<std::string, size_t>, PairHash> done_set;
std::unordered_set<std::pair<std::string, size_t>, PairHash> next_set;
do {
for (const auto& current_pos: current_set) {
+ m_empty_lut[current_pos.first] = current_pos.second;
for (const std::string& terminal: terminals) {
auto it_lut {m_match_lut.find({current_pos.first, terminal})};
if (it_lut == m_match_lut.end()) { // add new list
@@ -286,7 +299,6 @@ void Compiler::fillStartCache()
} else { // extend list
it_lut->second.emplace_back(current_pos.second);
}
-
}
auto it {reversedPosFirst.find(current_pos.first)};
@@ -317,24 +329,24 @@ void Compiler::constructTree()
{
symbol_variants_it = symbol_variants.begin();
- m_symbol_list = {m_top};
- m_symbol_list_pos = 0;
+ std::vector<std::string> symbol_list{m_top};
+ index_t symbol_list_pos{0};
NodePosition tree_pos;
- while (m_symbol_list_pos < m_symbol_list.size()) {
- std::string symbol{m_symbol_list[m_symbol_list_pos]};
+ while (symbol_list_pos < symbol_list.size()) {
+ std::string symbol{symbol_list[symbol_list_pos]};
- if (isTerminal(bnf, symbol)) {
+ if (isTerminal(m_bnf, symbol)) {
// Advance terminal symbol
- nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = ChildIdFromTokenId(m_symbol_list_pos);
+ nodes[tree_pos.node_id].child_ids[tree_pos.child_pos] = ChildIdFromTokenId(symbol_list_pos);
IncNodePosition(tree_pos);
- m_symbol_list_pos++;
+ symbol_list_pos++;
} else {
// Replace non-terminal symbol
- m_symbol_list.erase(m_symbol_list.begin() + m_symbol_list_pos);
- std::vector<std::string> list {bnf[symbol][*symbol_variants_it]};
- m_symbol_list.insert(m_symbol_list.begin() + m_symbol_list_pos, list.begin(), list.end());
+ symbol_list.erase(symbol_list.begin() + symbol_list_pos);
+ std::vector<std::string> list {m_bnf[symbol][*symbol_variants_it]};
+ symbol_list.insert(symbol_list.begin() + symbol_list_pos, list.begin(), list.end());
index_t node_id {AddNode(symbol, *symbol_variants_it, tree_pos)};
@@ -350,11 +362,9 @@ void Compiler::constructTree()
}
Compiler::Compiler(BNF& bnf, const std::string& top)
- : bnf(bnf)
+ : m_bnf {removeHeadRecursion(bnf)}
, m_top(top)
- //, ReverseBNF{Reverse(bnf)}
- //, reversedFirst{reverseFirst(bnf)}
- , reversedPosFirst{reversePosFirst(bnf)}
+ , reversedPosFirst{reversePosFirst(m_bnf)}
{
//
// prepare helper cache (TODO: remove this ugly workaround for remaining bad marker elements)