summaryrefslogtreecommitdiffhomepage
path: root/lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.cpp')
-rw-r--r--lexer.cpp62
1 files changed, 39 insertions, 23 deletions
diff --git a/lexer.cpp b/lexer.cpp
index 9ef29b1..0944738 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -4,9 +4,8 @@
using namespace Lex;
-size_t Lexer::newState(std::string state_type)
+size_t Lexer::newState()
{
- m_state_types.push_back(state_type);
return states++;
}
@@ -66,19 +65,19 @@ std::vector<std::pair<size_t, char>> Lexer::getSuccessors(size_t state)
}
// Helper function
-void Lexer::addPathOrTransition(size_t state0, size_t state1, std::string symbol, std::string type)
+void Lexer::addPathOrTransition(size_t state0, size_t state1, std::string symbol)
{
if (isTerminal(m_bnf, symbol)) { // end recursion with new transition
if (symbol.size() != 1)
throw std::runtime_error("Bad sized terminal symbol: "s + symbol);
addTransition(state0, state1, symbol[0]);
} else { // recurse via non-terminal symbol
- addPath(state0, state1, symbol, type);
+ addPath(state0, state1, symbol);
}
}
// Helper function: add one rule
-void Lexer::addRule(const std::vector<std::string>& list, size_t list_index_from, size_t list_index_to, size_t state0, size_t state1, const std::string& rule_symbol, std::string type)
+void Lexer::addRule(const std::vector<std::string>& list, size_t list_index_from, size_t list_index_to, size_t state0, size_t state1, const std::string& rule_symbol)
{
size_t previousState{state0};
@@ -88,27 +87,24 @@ void Lexer::addRule(const std::vector<std::string>& list, size_t list_index_from
if (symbol == rule_symbol)
throw std::runtime_error("Recursion found but not allowed");
- size_t state{newState(type)};
- addPathOrTransition(previousState, state, symbol, type);
+ size_t state{newState()};
+ addPathOrTransition(previousState, state, symbol);
previousState = state;
}
if (list.back() == rule_symbol)
throw std::runtime_error("Recursion found but not allowed");
// last transition
- addPathOrTransition(previousState, state1, list.back(), type);
+ addPathOrTransition(previousState, state1, list.back());
}
// Add paths between state0 and state1, including new states and transitions
-void Lexer::addPath(size_t state0, size_t state1, std::string s, std::string type)
+void Lexer::addPath(size_t state0, size_t state1, std::string s)
{
- if (type == "" && s != "" && s != m_top)
- type = s;
-
// state0 -> [paths] -> state01 -> state1
// ^ v
// [recursion paths]
- size_t state01{newState(type)};
+ size_t state01{newState()};
auto it {m_bnf.find(s)};
if (it == m_bnf.end())
@@ -120,9 +116,9 @@ void Lexer::addPath(size_t state0, size_t state1, std::string s, std::string typ
throw std::runtime_error("List too small in rule "s + s);
if (list[0] == s) { // recursion rule
- addRule(list, 1, list_size, state01, state01, s, type);
+ addRule(list, 1, list_size, state01, state01, s);
} else { // non-recursion rule
- addRule(list, 0, list_size, state0, state01, s, type);
+ addRule(list, 0, list_size, state0, state01, s);
}
}
addTransition(state01, state1, 0); // empty transition to end
@@ -154,14 +150,35 @@ void Lexer::removeEmptyTransitions()
}
}
-Lexer::Lexer(const BNF& bnf, const std::string& top): m_bnf(bnf), m_top(top), m_startState(newState()), m_endState(newState())
+Lexer::Lexer(const BNF& bnf, const std::string& top): m_bnf(bnf), m_top(top), m_startState(newState())
{
- addPath(m_startState, m_endState, m_top, "");
+ // types are the second level symbols, directly under top
+ auto it {m_bnf.find(m_top)};
+ if (it == m_bnf.end())
+ throw std::runtime_error("Start symbol "s + m_top + " not found."s);
+
+ auto& list {it->second};
+
+ for (auto& element : list) {
+ if (element.size() != 1)
+ throw std::runtime_error("Bad type rule in "s + m_top + ": size = "s + std::to_string(element.size()));
+
+ auto endState{newState()};
+ std::string type{element[0]};
+ m_state_types[endState] = type;
+
+ addPath(m_startState, endState, type);
+ }
replaceEmptyTransitions();
removeEmptyTransitions();
}
+bool Lexer::isEndState(size_t state)
+{
+ return m_state_types.find(state) != m_state_types.end();
+}
+
Token Lexer::getToken(const std::string& s, Location& location)
{
Location oldLocation{location}; // start of token
@@ -175,16 +192,16 @@ Token Lexer::getToken(const std::string& s, Location& location)
// match as much as possible
while (location.pos < s.size() && states.size() > 0) {
char currentChar{s[location.pos]};
- std::cout << "DEBUG: Char: " << currentChar << std::endl;
+ //std::cout << "DEBUG: Char: " << currentChar << std::endl;
for (const auto& state: states) {
std::vector<std::pair<size_t, char>> successors{getSuccessors(state)};
for (const auto& [nextState, c]: successors) {
if (c == currentChar) {
- if (nextState == m_endState) { // save intermediate result upon match
+ if (isEndState(nextState)) { // save intermediate result upon match
found = location;
found.advance();
- state_type = m_state_types[state];
+ state_type = m_state_types[nextState];
} else {
newStates.push_back(nextState);
}
@@ -198,10 +215,9 @@ Token Lexer::getToken(const std::string& s, Location& location)
std::string value {s.substr(oldLocation.pos, found.pos - oldLocation.pos)};
- if (found.pos > 0)
- std::cout << "DEBUG: Matched " << found.pos - oldLocation.pos << " chars: " << value << "|" << state_type << std::endl;
- else
+ if (found.pos == 0)
throw std::runtime_error("Bad Token at "s + oldLocation.toString());
+ //else std::cout << "DEBUG: Matched " << found.pos - oldLocation.pos << " chars: " << value << "|" << state_type << std::endl;
location = found; // reset to end of match