summaryrefslogtreecommitdiffhomepage
path: root/lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.cpp')
-rw-r--r--lexer.cpp76
1 files changed, 64 insertions, 12 deletions
diff --git a/lexer.cpp b/lexer.cpp
index 61caae1..9ef29b1 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -1,5 +1,7 @@
#include "lexer.h"
+#include <algorithm>
+
using namespace Lex;
size_t Lexer::newState(std::string state_type)
@@ -19,18 +21,31 @@ void Lexer::addTransition(size_t state0, size_t state1, char c)
}
}
-std::vector<std::pair<size_t, char>> Lexer::getSuccessors(size_t state)
+void Lexer::removeTransition(size_t state0, size_t state1, char c)
+{
+ auto it{transitions.find(state0)};
+
+ if (it != transitions.end()) {
+ std::pair<size_t, char> reference{state1, c};
+ it->second.erase(std::remove(it->second.begin(), it->second.end(), reference),
+ it->second.end());
+ } else {
+ throw std::runtime_error("Transition not found: "s + std::to_string(state0) + "->"s + std::to_string(state1));
+ }
+}
+
+std::vector<std::pair<size_t, char>> Lexer::getSuccessorsViaEmpty(size_t state)
{
std::vector<std::pair<size_t, char>> result;
auto it{transitions.find(state)};
- if (it != transitions.end()) { // new list entry
+ if (it != transitions.end()) {
for (auto &i: it->second) {
- if (i.first == m_endState || i.second != '\0') { // add transition to end state or transition via actual char
+ if (i.second == '\0') { // add empty transitions
result.push_back(i);
- } else { // follow empty transition
- auto successors { getSuccessors(i.first) };
+ // and more, recursively
+ auto successors { getSuccessorsViaEmpty(i.first) };
result.insert(result.end(), successors.begin(), successors.end());
}
}
@@ -39,6 +54,17 @@ std::vector<std::pair<size_t, char>> Lexer::getSuccessors(size_t state)
return result;
}
+std::vector<std::pair<size_t, char>> Lexer::getSuccessors(size_t state)
+{
+ auto it{transitions.find(state)};
+
+ if (it != transitions.end()) {
+ return it->second;
+ }
+
+ return {};
+}
+
// Helper function
void Lexer::addPathOrTransition(size_t state0, size_t state1, std::string symbol, std::string type)
{
@@ -102,10 +128,38 @@ void Lexer::addPath(size_t state0, size_t state1, std::string s, std::string typ
addTransition(state01, state1, 0); // empty transition to end
}
-Lexer::Lexer(const BNF& bnf, const std::string& top): m_bnf(bnf), m_top(top)
- , m_startState(newState()), m_endState(newState())
+void Lexer::replaceEmptyTransitions()
+{
+ // iterate over all transitions
+ for (auto& [state0, list]: transitions) {
+ std::vector<std::pair<size_t, char>> list_extension; // which elements must be added to list
+ for (auto& [state1, currentChar]: list) {
+ if (currentChar != '\0') { // for every non-empty transition
+ // add extension via following empty transitions
+ auto successors { getSuccessorsViaEmpty(state1) };
+ for (auto &[state, dummyChar] : successors) {
+ list_extension.emplace_back(state, currentChar);
+ }
+ }
+ }
+ list.insert(list.end(), list_extension.begin(), list_extension.end());
+ }
+}
+
+void Lexer::removeEmptyTransitions()
+{
+ for (auto& [state0, list]: transitions) {
+ list.erase(std::remove_if(list.begin(), list.end(), [](const std::pair<size_t, char>& pair){return pair.second == '\0';}),
+ list.end());
+ }
+}
+
+Lexer::Lexer(const BNF& bnf, const std::string& top): m_bnf(bnf), m_top(top), m_startState(newState()), m_endState(newState())
{
addPath(m_startState, m_endState, m_top, "");
+
+ replaceEmptyTransitions();
+ removeEmptyTransitions();
}
Token Lexer::getToken(const std::string& s, Location& location)
@@ -127,15 +181,13 @@ Token Lexer::getToken(const std::string& s, Location& location)
std::vector<std::pair<size_t, char>> successors{getSuccessors(state)};
for (const auto& [nextState, c]: successors) {
if (c == currentChar) {
- newStates.push_back(nextState);
if (nextState == m_endState) { // save intermediate result upon match
found = location;
found.advance();
state_type = m_state_types[state];
+ } else {
+ newStates.push_back(nextState);
}
- } else if (nextState == m_endState) { // save intermediate result w/o match of c
- found = location;
- state_type = m_state_types[state];
}
}
}
@@ -149,7 +201,7 @@ Token Lexer::getToken(const std::string& s, Location& location)
if (found.pos > 0)
std::cout << "DEBUG: Matched " << found.pos - oldLocation.pos << " chars: " << value << "|" << state_type << std::endl;
else
- throw std::runtime_error("Tokenize error at "s + oldLocation.toString());
+ throw std::runtime_error("Bad Token at "s + oldLocation.toString());
location = found; // reset to end of match