From 9502989aec80e8f75cf14e7dd7d1d85333090866 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 15 Mar 2020 21:47:42 +0100 Subject: Fix lex error --- lexer.cpp | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------- lexer.h | 5 +++++ 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/lexer.cpp b/lexer.cpp index 61caae1..9ef29b1 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -1,5 +1,7 @@ #include "lexer.h" +#include + using namespace Lex; size_t Lexer::newState(std::string state_type) @@ -19,18 +21,31 @@ void Lexer::addTransition(size_t state0, size_t state1, char c) } } -std::vector> Lexer::getSuccessors(size_t state) +void Lexer::removeTransition(size_t state0, size_t state1, char c) +{ + auto it{transitions.find(state0)}; + + if (it != transitions.end()) { + std::pair reference{state1, c}; + it->second.erase(std::remove(it->second.begin(), it->second.end(), reference), + it->second.end()); + } else { + throw std::runtime_error("Transition not found: "s + std::to_string(state0) + "->"s + std::to_string(state1)); + } +} + +std::vector> Lexer::getSuccessorsViaEmpty(size_t state) { std::vector> result; auto it{transitions.find(state)}; - if (it != transitions.end()) { // new list entry + if (it != transitions.end()) { for (auto &i: it->second) { - if (i.first == m_endState || i.second != '\0') { // add transition to end state or transition via actual char + if (i.second == '\0') { // add empty transitions result.push_back(i); - } else { // follow empty transition - auto successors { getSuccessors(i.first) }; + // and more, recursively + auto successors { getSuccessorsViaEmpty(i.first) }; result.insert(result.end(), successors.begin(), successors.end()); } } @@ -39,6 +54,17 @@ std::vector> Lexer::getSuccessors(size_t state) return result; } +std::vector> Lexer::getSuccessors(size_t state) +{ + auto it{transitions.find(state)}; + + if (it != transitions.end()) { + return it->second; + } + + return {}; +} + // Helper function void Lexer::addPathOrTransition(size_t state0, size_t state1, std::string symbol, std::string type) { @@ -102,10 +128,38 @@ void Lexer::addPath(size_t state0, size_t state1, std::string s, std::string typ addTransition(state01, state1, 0); // empty transition to end } -Lexer::Lexer(const BNF& bnf, const std::string& top): m_bnf(bnf), m_top(top) - , m_startState(newState()), m_endState(newState()) +void Lexer::replaceEmptyTransitions() +{ + // iterate over all transitions + for (auto& [state0, list]: transitions) { + std::vector> list_extension; // which elements must be added to list + for (auto& [state1, currentChar]: list) { + if (currentChar != '\0') { // for every non-empty transition + // add extension via following empty transitions + auto successors { getSuccessorsViaEmpty(state1) }; + for (auto &[state, dummyChar] : successors) { + list_extension.emplace_back(state, currentChar); + } + } + } + list.insert(list.end(), list_extension.begin(), list_extension.end()); + } +} + +void Lexer::removeEmptyTransitions() +{ + for (auto& [state0, list]: transitions) { + list.erase(std::remove_if(list.begin(), list.end(), [](const std::pair& pair){return pair.second == '\0';}), + list.end()); + } +} + +Lexer::Lexer(const BNF& bnf, const std::string& top): m_bnf(bnf), m_top(top), m_startState(newState()), m_endState(newState()) { addPath(m_startState, m_endState, m_top, ""); + + replaceEmptyTransitions(); + removeEmptyTransitions(); } Token Lexer::getToken(const std::string& s, Location& location) @@ -127,15 +181,13 @@ Token Lexer::getToken(const std::string& s, Location& location) std::vector> successors{getSuccessors(state)}; for (const auto& [nextState, c]: successors) { if (c == currentChar) { - newStates.push_back(nextState); if (nextState == m_endState) { // save intermediate result upon match found = location; found.advance(); state_type = m_state_types[state]; + } else { + newStates.push_back(nextState); } - } else if (nextState == m_endState) { // save intermediate result w/o match of c - found = location; - state_type = m_state_types[state]; } } } @@ -149,7 +201,7 @@ Token Lexer::getToken(const std::string& s, Location& location) if (found.pos > 0) std::cout << "DEBUG: Matched " << found.pos - oldLocation.pos << " chars: " << value << "|" << state_type << std::endl; else - throw std::runtime_error("Tokenize error at "s + oldLocation.toString()); + throw std::runtime_error("Bad Token at "s + oldLocation.toString()); location = found; // reset to end of match diff --git a/lexer.h b/lexer.h index eb41e3d..a033cbc 100644 --- a/lexer.h +++ b/lexer.h @@ -27,6 +27,8 @@ class Lexer size_t newState(std::string state_type = ""); void addTransition(size_t state0, size_t state1, char c); + void removeTransition(size_t state0, size_t state1, char c); + std::vector> getSuccessorsViaEmpty(size_t state); std::vector> getSuccessors(size_t state); // Build up automaton, recursively @@ -37,6 +39,9 @@ class Lexer Token getToken(const std::string& s, Location& location); void skipWhitespace(const std::string& s, Location& location); + void replaceEmptyTransitions(); + void removeEmptyTransitions(); + public: Lexer(const BNF& bnf, const std::string& top); std::vector Lex(const std::string& s); -- cgit v1.2.3