1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
#pragma once
#include "minicc.h"
#include "bnf.h"
#include <string>
#include <unordered_map>
#include <vector>
namespace Lex {
class Lexer
{
// constructor input
const BNF& m_bnf;
const std::string& m_top;
// Graph
size_t states{}; // start, ...
std::unordered_map<size_t, std::vector<std::pair<size_t, char>>> transitions; // transitions: state -> {state,character}, ...; empty transition is marked by \0
std::unordered_map<size_t, std::string> m_state_types; // only necessary for 2nd level symbol names
size_t m_startState;
// Graph manipulation
size_t newState();
bool isEndState(size_t state);
void addTransition(size_t state0, size_t state1, char c);
void removeTransition(size_t state0, size_t state1, char c);
std::vector<std::pair<size_t, char>> getSuccessorsViaEmpty(size_t state);
std::vector<std::pair<size_t, char>> getSuccessors(size_t state);
// Build up automaton, recursively
void addPath(size_t state0, size_t state1, std::string s);
void addPathOrTransition(size_t state0, size_t state1, std::string symbol);
void addRule(const std::vector<std::string>& list, size_t list_index_from, size_t list_index_to, size_t state0, size_t state1, const std::string& rule_symbol);
Token getToken(const std::string& s, Location& location);
void skipWhitespace(const std::string& s, Location& location);
void replaceEmptyTransitions();
void removeEmptyTransitions();
public:
Lexer(const BNF& bnf, const std::string& top);
std::vector<Token> Lex(const std::string& s);
};
} // namespace Lex
|