summaryrefslogtreecommitdiffhomepage
path: root/lexer.h
blob: aa724bb14eca42966cdb28b2970f9586ed84bb56 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#pragma once

#include "minicc.h"
#include "bnf.h"

#include <string>
#include <unordered_map>
#include <vector>

namespace Lex {

class Lexer
{
 // constructor input
 const BNF& m_bnf;
 const std::string& m_top;

 // Graph
 
 size_t states{}; // start, ...
 std::unordered_map<size_t, std::vector<std::pair<size_t, char>>> transitions; // transitions: state -> {state,character}, ...; empty transition is marked by \0
 std::unordered_map<size_t, std::string> m_state_types; // only necessary for 2nd level symbol names
 size_t m_startState;

 // Graph manipulation
 
 size_t newState();
 bool isEndState(size_t state);
 void addTransition(size_t state0, size_t state1, char c);
 void removeTransition(size_t state0, size_t state1, char c);
 std::vector<std::pair<size_t, char>> getSuccessorsViaEmpty(size_t state);
 std::vector<std::pair<size_t, char>> getSuccessors(size_t state);

 // Build up automaton, recursively
 void addPath(size_t state0, size_t state1, std::string s);
 void addPathOrTransition(size_t state0, size_t state1, std::string symbol);
 void addRule(const std::vector<std::string>& list, size_t list_index_from, size_t list_index_to, size_t state0, size_t state1, const std::string& rule_symbol);

 Token getToken(const std::string& s, Location& location);
 void skipWhitespace(const std::string& s, Location& location);

 void replaceEmptyTransitions();
 void removeEmptyTransitions();

public:
 Lexer(const BNF& bnf, const std::string& top);
 std::vector<Token> Lex(const std::string& s);

};

} // namespace Lex