summaryrefslogtreecommitdiffhomepage
path: root/asm/arm/instruction.h
blob: daadcce084e78b203d897d24a6b2274c2f120087 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#pragma once

#include <cstdint>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

using namespace std::string_literals;

/// Sequence of raw bytes, in the order they are emitted.
using code_sequence = std::vector<uint8_t>;

/// Splits an integer value into its bytes, least significant byte first.
///
/// Only the explicit specializations below (uint16_t and uint32_t) are
/// defined; using any other type fails at link time.
///
/// @param v value to serialize
/// @return the bytes of v in little-endian order
template<typename from_t>
code_sequence to_code_sequence(from_t v);

// A full specialization is an ordinary function, not a template: it has to be
// `inline` when defined in a header, otherwise every translation unit that
// includes this file emits its own definition and linking fails.

/// 16-bit specialization: returns exactly two bytes.
template<>
inline code_sequence to_code_sequence<uint16_t>(uint16_t v) {
 return code_sequence{static_cast<uint8_t>(v & 0xFF), static_cast<uint8_t>(v >> 8)};
}

/// 32-bit specialization: returns exactly four bytes.
template<>
inline code_sequence to_code_sequence<uint32_t>(uint32_t v) {
 return code_sequence{
  static_cast<uint8_t>(v & 0xFF),
  static_cast<uint8_t>(v >> 8),
  static_cast<uint8_t>(v >> 16),
  static_cast<uint8_t>(v >> 24),
 };
}

/// Builds a 32-bit mask with the `number` most significant bits set and all
/// remaining bits cleared, e.g. high_bits(10) == 0xFFC00000.
///
/// The mask is always relative to a 32-bit word, regardless of the width of
/// the instruction it is used with.
///
/// @param number how many leading bits to set, 0..32
/// @return the mask; 0 for number == 0, 0xFFFFFFFF for number == 32
/// @throws std::invalid_argument if number is greater than 32
inline uint32_t high_bits(uint8_t number)
{
 if (number > 32) {
  // 32 - number would be negative: an undefined shift amount.
  throw std::invalid_argument("high_bits: bit count exceeds 32");
 }
 if (number == 0) {
  // Shifting a 32-bit value by 32 is undefined behaviour, so answer directly.
  return 0;
 }
 return ~((static_cast<uint32_t>(1) << (32 - number)) - 1);
}

// Identify operator with leading bits
//
// Holds the fixed bits of an instruction together with a mask. Only the bits
// are read by this class so far; the mask is stored but not used yet.
class Pattern
{
public:
 /// @param bits fixed bits of the instruction
 /// @param mask mask accompanying the bits (presumably marks which bits are
 ///             fixed - NOTE(review): nothing reads it yet, confirm intent)
 Pattern(uint32_t bits, uint32_t mask): _bits{bits}, _mask{mask} {}

 /// Returns the stored bits converted to T. Bits that do not fit into T are
 /// dropped by the conversion.
 template<typename T>
 T encode() const
 {
  return static_cast<T>(_bits);
 }

 /// Not implemented: always throws.
 ///
 /// @throws std::runtime_error unconditionally
 Pattern& alternative_syntax() {
  throw std::runtime_error("alternative_syntax: Unimplemented");
 }

private:
 uint32_t _bits;
 uint32_t _mask;
};

/// Base class of everything that can appear in an instruction's operand list.
///
/// Operands are held through pointers to this base, so the destructor is
/// virtual: destroying a derived operand through an Operand pointer is then
/// always well defined, whatever owns it.
class Operand
{
public:
 virtual ~Operand() = default;
 // Declaring the destructor would leave the copy operations only implicitly
 // (and deprecatedly) generated; spell them out to keep operands copyable.
 Operand(const Operand&) = default;
 Operand& operator=(const Operand&) = default;
protected:
 /// Only derived classes may create operands.
 ///
 /// @param pos position value kept for the operand (presumably the bit offset
 ///            of its field inside the instruction - TODO confirm)
 explicit Operand(uint8_t pos): _pos(pos){}
private:
 uint8_t _pos;
};

/// Register operand. Only the (size, pos) form is usable; the mask and name
/// forms are placeholders that always throw.
class Register: public Operand
{
public:
 /// @param size accepted but not stored
 /// @param pos  forwarded to Operand
 Register(uint8_t size, uint8_t pos)
  : Operand(pos)
 {
 }

 /// Not implemented.
 /// @throws std::runtime_error unconditionally
 Register(uint32_t mask)
  : Operand(0)
 {
  throw std::runtime_error("Unimplemented");
 }

 /// Not implemented.
 /// @throws std::runtime_error unconditionally
 Register(const std::string& name)
  : Operand(0)
 {
  throw std::runtime_error("Unimplemented");
 }
};

/// Operand built from an index, which is passed to Operand as its position
/// value (presumably the index of another operand in the same instruction
/// that this one repeats - TODO confirm).
class SameRegister: public Operand
{
public:
 /// @param index forwarded unchanged to the Operand base
 SameRegister(uint8_t index)
  : Operand(index)
 {
 }
};

/// Immediate operand: remembers the given size next to the position kept by
/// the Operand base.
class Immediate: public Operand
{
public:
 /// @param size stored as the operand's bit count
 /// @param pos  forwarded to Operand
 Immediate(uint8_t size, uint8_t pos)
  : Operand(pos)
  , _bits(size)
 {
 }

private:
 uint8_t _bits;
};

/// Ordered list of shared operand descriptions.
using Operands = std::vector<std::shared_ptr<Operand>>;

// Pattern [, Operand, ...]
//
// One instruction variant: mnemonic, size in bytes, fixed bit pattern and the
// operands it takes.
class Instruction
{
public:
 /// @param mnemonic instruction name
 /// @param size     instruction size in bytes; encode() supports 2 and 4
 /// @param pattern  fixed bits of the instruction
 /// @param operands operand descriptions; taken by value and moved into place
 ///                 so the vector of shared pointers is not copied twice
 Instruction(const std::string& mnemonic, uint8_t size, Pattern pattern, Operands operands)
  : _mnemonic(mnemonic), _size(size), _pattern(pattern), _operands(std::move(operands)) {}

 /// Produces the bytes of this instruction, least significant byte first.
 ///
 /// Only the fixed pattern bits are emitted at present: `arguments` is not
 /// read and no operand is encoded.
 ///
 /// @param arguments textual operands (currently unused)
 /// @return 2 or 4 bytes, depending on the instruction size
 /// @throws std::runtime_error if the size is neither 2 nor 4
 code_sequence encode(const std::vector<std::string>& arguments)
 {
  switch (_size) {
  case 2: // 16 bit thumb insn
   return to_code_sequence<uint16_t>(_pattern.encode<uint16_t>());
  case 4: // 32 bit thumb insn
   return to_code_sequence<uint32_t>(_pattern.encode<uint32_t>());
  default:
   // _size is a uint8_t; std::to_string formats it as a number.
   throw std::runtime_error(std::string("Unsupported instruction size ") + std::to_string(_size));
  }
 }

private:
 std::string _mnemonic;
 uint8_t _size;
 Pattern _pattern;
 Operands _operands;
};

// Operand factory functions and the instruction table built from them.
//
// NOTE(review): this anonymous namespace lives in a header, so every
// translation unit including the file gets its own copy of the functions and
// of the table.
namespace {
 // factory functions

 // Immediate operand of `size` bits at `pos`. factor, code0_is_value and
 // invalid_values are accepted but not used yet.
 std::shared_ptr<Operand> imm(uint8_t size, uint8_t pos, uint8_t factor = 1, uint32_t code0_is_value = 0, const std::vector<uint32_t>& invalid_values = {}){ return std::make_shared<Immediate>(size, pos); }
 // Label operand; currently represented as a plain Immediate. factor and
 // is_signed are accepted but not used yet.
 std::shared_ptr<Operand> label(uint8_t size, uint8_t pos, uint8_t factor = 1, bool is_signed = false){ return std::make_shared<Immediate>(size, pos); }
 // Label operand described by a bit list and a flip mask. Not implemented: always throws.
 std::shared_ptr<Operand> label(uint8_t size, const std::vector<uint8_t>& bits, uint32_t flip_mask, uint8_t factor = 1, bool is_signed = false){ throw std::runtime_error("Unimplemented: label w/ flipmask"); }
 // reg_list_ref_index, see LDM
 // Register operand of `size` bits at `pos`; reg_list_ref_index is accepted but not used yet.
 std::shared_ptr<Operand> reg(uint8_t size, uint8_t pos, std::optional<uint8_t> reg_list_ref_index = std::nullopt) { return std::make_shared<Register>(size, pos); }
 // Register operand described by a mask. Throws: Register(uint32_t) is not implemented.
 std::shared_ptr<Operand> reg(uint32_t mask) { return std::make_shared<Register>(mask); }
 // Register operand given by name. Throws: Register(const std::string&) is not implemented.
 std::shared_ptr<Operand> reg(const std::string& name) { return std::make_shared<Register>(name); }
 // Register list operand. Not implemented: always throws.
 std::shared_ptr<Operand> reg_list(uint8_t size, uint8_t pos) { throw std::runtime_error("Unimplemented: reg_list"); }
 // Fixed identifier operand. Not implemented: always throws.
 std::shared_ptr<Operand> id(const std::string& name) { throw std::runtime_error("Unimplemented: id"); }
 // Operand created as SameRegister(index).
 std::shared_ptr<Operand> same_reg(uint8_t index) { return std::make_shared<SameRegister>(index); }
 // Condition code operand. Not implemented: always throws.
 std::shared_ptr<Operand> cond(uint8_t pos) { throw std::runtime_error("Unimplemented: cond"); }

 // End of the suffix recursion below. Not implemented: always throws.
 std::shared_ptr<Operand> suffixed(std::shared_ptr<Operand> op) { throw std::runtime_error("Unimplemented: suffixed"); }
 // Operand followed by one of several suffixes. Each call drops the first
 // suffix and recurses, so every call ends in the throwing overload above.
 template<typename T, typename... Targs>
 std::shared_ptr<Operand> suffixed(std::shared_ptr<Operand> op, T suffix, Targs... suffixes) {
  // TODO: add suffix to op
  return suffixed(op, suffixes...);
 }

 // Bracketed group of operands. Not implemented: always throws.
 std::shared_ptr<Operand> bracketed(Operands operands) { throw std::runtime_error("Unimplemented: bracketed"); }
 // Marks an operand as optional; currently returns it unchanged.
 std::shared_ptr<Operand> optional(std::shared_ptr<Operand> op) { return op; }

 // TODO: consistency checks:
 // * all bits in an instruction defined
 // * unambiguous patterns
 //   - except: alternative syntax, which needs to be same as other pattern
 //
 // NOTE(review): several entries call factories (or
 // Pattern::alternative_syntax) that still throw, so as written this
 // initializer cannot run to completion.
 // NOTE(review): high_bits() yields masks aligned to the top of a 32-bit word,
 // while the literal masks of the 16-bit entries (e.g. 0xFF78) are aligned to
 // 16 bits - confirm which convention is intended before the masks get used.
 std::vector<Instruction> insns{
  {"adcs", 2, Pattern(0x4140, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1
  {"adds", 2, Pattern(0x1C00, high_bits(7)), Operands{reg(3, 0), reg(3, 3), imm(3, 6)}}, // T1, 7 fixed bits: 0001110 imm3 Rn Rd
  {"adds", 2, Pattern(0x3000, high_bits(5)), Operands{optional(same_reg(1)), reg(3, 8), imm(8, 0)}}, // T2
  {"adds", 2, Pattern(0x1800, high_bits(7)), Operands{reg(3, 0), reg(3, 3), reg(3, 6)}}, // T1, Rm is a 3 bit field: 0001100 Rm Rn Rd
  {"add", 2, Pattern(0x4400, high_bits(8)), Operands{reg(0b10000111), reg(4, 3)}}, // T2
  {"add", 2, Pattern(0xA800, high_bits(5)), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1
  {"add", 2, Pattern(0xB000, high_bits(9)), Operands{optional(reg("sp")), reg("sp"), imm(7, 0, 4)}}, // T2
  {"add", 2, Pattern(0x4468, 0xFF78), Operands{same_reg(2), reg("sp"), reg(0b10000111)}}, // T1
  {"add", 2, Pattern(0x4485, 0xFF87), Operands{reg("sp"), reg(4, 3)}}, // T2
  {"adr", 2, Pattern(0xA000, high_bits(5)), Operands{reg(3, 8), label(8, 0, 4)}}, // T1
  {"add", 2, Pattern(0xA000, high_bits(5)).alternative_syntax(), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1, alternative syntax
  {"ands", 2, Pattern(0x4000, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1
  {"asrs", 2, Pattern(0x1000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 32)}}, // T1
  {"asrs", 2, Pattern(0x4100, high_bits(10)), Operands{same_reg(1), reg(3, 0), reg(3, 3)}}, // T1
  {"b<c>", 2, Pattern(0xD000, high_bits(4)), Operands{cond(8), label(8, 0, 2, true)}}, // T1
  {"b", 2, Pattern(0xE000, high_bits(5)), Operands{label(11, 0, 2, true)}}, // T2
  {"bal", 2, Pattern(0xE000, high_bits(5)).alternative_syntax(), Operands{label(11, 0, 2, true)}}, // T2
  {"bics", 2, Pattern(0x4380, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1
  {"bkpt", 2, Pattern(0xBE00, high_bits(8)), Operands{imm(8, 0)}}, // T1
  {"bl", 4, Pattern(0xF000D000, 0xF800D000), Operands{label(24, {11, 0, 10, 16, 1, 11, 1, 13, 1, 26}, 0x00002800, 2, true)}}, // T1
  {"blx", 2, Pattern(0x4780, 0xFF87), Operands{reg(4, 3)}}, // T1
  {"bx", 2, Pattern(0x4700, 0xFF87), Operands{reg(4, 3)}}, // T1
  {"cmn", 2, Pattern(0x42C0, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1
  {"cmp", 2, Pattern(0x2800, high_bits(5)), Operands{reg(3, 8), imm(8, 0)}}, // T1
  {"cmp", 2, Pattern(0x4280, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1
  {"cmp", 2, Pattern(0x4500, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T2
  {"cps", 2, Pattern(0xB662, 0xFFEF), Operands{imm(1, 4)}}, // T1
  {"cpy", 2, Pattern(0x4600, high_bits(8)).alternative_syntax(), Operands{reg(0x87), reg(4, 3)}}, // T1
  {"dmb", 4, Pattern(0xF3BF8F5F, high_bits(32)), Operands{optional(id("sy"))}}, // T1
  {"dsb", 4, Pattern(0xF3BF8F4F, high_bits(32)), Operands{optional(id("sy"))}}, // T1
  {"eors", 2, Pattern(0x4040, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1
  {"isb", 4, Pattern(0xF3BF8F6F, high_bits(32)), Operands{optional(id("sy"))}}, // T1
  {"ldm", 2, Pattern(0xC800, high_bits(5)), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1
  {"ldmia", 2, Pattern(0xC800, high_bits(5)).alternative_syntax(), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1
  {"ldmfd", 2, Pattern(0xC800, high_bits(5)).alternative_syntax(), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1
  {"ldr", 2, Pattern(0x6800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), optional(imm(5, 6, 4))})}}, // T1
  {"ldr", 2, Pattern(0x9800, high_bits(5)), Operands{reg(3, 8), bracketed(Operands{reg("sp"), optional(imm(8, 0, 4))})}}, // T2

  {"lsls", 2, Pattern(0x0000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 0, {0})}},
  {"mov", 2, Pattern(0x4600, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T1
  {"movs", 2, Pattern(0x0000, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T2
 };
}