#pragma once

#include <cstdint>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std::string_literals;

/// Bytes of one encoded instruction, least significant byte first.
using code_sequence = std::vector<uint8_t>;

/// Splits an integer into its bytes, least significant byte first.
/// Only the explicit specialisations below are defined.
template<typename from_t>
code_sequence to_code_sequence(from_t v);

// `inline` because an explicit specialisation is an ordinary function: without
// it, including this header from two translation units violates the ODR.
template<>
inline code_sequence to_code_sequence<uint16_t>(uint16_t v) {
    return code_sequence{static_cast<uint8_t>(v & 0xFF),
                         static_cast<uint8_t>(v >> 8)};
}

template<>
inline code_sequence to_code_sequence<uint32_t>(uint32_t v) {
    return code_sequence{static_cast<uint8_t>(v & 0xFF),
                         static_cast<uint8_t>(v >> 8),
                         static_cast<uint8_t>(v >> 16),
                         static_cast<uint8_t>(v >> 24)};
}

/// Returns a 32-bit mask whose `number` most significant bits are set.
///
/// @param number  Count of leading one bits; 0 yields 0, values of 32 or more
///                yield 0xFFFFFFFF.
///
/// NOTE(review): the mask is always relative to a 32-bit word (for example
/// high_bits(10) == 0xFFC00000), while the instruction table also passes
/// literal masks such as 0xFF78 for 2-byte instructions -- confirm which
/// alignment the mask consumer expects.
inline uint32_t high_bits(uint8_t number) {
    // Shifting a 32-bit value by 32 (number == 0) or by a negative amount
    // (number > 32) is undefined behaviour, so handle both ends explicitly.
    if (number == 0) {
        return 0;
    }
    if (number >= 32) {
        return 0xFFFFFFFFu;
    }
    return ~((static_cast<uint32_t>(1) << (32 - number)) - 1);
}

// Identify operator with leading bits
class Pattern {
public:
    /// @param bits                   Fixed opcode bits of the instruction.
    /// @param mask                   Mask stored alongside the bits (not read yet).
    /// @param is_alternative_syntax  Accepted but currently ignored.
    Pattern(uint32_t bits, uint32_t mask, bool is_alternative_syntax = false):
        _bits{bits}, _mask{mask} {}

    /// Returns the stored bits converted (truncated if necessary) to T.
    template<typename T>
    T encode() const { return static_cast<T>(_bits); }

private:
    uint32_t _bits;
    uint32_t _mask;
};

/// Common base of all operand descriptions; stores one position value.
class Operand {
public:
    // Operands are handled through base-class smart pointers, so make
    // destruction through the base well-defined for every kind of owner.
    virtual ~Operand() = default;

protected:
    Operand(uint8_t pos): _pos(pos) {}

private:
    uint8_t _pos;
};

/// Register operand. Only the (size, pos) form is implemented; `size` is
/// accepted but not stored.
class Register: public Operand {
public:
    Register(uint8_t size, uint8_t pos): Operand{pos} {}

    /// @throws std::runtime_error always (not implemented yet).
    Register(uint32_t mask): Operand{0} {
        throw std::runtime_error("Unimplemented");
    }

    /// @throws std::runtime_error always (not implemented yet).
    Register(const std::string& name): Operand{0} {
        throw std::runtime_error("Unimplemented");
    }
};

/// Operand built from an index, which is stored as the base-class position.
class SameRegister: public Operand {
public:
    SameRegister(uint8_t index): Operand{index} {}
};

/// Immediate operand described by a bit count and a position.
class Immediate: public Operand {
public:
    Immediate(uint8_t size, uint8_t pos): Operand{pos}, _bits{size} {}

private:
    uint8_t _bits;
};

using Operands = std::vector<std::shared_ptr<Operand>>;

// Pattern [, Operand, ...]
class Instruction { public: Instruction(const std::string& mnemonic, uint8_t size, Pattern pattern, Operands operands): _mnemonic(mnemonic), _size(size), _pattern(pattern), _operands(operands) {} code_sequence encode(const std::vector& arguments) { if (_size == 2) { // 16 bit thumb insn uint16_t result{ _pattern.encode()}; return to_code_sequence(result); } else if (_size == 4) { // 32 bit thumb insn uint32_t result{ _pattern.encode()}; return to_code_sequence(result); } else { throw std::runtime_error("Unsupported instruction size "s + std::to_string(_size)); } } private: std::string _mnemonic; uint8_t _size; Pattern _pattern; Operands _operands; }; namespace { // factory functions std::shared_ptr imm(uint8_t size, uint8_t pos, uint8_t factor = 1, uint32_t code0_is_value = 0, const std::vector& invalid_values = {}){ return std::make_shared(size, pos); } std::shared_ptr label(uint8_t size, uint8_t pos, uint8_t factor = 1, bool is_signed = false){ return std::make_shared(size, pos); } std::shared_ptr label(uint8_t size, const std::vector& bits, uint32_t flip_mask, uint8_t factor = 1, bool is_signed = false){ throw std::runtime_error("Unimplemented: label w/ flipmask"); } // reg_list_ref_index, see LDM std::shared_ptr reg(uint8_t size, uint8_t pos, std::optional reg_list_ref_index = std::nullopt) { return std::make_shared(size, pos); } std::shared_ptr reg(uint32_t mask) { return std::make_shared(mask); } std::shared_ptr reg(const std::string& name) { return std::make_shared(name); } std::shared_ptr reg_list(uint8_t size, uint8_t pos) { throw std::runtime_error("Unimplemented: reg_list"); } std::shared_ptr id(const std::string& name) { throw std::runtime_error("Unimplemented: id"); } std::shared_ptr same_reg(uint8_t index) { return std::make_shared(index); } std::shared_ptr cond(uint8_t pos) { throw std::runtime_error("Unimplemented: cond"); } std::shared_ptr optional(std::shared_ptr op) { return op; } // TODO: consistency checks: // * all bits in an instruction 
defined // * unambiguous patterns // - except: alternative syntax, which needs to be same as other pattern std::vector insns{ {"adcs", 2, Pattern(0x4140, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"adds", 2, Pattern(0x1C00, high_bits(8)), Operands{reg(3, 0), reg(3, 3), imm(3, 6)}}, // T1 {"adds", 2, Pattern(0x3000, high_bits(5)), Operands{optional(same_reg(1)), reg(3, 8), imm(8, 0)}}, // T2 {"adds", 2, Pattern(0x1800, high_bits(7)), Operands{reg(3, 0), reg(3, 3), reg(6, 6)}}, // T1 {"add", 2, Pattern(0x4400, high_bits(8)), Operands{reg(0b10000111), reg(4, 3)}}, // T2 {"add", 2, Pattern(0xA800, high_bits(5)), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1 {"add", 2, Pattern(0xB000, high_bits(9)), Operands{optional(reg("sp")), reg("sp"), imm(7, 0, 4)}}, // T2 {"add", 2, Pattern(0x4468, 0xFF78), Operands{same_reg(2), reg("sp"), reg(0b10000111)}}, // T1 {"add", 2, Pattern(0x4485, 0xFF87), Operands{reg("sp"), reg(4, 3)}}, // T2 {"adr", 2, Pattern(0xA000, high_bits(5)), Operands{reg(3, 8), label(8, 0, 4)}}, // T1 {"add", 2, Pattern(0xA000, high_bits(5), true), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1, alternative syntax {"ands", 2, Pattern(0x4000, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"asrs", 2, Pattern(0x1000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 32)}}, // T1 {"asrs", 2, Pattern(0x4100, high_bits(10)), Operands{same_reg(1), reg(3, 0), reg(3, 3)}}, // T1 {"b", 2, Pattern(0xD000, high_bits(4)), Operands{cond(8), label(8, 0, 2, true)}}, // T1 {"b", 2, Pattern(0xE000, high_bits(5)), Operands{label(11, 0, 2, true)}}, // T2 {"bal", 2, Pattern(0xE000, high_bits(5), true), Operands{label(11, 0, 2, true)}}, // T2 {"bics", 2, Pattern(0x4380, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"bkpt", 2, Pattern(0xBE00, high_bits(8)), Operands{imm(8, 0)}}, // T1 {"bl", 4, Pattern(0xF000D000, 0xF800D000), Operands{label(24, {11, 0, 10, 16, 1, 11, 1, 13, 1, 26}, 
0x00002800, 2, true)}}, // T1 {"blx", 2, Pattern(0x4780, 0xFF87), Operands{reg(4, 3)}}, // T1 {"bx", 2, Pattern(0x4700, 0xFF87), Operands{reg(4, 3)}}, // T1 {"cmn", 2, Pattern(0x42C0, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"cmp", 2, Pattern(0x2800, high_bits(5)), Operands{reg(3, 8), imm(8, 0)}}, // T1 {"cmp", 2, Pattern(0x4280, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"cmp", 2, Pattern(0x4500, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T2 {"cps", 2, Pattern(0xB662, 0xFFEF), Operands{imm(1, 4)}}, // T1 {"cpy", 2, Pattern(0x4600, high_bits(8), true), Operands{reg(0x87), reg(4, 3)}}, // T1 {"dmb", 4, Pattern(0xF3BF8F5F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"dsb", 4, Pattern(0xF3BF8F4F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"eors", 2, Pattern(0x4040, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"isb", 4, Pattern(0xF3BF8F6F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"ldm", 2, Pattern(0xC800, high_bits(5)), Operands{reg(3, 8, 1), reg_list(8, 0)}}, // T1 {"ldm", 2, Pattern(0xC800, high_bits(5)), Operands{reg(3, 8, 1), reg_list(8, 0)}}, // T1 {"ldmia", 2, Pattern(0xC800, high_bits(5), true), Operands{reg(3, 8, 1), reg_list(8, 0)}}, // T1 {"ldmfd", 2, Pattern(0xC800, high_bits(5), true), Operands{reg(3, 8, 1), reg_list(8, 0)}}, // T1 {"lsls", 2, Pattern(0x0000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 0, {0})}}, {"mov", 2, Pattern(0x4600, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T1 {"movs", 2, Pattern(0x0000, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T2 }; };