#pragma once

#include <cstdint>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std::string_literals;

// Bytes of an encoded value, in the order they are emitted.
using code_sequence = std::vector<uint8_t>;

// Splits an integer into its bytes, least significant byte first.
// Only the explicit specializations below are defined; any other type
// fails at link time.
template<typename from_t>
code_sequence to_code_sequence(from_t v);

// Explicit specializations are ordinary functions and are not implicitly
// inline; `inline` keeps this header usable from several translation units.
template<>
inline code_sequence to_code_sequence<uint16_t>(uint16_t v) {
  return code_sequence{static_cast<uint8_t>(v & 0xFF),
                       static_cast<uint8_t>(v >> 8)};
}

template<>
inline code_sequence to_code_sequence<uint32_t>(uint32_t v) {
  return code_sequence{static_cast<uint8_t>(v & 0xFF),
                       static_cast<uint8_t>(v >> 8),
                       static_cast<uint8_t>(v >> 16),
                       static_cast<uint8_t>(v >> 24)};
}

// Returns a 32 bit mask with the `number` most significant bits set.
// 0 yields an empty mask; 32 or more yields a full mask. Both cases are
// handled explicitly because shifting a 32 bit value by 32 or by a negative
// amount is undefined behaviour.
// NOTE(review): the mask is relative to bit 31 even when it is used for a
// 16 bit pattern, while hand-written masks in this file such as 0xFF78 are
// relative to bit 15 - confirm which convention is intended.
constexpr uint32_t high_bits(uint8_t number) {
  if (number == 0) {
    return 0;
  }
  if (number >= 32) {
    return ~static_cast<uint32_t>(0);
  }
  return ~((static_cast<uint32_t>(1) << (32 - number)) - 1);
}

// Identify operator with leading bits
class Pattern {
public:
  // bits: fixed bits of the encoding.
  // mask: which bits of `bits` are significant (stored, not yet read anywhere
  //       in this file).
  Pattern(uint32_t bits, uint32_t mask): _bits{bits}, _mask{mask} {}

  // Returns the fixed bits converted to T (truncating if T is narrower than
  // 32 bits). Operand values are not merged in.
  template<typename T>
  T encode() const {
    return static_cast<T>(_bits);
  }

  // Meant to mark this pattern as an alternative spelling of another one and
  // to be chained on a temporary; currently always throws std::runtime_error.
  Pattern& alternative_syntax() {
    throw std::runtime_error("alternative_syntax: Unimplemented");
  }

private:
  uint32_t _bits;
  uint32_t _mask;
};

// Common base of all operand descriptions; stores the value passed as `pos`.
class Operand {
public:
  // Operands are handled through pointers to this base class, so destruction
  // through the base must be well defined.
  virtual ~Operand() = default;

protected:
  Operand(uint8_t pos): _pos(pos) {}

private:
  uint8_t _pos;
};

// Register operand.
class Register: public Operand {
public:
  // size: width of the register field in bits (currently ignored).
  // pos:  stored as the operand position.
  Register(uint8_t size, uint8_t pos): Operand{pos} {}

  // Register described by a bit mask; not implemented, always throws.
  Register(uint32_t mask): Operand{0} {
    throw std::runtime_error("Unimplemented");
  }

  // Fixed register given by name; not implemented, always throws.
  Register(const std::string& name): Operand{0} {
    throw std::runtime_error("Unimplemented");
  }
};

// Operand tied to another operand; `index` is stored as the position.
// NOTE(review): presumably the index of the operand that must name the same
// register - confirm against the parser.
class SameRegister: public Operand {
public:
  SameRegister(uint8_t index): Operand{index} {}
};

// Immediate operand occupying `size` bits, with `pos` stored as its position.
class Immediate: public Operand {
public:
  Immediate(uint8_t size, uint8_t pos): Operand{pos}, _bits{size} {}

private:
  uint8_t _bits;
};

using Operands = std::vector<std::shared_ptr<Operand>>;

// Pattern [, Operand, ...]
class Instruction { public: Instruction(const std::string& mnemonic, uint8_t size, Pattern pattern, Operands operands): _mnemonic(mnemonic), _size(size), _pattern(pattern), _operands(operands) {} code_sequence encode(const std::vector& arguments) { if (_size == 2) { // 16 bit thumb insn uint16_t result{ _pattern.encode()}; return to_code_sequence(result); } else if (_size == 4) { // 32 bit thumb insn uint32_t result{ _pattern.encode()}; return to_code_sequence(result); } else { throw std::runtime_error("Unsupported instruction size "s + std::to_string(_size)); } } private: std::string _mnemonic; uint8_t _size; Pattern _pattern; Operands _operands; }; namespace { // factory functions std::shared_ptr imm(uint8_t size, uint8_t pos, uint8_t factor = 1, uint32_t code0_is_value = 0, const std::vector& invalid_values = {}){ return std::make_shared(size, pos); } std::shared_ptr label(uint8_t size, uint8_t pos, uint8_t factor = 1, bool is_signed = false){ return std::make_shared(size, pos); } std::shared_ptr label(uint8_t size, const std::vector& bits, uint32_t flip_mask, uint8_t factor = 1, bool is_signed = false){ throw std::runtime_error("Unimplemented: label w/ flipmask"); } // reg size 3: arm register 0-7 // reg size 4: arm register 0-15 // reg size 8: arm special registers for insns msr, mrs // reg_list_ref_index, see LDM std::shared_ptr reg(uint8_t size, uint8_t pos, std::optional reg_list_ref_index = std::nullopt) { return std::make_shared(size, pos); } std::shared_ptr reg(uint32_t mask) { return std::make_shared(mask); } std::shared_ptr reg(const std::string& name) { return std::make_shared(name); } std::shared_ptr reg_list(uint8_t size, uint8_t pos) { throw std::runtime_error("Unimplemented: reg_list"); } std::shared_ptr id(const std::string& name) { throw std::runtime_error("Unimplemented: id"); } std::shared_ptr same_reg(uint8_t index) { return std::make_shared(index); } std::shared_ptr cond(uint8_t pos) { throw std::runtime_error("Unimplemented: cond"); } 
std::shared_ptr suffixed(std::shared_ptr op) { throw std::runtime_error("Unimplemented: suffixed"); } // suffixes: list of alternative suffixes as strings, including "" template std::shared_ptr suffixed(std::shared_ptr op, T suffix, Targs... suffixes) { // TODO: add suffix to op return suffixed(op, suffixes...); } std::shared_ptr prefixed(std::shared_ptr op) { throw std::runtime_error("Unimplemented: prefixed"); } // prefixes: list of alternative refixes as strings, including "" template std::shared_ptr prefixed(std::shared_ptr op, T prefix, Targs... prefixes) { // TODO: add prefix to op return prefixed(op, prefixes...); } std::shared_ptr bracketed(Operands operands) { throw std::runtime_error("Unimplemented: bracketed"); }; std::shared_ptr optional(std::shared_ptr op) { return op; } // TODO: consistency checks: // * all bits in an instruction defined // * unambiguous patterns // - except: alternative syntax, which needs to be same as exactly 1 other pattern std::vector insns{ {"adcs", 2, Pattern(0x4140, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"adds", 2, Pattern(0x1C00, high_bits(8)), Operands{reg(3, 0), reg(3, 3), imm(3, 6)}}, // T1 {"adds", 2, Pattern(0x3000, high_bits(5)), Operands{optional(same_reg(1)), reg(3, 8), imm(8, 0)}}, // T2 {"adds", 2, Pattern(0x1800, high_bits(7)), Operands{reg(3, 0), reg(3, 3), reg(6, 6)}}, // T1 {"add", 2, Pattern(0x4400, high_bits(8)), Operands{reg(0b10000111), reg(4, 3)}}, // T2 {"add", 2, Pattern(0xA800, high_bits(5)), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1 {"add", 2, Pattern(0xB000, high_bits(9)), Operands{optional(reg("sp")), reg("sp"), imm(7, 0, 4)}}, // T2 {"add", 2, Pattern(0x4468, 0xFF78), Operands{same_reg(2), reg("sp"), reg(0b10000111)}}, // T1 {"add", 2, Pattern(0x4485, 0xFF87), Operands{reg("sp"), reg(4, 3)}}, // T2 {"adr", 2, Pattern(0xA000, high_bits(5)), Operands{reg(3, 8), label(8, 0, 4)}}, // T1 {"add", 2, Pattern(0xA000, high_bits(5)).alternative_syntax(), Operands{reg(3, 
8), imm(8, 0, 4)}}, // T1, alternative syntax {"ands", 2, Pattern(0x4000, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"asrs", 2, Pattern(0x1000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 32)}}, // T1 {"asrs", 2, Pattern(0x4100, high_bits(10)), Operands{same_reg(1), reg(3, 0), reg(3, 3)}}, // T1 {"b", 2, Pattern(0xD000, high_bits(4)), Operands{cond(8), label(8, 0, 2, true)}}, // T1 {"b", 2, Pattern(0xE000, high_bits(5)), Operands{label(11, 0, 2, true)}}, // T2 {"bal", 2, Pattern(0xE000, high_bits(5)).alternative_syntax(), Operands{label(11, 0, 2, true)}}, // T2 {"bics", 2, Pattern(0x4380, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"bkpt", 2, Pattern(0xBE00, high_bits(8)), Operands{imm(8, 0)}}, // T1 {"bl", 4, Pattern(0xF000D000, 0xF800D000), Operands{label(24, {11, 0, 10, 16, 1, 11, 1, 13, 1, 26}, 0x00002800, 2, true)}}, // T1 {"blx", 2, Pattern(0x4780, 0xFF87), Operands{reg(4, 3)}}, // T1 {"bx", 2, Pattern(0x4700, 0xFF87), Operands{reg(4, 3)}}, // T1 {"cmn", 2, Pattern(0x42C0, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"cmp", 2, Pattern(0x2800, high_bits(5)), Operands{reg(3, 8), imm(8, 0)}}, // T1 {"cmp", 2, Pattern(0x4280, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"cmp", 2, Pattern(0x4500, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T2 {"cps", 2, Pattern(0xB662, 0xFFEF), Operands{imm(1, 4)}}, // T1 {"cpy", 2, Pattern(0x4600, high_bits(8)).alternative_syntax(), Operands{reg(0x87), reg(4, 3)}}, // T1 {"dmb", 4, Pattern(0xF3BF8F5F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"dsb", 4, Pattern(0xF3BF8F4F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"eors", 2, Pattern(0x4040, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"isb", 4, Pattern(0xF3BF8F6F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"ldm", 2, Pattern(0xC800, high_bits(5)), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 
0)}}, // T1 {"ldmia", 2, Pattern(0xC800, high_bits(5)).alternative_syntax(), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1 {"ldmfd", 2, Pattern(0xC800, high_bits(5)).alternative_syntax(), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1 {"ldr", 2, Pattern(0x6800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), optional(imm(5, 6, 4))})}}, // T1 {"ldr", 2, Pattern(0x9800, high_bits(5)), Operands{reg(3, 8), bracketed(Operands{reg("sp"), optional(imm(8, 0, 4))})}}, // T2 {"ldr", 2, Pattern(0x4800, high_bits(5)), Operands{reg(3, 8), label(8, 0, 4)}}, // T1 {"ldr", 2, Pattern(0x4800, high_bits(5)).alternative_syntax(), Operands{reg(3, 8), bracketed(Operands{reg("pc"), imm(8, 0, 4)})}}, // T1 {"ldr", 2, Pattern(0x5800, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrb", 2, Pattern(0x7800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), optional(imm(5, 6, 4))})}}, // T1 {"ldrb", 2, Pattern(0x5C00, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrh", 2, Pattern(0x8800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), optional(imm(5, 6, 4))})}}, // T1 {"ldrh", 2, Pattern(0x5A00, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrsb", 2, Pattern(0x5600, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrsh", 2, Pattern(0x5E00, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"lsls", 2, Pattern(0x0000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 0, {0})}}, // T1 {"lsls", 2, Pattern(0x4080, high_bits(10)), Operands{reg(3, 0), same_reg(0), reg(3, 3)}}, // T1 {"lsrs", 2, Pattern(0x0800, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 0, {0})}}, // T1 
{"lsrs", 2, Pattern(0x40C0, high_bits(10)), Operands{reg(3, 0), same_reg(0), reg(3, 3)}}, // T1 {"movs", 2, Pattern(0x2000, high_bits(5)), Operands{reg(3, 0), imm(8, 0)}}, // T1 {"mov", 2, Pattern(0x4600, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T1 {"movs", 2, Pattern(0x0000, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T2 {"movs", 2, Pattern(0x1000, high_bits(5)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3), prefixed(imm(5, 6, 1, 32), "asr ")}}, // T1 {"movs", 2, Pattern(0x0000, high_bits(5)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3), prefixed(imm(5, 6, 1, 0, {0}), "lsl ")}}, // T1 {"movs", 2, Pattern(0x0800, high_bits(5)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3), prefixed(imm(5, 6, 1, 0, {0}), "lsr ")}}, // T1 {"movs", 2, Pattern(0x4100, high_bits(10)).alternative_syntax(), Operands{same_reg(1), reg(3, 0), prefixed(reg(3, 3), "asr ")}}, // T1 {"movs", 2, Pattern(0x4080, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), same_reg(0), prefixed(reg(3, 3), "lsl ")}}, // T1 {"movs", 2, Pattern(0x40C0, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), same_reg(0), prefixed(reg(3, 3), "lsr ")}}, // T1 {"movs", 2, Pattern(0x41C0, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), same_reg(0), prefixed(reg(3, 3), "ror ")}}, // T1 {"mrs", 4, Pattern(0xF3EF8000, high_bits(20)), Operands{reg(4, 8), reg(8, 0)}}, // T1 {"msr", 4, Pattern(0xF3808800, 0xFFF0FF00), Operands{reg(4, 16), reg(8, 0)}}, // T1 {"rors", 2, Pattern(0x41C0, high_bits(10)), Operands{reg(3, 0), same_reg(0), reg(3, 3)}}, // T1 }; };