#pragma once

#include <cstdint>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std::string_literals;

// Raw bytes of an encoded instruction.
using code_sequence = std::vector<uint8_t>;

// Converts an integer into its byte sequence, least significant byte first.
// Only the explicit specializations below are defined.
template<typename from_t>
code_sequence to_code_sequence(from_t v);

// 16 bit value -> 2 bytes, low byte first.
// `inline`: this header may be included by several translation units.
template<>
inline code_sequence to_code_sequence<uint16_t>(uint16_t v)
{
    return code_sequence{static_cast<uint8_t>(v & 0xFF),
                         static_cast<uint8_t>(v >> 8)};
}

// 32 bit value -> 4 bytes, low byte first.
template<>
inline code_sequence to_code_sequence<uint32_t>(uint32_t v)
{
    return code_sequence{static_cast<uint8_t>(v & 0xFF),
                         static_cast<uint8_t>(v >> 8),
                         static_cast<uint8_t>(v >> 16),
                         static_cast<uint8_t>(v >> 24)};
}

// Returns a mask with the `number` most significant bits of a 32 bit word set,
// e.g. high_bits(5) == 0xF8000000.
//
// number == 0 yields 0 and number >= 32 yields 0xFFFFFFFF; both cases are
// handled explicitly because shifting a 32 bit value by 32 or by a negative
// amount is undefined behaviour.
//
// NOTE(review): the result is always relative to a 32 bit word, also when the
// caller describes a 16 bit instruction - confirm that this is intended.
inline uint32_t high_bits(uint8_t number)
{
    if (number == 0) {
        return 0;
    }
    if (number >= 32) {
        return 0xFFFFFFFFu;
    }
    return ~((static_cast<uint32_t>(1) << (32 - number)) - 1);
}

// Identify operator with leading bits
class Pattern
{
public:
    // bits: fixed bits of the encoding, mask: which bits of the encoding are fixed.
    Pattern(uint32_t bits, uint32_t mask): _bits{bits}, _mask{mask} {}

    // Returns the fixed bits converted to T (truncating if T is narrower than
    // 32 bits). The mask is not applied.
    template<typename T>
    T encode() const
    {
        return static_cast<T>(_bits);
    }

    // Not implemented yet: always throws std::runtime_error.
    Pattern& alternative_syntax()
    {
        throw std::runtime_error("alternative_syntax: Unimplemented");
    }

private:
    uint32_t _bits;
    uint32_t _mask;
};

// Base class of everything that can appear in an instruction's operand list.
class Operand
{
public:
    // Operands are handled through std::shared_ptr<Operand>; the virtual
    // destructor makes destruction through the base type well defined.
    virtual ~Operand() = default;

protected:
    Operand(uint8_t pos): _pos(pos) {}

private:
    uint8_t _pos;
};

class Register: public Operand
{
public:
    // Only pos is stored; the size argument is currently ignored.
    Register(uint8_t /*size*/, uint8_t pos): Operand{pos} {}

    // Not implemented yet: always throws std::runtime_error.
    Register(uint32_t /*mask*/): Operand{0}
    {
        throw std::runtime_error("Unimplemented");
    }

    // Not implemented yet: always throws std::runtime_error.
    Register(const std::string& /*name*/): Operand{0}
    {
        throw std::runtime_error("Unimplemented");
    }
};

class SameRegister: public Operand
{
public:
    // index is forwarded to Operand as its position value.
    SameRegister(uint8_t index): Operand{index} {}
};

class Immediate: public Operand
{
public:
    // size is kept as the bit count, pos is forwarded to Operand.
    Immediate(uint8_t size, uint8_t pos): Operand{pos}, _bits{size} {}

private:
    uint8_t _bits;
};

using Operands = std::vector<std::shared_ptr<Operand>>;

// Pattern [, Operand, ...]
class Instruction { public: Instruction(const std::string& mnemonic, uint8_t size, Pattern pattern, Operands operands): _mnemonic(mnemonic), _size(size), _pattern(pattern), _operands(operands) {} code_sequence encode(const std::vector& arguments) { if (_size == 2) { // 16 bit thumb insn uint16_t result{ _pattern.encode()}; return to_code_sequence(result); } else if (_size == 4) { // 32 bit thumb insn uint32_t result{ _pattern.encode()}; return to_code_sequence(result); } else { throw std::runtime_error("Unsupported instruction size "s + std::to_string(_size)); } } private: std::string _mnemonic; uint8_t _size; Pattern _pattern; Operands _operands; }; namespace { // factory functions std::shared_ptr imm(uint8_t size, uint8_t pos, uint8_t factor = 1, uint32_t code0_is_value = 0, const std::vector& invalid_values = {}){ return std::make_shared(size, pos); } std::shared_ptr label(uint8_t size, uint8_t pos, uint8_t factor = 1, bool is_signed = false){ return std::make_shared(size, pos); } std::shared_ptr label(uint8_t size, const std::vector& bits, uint32_t flip_mask, uint8_t factor = 1, bool is_signed = false){ throw std::runtime_error("Unimplemented: label w/ flipmask"); } // reg size 3: arm register 0-7 // reg size 4: arm register 0-15 // reg size 8: arm special registers for insns msr, mrs // reg_list_ref_index, see LDM std::shared_ptr reg(uint8_t size, uint8_t pos, std::optional reg_list_ref_index = std::nullopt) { return std::make_shared(size, pos); } std::shared_ptr reg(uint32_t mask) { return std::make_shared(mask); } std::shared_ptr reg(const std::string& name) { return std::make_shared(name); } // register list: "{" "," "," ... 
"}" // size 8 -> arm regs 0-7 // size 9 -> pc + arm regs 0-7 std::vector default_regs{"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7"}; std::shared_ptr reg_list(uint8_t size, uint8_t pos, const std::vector& regs = default_regs) { throw std::runtime_error("Unimplemented: reg_list"); } // duplicate same operand (reg) to 2 locations in encoding std::shared_ptr duplicate(std::shared_ptr op, uint8_t pos) {throw std::runtime_error("Unimplemented: duplicate");} std::shared_ptr id(const std::string& name) { throw std::runtime_error("Unimplemented: id"); } std::shared_ptr same_reg(uint8_t index) { return std::make_shared(index); } std::shared_ptr cond(uint8_t pos) { throw std::runtime_error("Unimplemented: cond"); } std::shared_ptr suffixed(std::shared_ptr op) { throw std::runtime_error("Unimplemented: suffixed"); } // suffixes: list of alternative suffixes as strings, including "" template std::shared_ptr suffixed(std::shared_ptr op, T suffix, Targs... suffixes) { // TODO: add suffix to op return suffixed(op, suffixes...); } std::shared_ptr prefixed(std::shared_ptr op) { throw std::runtime_error("Unimplemented: prefixed"); } // prefixes: list of alternative refixes as strings, including "" template std::shared_ptr prefixed(std::shared_ptr op, T prefix, Targs... 
prefixes) { // TODO: add prefix to op return prefixed(op, prefixes...); } std::shared_ptr bracketed(Operands operands) { throw std::runtime_error("Unimplemented: bracketed"); }; std::shared_ptr optional(std::shared_ptr op) { throw std::runtime_error("Unimplemented: optional");return op; } // TODO: consistency checks: // * all bits in an instruction defined // * unambiguous patterns // - except: alternative syntax, which needs to be same as exactly 1 other pattern std::vector insns{ {"adcs", 2, Pattern(0x4140, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"adds", 2, Pattern(0x1C00, high_bits(7)), Operands{reg(3, 0), reg(3, 3), imm(3, 6)}}, // T1 {"adds", 2, Pattern(0x3000, high_bits(5)), Operands{optional(same_reg(1)), reg(3, 8), imm(8, 0)}}, // T2 {"adds", 2, Pattern(0x1800, high_bits(7)), Operands{reg(3, 0), reg(3, 3), reg(6, 6)}}, // T1 {"add", 2, Pattern(0x4400, high_bits(8)), Operands{reg(0b10000111), reg(4, 3)}}, // T2 {"add", 2, Pattern(0xA800, high_bits(5)), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1 {"add", 2, Pattern(0xB000, high_bits(9)), Operands{optional(reg("sp")), reg("sp"), imm(7, 0, 4)}}, // T2 {"add", 2, Pattern(0x4468, 0xFF78), Operands{same_reg(2), reg("sp"), reg(0b10000111)}}, // T1 {"add", 2, Pattern(0x4485, 0xFF87), Operands{reg("sp"), reg(4, 3)}}, // T2 {"adr", 2, Pattern(0xA000, high_bits(5)), Operands{reg(3, 8), label(8, 0, 4)}}, // T1 {"add", 2, Pattern(0xA000, high_bits(5)).alternative_syntax(), Operands{reg(3, 8), imm(8, 0, 4)}}, // T1, alternative syntax {"ands", 2, Pattern(0x4000, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"asrs", 2, Pattern(0x1000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 32)}}, // T1 {"asrs", 2, Pattern(0x4100, high_bits(10)), Operands{same_reg(1), reg(3, 0), reg(3, 3)}}, // T1 {"b", 2, Pattern(0xD000, high_bits(4)), Operands{cond(8), label(8, 0, 2, true)}}, // T1 {"b", 2, Pattern(0xE000, high_bits(5)), Operands{label(11, 0, 2, 
true)}}, // T2 {"bal", 2, Pattern(0xE000, high_bits(5)).alternative_syntax(), Operands{label(11, 0, 2, true)}}, // T2 {"bics", 2, Pattern(0x4380, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"bkpt", 2, Pattern(0xBE00, high_bits(8)), Operands{imm(8, 0)}}, // T1 {"bl", 4, Pattern(0xF000D000, 0xF800D000), Operands{label(24, {11, 0, 10, 16, 1, 11, 1, 13, 1, 26}, 0x00002800, 2, true)}}, // T1 {"blx", 2, Pattern(0x4780, 0xFF87), Operands{reg(4, 3)}}, // T1 {"bx", 2, Pattern(0x4700, 0xFF87), Operands{reg(4, 3)}}, // T1 {"cmn", 2, Pattern(0x42C0, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"cmp", 2, Pattern(0x2800, high_bits(5)), Operands{reg(3, 8), imm(8, 0)}}, // T1 {"cmp", 2, Pattern(0x4280, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"cmp", 2, Pattern(0x4500, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T2 {"cps", 2, Pattern(0xB662, 0xFFEF), Operands{imm(1, 4)}}, // T1 {"cpy", 2, Pattern(0x4600, high_bits(8)).alternative_syntax(), Operands{reg(0x87), reg(4, 3)}}, // T1 {"dmb", 4, Pattern(0xF3BF8F5F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"dsb", 4, Pattern(0xF3BF8F4F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"eors", 2, Pattern(0x4040, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"isb", 4, Pattern(0xF3BF8F6F, high_bits(32)), Operands{optional(id("sy"))}}, // T1 {"ldm", 2, Pattern(0xC800, high_bits(5)), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1 {"ldmia", 2, Pattern(0xC800, high_bits(5)).alternative_syntax(), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1 {"ldmfd", 2, Pattern(0xC800, high_bits(5)).alternative_syntax(), Operands{suffixed(reg(3, 8, 1), "!", ""), reg_list(8, 0)}}, // T1 {"ldr", 2, Pattern(0x6800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), imm(5, 6, 4)})}}, // T1 {"ldr", 2, Pattern(0x6800, high_bits(5) + (0x1F << 6)).alternative_syntax(), Operands{reg(3, 0), 
bracketed(Operands{reg(3, 3)})}}, // T1 {"ldr", 2, Pattern(0x9800, high_bits(5)), Operands{reg(3, 8), bracketed(Operands{reg("sp"), imm(8, 0, 4)})}}, // T2 {"ldr", 2, Pattern(0x9800, high_bits(5) + 0xFF).alternative_syntax(), Operands{reg(3, 8), bracketed(Operands{reg("sp")})}}, // T2 {"ldr", 2, Pattern(0x4800, high_bits(5)), Operands{reg(3, 8), label(8, 0, 4)}}, // T1 {"ldr", 2, Pattern(0x4800, high_bits(5)).alternative_syntax(), Operands{reg(3, 8), bracketed(Operands{reg("pc"), imm(8, 0, 4)})}}, // T1 {"ldr", 2, Pattern(0x5800, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrb", 2, Pattern(0x7800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), imm(5, 6)})}}, // T1 {"ldrb", 2, Pattern(0x7800, high_bits(5) + (0x1F << 6)).alternative_syntax(), Operands{reg(3, 0), bracketed(Operands{reg(3, 3)})}}, // T1 {"ldrb", 2, Pattern(0x5C00, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrh", 2, Pattern(0x8800, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), imm(5, 6, 2)})}}, // T1 {"ldrh", 2, Pattern(0x8800, high_bits(5) + (0x1F << 6)).alternative_syntax(), Operands{reg(3, 0), bracketed(Operands{reg(3, 3)})}}, // T1 {"ldrh", 2, Pattern(0x5A00, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrsb", 2, Pattern(0x5600, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"ldrsh", 2, Pattern(0x5E00, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"lsls", 2, Pattern(0x0000, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 0, {0})}}, // T1 {"lsls", 2, Pattern(0x4080, high_bits(10)), Operands{reg(3, 0), same_reg(0), reg(3, 3)}}, // T1 {"lsrs", 2, Pattern(0x0800, high_bits(5)), Operands{reg(3, 0), reg(3, 3), imm(5, 6, 1, 0, {0})}}, 
// T1 {"lsrs", 2, Pattern(0x40C0, high_bits(10)), Operands{reg(3, 0), same_reg(0), reg(3, 3)}}, // T1 {"movs", 2, Pattern(0x2000, high_bits(5)), Operands{reg(3, 0), imm(8, 0)}}, // T1 {"mov", 2, Pattern(0x4600, high_bits(8)), Operands{reg(0x87), reg(4, 3)}}, // T1 {"movs", 2, Pattern(0x0000, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T2 {"movs", 2, Pattern(0x1000, high_bits(5)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3), prefixed(imm(5, 6, 1, 32), "asr ")}}, // T1 {"movs", 2, Pattern(0x0000, high_bits(5)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3), prefixed(imm(5, 6, 1, 0, {0}), "lsl ")}}, // T1 {"movs", 2, Pattern(0x0800, high_bits(5)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3), prefixed(imm(5, 6, 1, 0, {0}), "lsr ")}}, // T1 {"movs", 2, Pattern(0x4100, high_bits(10)).alternative_syntax(), Operands{same_reg(1), reg(3, 0), prefixed(reg(3, 3), "asr ")}}, // T1 {"movs", 2, Pattern(0x4080, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), same_reg(0), prefixed(reg(3, 3), "lsl ")}}, // T1 {"movs", 2, Pattern(0x40C0, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), same_reg(0), prefixed(reg(3, 3), "lsr ")}}, // T1 {"movs", 2, Pattern(0x41C0, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), same_reg(0), prefixed(reg(3, 3), "ror ")}}, // T1 {"mrs", 4, Pattern(0xF3EF8000, high_bits(20)), Operands{reg(4, 8), reg(8, 0)}}, // T1 {"msr", 4, Pattern(0xF3808800, 0xFFF0FF00), Operands{reg(4, 16), reg(8, 0)}}, // T1 {"muls", 2, Pattern(0x4340, high_bits(10)), Operands{optional(same_reg(2)), reg(3, 3), reg(3, 0)}}, // T1 {"mvns", 2, Pattern(0x43C0, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"neg", 2, Pattern(0x4240, high_bits(10)).alternative_syntax(), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"neg", 2, Pattern(0x4240, high_bits(10)).alternative_syntax(), Operands{duplicate(reg(3, 0), 3)}}, // T1 {"nop", 2, Pattern{0xBF00, 0xFFFF}, {}}, // T1 {"orrs", 2, Pattern{0x4300, high_bits(10)}, 
Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"pop", 2, Pattern{0xBC00, high_bits(7)}, Operands{reg_list(9, 0, {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "pc"})}}, // T1 {"push", 2, Pattern{0xB400, high_bits(7)}, Operands{reg_list(9, 0, {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "lr"})}}, // T1 {"rev", 2, Pattern{0xBA00, high_bits(10)}, Operands{reg(3, 0), reg(3, 3)}}, // T1 {"rev16", 2, Pattern{0xBA40, high_bits(10)}, Operands{reg(3, 0), reg(3, 3)}}, // T1 {"revsh", 2, Pattern{0xBAC0, high_bits(10)}, Operands{reg(3, 0), reg(3, 3)}}, // T1 {"rors", 2, Pattern(0x41C0, high_bits(10)), Operands{reg(3, 0), same_reg(0), reg(3, 3)}}, // T1 {"rsbs", 2, Pattern(0x4240, high_bits(10)), Operands{reg(3, 0), reg(3, 3), id("#0")}}, // T1 {"rsbs", 2, Pattern(0x4240, high_bits(10)).alternative_syntax(), Operands{duplicate(reg(3, 0), 3), id("#0")}}, // T1 {"sbcs", 2, Pattern(0x4180, high_bits(10)), Operands{optional(same_reg(1)), reg(3, 0), reg(3, 3)}}, // T1 {"sev", 2, Pattern(0xBF40, 0xFFFF), {}}, // T1 {"stm", 2, Pattern(0xC000, high_bits(5)), Operands{suffixed(reg(3, 8), "!"), reg_list(8, 0)}}, // T1 {"str", 2, Pattern(0x6000, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), imm(5, 6, 4)})}}, // T1 {"str", 2, Pattern(0x6000, high_bits(5) + (0x1F << 6)).alternative_syntax(), Operands{reg(3, 0), bracketed(Operands{reg(3, 3)})}}, // T1 {"str", 2, Pattern(0x9000, high_bits(5)), Operands{reg(3, 8), bracketed(Operands{reg("sp"), imm(8, 0, 4)})}}, // T2 {"str", 2, Pattern(0x9000, high_bits(5) + 0xFF).alternative_syntax(), Operands{reg(3, 8), bracketed(Operands{reg("sp")})}}, // T2 {"str", 2, Pattern(0x5000, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"strb", 2, Pattern(0x7000, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), imm(5, 6)})}}, // T1 {"strb", 2, Pattern(0x7000, high_bits(5) + (0x1F << 6)).alternative_syntax(), Operands{reg(3, 0), 
bracketed(Operands{reg(3, 3)})}}, // T1 {"strb", 2, Pattern(0x5400, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"strh", 2, Pattern(0x8000, high_bits(5)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), imm(5, 6, 2)})}}, // T1 {"strh", 2, Pattern(0x8000, high_bits(5) + (0x1F << 6)).alternative_syntax(), Operands{reg(3, 0), bracketed(Operands{reg(3, 3)})}}, // T1 {"strh", 2, Pattern(0x5200, high_bits(7)), Operands{reg(3, 0), bracketed(Operands{reg(3, 3), reg(3,6), optional(id("lsl #0"))})}}, // T1 {"subs", 2, Pattern(0x1E00, high_bits(7)), Operands{reg(3, 0), reg(3, 3), imm(3, 6)}}, // T1 {"subs", 2, Pattern(0x3800, high_bits(5)), Operands{optional(same_reg(1)), reg(3, 8), imm(8, 0)}}, // T2 {"subs", 2, Pattern(0x1A00, high_bits(7)), Operands{reg(3, 0), reg(3, 3), reg(6, 6)}}, // T1 {"subs", 2, Pattern(0x1A00, high_bits(7)).alternative_syntax(), Operands{duplicate(reg(3, 0), 3), reg(6, 6)}}, // T1 {"sub", 2, Pattern(0xB080, high_bits(9)), Operands{optional(reg("sp")), reg("sp"), imm(7, 0, 4)}}, // T1 {"svc", 2, Pattern(0xDF00, high_bits(8)), Operands{imm(8, 0)}}, // T1 {"sxtb", 2, Pattern(0xB240, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"sxth", 2, Pattern(0xB200, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"tst", 2, Pattern(0x4200, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"udf", 2, Pattern(0xDE00, high_bits(8)), Operands{imm(8, 0)}}, // T1 {"uxtb", 2, Pattern(0xB2C0, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"uxth", 2, Pattern(0xB280, high_bits(10)), Operands{reg(3, 0), reg(3, 3)}}, // T1 {"wfe", 2, Pattern(0xBF20, 0xFFFF), {}}, // T1 {"wfi", 2, Pattern(0xBF30, 0xFFFF), {}}, // T1 {"yield", 2, Pattern(0xBF10, 0xFFFF), {}}, // T1 }; };