From 7250bbe5ae2d2ee6b0334bc462aab73f7d8dac0e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Fri, 20 Nov 2020 10:59:18 +0100 Subject: Assembler bugfixes - tests run successfully now --- asm/assembler.cpp | 8 +++ asm/assembler.h | 41 +++++------ asm/intel64/add.cpp | 43 ++++++++++++ asm/intel64/encode.cpp | 8 +-- asm/parse.cpp | 181 +++++++++++++++++++++++++++++++++++++++++++------ tests/test-asm.cpp | 20 ++++-- 6 files changed, 252 insertions(+), 49 deletions(-) diff --git a/asm/assembler.cpp b/asm/assembler.cpp index b555125..4eb37f0 100644 --- a/asm/assembler.cpp +++ b/asm/assembler.cpp @@ -2,6 +2,14 @@ using namespace std::string_literals; +Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64) +{ + if (imm64.value() < 0x100000000) + m_value = static_cast(imm64.value()); + else + throw std::runtime_error("Immediate32: Constructed from too big Immediate64"); +} + namespace { std::unordered_map ops; diff --git a/asm/assembler.h b/asm/assembler.h index ea23fbc..8cdaa31 100644 --- a/asm/assembler.h +++ b/asm/assembler.h @@ -28,18 +28,21 @@ public: { public: Immediate8(uint8_t value): m_value(value) {} - uint8_t value() {return m_value;} + uint8_t value() const {return m_value;} std::vector getCode() {return {m_value};}; private: uint8_t m_value; }; + class Immediate64; + class Immediate32 { public: Immediate32(uint32_t value): m_value(value) {} - uint32_t value() { return m_value; } + Immediate32(const Immediate64&); ///< Convert from Immediate64 if data is small enough + uint32_t value() const { return m_value; } std::vector getCode() { std::vector result(size_t(4)); *(reinterpret_cast(result.data())) = boost::endian::native_to_little(m_value); @@ -54,7 +57,7 @@ public: { public: Immediate64(uint64_t value): m_value(value) {} - uint64_t value() { return m_value; } + uint64_t value() const { return m_value; } std::vector getCode() { std::vector result(size_t(8)); *(reinterpret_cast(result.data())) = boost::endian::native_to_little(m_value); @@ -69,7 +72,7 @@ public: { public: Register8(const std::string& name): m_name(name) {} - std::string name() { return m_name; } + std::string name() const { return m_name; } private: std::string m_name; @@ -79,7 +82,7 @@ public: { public: Register32(const std::string& name): m_name(name) {} - std::string name() { return m_name; } + std::string name() const { return m_name; } private: std::string m_name; @@ -89,7 +92,7 @@ public: { public: Register64(const std::string& name): m_name(name) {} - std::string name() { return m_name; } + std::string name() const { return m_name; } private: std::string m_name; @@ -100,10 +103,10 @@ public: { public: Mem8Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {} - Mem8Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} - std::string reg() { return m_reg; } - std::string reg2() { return m_reg2; } - int32_t offs() { return m_offs; } + Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} + std::string reg() const { return m_reg; } + std::string reg2() const { return m_reg2; } + int32_t offs() const { return m_offs; } private: std::string m_reg; @@ -116,10 +119,10 @@ public: { public: Mem32Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {} - Mem32Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} - std::string reg() { return m_reg; } - std::string reg2() { return m_reg2; } - int32_t offs() { return m_offs; } + Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} + std::string reg() const { return m_reg; } + std::string reg2() const { return m_reg2; } + int32_t offs() const { return m_offs; } private: std::string m_reg; @@ -132,10 +135,10 @@ public: { public: Mem64Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {} - Mem64Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} - std::string reg() { return m_reg; } - std::string reg2() { return m_reg2; } - int32_t offs() { return m_offs; } + Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {} + std::string reg() const { return m_reg; } + std::string reg2() const { return m_reg2; } + int32_t offs() const { return m_offs; } private: std::string m_reg; @@ -147,7 +150,7 @@ public: { public: Label(const std::string& name): m_name(name) {} - std::string name() { return m_name; } + std::string name() const { return m_name; } private: std::string m_name; diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp index 957c27f..07b14a1 100644 --- a/asm/intel64/add.cpp +++ b/asm/intel64/add.cpp @@ -28,12 +28,34 @@ Op_add::Op_add(const Asm::Args& args) { // add rax, imm32 machine_code = REX("W") + std::vector{ 0x05 } + std::any_cast(args[1]).getCode(); + } else if (args[0].type() == typeid(Asm::Args::Register8) && args[1].type() == typeid(Asm::Args::Register8)) { // add reg8, reg8 + machine_code = std::vector{ 0x00 } + ModRM(std::any_cast(args[1]).name(), std::any_cast(args[0]).name()); + + } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Register32)) { // add reg32, reg32 + machine_code = std::vector{ 0x01 } + ModRM(std::any_cast(args[1]).name(), std::any_cast(args[0]).name()); + + } else if (args[0].type() == typeid(Asm::Args::Register64) && args[1].type() == typeid(Asm::Args::Register64)) { // add reg64, reg64 + machine_code = REX("W") + std::vector{ 0x01 } + ModRM(std::any_cast(args[1]).name(), std::any_cast(args[0]).name()); + } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Mem32Ptr64)) { // add reg32, [reg64] machine_code = std::vector{ 0x03 } + ModRM(std::any_cast(args[0]).name(), std::any_cast(args[1]).reg()); } else if (args[0].type() == typeid(Asm::Args::Register64) && args[1].type() == typeid(Asm::Args::Mem64Ptr64)) { // add reg64, [reg64] machine_code = REX("W") + std::vector{ 0x03 } + ModRM(std::any_cast(args[0]).name(), std::any_cast(args[1]).reg()); + } else if (args[0].type() == typeid(Asm::Args::Mem8Ptr64) && args[1].type() == typeid(Asm::Args::Immediate8)) { // add [reg64], imm8 + machine_code = std::vector{ 0x80 } + ModRM("/0", std::any_cast(args[0]).reg()) + std::any_cast(args[1]).getCode(); + + } else if (args[0].type() == typeid(Asm::Args::Mem32Ptr64) && args[1].type() == typeid(Asm::Args::Immediate32)) { // add [reg64], imm32 + machine_code = std::vector{ 0x81 } + ModRM("/0", std::any_cast(args[0]).reg()) + std::any_cast(args[1]).getCode(); + + } else if (args[0].type() == typeid(Asm::Args::Mem64Ptr64) && args[1].type() == typeid(Asm::Args::Immediate32)) { // add qword ptr [reg64], imm32 (sign-extended) + machine_code = REX("W") + std::vector{ 0x81 } + ModRM("/0", std::any_cast(args[0]).reg()) + std::any_cast(args[1]).getCode(); + + } else if (args[0].type() == typeid(Asm::Args::Mem64Ptr64) && args[1].type() == typeid(Asm::Args::Immediate64)) { // add qword ptr [reg64], imm32 (sign-extended) - reduce imm64 to imm32! + Asm::Args::Immediate32 imm32{std::any_cast(args[1])}; + machine_code = REX("W") + std::vector{ 0x81 } + ModRM("/0", std::any_cast(args[0]).reg()) + imm32.getCode(); + } else { throw std::runtime_error("Unimplemented: add "s + args[0].type().name() + " "s + args[1].type().name()); } @@ -48,10 +70,31 @@ bool registered { registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ return std::make_shared(args); }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ return std::make_shared(args); }) && registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && + registerOp(mangleName("add"), [](const Asm::Args& args) -> std::shared_ptr{ // automatically converted to 32-bit (sign extended) if small enough. Intel doesn't support ADD ..., imm64 return std::make_shared(args); }) }; diff --git a/asm/intel64/encode.cpp b/asm/intel64/encode.cpp index 51ca7a0..1b35d89 100644 --- a/asm/intel64/encode.cpp +++ b/asm/intel64/encode.cpp @@ -14,7 +14,7 @@ namespace { std::shared_ptr makeLoadValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph) { if (data.type() != FlowGraph::DataType::Int) { - std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); + throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); } if (!data.storage()) @@ -41,7 +41,7 @@ std::shared_ptr makeLoadValue(const FlowGraph::Data& data, const FlowGraph:: std::shared_ptr makeStoreValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph) { if (data.type() != FlowGraph::DataType::Int) { - std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); + throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); } if (!data.storage()) @@ -60,7 +60,7 @@ std::shared_ptr makeStoreValue(const FlowGraph::Data& data, const FlowGraph: std::shared_ptr makeAddValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph) { if (data.type() != FlowGraph::DataType::Int) { - std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); + throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); } if (!data.storage()) @@ -87,7 +87,7 @@ std::shared_ptr makeAddValue(const FlowGraph::Data& data, const FlowGraph::G std::vector> makeMulValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph) { if (data.type() != FlowGraph::DataType::Int) { - std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); + throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type()))); } if (!data.storage()) diff --git a/asm/parse.cpp b/asm/parse.cpp index e62f585..8f6f831 100644 --- a/asm/parse.cpp +++ b/asm/parse.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -43,11 +44,13 @@ namespace { } } + std::string reg_re{"[[:alpha:]][[:alnum:]]*"}; + // parse optional label bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) { parseWhitespace(asm_code, pos); - std::regex re_label("([[:alpha:]]([[:alnum:]])+):", std::regex_constants::ECMAScript); + std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) { @@ -64,7 +67,7 @@ namespace { bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) { parseWhitespace(asm_code, pos); - std::regex re_mnemonic("[[:alpha:]]([[:alnum:]])+", std::regex_constants::ECMAScript); + std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) { @@ -77,10 +80,10 @@ namespace { return false; } - bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -88,6 +91,7 @@ namespace { if (reg8.contains(name)) { pos += name.size(); result = Asm::Args::Register8(name); + size_hint = 8; return true; } } @@ -95,10 +99,10 @@ namespace { return false; } - bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -106,6 +110,7 @@ namespace { if (reg32.contains(name)) { pos += name.size(); result = Asm::Args::Register32(name); + size_hint = 32; return true; } } @@ -113,10 +118,10 @@ namespace { return false; } - bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -124,6 +129,64 @@ namespace { if (reg64.contains(name)) { pos += name.size(); result = Asm::Args::Register64(name); + size_hint = 64; + return true; + } + } + + return false; + } + + bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[1].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem8Ptr64{name}; + size_hint = 8; + return true; + } + } + + return false; + } + + bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[2].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem32Ptr64(name); + size_hint = 32; + return true; + } + } + + return false; + } + + bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[1].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem64Ptr64(name); + size_hint = 64; return true; } } @@ -131,7 +194,37 @@ namespace { return false; } - bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 8) + return false; + + parseWhitespace(asm_code, pos); + + std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + int32_t value{}; + try { + value = stoll(match[0]); + } catch (...) { + throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); + } + if (value < -128 || value > 255) + throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str()); + + pos += match[0].length(); + result = Asm::Args::Immediate8(static_cast(value)); + return true; + } + + return false; + } + + bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 32 && size_hint != 0) + return false; + parseWhitespace(asm_code, pos); std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); @@ -152,18 +245,54 @@ namespace { return false; } + bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 64) + return false; + + parseWhitespace(asm_code, pos); + + std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + int64_t value{}; + try { + value = stoll(match[0]); + } catch (...) { + throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); + } + + pos += match[0].length(); + result = Asm::Args::Immediate64(static_cast(value)); + return true; + } + + return false; + } + // parse optional single operand - bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - if (parseRegister8(asm_code, pos, result)) + if (parseRegister8(asm_code, pos, result, size_hint)) + return true; + if (parseRegister32(asm_code, pos, result, size_hint)) + return true; + if (parseRegister64(asm_code, pos, result, size_hint)) return true; - if (parseRegister32(asm_code, pos, result)) + + if (parseMem8Ptr64(asm_code, pos, result, size_hint)) + return true; + if (parseMem32Ptr64(asm_code, pos, result, size_hint)) return true; - if (parseRegister64(asm_code, pos, result)) + if (parseMem64Ptr64(asm_code, pos, result, size_hint)) return true; - if (parseImmediate32(asm_code, pos, result)) + if (parseImmediate8(asm_code, pos, result, size_hint)) + return true; + if (parseImmediate32(asm_code, pos, result, size_hint)) + return true; + if (parseImmediate64(asm_code, pos, result, size_hint)) return true; return false; @@ -172,12 +301,13 @@ namespace { // parse optional multiple operands, separated by commas void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) { std::any operand; - if (parseOperand(asm_code, pos, operand)) { + size_t size_hint{0}; // in bits, 0=no hint + if (parseOperand(asm_code, pos, operand, size_hint)) { result.push_back(operand); parseWhitespace(asm_code, pos); while (pos < asm_code.size() && asm_code[pos] == ',') { pos++; - if (parseOperand(asm_code, pos, operand)) { + if (parseOperand(asm_code, pos, operand, size_hint)) { result.push_back(operand); } else { throw std::runtime_error("Assembler error: expected operand after comma"); @@ -223,20 +353,27 @@ namespace { // all optional: // label: mnemonic operands... ;comment - std::string result_string; - if (parseLabel(asm_code, pos, result_string)) - result.emplace_back(std::make_shared