From 1fae63de23320a1663b7c591e247ad81852ab6dc Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 22 Nov 2020 13:00:06 +0100 Subject: Support 16-bit short --- asm/assembler.cpp | 182 +++++++++++++++++++++++++++++++++++++++++++++---- asm/assembler.h | 77 ++++++++++++++------- asm/intel64/codes.cpp | 9 ++- asm/intel64/codes.h | 2 + asm/intel64/encode.cpp | 17 +++-- asm/intel64/mov.cpp | 8 +++ asm/parse.cpp | 42 ++++++++++++ 7 files changed, 287 insertions(+), 50 deletions(-) (limited to 'asm') diff --git a/asm/assembler.cpp b/asm/assembler.cpp index 35e971b..88481bc 100644 --- a/asm/assembler.cpp +++ b/asm/assembler.cpp @@ -1,7 +1,38 @@ #include "assembler.h" +#include "byteorder.h" + using namespace std::string_literals; +Asm::Args::Immediate8::Immediate8(uint8_t value): m_value(value) +{} + +uint8_t Asm::Args::Immediate8::value() const +{ + return m_value; +} + +std::vector Asm::Args::Immediate8::getCode() const +{ + return {m_value}; +} + +Asm::Args::Immediate16::Immediate16(uint16_t value): m_value(value) +{} + +uint16_t Asm::Args::Immediate16::value() const +{ + return m_value; +} + +std::vector Asm::Args::Immediate16::getCode() const +{ + return endian::to_little(m_value); +} + +Asm::Args::Immediate32::Immediate32(uint32_t value): m_value(value) +{} + Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64) { if (imm64.value() < 0x100000000) @@ -10,23 +41,144 @@ Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64) throw std::runtime_error("Immediate32: Constructed from too big Immediate64"); } -Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) {} -Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) {} -std::string Asm::Args::Mem8Ptr64::reg() const { return "["s + m_reg + "]"s; } -std::string Asm::Args::Mem8Ptr64::reg2() const { return "["s + m_reg2 + "]"s; } -int32_t 
Asm::Args::Mem8Ptr64::offs() const { return m_offs; } +uint32_t Asm::Args::Immediate32::value() const +{ + return m_value; +} + +std::vector Asm::Args::Immediate32::getCode() const +{ + return endian::to_little(m_value); +} + +Asm::Args::Immediate64::Immediate64(uint64_t value): m_value(value) +{} + +uint64_t Asm::Args::Immediate64::value() const +{ + return m_value; +} + +std::vector Asm::Args::Immediate64::getCode() const +{ + return endian::to_little(m_value); +} + +Asm::Args::Register8::Register8(const std::string& name): m_name(name) +{} + +std::string Asm::Args::Register8::name() const +{ + return m_name; +} + +Asm::Args::Register16::Register16(const std::string& name): m_name(name) +{} + +std::string Asm::Args::Register16::name() const +{ + return m_name; +} + +Asm::Args::Register32::Register32(const std::string& name): m_name(name) +{} + +std::string Asm::Args::Register32::name() const +{ + return m_name; +} + +Asm::Args::Register64::Register64(const std::string& name): m_name(name) +{} + +std::string Asm::Args::Register64::name() const +{ + return m_name; +} + +Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) +{} + +Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) +{} + +std::string Asm::Args::Mem8Ptr64::reg() const +{ + return "["s + m_reg + "]"s; +} + +std::string Asm::Args::Mem8Ptr64::reg2() const +{ + return "["s + m_reg2 + "]"s; +} + +int32_t Asm::Args::Mem8Ptr64::offs() const +{ + return m_offs; +} + +Asm::Args::Mem16Ptr64::Mem16Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) +{} + +Asm::Args::Mem16Ptr64::Mem16Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) +{} + +std::string Asm::Args::Mem16Ptr64::reg() const +{ + return "["s + m_reg + "]"s; +} -Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, int32_t offs): m_reg(reg), 
m_offs(offs) {} -Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) {} -std::string Asm::Args::Mem32Ptr64::reg() const { return "["s + m_reg + "]"s; } -std::string Asm::Args::Mem32Ptr64::reg2() const { return "["s + m_reg2 + "]"s; } -int32_t Asm::Args::Mem32Ptr64::offs() const { return m_offs; } +std::string Asm::Args::Mem16Ptr64::reg2() const +{ + return "["s + m_reg2 + "]"s; +} + +int32_t Asm::Args::Mem16Ptr64::offs() const +{ + return m_offs; +} -Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) {} -Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) {} -std::string Asm::Args::Mem64Ptr64::reg() const { return "["s + m_reg + "]"s; } -std::string Asm::Args::Mem64Ptr64::reg2() const { return "["s + m_reg2 + "]"s; } -int32_t Asm::Args::Mem64Ptr64::offs() const { return m_offs; } +Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) +{} + +Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) +{} + +std::string Asm::Args::Mem32Ptr64::reg() const +{ + return "["s + m_reg + "]"s; +} + +std::string Asm::Args::Mem32Ptr64::reg2() const +{ + return "["s + m_reg2 + "]"s; +} + +int32_t Asm::Args::Mem32Ptr64::offs() const +{ + return m_offs; +} + +Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) +{} + +Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) +{} + +std::string Asm::Args::Mem64Ptr64::reg() const +{ + return "["s + m_reg + "]"s; +} + +std::string Asm::Args::Mem64Ptr64::reg2() const +{ + return "["s + m_reg2 + "]"s; +} + +int32_t Asm::Args::Mem64Ptr64::offs() const +{ + return m_offs; +} 
namespace { diff --git a/asm/assembler.h b/asm/assembler.h index 1fdc658..12e7c4e 100644 --- a/asm/assembler.h +++ b/asm/assembler.h @@ -6,8 +6,6 @@ #include "../minicc.h" -#include - #include #include #include @@ -27,27 +25,34 @@ public: class Immediate8 { public: - Immediate8(uint8_t value): m_value(value) {} - uint8_t value() const {return m_value;} - std::vector getCode() {return {m_value};}; + Immediate8(uint8_t value); + uint8_t value() const; + std::vector getCode() const; private: uint8_t m_value; }; + class Immediate16 + { + public: + Immediate16(uint16_t value); + uint16_t value() const; + std::vector getCode() const; + + private: + uint16_t m_value; + }; + class Immediate64; class Immediate32 { public: - Immediate32(uint32_t value): m_value(value) {} + Immediate32(uint32_t value); Immediate32(const Immediate64&); ///< Convert from Immediate64 if data is small enough - uint32_t value() const { return m_value; } - std::vector getCode() { - std::vector result(size_t(4)); - *(reinterpret_cast(result.data())) = boost::endian::native_to_little(m_value); - return result; - }; + uint32_t value() const; + std::vector getCode() const; private: uint32_t m_value; @@ -56,13 +61,9 @@ public: class Immediate64 { public: - Immediate64(uint64_t value): m_value(value) {} - uint64_t value() const { return m_value; } - std::vector getCode() { - std::vector result(size_t(8)); - *(reinterpret_cast(result.data())) = boost::endian::native_to_little(m_value); - return result; - }; + Immediate64(uint64_t value); + uint64_t value() const; + std::vector getCode() const; private: uint64_t m_value; @@ -71,8 +72,18 @@ public: class Register8 { public: - Register8(const std::string& name): m_name(name) {} - std::string name() const { return m_name; } + Register8(const std::string& name); + std::string name() const; + + private: + std::string m_name; + }; + + class Register16 + { + public: + Register16(const std::string& name); + std::string name() const; private: std::string m_name; @@ 
-81,8 +92,8 @@ public: class Register32 { public: - Register32(const std::string& name): m_name(name) {} - std::string name() const { return m_name; } + Register32(const std::string& name); + std::string name() const; private: std::string m_name; @@ -91,8 +102,8 @@ public: class Register64 { public: - Register64(const std::string& name): m_name(name) {} - std::string name() const { return m_name; } + Register64(const std::string& name); + std::string name() const; private: std::string m_name; @@ -114,6 +125,22 @@ public: int32_t m_offs; }; + // 64 bit Ptr to 16 bit Memory + class Mem16Ptr64 + { + public: + Mem16Ptr64(const std::string& reg, int32_t offs = 0); + Mem16Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0); + std::string reg() const; + std::string reg2() const; + int32_t offs() const; + + private: + std::string m_reg; + std::string m_reg2; + int32_t m_offs; + }; + // 64 bit Ptr to 32 bit Memory class Mem32Ptr64 { diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp index 76108a1..18980bd 100644 --- a/asm/intel64/codes.cpp +++ b/asm/intel64/codes.cpp @@ -1,5 +1,6 @@ #include "codes.h" +#include "byteorder.h" #include "minicc.h" #include "../operators.h" @@ -84,7 +85,7 @@ std::vector ModRM(const std::string& reg, const std::string& rm, int32_ displacement_bytes.push_back(uint8_t(disp)); } else { result |= 0b10000000; // 32 bit displacement - displacement_bytes += to_little_endian(disp); + displacement_bytes += endian::to_little(uint32_t(disp)); } result |= rm_bits; } else { // normal register access @@ -104,6 +105,12 @@ uint8_t RegNo(const std::string& reg) return index->second; } +// Operand-size override prefix (0x66): switch from the default 32 bit operand size to 16 bit +std::vector OpSizePrefix() +{ + return {0x66}; +} + #if 0 prefixes{ "lock", 0xf0, diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h index ba378a6..9d737e0 100644 --- a/asm/intel64/codes.h +++ b/asm/intel64/codes.h @@ -15,3 +15,5 @@ std::vector ModRM(const std::string& reg, const std::string& rm, int32_ 
// Just the number of reg, e.g. for encoding inside primary opcode uint8_t RegNo(const std::string& reg); + +std::vector OpSizePrefix(); diff --git a/asm/intel64/encode.cpp b/asm/intel64/encode.cpp index 62101f3..a51854a 100644 --- a/asm/intel64/encode.cpp +++ b/asm/intel64/encode.cpp @@ -3,10 +3,9 @@ #include "asm/assembler.h" #include "asm/parse.h" +#include "byteorder.h" #include "minicc.h" -#include - #include namespace { @@ -26,7 +25,7 @@ std::shared_ptr makeLoadValue(const FlowGraph::Data& data, const FlowGraph:: if (value.value().size() < sizeof(uint32_t)) throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size())); - uint32_t immediate = from_little_endian(value.value()); + uint32_t immediate = endian::from_little32(value.value()); return makeOp("mov", Asm::Args{{Asm::Args::Register32("eax"), Asm::Args::Immediate32(immediate)}}); } else if (typeid(data_storage) == typeid(FlowGraph::TemporaryStorage)) { @@ -72,7 +71,7 @@ std::shared_ptr makeAddValue(const FlowGraph::Data& data, const FlowGraph::G if (value.value().size() < sizeof(uint32_t)) throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size())); - uint32_t immediate = from_little_endian(value.value()); + uint32_t immediate = endian::from_little32(value.value()); return makeOp("add", Asm::Args{{Asm::Args::Register32("eax"), Asm::Args::Immediate32(immediate)}}); } else if (typeid(data_storage) == typeid(FlowGraph::TemporaryStorage)) { @@ -99,7 +98,7 @@ std::vector> makeMulValue(const FlowGraph::Data& data, co if (value.value().size() < sizeof(uint32_t)) throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size())); - uint32_t immediate = from_little_endian(value.value()); + uint32_t immediate = endian::from_little32(value.value()); return {{ makeOp("mov", Asm::Args{{Asm::Args::Register32("ebx"), 
Asm::Args::Immediate32(immediate)}}), @@ -129,7 +128,7 @@ std::vector> makeDivValue(const FlowGraph::Data& data, co if (value.value().size() < sizeof(uint32_t)) throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size())); - uint32_t immediate = from_little_endian(value.value()); + uint32_t immediate = endian::from_little32(value.value()); return {{ makeOp("mov", Asm::Args{{Asm::Args::Register32("ebx"), Asm::Args::Immediate32(immediate)}}), @@ -211,12 +210,12 @@ void Asm::toMachineCode(const FlowGraph::Graph& graph, Segment& segment) } else if (typeid(node_deref) == typeid(FlowGraph::CreateScopeOp)) { //FlowGraph::CreateScopeOp& op {dynamic_cast(*node)}; // TODO: Create stack frame - //segment.push_back(makeOp("push", Asm::Args{{Asm::Args::Register64("rbp")}})); - //segment.push_back(makeOp("mov", Asm::Args{{Asm::Args::Register64("rbp"), Asm::Args::Register64("rsp")}})); + segment.push_back(makeOp("push", Asm::Args{{Asm::Args::Register64("rbp")}})); + segment.push_back(makeOp("mov", Asm::Args{{Asm::Args::Register64("rbp"), Asm::Args::Register64("rsp")}})); } else if (typeid(node_deref) == typeid(FlowGraph::DestroyScopeOp)) { //FlowGraph::DestroyScopeOp& op {dynamic_cast(*node)}; // TODO: Destroy stack frame - //segment.push_back(makeOp("pop", Asm::Args{{Asm::Args::Register64("rbp")}})); + segment.push_back(makeOp("pop", Asm::Args{{Asm::Args::Register64("rbp")}})); segment.push_back(makeLoadValue(graph.lastOp()->destination(), graph)); // TODO: Just get last operation result to eax for now segment.push_back(makeOp("xor", Asm::Args{{Asm::Args::Register64("rdi"), Asm::Args::Register64("rdi")}})); diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp index 805675a..9382a76 100644 --- a/asm/intel64/mov.cpp +++ b/asm/intel64/mov.cpp @@ -16,6 +16,11 @@ Op_mov::Op_mov(const Asm::Args& args) machine_code = std::vector{ 0x88 } + ModRM(std::any_cast(args[1]).name(), std::any_cast(args[0]).name()); + } else if 
(args[0].type() == typeid(Asm::Args::Register16) && args[1].type() == typeid(Asm::Args::Register16)) { // mov reg16, reg16 + // r/m16, r16: ModRM:r/m (w), ModRM:reg (r) + machine_code = OpSizePrefix() + std::vector{ 0x89 } + + ModRM(std::any_cast(args[1]).name(), std::any_cast(args[0]).name()); + } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Register32)) { // mov reg32, reg32 // r/m32, r32: ModRM:r/m (w), ModRM:reg (r) machine_code = std::vector{ 0x89 } + @@ -79,6 +84,9 @@ bool registered { registerOp(mangleName("mov"), [](const Asm::Args& args) -> std::shared_ptr{ return std::make_shared(args); }) && + registerOp(mangleName("mov"), [](const Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); + }) && registerOp(mangleName("mov"), [](const Asm::Args& args) -> std::shared_ptr{ return std::make_shared(args); }) && diff --git a/asm/parse.cpp b/asm/parse.cpp index 28e79f3..a3156c2 100644 --- a/asm/parse.cpp +++ b/asm/parse.cpp @@ -106,6 +106,25 @@ namespace { return false; } + bool parseRegister16(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name(reg_re, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[0].str())}; + if (reg16.contains(name)) { + pos += name.size(); + result = Asm::Args::Register16(name); + size_hint = 16; + return true; + } + } + + return false; + } + bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); @@ -163,6 +182,25 @@ namespace { return false; } + bool parseMem16Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("(word ptr *)?\\[("s 
+ reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[2].str())}; + if (reg16.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem16Ptr64(name); + size_hint = 16; + return true; + } + } + + return false; + } + bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); @@ -283,6 +321,8 @@ namespace { if (parseRegister8(asm_code, pos, result, size_hint)) return true; + if (parseRegister16(asm_code, pos, result, size_hint)) + return true; if (parseRegister32(asm_code, pos, result, size_hint)) return true; if (parseRegister64(asm_code, pos, result, size_hint)) @@ -290,6 +330,8 @@ namespace { if (parseMem8Ptr64(asm_code, pos, result, size_hint)) return true; + if (parseMem16Ptr64(asm_code, pos, result, size_hint)) + return true; if (parseMem32Ptr64(asm_code, pos, result, size_hint)) return true; if (parseMem64Ptr64(asm_code, pos, result, size_hint)) -- cgit v1.2.3