summaryrefslogtreecommitdiffhomepage
path: root/asm
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2020-11-22 13:00:06 +0100
committer Roland Reichwein <mail@reichwein.it> 2020-11-22 13:00:06 +0100
commit 1fae63de23320a1663b7c591e247ad81852ab6dc (patch)
tree bfe539b7eeb5a4d7b68aa1bf853898d60e34f784 /asm
parent 739297d8895b08a9ecd8e81b01b7ba8e8dc4a8ae (diff)
Support 16-bit short
Diffstat (limited to 'asm')
-rw-r--r-- asm/assembler.cpp 182
-rw-r--r-- asm/assembler.h 77
-rw-r--r-- asm/intel64/codes.cpp 9
-rw-r--r-- asm/intel64/codes.h 2
-rw-r--r-- asm/intel64/encode.cpp 17
-rw-r--r-- asm/intel64/mov.cpp 8
-rw-r--r-- asm/parse.cpp 42
7 files changed, 287 insertions, 50 deletions
diff --git a/asm/assembler.cpp b/asm/assembler.cpp
index 35e971b..88481bc 100644
--- a/asm/assembler.cpp
+++ b/asm/assembler.cpp
@@ -1,7 +1,38 @@
#include "assembler.h"
+#include "byteorder.h"
+
using namespace std::string_literals;
+Asm::Args::Immediate8::Immediate8(uint8_t value): m_value(value)
+{}
+
+uint8_t Asm::Args::Immediate8::value() const
+{
+ return m_value;
+}
+
+std::vector<uint8_t> Asm::Args::Immediate8::getCode() const
+{
+ return {m_value};
+}
+
+Asm::Args::Immediate16::Immediate16(uint16_t value): m_value(value)
+{}
+
+uint16_t Asm::Args::Immediate16::value() const
+{
+ return m_value;
+}
+
+std::vector<uint8_t> Asm::Args::Immediate16::getCode() const
+{
+ return endian::to_little(m_value);
+}
+
+Asm::Args::Immediate32::Immediate32(uint32_t value): m_value(value)
+{}
+
Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64)
{
if (imm64.value() < 0x100000000)
@@ -10,23 +41,144 @@ Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64)
throw std::runtime_error("Immediate32: Constructed from too big Immediate64");
}
-Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) {}
-Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
-std::string Asm::Args::Mem8Ptr64::reg() const { return "["s + m_reg + "]"s; }
-std::string Asm::Args::Mem8Ptr64::reg2() const { return "["s + m_reg2 + "]"s; }
-int32_t Asm::Args::Mem8Ptr64::offs() const { return m_offs; }
+uint32_t Asm::Args::Immediate32::value() const
+{
+ return m_value;
+}
+
+std::vector<uint8_t> Asm::Args::Immediate32::getCode() const
+{
+ return endian::to_little(m_value);
+}
+
+Asm::Args::Immediate64::Immediate64(uint64_t value): m_value(value)
+{}
+
+uint64_t Asm::Args::Immediate64::value() const
+{
+ return m_value;
+}
+
+std::vector<uint8_t> Asm::Args::Immediate64::getCode() const
+{
+ return endian::to_little(m_value);
+}
+
+Asm::Args::Register8::Register8(const std::string& name): m_name(name)
+{}
+
+std::string Asm::Args::Register8::name() const
+{
+ return m_name;
+}
+
+Asm::Args::Register16::Register16(const std::string& name): m_name(name)
+{}
+
+std::string Asm::Args::Register16::name() const
+{
+ return m_name;
+}
+
+Asm::Args::Register32::Register32(const std::string& name): m_name(name)
+{}
+
+std::string Asm::Args::Register32::name() const
+{
+ return m_name;
+}
+
+Asm::Args::Register64::Register64(const std::string& name): m_name(name)
+{}
+
+std::string Asm::Args::Register64::name() const
+{
+ return m_name;
+}
+
+Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs)
+{}
+
+Asm::Args::Mem8Ptr64::Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs)
+{}
+
+std::string Asm::Args::Mem8Ptr64::reg() const
+{
+ return "["s + m_reg + "]"s;
+}
+
+std::string Asm::Args::Mem8Ptr64::reg2() const
+{
+ return "["s + m_reg2 + "]"s;
+}
+
+int32_t Asm::Args::Mem8Ptr64::offs() const
+{
+ return m_offs;
+}
+
+Asm::Args::Mem16Ptr64::Mem16Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs)
+{}
+
+Asm::Args::Mem16Ptr64::Mem16Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs)
+{}
+
+std::string Asm::Args::Mem16Ptr64::reg() const
+{
+ return "["s + m_reg + "]"s;
+}
-Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) {}
-Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
-std::string Asm::Args::Mem32Ptr64::reg() const { return "["s + m_reg + "]"s; }
-std::string Asm::Args::Mem32Ptr64::reg2() const { return "["s + m_reg2 + "]"s; }
-int32_t Asm::Args::Mem32Ptr64::offs() const { return m_offs; }
+std::string Asm::Args::Mem16Ptr64::reg2() const
+{
+ return "["s + m_reg2 + "]"s;
+}
+
+int32_t Asm::Args::Mem16Ptr64::offs() const
+{
+ return m_offs;
+}
-Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs) {}
-Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
-std::string Asm::Args::Mem64Ptr64::reg() const { return "["s + m_reg + "]"s; }
-std::string Asm::Args::Mem64Ptr64::reg2() const { return "["s + m_reg2 + "]"s; }
-int32_t Asm::Args::Mem64Ptr64::offs() const { return m_offs; }
+Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs)
+{}
+
+Asm::Args::Mem32Ptr64::Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs)
+{}
+
+std::string Asm::Args::Mem32Ptr64::reg() const
+{
+ return "["s + m_reg + "]"s;
+}
+
+std::string Asm::Args::Mem32Ptr64::reg2() const
+{
+ return "["s + m_reg2 + "]"s;
+}
+
+int32_t Asm::Args::Mem32Ptr64::offs() const
+{
+ return m_offs;
+}
+
+Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, int32_t offs): m_reg(reg), m_offs(offs)
+{}
+
+Asm::Args::Mem64Ptr64::Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs): m_reg(reg), m_reg2(reg2), m_offs(offs)
+{}
+
+std::string Asm::Args::Mem64Ptr64::reg() const
+{
+ return "["s + m_reg + "]"s;
+}
+
+std::string Asm::Args::Mem64Ptr64::reg2() const
+{
+ return "["s + m_reg2 + "]"s;
+}
+
+int32_t Asm::Args::Mem64Ptr64::offs() const
+{
+ return m_offs;
+}
namespace {
diff --git a/asm/assembler.h b/asm/assembler.h
index 1fdc658..12e7c4e 100644
--- a/asm/assembler.h
+++ b/asm/assembler.h
@@ -6,8 +6,6 @@
#include "../minicc.h"
-#include <boost/endian/conversion.hpp>
-
#include <any>
#include <functional>
#include <iostream>
@@ -27,27 +25,34 @@ public:
class Immediate8
{
public:
- Immediate8(uint8_t value): m_value(value) {}
- uint8_t value() const {return m_value;}
- std::vector<uint8_t> getCode() {return {m_value};};
+ Immediate8(uint8_t value);
+ uint8_t value() const;
+ std::vector<uint8_t> getCode() const;
private:
uint8_t m_value;
};
+ class Immediate16
+ {
+ public:
+ Immediate16(uint16_t value);
+ uint16_t value() const;
+ std::vector<uint8_t> getCode() const;
+
+ private:
+ uint16_t m_value;
+ };
+
class Immediate64;
class Immediate32
{
public:
- Immediate32(uint32_t value): m_value(value) {}
+ Immediate32(uint32_t value);
Immediate32(const Immediate64&); ///< Convert from Immediate64 if data is small enough
- uint32_t value() const { return m_value; }
- std::vector<uint8_t> getCode() {
- std::vector<uint8_t> result(size_t(4));
- *(reinterpret_cast<uint32_t*>(result.data())) = boost::endian::native_to_little(m_value);
- return result;
- };
+ uint32_t value() const;
+ std::vector<uint8_t> getCode() const;
private:
uint32_t m_value;
@@ -56,13 +61,9 @@ public:
class Immediate64
{
public:
- Immediate64(uint64_t value): m_value(value) {}
- uint64_t value() const { return m_value; }
- std::vector<uint8_t> getCode() {
- std::vector<uint8_t> result(size_t(8));
- *(reinterpret_cast<uint64_t*>(result.data())) = boost::endian::native_to_little(m_value);
- return result;
- };
+ Immediate64(uint64_t value);
+ uint64_t value() const;
+ std::vector<uint8_t> getCode() const;
private:
uint64_t m_value;
@@ -71,8 +72,18 @@ public:
class Register8
{
public:
- Register8(const std::string& name): m_name(name) {}
- std::string name() const { return m_name; }
+ Register8(const std::string& name);
+ std::string name() const;
+
+ private:
+ std::string m_name;
+ };
+
+ class Register16
+ {
+ public:
+ Register16(const std::string& name);
+ std::string name() const;
private:
std::string m_name;
@@ -81,8 +92,8 @@ public:
class Register32
{
public:
- Register32(const std::string& name): m_name(name) {}
- std::string name() const { return m_name; }
+ Register32(const std::string& name);
+ std::string name() const;
private:
std::string m_name;
@@ -91,8 +102,8 @@ public:
class Register64
{
public:
- Register64(const std::string& name): m_name(name) {}
- std::string name() const { return m_name; }
+ Register64(const std::string& name);
+ std::string name() const;
private:
std::string m_name;
@@ -114,6 +125,22 @@ public:
int32_t m_offs;
};
+ // 64 bit Ptr to 16 bit Memory
+ class Mem16Ptr64
+ {
+ public:
+ Mem16Ptr64(const std::string& reg, int32_t offs = 0);
+ Mem16Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0);
+ std::string reg() const;
+ std::string reg2() const;
+ int32_t offs() const;
+
+ private:
+ std::string m_reg;
+ std::string m_reg2;
+ int32_t m_offs;
+ };
+
// 64 bit Ptr to 32 bit Memory
class Mem32Ptr64
{
diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp
index 76108a1..18980bd 100644
--- a/asm/intel64/codes.cpp
+++ b/asm/intel64/codes.cpp
@@ -1,5 +1,6 @@
#include "codes.h"
+#include "byteorder.h"
#include "minicc.h"
#include "../operators.h"
@@ -84,7 +85,7 @@ std::vector<uint8_t> ModRM(const std::string& reg, const std::string& rm, int32_
displacement_bytes.push_back(uint8_t(disp));
} else {
result |= 0b10000000; // 32 bit displacement
- displacement_bytes += to_little_endian(disp);
+ displacement_bytes += endian::to_little(uint32_t(disp));
}
result |= rm_bits;
} else { // normal register access
@@ -104,6 +105,12 @@ uint8_t RegNo(const std::string& reg)
return index->second;
}
+// Operand-size override prefix (0x66): selects 16 bit operand size instead of the default 32 bit
+std::vector<uint8_t> OpSizePrefix()
+{
+ return {0x66};
+}
+
#if 0
prefixes{
"lock", 0xf0,
diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h
index ba378a6..9d737e0 100644
--- a/asm/intel64/codes.h
+++ b/asm/intel64/codes.h
@@ -15,3 +15,5 @@ std::vector<uint8_t> ModRM(const std::string& reg, const std::string& rm, int32_
// Just the number of reg, e.g. for encoding inside primary opcode
uint8_t RegNo(const std::string& reg);
+
+std::vector<uint8_t> OpSizePrefix();
diff --git a/asm/intel64/encode.cpp b/asm/intel64/encode.cpp
index 62101f3..a51854a 100644
--- a/asm/intel64/encode.cpp
+++ b/asm/intel64/encode.cpp
@@ -3,10 +3,9 @@
#include "asm/assembler.h"
#include "asm/parse.h"
+#include "byteorder.h"
#include "minicc.h"
-#include <boost/endian/conversion.hpp>
-
#include <exception>
namespace {
@@ -26,7 +25,7 @@ std::shared_ptr<Op> makeLoadValue(const FlowGraph::Data& data, const FlowGraph::
if (value.value().size() < sizeof(uint32_t))
throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size()));
- uint32_t immediate = from_little_endian(value.value());
+ uint32_t immediate = endian::from_little32(value.value());
return makeOp("mov", Asm::Args{{Asm::Args::Register32("eax"), Asm::Args::Immediate32(immediate)}});
} else if (typeid(data_storage) == typeid(FlowGraph::TemporaryStorage)) {
@@ -72,7 +71,7 @@ std::shared_ptr<Op> makeAddValue(const FlowGraph::Data& data, const FlowGraph::G
if (value.value().size() < sizeof(uint32_t))
throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size()));
- uint32_t immediate = from_little_endian(value.value());
+ uint32_t immediate = endian::from_little32(value.value());
return makeOp("add", Asm::Args{{Asm::Args::Register32("eax"), Asm::Args::Immediate32(immediate)}});
} else if (typeid(data_storage) == typeid(FlowGraph::TemporaryStorage)) {
@@ -99,7 +98,7 @@ std::vector<std::shared_ptr<Chunk>> makeMulValue(const FlowGraph::Data& data, co
if (value.value().size() < sizeof(uint32_t))
throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size()));
- uint32_t immediate = from_little_endian(value.value());
+ uint32_t immediate = endian::from_little32(value.value());
return {{
makeOp("mov", Asm::Args{{Asm::Args::Register32("ebx"), Asm::Args::Immediate32(immediate)}}),
@@ -129,7 +128,7 @@ std::vector<std::shared_ptr<Chunk>> makeDivValue(const FlowGraph::Data& data, co
if (value.value().size() < sizeof(uint32_t))
throw std::runtime_error("ICE: Int data from operand needs at least 4 bytes, got "s + std::to_string(value.value().size()));
- uint32_t immediate = from_little_endian(value.value());
+ uint32_t immediate = endian::from_little32(value.value());
return {{
makeOp("mov", Asm::Args{{Asm::Args::Register32("ebx"), Asm::Args::Immediate32(immediate)}}),
@@ -211,12 +210,12 @@ void Asm::toMachineCode(const FlowGraph::Graph& graph, Segment& segment)
} else if (typeid(node_deref) == typeid(FlowGraph::CreateScopeOp)) {
//FlowGraph::CreateScopeOp& op {dynamic_cast<FlowGraph::CreateScopeOp&>(*node)}; // TODO: Create stack frame
- //segment.push_back(makeOp("push", Asm::Args{{Asm::Args::Register64("rbp")}}));
- //segment.push_back(makeOp("mov", Asm::Args{{Asm::Args::Register64("rbp"), Asm::Args::Register64("rsp")}}));
+ segment.push_back(makeOp("push", Asm::Args{{Asm::Args::Register64("rbp")}}));
+ segment.push_back(makeOp("mov", Asm::Args{{Asm::Args::Register64("rbp"), Asm::Args::Register64("rsp")}}));
} else if (typeid(node_deref) == typeid(FlowGraph::DestroyScopeOp)) {
//FlowGraph::DestroyScopeOp& op {dynamic_cast<FlowGraph::DestroyScopeOp&>(*node)}; // TODO: Destroy stack frame
- //segment.push_back(makeOp("pop", Asm::Args{{Asm::Args::Register64("rbp")}}));
+ segment.push_back(makeOp("pop", Asm::Args{{Asm::Args::Register64("rbp")}}));
segment.push_back(makeLoadValue(graph.lastOp()->destination(), graph)); // TODO: Just get last operation result to eax for now
segment.push_back(makeOp("xor", Asm::Args{{Asm::Args::Register64("rdi"), Asm::Args::Register64("rdi")}}));
diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp
index 805675a..9382a76 100644
--- a/asm/intel64/mov.cpp
+++ b/asm/intel64/mov.cpp
@@ -16,6 +16,11 @@ Op_mov::Op_mov(const Asm::Args& args)
machine_code = std::vector<uint8_t>{ 0x88 } +
ModRM(std::any_cast<Asm::Args::Register8>(args[1]).name(), std::any_cast<Asm::Args::Register8>(args[0]).name());
+ } else if (args[0].type() == typeid(Asm::Args::Register16) && args[1].type() == typeid(Asm::Args::Register16)) { // mov reg16, reg16
+ // r/m16, r16: ModRM:r/m (w), ModRM:reg (r)
+ machine_code = OpSizePrefix() + std::vector<uint8_t>{ 0x89 } +
+ ModRM(std::any_cast<Asm::Args::Register16>(args[1]).name(), std::any_cast<Asm::Args::Register16>(args[0]).name());
+
} else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Register32)) { // mov reg32, reg32
// r/m32, r32: ModRM:r/m (w), ModRM:reg (r)
machine_code = std::vector<uint8_t>{ 0x89 } +
@@ -79,6 +84,9 @@ bool registered {
registerOp(mangleName<Asm::Args::Register8, Asm::Args::Register8>("mov"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_mov>(args);
}) &&
+ registerOp(mangleName<Asm::Args::Register16, Asm::Args::Register16>("mov"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_mov>(args);
+ }) &&
registerOp(mangleName<Asm::Args::Register32, Asm::Args::Register32>("mov"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_mov>(args);
}) &&
diff --git a/asm/parse.cpp b/asm/parse.cpp
index 28e79f3..a3156c2 100644
--- a/asm/parse.cpp
+++ b/asm/parse.cpp
@@ -106,6 +106,25 @@ namespace {
return false;
}
+ bool parseRegister16(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[0].str())};
+ if (reg16.contains(name)) {
+ pos += name.size();
+ result = Asm::Args::Register16(name);
+ size_hint = 16;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
@@ -163,6 +182,25 @@ namespace {
return false;
}
+ bool parseMem16Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("(word ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[2].str())};
+ if (reg16.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem16Ptr64(name);
+ size_hint = 16;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
@@ -283,6 +321,8 @@ namespace {
if (parseRegister8(asm_code, pos, result, size_hint))
return true;
+ if (parseRegister16(asm_code, pos, result, size_hint))
+ return true;
if (parseRegister32(asm_code, pos, result, size_hint))
return true;
if (parseRegister64(asm_code, pos, result, size_hint))
@@ -290,6 +330,8 @@ namespace {
if (parseMem8Ptr64(asm_code, pos, result, size_hint))
return true;
+ if (parseMem16Ptr64(asm_code, pos, result, size_hint))
+ return true;
if (parseMem32Ptr64(asm_code, pos, result, size_hint))
return true;
if (parseMem64Ptr64(asm_code, pos, result, size_hint))