summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2020-10-18 16:59:54 +0200
committer Roland Reichwein <mail@reichwein.it> 2020-10-18 16:59:54 +0200
commit 8a2d1dc5c8b6639985d26d1c915048d87d52426b (patch)
tree 2f3957a1c24ef35b4ec9259a6a0d97393b248a57
parent 8f28495ab9a8ebf53868405541e907394895e51f (diff)
Added xor, mov, jmp
-rw-r--r--Makefile3
-rw-r--r--asm/assembler.cpp2
-rw-r--r--asm/assembler.h124
-rw-r--r--asm/chunk.h17
-rw-r--r--asm/intel64/add.cpp25
-rw-r--r--asm/intel64/codes.cpp63
-rw-r--r--asm/intel64/codes.h6
-rw-r--r--asm/intel64/int.cpp4
-rw-r--r--asm/intel64/jmp.cpp103
-rw-r--r--asm/intel64/jmp.h31
-rw-r--r--asm/intel64/mov.cpp31
-rw-r--r--asm/intel64/mov.h31
-rw-r--r--asm/intel64/xor.cpp31
-rw-r--r--asm/intel64/xor.h31
-rw-r--r--asm/segment.cpp9
-rw-r--r--asm/segment.h3
-rw-r--r--intel.cpp437
-rw-r--r--test-asm.cpp10
18 files changed, 457 insertions, 504 deletions
diff --git a/Makefile b/Makefile
index e406833..38e5759 100644
--- a/Makefile
+++ b/Makefile
@@ -49,8 +49,11 @@ PROGSRC=\
asm/chunk.cpp \
asm/intel64/add.cpp \
asm/intel64/int.cpp \
+ asm/intel64/jmp.cpp \
+ asm/intel64/mov.cpp \
asm/intel64/nop.cpp \
asm/intel64/ret.cpp \
+ asm/intel64/xor.cpp \
asm/intel64/codes.cpp \
asm/operators.cpp \
asm/segment.cpp \
diff --git a/asm/assembler.cpp b/asm/assembler.cpp
index d6ab230..5c879b1 100644
--- a/asm/assembler.cpp
+++ b/asm/assembler.cpp
@@ -11,7 +11,7 @@ std::unordered_map<std::string, FactoryFunction> ops;
bool registerOp(const std::string& mnemonic, FactoryFunction f)
{
if (ops.contains(mnemonic)) {
- std::cout << "Warning: mnemonic |" << mnemonic << "| already registered." << std::endl;
+ std::cerr << "Warning: mnemonic |" << mnemonic << "| already registered." << std::endl;
return false;
}
diff --git a/asm/assembler.h b/asm/assembler.h
index 3d3e9a9..52c3da5 100644
--- a/asm/assembler.h
+++ b/asm/assembler.h
@@ -10,8 +10,84 @@
#include <memory>
#include <string>
#include <unordered_map>
+#include <vector>
+
+// TODO: namespace Asm, e.g. AsmArgs -> Asm::Args
+
+class AsmArgs: public std::vector<std::any> // argument list of one instruction; each element holds one of the nested operand types
+{
+public:
+ AsmArgs(){}
+ AsmArgs(const std::vector<std::any>& args): std::vector<std::any>(args){}
+
+ class Immediate8 // 8 bit immediate operand
+ {
+ public:
+  Immediate8(uint8_t value): m_value(value) {}
+  uint8_t value() const {return m_value;}
+  std::vector<uint8_t> getCode() const {return {m_value};} // encoded as a single byte
+
+ private:
+  uint8_t m_value;
+ };
+
+ class Immediate32 // 32 bit immediate operand
+ {
+ public:
+  Immediate32(uint32_t value): m_value(value) {}
+  uint32_t value() const { return m_value; }
+  std::vector<uint8_t> getCode() const { // four bytes, least significant byte first
+   // shifts instead of a uint32_t store through the byte buffer: no aliasing, independent of host byte order
+   return { uint8_t(m_value), uint8_t(m_value >> 8),
+            uint8_t(m_value >> 16), uint8_t(m_value >> 24) };
+  }
+
+ private:
+  uint32_t m_value;
+ };
+
+ class Register8 // operand naming a register; the type tag selects the 8 bit overloads
+ {
+ public:
+  Register8(const std::string& name): m_name(name) {}
+  std::string name() const { return m_name; }
+
+ private:
+  std::string m_name;
+ };
+
+ class Register32 // operand naming a register; the type tag selects the 32 bit overloads
+ {
+ public:
+  Register32(const std::string& name): m_name(name) {}
+  std::string name() const { return m_name; }
+
+ private:
+  std::string m_name;
+ };
+
+ class Register64 // operand naming a register; the type tag selects the 64 bit overloads
+ {
+ public:
+  Register64(const std::string& name): m_name(name) {}
+  std::string name() const { return m_name; }
+
+ private:
+  std::string m_name;
+ };
+
+ class Label // operand naming a label
+ {
+ public:
+  Label(const std::string& name): m_name(name) {}
+  std::string name() const { return m_name; }
+
+ private:
+  std::string m_name;
+ };
+
+};
-using AsmArgs = std::vector<std::any>;
using FactoryFunction = std::function<std::shared_ptr<Op>(AsmArgs&)>;
// mnemonic: mnemonic including argument types
@@ -44,49 +120,3 @@ std::string mangleName(const std::string& s)
std::string mangleName(const std::string& s, AsmArgs& args);
-class Immediate8
-{
-public:
- Immediate8(uint8_t value): m_value(value) {}
- uint8_t value() {return m_value;}
- std::vector<uint8_t> getCode() {return {m_value};};
-
-private:
- uint8_t m_value;
-};
-
-class Immediate32
-{
-public:
- Immediate32(uint32_t value): m_value(value) {}
- uint32_t value() { return m_value; }
- std::vector<uint8_t> getCode() {
- std::vector<uint8_t> result(size_t(4));
- *(reinterpret_cast<uint32_t*>(result.data())) = boost::endian::native_to_little(m_value);
- return result;
- };
-
-private:
- uint32_t m_value;
-};
-
-class Register32
-{
-public:
- Register32(const std::string& name): m_name(name) {}
- std::string name() { return m_name; }
-
-private:
- std::string m_name;
-};
-
-class Register64
-{
-public:
- Register64(const std::string& name): m_name(name) {}
- std::string name() { return m_name; }
-
-private:
- std::string m_name;
-};
-
diff --git a/asm/chunk.h b/asm/chunk.h
index cc45ea9..cf6efaa 100644
--- a/asm/chunk.h
+++ b/asm/chunk.h
@@ -6,6 +6,9 @@
#include <string>
#include <vector>
+// TODO: use it everywhere!
+using OP_T = std::vector<uint8_t>;
+
class Chunk
{
public:
@@ -14,6 +17,20 @@ public:
virtual size_t size() = 0; ///< returns size in bytes
};
+// Mix-in: can be added via multiple inheritance to chunks whose code embeds the address of a label
+struct AddressFeature
+{
+ std::string label; ///< name of the referenced label
+
+ std::vector<uint8_t> machine_code; ///< primary encoding, address bytes included
+ size_t addr_size{}; ///< width of the address field in bytes; 0 until set
+ size_t addr_offs{}; ///< offset inside code
+
+ std::vector<uint8_t> alternative_code; ///< optional second encoding; empty if there is none
+ size_t alternative_size{}; ///< width of the address field of the alternative; 0 if there is none
+ size_t alternative_offs{}; ///< offset inside code
+};
+
class Label: public Chunk
{
public:
diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp
index dc5c704..2de2219 100644
--- a/asm/intel64/add.cpp
+++ b/asm/intel64/add.cpp
@@ -9,10 +9,16 @@ using namespace std::string_literals;
Op_add::Op_add(AsmArgs& args)
{
- if (args[0].type() == typeid(Register32) && std::any_cast<Register32>(args[0]).name() == "eax" && args[1].type() == typeid(Immediate32)) { // add eax, imm32
- machine_code = std::vector<uint8_t>{ 0x05 } + std::any_cast<Immediate32>(args[1]).getCode();
- } else if (args[0].type() == typeid(Register64) && std::any_cast<Register64>(args[0]).name() == "rax" && args[1].type() == typeid(Immediate32)) { // add rax, imm32
- machine_code = REX("W") + std::vector<uint8_t>{ 0x05 } + std::any_cast<Immediate32>(args[1]).getCode();
+ if (args[0].type() == typeid(AsmArgs::Register32) &&
+ std::any_cast<AsmArgs::Register32>(args[0]).name() == "eax" &&
+ args[1].type() == typeid(AsmArgs::Immediate32))
+ { // add eax, imm32
+ machine_code = std::vector<uint8_t>{ 0x05 } + std::any_cast<AsmArgs::Immediate32>(args[1]).getCode();
+ } else if (args[0].type() == typeid(AsmArgs::Register64) &&
+ std::any_cast<AsmArgs::Register64>(args[0]).name() == "rax" &&
+ args[1].type() == typeid(AsmArgs::Immediate32))
+ { // add rax, imm32
+ machine_code = REX("W") + std::vector<uint8_t>{ 0x05 } + std::any_cast<AsmArgs::Immediate32>(args[1]).getCode();
} else {
throw std::runtime_error("Unimplemented: add "s + args[0].type().name() + " "s + args[1].type().name());
}
@@ -20,12 +26,13 @@ Op_add::Op_add(AsmArgs& args)
namespace {
-bool registered0 { registerOp(mangleName<Register32, Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{
+bool registered {
+ registerOp(mangleName<AsmArgs::Register32, AsmArgs::Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_add>(args);
- }) };
-// TODO
-bool registered1 { registerOp(mangleName<Register64, Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{
+ }) &&
+ registerOp(mangleName<AsmArgs::Register64, AsmArgs::Immediate32>("add"), [](AsmArgs& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_add>(args);
- }) };
+ })
+};
}
diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp
index a1d9e87..66a08dd 100644
--- a/asm/intel64/codes.cpp
+++ b/asm/intel64/codes.cpp
@@ -1,7 +1,12 @@
#include "codes.h"
+#include <exception>
+#include <unordered_map>
+
+using namespace std::string_literals;
+
// REX prefix: 0b0100WRXB
-std::vector<uint8_t> REX(std::string s) {
+std::vector<uint8_t> REX(const std::string& s) {
uint8_t result{0b01000000};
if (s == "W")
result |= 0b00001000;
@@ -15,3 +20,59 @@ std::vector<uint8_t> REX(std::string s) {
return { result };
}
+namespace {
+
+ std::unordered_map<std::string, size_t> IndexOfRegister{ // register name -> register number (0..7); only 8/16/32 bit names are listed
+ {"al", 0}, {"ah", 4},
+ {"bl", 3}, {"bh", 7},
+ {"cl", 1}, {"ch", 5},
+ {"dl", 2}, {"dh", 6},
+
+ {"ax", 0}, {"sp", 4},
+ {"bx", 3}, {"bp", 7},
+ {"cx", 1}, {"si", 5},
+ {"dx", 2}, {"di", 6},
+
+ {"eax", 0}, {"esp", 4},
+ {"ebx", 3}, {"ebp", 7},
+ {"ecx", 1}, {"esi", 5},
+ {"edx", 2}, {"edi", 6},
+ };
+
+}
+
+// Manual, page 530
+// Reg + Reg/Memory: builds one ModRM byte from two register names; throws std::runtime_error for a name missing in IndexOfRegister
+uint8_t ModRM(const std::string& reg, const std::string& rm) {
+ // TODO: extend
+ uint8_t result{0b11000000}; // top two bits always set; presumably mod=11 (register-direct) - confirm against the manual
+
+ auto index1{ IndexOfRegister.find(reg) };
+ if (index1 == IndexOfRegister.end())
+ throw std::runtime_error("Unknown register for arg1: "s + reg);
+
+ result |= (index1->second << 3); // number of reg goes into bits 5..3
+
+ auto index2{ IndexOfRegister.find(rm) };
+ if (index2 == IndexOfRegister.end())
+ throw std::runtime_error("Unknown register for arg2: "s + rm);
+
+ result |= index2->second; // number of rm goes into bits 2..0
+
+ return result;
+}
+
+#if 0
+ prefixes{
+ "lock", 0xf0,
+
+ // branch hint
+ 0x2e, "branch not taken"
+ 0x3e, "branch taken"
+
+ 0x66, "operand size override" // switch between 16 and 32 bit operands
+ 0x67, "address size override" // switch between 16 and 32 bit addresses
+ };
+ };
+#endif
+
diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h
index 32eff1c..0ff17f1 100644
--- a/asm/intel64/codes.h
+++ b/asm/intel64/codes.h
@@ -5,4 +5,8 @@
#include <vector>
// REX prefix: 0b0100WRXB
-std::vector<uint8_t> REX(std::string s);
+std::vector<uint8_t> REX(const std::string& s);
+
+// Manual, page 530
+// Reg + Reg/Memory
+uint8_t ModRM(const std::string& reg, const std::string& rm);
diff --git a/asm/intel64/int.cpp b/asm/intel64/int.cpp
index 7b682ab..a7df338 100644
--- a/asm/intel64/int.cpp
+++ b/asm/intel64/int.cpp
@@ -6,7 +6,7 @@ Op_int::Op_int(AsmArgs& args)
{
// At this point, the registration already ensured the number and types of args
- Immediate8 i {std::any_cast<Immediate8>(args[0])};
+ AsmArgs::Immediate8 i {std::any_cast<AsmArgs::Immediate8>(args[0])};
if (i.value() == 0) { // INT 0
machine_code = { 0xCE };
@@ -21,7 +21,7 @@ Op_int::Op_int(AsmArgs& args)
namespace {
-bool registered { registerOp(mangleName<Immediate8>("int"), [](AsmArgs& args) -> std::shared_ptr<Op>{
+bool registered { registerOp(mangleName<AsmArgs::Immediate8>("int"), [](AsmArgs& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_int>(args);
}) };
diff --git a/asm/intel64/jmp.cpp b/asm/intel64/jmp.cpp
new file mode 100644
index 0000000..30ae546
--- /dev/null
+++ b/asm/intel64/jmp.cpp
@@ -0,0 +1,103 @@
+#include "jmp.h"
+
+#include "codes.h"
+
+#include <asm/assembler.h>
+#include <asm/operators.h>
+
+#include <asm/intel64/codes.h>
+
+using namespace std::string_literals;
+
+namespace {
+ struct Jump { // one mnemonic with its opcode bytes for the 8 bit and 32 bit address forms
+  std::string name;
+  OP_T jmp8; ///< if empty, not available
+  OP_T jmp32; ///< if empty, not available
+ };
+
+ const std::vector<Jump> jumpOps { // const: only read; lives for the whole program, so the factories below may refer to its elements
+  // Call Procedure
+  {"call", OP_T{}, OP_T{ 0xE8 } }, // no addr8 version
+
+  // Unconditional Jump
+  {"jmp", OP_T{ 0xEB }, OP_T{ 0xE9 } },
+
+  // Conditional Jumps
+  {"ja", OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 }},
+  {"jae", OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 }},
+  {"jb", OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 }},
+  {"jbe", OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 }},
+  {"jc", OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 }},
+  {"jecxz", OP_T{ 0x67, 0xE3 }, OP_T{} }, // 0x67 address-size prefix selects ECX in 64 bit mode; no addr32 version
+  {"jrcxz", OP_T{ 0xE3 }, OP_T{} }, // no addr32 version
+  {"je", OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 }},
+  {"jg", OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F }},
+  {"jge", OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D }},
+  {"jl", OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C }},
+  {"jle", OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E }},
+  {"jna", OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 }},
+  {"jnae", OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 }},
+  {"jnb", OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 }},
+  {"jnbe", OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 }},
+  {"jnc", OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 }},
+  {"jne", OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 }},
+  {"jng", OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E }},
+  {"jnge", OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C }},
+  {"jnl", OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D }},
+  {"jnle", OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F }},
+  {"jno", OP_T{ 0x71 }, OP_T{ 0x0F, 0x81 }},
+  {"jnp", OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B }},
+  {"jns", OP_T{ 0x79 }, OP_T{ 0x0F, 0x89 }},
+  {"jnz", OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 }},
+  {"jo", OP_T{ 0x70 }, OP_T{ 0x0F, 0x80 }},
+  {"jp", OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A }},
+  {"jpe", OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A }},
+  {"jpo", OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B }},
+  {"js", OP_T{ 0x78 }, OP_T{ 0x0F, 0x88 }},
+  {"jz", OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 }},
+ };
+
+ bool registerOps() { // registers one factory per table entry; returns false if any registration was rejected
+  bool result{true};
+  for (const auto& jumpOp: jumpOps) {
+   result &= registerOp(mangleName<AsmArgs::Label>(jumpOp.name), [&jumpOp](AsmArgs& args) -> std::shared_ptr<Op>{ // &= (not &&): keep registering after a failure
+    return std::make_shared<Op_jmp>(jumpOp.name, args, jumpOp.jmp8, jumpOp.jmp32);
+   });
+  }
+  return result;
+ }
+
+ bool registered { // evaluated during static initialization of this translation unit
+  registerOps()
+ };
+}
+
+Op_jmp::Op_jmp(const std::string& name, AsmArgs& args, const OP_T& jmp8, const OP_T& jmp32)
+{ // builds opcode + zeroed address placeholder; throws std::runtime_error if both jmp8 and jmp32 are empty
+ label = std::any_cast<AsmArgs::Label>(args[0]).name();
+
+ if (!jmp32.empty()) { // set machine_code
+  machine_code = jmp32 + OP_T(size_t(4), uint8_t(0)); // (count, value): four zero bytes; braces would build the list {4, 0}
+  addr_size = 4;
+  addr_offs = jmp32.size();
+  if (!jmp8.empty()) { // also provide alternative
+   alternative_code = jmp8 + OP_T(size_t(1), uint8_t(0)); // exactly one zero byte
+   alternative_size = 1;
+   alternative_offs = jmp8.size();
+  }
+ }
+
+ if (machine_code.empty() && !jmp8.empty()) { // only an 8 bit form exists: it becomes the primary code
+  machine_code = jmp8 + OP_T(size_t(1), uint8_t(0)); // exactly one zero byte
+  addr_size = 1;
+  addr_offs = jmp8.size();
+ }
+
+ if (machine_code.empty()) {
+  throw std::runtime_error("Unimplemented: "s + name);
+ }
+
+ // actual address not set, yet!
+}
+
diff --git a/asm/intel64/jmp.h b/asm/intel64/jmp.h
new file mode 100644
index 0000000..db8a5a8
--- /dev/null
+++ b/asm/intel64/jmp.h
@@ -0,0 +1,31 @@
+// jmp
+// call
+// ja
+// ...
+
+#pragma once
+
+#include <asm/assembler.h>
+
+class Op_jmp: public Op, public AddressFeature // one class for call, jmp and the conditional jumps; opcode bytes are passed to the constructor
+{
+public:
+ Op_jmp(const std::string& name, AsmArgs& args, const OP_T& jmp8, const OP_T& jmp32);
+
+ std::vector<uint8_t> getCode() override ///< returns a copy of AddressFeature::machine_code
+ {
+ return machine_code;
+ }
+
+ size_t size() override ///< number of bytes in machine_code
+ {
+ return machine_code.size();
+ }
+
+ bool optimize() override ///< returns true if changed; currently always false
+ {
+ return false;
+ }
+
+};
+
diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp
new file mode 100644
index 0000000..33589e9
--- /dev/null
+++ b/asm/intel64/mov.cpp
@@ -0,0 +1,31 @@
+#include "mov.h"
+
+#include "codes.h"
+
+#include <asm/assembler.h>
+#include <asm/operators.h>
+
+#include <asm/intel64/codes.h>
+
+using namespace std::string_literals;
+
+Op_mov::Op_mov(AsmArgs& args)
+{ // encodes mov for two Register8 arguments; any other argument types throw std::runtime_error
+ if (args[0].type() == typeid(AsmArgs::Register8) && args[1].type() == typeid(AsmArgs::Register8)) { // mov reg8, reg8
+ // r/m8, r8: ModRM:r/m (w), ModRM:reg (r)
+ machine_code = std::vector<uint8_t>{ 0x88 } +
+ ModRM(std::any_cast<AsmArgs::Register8>(args[1]).name(), std::any_cast<AsmArgs::Register8>(args[0]).name()); // source (args[1]) is passed as reg, destination (args[0]) as rm
+ } else {
+ throw std::runtime_error("Unimplemented: mov "s + args[0].type().name() + " "s + args[1].type().name());
+ }
+}
+
+namespace {
+
+bool registered { // holds the result of registerOp for "mov" with two Register8 arguments
+ registerOp(mangleName<AsmArgs::Register8, AsmArgs::Register8>("mov"), [](AsmArgs& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_mov>(args);
+ })
+};
+
+}
diff --git a/asm/intel64/mov.h b/asm/intel64/mov.h
new file mode 100644
index 0000000..e1b2304
--- /dev/null
+++ b/asm/intel64/mov.h
@@ -0,0 +1,31 @@
+// Memory Move
+
+#pragma once
+
+#include <asm/assembler.h>
+
+class Op_mov: public Op // mov instruction; the bytes are produced once in the constructor
+{
+public:
+ Op_mov(AsmArgs& args);
+
+public:
+ std::vector<uint8_t> getCode() override ///< returns a copy of machine_code
+ {
+ return machine_code;
+ }
+
+ size_t size() override ///< number of bytes in machine_code
+ {
+ return machine_code.size();
+ }
+
+ bool optimize() override ///< returns true if changed; currently always false
+ {
+ return false;
+ }
+
+protected:
+ std::vector<uint8_t> machine_code; ///< encoded instruction, filled by the constructor
+};
+
diff --git a/asm/intel64/xor.cpp b/asm/intel64/xor.cpp
new file mode 100644
index 0000000..c0dbb68
--- /dev/null
+++ b/asm/intel64/xor.cpp
@@ -0,0 +1,31 @@
+#include "xor.h"
+
+#include "codes.h"
+
+#include <asm/assembler.h>
+#include <asm/operators.h>
+
+#include <asm/intel64/codes.h>
+
+using namespace std::string_literals;
+
+Op_xor::Op_xor(AsmArgs& args)
+{ // encodes xor for two Register8 arguments; any other argument types throw std::runtime_error
+ if (args[0].type() == typeid(AsmArgs::Register8) && args[1].type() == typeid(AsmArgs::Register8)) { // xor reg8, reg8
+ // r8, r/m8: ModRM:reg (w), ModRM:r/m (r)
+ machine_code = std::vector<uint8_t>{ 0x32 } +
+ ModRM(std::any_cast<AsmArgs::Register8>(args[0]).name(), std::any_cast<AsmArgs::Register8>(args[1]).name()); // destination (args[0]) is passed as reg, source (args[1]) as rm
+ } else {
+ throw std::runtime_error("Unimplemented: xor "s + args[0].type().name() + " "s + args[1].type().name());
+ }
+}
+
+namespace {
+
+bool registered { // holds the result of registerOp for "xor" with two Register8 arguments
+ registerOp(mangleName<AsmArgs::Register8, AsmArgs::Register8>("xor"), [](AsmArgs& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_xor>(args);
+ })
+};
+
+}
diff --git a/asm/intel64/xor.h b/asm/intel64/xor.h
new file mode 100644
index 0000000..f00a657
--- /dev/null
+++ b/asm/intel64/xor.h
@@ -0,0 +1,31 @@
+// XOR
+
+#pragma once
+
+#include <asm/assembler.h>
+
+class Op_xor: public Op // xor instruction; the bytes are produced once in the constructor
+{
+public:
+ Op_xor(AsmArgs& args);
+
+public:
+ std::vector<uint8_t> getCode() override ///< returns a copy of machine_code
+ {
+ return machine_code;
+ }
+
+ size_t size() override ///< number of bytes in machine_code
+ {
+ return machine_code.size();
+ }
+
+ bool optimize() override ///< returns true if changed; currently always false
+ {
+ return false;
+ }
+
+protected:
+ std::vector<uint8_t> machine_code; ///< encoded instruction, filled by the constructor
+};
+
diff --git a/asm/segment.cpp b/asm/segment.cpp
index 60b8348..9fb7a52 100644
--- a/asm/segment.cpp
+++ b/asm/segment.cpp
@@ -30,3 +30,12 @@ std::vector<uint8_t> Segment::getCode()
return result;
}
+
+void Segment::insertAddresses()
+{ // empty body: no addresses are inserted yet
+}
+
+void Segment::optimize()
+{ // empty body: nothing is optimized yet
+ // TODO
+}
diff --git a/asm/segment.h b/asm/segment.h
index f0a758e..dfacd12 100644
--- a/asm/segment.h
+++ b/asm/segment.h
@@ -12,6 +12,7 @@ class Segment: public std::vector<std::shared_ptr<Chunk>>
public:
size_t getAddressOfLabel(const std::string& label);
std::vector<uint8_t> getCode();
+ void insertAddresses();
+ void optimize();
};
-
diff --git a/intel.cpp b/intel.cpp
deleted file mode 100644
index 65b9f3f..0000000
--- a/intel.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-// Intel assembly language
-
-
-// segments: code, stack
-
-#include "minicc.h"
-
-#include <algorithm>
-#include <any>
-#include <array>
-#include <deque>
-#include <functional>
-#include <stdexcept>
-#include <functional>
-#include <stdexcept>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-using namespace std::string_literals;
-using namespace std::placeholders;
-
-namespace {
-
- std::vector<uint8_t> imm8(std::string s) {
- long value{ std::stol(s) };
- uint8_t* bin = reinterpret_cast<uint8_t*>(&value);
- return { uint8_t(*bin & 0xFF) };
- }
-
- std::vector<uint8_t> imm32(std::string s) {
- long value{ std::stol(s) };
- uint32_t* bin = reinterpret_cast<uint32_t*>(&value);
- return {uint8_t(*bin & 0xFF), uint8_t(*bin >> 8 & 0xFF), uint8_t(*bin >> 16 & 0xFF), uint8_t(*bin >> 24 & 0xFF) };
- }
-
- std::unordered_map<std::string, size_t> IndexOfRegister{
- {"al", 0}, {"ah", 4},
- {"bl", 3}, {"bh", 7},
- {"cl", 1}, {"ch", 5},
- {"dl", 2}, {"dh", 6},
-
- {"ax", 0}, {"sp", 4},
- {"bx", 3}, {"bp", 7},
- {"cx", 1}, {"si", 5},
- {"dx", 2}, {"di", 6},
-
- {"eax", 0}, {"esp", 4},
- {"ebx", 3}, {"ebp", 7},
- {"ecx", 1}, {"esi", 5},
- {"edx", 2}, {"edi", 6},
- };
-
- // Manual, page 530
- // Reg + Reg/Memory
- uint8_t ModRM(std::string reg, std::string rm) {
- // TODO: extend
- uint8_t result{0b11000000};
-
- auto index1{ IndexOfRegister.find(reg) };
- if (index1 == IndexOfRegister.end())
- throw std::runtime_error("Unknown register for arg1: "s + reg);
-
- result |= (index1->second << 3);
-
- auto index2{ IndexOfRegister.find(rm) };
- if (index2 == IndexOfRegister.end())
- throw std::runtime_error("Unknown register for arg2: "s + rm);
-
- result |= index2->second;
-
- return result;
- }
-
- enum class AddressType {
- Relative8,
- Relative16,
- Relative32,
- Absolute8,
- Absolute16,
- Absolute32,
- };
-
- struct Address
- {
- AddressType type;
- size_t position; // relative to respective machine code, e.g. byte 1 in jump
- std::string label; // where to jump to, as label
- };
-
- struct InstructionCode
- {
- std::vector<uint8_t> machine_code;
- std::vector<Address> addresses;
- };
-
- // List of alternative codes
- typedef std::deque<InstructionCode> InstructionCodeList;
-
- bool O1{ true }; // Optimization
-
- using OP_T = std::vector<uint8_t>;
-
- InstructionCodeList op_jmp(const std::vector<Token>& sl, std::vector<uint8_t> op_bytes_8, std::vector<uint8_t> op_bytes_32)
- {
- if (sl.size() == 2) { // JMP rel8 / rel32
- const std::string& label{ sl[1].value };
- InstructionCodeList result;
- if (op_bytes_32.size() > 0) {
- op_bytes_32.resize(op_bytes_32.size() + 4, 0x00);
- result.push_back({ op_bytes_32, { {AddressType::Relative32, op_bytes_32.size() - 4, label} } } );
- }
- if (op_bytes_8.size() > 0 && (O1 || op_bytes_32.size() == 0)) {
- op_bytes_8.push_back(0x00);
- result.push_back({ op_bytes_8, { {AddressType::Relative8, op_bytes_8.size() - 1, label} } });
- }
- return result;
- }
-
- // ... TODO
- throw std::runtime_error("Unknown command: "s + sl[0].value);
- }
-
- std::unordered_map<std::string, std::function<InstructionCodeList(const std::vector<Token>&)>> ops_old{
-
- // Call Procedure
- {"call", std::bind(op_jmp, _1, OP_T{}, OP_T{ 0xE8 })},
-
-
- // Unconditional Jump
- {"jmp", std::bind(op_jmp, _1, OP_T{ 0xEB }, OP_T{ 0xE9 })},
-
- // Conditional Jumps
- {"ja", std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })},
- {"jae", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })},
- {"jb", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })},
- {"jbe", std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })},
- {"jc", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })},
- {"jecxz", std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})},
- {"jrcxz", std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})},
- {"je", std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })},
- {"jg", std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })},
- {"jge", std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })},
- {"jl", std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })},
- {"jle", std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })},
- {"jna", std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })},
- {"jnae", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })},
- {"jnb", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })},
- {"jnbe", std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })},
- {"jnc", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })},
- {"jne", std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })},
- {"jng", std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })},
- {"jnge", std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })},
- {"jnl", std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })},
- {"jnle", std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })},
- {"jno", std::bind(op_jmp, _1, OP_T{ 0x71 }, OP_T{ 0x0F, 0x81 })},
- {"jnp", std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })},
- {"jns", std::bind(op_jmp, _1, OP_T{ 0x79 }, OP_T{ 0x0F, 0x89 })},
- {"jnz", std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })},
- {"jo", std::bind(op_jmp, _1, OP_T{ 0x70 }, OP_T{ 0x0F, 0x80 })},
- {"jp", std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })},
- {"jpe", std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })},
- {"jpo", std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })},
- {"js", std::bind(op_jmp, _1, OP_T{ 0x78 }, OP_T{ 0x0F, 0x88 })},
- {"jz", std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })},
-
- // Memory Move
- { "mov", [](const std::vector<Token>& sl) -> InstructionCodeList {
- if (sl.size() == 3) {
- return { { std::vector<uint8_t>{ 0x88 } + ModRM(sl[2].value, sl[1].value), {} } }; // r/m8, r8: ModRM:r/m (w), ModRM:reg (r)
- }
-
- // ... TODO
- throw std::runtime_error("Unknown command: "s + sl[0].value);
- }},
-
- { "xor", [](const std::vector<Token>& sl) -> InstructionCodeList {
- if (sl.size() == 3) {
- return { { std::vector<uint8_t>{ 0x33 } + ModRM(sl[1].value, sl[2].value) } }; // r8, r/m8: ModRM:reg (w), ModRM:r/m (r)
- }
-
- // ... TODO
- throw std::runtime_error("Unknown command: "s + sl[0].value);
- }},
-
- };
-
-#if 0
- prefixes{
- "lock", 0xf0,
-
- // branch hint
- 0x2e, "branch not taken"
- 0x3e, "branch taken"
-
- 0x66, "operand size override" // switch between 16 and 32 bit operands
- 0x67, "address size override" // switch between 16 and 32 bit addresses
- };
- };
-#endif
-
-#ifdef ASM_PARSER
- BNF GetBNF() {
- // TODO:
- return {
- { "assembler-unit", {
- {}
- }},
- { "immediate-32", {
- {}
- }},
- { "mnemonic", {
- {}
- }},
- { "register", {
- {}
- }},
- { "register-8", {
- {}
- }},
- { "register-16", {
- {}
- }},
- { "register-32", {
- {}
- }},
- { "register-64", {
- {}
- }},
-
- };
- };
-#endif
-
- // Checks a 32 bit relative address if it's valid as 8 bit address
- bool IsSmallAddress(const InstructionCode& insn) {
- if (insn.addresses.size() != 1)
- throw std::runtime_error("Bad number of addresses in insn");
-
- size_t i{insn.addresses[0].position};
-
- if (i > insn.machine_code.size() - 3)
- throw std::runtime_error("Bad Address index "s + std::to_string(i) + " in insn with "s + std::to_string(insn.machine_code.size()) + " bytes"s);
-
- if (std::count(insn.machine_code.begin() + i, insn.machine_code.begin() + i + 3, 0x00) == 3 ||
- std::count(insn.machine_code.begin() + i, insn.machine_code.begin() + i + 3, 0xFF) == 3)
- return true;
-
- return false;
- }
-
-
-} // namespace
-
-class Assembler {
-
- std::unordered_map<std::string, size_t> labels; ///< labels with their positions in instruction list
-
- /// 1st Level: Instructions
- /// 2nd Level: Alternatives
- /// 3rd Level: Bytes of single instruction
- std::vector<InstructionCodeList> insn_list;
-
- uint64_t addressFromInstructionIndex(size_t index)
- {
- // TODO: cache this to prevent repetitive summing
-
- if (index > insn_list.size())
- throw std::runtime_error("Index "s + std::to_string(index) + " out of range ("s + std::to_string(insn_list.size()) + ")"s);
-
- uint64_t sum{};
-
- for (size_t i = 0; i < index; i++) {
- if (insn_list[i].size() < 1) {
- throw std::runtime_error("Insufficient alternatives at index "s + std::to_string(i));
- }
-
- sum += static_cast<uint64_t>(insn_list[i][0].machine_code.size());
- }
-
- return sum;
- }
-
- uint64_t addressFromLabel(std::string label)
- {
- auto it{ labels.find(label) };
- if (it == labels.end())
- throw std::runtime_error("Label not found: "s + label);
-
- return addressFromInstructionIndex(it->second);
- }
-
- std::unordered_map<AddressType, std::function<void(std::vector<uint8_t>&, const Address&, uint64_t)>> addressInserters{
- {AddressType::Relative8, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address)
- {
- int64_t difference = static_cast<int64_t>(addressFromLabel(target_address.label)) - insn_address;
- if (difference < -128 || difference > 127)
- throw std::runtime_error("Distance too big");
-
- int8_t diff8 = static_cast<int8_t>(difference);
- uint8_t diff_u8 = *reinterpret_cast<uint8_t*>(&diff8);
-
- machine_code[target_address.position] = diff_u8;
- }
- },
- {AddressType::Relative16, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) { throw std::runtime_error("Relative16 Address not yet supported."); }},
- {AddressType::Relative32, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address)
- {
- int64_t difference = static_cast<int64_t>(addressFromLabel(target_address.label)) - insn_address;
- if (difference < -4294967296 || difference > 4294967295)
- throw std::runtime_error("Distance too big");
-
- int32_t diff32 = static_cast<int32_t>(difference);
- uint32_t diff_u32 = *reinterpret_cast<uint32_t*>(&diff32);
-
- machine_code[target_address.position] = diff_u32 & 0xFF; // little endian
- machine_code[target_address.position + 1] = diff_u32 >> 8 & 0xFF;
- machine_code[target_address.position + 2] = diff_u32 >> 16 & 0xFF;
- machine_code[target_address.position + 3] = diff_u32 >> 24 & 0xFF;
- }
- },
- {AddressType::Absolute8, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute8 Address not yet supported."); }},
- {AddressType::Absolute16, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute16 Address not yet supported."); }},
- {AddressType::Absolute32, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute32 Address not yet supported."); }},
- };
-
- void produce_machine_code(std::vector<std::vector<Token>>& tl)
- {
- for (const auto& t : tl) {
- // label:
- // label: mnemonic arg1, arg2, arg3
- // mnemonic arg1, arg2, arg3
-
- if (t.size() == 2 && t[0].type == "label" && t[1].type == ":") { // label
- if (labels.find(t[0].value) != labels.end())
- throw std::runtime_error("Label already defined: "s + t[0].value);
-
- labels[t[0].value] = insn_list.size();
- } else if (t.size() >= 1 && t[0].type == "instruction") { // instruction
- std::string instruction{ t[0].value };
- auto it = ops_old.find(instruction);
- if (it == ops_old.end())
- throw std::runtime_error("Unknown instruction: "s + instruction);
-
- InstructionCodeList codes = it->second(t);
-
- if (codes.size() == 0)
- throw std::runtime_error("No instruction generated");
-
- insn_list.push_back(codes);
-
- } else
- throw std::runtime_error("Syntax error"s);
- }
- }
-
- void insert_addresses()
- {
- for (size_t i = 0; i < insn_list.size(); i++) {
- InstructionCodeList& list{ insn_list[i] };
- if (list.size() == 0)
- throw std::runtime_error("No instruction at index "s + std::to_string(i));
-
- InstructionCode& code{ list[0] };
-
- for (const auto& address : code.addresses) {
- addressInserters[address.type](code.machine_code, address, addressFromInstructionIndex(i));
- }
- }
- }
-
- void optimize()
- {
- // reduce Jump sizes via alternatives if possible
- bool changed{};
- do {
- changed = false;
-
- for (size_t i = 0; i < insn_list.size(); i++) {
- InstructionCodeList& list{ insn_list[i] }; // Alternatives
-
- // apply specific heuristics to optimization case
- if (list.size() == 2) {
- if (list[0].addresses.size() == 1 && list[1].addresses.size() == 1) {
- if (list[0].addresses[0].type == AddressType::Relative32 && list[1].addresses[0].type == AddressType::Relative8) {
- if (IsSmallAddress(list[0])) {
- list.pop_front();
- break; // start over from start of program
- }
- }
- }
- }
- }
-
- if (changed)
- insert_addresses(); // update
-
- } while (changed);
- }
-
- std::vector<uint8_t> collect_code()
- {
- std::vector<uint8_t> result;
-
- // collect generated machine instructions for result
- // Alternatives already resolved, if configured. Consider only 1st entry (no matter if optimized or not).
- for (size_t i = 0; i < insn_list.size(); i++) {
- InstructionCodeList& list{ insn_list[i] };
- if (list.size() == 0)
- throw std::runtime_error("No instruction at index "s + std::to_string(i));
-
- InstructionCode& code{ list[0] };
-
- result.insert(result.end(), code.machine_code.begin(), code.machine_code.end());
- }
-
- return result;
- }
-
-public:
- Assembler() {}
-
- std::vector<uint8_t> assemble(std::vector<std::vector<Token>> tl)
- {
- labels.clear();
- insn_list.clear();
-
- produce_machine_code(tl); // 1st pass
- insert_addresses(); // 2nd pass
- if (O1) {
- optimize(); // 3rd pass
- }
-
- return collect_code(); // 4th pass
- }
-
-}; // class Assembler
diff --git a/test-asm.cpp b/test-asm.cpp
index d839683..256902c 100644
--- a/test-asm.cpp
+++ b/test-asm.cpp
@@ -39,7 +39,7 @@ protected:
TEST_F(AsmTest, Intel64_add) {
Segment segment;
- AsmArgs args{{Register32("eax"), Immediate32(1)}};
+ AsmArgs args{{AsmArgs::Register32("eax"), AsmArgs::Immediate32(1)}};
segment.push_back(makeOp("add", args));
ASSERT_EQ(segment.size(), 1);
@@ -48,7 +48,7 @@ TEST_F(AsmTest, Intel64_add) {
TEST_F(AsmTest, Intel64_int_0) {
Segment segment;
- AsmArgs args{{Immediate8(0)}};
+ AsmArgs args{{AsmArgs::Immediate8(0)}};
segment.push_back(makeOp("int", args));
ASSERT_EQ(segment.size(), 1);
@@ -57,7 +57,7 @@ TEST_F(AsmTest, Intel64_int_0) {
TEST_F(AsmTest, Intel64_int_1) {
Segment segment;
- AsmArgs args{{Immediate8(1)}};
+ AsmArgs args{{AsmArgs::Immediate8(1)}};
segment.push_back(makeOp("int", args));
ASSERT_EQ(segment.size(), 1);
@@ -66,7 +66,7 @@ TEST_F(AsmTest, Intel64_int_1) {
TEST_F(AsmTest, Intel64_int_5) {
Segment segment;
- AsmArgs args{{Immediate8(5)}};
+ AsmArgs args{{AsmArgs::Immediate8(5)}};
segment.push_back(makeOp("int", args));
ASSERT_EQ(segment.size(), 1);
@@ -93,7 +93,7 @@ TEST_F(AsmTest, Intel64_multiple) {
Segment segment;
segment.push_back(makeOp("nop"));
- AsmArgs args{{Immediate8(5)}};
+ AsmArgs args{{AsmArgs::Immediate8(5)}};
segment.push_back(makeOp("int", args));
segment.push_back(makeOp("ret"));
segment.push_back(makeLabel("data1"));