summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-11-09 09:50:58 +0100
committerRoland Reichwein <mail@reichwein.it>2020-11-09 09:50:58 +0100
commit1ac8ab06e9aad3b6d22685255459d71cb49e1f28 (patch)
tree95e4ca7de492180aef9d459ee40663b1bf134b66
parentdb0654fa48ddc07e6bcaaaeddfa301a32806dadc (diff)
First program: Can add 2 integers and return result via exit code
-rw-r--r--asm/encode.cpp62
-rw-r--r--asm/intel64/add.cpp6
-rw-r--r--asm/intel64/codes.cpp46
-rw-r--r--asm/intel64/codes.h3
-rw-r--r--asm/intel64/mov.cpp5
-rw-r--r--cpp.cpp7
-rw-r--r--flowgraph/data.h5
-rw-r--r--flowgraph/node.cpp2
-rw-r--r--flowgraph/node.h11
9 files changed, 123 insertions, 24 deletions
diff --git a/asm/encode.cpp b/asm/encode.cpp
index ea50cb7..8bf33c0 100644
--- a/asm/encode.cpp
+++ b/asm/encode.cpp
@@ -1,6 +1,68 @@
+// Intel specific conversion: Abstract Graph -> Machine specific segment
#include "encode.h"
+#include "asm/assembler.h"
+#include "minicc.h"
+
+#include <boost/endian/conversion.hpp>
+
+#include <exception>
+
+namespace {
+
+// Extracts the 32 bit immediate held by operand number `index` of a binary operation.
+//
+// The operand must be of type Int and be backed by a FlowGraph::Constant that
+// holds at least 4 bytes; the first 4 bytes are decoded as little endian.
+//
+// Throws std::runtime_error if the type is not Int, the storage is missing,
+// the storage is not a Constant, or the constant is shorter than 4 bytes.
+uint32_t immediate32FromOperand(FlowGraph::Data& operand, int index)
+{
+  const std::string number = std::to_string(index);
+
+  if (operand.type() != FlowGraph::DataType::Int)
+    throw std::runtime_error("Bad type for operand "s + number + ": "s + std::to_string(static_cast<int>(operand.type())));
+
+  const auto storage = operand.storage();
+  if (!storage)
+    throw std::runtime_error("ICE: Operand "s + number + " storage is 0"s);
+
+  // Pointer form of dynamic_cast: a mismatch yields nullptr instead of std::bad_cast
+  auto* constant = dynamic_cast<FlowGraph::Constant*>(storage.get());
+  if (!constant)
+    throw std::runtime_error("Bad value for operand "s + number + ": Constant expected"s);
+
+  // Bind the value once, so that no pointer into a possible temporary is kept
+  const auto& bytes = constant->value();
+  if (bytes.size() < sizeof(uint32_t))
+    throw std::runtime_error("ICE: Int data from operand "s + number + " needs at least 4 bytes, got "s + std::to_string(bytes.size()));
+
+  // Assemble byte by byte: decodes little endian on any host and avoids
+  // reading the byte buffer through a (possibly misaligned) uint32_t pointer
+  const auto* raw = reinterpret_cast<const uint8_t*>(bytes.data());
+  return  static_cast<uint32_t>(raw[0])        |
+         (static_cast<uint32_t>(raw[1]) <<  8) |
+         (static_cast<uint32_t>(raw[2]) << 16) |
+         (static_cast<uint32_t>(raw[3]) << 24);
+}
+
+} // namespace
+
+// Translates the abstract flow graph into machine specific operations.
+//
+// segment is cleared first. For every node, which must be a
+// FlowGraph::BinaryOperation with two constant Int sources, the sequence
+//   mov edi, <source0>
+//   add edi, <source1>
+// is appended to segment.
+//
+// Throws std::runtime_error for null or unsupported nodes and for operands
+// which are not 4 byte Int constants.
void Asm::toMachineCode(const FlowGraph::Graph& graph, Segment& segment)
{
+  segment.clear();
+
+  for (const std::shared_ptr<FlowGraph::Node>& node: graph) {
+    if (!node)
+      throw std::runtime_error("ICE: Encoding: Node is null");
+
+    auto* op = dynamic_cast<FlowGraph::BinaryOperation*>(node.get());
+    if (!op)
+      throw std::runtime_error("ICE: Encoding: Unsupported node: "s + typeid(*node).name());
+
+    // Reference, not copy: operands() hands out the node's own vector
+    auto& operands = op->operands();
+    if (operands.size() < 3)
+      throw std::runtime_error("ICE: Encoding: Binary operation needs 3 operands, got "s + std::to_string(operands.size()));
+
+    // TODO: ignore destination (0) for now
+    // TODO: op->type() is not evaluated yet, "add" is emitted for every binary operation
+    const uint32_t immediate1 = immediate32FromOperand(operands[1], 1);
+    const uint32_t immediate2 = immediate32FromOperand(operands[2], 2);
+
+    Asm::Args args1{{Asm::Args::Register32("edi"), Asm::Args::Immediate32(immediate1)}};
+    segment.push_back(makeOp("mov", args1));
+
+    Asm::Args args2{{Asm::Args::Register32("edi"), Asm::Args::Immediate32(immediate2)}};
+    segment.push_back(makeOp("add", args2));
+  }
}
diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp
index 106ffec..4438895 100644
--- a/asm/intel64/add.cpp
+++ b/asm/intel64/add.cpp
@@ -12,8 +12,12 @@ Op_add::Op_add(Asm::Args& args)
if (args[0].type() == typeid(Asm::Args::Register32) &&
std::any_cast<Asm::Args::Register32>(args[0]).name() == "eax" &&
args[1].type() == typeid(Asm::Args::Immediate32))
- { // add eax, imm32
+ { // add eax, imm32 (before "add reg32, imm32"! It's shorter.)
machine_code = std::vector<uint8_t>{ 0x05 } + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();
+ } else if (args[0].type() == typeid(Asm::Args::Register32) &&
+ args[1].type() == typeid(Asm::Args::Immediate32))
+ { // add reg32, imm32
+ machine_code = std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Register32>(args[0]).name()) + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();
} else if (args[0].type() == typeid(Asm::Args::Register64) &&
std::any_cast<Asm::Args::Register64>(args[0]).name() == "rax" &&
args[1].type() == typeid(Asm::Args::Immediate32))
diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp
index 66a08dd..5d93a57 100644
--- a/asm/intel64/codes.cpp
+++ b/asm/intel64/codes.cpp
@@ -29,14 +29,14 @@ namespace {
{"dl", 2}, {"dh", 6},
{"ax", 0}, {"sp", 4},
- {"bx", 3}, {"bp", 7},
- {"cx", 1}, {"si", 5},
- {"dx", 2}, {"di", 6},
+ {"bx", 3}, {"bp", 5},
+ {"cx", 1}, {"si", 6},
+ {"dx", 2}, {"di", 7},
{"eax", 0}, {"esp", 4},
- {"ebx", 3}, {"ebp", 7},
- {"ecx", 1}, {"esi", 5},
- {"edx", 2}, {"edi", 6},
+ {"ebx", 3}, {"ebp", 5},
+ {"ecx", 1}, {"esi", 6},
+ {"edx", 2}, {"edi", 7},
};
}
@@ -44,15 +44,26 @@ namespace {
// Manual, page 530
// Reg + Reg/Memory
uint8_t ModRM(const std::string& reg, const std::string& rm) {
- // TODO: extend
uint8_t result{0b11000000};
- auto index1{ IndexOfRegister.find(reg) };
- if (index1 == IndexOfRegister.end())
- throw std::runtime_error("Unknown register for arg1: "s + reg);
-
- result |= (index1->second << 3);
-
+ size_t val_reg{};
+ // reg
+ if (reg.size() > 0 && reg[0] == '/') { // "/digit"
+ try {
+ val_reg = stoull(reg.substr(1));
+ } catch (const std::exception& ex) {
+ throw std::runtime_error("ModRM: Bad digit in arg1: "s + reg);
+ }
+ } else { // reg
+ auto index1{ IndexOfRegister.find(reg) };
+ if (index1 == IndexOfRegister.end())
+ throw std::runtime_error("ModRM: Unknown register for arg1: "s + reg);
+ val_reg = index1->second;
+ }
+
+ result |= (val_reg << 3);
+
+ // rm
auto index2{ IndexOfRegister.find(rm) };
if (index2 == IndexOfRegister.end())
throw std::runtime_error("Unknown register for arg2: "s + rm);
@@ -62,6 +73,15 @@ uint8_t ModRM(const std::string& reg, const std::string& rm) {
return result;
}
+// Returns the number stored in IndexOfRegister for the register name reg.
+// Throws std::runtime_error if the name is not contained in the table.
+uint8_t RegNo(const std::string& reg)
+{
+  const auto entry = IndexOfRegister.find(reg);
+  const bool known = (entry != IndexOfRegister.end());
+
+  if (known)
+    return entry->second;
+
+  throw std::runtime_error("Reg: Unknown register for arg: "s + reg);
+}
+
#if 0
prefixes{
"lock", 0xf0,
diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h
index 0ff17f1..112eef4 100644
--- a/asm/intel64/codes.h
+++ b/asm/intel64/codes.h
@@ -10,3 +10,6 @@ std::vector<uint8_t> REX(const std::string& s);
// Manual, page 530
// Reg + Reg/Memory
uint8_t ModRM(const std::string& reg, const std::string& rm);
+
+// Just the number of reg, e.g. for encoding inside primary opcode
+uint8_t RegNo(const std::string& reg);
diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp
index 40a48f8..8603fc9 100644
--- a/asm/intel64/mov.cpp
+++ b/asm/intel64/mov.cpp
@@ -15,6 +15,8 @@ Op_mov::Op_mov(Asm::Args& args)
// r/m8, r8: ModRM:r/m (w), ModRM:reg (r)
machine_code = std::vector<uint8_t>{ 0x88 } +
ModRM(std::any_cast<Asm::Args::Register8>(args[1]).name(), std::any_cast<Asm::Args::Register8>(args[0]).name());
+ } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Immediate32)) { // mov reg32, imm32
+ machine_code = std::vector<uint8_t>{ static_cast<uint8_t>(0xB8 + RegNo(std::any_cast<Asm::Args::Register32>(args[0]).name())) } + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();
} else {
throw std::runtime_error("Unimplemented: mov "s + args[0].type().name() + " "s + args[1].type().name());
}
@@ -25,6 +27,9 @@ namespace {
+// Each registerOp() call passes the mangled name of one "mov" operand type
+// combination together with a factory lambda creating an Op_mov from the
+// arguments. The results of all calls are combined via && into this flag.
+// NOTE(review): presumably this variable only exists to run the registration
+// during static initialization - confirm.
bool registered {
registerOp(mangleName<Asm::Args::Register8, Asm::Args::Register8>("mov"), [](Asm::Args& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_mov>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Register32, Asm::Args::Immediate32>("mov"), [](Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_mov>(args);
})
};
diff --git a/cpp.cpp b/cpp.cpp
index c988b5d..c8de4c8 100644
--- a/cpp.cpp
+++ b/cpp.cpp
@@ -1,6 +1,7 @@
#include "cpp.h"
#include "asm/encode.h"
+#include "asm/operators.h"
#include "bnf.h"
#include "cppbnf.h"
#include "debug.h"
@@ -584,13 +585,13 @@ void CPP::link()
// TODO
// mSegment -> elf
-#if 0
- return {
+
+ mCode = std::vector<uint8_t>{
0x48, 0xc7, 0xc0, 0x3c, 0x00, 0x00, 0x00, // mov $0x3c,%rax # syscall 60
0x48, 0x31, 0xff, // xor %rdi,%rdi # exit code 0
+ } + mSegment.getCode() + std::vector<uint8_t>{ // add to edi
0x0f, 0x05, // syscall
};
-#endif
}
// phases of translation, according to standard
diff --git a/flowgraph/data.h b/flowgraph/data.h
index 1ed4964..abf046d 100644
--- a/flowgraph/data.h
+++ b/flowgraph/data.h
@@ -8,7 +8,7 @@
namespace FlowGraph {
// Explicitely not including size
- enum class DataType
+ enum class DataType: int
{
Size,
Int,
@@ -27,8 +27,9 @@ namespace FlowGraph {
class Data
{
public:
- Data(DataType type, std::shared_ptr<Storage> storage):m_type(type) {}
+ Data(DataType type, std::shared_ptr<Storage> storage): m_type(type), m_storage(storage) {}
DataType type() const { return m_type; }
+ std::shared_ptr<Storage> storage() { return m_storage; }
private:
const DataType m_type;
std::shared_ptr<Storage> m_storage;
diff --git a/flowgraph/node.cpp b/flowgraph/node.cpp
index 81217ce..795a252 100644
--- a/flowgraph/node.cpp
+++ b/flowgraph/node.cpp
@@ -9,7 +9,7 @@ using namespace FlowGraph;
// 4 byte for now
+// Creates a Data of type DataType::Int whose storage is a Constant holding
+// i, converted to int32_t, as 4 bytes in little endian order.
Data FlowGraph::MakeConstantInt(int i)
{
- std::vector<uint8_t> value(size_t(4));
+ std::vector<uint8_t> value(size_t(4), uint8_t(0));
*(reinterpret_cast<int32_t*>(value.data())) = boost::endian::native_to_little(static_cast<int32_t>(i));
return Data(DataType::Int, std::make_shared<Constant>(value));
}
diff --git a/flowgraph/node.h b/flowgraph/node.h
index 89f6088..853b017 100644
--- a/flowgraph/node.h
+++ b/flowgraph/node.h
@@ -17,7 +17,12 @@ namespace FlowGraph {
+ // Base class of the flow graph nodes; owns the list of operands of a node.
class Node
{
public:
+ // Creates a node with an empty operand list
+ Node(){}
+ // Creates a node which stores a copy of the given operands
+ Node(std::vector<Data> operands): mOperands(operands) {}
+ // Mutable access to the stored operands
+ std::vector<Data>& operands() { return mOperands; }
virtual ~Node() {}; // force class to be polymorphic (e.g. in a container)
+ private:
+ std::vector<Data> mOperands;
};
// Memory on Heap: new and delete
@@ -146,13 +151,11 @@ namespace FlowGraph {
{
public:
BinaryOperation(BinaryOperationType type, Data& destination, Data& source0, Data& source1):
- m_type(type), m_destination(destination), m_source0(source0), m_source1(source1)
+ Node(std::vector<Data>({destination, source0, source1})), m_type(type)
{}
+ BinaryOperationType type() {return m_type;}
private:
BinaryOperationType m_type;
- Data m_destination;
- Data m_source0;
- Data m_source1;
};
} // namespace FlowGraph