From 1ac8ab06e9aad3b6d22685255459d71cb49e1f28 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Mon, 9 Nov 2020 09:50:58 +0100 Subject: First program: Can add 2 integers and return result via exit code --- asm/encode.cpp | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ asm/intel64/add.cpp | 6 ++++- asm/intel64/codes.cpp | 46 +++++++++++++++++++++++++++----------- asm/intel64/codes.h | 3 +++ asm/intel64/mov.cpp | 5 +++++ cpp.cpp | 7 +++--- flowgraph/data.h | 5 +++-- flowgraph/node.cpp | 2 +- flowgraph/node.h | 11 +++++---- 9 files changed, 123 insertions(+), 24 deletions(-) diff --git a/asm/encode.cpp b/asm/encode.cpp index ea50cb7..8bf33c0 100644 --- a/asm/encode.cpp +++ b/asm/encode.cpp @@ -1,6 +1,68 @@ +// Intel specific conversion: Abstract Graph -> Machine specific segment #include "encode.h" +#include "asm/assembler.h" +#include "minicc.h" + +#include + +#include + void Asm::toMachineCode(const FlowGraph::Graph& graph, Segment& segment) { + segment.clear(); + + for (const std::shared_ptr& node: graph) { + try { + FlowGraph::BinaryOperation& op {dynamic_cast(*node)}; + + auto operands {op.operands()}; + // TODO: ignore destination (0) for now + + if (operands[1].type() != FlowGraph::DataType::Int) { + std::runtime_error("Bad type for operand 1: "s + std::to_string(int(operands[1].type()))); + } + + if (operands[2].type() != FlowGraph::DataType::Int) { + std::runtime_error("Bad type for operand 2: "s + std::to_string(int(operands[2].type()))); + } + + if (!operands[1].storage()) + throw std::runtime_error("ICE: Operand 1 storage is 0"); + if (!operands[2].storage()) + throw std::runtime_error("ICE: Operand 2 storage is 0"); + + uint32_t immediate1{}; + try { + FlowGraph::Constant& value1 {dynamic_cast(*operands[1].storage())}; + if (value1.value().size() < sizeof(uint32_t)) + throw std::runtime_error("ICE: Int data from operand 1 needs at least 4 bytes, got "s + std::to_string(value1.value().size())); + + immediate1 = boost::endian::little_to_native(*(reinterpret_cast(value1.value().data()))); + } catch (const std::bad_cast& ex) { + std::runtime_error("Bad value for operand 1: Constant expected"); + } + + uint32_t immediate2{}; + try { + FlowGraph::Constant& value2 {dynamic_cast(*operands[2].storage())}; + if (value2.value().size() < sizeof(uint32_t)) + throw std::runtime_error("ICE: Int data from operand 2 needs at least 4 bytes, got "s + std::to_string(value2.value().size())); + + immediate2 = boost::endian::little_to_native(*(reinterpret_cast(value2.value().data()))); + } catch (const std::bad_cast& ex) { + std::runtime_error("Bad value for operand 2: Constant expected"); + } + + Asm::Args args1{{Asm::Args::Register32("edi"), Asm::Args::Immediate32(immediate1)}}; + segment.push_back(makeOp("mov", args1)); + + Asm::Args args2{{Asm::Args::Register32("edi"), Asm::Args::Immediate32(immediate2)}}; + segment.push_back(makeOp("add", args2)); + + } catch (const std::bad_cast& ex) { + std::runtime_error("ICE: Encoding: Unsupported node: "s + ex.what()); + } + } } diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp index 106ffec..4438895 100644 --- a/asm/intel64/add.cpp +++ b/asm/intel64/add.cpp @@ -12,8 +12,12 @@ Op_add::Op_add(Asm::Args& args) if (args[0].type() == typeid(Asm::Args::Register32) && std::any_cast(args[0]).name() == "eax" && args[1].type() == typeid(Asm::Args::Immediate32)) - { // add eax, imm32 + { // add eax, imm32 (before "add reg32, imm32"! It's shorter.) machine_code = std::vector{ 0x05 } + std::any_cast(args[1]).getCode(); + } else if (args[0].type() == typeid(Asm::Args::Register32) && + args[1].type() == typeid(Asm::Args::Immediate32)) + { // add reg32, imm32 + machine_code = std::vector{ 0x81 } + ModRM("/0", std::any_cast(args[0]).name()) + std::any_cast(args[1]).getCode(); } else if (args[0].type() == typeid(Asm::Args::Register64) && std::any_cast(args[0]).name() == "rax" && args[1].type() == typeid(Asm::Args::Immediate32)) diff --git a/asm/intel64/codes.cpp b/asm/intel64/codes.cpp index 66a08dd..5d93a57 100644 --- a/asm/intel64/codes.cpp +++ b/asm/intel64/codes.cpp @@ -29,14 +29,14 @@ namespace { {"dl", 2}, {"dh", 6}, {"ax", 0}, {"sp", 4}, - {"bx", 3}, {"bp", 7}, - {"cx", 1}, {"si", 5}, - {"dx", 2}, {"di", 6}, + {"bx", 3}, {"bp", 5}, + {"cx", 1}, {"si", 6}, + {"dx", 2}, {"di", 7}, {"eax", 0}, {"esp", 4}, - {"ebx", 3}, {"ebp", 7}, - {"ecx", 1}, {"esi", 5}, - {"edx", 2}, {"edi", 6}, + {"ebx", 3}, {"ebp", 5}, + {"ecx", 1}, {"esi", 6}, + {"edx", 2}, {"edi", 7}, }; } @@ -44,15 +44,26 @@ namespace { // Manual, page 530 // Reg + Reg/Memory uint8_t ModRM(const std::string& reg, const std::string& rm) { - // TODO: extend uint8_t result{0b11000000}; - auto index1{ IndexOfRegister.find(reg) }; - if (index1 == IndexOfRegister.end()) - throw std::runtime_error("Unknown register for arg1: "s + reg); - - result |= (index1->second << 3); - + size_t val_reg{}; + // reg + if (reg.size() > 0 && reg[0] == '/') { // "/digit" + try { + val_reg = stoull(reg.substr(1)); + } catch (const std::exception& ex) { + throw std::runtime_error("ModRM: Bad digit in arg1: "s + reg); + } + } else { // reg + auto index1{ IndexOfRegister.find(reg) }; + if (index1 == IndexOfRegister.end()) + throw std::runtime_error("ModRM: Unknown register for arg1: "s + reg); + val_reg = index1->second; + } + + result |= (val_reg << 3); + + // rm auto index2{ IndexOfRegister.find(rm) }; if (index2 == IndexOfRegister.end()) throw std::runtime_error("Unknown register for arg2: "s + rm); @@ -62,6 +73,15 @@ uint8_t ModRM(const std::string& reg, const std::string& rm) { return result; } +uint8_t RegNo(const std::string& reg) +{ + auto index{ IndexOfRegister.find(reg) }; + if (index == IndexOfRegister.end()) + throw std::runtime_error("Reg: Unknown register for arg: "s + reg); + + return index->second; +} + #if 0 prefixes{ "lock", 0xf0, diff --git a/asm/intel64/codes.h b/asm/intel64/codes.h index 0ff17f1..112eef4 100644 --- a/asm/intel64/codes.h +++ b/asm/intel64/codes.h @@ -10,3 +10,6 @@ std::vector REX(const std::string& s); // Manual, page 530 // Reg + Reg/Memory uint8_t ModRM(const std::string& reg, const std::string& rm); + +// Just the number of reg, e.g. for encoding inside primary opcode +uint8_t RegNo(const std::string& reg); diff --git a/asm/intel64/mov.cpp b/asm/intel64/mov.cpp index 40a48f8..8603fc9 100644 --- a/asm/intel64/mov.cpp +++ b/asm/intel64/mov.cpp @@ -15,6 +15,8 @@ Op_mov::Op_mov(Asm::Args& args) // r/m8, r8: ModRM:r/m (w), ModRM:reg (r) machine_code = std::vector{ 0x88 } + ModRM(std::any_cast(args[1]).name(), std::any_cast(args[0]).name()); + } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Immediate32)) { // mov reg32, imm32 + machine_code = std::vector{ static_cast(0xB8 + RegNo(std::any_cast(args[0]).name())) } + std::any_cast(args[1]).getCode(); } else { throw std::runtime_error("Unimplemented: mov "s + args[0].type().name() + " "s + args[1].type().name()); } @@ -25,6 +27,9 @@ namespace { bool registered { registerOp(mangleName("mov"), [](Asm::Args& args) -> std::shared_ptr{ return std::make_shared(args); + }) && + registerOp(mangleName("mov"), [](Asm::Args& args) -> std::shared_ptr{ + return std::make_shared(args); }) }; diff --git a/cpp.cpp b/cpp.cpp index c988b5d..c8de4c8 100644 --- a/cpp.cpp +++ b/cpp.cpp @@ -1,6 +1,7 @@ #include "cpp.h" #include "asm/encode.h" +#include "asm/operators.h" #include "bnf.h" #include "cppbnf.h" #include "debug.h" @@ -584,13 +585,13 @@ void CPP::link() // TODO // mSegment -> elf -#if 0 - return { + + mCode = std::vector{ 0x48, 0xc7, 0xc0, 0x3c, 0x00, 0x00, 0x00, // mov $0x3c,%rax # syscall 60 0x48, 0x31, 0xff, // xor %rdi,%rdi # exit code 0 + } + mSegment.getCode() + std::vector{ // add to edi 0x0f, 0x05, // syscall }; -#endif } // phases of translation, according to standard diff --git a/flowgraph/data.h b/flowgraph/data.h index 1ed4964..abf046d 100644 --- a/flowgraph/data.h +++ b/flowgraph/data.h @@ -8,7 +8,7 @@ namespace FlowGraph { // Explicitely not including size - enum class DataType + enum class DataType: int { Size, Int, @@ -27,8 +27,9 @@ namespace FlowGraph { class Data { public: - Data(DataType type, std::shared_ptr storage):m_type(type) {} + Data(DataType type, std::shared_ptr storage): m_type(type), m_storage(storage) {} DataType type() const { return m_type; } + std::shared_ptr storage() { return m_storage; } private: const DataType m_type; std::shared_ptr m_storage; diff --git a/flowgraph/node.cpp b/flowgraph/node.cpp index 81217ce..795a252 100644 --- a/flowgraph/node.cpp +++ b/flowgraph/node.cpp @@ -9,7 +9,7 @@ using namespace FlowGraph; // 4 byte for now Data FlowGraph::MakeConstantInt(int i) { - std::vector value(size_t(4)); + std::vector value(size_t(4), uint8_t(0)); *(reinterpret_cast(value.data())) = boost::endian::native_to_little(static_cast(i)); return Data(DataType::Int, std::make_shared(value)); } diff --git a/flowgraph/node.h b/flowgraph/node.h index 89f6088..853b017 100644 --- a/flowgraph/node.h +++ b/flowgraph/node.h @@ -17,7 +17,12 @@ namespace FlowGraph { class Node { public: + Node(){} + Node(std::vector operands): mOperands(operands) {} + std::vector& operands() { return mOperands; } virtual ~Node() {}; // force class to be polymorphic (e.g. in a container) + private: + std::vector mOperands; }; // Memory on Heap: new and delete @@ -146,13 +151,11 @@ namespace FlowGraph { { public: BinaryOperation(BinaryOperationType type, Data& destination, Data& source0, Data& source1): - m_type(type), m_destination(destination), m_source0(source0), m_source1(source1) + Node(std::vector({destination, source0, source1})), m_type(type) {} + BinaryOperationType type() {return m_type;} private: BinaryOperationType m_type; - Data m_destination; - Data m_source0; - Data m_source1; }; } // namespace FlowGraph -- cgit v1.2.3