diff options
Diffstat (limited to 'intel.cpp')
-rw-r--r-- | intel.cpp | 437 |
1 files changed, 0 insertions, 437 deletions
diff --git a/intel.cpp b/intel.cpp deleted file mode 100644 index 65b9f3f..0000000 --- a/intel.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Intel assembly language - - -// segments: code, stack - -#include "minicc.h" - -#include <algorithm> -#include <any> -#include <array> -#include <deque> -#include <functional> -#include <stdexcept> -#include <functional> -#include <stdexcept> -#include <string> -#include <unordered_map> -#include <vector> - -using namespace std::string_literals; -using namespace std::placeholders; - -namespace { - - std::vector<uint8_t> imm8(std::string s) { - long value{ std::stol(s) }; - uint8_t* bin = reinterpret_cast<uint8_t*>(&value); - return { uint8_t(*bin & 0xFF) }; - } - - std::vector<uint8_t> imm32(std::string s) { - long value{ std::stol(s) }; - uint32_t* bin = reinterpret_cast<uint32_t*>(&value); - return {uint8_t(*bin & 0xFF), uint8_t(*bin >> 8 & 0xFF), uint8_t(*bin >> 16 & 0xFF), uint8_t(*bin >> 24 & 0xFF) }; - } - - std::unordered_map<std::string, size_t> IndexOfRegister{ - {"al", 0}, {"ah", 4}, - {"bl", 3}, {"bh", 7}, - {"cl", 1}, {"ch", 5}, - {"dl", 2}, {"dh", 6}, - - {"ax", 0}, {"sp", 4}, - {"bx", 3}, {"bp", 7}, - {"cx", 1}, {"si", 5}, - {"dx", 2}, {"di", 6}, - - {"eax", 0}, {"esp", 4}, - {"ebx", 3}, {"ebp", 7}, - {"ecx", 1}, {"esi", 5}, - {"edx", 2}, {"edi", 6}, - }; - - // Manual, page 530 - // Reg + Reg/Memory - uint8_t ModRM(std::string reg, std::string rm) { - // TODO: extend - uint8_t result{0b11000000}; - - auto index1{ IndexOfRegister.find(reg) }; - if (index1 == IndexOfRegister.end()) - throw std::runtime_error("Unknown register for arg1: "s + reg); - - result |= (index1->second << 3); - - auto index2{ IndexOfRegister.find(rm) }; - if (index2 == IndexOfRegister.end()) - throw std::runtime_error("Unknown register for arg2: "s + rm); - - result |= index2->second; - - return result; - } - - enum class AddressType { - Relative8, - Relative16, - Relative32, - Absolute8, - Absolute16, - Absolute32, - }; - - struct Address - { - AddressType type; - size_t position; // relative to respective machine code, e.g. byte 1 in jump - std::string label; // where to jump to, as label - }; - - struct InstructionCode - { - std::vector<uint8_t> machine_code; - std::vector<Address> addresses; - }; - - // List of alternative codes - typedef std::deque<InstructionCode> InstructionCodeList; - - bool O1{ true }; // Optimization - - using OP_T = std::vector<uint8_t>; - - InstructionCodeList op_jmp(const std::vector<Token>& sl, std::vector<uint8_t> op_bytes_8, std::vector<uint8_t> op_bytes_32) - { - if (sl.size() == 2) { // JMP rel8 / rel32 - const std::string& label{ sl[1].value }; - InstructionCodeList result; - if (op_bytes_32.size() > 0) { - op_bytes_32.resize(op_bytes_32.size() + 4, 0x00); - result.push_back({ op_bytes_32, { {AddressType::Relative32, op_bytes_32.size() - 4, label} } } ); - } - if (op_bytes_8.size() > 0 && (O1 || op_bytes_32.size() == 0)) { - op_bytes_8.push_back(0x00); - result.push_back({ op_bytes_8, { {AddressType::Relative8, op_bytes_8.size() - 1, label} } }); - } - return result; - } - - // ... TODO - throw std::runtime_error("Unknown command: "s + sl[0].value); - } - - std::unordered_map<std::string, std::function<InstructionCodeList(const std::vector<Token>&)>> ops_old{ - - // Call Procedure - {"call", std::bind(op_jmp, _1, OP_T{}, OP_T{ 0xE8 })}, - - - // Unconditional Jump - {"jmp", std::bind(op_jmp, _1, OP_T{ 0xEB }, OP_T{ 0xE9 })}, - - // Conditional Jumps - {"ja", std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })}, - {"jae", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })}, - {"jb", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })}, - {"jbe", std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })}, - {"jc", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })}, - {"jecxz", std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})}, - {"jrcxz", std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})}, - {"je", std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })}, - {"jg", std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })}, - {"jge", std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })}, - {"jl", std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })}, - {"jle", std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })}, - {"jna", std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })}, - {"jnae", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })}, - {"jnb", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })}, - {"jnbe", std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })}, - {"jnc", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })}, - {"jne", std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })}, - {"jng", std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })}, - {"jnge", std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })}, - {"jnl", std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })}, - {"jnle", std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })}, - {"jno", std::bind(op_jmp, _1, OP_T{ 0x71 }, OP_T{ 0x0F, 0x81 })}, - {"jnp", std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })}, - {"jns", std::bind(op_jmp, _1, OP_T{ 0x79 }, OP_T{ 0x0F, 0x89 })}, - {"jnz", std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })}, - {"jo", std::bind(op_jmp, _1, OP_T{ 0x70 }, OP_T{ 0x0F, 0x80 })}, - {"jp", std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })}, - {"jpe", std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })}, - {"jpo", std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })}, - {"js", std::bind(op_jmp, _1, OP_T{ 0x78 }, OP_T{ 0x0F, 0x88 })}, - {"jz", std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })}, - - // Memory Move - { "mov", [](const std::vector<Token>& sl) -> InstructionCodeList { - if (sl.size() == 3) { - return { { std::vector<uint8_t>{ 0x88 } + ModRM(sl[2].value, sl[1].value), {} } }; // r/m8, r8: ModRM:r/m (w), ModRM:reg (r) - } - - // ... TODO - throw std::runtime_error("Unknown command: "s + sl[0].value); - }}, - - { "xor", [](const std::vector<Token>& sl) -> InstructionCodeList { - if (sl.size() == 3) { - return { { std::vector<uint8_t>{ 0x33 } + ModRM(sl[1].value, sl[2].value) } }; // r8, r/m8: ModRM:reg (w), ModRM:r/m (r) - } - - // ... TODO - throw std::runtime_error("Unknown command: "s + sl[0].value); - }}, - - }; - -#if 0 - prefixes{ - "lock", 0xf0, - - // branch hint - 0x2e, "branch not taken" - 0x3e, "branch taken" - - 0x66, "operand size override" // switch between 16 and 32 bit operands - 0x67, "address size override" // switch between 16 and 32 bit addresses - }; - }; -#endif - -#ifdef ASM_PARSER - BNF GetBNF() { - // TODO: - return { - { "assembler-unit", { - {} - }}, - { "immediate-32", { - {} - }}, - { "mnemonic", { - {} - }}, - { "register", { - {} - }}, - { "register-8", { - {} - }}, - { "register-16", { - {} - }}, - { "register-32", { - {} - }}, - { "register-64", { - {} - }}, - - }; - }; -#endif - - // Checks a 32 bit relative address if it's valid as 8 bit address - bool IsSmallAddress(const InstructionCode& insn) { - if (insn.addresses.size() != 1) - throw std::runtime_error("Bad number of addresses in insn"); - - size_t i{insn.addresses[0].position}; - - if (i > insn.machine_code.size() - 3) - throw std::runtime_error("Bad Address index "s + std::to_string(i) + " in insn with "s + std::to_string(insn.machine_code.size()) + " bytes"s); - - if (std::count(insn.machine_code.begin() + i, insn.machine_code.begin() + i + 3, 0x00) == 3 || - std::count(insn.machine_code.begin() + i, insn.machine_code.begin() + i + 3, 0xFF) == 3) - return true; - - return false; - } - - -} // namespace - -class Assembler { - - std::unordered_map<std::string, size_t> labels; ///< labels with their positions in instruction list - - /// 1st Level: Instructions - /// 2nd Level: Alternatives - /// 3rd Level: Bytes of single instruction - std::vector<InstructionCodeList> insn_list; - - uint64_t addressFromInstructionIndex(size_t index) - { - // TODO: cache this to prevent repetitive summing - - if (index > insn_list.size()) - throw std::runtime_error("Index "s + std::to_string(index) + " out of range ("s + std::to_string(insn_list.size()) + ")"s); - - uint64_t sum{}; - - for (size_t i = 0; i < index; i++) { - if (insn_list[i].size() < 1) { - throw std::runtime_error("Insufficient alternatives at index "s + std::to_string(i)); - } - - sum += static_cast<uint64_t>(insn_list[i][0].machine_code.size()); - } - - return sum; - } - - uint64_t addressFromLabel(std::string label) - { - auto it{ labels.find(label) }; - if (it == labels.end()) - throw std::runtime_error("Label not found: "s + label); - - return addressFromInstructionIndex(it->second); - } - - std::unordered_map<AddressType, std::function<void(std::vector<uint8_t>&, const Address&, uint64_t)>> addressInserters{ - {AddressType::Relative8, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) - { - int64_t difference = static_cast<int64_t>(addressFromLabel(target_address.label)) - insn_address; - if (difference < -128 || difference > 127) - throw std::runtime_error("Distance too big"); - - int8_t diff8 = static_cast<int8_t>(difference); - uint8_t diff_u8 = *reinterpret_cast<uint8_t*>(&diff8); - - machine_code[target_address.position] = diff_u8; - } - }, - {AddressType::Relative16, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) { throw std::runtime_error("Relative16 Address not yet supported."); }}, - {AddressType::Relative32, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) - { - int64_t difference = static_cast<int64_t>(addressFromLabel(target_address.label)) - insn_address; - if (difference < -4294967296 || difference > 4294967295) - throw std::runtime_error("Distance too big"); - - int32_t diff32 = static_cast<int32_t>(difference); - uint32_t diff_u32 = *reinterpret_cast<uint32_t*>(&diff32); - - machine_code[target_address.position] = diff_u32 & 0xFF; // little endian - machine_code[target_address.position + 1] = diff_u32 >> 8 & 0xFF; - machine_code[target_address.position + 2] = diff_u32 >> 16 & 0xFF; - machine_code[target_address.position + 3] = diff_u32 >> 24 & 0xFF; - } - }, - {AddressType::Absolute8, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute8 Address not yet supported."); }}, - {AddressType::Absolute16, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute16 Address not yet supported."); }}, - {AddressType::Absolute32, [&](std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) {throw std::runtime_error("Absolute32 Address not yet supported."); }}, - }; - - void produce_machine_code(std::vector<std::vector<Token>>& tl) - { - for (const auto& t : tl) { - // label: - // label: mnemonic arg1, arg2, arg3 - // mnemonic arg1, arg2, arg3 - - if (t.size() == 2 && t[0].type == "label" && t[1].type == ":") { // label - if (labels.find(t[0].value) != labels.end()) - throw std::runtime_error("Label already defined: "s + t[0].value); - - labels[t[0].value] = insn_list.size(); - } else if (t.size() >= 1 && t[0].type == "instruction") { // instruction - std::string instruction{ t[0].value }; - auto it = ops_old.find(instruction); - if (it == ops_old.end()) - throw std::runtime_error("Unknown instruction: "s + instruction); - - InstructionCodeList codes = it->second(t); - - if (codes.size() == 0) - throw std::runtime_error("No instruction generated"); - - insn_list.push_back(codes); - - } else - throw std::runtime_error("Syntax error"s); - } - } - - void insert_addresses() - { - for (size_t i = 0; i < insn_list.size(); i++) { - InstructionCodeList& list{ insn_list[i] }; - if (list.size() == 0) - throw std::runtime_error("No instruction at index "s + std::to_string(i)); - - InstructionCode& code{ list[0] }; - - for (const auto& address : code.addresses) { - addressInserters[address.type](code.machine_code, address, addressFromInstructionIndex(i)); - } - } - } - - void optimize() - { - // reduce Jump sizes via alternatives if possible - bool changed{}; - do { - changed = false; - - for (size_t i = 0; i < insn_list.size(); i++) { - InstructionCodeList& list{ insn_list[i] }; // Alternatives - - // apply specific heuristics to optimization case - if (list.size() == 2) { - if (list[0].addresses.size() == 1 && list[1].addresses.size() == 1) { - if (list[0].addresses[0].type == AddressType::Relative32 && list[1].addresses[0].type == AddressType::Relative8) { - if (IsSmallAddress(list[0])) { - list.pop_front(); - break; // start over from start of program - } - } - } - } - } - - if (changed) - insert_addresses(); // update - - } while (changed); - } - - std::vector<uint8_t> collect_code() - { - std::vector<uint8_t> result; - - // collect generated machine instructions for result - // Alternatives already resolved, if configured. Consider only 1st entry (no matter if optimized or not). - for (size_t i = 0; i < insn_list.size(); i++) { - InstructionCodeList& list{ insn_list[i] }; - if (list.size() == 0) - throw std::runtime_error("No instruction at index "s + std::to_string(i)); - - InstructionCode& code{ list[0] }; - - result.insert(result.end(), code.machine_code.begin(), code.machine_code.end()); - } - - return result; - } - -public: - Assembler() {} - - std::vector<uint8_t> assemble(std::vector<std::vector<Token>> tl) - { - labels.clear(); - insn_list.clear(); - - produce_machine_code(tl); // 1st pass - insert_addresses(); // 2nd pass - if (O1) { - optimize(); // 3rd pass - } - - return collect_code(); // 4th pass - } - -}; // class Assembler |