summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2020-10-11 14:25:23 +0200
committerRoland Reichwein <mail@reichwein.it>2020-10-11 14:25:23 +0200
commitefbbad04bc093a133ca2aa5a462de0d37b04f929 (patch)
tree8dc39bee70cd40a03e844b47303528bc63ffbe24
parent0c5a5a94dab6d19192946f0a57d920628823ece1 (diff)
Added COFF, Intel ASM
-rw-r--r--Makefile2
-rw-r--r--coff.cpp693
-rw-r--r--coff.h8
-rw-r--r--intel.cpp503
4 files changed, 1206 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 9645144..e907b75 100644
--- a/Makefile
+++ b/Makefile
@@ -48,10 +48,12 @@ PROGSRC=\
bnf.cpp \
cpp.cpp \
cppbnf.cpp \
+ coff.cpp \
debug.cpp \
elf.cpp \
file.cpp \
grammer.cpp \
+ intel.cpp \
lexer.cpp \
minicc.cpp \
diff --git a/coff.cpp b/coff.cpp
new file mode 100644
index 0000000..f2a5aa8
--- /dev/null
+++ b/coff.cpp
@@ -0,0 +1,693 @@
+#include "coff.h"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/endian/conversion.hpp>
+
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace fs = std::filesystem;
+
+using namespace std::string_literals;
+
+namespace {
+
+#pragma pack(push)
+#pragma pack(1)
+struct MSDOSStub // first 0x40 bytes of a PE image, as written by PutDOSStub()
+{
+ uint8_t padding[0x3c];
+ uint32_t PESignatureOffset; // file offset of the "PE\0\0" signature; PE_addr() reads this field at 0x3c
+};
+
+struct PESignature // marker that PE_addr() looks for at the offset stored in the DOS stub
+{
+ uint8_t bytes[4]{}; // "PE\0\0"
+};
+
+struct COFFHeader // packed COFF file header: starts an OBJ file (DumpObj) and follows the PE signature in an image (DumpExe)
+{
+ uint16_t Machine{}; // one of the IMAGE_FILE_MACHINE_* values
+ uint16_t NumberOfSections{}; // number of SectionHeader records in the section table
+ uint32_t TimeDateStamp{};
+ uint32_t PointerToSymbolTable{};
+ uint32_t NumberOfSymbols{};
+ uint16_t SizeOfOptionalHeader{}; // bytes between this header and the first SectionHeader (see DumpExe)
+ uint16_t Characteristics{}; // IMAGE_FILE_* flag bits
+};
+
+// Values for COFFHeader.Machine:
+const uint16_t IMAGE_FILE_MACHINE_UNKNOWN = 0;
+const uint16_t IMAGE_FILE_MACHINE_AMD64 = 0x8664; // the only machine type DumpExe()/DumpObj() accept without a warning
+
+// Flag bits for COFFHeader.Characteristics (PutCOFFHeader() sets both):
+const uint16_t IMAGE_FILE_EXECUTABLE_IMAGE = 0x002;
+const uint16_t IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x020;
+
+// Value for the Subsystem field of COFFOptionalHeader_Windows / COFFOptionalHeader_Windows_PE32p:
+const uint16_t IMAGE_SUBSYSTEM_WINDOWS_CUI = 3;
+
+struct COFFOptionalHeader // standard optional-header fields; unlike COFFOptionalHeader_PE32p this variant carries BaseOfData
+{
+ uint16_t Magic{}; // MAGIC_PE32 or MAGIC_PE32p
+ uint8_t MajorLinkerVersion{};
+ uint8_t MinorLinkerVersion{};
+ uint32_t SizeOfCode{};
+ uint32_t SizeOfInitializedData{};
+ uint32_t SizeOfUninitializedData{};
+ uint32_t AddressOfEntryPoint{};
+ uint32_t BaseOfCode{};
+ uint32_t BaseOfData{}; // not present in COFFOptionalHeader_PE32p
+};
+
+// Values for the Magic field of COFFOptionalHeader / COFFOptionalHeader_PE32p:
+const uint16_t MAGIC_PE32 = 0x010b;
+const uint16_t MAGIC_PE32p = 0x020b; // same value PutCOFFHeader() writes as the literal 0x20B
+
+// Flag bits for SectionHeader.Characteristics (combined by the PutCOFFSection*Header() functions):
+const uint32_t IMAGE_SCN_CNT_CODE = 0x00000020;
+const uint32_t IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040;
+const uint32_t IMAGE_SCN_MEM_EXECUTE = 0x20000000;
+const uint32_t IMAGE_SCN_MEM_READ = 0x40000000;
+const uint32_t IMAGE_SCN_MEM_WRITE = 0x80000000;
+
+struct COFFOptionalHeader_PE32p // standard optional-header fields as written by PutCOFFHeader(); same as COFFOptionalHeader minus BaseOfData
+{
+ uint16_t Magic{}; // PutCOFFHeader() stores 0x20B (MAGIC_PE32p) here
+ uint8_t MajorLinkerVersion{};
+ uint8_t MinorLinkerVersion{};
+ uint32_t SizeOfCode{};
+ uint32_t SizeOfInitializedData{};
+ uint32_t SizeOfUninitializedData{};
+ uint32_t AddressOfEntryPoint{};
+ uint32_t BaseOfCode{};
+};
+
+struct COFFOptionalHeader_Windows // Windows-specific optional-header fields, variant with 32-bit ImageBase and stack/heap sizes
+{
+ uint32_t ImageBase{}; // 64 bits wide in COFFOptionalHeader_Windows_PE32p
+ uint32_t SectionAlignment{};
+ uint32_t FileAlignment{};
+ uint16_t MajorOperatingSystemVersion{};
+ uint16_t MinorOperatingSystemVersion{};
+ uint16_t MajorImageVersion{};
+ uint16_t MinorImageVersion{};
+ uint16_t MajorSubsystemVersion{};
+ uint16_t MinorSubsystemVersion{};
+ uint32_t Win32VersionValue{}; // reserved, =0
+ uint32_t SizeOfImage{};
+ uint32_t SizeOfHeaders{};
+ uint32_t CheckSum{};
+ uint16_t Subsystem{}; // e.g. IMAGE_SUBSYSTEM_WINDOWS_CUI
+ uint16_t DllCharacteristics{};
+ uint32_t SizeOfStackReserve{}; // the four stack/heap sizes are 64 bits wide in the _PE32p variant
+ uint32_t SizeOfStackCommit{};
+ uint32_t SizeOfHeapReserve{};
+ uint32_t SizeOfHeapCommit{};
+ uint32_t LoaderFlags{};
+ uint32_t NumberOfRvaAndSizes{};
+};
+
+struct COFFOptionalHeader_Windows_PE32p // Windows-specific optional-header fields as written by PutCOFFHeader(): 64-bit ImageBase and stack/heap sizes
+{
+ uint64_t ImageBase{};
+ uint32_t SectionAlignment{};
+ uint32_t FileAlignment{}; // PutCOFFHeader() uses 512; the section raw sizes are multiples of it
+ uint16_t MajorOperatingSystemVersion{};
+ uint16_t MinorOperatingSystemVersion{};
+ uint16_t MajorImageVersion{};
+ uint16_t MinorImageVersion{};
+ uint16_t MajorSubsystemVersion{};
+ uint16_t MinorSubsystemVersion{};
+ uint32_t Win32VersionValue{}; // reserved, =0
+ uint32_t SizeOfImage{};
+ uint32_t SizeOfHeaders{};
+ uint32_t CheckSum{};
+ uint16_t Subsystem{}; // e.g. IMAGE_SUBSYSTEM_WINDOWS_CUI
+ uint16_t DllCharacteristics{};
+ uint64_t SizeOfStackReserve{};
+ uint64_t SizeOfStackCommit{};
+ uint64_t SizeOfHeapReserve{};
+ uint64_t SizeOfHeapCommit{};
+ uint32_t LoaderFlags{};
+ uint32_t NumberOfRvaAndSizes{}; // PutCOFFHeader() writes 0x10 and appends 8 * 16 zero bytes of data directories
+};
+
+// For each section:
+struct SectionHeader // one packed record of the section table that DumpExe()/DumpObj() walk
+{
+ uint8_t Name[8]{}; // decoded by to_string(); names starting with '/' are not supported there yet
+ uint32_t VirtualSize{}; // number of bytes DumpExe() dumps; 0 for obj (see DumpObj)
+ uint32_t VirtualAddress{};
+ uint32_t SizeOfRawData{}; // number of bytes stored in the file for this section
+ uint32_t PointerToRawData{}; // file offset of those bytes (passed as Offset to DumpSection)
+ uint32_t PointerToRelocations{};
+ uint32_t PointerToLinenumbers{};
+ uint16_t NumberOfRelocations{};
+ uint16_t NumberOfLinenumbers{};
+ uint32_t Characteristics{}; // IMAGE_SCN_* flag bits
+};
+
+struct COFFRelocation // packed relocation record; declared for the file format, not read or written by the code visible here
+{
+ uint32_t VirtualAddress{};
+ uint32_t SymbolTableIndex{};
+ uint16_t Type{};
+};
+
+struct COFFSymbolTableRecord // packed symbol table record; declared for the file format, not read or written by the code visible here
+{
+ uint64_t Name{}; // up-to-8-Byte String or COFFSymbolTableRecordName (if longer)
+ uint32_t Value{};
+ uint16_t SectionNumber{};
+ uint16_t Type{};
+ uint8_t StorageClass{};
+ uint8_t NumberOfAuxSymbols{};
+};
+
+struct COFFSymbolTableRecordName // alternative 8-byte layout of COFFSymbolTableRecord::Name for names longer than 8 bytes
+{
+ uint32_t Zeroes{};
+ uint32_t Offset{}; // NOTE(review): presumably an offset into the string table - confirm against the COFF specification
+};
+
+struct LibSignature // file signature of a LIB archive; COFF::Dump() tests for it and DumpLib() skips sizeof(LibSignature) bytes
+{
+ uint8_t bytes[8]{}; // "!<arch>\n"
+};
+
+// Header in front of every member of a LIB archive. All fields are fixed-width
+// ASCII text, so the struct occupies exactly 60 bytes.
+struct LibHeader
+{
+  uint8_t Name[16]{};   // member name, decoded by GetName()
+  uint8_t Date[12]{};
+  uint8_t UserID[6]{};
+  uint8_t GroupID[6]{};
+  uint8_t Mode[8]{};
+  uint8_t Size[10]{};   // ASCII-decimal size of Member Body (size without LibHeader size)
+  uint8_t End[2]{};     // "`\n" (0x60 0x0A), verified by DumpMember()
+
+  // Decodes the Name field.
+  // Returns "//" for the longnames member, "/" for the linker members,
+  // "/<number>" (an offset into the longnames member) for long names and
+  // "name" for a short "name/" entry.
+  // Throws std::runtime_error if the field contains no '/'.
+  std::string GetName() const
+  {
+    const std::string s{ reinterpret_cast<const char*>(Name), sizeof(Name) };
+
+    const size_t pos = s.find('/');
+    if (pos == std::string::npos)
+      throw std::runtime_error("LibHeader Name doesn't contain '/'");
+
+    if (pos != 0)
+      return s.substr(0, pos); // "name/"
+
+    if (s[1] == '/')
+      return "//"; // longnames header
+    if (s[1] == ' ')
+      return "/"; // linker members (#0 and #1)
+
+    // "/<number-as-offset-into-longnames-member>": the field is space-padded,
+    // return it without the trailing padding.
+    return s.substr(0, s.find(' '));
+  }
+
+  // Parses the Size field.
+  // Returns the size of the member body in bytes (without this header).
+  // Throws std::runtime_error if the field is not a non-negative decimal number.
+  size_t BodySize() const
+  {
+    std::string s{ reinterpret_cast<const char*>(Size), sizeof(Size) };
+    s = s.substr(0, s.find(' ')); // remove trailing space-padding
+
+    long long size{};
+    try {
+      size = std::stoll(s);
+    }
+    catch (const std::exception &) {
+      throw std::runtime_error("Bad size for LibHeader");
+    }
+
+    // A negative number would turn into a huge size_t and defeat the
+    // callers' bounds checks.
+    if (size < 0)
+      throw std::runtime_error("Bad size for LibHeader");
+
+    return static_cast<size_t>(size);
+  }
+};
+
+struct FirstLinkerMember { // fixed part of the body of archive member #0; DumpMember() only reports the symbol count
+ uint32_t NumberOfSymbols; // big endian
+ // NumberOfSymbols x uint32_t Offsets;
+ // String Table
+};
+
+struct SecondLinkerMember { // fixed part of the body of archive member #1; DumpMember() only reports the member count
+ uint32_t NumberOfMembers; // read without byte swapping by DumpMember(), unlike FirstLinkerMember::NumberOfSymbols
+ // NumberOfMembers x uint32_t Offsets;
+ // uint32_t NumberOfSymbols;
+ // NumberOfSymbols x uint16_t Indices;
+};
+
+// TODO: export table
+// TODO: import table
+// TODO: relocations table
+// TODO: TLS table (thread local storage)
+#pragma pack(pop)
+
+ // Reads the complete file `filename` into memory.
+ // Returns the file contents as bytes (empty for an empty file).
+ // Throws std::runtime_error if the file cannot be opened, its size cannot
+ // be determined, or it cannot be read completely.
+ std::vector<uint8_t> getFile(const fs::path& filename)
+ {
+   std::ifstream file(filename.string(), std::ios::in | std::ios::binary | std::ios::ate);
+
+   if (!file.is_open())
+     throw std::runtime_error("Opening "s + filename.string() + " for reading");
+
+   // The stream was opened with ios::ate, so the current position is the size.
+   const std::streamoff fileSize = file.tellg();
+   if (fileSize < 0) // tellg() reports failure as -1, which must not become a vector size
+     throw std::runtime_error("Reading size of "s + filename.string());
+   file.seekg(0, std::ios::beg);
+
+   std::vector<uint8_t> bytes(static_cast<size_t>(fileSize), 0);
+   if (!bytes.empty() && !file.read(reinterpret_cast<char*>(bytes.data()), static_cast<std::streamsize>(bytes.size())))
+     throw std::runtime_error("Reading "s + filename.string());
+
+   return bytes;
+ }
+
+ // Locates the COFF header of a PE image.
+ // Reads the 32-bit little-endian signature offset stored at 0x3c and checks
+ // that the bytes "PE\0\0" are found there.
+ // Returns the file offset directly behind the signature (i.e. of the
+ // COFFHeader), or 0 if `data` is not a PE image.
+ uint32_t PE_addr(const std::vector<uint8_t>& data)
+ {
+   if (data.size() < 0x40)
+     return 0;
+
+   // Assembled byte by byte: no unaligned access, independent of host endianness.
+   const size_t offset = size_t(data[0x3c]) | size_t(data[0x3d]) << 8 | size_t(data[0x3e]) << 16 | size_t(data[0x3f]) << 24;
+
+   static const uint8_t signature[4]{ 'P', 'E', '\0', '\0' };
+
+   // Written as subtractions so that no sum can overflow.
+   if (offset > UINT32_MAX - sizeof(signature) || offset > data.size() || data.size() - offset < sizeof(signature))
+     return 0;
+
+   if (!std::equal(signature, signature + sizeof(signature), data.begin() + offset))
+     return 0;
+
+   // Keep the full 32-bit offset: headers at 0x100 and beyond are common.
+   return static_cast<uint32_t>(offset + sizeof(signature));
+ }
+
+ // Tells whether `data` holds a PE image, i.e. whether PE_addr() finds a signature.
+ bool isPE(const std::vector<uint8_t>& data)
+ {
+   const uint32_t headerOffset = PE_addr(data);
+   return headerOffset != 0;
+ }
+
+ // Maps a value in 0..15 to its lowercase hexadecimal digit.
+ char to_hex_digit(uint8_t value)
+ {
+   return static_cast<char>((value < 10 ? '0' : 'a' - 10) + value);
+ }
+
+ // Formats the integer `i` as lowercase hexadecimal without prefix,
+ // zero-padded to two digits per byte of T.
+ template< typename T >
+ std::string to_hex(T i)
+ {
+   std::stringstream stream;
+   if constexpr (sizeof(T) == 1) {
+     // iostreams treat 1-byte integers as characters, so the two digits are
+     // produced by hand. Going through uint8_t keeps signed values sane.
+     const auto byte = static_cast<uint8_t>(i);
+     stream << to_hex_digit(byte >> 4) << to_hex_digit(byte & 0xF);
+   } else {
+     stream
+       << std::setfill('0') << std::setw(sizeof(T) * 2)
+       << std::hex << i;
+   }
+   return stream.str();
+ }
+
+ // Like to_hex(), but with a leading "0x" - for every width of T, including
+ // 1-byte values, which used to be printed without the prefix.
+ template< typename T >
+ std::string to_0xhex(T i)
+ {
+   return "0x" + to_hex(i);
+ }
+
+ // Converts the Name field of a SectionHeader to a string.
+ // The field is NUL-padded and only NUL-terminated if the name is shorter
+ // than 8 bytes; the padding is not part of the returned name.
+ // Throws std::runtime_error for "/<offset>" names, which are not supported yet.
+ std::string to_string(const uint8_t(&name)[8]) {
+   if (name[0] == '/') {
+     // rest contains a decimal number ASCII coded as offset into string table
+     throw std::runtime_error("Unimplemented /-based name, TODO!");
+   }
+
+   const uint8_t* const end = std::find(name, name + sizeof(name), uint8_t{ 0 });
+   return std::string(reinterpret_cast<const char*>(name), static_cast<size_t>(end - name));
+ }
+
+ // Prints a hex dump (16 bytes per row plus a printable-character column) of
+ // one section to std::cout.
+ //   data        - complete file contents
+ //   Offset      - file offset of the section's raw data
+ //   Size        - number of raw bytes present in the file
+ //   VirtualSize - number of bytes to print; bytes beyond Size are shown as "oo"
+ // Throws std::runtime_error if the file is shorter than Offset + Size.
+ void DumpSection(const std::vector<uint8_t>& data, uint32_t Offset, uint32_t Size, uint32_t VirtualSize)
+ {
+   // Summed in 64 bits: in 32 bits Offset + Size can wrap around and let a
+   // bogus header pass the check.
+   const uint64_t rawEnd = uint64_t{ Offset } + Size;
+   if (data.size() < rawEnd)
+     throw std::runtime_error("Not enough raw data to dump, got "s + std::to_string(data.size()) + ", expected "s + std::to_string(rawEnd));
+
+   // Size < VirtualSize: the rest is implicitly padded
+   std::string printable;
+
+   for (uint32_t i = 0; i < VirtualSize; i++) {
+     if (i % 16 == 0) {
+       // start of a row: finish the previous one with its printable column
+       std::cout << " " << printable << "\n " << to_0xhex(i) << " ";
+       printable = "";
+     } else if (i % 16 == 8)
+       std::cout << " ";
+
+     const bool present = i < Size;
+     const int c = present ? data[size_t{ Offset } + i] : 0;
+     const std::string value = present ? to_hex(uint8_t(c)) : "oo";
+     std::cout << " " << value;
+     if (std::isprint(c))
+       printable.append(size_t(1), char(c));
+     else
+       printable.append(".");
+   }
+
+   // Pad an incomplete last row so that the printable column stays aligned.
+   const uint32_t lastRow = VirtualSize % 16;
+   std::string filler;
+   if (lastRow > 0) {
+     filler = std::string(size_t(3 * (16 - lastRow)), ' ');
+     if (lastRow <= 8)
+       filler += " ";
+   }
+   std::cout << filler << " " << printable;
+
+   std::cout << "\n";
+ }
+
+ // PE =
+ // MSDOSStub
+ // PESignature
+ // COFFHeader
+ // + COFFOptionalHeader or COFFOptionalHeader_PE32p
+ // + COFFOptionalHeader_Windows or COFFOptionalHeader_Windows_PE32p
+ // + N x DataDirectory
+ // SectionHeader(s)
+
+ // Prints the COFF header summary and a hex dump of every section of the PE
+ // image in `data` to std::cout. The header position is taken from PE_addr().
+ // Throws std::runtime_error if `data` is too short for the COFF header or
+ // for one of the announced section headers.
+ void DumpExe(const std::vector<uint8_t>& data)
+ {
+   const size_t headerPos{ PE_addr(data) };
+
+   if (data.size() < headerPos + sizeof(COFFHeader))
+     throw std::runtime_error("Data size too small to read COFF Header.");
+
+   std::cout << "COFF Image (EXE) found.\n" << std::endl;
+   const auto* const header = reinterpret_cast<const COFFHeader*>(data.data() + headerPos);
+
+   std::cout << "Machine: " << to_0xhex(header->Machine) << "\n";
+   if (header->Machine != IMAGE_FILE_MACHINE_AMD64)
+     std::cout << " Warning: Unsupported.\n";
+   std::cout << "NumberOfSections: " << header->NumberOfSections << "\n";
+
+   if (header->SizeOfOptionalHeader == 0)
+     std::cout << "Warning: SizeOfOptionalHeader is " << header->SizeOfOptionalHeader << ". Expected " << sizeof(COFFOptionalHeader) << ".\n";
+
+   // The section table starts behind the COFF header and the optional header.
+   const size_t tablePos = headerPos + sizeof(COFFHeader) + header->SizeOfOptionalHeader;
+   for (size_t idx = 0; idx < header->NumberOfSections; ++idx) {
+     if (data.size() < tablePos + (idx + 1) * sizeof(SectionHeader))
+       throw std::runtime_error("Data size too small to read next Section Header");
+
+     const auto* const section = reinterpret_cast<const SectionHeader*>(data.data() + tablePos + idx * sizeof(SectionHeader));
+     std::cout << "\nSection #" << (idx + 1) << ":\n";
+     std::cout << " Name: " << to_string(section->Name) << "\n";
+     std::cout << " Size: " << section->VirtualSize << " bytes\n";
+     std::cout << " Raw Data:\n";
+     DumpSection(data, section->PointerToRawData, section->SizeOfRawData, section->VirtualSize);
+   }
+ }
+
+ // COFF OBJ =
+ // COFFHeader
+ // SectionHeader(s)
+ // Prints the COFF header summary and a hex dump of every section of the
+ // COFF object file in `data` to std::cout. Stops after the machine line if
+ // the machine type is not AMD64.
+ // Throws std::runtime_error if `data` is too short for the COFF header or
+ // for one of the announced section headers.
+ void DumpObj(const std::vector<uint8_t>& data)
+ {
+   if (data.size() < sizeof(COFFHeader))
+     throw std::runtime_error("Data size too small to read COFF Header.");
+
+   std::cout << "COFF OBJ found.\n" << std::endl;
+   const auto* const header = reinterpret_cast<const COFFHeader*>(data.data());
+
+   std::cout << "Machine: " << to_0xhex(header->Machine) << "\n";
+   if (header->Machine != IMAGE_FILE_MACHINE_AMD64) {
+     std::cout << " Warning: Unsupported.\n";
+     return;
+   }
+   std::cout << "NumberOfSections: " << header->NumberOfSections << "\n";
+
+   if (header->SizeOfOptionalHeader != 0)
+     std::cout << "Warning: SizeOfOptionalHeader is " << header->SizeOfOptionalHeader << ". Expected 0.\n";
+
+   // In an object file the section table directly follows the COFF header.
+   const size_t tablePos = sizeof(COFFHeader);
+   for (size_t idx = 0; idx < header->NumberOfSections; ++idx) {
+     if (data.size() < tablePos + (idx + 1) * sizeof(SectionHeader))
+       throw std::runtime_error("Data size too small to read next Section Header");
+
+     const auto* const section = reinterpret_cast<const SectionHeader*>(data.data() + tablePos + idx * sizeof(SectionHeader));
+     std::cout << "\nSection #" << (idx + 1) << ":\n";
+     std::cout << " Name: " << to_string(section->Name) << "\n";
+     std::cout << " Size: " << section->SizeOfRawData << " bytes\n";
+     std::cout << " Raw Data:\n";
+     // sectionHeader.VirtualSize is 0 for obj, so the raw size is dumped
+     DumpSection(data, section->PointerToRawData, section->SizeOfRawData, section->SizeOfRawData);
+   }
+ }
+
+ // Prints archive member number `n` (0-based) of the LIB file in `data`.
+ //   byteoffset - file offset of the member's LibHeader; the caller has
+ //                checked that a complete LibHeader is available there
+ // Members 0 and 1 are the linker members, member 2 is the longnames member
+ // if it is named "//"; every other member is dumped as a COFF object.
+ // Throws std::runtime_error on a bad header or truncated member.
+ void DumpMember(size_t n, const std::vector<uint8_t>& data, size_t byteoffset)
+ {
+   const LibHeader& libHeader{ *(reinterpret_cast<const LibHeader*>(data.data() + byteoffset)) };
+
+   if (libHeader.End[0] != 0x60 || libHeader.End[1] != 0x0A)
+     throw std::runtime_error("Bad EndOFHeader signature for header #"s + std::to_string(n + 1) + " at byte offset "s + std::to_string(byteoffset));
+
+   // Parse the ASCII size once instead of for every use.
+   const size_t bodyOffset = byteoffset + sizeof(LibHeader);
+   const size_t bodySize = libHeader.BodySize();
+
+   if (data.size() < bodyOffset + bodySize)
+     throw std::runtime_error("Too few bytes for linker member #"s + std::to_string(n + 1));
+
+   if (n == 0) { // 1st Linker Member
+     if (libHeader.GetName() != "/")
+       throw std::runtime_error("Bad Name for 1st Linker Member: "s + libHeader.GetName());
+
+     if (data.size() < bodyOffset + sizeof(FirstLinkerMember))
+       throw std::runtime_error("Too few bytes for first linker member.");
+
+     const FirstLinkerMember& firstLinkerMember{ *(reinterpret_cast<const FirstLinkerMember*>(data.data() + bodyOffset)) };
+     std::cout << "First Linker Member with " << boost::endian::big_to_native(firstLinkerMember.NumberOfSymbols) << " Symbol(s): Ignored (obsolete).\n" << std::endl;
+   } else if (n == 1) { // 2nd Linker Member
+     if (libHeader.GetName() != "/")
+       throw std::runtime_error("Bad Name for 2nd Linker Member: "s + libHeader.GetName());
+
+     if (data.size() < bodyOffset + sizeof(SecondLinkerMember))
+       throw std::runtime_error("Too few bytes for second linker member.");
+
+     const SecondLinkerMember& secondLinkerMember{ *(reinterpret_cast<const SecondLinkerMember*>(data.data() + bodyOffset)) };
+     std::cout << "Second Linker Member: " << secondLinkerMember.NumberOfMembers << " Archive Member(s)\n" << std::endl;
+   } else if (n == 2 && libHeader.GetName() == "//") { // Longnames Member.
+     // undocumented: Longnames Member not always present
+     std::cout << "Longnames Member\n" << std::endl;
+   } else { // OBJ members: n >= 3, or n == 2 if there is no Longnames Member
+     std::cout << "OBJ Member #" << (n - 2) << "\n" << std::endl;
+     const std::vector<uint8_t> obj(data.begin() + bodyOffset, data.begin() + bodyOffset + bodySize);
+     DumpObj(obj);
+   }
+ }
+
+ // LIB =
+ // LibSignature
+ // LibHeaders (+ Body each):
+ // Linker Member 1 (directory, obsolete)
+ // Linker Member 2 (directory)
+ // Longnames Member (names of archive members)
+ // OBJ1
+ // [OBJ2]
+ // [...]
+
+ // Walks all members of the LIB archive in `data` and prints each one via
+ // DumpMember(). The first member header follows the LibSignature; each
+ // further header follows the previous member's body, rounded up to an even
+ // offset.
+ // Throws std::runtime_error if a member header is truncated (and whatever
+ // DumpMember() throws).
+ void DumpLib(const std::vector<uint8_t>& data)
+ {
+   size_t member{ 0 };
+
+   for (size_t pos = sizeof(LibSignature); pos < data.size(); ++member) {
+     if (data.size() < pos + sizeof(LibHeader))
+       throw std::runtime_error("Too few bytes in lib header for member #"s + std::to_string(member + 1) + ": "s + std::to_string(data.size()));
+
+     DumpMember(member, data, pos);
+
+     const auto& header = *reinterpret_cast<const LibHeader*>(data.data() + pos);
+     pos += sizeof(LibHeader) + header.BodySize();
+     pos += pos % 2; // align to 2-byte ??? (undocumented)
+   }
+ }
+
+} // namespace
+
+
+// Reads the file at `path`, detects its type and prints a dump to std::cout:
+// LIB archives start with "!<arch>\n", AMD64 object files with the bytes
+// 0x64 0x86, PE images are recognised by isPE().
+// Throws std::runtime_error("Bad file type.") for anything else.
+void COFF::Dump(fs::path path)
+{
+  const auto data{getFile(path)};
+
+  if (data.size() >= 8 && boost::starts_with(data, "!<arch>\n"s)) {
+    DumpLib(data);
+    return;
+  }
+
+  if (data.size() >= 2 && data[0] == 0x64 && data[1] == 0x86) {
+    DumpObj(data);
+    return;
+  }
+
+  if (!isPE(data))
+    throw std::runtime_error("Bad file type.");
+
+  DumpExe(data);
+}
+
+namespace {
+
+ // Writes `size` bytes starting at `data` to the file `filename`, replacing
+ // any previous content.
+ // Throws std::runtime_error if the file cannot be opened or written.
+ void setFile(const fs::path& filename, const char* data, size_t size)
+ {
+   std::ofstream file(filename.string(), std::ios::out | std::ios::binary | std::ios::trunc);
+   if (!file.is_open())
+     throw std::runtime_error("Opening "s + filename.string() + " for writing");
+
+   file.write(data, static_cast<std::streamsize>(size));
+   file.flush(); // surface buffered write errors (e.g. disk full) here instead of losing them in the destructor
+   if (!file)
+     throw std::runtime_error("Writing "s + filename.string());
+ }
+
+ // Writes the characters of `s` to the file `filename`.
+ void setFile(const fs::path& filename, const std::string& s)
+ {
+   setFile(filename, s.data(), s.size());
+ }
+
+ // Writes the bytes of `s` to the file `filename`.
+ void setFile(const fs::path& filename, const std::vector<uint8_t>& s)
+ {
+   setFile(filename, reinterpret_cast<const char*>(s.data()), s.size());
+ }
+
+ // Appends a 0x40 byte DOS stub to `data`: "MZ", zero padding and, at
+ // offset 0x3c of the stub, the 32-bit address 0x40 which points to the end
+ // of this stub.
+ void PutDOSStub(std::vector<uint8_t>& data)
+ {
+   const size_t stub = data.size();
+   data.resize(stub + 0x40, uint8_t{});
+
+   data[stub] = 'M';
+   data[stub + 1] = 'Z';
+   data[stub + 0x3c] = 0x40; // low byte of the address, the other three stay 0
+ }
+
+ // Appends the four signature bytes "PE\0\0" to `data`.
+ void PutPESignature(std::vector<uint8_t>& data)
+ {
+   static const uint8_t signature[]{ 'P', 'E', '\0', '\0' };
+   data.insert(data.end(), signature, signature + sizeof(signature));
+ }
+
+ // Appends the COFF header, both parts of the optional header and 16 empty
+ // data directories (8 bytes each) of the generated test executable to `data`.
+ void PutCOFFHeader(std::vector<uint8_t>& data)
+ {
+   // Appends the raw bytes of a packed header struct to `data`.
+   const auto append = [&data](const auto& header) {
+     const auto* const first = reinterpret_cast<const uint8_t*>(&header);
+     data.insert(data.end(), first, first + sizeof(header));
+   };
+
+   COFFHeader header{};
+   header.Machine = IMAGE_FILE_MACHINE_AMD64;
+   header.NumberOfSections = 2;
+   header.SizeOfOptionalHeader = sizeof(COFFOptionalHeader_PE32p) + sizeof(COFFOptionalHeader_Windows_PE32p) + 8 * 16; // 0xf0
+   header.Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE;
+   append(header);
+
+   COFFOptionalHeader_PE32p optional_header{};
+   optional_header.Magic = MAGIC_PE32p;
+   optional_header.SizeOfCode = 512;
+   optional_header.SizeOfInitializedData = 512;
+   optional_header.SizeOfUninitializedData = 0;
+   optional_header.AddressOfEntryPoint = 0x1000;
+   optional_header.BaseOfCode = 0x1000;
+   append(optional_header);
+
+   COFFOptionalHeader_Windows_PE32p optional_windows{};
+   optional_windows.ImageBase = 0x140000000;
+   optional_windows.SectionAlignment = 0x1000;
+   optional_windows.FileAlignment = 512;
+   optional_windows.MajorOperatingSystemVersion = 6;
+   optional_windows.MajorImageVersion = 6;
+   optional_windows.MajorSubsystemVersion = 6;
+   optional_windows.SizeOfImage = 0x3000;
+   optional_windows.SizeOfHeaders = 512;
+   optional_windows.CheckSum = 0;
+   optional_windows.Subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI;
+#if 0
+   optional_windows.DllCharacteristics = 0x8160;
+#endif
+   optional_windows.SizeOfStackReserve = 0x100000;
+   optional_windows.SizeOfStackCommit = 0x1000;
+   optional_windows.SizeOfHeapReserve = 0x100000;
+   optional_windows.SizeOfHeapCommit = 0x1000;
+   optional_windows.NumberOfRvaAndSizes = 0x10;
+   append(optional_windows);
+
+   // data directories, all empty
+   data.insert(data.end(), size_t{ 8 * 16 }, uint8_t{});
+ }
+
+ // Appends the section header of the ".text" section to `data`: 512 raw
+ // bytes at file offset 512, mapped at virtual address 0x1000 as readable,
+ // executable code.
+ void PutCOFFSectionCodeHeader(std::vector<uint8_t>& data)
+ {
+   const char name[8] = ".text"; // remaining bytes are zero
+
+   SectionHeader text{};
+   memcpy(text.Name, name, sizeof(name));
+   text.VirtualSize = 3; // TODO
+   text.VirtualAddress = 0x1000;
+   text.SizeOfRawData = 512; // multiple of optional_windows.FileAlignment
+   text.PointerToRawData = 512;
+   text.Characteristics = IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ;
+
+   const auto* const raw = reinterpret_cast<const uint8_t*>(&text);
+   data.insert(data.end(), raw, raw + sizeof(text));
+ }
+
+ // Appends the raw data of the code section to `data`: zero padding up to
+ // file offset 512, the test code and zero padding up to file offset 1024.
+ // Throws std::runtime_error if `data` already extends beyond offset 512,
+ // i.e. the headers left no room (previously the padding size underflowed).
+ void PutCOFFSectionCode(std::vector<uint8_t>& data)
+ {
+   constexpr size_t sectionBegin = 512; // PointerToRawData of the code section
+   constexpr size_t sectionEnd = 1024;  // sectionBegin + SizeOfRawData
+   static const uint8_t code[]{ 0x33, 0xC0, 0xC3 }; // test code: return 0
+
+   if (data.size() > sectionBegin)
+     throw std::runtime_error("Headers too large: code section cannot start at offset 512");
+
+   data.resize(sectionBegin, uint8_t{}); // pad before code
+   data.insert(data.end(), code, code + sizeof(code));
+   data.resize(sectionEnd, uint8_t{});   // pad after code
+ }
+
+ // Appends the section header of the ".data" section to `data`: 512 raw
+ // bytes at file offset 1024, mapped at virtual address 0x2000 as readable
+ // initialized data.
+ void PutCOFFSectionDataHeader(std::vector<uint8_t>& data)
+ {
+   const char name[8] = ".data"; // remaining bytes are zero
+
+   SectionHeader section{};
+   memcpy(section.Name, name, sizeof(name));
+   section.VirtualSize = 3; // TODO
+   section.VirtualAddress = 0x2000;
+   section.SizeOfRawData = 512; // multiple of optional_windows.FileAlignment
+   section.PointerToRawData = 1024;
+   section.Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
+
+   const auto* const raw = reinterpret_cast<const uint8_t*>(&section);
+   data.insert(data.end(), raw, raw + sizeof(section));
+ }
+
+ // Appends the raw data of the data section to `data`: zero bytes up to file
+ // offset 1536 (test data).
+ // Throws std::runtime_error if `data` already extends beyond offset 1536
+ // (previously the padding size underflowed).
+ void PutCOFFSectionData(std::vector<uint8_t>& data)
+ {
+   constexpr size_t sectionEnd = 1536; // PointerToRawData (1024) + SizeOfRawData (512)
+
+   if (data.size() > sectionEnd)
+     throw std::runtime_error("Image too large: data section cannot end at offset 1536");
+
+   data.resize(sectionEnd, uint8_t{});
+ }
+} // namespace
+
+// Builds the test executable in memory and writes it to `path`.
+void COFF::Create(std::filesystem::path path)
+{
+  using Builder = void (*)(std::vector<uint8_t>&);
+
+  // Each builder appends its part, so this order is the file layout.
+  const Builder layout[]{
+    PutDOSStub,
+    PutPESignature,
+    PutCOFFHeader,
+    PutCOFFSectionCodeHeader,
+    PutCOFFSectionDataHeader,
+    PutCOFFSectionCode,
+    PutCOFFSectionData,
+  };
+
+  std::vector<uint8_t> image;
+  for (const Builder put : layout)
+    put(image);
+
+  setFile(path, image);
+}
diff --git a/coff.h b/coff.h
new file mode 100644
index 0000000..401912b
--- /dev/null
+++ b/coff.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <filesystem>
+
+namespace COFF { // entry points implemented in coff.cpp
+ void Dump(std::filesystem::path path); // prints a dump of the LIB, OBJ or PE file at `path` to std::cout; throws std::runtime_error on unknown or malformed files
+ void Create(std::filesystem::path path); // writes a generated test executable (code + data section) to `path`
+} \ No newline at end of file
diff --git a/intel.cpp b/intel.cpp
new file mode 100644
index 0000000..dfcaa75
--- /dev/null
+++ b/intel.cpp
@@ -0,0 +1,503 @@
+// Intel assembly language
+
+
+// segments: code, stack
+
+#include "minicc.h"
+
+#include <algorithm>
+#include <array>
+#include <deque>
+#include <functional>
+#include <stdexcept>
+#include <functional>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace std::string_literals;
+using namespace std::placeholders;
+
+namespace {
+
+ // binary code operators
+ // Concatenation of machine code: returns the bytes of `a` followed by the bytes of `b`.
+ std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b) {
+   a.reserve(a.size() + b.size());
+   for (const uint8_t byte : b)
+     a.push_back(byte);
+   return a;
+ }
+
+ // Returns the bytes of `a` followed by the single byte `b`.
+ std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b) {
+   a.insert(a.end(), b);
+   return a;
+ }
+
+ // REX prefix: 0b0100WRXB
+ // Builds a REX prefix byte 0b0100WRXB.
+ //   s - the flags to set, any combination of the letters 'W', 'R', 'X'
+ //       and 'B' (e.g. "W" or "WB"); other characters are ignored
+ // Returns the prefix as one byte of machine code.
+ std::vector<uint8_t> REX(std::string s) {
+   uint8_t result{0b01000000};
+
+   for (const char flag : s) {
+     switch (flag) {
+     case 'W': result |= 0b00001000; break;
+     case 'R': result |= 0b00000100; break;
+     case 'X': result |= 0b00000010; break;
+     case 'B': result |= 0b00000001; break;
+     default: break; // unknown letters never set a bit
+     }
+   }
+
+   return { result };
+ }
+
+ // Encodes the decimal number in `s` as an 8-bit immediate.
+ // Returns the low byte of the value (negative values in two's complement).
+ // Throws whatever std::stol throws for text that is not a number.
+ std::vector<uint8_t> imm8(std::string s) {
+   const long value{ std::stol(s) };
+   // Masking the value picks the low byte on every host; reading the first
+   // byte of the object's storage does so only on little-endian machines.
+   return { static_cast<uint8_t>(static_cast<unsigned long>(value) & 0xFF) };
+ }
+
+ // Encodes the decimal number in `s` as a 32-bit immediate.
+ // Returns the low 32 bits of the value as four bytes in little-endian order
+ // (negative values in two's complement).
+ // Throws whatever std::stol throws for text that is not a number.
+ std::vector<uint8_t> imm32(std::string s) {
+   const long value{ std::stol(s) };
+   // Plain integer conversion instead of reading the long through a
+   // uint32_t pointer, which violates aliasing rules and depends on the
+   // host byte order.
+   const uint32_t bits = static_cast<uint32_t>(value);
+   return {
+     static_cast<uint8_t>(bits & 0xFF),
+     static_cast<uint8_t>(bits >> 8 & 0xFF),
+     static_cast<uint8_t>(bits >> 16 & 0xFF),
+     static_cast<uint8_t>(bits >> 24 & 0xFF),
+   };
+ }
+
+ // Register number as used in the reg and r/m fields of the ModRM byte,
+ // for the 8-, 16- and 32-bit registers.
+ // x86 order is: 0=a, 1=c, 2=d, 3=b, then 4..7 = ah/ch/dh/bh for 8-bit
+ // registers and sp/bp/si/di for 16- and 32-bit registers.
+ std::unordered_map<std::string, size_t> IndexOfRegister{
+   {"al", 0}, {"ah", 4},
+   {"cl", 1}, {"ch", 5},
+   {"dl", 2}, {"dh", 6},
+   {"bl", 3}, {"bh", 7},
+
+   {"ax", 0}, {"sp", 4},
+   {"cx", 1}, {"bp", 5},
+   {"dx", 2}, {"si", 6},
+   {"bx", 3}, {"di", 7},
+
+   {"eax", 0}, {"esp", 4},
+   {"ecx", 1}, {"ebp", 5},
+   {"edx", 2}, {"esi", 6},
+   {"ebx", 3}, {"edi", 7},
+ };
+
+ // Manual, page 530
+ // Reg + Reg/Memory
+ // Builds a ModRM byte for two register operands: top two bits set, the
+ // index of `reg` in bits 3..5 and the index of `rm` in bits 0..2.
+ // Throws std::runtime_error if a name is not in IndexOfRegister.
+ uint8_t ModRM(std::string reg, std::string rm) {
+   // TODO: extend
+   const auto regEntry{ IndexOfRegister.find(reg) };
+   if (regEntry == IndexOfRegister.end())
+     throw std::runtime_error("Unknown register for arg1: "s + reg);
+
+   const auto rmEntry{ IndexOfRegister.find(rm) };
+   if (rmEntry == IndexOfRegister.end())
+     throw std::runtime_error("Unknown register for arg2: "s + rm);
+
+   return static_cast<uint8_t>(0b11000000 | (regEntry->second << 3) | rmEntry->second);
+ }
+
+ enum class AddressType { // kind and width of an address placeholder inside machine code (see Address)
+ Relative8, // emitted by op_jmp() for the one-byte jump form
+ Relative16,
+ Relative32, // emitted by op_jmp() for the four-byte jump/call form
+ Absolute8,
+ Absolute16,
+ Absolute32,
+ };
+
+ struct Address // an address inside an instruction's machine code that refers to a label
+ {
+ AddressType type;
+ size_t position; // relative to respective machine code, e.g. byte 1 in jump
+ std::string label; // where to jump to, as label
+ };
+
+ struct InstructionCode // one possible encoding of an instruction
+ {
+ std::vector<uint8_t> machine_code; // encoded bytes; op_jmp() leaves zero bytes where an address belongs
+ std::vector<Address> addresses; // label references located inside machine_code
+ };
+
+ // Alternative encodings of one instruction
+ using InstructionCodeList = std::deque<InstructionCode>;
+
+ // Optimization switch: when set, op_jmp() also offers the 8-bit jump form
+ // for instructions that have a 32-bit form.
+ bool O1 = true;
+
+ // Opcode bytes
+ using OP_T = std::vector<uint8_t>;
+
+ // Encodes a jump-like instruction "<mnemonic> <label>".
+ //   sl          - tokens of the statement: mnemonic and target label
+ //   op_bytes_8  - opcode of the form with 8-bit relative address, or empty
+ //   op_bytes_32 - opcode of the form with 32-bit relative address, or empty
+ // Returns the alternatives, 32-bit form first; each consists of the opcode
+ // followed by zero bytes as address placeholder. The 8-bit form is offered
+ // if O1 is set or no 32-bit form exists.
+ // Throws std::runtime_error if the statement does not have two tokens.
+ InstructionCodeList op_jmp(const std::vector<Token>& sl, std::vector<uint8_t> op_bytes_8, std::vector<uint8_t> op_bytes_32)
+ {
+   if (sl.size() != 2) {
+     // ... TODO
+     throw std::runtime_error("Unknown command: "s + sl[0].value);
+   }
+
+   // JMP rel8 / rel32
+   const std::string& target{ sl[1].value };
+   const bool has32 = !op_bytes_32.empty();
+   const bool has8 = !op_bytes_8.empty();
+   InstructionCodeList alternatives;
+
+   if (has32) {
+     const size_t addressAt = op_bytes_32.size();
+     op_bytes_32.insert(op_bytes_32.end(), size_t{ 4 }, uint8_t{ 0x00 });
+     alternatives.push_back({ op_bytes_32, { {AddressType::Relative32, addressAt, target} } });
+   }
+
+   if (has8 && (O1 || !has32)) {
+     const size_t addressAt = op_bytes_8.size();
+     op_bytes_8.push_back(0x00);
+     alternatives.push_back({ op_bytes_8, { {AddressType::Relative8, addressAt, target} } });
+   }
+
+   return alternatives;
+ }
+
+ // Maps a mnemonic to its encoder. An encoder receives the tokens of one
+ // statement (sl[0] is the mnemonic, the operands follow) and returns the
+ // alternative encodings; it throws std::runtime_error for operand
+ // combinations that are not implemented yet.
+ std::unordered_map<std::string, std::function<InstructionCodeList(const std::vector<Token>&)>> ops{
+
+   // Integer Addition
+   {"add", [](const std::vector<Token>& sl) -> InstructionCodeList {
+     if (sl.size() == 3) {
+       if (sl[1].value == "eax") { // ADD EAX, imm32
+         return { { std::vector<uint8_t>{ 0x05 } +imm32(sl[2].value), {} } };
+       } else if (sl[1].value == "rax") { // ADD RAX, imm32
+         return { { REX("W") + std::vector<uint8_t>{ 0x05 } +imm32(sl[2].value), {} } };
+       }
+     }
+
+     // ... TODO
+     throw std::runtime_error("Unknown command: "s + sl[0].value);
+   }},
+
+   // Call Procedure
+   {"call", std::bind(op_jmp, _1, OP_T{}, OP_T{ 0xE8 })},
+
+   // Interrupt
+   {"int", [](const std::vector<Token>& sl) -> InstructionCodeList {
+     if (sl.size() == 2) {
+       // Note: 0xCE is INTO (trap on overflow), not "INT 0", so vector 0
+       // takes the generic form below.
+       if (sl[1].value == "1") { // INT1
+         return { { std::vector<uint8_t>{ 0xF1 }} };
+       } else if (sl[1].value == "3") { // INT3
+         return { { std::vector<uint8_t>{ 0xCC }} };
+       } else { // INT imm8; the vector is operand sl[1] (sl[2] does not exist here)
+         return { { std::vector<uint8_t>{ 0xCD } +imm8(sl[1].value) } };
+       }
+     }
+
+     // ... TODO
+     throw std::runtime_error("Unknown command: "s + sl[0].value);
+   }},
+
+   // Unconditional Jump
+   {"jmp", std::bind(op_jmp, _1, OP_T{ 0xEB }, OP_T{ 0xE9 })},
+
+   // Conditional Jumps
+   {"ja", std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })},
+   {"jae", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })},
+   {"jb", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })},
+   {"jbe", std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })},
+   {"jc", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })},
+   // NOTE(review): jecxz and jrcxz share opcode 0xE3 and differ only by the
+   // address-size prefix (0x67), which is not emitted here - confirm.
+   {"jecxz", std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})},
+   {"jrcxz", std::bind(op_jmp, _1, OP_T{ 0xE3 }, OP_T{})},
+   {"je", std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })},
+   {"jg", std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })},
+   {"jge", std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })},
+   {"jl", std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })},
+   {"jle", std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })},
+   {"jna", std::bind(op_jmp, _1, OP_T{ 0x76 }, OP_T{ 0x0F, 0x86 })},
+   {"jnae", std::bind(op_jmp, _1, OP_T{ 0x72 }, OP_T{ 0x0F, 0x82 })},
+   {"jnb", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })},
+   {"jnbe", std::bind(op_jmp, _1, OP_T{ 0x77 }, OP_T{ 0x0F, 0x87 })},
+   {"jnc", std::bind(op_jmp, _1, OP_T{ 0x73 }, OP_T{ 0x0F, 0x83 })},
+   {"jne", std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })},
+   {"jng", std::bind(op_jmp, _1, OP_T{ 0x7E }, OP_T{ 0x0F, 0x8E })},
+   {"jnge", std::bind(op_jmp, _1, OP_T{ 0x7C }, OP_T{ 0x0F, 0x8C })},
+   {"jnl", std::bind(op_jmp, _1, OP_T{ 0x7D }, OP_T{ 0x0F, 0x8D })},
+   {"jnle", std::bind(op_jmp, _1, OP_T{ 0x7F }, OP_T{ 0x0F, 0x8F })},
+   {"jno", std::bind(op_jmp, _1, OP_T{ 0x71 }, OP_T{ 0x0F, 0x81 })},
+   {"jnp", std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })},
+   {"jns", std::bind(op_jmp, _1, OP_T{ 0x79 }, OP_T{ 0x0F, 0x89 })},
+   {"jnz", std::bind(op_jmp, _1, OP_T{ 0x75 }, OP_T{ 0x0F, 0x85 })},
+   {"jo", std::bind(op_jmp, _1, OP_T{ 0x70 }, OP_T{ 0x0F, 0x80 })},
+   {"jp", std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })},
+   {"jpe", std::bind(op_jmp, _1, OP_T{ 0x7A }, OP_T{ 0x0F, 0x8A })},
+   {"jpo", std::bind(op_jmp, _1, OP_T{ 0x7B }, OP_T{ 0x0F, 0x8B })},
+   {"js", std::bind(op_jmp, _1, OP_T{ 0x78 }, OP_T{ 0x0F, 0x88 })},
+   {"jz", std::bind(op_jmp, _1, OP_T{ 0x74 }, OP_T{ 0x0F, 0x84 })},
+
+   // Memory Move
+   { "mov", [](const std::vector<Token>& sl) -> InstructionCodeList {
+     if (sl.size() == 3) {
+       // NOTE(review): opcode 0x88 is the 8-bit form, but ModRM() also
+       // accepts 16/32-bit register names - confirm intended operand sizes.
+       return { { std::vector<uint8_t>{ 0x88 } + ModRM(sl[2].value, sl[1].value), {} } }; // r/m8, r8: ModRM:r/m (w), ModRM:reg (r)
+     }
+
+     // ... TODO
+     throw std::runtime_error("Unknown command: "s + sl[0].value);
+   }},
+
+   // No Operation
+   { "nop", [](const std::vector<Token>& sl) -> InstructionCodeList {
+     return {{ std::vector<uint8_t>{ 0x90 }, {}}};
+   }},
+
+   // Return from procedure
+   { "ret", [](const std::vector<Token>& sl) -> InstructionCodeList {
+     return {{ std::vector<uint8_t>{ 0xC3 }, {}}}; // near return; TODO: far return is 0xCB
+   }},
+
+   // Logical Exclusive OR
+   { "xor", [](const std::vector<Token>& sl) -> InstructionCodeList {
+     if (sl.size() == 3) {
+       return { { std::vector<uint8_t>{ 0x33 } + ModRM(sl[1].value, sl[2].value) } }; // r32, r/m32 (0x33): ModRM:reg (w), ModRM:r/m (r)
+     }
+
+     // ... TODO
+     throw std::runtime_error("Unknown command: "s + sl[0].value);
+   }},
+
+ };
+
+#if 0
+ prefixes{
+ "lock", 0xf0,
+
+ // branch hint
+ 0x2e, "branch not taken"
+ 0x3e, "branch taken"
+
+ 0x66, "operand size override" // switch between 16 and 32 bit operands
+ 0x67, "address size override" // switch between 16 and 32 bit addresses
+ };
+ };
+#endif
+
+#ifdef ASM_PARSER
+ BNF GetBNF() { // grammar skeleton for an assembler parser; only compiled with ASM_PARSER, every rule is still empty
+ // TODO:
+ return {
+ { "assembler-unit", {
+ {}
+ }},
+ { "immediate-32", {
+ {}
+ }},
+ { "mnemonic", {
+ {}
+ }},
+ { "register", {
+ {}
+ }},
+ { "register-8", {
+ {}
+ }},
+ { "register-16", {
+ {}
+ }},
+ { "register-32", {
+ {}
+ }},
+ { "register-64", {
+ {}
+ }},
+
+ };
+ };
+#endif
+
+ /// Checks whether the 32 bit relative address stored in an instruction
+ /// is also valid as a (signed) 8 bit relative address.
+ ///
+ /// The address is read as 4 little endian bytes from insn.machine_code,
+ /// starting at insn.addresses[0].position. It fits into 8 bits exactly if
+ /// the three upper bytes are the sign extension of the lowest byte,
+ /// i.e. if the value is in the range -128..127.
+ ///
+ /// \param insn instruction with exactly one address
+ /// \return true if the stored 32 bit value fits into a signed 8 bit value
+ /// \throws std::runtime_error if insn does not have exactly one address, or
+ ///         if the 4 address bytes do not lie inside the machine code
+ bool IsSmallAddress(const InstructionCode& insn) {
+  if (insn.addresses.size() != 1)
+   throw std::runtime_error("Bad number of addresses in insn");
+
+  const auto& code = insn.machine_code;
+  const size_t i{insn.addresses[0].position};
+
+  // Check the size first: code.size() - 4 would wrap around for short code.
+  if (code.size() < 4 || i > code.size() - 4)
+   throw std::runtime_error("Bad Address index "s + std::to_string(i) + " in insn with "s + std::to_string(code.size()) + " bytes"s);
+
+  // Little endian: code[i] is the lowest byte, code[i + 1] .. code[i + 3] are the upper bytes.
+  // A negative 8 bit value (bit 7 set) is extended with 0xFF, a non-negative one with 0x00.
+  const uint8_t sign_extension = (code[i] & 0x80) ? 0xFF : 0x00;
+
+  return std::all_of(code.begin() + i + 1, code.begin() + i + 4,
+                     [sign_extension](uint8_t byte) { return byte == sign_extension; });
+ }
+
+
+} // namespace
+
+/// Translates tokenized assembler lines into machine code.
+///
+/// assemble() runs these passes:
+///  1. produce_machine_code(): look up every mnemonic in ops, remember label positions
+///  2. insert_addresses(): patch label addresses into the 1st alternative of each instruction
+///  3. optimize(): only if O1 is set; use 8 bit relative addresses where they are sufficient
+///  4. collect_code(): concatenate the 1st alternative of each instruction
+class Assembler {
+
+ std::unordered_map<std::string, size_t> labels; ///< labels with their positions in instruction list
+
+ /// 1st Level: Instructions
+ /// 2nd Level: Alternatives
+ /// 3rd Level: Bytes of single instruction
+ std::vector<InstructionCodeList> insn_list;
+
+ /// Address of the instruction at index, i.e. the summed size of the
+ /// 1st alternatives of all instructions before it.
+ ///
+ /// index == insn_list.size() is allowed and yields the address directly
+ /// behind the last instruction (a label may be defined there).
+ ///
+ /// \throws std::runtime_error on bad index or if an instruction has no alternatives
+ uint64_t addressFromInstructionIndex(size_t index) const
+ {
+  // TODO: cache this to prevent repetitive summing
+
+  if (index > insn_list.size())
+   throw std::runtime_error("Index "s + std::to_string(index) + " out of range ("s + std::to_string(insn_list.size()) + ")"s);
+
+  uint64_t sum{};
+
+  for (size_t i = 0; i < index; i++) {
+   if (insn_list[i].size() < 1) {
+    throw std::runtime_error("Insufficient alternatives at index "s + std::to_string(i));
+   }
+
+   sum += static_cast<uint64_t>(insn_list[i][0].machine_code.size());
+  }
+
+  return sum;
+ }
+
+ /// Address of the instruction a label refers to.
+ /// \throws std::runtime_error if the label is not defined
+ uint64_t addressFromLabel(const std::string& label) const
+ {
+  const auto it{ labels.find(label) };
+  if (it == labels.end())
+   throw std::runtime_error("Label not found: "s + label);
+
+  return addressFromInstructionIndex(it->second);
+ }
+
+ /// Signed distance from the end of an instruction to the label of target_address.
+ ///
+ /// x86 relative addresses count from the address of the following instruction,
+ /// i.e. from insn_address plus the size of the instruction itself.
+ int64_t relativeDistance(const std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) const
+ {
+  const int64_t target{ static_cast<int64_t>(addressFromLabel(target_address.label)) };
+  const int64_t next_insn{ static_cast<int64_t>(insn_address + machine_code.size()) };
+
+  return target - next_insn;
+ }
+
+ /// Makes sure that width bytes starting at position lie inside machine_code.
+ /// \throws std::runtime_error otherwise
+ static void checkAddressFits(const std::vector<uint8_t>& machine_code, size_t position, size_t width)
+ {
+  if (width > machine_code.size() || position > machine_code.size() - width)
+   throw std::runtime_error("Bad Address index "s + std::to_string(position) + " in insn with "s + std::to_string(machine_code.size()) + " bytes"s);
+ }
+
+ /// Writes the address of a label into the machine code of an instruction.
+ ///
+ /// This is a member function instead of a table of lambdas capturing this,
+ /// so that copies of an Assembler never refer to the object they were copied from.
+ ///
+ /// \param machine_code bytes of the instruction to patch
+ /// \param target_address type of address, position inside machine_code, and target label
+ /// \param insn_address address of the first byte of the instruction
+ /// \throws std::runtime_error if the address type is not supported, the label is unknown,
+ ///         the distance does not fit the address type, or the position is outside machine_code
+ void insert_address(std::vector<uint8_t>& machine_code, const Address& target_address, uint64_t insn_address) const
+ {
+  switch (target_address.type) {
+  case AddressType::Relative8: {
+   const int64_t difference{ relativeDistance(machine_code, target_address, insn_address) };
+   if (difference < INT8_MIN || difference > INT8_MAX)
+    throw std::runtime_error("Distance too big");
+
+   checkAddressFits(machine_code, target_address.position, 1);
+   machine_code[target_address.position] = static_cast<uint8_t>(difference); // two's complement
+   break;
+  }
+  case AddressType::Relative16:
+   throw std::runtime_error("Relative16 Address not yet supported.");
+  case AddressType::Relative32: {
+   const int64_t difference{ relativeDistance(machine_code, target_address, insn_address) };
+   if (difference < INT32_MIN || difference > INT32_MAX) // range of a signed 32 bit value
+    throw std::runtime_error("Distance too big");
+
+   checkAddressFits(machine_code, target_address.position, 4);
+   const uint32_t diff_u32{ static_cast<uint32_t>(difference) }; // two's complement
+   for (size_t k = 0; k < 4; k++) {
+    machine_code[target_address.position + k] = static_cast<uint8_t>(diff_u32 >> (8 * k)); // little endian
+   }
+   break;
+  }
+  case AddressType::Absolute8:
+   throw std::runtime_error("Absolute8 Address not yet supported.");
+  case AddressType::Absolute16:
+   throw std::runtime_error("Absolute16 Address not yet supported.");
+  case AddressType::Absolute32:
+   throw std::runtime_error("Absolute32 Address not yet supported.");
+  default:
+   throw std::runtime_error("Unknown address type");
+  }
+ }
+
+ /// 1st pass: creates the instruction alternatives for every line and collects the labels.
+ ///
+ /// Accepted lines:
+ ///  label:
+ ///  mnemonic arg1, arg2, arg3
+ ///
+ /// \throws std::runtime_error on duplicate label, unknown instruction or syntax error
+ void produce_machine_code(const std::vector<std::vector<Token>>& tl)
+ {
+  for (const auto& t : tl) {
+   if (t.size() == 2 && t[0].type == "label" && t[1].type == ":") { // label
+    if (labels.find(t[0].value) != labels.end())
+     throw std::runtime_error("Label already defined: "s + t[0].value);
+
+    // a label refers to the instruction following it
+    labels[t[0].value] = insn_list.size();
+   } else if (t.size() >= 1 && t[0].type == "instruction") { // instruction
+    const std::string& instruction{ t[0].value };
+    const auto it = ops.find(instruction);
+    if (it == ops.end())
+     throw std::runtime_error("Unknown instruction: "s + instruction);
+
+    InstructionCodeList codes = it->second(t);
+
+    if (codes.size() == 0)
+     throw std::runtime_error("No instruction generated");
+
+    insn_list.push_back(codes);
+
+   } else
+    throw std::runtime_error("Syntax error"s);
+  }
+ }
+
+ /// 2nd pass: writes the label addresses into the 1st alternative of every instruction.
+ void insert_addresses()
+ {
+  uint64_t insn_address{}; // address of instruction i; patching does not change instruction sizes
+
+  for (size_t i = 0; i < insn_list.size(); i++) {
+   InstructionCodeList& list{ insn_list[i] };
+   if (list.size() == 0)
+    throw std::runtime_error("No instruction at index "s + std::to_string(i));
+
+   InstructionCode& code{ list[0] };
+
+   for (const auto& address : code.addresses) {
+    insert_address(code.machine_code, address, insn_address);
+   }
+
+   insn_address += static_cast<uint64_t>(code.machine_code.size());
+  }
+ }
+
+ /// 3rd pass: reduces jump sizes via alternatives if possible.
+ ///
+ /// An instruction with the two alternatives (Relative32, Relative8) is reduced to
+ /// its Relative8 alternative if the inserted 32 bit address is small enough.
+ /// Shrinking an instruction moves all following code, therefore the addresses are
+ /// inserted again and the search starts over after every replacement.
+ void optimize()
+ {
+  bool changed{};
+  do {
+   changed = false;
+
+   for (InstructionCodeList& list : insn_list) { // Alternatives
+    // apply specific heuristics to optimization case
+    if (list.size() != 2)
+     continue;
+    if (list[0].addresses.size() != 1 || list[1].addresses.size() != 1)
+     continue;
+    if (list[0].addresses[0].type != AddressType::Relative32 || list[1].addresses[0].type != AddressType::Relative8)
+     continue;
+
+    if (IsSmallAddress(list[0])) {
+     list.erase(list.begin()); // drop the 32 bit alternative, keep the 8 bit one
+     changed = true;
+     break; // start over from start of program
+    }
+   }
+
+   if (changed)
+    insert_addresses(); // update
+
+  } while (changed);
+ }
+
+ /// 4th pass: collects the generated machine instructions for the result.
+ /// Alternatives already resolved, if configured. Consider only 1st entry (no matter if optimized or not).
+ std::vector<uint8_t> collect_code() const
+ {
+  std::vector<uint8_t> result;
+
+  for (size_t i = 0; i < insn_list.size(); i++) {
+   const InstructionCodeList& list{ insn_list[i] };
+   if (list.size() == 0)
+    throw std::runtime_error("No instruction at index "s + std::to_string(i));
+
+   const InstructionCode& code{ list[0] };
+
+   result.insert(result.end(), code.machine_code.begin(), code.machine_code.end());
+  }
+
+  return result;
+ }
+
+public:
+ Assembler() = default;
+
+ /// Assembles a list of tokenized lines.
+ ///
+ /// \param tl one token list per line (label definition or instruction)
+ /// \return the machine code of all instructions
+ /// \throws std::runtime_error on syntax errors, unknown instructions or labels,
+ ///         and on addresses that can not be encoded
+ std::vector<uint8_t> assemble(std::vector<std::vector<Token>> tl)
+ {
+  labels.clear();
+  insn_list.clear();
+
+  produce_machine_code(tl); // 1st pass
+  insert_addresses(); // 2nd pass
+  if (O1) {
+   optimize(); // 3rd pass
+  }
+
+  return collect_code(); // 4th pass
+ }
+
+}; // class Assembler