diff options
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | asm/assembler.cpp | 34 | ||||
-rw-r--r-- | asm/assembler.h | 25 | ||||
-rw-r--r-- | asm/intel64/all_ops.h | 5 | ||||
-rw-r--r-- | asm/intel64/int.cpp | 28 | ||||
-rw-r--r-- | asm/intel64/int.h | 31 | ||||
-rw-r--r-- | asm/intel64/nop.h | 2 | ||||
-rw-r--r-- | asm/intel64/ret.cpp | 12 | ||||
-rw-r--r-- | asm/intel64/ret.h | 12 | ||||
-rw-r--r-- | asm/operators.cpp | 13 | ||||
-rw-r--r-- | asm/operators.h | 8 | ||||
-rw-r--r-- | asm/segment.cpp | 36 | ||||
-rw-r--r-- | asm/segment.h | 2 | ||||
-rw-r--r-- | intel.cpp | 33 | ||||
-rw-r--r-- | test-asm.cpp | 92 |
15 files changed, 291 insertions, 49 deletions
@@ -47,8 +47,11 @@ endif PROGSRC=\ asm/assembler.cpp \ asm/chunk.cpp \ - asm/segment.cpp \ + asm/intel64/int.cpp \ asm/intel64/nop.cpp \ + asm/intel64/ret.cpp \ + asm/operators.cpp \ + asm/segment.cpp \ bnf.cpp \ cpp.cpp \ cppbnf.cpp \ @@ -57,7 +60,6 @@ PROGSRC=\ elf.cpp \ file.cpp \ grammer.cpp \ - intel.cpp \ lexer.cpp \ minicc.cpp \ @@ -68,6 +70,7 @@ TESTSRC=\ test-grammer.cpp \ test-lexer.cpp \ test-minicc.cpp \ + test-asm.cpp \ googlemock/src/gmock-all.cpp \ googletest/src/gtest-all.cpp \ $(PROGSRC) diff --git a/asm/assembler.cpp b/asm/assembler.cpp index 275bd4a..7a9f6cf 100644 --- a/asm/assembler.cpp +++ b/asm/assembler.cpp @@ -1,7 +1,13 @@ #include "assembler.h" +using namespace std::string_literals; + +namespace { + std::unordered_map<std::string, FactoryFunction> ops; +} + bool registerOp(const std::string& mnemonic, FactoryFunction f) { if (ops.contains(mnemonic)) { @@ -15,3 +21,31 @@ bool registerOp(const std::string& mnemonic, FactoryFunction f) return true; } + +std::string mangleName(const std::string& s, AsmArgs& args) +{ + std::string result {s}; + + for (const auto& arg: args) { + result += "_"s + arg.type().name(); + } + + return result; +} + +std::shared_ptr<Op> makeOp(const std::string& mnemonic, AsmArgs& args) +{ + std::string mangled{mangleName(mnemonic, args)}; + + const auto& i{ops.find(mangled)}; + if (i == ops.end()) + throw std::runtime_error("Instruction "s + mangled + " not implemented."); + + return i->second(args); +} + +std::shared_ptr<Op> makeOp(const std::string& mnemonic) +{ + AsmArgs dummy; // empty list of arguments + return makeOp(mnemonic, dummy); +} diff --git a/asm/assembler.h b/asm/assembler.h index b9c39a6..42b5f8d 100644 --- a/asm/assembler.h +++ b/asm/assembler.h @@ -1,6 +1,7 @@ #pragma once -#include "chunk.h" +//#include "chunk.h" +//#include "segment.h" #include <any> #include <functional> @@ -9,11 +10,19 @@ #include <string> #include <unordered_map> -using AsmArgs = std::vector<std::any>; // 0th element is mnemonic +using AsmArgs = std::vector<std::any>; using FactoryFunction = std::function<std::shared_ptr<Op>(AsmArgs&)>; +// mnemonic: mnemonic including argument types bool registerOp(const std::string& mnemonic, FactoryFunction f); +// Create Op from a registered mnemonic +// mnemonic: just the mnemonic name +std::shared_ptr<Op> makeOp(const std::string& mnemonic, AsmArgs& args); + +// overload for empty list of arguments +std::shared_ptr<Op> makeOp(const std::string& mnemonic); + template<typename T> std::string mangleNameOne(const std::string& s) { @@ -29,3 +38,15 @@ std::string mangleName(const std::string& s) return mangleName<Targs...>(s + "_" + typeid(T).name()); } +std::string mangleName(const std::string& s, AsmArgs& args); + +class Immediate8 +{ +public: + Immediate8(uint8_t value): m_value(value) {} + uint8_t value() {return m_value;} + +private: + uint8_t m_value; +}; + diff --git a/asm/intel64/all_ops.h b/asm/intel64/all_ops.h new file mode 100644 index 0000000..83b654b --- /dev/null +++ b/asm/intel64/all_ops.h @@ -0,0 +1,5 @@ +#pragma once + +#include "int.h" +#include "nop.h" +#include "ret.h" diff --git a/asm/intel64/int.cpp b/asm/intel64/int.cpp new file mode 100644 index 0000000..7b682ab --- /dev/null +++ b/asm/intel64/int.cpp @@ -0,0 +1,28 @@ +#include "int.h" + +#include <asm/assembler.h> + +Op_int::Op_int(AsmArgs& args) +{ + // At this point, the registration already ensured the number and types of args + + Immediate8 i {std::any_cast<Immediate8>(args[0])}; + + if (i.value() == 0) { // INT 0 + machine_code = { 0xCE }; + } else if (i.value() == 1) { // INT 1 + machine_code = { 0xF1 }; + } else if (i.value() == 3) { // INT 3 + machine_code = { 0xCC }; + } else { // INT <...> + machine_code = std::vector<uint8_t>{ 0xCD, i.value() }; + } +} + +namespace { + +bool registered { registerOp(mangleName<Immediate8>("int"), [](AsmArgs& args) -> std::shared_ptr<Op>{ + return std::make_shared<Op_int>(args); + }) }; + +} diff --git a/asm/intel64/int.h b/asm/intel64/int.h new file mode 100644 index 0000000..7bd60c8 --- /dev/null +++ b/asm/intel64/int.h @@ -0,0 +1,31 @@ +// Interrupt + +#pragma once + +#include <asm/assembler.h> + +class Op_int: public Op +{ +public: + Op_int(AsmArgs& args); + +public: + std::vector<uint8_t> getCode() override + { + return machine_code; + } + + size_t size() override + { + return machine_code.size(); + } + + bool optimize() override ///< returns true if changed + { + return false; + } + +protected: + std::vector<uint8_t> machine_code; +}; + diff --git a/asm/intel64/nop.h b/asm/intel64/nop.h index 72d6d1b..233b2ef 100644 --- a/asm/intel64/nop.h +++ b/asm/intel64/nop.h @@ -1,3 +1,5 @@ +// No Operation + #pragma once #include <asm/chunk.h> diff --git a/asm/intel64/ret.cpp b/asm/intel64/ret.cpp new file mode 100644 index 0000000..cd9ddd4 --- /dev/null +++ b/asm/intel64/ret.cpp @@ -0,0 +1,12 @@ +#include "ret.h" + +#include <asm/assembler.h> + +namespace { + +bool registered { registerOp("ret", [](AsmArgs& args) -> std::shared_ptr<Op>{ + return std::make_shared<Op_ret>(); + }) }; + +} + diff --git a/asm/intel64/ret.h b/asm/intel64/ret.h new file mode 100644 index 0000000..7e7f68c --- /dev/null +++ b/asm/intel64/ret.h @@ -0,0 +1,12 @@ +// Return from procedure + +#pragma once + +#include <asm/chunk.h> + +class Op_ret: public OpSimple +{ +public: + Op_ret() : OpSimple({ 0xC3 }) {} // near return; TODO: far return is 0xCB +}; + diff --git a/asm/operators.cpp b/asm/operators.cpp new file mode 100644 index 0000000..9f7d5d9 --- /dev/null +++ b/asm/operators.cpp @@ -0,0 +1,13 @@ +#include "operators.h" + +// binary code operators +std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b) { + a.insert(a.end(), b.begin(), b.end()); + return a; +} + +std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b) { + a.push_back(b); + return a; +} + diff --git a/asm/operators.h b/asm/operators.h new file mode 100644 index 0000000..93dc15e --- /dev/null +++ b/asm/operators.h @@ -0,0 +1,8 @@ +#pragma once + +#include <cstdint> +#include <vector> + +std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b); +std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b); + diff --git a/asm/segment.cpp b/asm/segment.cpp index db83941..60b8348 100644 --- a/asm/segment.cpp +++ b/asm/segment.cpp @@ -1,20 +1,32 @@ #include "segment.h" +#include "operators.h" + using namespace std::string_literals; size_t Segment::getAddressOfLabel(const std::string& label) - { - size_t address{0}; - auto i{begin()}; - while (i != end()) { - Chunk& chunk{**i}; - address += chunk.size(); - if (typeid(chunk) == typeid(Label)) { - if (dynamic_cast<Label&>(chunk).name() == label) { - return address; - } +{ + size_t address{0}; + auto i{begin()}; + while (i != end()) { + Chunk& chunk{**i}; + address += chunk.size(); + if (typeid(chunk) == typeid(Label)) { + if (dynamic_cast<Label&>(chunk).name() == label) { + return address; } } - - throw std::runtime_error("Bad label: "s + label); } + + throw std::runtime_error("Bad label: "s + label); +} + +std::vector<uint8_t> Segment::getCode() +{ + std::vector<uint8_t> result; + + for (const auto& chunk: *this) + result = result + chunk->getCode(); + + return result; +} diff --git a/asm/segment.h b/asm/segment.h index 1c080d3..f0a758e 100644 --- a/asm/segment.h +++ b/asm/segment.h @@ -9,7 +9,9 @@ class Segment: public std::vector<std::shared_ptr<Chunk>> { +public: size_t getAddressOfLabel(const std::string& label); + std::vector<uint8_t> getCode(); }; @@ -22,17 +22,6 @@ using namespace std::placeholders; namespace { - // binary code operators - std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b) { - a.insert(a.end(), b.begin(), b.end()); - return a; - } - - std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b) { - a.push_back(b); - return a; - } - // REX prefix: 0b0100WRXB std::vector<uint8_t> REX(std::string s) { uint8_t result{0b01000000}; @@ -166,23 +155,6 @@ namespace { // Call Procedure {"call", std::bind(op_jmp, _1, OP_T{}, OP_T{ 0xE8 })}, - // Interrupt - {"int", [](const std::vector<Token>& sl) -> InstructionCodeList { - if (sl.size() == 2) { - if (sl[1].value == "0") { // INT 0 - return { { std::vector<uint8_t>{ 0xCE }} }; - } else if (sl[1].value == "1") { // INT 1 - return { { std::vector<uint8_t>{ 0xF1 }} }; - } else if (sl[1].value == "3") { // INT 3 - return { { std::vector<uint8_t>{ 0xCC }} }; - } else { // INT <...> - return { { std::vector<uint8_t>{ 0xCD } +imm8(sl[2].value) } }; - } - } - - // ... TODO - throw std::runtime_error("Unknown command: "s + sl[0].value); - }}, // Unconditional Jump {"jmp", std::bind(op_jmp, _1, OP_T{ 0xEB }, OP_T{ 0xE9 })}, @@ -231,11 +203,6 @@ namespace { throw std::runtime_error("Unknown command: "s + sl[0].value); }}, - // Return from procedure - { "ret", [](const std::vector<Token>& sl) -> InstructionCodeList { - return {{ std::vector<uint8_t>{ 0xC3 }, {}}}; // near return; TODO: far return is 0xCB - }}, - { "xor", [](const std::vector<Token>& sl) -> InstructionCodeList { if (sl.size() == 3) { return { { std::vector<uint8_t>{ 0x33 } + ModRM(sl[1].value, sl[2].value) } }; // r8, r/m8: ModRM:reg (w), ModRM:r/m (r) diff --git a/test-asm.cpp b/test-asm.cpp new file mode 100644 index 0000000..882f998 --- /dev/null +++ b/test-asm.cpp @@ -0,0 +1,92 @@ +#include "asm/chunk.h" +#include "asm/assembler.h" +#include "asm/segment.h" +#include "asm/intel64/all_ops.h" + +#include "minicc.h" + +#include <boost/algorithm/string.hpp> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include <algorithm> +#include <cctype> +#include <deque> +#include <map> +#include <memory> +#include <string> +#include <system_error> +#include <utility> +#include <vector> + +using namespace std::string_literals; +namespace fs = std::filesystem; + +class AsmTest: public ::testing::Test +{ +protected: + AsmTest() { + //debug = true; + } + ~AsmTest() { + } + void SetUp(){ + } + void TearDown(){ + } +}; + +TEST_F(AsmTest, Intel64_int_0) { + Segment segment; + AsmArgs args{{Immediate8(0)}}; + segment.push_back(makeOp("int", args)); + + ASSERT_EQ(segment.size(), 1); + ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0xCE}); +} + +TEST_F(AsmTest, Intel64_int_1) { + Segment segment; + AsmArgs args{{Immediate8(1)}}; + segment.push_back(makeOp("int", args)); + + ASSERT_EQ(segment.size(), 1); + ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0xF1}); +} + +TEST_F(AsmTest, Intel64_int_5) { + Segment segment; + AsmArgs args{{Immediate8(5)}}; + segment.push_back(makeOp("int", args)); + + ASSERT_EQ(segment.size(), 1); + ASSERT_EQ(segment.getCode(), std::vector<uint8_t>({0xCD, 0x05})); +} + +TEST_F(AsmTest, Intel64_nop) { + Segment segment; + segment.push_back(makeOp("nop")); + + ASSERT_EQ(segment.size(), 1); + ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0x90}); +} + +TEST_F(AsmTest, Intel64_ret) { + Segment segment; + segment.push_back(makeOp("ret")); + + ASSERT_EQ(segment.size(), 1); + ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0xC3}); +} + +TEST_F(AsmTest, Intel64_multiple) { + Segment segment; + segment.push_back(makeOp("nop")); + AsmArgs args{{Immediate8(5)}}; + segment.push_back(makeOp("int", args)); + segment.push_back(makeOp("ret")); + + ASSERT_EQ(segment.size(), 3); + ASSERT_EQ(segment.getCode(), std::vector<uint8_t>({0x90, 0xCD, 0x05, 0xC3})); +} |