summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Makefile7
-rw-r--r--asm/assembler.cpp34
-rw-r--r--asm/assembler.h25
-rw-r--r--asm/intel64/all_ops.h5
-rw-r--r--asm/intel64/int.cpp28
-rw-r--r--asm/intel64/int.h31
-rw-r--r--asm/intel64/nop.h2
-rw-r--r--asm/intel64/ret.cpp12
-rw-r--r--asm/intel64/ret.h12
-rw-r--r--asm/operators.cpp13
-rw-r--r--asm/operators.h8
-rw-r--r--asm/segment.cpp36
-rw-r--r--asm/segment.h2
-rw-r--r--intel.cpp33
-rw-r--r--test-asm.cpp92
15 files changed, 291 insertions, 49 deletions
diff --git a/Makefile b/Makefile
index 5971b19..463598f 100644
--- a/Makefile
+++ b/Makefile
@@ -47,8 +47,11 @@ endif
PROGSRC=\
asm/assembler.cpp \
asm/chunk.cpp \
- asm/segment.cpp \
+ asm/intel64/int.cpp \
asm/intel64/nop.cpp \
+ asm/intel64/ret.cpp \
+ asm/operators.cpp \
+ asm/segment.cpp \
bnf.cpp \
cpp.cpp \
cppbnf.cpp \
@@ -57,7 +60,6 @@ PROGSRC=\
elf.cpp \
file.cpp \
grammer.cpp \
- intel.cpp \
lexer.cpp \
minicc.cpp \
@@ -68,6 +70,7 @@ TESTSRC=\
test-grammer.cpp \
test-lexer.cpp \
test-minicc.cpp \
+ test-asm.cpp \
googlemock/src/gmock-all.cpp \
googletest/src/gtest-all.cpp \
$(PROGSRC)
diff --git a/asm/assembler.cpp b/asm/assembler.cpp
index 275bd4a..7a9f6cf 100644
--- a/asm/assembler.cpp
+++ b/asm/assembler.cpp
@@ -1,7 +1,13 @@
#include "assembler.h"
+using namespace std::string_literals;
+
+namespace {
+
std::unordered_map<std::string, FactoryFunction> ops;
+}
+
bool registerOp(const std::string& mnemonic, FactoryFunction f)
{
if (ops.contains(mnemonic)) {
@@ -15,3 +21,31 @@ bool registerOp(const std::string& mnemonic, FactoryFunction f)
return true;
}
+
+std::string mangleName(const std::string& s, AsmArgs& args)
+{
+ std::string result {s};
+
+ for (const auto& arg: args) {
+ result += "_"s + arg.type().name();
+ }
+
+ return result;
+}
+
+std::shared_ptr<Op> makeOp(const std::string& mnemonic, AsmArgs& args)
+{
+ std::string mangled{mangleName(mnemonic, args)};
+
+ const auto& i{ops.find(mangled)};
+ if (i == ops.end())
+ throw std::runtime_error("Instruction "s + mangled + " not implemented.");
+
+ return i->second(args);
+}
+
+std::shared_ptr<Op> makeOp(const std::string& mnemonic)
+{
+ AsmArgs dummy; // empty list of arguments
+ return makeOp(mnemonic, dummy);
+}
diff --git a/asm/assembler.h b/asm/assembler.h
index b9c39a6..42b5f8d 100644
--- a/asm/assembler.h
+++ b/asm/assembler.h
@@ -1,6 +1,7 @@
#pragma once
-#include "chunk.h"
+//#include "chunk.h"
+//#include "segment.h"
#include <any>
#include <functional>
@@ -9,11 +10,19 @@
#include <string>
#include <unordered_map>
-using AsmArgs = std::vector<std::any>; // 0th element is mnemonic
+using AsmArgs = std::vector<std::any>;
using FactoryFunction = std::function<std::shared_ptr<Op>(AsmArgs&)>;
+// mnemonic: mnemonic including argument types
bool registerOp(const std::string& mnemonic, FactoryFunction f);
+// Create Op from a registered mnemonic
+// mnemonic: just the mnemonic name
+std::shared_ptr<Op> makeOp(const std::string& mnemonic, AsmArgs& args);
+
+// overload for empty list of arguments
+std::shared_ptr<Op> makeOp(const std::string& mnemonic);
+
template<typename T>
std::string mangleNameOne(const std::string& s)
{
@@ -29,3 +38,15 @@ std::string mangleName(const std::string& s)
return mangleName<Targs...>(s + "_" + typeid(T).name());
}
+std::string mangleName(const std::string& s, AsmArgs& args);
+
+class Immediate8
+{
+public:
+ Immediate8(uint8_t value): m_value(value) {}
+ uint8_t value() {return m_value;}
+
+private:
+ uint8_t m_value;
+};
+
diff --git a/asm/intel64/all_ops.h b/asm/intel64/all_ops.h
new file mode 100644
index 0000000..83b654b
--- /dev/null
+++ b/asm/intel64/all_ops.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "int.h"
+#include "nop.h"
+#include "ret.h"
diff --git a/asm/intel64/int.cpp b/asm/intel64/int.cpp
new file mode 100644
index 0000000..7b682ab
--- /dev/null
+++ b/asm/intel64/int.cpp
@@ -0,0 +1,28 @@
+#include "int.h"
+
+#include <asm/assembler.h>
+
+Op_int::Op_int(AsmArgs& args)
+{
+ // At this point, the registration already ensured the number and types of args
+
+ Immediate8 i {std::any_cast<Immediate8>(args[0])};
+
+ if (i.value() == 0) { // INT 0
+ machine_code = { 0xCE };
+ } else if (i.value() == 1) { // INT 1
+ machine_code = { 0xF1 };
+ } else if (i.value() == 3) { // INT 3
+ machine_code = { 0xCC };
+ } else { // INT <...>
+ machine_code = std::vector<uint8_t>{ 0xCD, i.value() };
+ }
+}
+
+namespace {
+
+bool registered { registerOp(mangleName<Immediate8>("int"), [](AsmArgs& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_int>(args);
+ }) };
+
+}
diff --git a/asm/intel64/int.h b/asm/intel64/int.h
new file mode 100644
index 0000000..7bd60c8
--- /dev/null
+++ b/asm/intel64/int.h
@@ -0,0 +1,31 @@
+// Interrupt
+
+#pragma once
+
+#include <asm/assembler.h>
+
+class Op_int: public Op
+{
+public:
+ Op_int(AsmArgs& args);
+
+public:
+ std::vector<uint8_t> getCode() override
+ {
+ return machine_code;
+ }
+
+ size_t size() override
+ {
+ return machine_code.size();
+ }
+
+ bool optimize() override ///< returns true if changed
+ {
+ return false;
+ }
+
+protected:
+ std::vector<uint8_t> machine_code;
+};
+
diff --git a/asm/intel64/nop.h b/asm/intel64/nop.h
index 72d6d1b..233b2ef 100644
--- a/asm/intel64/nop.h
+++ b/asm/intel64/nop.h
@@ -1,3 +1,5 @@
+// No Operation
+
#pragma once
#include <asm/chunk.h>
diff --git a/asm/intel64/ret.cpp b/asm/intel64/ret.cpp
new file mode 100644
index 0000000..cd9ddd4
--- /dev/null
+++ b/asm/intel64/ret.cpp
@@ -0,0 +1,12 @@
+#include "ret.h"
+
+#include <asm/assembler.h>
+
+namespace {
+
+bool registered { registerOp("ret", [](AsmArgs& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_ret>();
+ }) };
+
+}
+
diff --git a/asm/intel64/ret.h b/asm/intel64/ret.h
new file mode 100644
index 0000000..7e7f68c
--- /dev/null
+++ b/asm/intel64/ret.h
@@ -0,0 +1,12 @@
+// Return from procedure
+
+#pragma once
+
+#include <asm/chunk.h>
+
+class Op_ret: public OpSimple
+{
+public:
+ Op_ret() : OpSimple({ 0xC3 }) {} // near return; TODO: far return is 0xCB
+};
+
diff --git a/asm/operators.cpp b/asm/operators.cpp
new file mode 100644
index 0000000..9f7d5d9
--- /dev/null
+++ b/asm/operators.cpp
@@ -0,0 +1,13 @@
+#include "operators.h"
+
+// binary code operators
+std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b) {
+ a.insert(a.end(), b.begin(), b.end());
+ return a;
+}
+
+std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b) {
+ a.push_back(b);
+ return a;
+}
+
diff --git a/asm/operators.h b/asm/operators.h
new file mode 100644
index 0000000..93dc15e
--- /dev/null
+++ b/asm/operators.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b);
+std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b);
+
diff --git a/asm/segment.cpp b/asm/segment.cpp
index db83941..60b8348 100644
--- a/asm/segment.cpp
+++ b/asm/segment.cpp
@@ -1,20 +1,32 @@
#include "segment.h"
+#include "operators.h"
+
using namespace std::string_literals;
size_t Segment::getAddressOfLabel(const std::string& label)
- {
- size_t address{0};
- auto i{begin()};
- while (i != end()) {
- Chunk& chunk{**i};
- address += chunk.size();
- if (typeid(chunk) == typeid(Label)) {
- if (dynamic_cast<Label&>(chunk).name() == label) {
- return address;
- }
+{
+ size_t address{0};
+ auto i{begin()};
+ while (i != end()) {
+ Chunk& chunk{**i};
+ address += chunk.size();
+ if (typeid(chunk) == typeid(Label)) {
+ if (dynamic_cast<Label&>(chunk).name() == label) {
+ return address;
}
}
-
- throw std::runtime_error("Bad label: "s + label);
}
+
+ throw std::runtime_error("Bad label: "s + label);
+}
+
+std::vector<uint8_t> Segment::getCode()
+{
+ std::vector<uint8_t> result;
+
+ for (const auto& chunk: *this)
+ result = result + chunk->getCode();
+
+ return result;
+}
diff --git a/asm/segment.h b/asm/segment.h
index 1c080d3..f0a758e 100644
--- a/asm/segment.h
+++ b/asm/segment.h
@@ -9,7 +9,9 @@
class Segment: public std::vector<std::shared_ptr<Chunk>>
{
+public:
size_t getAddressOfLabel(const std::string& label);
+ std::vector<uint8_t> getCode();
};
diff --git a/intel.cpp b/intel.cpp
index 9dac6c0..9f24f1d 100644
--- a/intel.cpp
+++ b/intel.cpp
@@ -22,17 +22,6 @@ using namespace std::placeholders;
namespace {
- // binary code operators
- std::vector<uint8_t> operator+(std::vector<uint8_t> a, const std::vector<uint8_t>& b) {
- a.insert(a.end(), b.begin(), b.end());
- return a;
- }
-
- std::vector<uint8_t> operator+(std::vector<uint8_t> a, const uint8_t& b) {
- a.push_back(b);
- return a;
- }
-
// REX prefix: 0b0100WRXB
std::vector<uint8_t> REX(std::string s) {
uint8_t result{0b01000000};
@@ -166,23 +155,6 @@ namespace {
// Call Procedure
{"call", std::bind(op_jmp, _1, OP_T{}, OP_T{ 0xE8 })},
- // Interrupt
- {"int", [](const std::vector<Token>& sl) -> InstructionCodeList {
- if (sl.size() == 2) {
- if (sl[1].value == "0") { // INT 0
- return { { std::vector<uint8_t>{ 0xCE }} };
- } else if (sl[1].value == "1") { // INT 1
- return { { std::vector<uint8_t>{ 0xF1 }} };
- } else if (sl[1].value == "3") { // INT 3
- return { { std::vector<uint8_t>{ 0xCC }} };
- } else { // INT <...>
- return { { std::vector<uint8_t>{ 0xCD } +imm8(sl[2].value) } };
- }
- }
-
- // ... TODO
- throw std::runtime_error("Unknown command: "s + sl[0].value);
- }},
// Unconditional Jump
{"jmp", std::bind(op_jmp, _1, OP_T{ 0xEB }, OP_T{ 0xE9 })},
@@ -231,11 +203,6 @@ namespace {
throw std::runtime_error("Unknown command: "s + sl[0].value);
}},
- // Return from procedure
- { "ret", [](const std::vector<Token>& sl) -> InstructionCodeList {
- return {{ std::vector<uint8_t>{ 0xC3 }, {}}}; // near return; TODO: far return is 0xCB
- }},
-
{ "xor", [](const std::vector<Token>& sl) -> InstructionCodeList {
if (sl.size() == 3) {
return { { std::vector<uint8_t>{ 0x33 } + ModRM(sl[1].value, sl[2].value) } }; // r8, r/m8: ModRM:reg (w), ModRM:r/m (r)
diff --git a/test-asm.cpp b/test-asm.cpp
new file mode 100644
index 0000000..882f998
--- /dev/null
+++ b/test-asm.cpp
@@ -0,0 +1,92 @@
+#include "asm/chunk.h"
+#include "asm/assembler.h"
+#include "asm/segment.h"
+#include "asm/intel64/all_ops.h"
+
+#include "minicc.h"
+
+#include <boost/algorithm/string.hpp>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <cctype>
+#include <deque>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+using namespace std::string_literals;
+namespace fs = std::filesystem;
+
+class AsmTest: public ::testing::Test
+{
+protected:
+ AsmTest() {
+ //debug = true;
+ }
+ ~AsmTest() {
+ }
+ void SetUp(){
+ }
+ void TearDown(){
+ }
+};
+
+TEST_F(AsmTest, Intel64_int_0) {
+ Segment segment;
+ AsmArgs args{{Immediate8(0)}};
+ segment.push_back(makeOp("int", args));
+
+ ASSERT_EQ(segment.size(), 1);
+ ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0xCE});
+}
+
+TEST_F(AsmTest, Intel64_int_1) {
+ Segment segment;
+ AsmArgs args{{Immediate8(1)}};
+ segment.push_back(makeOp("int", args));
+
+ ASSERT_EQ(segment.size(), 1);
+ ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0xF1});
+}
+
+TEST_F(AsmTest, Intel64_int_5) {
+ Segment segment;
+ AsmArgs args{{Immediate8(5)}};
+ segment.push_back(makeOp("int", args));
+
+ ASSERT_EQ(segment.size(), 1);
+ ASSERT_EQ(segment.getCode(), std::vector<uint8_t>({0xCD, 0x05}));
+}
+
+TEST_F(AsmTest, Intel64_nop) {
+ Segment segment;
+ segment.push_back(makeOp("nop"));
+
+ ASSERT_EQ(segment.size(), 1);
+ ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0x90});
+}
+
+TEST_F(AsmTest, Intel64_ret) {
+ Segment segment;
+ segment.push_back(makeOp("ret"));
+
+ ASSERT_EQ(segment.size(), 1);
+ ASSERT_EQ(segment.getCode(), std::vector<uint8_t>{0xC3});
+}
+
+TEST_F(AsmTest, Intel64_multiple) {
+ Segment segment;
+ segment.push_back(makeOp("nop"));
+ AsmArgs args{{Immediate8(5)}};
+ segment.push_back(makeOp("int", args));
+ segment.push_back(makeOp("ret"));
+
+ ASSERT_EQ(segment.size(), 3);
+ ASSERT_EQ(segment.getCode(), std::vector<uint8_t>({0x90, 0xCD, 0x05, 0xC3}));
+}