summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2020-11-20 10:59:18 +0100
committer Roland Reichwein <mail@reichwein.it> 2020-11-20 10:59:18 +0100
commit 7250bbe5ae2d2ee6b0334bc462aab73f7d8dac0e (patch)
tree b910e778baa3e2434215abbb50e45e64aa92970c
parent 05faf77202d0f8762160f135e714961d2c456cca (diff)
Assembler bugfixes - tests run successfully now
-rw-r--r-- asm/assembler.cpp 8
-rw-r--r-- asm/assembler.h 41
-rw-r--r-- asm/intel64/add.cpp 43
-rw-r--r-- asm/intel64/encode.cpp 8
-rw-r--r-- asm/parse.cpp 181
-rw-r--r-- tests/test-asm.cpp 20
6 files changed, 252 insertions, 49 deletions
diff --git a/asm/assembler.cpp b/asm/assembler.cpp
index b555125..4eb37f0 100644
--- a/asm/assembler.cpp
+++ b/asm/assembler.cpp
@@ -2,6 +2,14 @@
using namespace std::string_literals;
+Asm::Args::Immediate32::Immediate32(const Asm::Args::Immediate64& imm64)
+{
+ if (imm64.value() < 0x100000000)
+ m_value = static_cast<uint32_t>(imm64.value());
+ else
+ throw std::runtime_error("Immediate32: Constructed from too big Immediate64");
+}
+
namespace {
std::unordered_map<std::string, FactoryFunction> ops;
diff --git a/asm/assembler.h b/asm/assembler.h
index ea23fbc..8cdaa31 100644
--- a/asm/assembler.h
+++ b/asm/assembler.h
@@ -28,18 +28,21 @@ public:
{
public:
Immediate8(uint8_t value): m_value(value) {}
- uint8_t value() {return m_value;}
+ uint8_t value() const {return m_value;}
std::vector<uint8_t> getCode() {return {m_value};};
private:
uint8_t m_value;
};
+ class Immediate64;
+
class Immediate32
{
public:
Immediate32(uint32_t value): m_value(value) {}
- uint32_t value() { return m_value; }
+ Immediate32(const Immediate64&); ///< Convert from Immediate64 if data is small enough
+ uint32_t value() const { return m_value; }
std::vector<uint8_t> getCode() {
std::vector<uint8_t> result(size_t(4));
*(reinterpret_cast<uint32_t*>(result.data())) = boost::endian::native_to_little(m_value);
@@ -54,7 +57,7 @@ public:
{
public:
Immediate64(uint64_t value): m_value(value) {}
- uint64_t value() { return m_value; }
+ uint64_t value() const { return m_value; }
std::vector<uint8_t> getCode() {
std::vector<uint8_t> result(size_t(8));
*(reinterpret_cast<uint64_t*>(result.data())) = boost::endian::native_to_little(m_value);
@@ -69,7 +72,7 @@ public:
{
public:
Register8(const std::string& name): m_name(name) {}
- std::string name() { return m_name; }
+ std::string name() const { return m_name; }
private:
std::string m_name;
@@ -79,7 +82,7 @@ public:
{
public:
Register32(const std::string& name): m_name(name) {}
- std::string name() { return m_name; }
+ std::string name() const { return m_name; }
private:
std::string m_name;
@@ -89,7 +92,7 @@ public:
{
public:
Register64(const std::string& name): m_name(name) {}
- std::string name() { return m_name; }
+ std::string name() const { return m_name; }
private:
std::string m_name;
@@ -100,10 +103,10 @@ public:
{
public:
Mem8Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {}
- Mem8Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
- std::string reg() { return m_reg; }
- std::string reg2() { return m_reg2; }
- int32_t offs() { return m_offs; }
+ Mem8Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
+ std::string reg() const { return m_reg; }
+ std::string reg2() const { return m_reg2; }
+ int32_t offs() const { return m_offs; }
private:
std::string m_reg;
@@ -116,10 +119,10 @@ public:
{
public:
Mem32Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {}
- Mem32Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
- std::string reg() { return m_reg; }
- std::string reg2() { return m_reg2; }
- int32_t offs() { return m_offs; }
+ Mem32Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
+ std::string reg() const { return m_reg; }
+ std::string reg2() const { return m_reg2; }
+ int32_t offs() const { return m_offs; }
private:
std::string m_reg;
@@ -132,10 +135,10 @@ public:
{
public:
Mem64Ptr64(const std::string& reg, int32_t offs = 0): m_reg(reg), m_offs(offs) {}
- Mem64Ptr64(const std::string& reg, const std::string& reg2 = ""s, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
- std::string reg() { return m_reg; }
- std::string reg2() { return m_reg2; }
- int32_t offs() { return m_offs; }
+ Mem64Ptr64(const std::string& reg, const std::string& reg2, int32_t offs = 0): m_reg(reg), m_reg2(reg2), m_offs(offs) {}
+ std::string reg() const { return m_reg; }
+ std::string reg2() const { return m_reg2; }
+ int32_t offs() const { return m_offs; }
private:
std::string m_reg;
@@ -147,7 +150,7 @@ public:
{
public:
Label(const std::string& name): m_name(name) {}
- std::string name() { return m_name; }
+ std::string name() const { return m_name; }
private:
std::string m_name;
diff --git a/asm/intel64/add.cpp b/asm/intel64/add.cpp
index 957c27f..07b14a1 100644
--- a/asm/intel64/add.cpp
+++ b/asm/intel64/add.cpp
@@ -28,12 +28,34 @@ Op_add::Op_add(const Asm::Args& args)
{ // add rax, imm32
machine_code = REX("W") + std::vector<uint8_t>{ 0x05 } + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();
+ } else if (args[0].type() == typeid(Asm::Args::Register8) && args[1].type() == typeid(Asm::Args::Register8)) { // add reg8, reg8
+ machine_code = std::vector<uint8_t>{ 0x00 } + ModRM(std::any_cast<Asm::Args::Register8>(args[1]).name(), std::any_cast<Asm::Args::Register8>(args[0]).name());
+
+ } else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Register32)) { // add reg32, reg32
+ machine_code = std::vector<uint8_t>{ 0x01 } + ModRM(std::any_cast<Asm::Args::Register32>(args[1]).name(), std::any_cast<Asm::Args::Register32>(args[0]).name());
+
+ } else if (args[0].type() == typeid(Asm::Args::Register64) && args[1].type() == typeid(Asm::Args::Register64)) { // add reg64, reg64
+ machine_code = REX("W") + std::vector<uint8_t>{ 0x01 } + ModRM(std::any_cast<Asm::Args::Register64>(args[1]).name(), std::any_cast<Asm::Args::Register64>(args[0]).name());
+
} else if (args[0].type() == typeid(Asm::Args::Register32) && args[1].type() == typeid(Asm::Args::Mem32Ptr64)) { // add reg32, [reg64]
machine_code = std::vector<uint8_t>{ 0x03 } + ModRM(std::any_cast<Asm::Args::Register32>(args[0]).name(), std::any_cast<Asm::Args::Mem32Ptr64>(args[1]).reg());
} else if (args[0].type() == typeid(Asm::Args::Register64) && args[1].type() == typeid(Asm::Args::Mem64Ptr64)) { // add reg64, [reg64]
machine_code = REX("W") + std::vector<uint8_t>{ 0x03 } + ModRM(std::any_cast<Asm::Args::Register64>(args[0]).name(), std::any_cast<Asm::Args::Mem64Ptr64>(args[1]).reg());
+ } else if (args[0].type() == typeid(Asm::Args::Mem8Ptr64) && args[1].type() == typeid(Asm::Args::Immediate8)) { // add [reg64], imm8
+ machine_code = std::vector<uint8_t>{ 0x80 } + ModRM("/0", std::any_cast<Asm::Args::Mem8Ptr64>(args[0]).reg()) + std::any_cast<Asm::Args::Immediate8>(args[1]).getCode();
+
+ } else if (args[0].type() == typeid(Asm::Args::Mem32Ptr64) && args[1].type() == typeid(Asm::Args::Immediate32)) { // add [reg64], imm32
+ machine_code = std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Mem32Ptr64>(args[0]).reg()) + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();
+
+ } else if (args[0].type() == typeid(Asm::Args::Mem64Ptr64) && args[1].type() == typeid(Asm::Args::Immediate32)) { // add qword ptr [reg64], imm32 (sign-extended)
+ machine_code = REX("W") + std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Mem64Ptr64>(args[0]).reg()) + std::any_cast<Asm::Args::Immediate32>(args[1]).getCode();
+
+ } else if (args[0].type() == typeid(Asm::Args::Mem64Ptr64) && args[1].type() == typeid(Asm::Args::Immediate64)) { // add qword ptr [reg64], imm32 (sign-extended) - reduce imm64 to imm32!
+ Asm::Args::Immediate32 imm32{std::any_cast<Asm::Args::Immediate64>(args[1])};
+ machine_code = REX("W") + std::vector<uint8_t>{ 0x81 } + ModRM("/0", std::any_cast<Asm::Args::Mem64Ptr64>(args[0]).reg()) + imm32.getCode();
+
} else {
throw std::runtime_error("Unimplemented: add "s + args[0].type().name() + " "s + args[1].type().name());
}
@@ -48,11 +70,32 @@ bool registered {
registerOp(mangleName<Asm::Args::Register64, Asm::Args::Immediate32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_add>(args);
}) &&
+ registerOp(mangleName<Asm::Args::Register8, Asm::Args::Register8>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_add>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Register32, Asm::Args::Register32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_add>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Register64, Asm::Args::Register64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_add>(args);
+ }) &&
registerOp(mangleName<Asm::Args::Register32, Asm::Args::Mem32Ptr64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_add>(args);
}) &&
registerOp(mangleName<Asm::Args::Register64, Asm::Args::Mem64Ptr64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
return std::make_shared<Op_add>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Mem8Ptr64, Asm::Args::Immediate8>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_add>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Mem32Ptr64, Asm::Args::Immediate32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_add>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Mem64Ptr64, Asm::Args::Immediate32>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{
+ return std::make_shared<Op_add>(args);
+ }) &&
+ registerOp(mangleName<Asm::Args::Mem64Ptr64, Asm::Args::Immediate64>("add"), [](const Asm::Args& args) -> std::shared_ptr<Op>{ // automatically converted to 32-bit (sign extended) if small enough. Intel doesn't support ADD ..., imm64
+ return std::make_shared<Op_add>(args);
})
};
diff --git a/asm/intel64/encode.cpp b/asm/intel64/encode.cpp
index 51ca7a0..1b35d89 100644
--- a/asm/intel64/encode.cpp
+++ b/asm/intel64/encode.cpp
@@ -14,7 +14,7 @@ namespace {
std::shared_ptr<Op> makeLoadValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)
{
if (data.type() != FlowGraph::DataType::Int) {
- std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
+ throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
}
if (!data.storage())
@@ -41,7 +41,7 @@ std::shared_ptr<Op> makeLoadValue(const FlowGraph::Data& data, const FlowGraph::
std::shared_ptr<Op> makeStoreValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)
{
if (data.type() != FlowGraph::DataType::Int) {
- std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
+ throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
}
if (!data.storage())
@@ -60,7 +60,7 @@ std::shared_ptr<Op> makeStoreValue(const FlowGraph::Data& data, const FlowGraph:
std::shared_ptr<Op> makeAddValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)
{
if (data.type() != FlowGraph::DataType::Int) {
- std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
+ throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
}
if (!data.storage())
@@ -87,7 +87,7 @@ std::shared_ptr<Op> makeAddValue(const FlowGraph::Data& data, const FlowGraph::G
std::vector<std::shared_ptr<Chunk>> makeMulValue(const FlowGraph::Data& data, const FlowGraph::Graph& graph)
{
if (data.type() != FlowGraph::DataType::Int) {
- std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
+ throw std::runtime_error("Bad type for operand: "s + std::to_string(int(data.type())));
}
if (!data.storage())
diff --git a/asm/parse.cpp b/asm/parse.cpp
index e62f585..8f6f831 100644
--- a/asm/parse.cpp
+++ b/asm/parse.cpp
@@ -5,6 +5,7 @@
#include <boost/algorithm/string.hpp>
#include <exception>
+#include <functional>
#include <regex>
#include <unordered_set>
@@ -43,11 +44,13 @@ namespace {
}
}
+ std::string reg_re{"[[:alpha:]][[:alnum:]]*"};
+
// parse optional label
bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) {
parseWhitespace(asm_code, pos);
- std::regex re_label("([[:alpha:]]([[:alnum:]])+):", std::regex_constants::ECMAScript);
+ std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) {
@@ -64,7 +67,7 @@ namespace {
bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) {
parseWhitespace(asm_code, pos);
- std::regex re_mnemonic("[[:alpha:]]([[:alnum:]])+", std::regex_constants::ECMAScript);
+ std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) {
@@ -77,10 +80,10 @@ namespace {
return false;
}
- bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript);
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
@@ -88,6 +91,7 @@ namespace {
if (reg8.contains(name)) {
pos += name.size();
result = Asm::Args::Register8(name);
+ size_hint = 8;
return true;
}
}
@@ -95,10 +99,10 @@ namespace {
return false;
}
- bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript);
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
@@ -106,6 +110,7 @@ namespace {
if (reg32.contains(name)) {
pos += name.size();
result = Asm::Args::Register32(name);
+ size_hint = 32;
return true;
}
}
@@ -113,10 +118,10 @@ namespace {
return false;
}
- bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript);
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
@@ -124,6 +129,64 @@ namespace {
if (reg64.contains(name)) {
pos += name.size();
result = Asm::Args::Register64(name);
+ size_hint = 64;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[1].str())};
+ if (reg64.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem8Ptr64{name};
+ size_hint = 8;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[2].str())};
+ if (reg64.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem32Ptr64(name);
+ size_hint = 32;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[1].str())};
+ if (reg64.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem64Ptr64(name);
+ size_hint = 64;
return true;
}
}
@@ -131,7 +194,37 @@ namespace {
return false;
}
- bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ if (size_hint != 8)
+ return false;
+
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ int32_t value{};
+ try {
+ value = stoll(match[0]);
+ } catch (...) {
+ throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str());
+ }
+ if (value < -128 || value > 255)
+ throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str());
+
+ pos += match[0].length();
+ result = Asm::Args::Immediate8(static_cast<uint8_t>(value));
+ return true;
+ }
+
+ return false;
+ }
+
+ bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ if (size_hint != 32 && size_hint != 0)
+ return false;
+
parseWhitespace(asm_code, pos);
std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript);
@@ -152,18 +245,54 @@ namespace {
return false;
}
+ bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ if (size_hint != 64)
+ return false;
+
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ int64_t value{};
+ try {
+ value = stoll(match[0]);
+ } catch (...) {
+ throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str());
+ }
+
+ pos += match[0].length();
+ result = Asm::Args::Immediate64(static_cast<uint64_t>(value));
+ return true;
+ }
+
+ return false;
+ }
+
// parse optional single operand
- bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- if (parseRegister8(asm_code, pos, result))
+ if (parseRegister8(asm_code, pos, result, size_hint))
+ return true;
+ if (parseRegister32(asm_code, pos, result, size_hint))
+ return true;
+ if (parseRegister64(asm_code, pos, result, size_hint))
return true;
- if (parseRegister32(asm_code, pos, result))
+
+ if (parseMem8Ptr64(asm_code, pos, result, size_hint))
+ return true;
+ if (parseMem32Ptr64(asm_code, pos, result, size_hint))
return true;
- if (parseRegister64(asm_code, pos, result))
+ if (parseMem64Ptr64(asm_code, pos, result, size_hint))
return true;
- if (parseImmediate32(asm_code, pos, result))
+ if (parseImmediate8(asm_code, pos, result, size_hint))
+ return true;
+ if (parseImmediate32(asm_code, pos, result, size_hint))
+ return true;
+ if (parseImmediate64(asm_code, pos, result, size_hint))
return true;
return false;
@@ -172,12 +301,13 @@ namespace {
// parse optional multiple operands, separated by commas
void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) {
std::any operand;
- if (parseOperand(asm_code, pos, operand)) {
+ size_t size_hint{0}; // in bits, 0=no hint
+ if (parseOperand(asm_code, pos, operand, size_hint)) {
result.push_back(operand);
parseWhitespace(asm_code, pos);
while (pos < asm_code.size() && asm_code[pos] == ',') {
pos++;
- if (parseOperand(asm_code, pos, operand)) {
+ if (parseOperand(asm_code, pos, operand, size_hint)) {
result.push_back(operand);
} else {
throw std::runtime_error("Assembler error: expected operand after comma");
@@ -223,20 +353,27 @@ namespace {
// all optional:
// label: mnemonic operands... ;comment <eol>
- std::string result_string;
- if (parseLabel(asm_code, pos, result_string))
- result.emplace_back(std::make_shared<Label>(result_string));
+ std::string label;
+ std::function<void()> label_fn {[](){}};
+ if (parseLabel(asm_code, pos, label))
+ label_fn = [&]() { result.emplace_back(std::make_shared<Label>(label)); }; // defer to successfully completed line
- if (parseMnemonic(asm_code, pos, result_string)) {
- Asm::Args args;
+ std::string mnemonic;
+ Asm::Args args;
+ std::function<void()> mnemonic_fn {[](){}};
+ if (parseMnemonic(asm_code, pos, mnemonic)) {
parseOperands(asm_code, pos, args);
- result.emplace_back(makeOp(result_string, args));
+ mnemonic_fn = [&]() { result.emplace_back(makeOp(mnemonic, args)); }; // defer to successfully completed line
}
parseComment(asm_code, pos);
if (!parseEol(asm_code, pos))
throw std::runtime_error("Assembler error at pos "s + std::to_string(pos));
+
+ // Append only if no error occurred, to get the correct error
+ label_fn();
+ mnemonic_fn();
}
} // namespace
diff --git a/tests/test-asm.cpp b/tests/test-asm.cpp
index f4a1a2c..019f89c 100644
--- a/tests/test-asm.cpp
+++ b/tests/test-asm.cpp
@@ -178,22 +178,34 @@ TEST_F(AsmParseTest, parse_op_3) {
}
TEST_F(AsmParseTest, parse_op_4) {
- std::vector<std::shared_ptr<Chunk>> chunks4{parseAsm("add [edi], 3")};
+ std::vector<std::shared_ptr<Chunk>> chunks4{parseAsm("add [rdi], 3")};
ASSERT_EQ(chunks4.size(), 1);
}
+TEST_F(AsmParseTest, parse_op_4_error) {
+ ASSERT_THROW(parseAsm("add [edi], 3"), std::runtime_error);
+}
+
TEST_F(AsmParseTest, parse_op_5) {
- std::vector<std::shared_ptr<Chunk>> chunks5{parseAsm("add byte ptr [edi], 3")};
+ std::vector<std::shared_ptr<Chunk>> chunks5{parseAsm("add byte ptr [rdi], 3")};
ASSERT_EQ(chunks5.size(), 1);
}
+TEST_F(AsmParseTest, parse_op_5_error) {
+ ASSERT_THROW(parseAsm("add byte ptr [edi], 3"), std::runtime_error);
+}
+
TEST_F(AsmParseTest, parse_op_6) {
- std::vector<std::shared_ptr<Chunk>> chunks6{parseAsm("add dword ptr[edi], 3")};
+ std::vector<std::shared_ptr<Chunk>> chunks6{parseAsm("add dword ptr[rdi], 3")};
ASSERT_EQ(chunks6.size(), 1);
}
+TEST_F(AsmParseTest, parse_op_6_error) {
+ ASSERT_THROW(parseAsm("add dword ptr[al], 3"), std::runtime_error);
+}
+
TEST_F(AsmParseTest, parse_op_7) {
- std::vector<std::shared_ptr<Chunk>> chunks7{parseAsm("add qword ptr[edi], 3")};
+ std::vector<std::shared_ptr<Chunk>> chunks7{parseAsm("add qword ptr[rdi], 3")};
ASSERT_EQ(chunks7.size(), 1);
}