summary refs log tree commit diff homepage
path: root/asm/parse.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'asm/parse.cpp')
-rw-r--r-- asm/parse.cpp 181
1 file changed, 159 insertions, 22 deletions
diff --git a/asm/parse.cpp b/asm/parse.cpp
index e62f585..8f6f831 100644
--- a/asm/parse.cpp
+++ b/asm/parse.cpp
@@ -5,6 +5,7 @@
#include <boost/algorithm/string.hpp>
#include <exception>
+#include <functional>
#include <regex>
#include <unordered_set>
@@ -43,11 +44,13 @@ namespace {
}
}
+ std::string reg_re{"[[:alpha:]][[:alnum:]]*"};
+
// parse optional label
bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) {
parseWhitespace(asm_code, pos);
- std::regex re_label("([[:alpha:]]([[:alnum:]])+):", std::regex_constants::ECMAScript);
+ std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) {
@@ -64,7 +67,7 @@ namespace {
bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) {
parseWhitespace(asm_code, pos);
- std::regex re_mnemonic("[[:alpha:]]([[:alnum:]])+", std::regex_constants::ECMAScript);
+ std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) {
@@ -77,10 +80,10 @@ namespace {
return false;
}
- bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript);
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
@@ -88,6 +91,7 @@ namespace {
if (reg8.contains(name)) {
pos += name.size();
result = Asm::Args::Register8(name);
+ size_hint = 8;
return true;
}
}
@@ -95,10 +99,10 @@ namespace {
return false;
}
- bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript);
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
@@ -106,6 +110,7 @@ namespace {
if (reg32.contains(name)) {
pos += name.size();
result = Asm::Args::Register32(name);
+ size_hint = 32;
return true;
}
}
@@ -113,10 +118,10 @@ namespace {
return false;
}
- bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript);
+ std::regex re_name(reg_re, std::regex_constants::ECMAScript);
std::smatch match;
if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
@@ -124,6 +129,64 @@ namespace {
if (reg64.contains(name)) {
pos += name.size();
result = Asm::Args::Register64(name);
+ size_hint = 64;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[1].str())};
+ if (reg64.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem8Ptr64{name};
+ size_hint = 8;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[2].str())};
+ if (reg64.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem32Ptr64(name);
+ size_hint = 32;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ std::string name {boost::algorithm::to_lower_copy(match[1].str())};
+ if (reg64.contains(name)) {
+ pos += match[0].length();
+ result = Asm::Args::Mem64Ptr64(name);
+ size_hint = 64;
return true;
}
}
@@ -131,7 +194,37 @@ namespace {
return false;
}
- bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ if (size_hint != 8)
+ return false;
+
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ int32_t value{};
+ try {
+ value = stoll(match[0]);
+ } catch (...) {
+ throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str());
+ }
+ if (value < -128 || value > 255)
+ throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str());
+
+ pos += match[0].length();
+ result = Asm::Args::Immediate8(static_cast<uint8_t>(value));
+ return true;
+ }
+
+ return false;
+ }
+
+ bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ if (size_hint != 32 && size_hint != 0)
+ return false;
+
parseWhitespace(asm_code, pos);
std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript);
@@ -152,18 +245,54 @@ namespace {
return false;
}
+ bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
+ if (size_hint != 64)
+ return false;
+
+ parseWhitespace(asm_code, pos);
+
+ std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript);
+
+ std::smatch match;
+ if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) {
+ int64_t value{};
+ try {
+ value = stoll(match[0]);
+ } catch (...) {
+ throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str());
+ }
+
+ pos += match[0].length();
+ result = Asm::Args::Immediate64(static_cast<uint64_t>(value));
+ return true;
+ }
+
+ return false;
+ }
+
// parse optional single operand
- bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result) {
+ bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) {
parseWhitespace(asm_code, pos);
- if (parseRegister8(asm_code, pos, result))
+ if (parseRegister8(asm_code, pos, result, size_hint))
+ return true;
+ if (parseRegister32(asm_code, pos, result, size_hint))
+ return true;
+ if (parseRegister64(asm_code, pos, result, size_hint))
return true;
- if (parseRegister32(asm_code, pos, result))
+
+ if (parseMem8Ptr64(asm_code, pos, result, size_hint))
+ return true;
+ if (parseMem32Ptr64(asm_code, pos, result, size_hint))
return true;
- if (parseRegister64(asm_code, pos, result))
+ if (parseMem64Ptr64(asm_code, pos, result, size_hint))
return true;
- if (parseImmediate32(asm_code, pos, result))
+ if (parseImmediate8(asm_code, pos, result, size_hint))
+ return true;
+ if (parseImmediate32(asm_code, pos, result, size_hint))
+ return true;
+ if (parseImmediate64(asm_code, pos, result, size_hint))
return true;
return false;
@@ -172,12 +301,13 @@ namespace {
// parse optional multiple operands, separated by commas
void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) {
std::any operand;
- if (parseOperand(asm_code, pos, operand)) {
+ size_t size_hint{0}; // in bits, 0=no hint
+ if (parseOperand(asm_code, pos, operand, size_hint)) {
result.push_back(operand);
parseWhitespace(asm_code, pos);
while (pos < asm_code.size() && asm_code[pos] == ',') {
pos++;
- if (parseOperand(asm_code, pos, operand)) {
+ if (parseOperand(asm_code, pos, operand, size_hint)) {
result.push_back(operand);
} else {
throw std::runtime_error("Assembler error: expected operand after comma");
@@ -223,20 +353,27 @@ namespace {
// all optional:
// label: mnemonic operands... ;comment <eol>
- std::string result_string;
- if (parseLabel(asm_code, pos, result_string))
- result.emplace_back(std::make_shared<Label>(result_string));
+ std::string label;
+ std::function<void()> label_fn {[](){}};
+ if (parseLabel(asm_code, pos, label))
+ label_fn = [&]() { result.emplace_back(std::make_shared<Label>(label)); }; // defer to successfully completed line
- if (parseMnemonic(asm_code, pos, result_string)) {
- Asm::Args args;
+ std::string mnemonic;
+ Asm::Args args;
+ std::function<void()> mnemonic_fn {[](){}};
+ if (parseMnemonic(asm_code, pos, mnemonic)) {
parseOperands(asm_code, pos, args);
- result.emplace_back(makeOp(result_string, args));
+ mnemonic_fn = [&]() { result.emplace_back(makeOp(mnemonic, args)); }; // defer to successfully completed line
}
parseComment(asm_code, pos);
if (!parseEol(asm_code, pos))
throw std::runtime_error("Assembler error at pos "s + std::to_string(pos));
+
+ // Append only if no error occurred, to get the correct error
+ label_fn();
+ mnemonic_fn();
}
} // namespace