From 7250bbe5ae2d2ee6b0334bc462aab73f7d8dac0e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Fri, 20 Nov 2020 10:59:18 +0100 Subject: Assembler bugfixes - tests run successfully now --- asm/parse.cpp | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 159 insertions(+), 22 deletions(-) (limited to 'asm/parse.cpp') diff --git a/asm/parse.cpp b/asm/parse.cpp index e62f585..8f6f831 100644 --- a/asm/parse.cpp +++ b/asm/parse.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -43,11 +44,13 @@ namespace { } } + std::string reg_re{"[[:alpha:]][[:alnum:]]*"}; + // parse optional label bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) { parseWhitespace(asm_code, pos); - std::regex re_label("([[:alpha:]]([[:alnum:]])+):", std::regex_constants::ECMAScript); + std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) { @@ -64,7 +67,7 @@ namespace { bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) { parseWhitespace(asm_code, pos); - std::regex re_mnemonic("[[:alpha:]]([[:alnum:]])+", std::regex_constants::ECMAScript); + std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) { @@ -77,10 +80,10 @@ namespace { return false; } - bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -88,6 +91,7 @@ namespace { if (reg8.contains(name)) { pos += name.size(); result = Asm::Args::Register8(name); + size_hint = 8; return true; } } @@ -95,10 +99,10 @@ namespace { return false; } - bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -106,6 +110,7 @@ namespace { if (reg32.contains(name)) { pos += name.size(); result = Asm::Args::Register32(name); + size_hint = 32; return true; } } @@ -113,10 +118,10 @@ namespace { return false; } - bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -124,6 +129,64 @@ namespace { if (reg64.contains(name)) { pos += name.size(); result = Asm::Args::Register64(name); + size_hint = 64; + return true; + } + } + + return false; + } + + bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[1].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem8Ptr64{name}; + size_hint = 8; + return true; + } + } + + return false; + } + + bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[2].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem32Ptr64(name); + size_hint = 32; + return true; + } + } + + return false; + } + + bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[1].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem64Ptr64(name); + size_hint = 64; return true; } } @@ -131,7 +194,37 @@ namespace { return false; } - bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 8) + return false; + + parseWhitespace(asm_code, pos); + + std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + int32_t value{}; + try { + value = stoll(match[0]); + } catch (...) { + throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); + } + if (value < -128 || value > 255) + throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str()); + + pos += match[0].length(); + result = Asm::Args::Immediate8(static_cast(value)); + return true; + } + + return false; + } + + bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 32 && size_hint != 0) + return false; + parseWhitespace(asm_code, pos); std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); @@ -152,18 +245,54 @@ namespace { return false; } + bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 64) + return false; + + parseWhitespace(asm_code, pos); + + std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + int64_t value{}; + try { + value = stoll(match[0]); + } catch (...) { + throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); + } + + pos += match[0].length(); + result = Asm::Args::Immediate64(static_cast(value)); + return true; + } + + return false; + } + // parse optional single operand - bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - if (parseRegister8(asm_code, pos, result)) + if (parseRegister8(asm_code, pos, result, size_hint)) + return true; + if (parseRegister32(asm_code, pos, result, size_hint)) + return true; + if (parseRegister64(asm_code, pos, result, size_hint)) return true; - if (parseRegister32(asm_code, pos, result)) + + if (parseMem8Ptr64(asm_code, pos, result, size_hint)) + return true; + if (parseMem32Ptr64(asm_code, pos, result, size_hint)) return true; - if (parseRegister64(asm_code, pos, result)) + if (parseMem64Ptr64(asm_code, pos, result, size_hint)) return true; - if (parseImmediate32(asm_code, pos, result)) + if (parseImmediate8(asm_code, pos, result, size_hint)) + return true; + if (parseImmediate32(asm_code, pos, result, size_hint)) + return true; + if (parseImmediate64(asm_code, pos, result, size_hint)) return true; return false; @@ -172,12 +301,13 @@ namespace { // parse optional multiple operands, separated by commas void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) { std::any operand; - if (parseOperand(asm_code, pos, operand)) { + size_t size_hint{0}; // in bits, 0=no hint + if (parseOperand(asm_code, pos, operand, size_hint)) { result.push_back(operand); parseWhitespace(asm_code, pos); while (pos < asm_code.size() && asm_code[pos] == ',') { pos++; - if (parseOperand(asm_code, pos, operand)) { + if (parseOperand(asm_code, pos, operand, size_hint)) { result.push_back(operand); } else { throw std::runtime_error("Assembler error: expected operand after comma"); @@ -223,20 +353,27 @@ namespace { // all optional: // label: mnemonic operands... ;comment - std::string result_string; - if (parseLabel(asm_code, pos, result_string)) - result.emplace_back(std::make_shared