diff options
Diffstat (limited to 'asm/parse.cpp')
-rw-r--r-- | asm/parse.cpp | 181 |
1 files changed, 159 insertions, 22 deletions
diff --git a/asm/parse.cpp b/asm/parse.cpp index e62f585..8f6f831 100644 --- a/asm/parse.cpp +++ b/asm/parse.cpp @@ -5,6 +5,7 @@ #include <boost/algorithm/string.hpp> #include <exception> +#include <functional> #include <regex> #include <unordered_set> @@ -43,11 +44,13 @@ namespace { } } + std::string reg_re{"[[:alpha:]][[:alnum:]]*"}; + // parse optional label bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) { parseWhitespace(asm_code, pos); - std::regex re_label("([[:alpha:]]([[:alnum:]])+):", std::regex_constants::ECMAScript); + std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) { @@ -64,7 +67,7 @@ namespace { bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) { parseWhitespace(asm_code, pos); - std::regex re_mnemonic("[[:alpha:]]([[:alnum:]])+", std::regex_constants::ECMAScript); + std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) { @@ -77,10 +80,10 @@ namespace { return false; } - bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -88,6 +91,7 @@ namespace { if (reg8.contains(name)) { pos += name.size(); result = Asm::Args::Register8(name); + size_hint = 8; return true; } } @@ -95,10 +99,10 @@ namespace { return false; } - bool 
parseRegister32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -106,6 +110,7 @@ namespace { if (reg32.contains(name)) { pos += name.size(); result = Asm::Args::Register32(name); + size_hint = 32; return true; } } @@ -113,10 +118,10 @@ namespace { return false; } - bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - std::regex re_name("[[:alpha:]]+", std::regex_constants::ECMAScript); + std::regex re_name(reg_re, std::regex_constants::ECMAScript); std::smatch match; if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { @@ -124,6 +129,64 @@ namespace { if (reg64.contains(name)) { pos += name.size(); result = Asm::Args::Register64(name); + size_hint = 64; + return true; + } + } + + return false; + } + + bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[1].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem8Ptr64{name}; + size_hint = 8; + return true; + } + } + + return false; + } + + bool 
parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[2].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem32Ptr64(name); + size_hint = 32; + return true; + } + } + + return false; + } + + bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + parseWhitespace(asm_code, pos); + + std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + std::string name {boost::algorithm::to_lower_copy(match[1].str())}; + if (reg64.contains(name)) { + pos += match[0].length(); + result = Asm::Args::Mem64Ptr64(name); + size_hint = 64; return true; } } @@ -131,7 +194,37 @@ namespace { return false; } - bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 8) + return false; + + parseWhitespace(asm_code, pos); + + std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + int32_t value{}; + try { + value = stoll(match[0]); + } catch (...) 
{ + throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); + } + if (value < -128 || value > 255) + throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str()); + + pos += match[0].length(); + result = Asm::Args::Immediate8(static_cast<uint8_t>(value)); + return true; + } + + return false; + } + + bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 32 && size_hint != 0) + return false; + parseWhitespace(asm_code, pos); std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); @@ -152,18 +245,54 @@ namespace { return false; } + bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { + if (size_hint != 64) + return false; + + parseWhitespace(asm_code, pos); + + std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + + std::smatch match; + if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { + int64_t value{}; + try { + value = stoll(match[0]); + } catch (...) 
{ + throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); + } + + pos += match[0].length(); + result = Asm::Args::Immediate64(static_cast<uint64_t>(value)); + return true; + } + + return false; + } + // parse optional single operand - bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result) { + bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { parseWhitespace(asm_code, pos); - if (parseRegister8(asm_code, pos, result)) + if (parseRegister8(asm_code, pos, result, size_hint)) + return true; + if (parseRegister32(asm_code, pos, result, size_hint)) + return true; + if (parseRegister64(asm_code, pos, result, size_hint)) return true; - if (parseRegister32(asm_code, pos, result)) + + if (parseMem8Ptr64(asm_code, pos, result, size_hint)) + return true; + if (parseMem32Ptr64(asm_code, pos, result, size_hint)) return true; - if (parseRegister64(asm_code, pos, result)) + if (parseMem64Ptr64(asm_code, pos, result, size_hint)) return true; - if (parseImmediate32(asm_code, pos, result)) + if (parseImmediate8(asm_code, pos, result, size_hint)) + return true; + if (parseImmediate32(asm_code, pos, result, size_hint)) + return true; + if (parseImmediate64(asm_code, pos, result, size_hint)) return true; return false; @@ -172,12 +301,13 @@ namespace { // parse optional multiple operands, separated by commas void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) { std::any operand; - if (parseOperand(asm_code, pos, operand)) { + size_t size_hint{0}; // in bits, 0=no hint + if (parseOperand(asm_code, pos, operand, size_hint)) { result.push_back(operand); parseWhitespace(asm_code, pos); while (pos < asm_code.size() && asm_code[pos] == ',') { pos++; - if (parseOperand(asm_code, pos, operand)) { + if (parseOperand(asm_code, pos, operand, size_hint)) { result.push_back(operand); } else { throw std::runtime_error("Assembler error: expected operand after comma"); @@ 
-223,20 +353,27 @@ namespace { // all optional: // label: mnemonic operands... ;comment <eol> - std::string result_string; - if (parseLabel(asm_code, pos, result_string)) - result.emplace_back(std::make_shared<Label>(result_string)); + std::string label; + std::function<void()> label_fn {[](){}}; + if (parseLabel(asm_code, pos, label)) + label_fn = [&]() { result.emplace_back(std::make_shared<Label>(label)); }; // defer to successfully completed line - if (parseMnemonic(asm_code, pos, result_string)) { - Asm::Args args; + std::string mnemonic; + Asm::Args args; + std::function<void()> mnemonic_fn {[](){}}; + if (parseMnemonic(asm_code, pos, mnemonic)) { parseOperands(asm_code, pos, args); - result.emplace_back(makeOp(result_string, args)); + mnemonic_fn = [&]() { result.emplace_back(makeOp(mnemonic, args)); }; // defer to successfully completed line } parseComment(asm_code, pos); if (!parseEol(asm_code, pos)) throw std::runtime_error("Assembler error at pos "s + std::to_string(pos)); + + // Append only if no error occurred, to get the correct error + label_fn(); + mnemonic_fn(); } } // namespace |