diff options
Diffstat (limited to 'asm')
| -rw-r--r-- | asm/arm/README.md | 4 | ||||
| -rw-r--r-- | asm/arm/TODO | 1 | ||||
| -rw-r--r-- | asm/arm/YMakefile | 8 | ||||
| -rw-r--r-- | asm/arm/assembler.h | 38 | ||||
| -rw-r--r-- | asm/arm/instruction.h | 109 | ||||
| -rw-r--r-- | asm/arm/main.cpp | 41 | ||||
| -rw-r--r-- | asm/arm/parse.cpp | 441 | ||||
| -rw-r--r-- | asm/arm/parse.h | 7 | 
8 files changed, 649 insertions, 0 deletions
| diff --git a/asm/arm/README.md b/asm/arm/README.md new file mode 100644 index 0000000..742c243 --- /dev/null +++ b/asm/arm/README.md @@ -0,0 +1,4 @@ +Assembler for ARMv6-M +===================== + +Supports thumb instruction set diff --git a/asm/arm/TODO b/asm/arm/TODO new file mode 100644 index 0000000..2d47ab0 --- /dev/null +++ b/asm/arm/TODO @@ -0,0 +1 @@ +mnemonic alias diff --git a/asm/arm/YMakefile b/asm/arm/YMakefile new file mode 100644 index 0000000..833bb88 --- /dev/null +++ b/asm/arm/YMakefile @@ -0,0 +1,8 @@ +<ymake> + <build> +  <name>arm</name> +  <source>main.cpp</source> +  <linklib>fmt</linklib> +  <linklib>reichwein</linklib> + </build> +</ymake> diff --git a/asm/arm/assembler.h b/asm/arm/assembler.h new file mode 100644 index 0000000..13eca00 --- /dev/null +++ b/asm/arm/assembler.h @@ -0,0 +1,38 @@ +#pragma once + +#include <string> +#include <vector> + +#include "instruction.h" +#include "parse.h" + +class Encoding +{ +}; + +class OpCode: public Encoding +{ +private: + Instruction _instruction; + std::vector<std::string> _arguments; +}; + +class Data: public Encoding +{ +}; + +using Assembly = std::vector<Encoding>; + +class Assembler +{ +public: + Assembler(){} + +// passes: +//  0. parse +//  1. assign sizes and offsets to codes (assign labels) +//  2. assemble to code sequence (use labels for offsets) + + code_sequence encode(const std::string& source){ throw std::runtime_error("Assembler.encode unimplemented");} + std::string decode(code_sequence) { throw std::runtime_error("Assembler.decode unimplemented");} +}; diff --git a/asm/arm/instruction.h b/asm/arm/instruction.h new file mode 100644 index 0000000..224cc6e --- /dev/null +++ b/asm/arm/instruction.h @@ -0,0 +1,109 @@ +#pragma once + +#include <memory> +#include <stdexcept> +#include <string> +#include <vector> + +using namespace std::string_literals; + +using code_sequence = std::vector<uint8_t>; + +template<typename from_t> +code_sequence to_code_sequence(from_t v); + +template<> +code_sequence to_code_sequence<uint16_t>(uint16_t v) { + return code_sequence{static_cast<uint8_t>(v & 0xFF), static_cast<uint8_t>(v >> 8)}; +} + +template<> +code_sequence to_code_sequence<uint32_t>(uint32_t v) { + return code_sequence{static_cast<uint8_t>(v & 0xFF), static_cast<uint8_t>(v >> 8), static_cast<uint8_t>(v >> 16),static_cast<uint8_t>(v >> 24) }; +} + +uint32_t high_bits(uint8_t number) +{ + return ~((static_cast<uint32_t>(1) << (32 - number)) - 1); +} + +// Identify operator with leading bits +class Pattern +{ +public: + Pattern(uint32_t bits, uint32_t mask): _bits{bits}, _mask{mask} {} + + template<typename T> + T encode() + { +  return static_cast<T>(_bits); + } + + +private: + uint32_t _bits; + uint32_t _mask; +}; + +class Operand +{ +protected: + Operand(uint8_t pos): _pos(pos){} +private: + uint8_t _pos; +}; + +class Register: public Operand +{ +public: + Register(uint8_t pos): Operand{pos} {}; +}; + +class Immediate: public Operand +{ +public: + Immediate(uint8_t pos, uint8_t bits): Operand{pos}, _bits{bits} {} +private: + uint8_t _bits; +}; + +using Operands = std::vector<std::shared_ptr<Operand>>; + +// Pattern [, Operand, ...] +class Instruction +{ +public: + Instruction(const std::string& mnemonic, uint8_t size, Pattern pattern, Operands operands): _mnemonic(mnemonic), _size(size), _pattern(pattern), _operands(operands) {} + code_sequence encode(const std::vector<std::string>& arguments) + { +  if (_size == 2) { // 16 bit thumb insn +   uint16_t result{ _pattern.encode<uint16_t>()}; +   return to_code_sequence(result); +  } else if (_size == 4) { // 32 bit thumb insn +   uint32_t result{ _pattern.encode<uint32_t>()}; +   return to_code_sequence(result); +  } else { +   throw std::runtime_error("Unsupported instruction size "s + std::to_string(_size)); +  } + } + +private: + std::string _mnemonic; + uint8_t _size; + Pattern _pattern; + Operands _operands; +}; + +namespace { + // factory functions + std::shared_ptr<Operand> imm(uint8_t pos, uint8_t size){ return std::make_shared<Immediate>(pos, size); }; + std::shared_ptr<Operand> reg(uint8_t pos) { return std::make_shared<Register>(pos); }; + + std::vector<Instruction> insns{ +  {"adcs", 2, Pattern(0x4140, high_bits(10)), Operands{reg(0), reg(3)}}, + +  {"lsls", 2, Pattern(0x0000, high_bits(5)), Operands{reg(0), reg(3), imm(6, 5)}} + }; +}; + + diff --git a/asm/arm/main.cpp b/asm/arm/main.cpp new file mode 100644 index 0000000..20e67ab --- /dev/null +++ b/asm/arm/main.cpp @@ -0,0 +1,41 @@ +#include <iostream> +#include <stdexcept> + +#include <fmt/format.h> + +#include <libreichwein/file.h> + +#include "assembler.h" +#include "instruction.h" + +void dump(const code_sequence& c) +{ + for (size_t i = 0; i < c.size(); ++i) { +  if ((i % 16) == 0) { +   if (i > 0) +    std::cout << std::endl; +  } else { +   std::cout << " "; +  } +  std::cout << fmt::format("{:02x}", c[i]); + } + std::cout << std::endl; +} + +int main(int argc, char* argv[]) +{ + try { +  if (argc == 2) { +   Assembler assembler; +   std::string source {Reichwein::File::getFile(argv[1])}; +   code_sequence result {assembler.encode(source)}; +   dump(result); +  } +  return 0; + } catch (const std::exception& ex) { +  std::cerr << "Error: " << ex.what() << std::endl; +  return 1; + } + return 2; +} + diff --git a/asm/arm/parse.cpp b/asm/arm/parse.cpp new file mode 100644 index 0000000..a3156c2 --- /dev/null +++ b/asm/arm/parse.cpp @@ -0,0 +1,441 @@ +#include "parse.h" + +#include "asm/assembler.h" + +#include <boost/algorithm/string.hpp> + +#include <exception> +#include <functional> +#include <regex> +#include <unordered_set> + +using namespace std::string_literals; + +namespace { +  + std::unordered_set<std::string> reg8 { +  "al", "ah", +  "bl", "bh", +  "cl", "ch", +  "dl", "dh", + }; + + std::unordered_set<std::string> reg16 { +  "ax", "sp", +  "bx", "bp", +  "cx", "si", +  "dx", "di", + }; + + std::unordered_set<std::string> reg32 { +  "eax", "esp", +  "ebx", "ebp", +  "ecx", "esi", +  "edx", "edi", + }; + + std::unordered_set<std::string> reg64 { +  "rax", "rsp", +  "rbx", "rbp", +  "rcx", "rsi", +  "rdx", "rdi", + }; + + // skip optional whitespace + void parseWhitespace(const std::string& asm_code, size_t& pos) { +  std::regex re_whitespace("[ \\t]+", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_whitespace, std::regex_constants::match_continuous)) { +   pos += match[0].length(); +  } + } + + std::string reg_re{"[[:alpha:]][[:alnum:]]*"}; + + // parse optional label + bool parseLabel(const std::string& asm_code, size_t& pos, std::string& result) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_label("("s + reg_re + "):"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_label, std::regex_constants::match_continuous)) { +   pos += match[0].length(); +   result = match[1]; +   return true; +  } + +  return false; + } + + // parse optional mnemonic + // return true iff mnemonic found + bool parseMnemonic(const std::string& asm_code, size_t& pos, std::string& result) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_mnemonic(reg_re, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_mnemonic, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[0].str())}; +   pos += name.size(); +   result = name; +   return true; +  } + +  return false; + } +  + bool parseRegister8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name(reg_re, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[0].str())}; +   if (reg8.contains(name)) { +    pos += name.size(); +    result = Asm::Args::Register8(name); +    size_hint = 8; +    return true; +   } +  } + +  return false; + } + + bool parseRegister16(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name(reg_re, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[0].str())}; +   if (reg16.contains(name)) { +    pos += name.size(); +    result = Asm::Args::Register16(name); +    size_hint = 16; +    return true; +   } +  } + +  return false; + } + + bool parseRegister32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name(reg_re, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[0].str())}; +   if (reg32.contains(name)) { +    pos += name.size(); +    result = Asm::Args::Register32(name); +    size_hint = 32; +    return true; +   } +  } + +  return false; + } + + bool parseRegister64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name(reg_re, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[0].str())}; +   if (reg64.contains(name)) { +    pos += name.size(); +    result = Asm::Args::Register64(name); +    size_hint = 64; +    return true; +   } +  } + +  return false; + } + + bool parseMem8Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("byte ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[1].str())}; +   if (reg64.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem8Ptr64{name}; +    size_hint = 8; +    return true; +   } +  } + +  return false; + } + + bool parseMem16Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("(word ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[2].str())}; +   if (reg16.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem16Ptr64(name); +    size_hint = 16; +    return true; +   } +  } + +  return false; + } + + bool parseMem32Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("(dword ptr *)?\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[2].str())}; +   if (reg64.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem32Ptr64(name); +    size_hint = 32; +    return true; +   } +  } + +  return false; + } + + bool parseMem64Ptr64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("qword ptr *\\[("s + reg_re + ")\\]"s, std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   std::string name {boost::algorithm::to_lower_copy(match[1].str())}; +   if (reg64.contains(name)) { +    pos += match[0].length(); +    result = Asm::Args::Mem64Ptr64(name); +    size_hint = 64; +    return true; +   } +  } + +  return false; + } + + bool parseImmediate8(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  if (size_hint != 8) +   return false; + +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   int32_t value{}; +   try { +    value = stoll(match[0]); +   } catch (...) { +    throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); +   } +   if (value < -128 || value > 255) +    throw std::runtime_error("Assembler parse error: Bad 8 bit immediate: "s + match[0].str()); + +   pos += match[0].length(); +   result = Asm::Args::Immediate8(static_cast<uint8_t>(value)); +   return true; +  } + +  return false; + } + + bool parseImmediate32(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  if (size_hint != 32 && size_hint != 0) +   return false; +   +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   int32_t value{}; +   try { +    value = stoll(match[0]); +   } catch (...) { +    throw std::runtime_error("Assembler parse error: Bad Immediate: "s + match[0].str()); +   } +   pos += match[0].length(); +   result = Asm::Args::Immediate32(static_cast<uint32_t>(value)); +   return true; +  } + +  return false; + } + + bool parseImmediate64(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  if (size_hint != 64) +   return false; + +  parseWhitespace(asm_code, pos); +   +  std::regex re_name("[[:digit:]]+|0x[[:xdigit:]]+", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_name, std::regex_constants::match_continuous)) { +   int64_t value{}; +   try { +    value = stoll(match[0]); +   } catch (...) { +    throw std::runtime_error("Assembler parse error: Bad immediate: "s + match[0].str()); +   } + +   pos += match[0].length(); +   result = Asm::Args::Immediate64(static_cast<uint64_t>(value)); +   return true; +  } + +  return false; + } + + // parse optional single operand + bool parseOperand(const std::string& asm_code, size_t& pos, std::any& result, size_t& size_hint) { +  parseWhitespace(asm_code, pos); + +  if (parseRegister8(asm_code, pos, result, size_hint)) +   return true; +  if (parseRegister16(asm_code, pos, result, size_hint)) +   return true; +  if (parseRegister32(asm_code, pos, result, size_hint)) +   return true; +  if (parseRegister64(asm_code, pos, result, size_hint)) +   return true; +   +  if (parseMem8Ptr64(asm_code, pos, result, size_hint)) +   return true; +  if (parseMem16Ptr64(asm_code, pos, result, size_hint)) +   return true; +  if (parseMem32Ptr64(asm_code, pos, result, size_hint)) +   return true; +  if (parseMem64Ptr64(asm_code, pos, result, size_hint)) +   return true; + +  if (parseImmediate8(asm_code, pos, result, size_hint)) +   return true; +  if (parseImmediate32(asm_code, pos, result, size_hint)) +   return true; +  if (parseImmediate64(asm_code, pos, result, size_hint)) +   return true; + +  return false; + } + + // parse optional multiple operands, separated by commas + void parseOperands(const std::string& asm_code, size_t& pos, Asm::Args& result) { +  std::any operand; +  size_t size_hint{0}; // in bits, 0=no hint +  if (parseOperand(asm_code, pos, operand, size_hint)) { +   result.push_back(operand); +   parseWhitespace(asm_code, pos); +   while (pos < asm_code.size() && asm_code[pos] == ',') { +    pos++; +    if (parseOperand(asm_code, pos, operand, size_hint)) { +     result.push_back(operand); +    } else { +     throw std::runtime_error("Assembler error: expected operand after comma"); +    } +    parseWhitespace(asm_code, pos); +   } +  } + } + + // parse optional comment + void parseComment(const std::string& asm_code, size_t& pos) { +  parseWhitespace(asm_code, pos); +   +  std::regex re_comment("(;|//).*", std::regex_constants::ECMAScript); + +  std::smatch match; +  if (std::regex_search(asm_code.cbegin() + pos, asm_code.cend(), match, re_comment, std::regex_constants::match_continuous)) { +   pos += match[0].length(); +  } + } + + // parse end of line (or whole code) + bool parseEol(const std::string& asm_code, size_t& pos) { +  parseWhitespace(asm_code, pos); + +  if (pos < asm_code.size() && asm_code[pos] != 0x0a && asm_code[pos] != 0x0d) +   return false; // this is the only case where parseEol() doesn't work + +  while (pos < asm_code.size()) { +   char c { asm_code[pos] }; +   if (c == 0x0a || c == 0x0d) { +    pos++; +   } else { +    break; +   } +  } + +  return true; + } + + // parse single line + void parseLine(const std::string& asm_code, size_t& pos, std::vector<std::shared_ptr<Chunk>>& result) { +  // all optional: +  // label: mnemonic operands... ;comment <eol> + +  std::string label; +  std::function<void()> label_fn {[](){}}; +  if (parseLabel(asm_code, pos, label)) +   label_fn = [&]() { result.emplace_back(std::make_shared<Label>(label)); }; // defer to successfully completed line + +  std::string mnemonic; +  Asm::Args args; +  std::function<void()> mnemonic_fn {[](){}}; +  if (parseMnemonic(asm_code, pos, mnemonic)) { +   parseOperands(asm_code, pos, args); +   mnemonic_fn = [&]() { result.emplace_back(makeOp(mnemonic, args)); }; // defer to successfully completed line +  } + +  parseComment(asm_code, pos); + +  if (!parseEol(asm_code, pos)) +   throw std::runtime_error("Assembler error at pos "s + std::to_string(pos)); + +  // Append only if no error occured, to get the correct error +  label_fn(); +  mnemonic_fn(); + } + +} // namespace + +std::vector<std::shared_ptr<Chunk>> parseAsm(const std::string& asm_code) +{ + std::vector<std::shared_ptr<Chunk>> result; + size_t pos{0}; + + while (pos != asm_code.size()) { +  parseLine(asm_code, pos, result); + } + + return result; +} + diff --git a/asm/arm/parse.h b/asm/arm/parse.h new file mode 100644 index 0000000..4dce4b2 --- /dev/null +++ b/asm/arm/parse.h @@ -0,0 +1,7 @@ +#pragma once + +#include <vector> +#include <string> + +// asm_code: multiline asm code +std::vector<std::vector<std::string>> parseAsm(const std::string& asm_code); | 
