diff options
| -rw-r--r-- | include/unicode.h | 63 | 
1 files changed, 54 insertions, 9 deletions
| diff --git a/include/unicode.h b/include/unicode.h index 5774db7..8dedb19 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -548,6 +548,47 @@ namespace unicode {    return it2 - it1;   } + template<int value_size> + struct ConvertInputOptimizer {}; + + template<> struct ConvertInputOptimizer<1> + { +  static const uint32_t ascii_mask { 0x80808080 }; + }; +  + template<int value_size> + struct ConvertOutputOptimizer {}; + + template<> struct ConvertOutputOptimizer<1> + { +  template<typename input_value_type, class output_string_type, int code_units> +  inline static void append(const input_value_type* addr, output_string_type& s) +  { +   s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), code_units); +  } + }; +  + template<int AccuSize, class ConvertInputOptimizer, class ConvertOutputOptimizer> + struct ArchitectureOptimizer {}; + + template<class ConvertInputOptimizer, class ConvertOutputOptimizer> + struct ArchitectureOptimizer<8, ConvertInputOptimizer, ConvertOutputOptimizer> + { +  typedef ConvertInputOptimizer input_optimizer; +  typedef ConvertOutputOptimizer output_optimizer; +  typedef uint64_t accu_type; +  static const size_t accu_size {8}; +  static const accu_type addr_mask {accu_size - 1}; +  static const accu_type ascii_mask { ((accu_type)input_optimizer::ascii_mask) << 32 | (accu_type)input_optimizer::ascii_mask }; +  static const accu_type ascii_value { 0ULL }; +   +  template<typename input_value_type, class output_string_type> +  inline static void append(const input_value_type* addr, output_string_type& s) +  { +   output_optimizer::template append<input_value_type, output_string_type, accu_size>(addr, s); +  } + }; +   // From and To are facets   template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true>   typename To::string_type convert(const typename From::string_type& s) @@ -557,17 +598,21 @@ namespace unicode {    if constexpr(sizeof(typename From::string_type::value_type) == 1 &&                 sizeof(typename To::value_type) == 1 &&                 sizeof(size_t) >= 8) { +   typedef ConvertInputOptimizer<sizeof(typename From::string_type::value_type)> input_optimizer; +   typedef ConvertOutputOptimizer<sizeof(typename To::value_type)> output_optimizer; +   typedef ArchitectureOptimizer<sizeof(size_t), input_optimizer, output_optimizer> arch_optimizer; +     auto begin{From::begin(s)};     auto end{From::end(s)};     auto back_inserter{To::back_inserter(result)}; -   auto addr{reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])}; -   while (input_distance(begin, end) >= 8) { -    if (((uintptr_t)(void*)addr & 7) == 0) { -     while (input_distance(begin, end) >= 8) { -      uint64_t data{*addr}; -      if ((data & 0x8080808080808080ULL) == 0ULL) { -       result.append(reinterpret_cast<const typename To::value_type*>(addr), 8); -       begin += 8; +   auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])}; +   while (input_distance(begin, end) >= arch_optimizer::accu_size) { +    if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) { +     while (input_distance(begin, end) >= arch_optimizer::accu_size) { +      typename arch_optimizer::accu_type data{*addr}; +      if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { +       arch_optimizer::template append<typename From::string_type::value_type, typename To::string_type>(reinterpret_cast<const typename From::string_type::value_type*>(addr), result); +       begin += arch_optimizer::accu_size;         ++addr;        } else {         // just advance one code unit for now @@ -579,7 +624,7 @@ namespace unicode {      }      // keep up after unaligned Non-ASCII code points -    while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])) & 7) { +    while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) {       back_inserter = *begin;       ++begin;      } | 
