diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-23 19:49:04 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-23 19:49:04 +0100 |
commit | 98f9132997353bb3e750e8e2db99ebd474a8dbb6 (patch) | |
tree | d30f6c7a612337fc7e8f86e16e7a79eb355f0c9a /include | |
parent | 721064dc293d8915fbb33d83bd983a40dcca180f (diff) |
Generalize optimization
Diffstat (limited to 'include')
-rw-r--r-- | include/unicode.h | 63 |
1 files changed, 54 insertions, 9 deletions
diff --git a/include/unicode.h b/include/unicode.h index 5774db7..8dedb19 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -548,6 +548,47 @@ namespace unicode { return it2 - it1; } + template<int value_size> + struct ConvertInputOptimizer {}; + + template<> struct ConvertInputOptimizer<1> + { + static const uint32_t ascii_mask { 0x80808080 }; + }; + + template<int value_size> + struct ConvertOutputOptimizer {}; + + template<> struct ConvertOutputOptimizer<1> + { + template<typename input_value_type, class output_string_type, int code_units> + inline static void append(const input_value_type* addr, output_string_type& s) + { + s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), code_units); + } + }; + + template<int AccuSize, class ConvertInputOptimizer, class ConvertOutputOptimizer> + struct ArchitectureOptimizer {}; + + template<class ConvertInputOptimizer, class ConvertOutputOptimizer> + struct ArchitectureOptimizer<8, ConvertInputOptimizer, ConvertOutputOptimizer> + { + typedef ConvertInputOptimizer input_optimizer; + typedef ConvertOutputOptimizer output_optimizer; + typedef uint64_t accu_type; + static const size_t accu_size {8}; + static const accu_type addr_mask {accu_size - 1}; + static const accu_type ascii_mask { ((accu_type)input_optimizer::ascii_mask) << 32 | (accu_type)input_optimizer::ascii_mask }; + static const accu_type ascii_value { 0ULL }; + + template<typename input_value_type, class output_string_type> + inline static void append(const input_value_type* addr, output_string_type& s) + { + output_optimizer::template append<input_value_type, output_string_type, accu_size>(addr, s); + } + }; + // From and To are facets template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true> typename To::string_type convert(const typename From::string_type& s) @@ -557,17 +598,21 @@ namespace unicode { if constexpr(sizeof(typename From::string_type::value_type) == 1 && sizeof(typename To::value_type) == 1 && sizeof(size_t) >= 8) { + typedef ConvertInputOptimizer<sizeof(typename From::string_type::value_type)> input_optimizer; + typedef ConvertOutputOptimizer<sizeof(typename To::value_type)> output_optimizer; + typedef ArchitectureOptimizer<sizeof(size_t), input_optimizer, output_optimizer> arch_optimizer; + auto begin{From::begin(s)}; auto end{From::end(s)}; auto back_inserter{To::back_inserter(result)}; - auto addr{reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])}; - while (input_distance(begin, end) >= 8) { - if (((uintptr_t)(void*)addr & 7) == 0) { - while (input_distance(begin, end) >= 8) { - uint64_t data{*addr}; - if ((data & 0x8080808080808080ULL) == 0ULL) { - result.append(reinterpret_cast<const typename To::value_type*>(addr), 8); - begin += 8; + auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])}; + while (input_distance(begin, end) >= arch_optimizer::accu_size) { + if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) { + while (input_distance(begin, end) >= arch_optimizer::accu_size) { + typename arch_optimizer::accu_type data{*addr}; + if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { + arch_optimizer::template append<typename From::string_type::value_type, typename To::string_type>(reinterpret_cast<const typename From::string_type::value_type*>(addr), result); + begin += arch_optimizer::accu_size; ++addr; } else { // just advance one code unit for now @@ -579,7 +624,7 @@ namespace unicode { } // keep up after unaligned Non-ASCII code points - while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])) & 7) { + while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) { back_inserter = *begin; ++begin; } |