From 98f9132997353bb3e750e8e2db99ebd474a8dbb6 Mon Sep 17 00:00:00 2001
From: Roland Reichwein
Date: Thu, 23 Dec 2021 19:49:04 +0100
Subject: Generalize optimization

---
 include/unicode.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/unicode.h b/include/unicode.h
index 5774db7..8dedb19 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -548,6 +548,47 @@ namespace unicode {
   return it2 - it1;
  }
 
+ template<size_t sizeof_string_value_type>
+ struct ConvertInputOptimizer {};
+
+ template<> struct ConvertInputOptimizer<1>
+ {
+  static const uint32_t ascii_mask { 0x80808080 };
+ };
+
+ template<size_t sizeof_string_value_type>
+ struct ConvertOutputOptimizer {};
+
+ template<> struct ConvertOutputOptimizer<1>
+ {
+  template<typename input_value_type, typename output_string_type, size_t code_units>
+  inline static void append(const input_value_type* addr, output_string_type& s)
+  {
+   s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), code_units);
+  }
+ };
+
+ template<size_t sizeof_size_t, typename ConvertInputOptimizer, typename ConvertOutputOptimizer>
+ struct ArchitectureOptimizer {};
+
+ template<typename ConvertInputOptimizer, typename ConvertOutputOptimizer>
+ struct ArchitectureOptimizer<8, ConvertInputOptimizer, ConvertOutputOptimizer>
+ {
+  typedef ConvertInputOptimizer input_optimizer;
+  typedef ConvertOutputOptimizer output_optimizer;
+  typedef uint64_t accu_type;
+  static const size_t accu_size {8};
+  static const accu_type addr_mask {accu_size - 1};
+  static const accu_type ascii_mask { ((accu_type)input_optimizer::ascii_mask) << 32 | (accu_type)input_optimizer::ascii_mask };
+  static const accu_type ascii_value { 0ULL };
+
+  template<typename input_value_type, typename output_string_type>
+  inline static void append(const input_value_type* addr, output_string_type& s)
+  {
+   output_optimizer::template append<input_value_type, output_string_type, accu_size>(addr, s);
+  }
+ };
+
  // From and To are facets
  template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true>
  typename To::string_type convert(const typename From::string_type& s)
@@ -557,17 +598,21 @@ namespace unicode {
   if constexpr(sizeof(typename From::string_type::value_type) == 1 && sizeof(typename To::value_type) == 1 && sizeof(size_t) >= 8) {
+   typedef ConvertInputOptimizer<sizeof(typename From::string_type::value_type)> input_optimizer;
+   typedef ConvertOutputOptimizer<sizeof(typename To::value_type)> output_optimizer;
+   typedef ArchitectureOptimizer<sizeof(size_t), input_optimizer, output_optimizer> arch_optimizer;
+
    auto begin{From::begin(s)};
    auto end{From::end(s)};
    auto back_inserter{To::back_inserter(result)};
-   auto addr{reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])};
-   while (input_distance(begin, end) >= 8) {
-    if (((uintptr_t)(void*)addr & 7) == 0) {
-     while (input_distance(begin, end) >= 8) {
-      uint64_t data{*addr};
-      if ((data & 0x8080808080808080ULL) == 0ULL) {
-       result.append(reinterpret_cast<const typename To::value_type*>(addr), 8);
-       begin += 8;
+   auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])};
+   while (input_distance(begin, end) >= arch_optimizer::accu_size) {
+    if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) {
+     while (input_distance(begin, end) >= arch_optimizer::accu_size) {
+      typename arch_optimizer::accu_type data{*addr};
+      if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) {
+       arch_optimizer::template append<typename From::string_type::value_type, typename To::string_type>(reinterpret_cast<const typename From::string_type::value_type*>(addr), result);
+       begin += arch_optimizer::accu_size;
        ++addr;
       } else {
        // just advance one code unit for now
@@ -579,7 +624,7 @@ namespace unicode {
     }
 
     // keep up after unaligned Non-ASCII code points
-    while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])) & 7) {
+    while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) {
      back_inserter = *begin;
      ++begin;
     }
-- 
cgit v1.2.3