From 216e03ce9e1b891cbbfaea942c4cd607d29b67f7 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sat, 25 Dec 2021 18:40:40 +0100 Subject: Minor refactoring --- include/unicode.h | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/include/unicode.h b/include/unicode.h index c2d727a..c9f2331 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -93,6 +93,8 @@ namespace unicode::detail { template inline static bool is_byte0_of(value_type b) noexcept { + static_assert(n >= 2 && n <= 4); + return (b & static_cast(0xFF << (7 - n))) == static_cast(0xFF << (8 - n)); } @@ -548,19 +550,15 @@ namespace unicode { return it2 - it1; } + // Optimizations following: + static const size_t accu_size {sizeof(size_t)}; + template struct ConvertInputOptimizer {}; template<> struct ConvertInputOptimizer<1> { static const uint32_t ascii_mask { 0x80808080 }; - // 00112233 - // 00112222 - // 00111122 - // 00111111 - // 00001122 - // 00001111 - // 00000011 }; template<> struct ConvertInputOptimizer<2> @@ -572,7 +570,7 @@ namespace unicode { { static const uint32_t ascii_mask { 0xFFFFFF80 }; }; - + template struct ArchitectureOptimizer {}; @@ -581,7 +579,6 @@ namespace unicode { { typedef ConvertInputOptimizer input_optimizer; typedef uint32_t accu_type; - static const size_t accu_size {4}; static const accu_type addr_mask {accu_size - 1}; static const accu_type ascii_mask { (accu_type)input_optimizer::ascii_mask }; static const accu_type ascii_value { 0ULL }; @@ -610,7 +607,6 @@ namespace unicode { { typedef ConvertInputOptimizer input_optimizer; typedef uint64_t accu_type; - static const size_t accu_size {8}; static const accu_type addr_mask {accu_size - 1}; static const accu_type ascii_mask { ((accu_type)input_optimizer::ascii_mask) << 32 | (accu_type)input_optimizer::ascii_mask }; static const accu_type ascii_value { 0ULL }; @@ -647,21 +643,21 @@ namespace unicode { { typename To::string_type result; - if constexpr(sizeof(size_t) == 4 || sizeof(size_t) == 8) { - typedef ConvertInputOptimizer input_optimizer; - typedef ArchitectureOptimizer arch_optimizer; + if constexpr(accu_size == 4 || accu_size == 8) { + typedef ConvertInputOptimizer input_optimizer; + typedef ArchitectureOptimizer arch_optimizer; auto begin{From::begin(s)}; auto end{From::end(s)}; auto back_inserter{To::back_inserter(result)}; auto addr{reinterpret_cast(&s.data()[s.size() - input_distance(begin, end)])}; - while (input_distance(begin, end) >= arch_optimizer::accu_size) { + while (input_distance(begin, end) >= accu_size) { if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) { - while (input_distance(begin, end) >= arch_optimizer::accu_size) { + while (input_distance(begin, end) >= accu_size) { typename arch_optimizer::accu_type data{*addr}; if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { - arch_optimizer::template append(reinterpret_cast(addr), result); - begin += arch_optimizer::accu_size / sizeof(typename From::string_type::value_type); + arch_optimizer::template append(reinterpret_cast(addr), result); + begin += accu_size / sizeof(typename From::value_type); ++addr; } else { // just advance one code unit for now @@ -673,7 +669,7 @@ namespace unicode { } // keep up after unaligned Non-ASCII code points - while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) { + while (begin != end && (uintptr_t)(void*)(addr = reinterpret_cast(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) { back_inserter = *begin; ++begin; } -- cgit v1.2.3