diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-25 18:40:40 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-25 18:40:40 +0100 |
commit | 216e03ce9e1b891cbbfaea942c4cd607d29b67f7 (patch) | |
tree | ebfb838a5c4b86bd706638574c2c4949f6de42f6 /include | |
parent | 79dc9edc72c5b9fefb129fe36029d4781b1e969c (diff) |
Minor refactoring
Diffstat (limited to 'include')
-rw-r--r-- | include/unicode.h | 32 |
1 files changed, 14 insertions, 18 deletions
diff --git a/include/unicode.h b/include/unicode.h index c2d727a..c9f2331 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -93,6 +93,8 @@ namespace unicode::detail { template<size_t n> inline static bool is_byte0_of(value_type b) noexcept { + static_assert(n >= 2 && n <= 4); + return (b & static_cast<value_type>(0xFF << (7 - n))) == static_cast<value_type>(0xFF << (8 - n)); } @@ -548,19 +550,15 @@ namespace unicode { return it2 - it1; } + // Optimizations following: + static const size_t accu_size {sizeof(size_t)}; + template<int value_size> struct ConvertInputOptimizer {}; template<> struct ConvertInputOptimizer<1> { static const uint32_t ascii_mask { 0x80808080 }; - // 00112233 - // 00112222 - // 00111122 - // 00111111 - // 00001122 - // 00001111 - // 00000011 }; template<> struct ConvertInputOptimizer<2> @@ -572,7 +570,7 @@ namespace unicode { { static const uint32_t ascii_mask { 0xFFFFFF80 }; }; - + template<int AccuSize, class ConvertInputOptimizer> struct ArchitectureOptimizer {}; @@ -581,7 +579,6 @@ namespace unicode { { typedef ConvertInputOptimizer input_optimizer; typedef uint32_t accu_type; - static const size_t accu_size {4}; static const accu_type addr_mask {accu_size - 1}; static const accu_type ascii_mask { (accu_type)input_optimizer::ascii_mask }; static const accu_type ascii_value { 0ULL }; @@ -610,7 +607,6 @@ namespace unicode { { typedef ConvertInputOptimizer input_optimizer; typedef uint64_t accu_type; - static const size_t accu_size {8}; static const accu_type addr_mask {accu_size - 1}; static const accu_type ascii_mask { ((accu_type)input_optimizer::ascii_mask) << 32 | (accu_type)input_optimizer::ascii_mask }; static const accu_type ascii_value { 0ULL }; @@ -647,21 +643,21 @@ namespace unicode { { typename To::string_type result; - if constexpr(sizeof(size_t) == 4 || sizeof(size_t) == 8) { - typedef ConvertInputOptimizer<sizeof(typename From::string_type::value_type)> input_optimizer; - typedef ArchitectureOptimizer<sizeof(size_t), input_optimizer> arch_optimizer; + if constexpr(accu_size == 4 || accu_size == 8) { + typedef ConvertInputOptimizer<sizeof(typename From::value_type)> input_optimizer; + typedef ArchitectureOptimizer<accu_size, input_optimizer> arch_optimizer; auto begin{From::begin(s)}; auto end{From::end(s)}; auto back_inserter{To::back_inserter(result)}; auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])}; - while (input_distance(begin, end) >= arch_optimizer::accu_size) { + while (input_distance(begin, end) >= accu_size) { if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) { - while (input_distance(begin, end) >= arch_optimizer::accu_size) { + while (input_distance(begin, end) >= accu_size) { typename arch_optimizer::accu_type data{*addr}; if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { - arch_optimizer::template append<typename From::string_type::value_type, typename To::string_type>(reinterpret_cast<const typename From::string_type::value_type*>(addr), result); - begin += arch_optimizer::accu_size / sizeof(typename From::string_type::value_type); + arch_optimizer::template append<typename From::value_type, typename To::string_type>(reinterpret_cast<const typename From::value_type*>(addr), result); + begin += accu_size / sizeof(typename From::value_type); ++addr; } else { // just advance one code unit for now @@ -673,7 +669,7 @@ namespace unicode { } // keep up after unaligned Non-ASCII code points - while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) { + while (begin != end && (uintptr_t)(void*)(addr = reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) { back_inserter = *begin; ++begin; } |