diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-26 16:58:04 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-26 16:58:04 +0100 |
commit | e0e5623b46fdaa0988faa76af506d5bc1035ee42 (patch) | |
tree | 29255219e493122a3f0873d76b57338200b7f171 /include | |
parent | 23c31aef916dbfd17774a80258676a963426a698 (diff) |
Optimization fixes
Diffstat (limited to 'include')
-rw-r--r-- | include/unicode.h | 29 |
1 file changed, 22 insertions, 7 deletions
diff --git a/include/unicode.h b/include/unicode.h index 3d6477c..395f172 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -31,6 +31,13 @@ namespace unicode { template<typename T> static inline bool is_valid_unicode(const T& value) noexcept { + if constexpr(sizeof(T) == 1) + return true; + else if constexpr(sizeof(T) == 2) + //return value <= 0xD7FF || value >= 0xE000; + return (value & 0xF800) != 0xD800; + else + //return (value & 0xFFFFF800) != 0x0000D800 && (value >> 16) <= 0x10; return value <= 0xD7FF || (value >= 0xE000 && value <= 0x10FFFF); } @@ -161,7 +168,7 @@ namespace unicode::detail { { char16_t unit0 {static_cast<char16_t>(get_code_unit<0>())}; - if (unit0 <= 0xD7FF || unit0 >= 0xE000) { // 1 unit (BMP Basic Multilingual Plane) + if (is_valid_unicode(unit0)) { // 1 unit (BMP Basic Multilingual Plane) std::advance(iterator, 1); return unit0; } else { @@ -547,10 +554,16 @@ namespace unicode { // std::distance doesn't work here: it is based on "output" distance of iterators template<class Iterator> - size_t input_distance(const Iterator& it1, const Iterator& it2) + inline size_t input_distance(const Iterator& it1, const Iterator& it2) { return it2 - it1; } + + template<class Iterator> + inline size_t input_distance_bytes(const Iterator& it1, const Iterator& it2) + { + return input_distance(it1, it2) * sizeof(typename Iterator::value_type); + } // Optimizations following: static const size_t accu_size {sizeof(size_t)}; @@ -637,7 +650,8 @@ namespace unicode { static_cast<typename output_string_type::value_type>(addr[1])}); } } - }; + + }; // class ArchitectureOptimizer // From and To are facets template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true> @@ -653,16 +667,17 @@ namespace unicode { auto end{From::end(s)}; auto back_inserter{To::back_inserter(result)}; auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])}; - while 
(input_distance(begin, end) >= accu_size) { + while (input_distance_bytes(begin, end) >= accu_size) { if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) { - while (input_distance(begin, end) >= accu_size) { + while (input_distance_bytes(begin, end) >= accu_size) { typename arch_optimizer::accu_type data{*addr}; if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { - arch_optimizer::template append<typename From::value_type, typename To::string_type>(reinterpret_cast<const typename From::value_type*>(addr), result); + arch_optimizer::template append(reinterpret_cast<const typename From::value_type*>(addr), result); begin += accu_size / sizeof(typename From::value_type); ++addr; } else { - // just advance one code unit for now + // just advance one code unit for now and break to trigger unoptimized + // version until next accu boundary back_inserter = *begin; ++begin; break; |