diff options
Diffstat (limited to 'include/unicode.h')
-rw-r--r-- | include/unicode.h | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/include/unicode.h b/include/unicode.h index a50f525..eb872ec 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -77,15 +77,15 @@ namespace unicode { { if constexpr(sizeof(input_value_type) == sizeof(typename output_string_type::value_type)) { s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), accu_size / sizeof(input_value_type)); - } else if constexpr(sizeof(input_value_type) == 1) { + } else if constexpr(is_utf_8_v<input_value_type>) { s.append({static_cast<typename output_string_type::value_type>(addr[0]), static_cast<typename output_string_type::value_type>(addr[1]), static_cast<typename output_string_type::value_type>(addr[2]), static_cast<typename output_string_type::value_type>(addr[3])}); - } else if constexpr(sizeof(input_value_type) == 2) { + } else if constexpr(is_utf_16_v<input_value_type>) { s.append({static_cast<typename output_string_type::value_type>(addr[0]), static_cast<typename output_string_type::value_type>(addr[1])}); - } else if constexpr(sizeof(input_value_type) == 4) { + } else if constexpr(is_utf_32_v<input_value_type>) { s.append({static_cast<typename output_string_type::value_type>(addr[0])}); } } @@ -105,7 +105,7 @@ namespace unicode { { if constexpr(sizeof(input_value_type) == sizeof(typename output_string_type::value_type)) { s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), accu_size / sizeof(input_value_type)); - } else if constexpr(sizeof(input_value_type) == 1) { + } else if constexpr(is_utf_8_v<input_value_type>) { s.append({static_cast<typename output_string_type::value_type>(addr[0]), static_cast<typename output_string_type::value_type>(addr[1]), static_cast<typename output_string_type::value_type>(addr[2]), @@ -114,12 +114,12 @@ namespace unicode { static_cast<typename output_string_type::value_type>(addr[5]), static_cast<typename output_string_type::value_type>(addr[6]), static_cast<typename output_string_type::value_type>(addr[7])}); - } else if constexpr(sizeof(input_value_type) == 2) { + } else if constexpr(is_utf_16_v<input_value_type>) { s.append({static_cast<typename output_string_type::value_type>(addr[0]), static_cast<typename output_string_type::value_type>(addr[1]), static_cast<typename output_string_type::value_type>(addr[2]), static_cast<typename output_string_type::value_type>(addr[3])}); - } else if constexpr(sizeof(input_value_type) == 4) { + } else if constexpr(is_utf_32_v<input_value_type>) { s.append({static_cast<typename output_string_type::value_type>(addr[0]), static_cast<typename output_string_type::value_type>(addr[1])}); } @@ -174,7 +174,7 @@ namespace unicode { return result; } - template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<(sizeof(To) == 1), bool> = true> + template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<is_utf_8_v<To>, bool> = true> inline void append_utf(std::basic_string<To>& result, const char32_t& value) { using From = char32_t; @@ -190,7 +190,7 @@ namespace unicode { } } - template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<(sizeof(To) == 2), bool> = true> + template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<is_utf_16_v<To>, bool> = true> inline void append_utf(std::basic_string<To>& result, const char32_t& value) { if (bits_to_compare <= 16 || value <= 0xFFFF) { // expect value to be already valid Unicode values @@ -201,7 +201,7 @@ namespace unicode { } } - template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<(sizeof(To) == 4), bool> = true> + template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<is_utf_32_v<To>, bool> = true> inline void append_utf(std::basic_string<To>& result, const char32_t& value) { // expect value to be already valid Unicode values (checked in input iterator) @@ -211,7 +211,7 @@ namespace unicode { // Little Endian optimized version for UTF-8 // In block_mode, at least 4 bytes are in accu. On first call, even 8. // otherwise, at least one code unit is in accu - template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<(sizeof(From) == 1), bool> = true> + template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<is_utf_8_v<From>, bool> = true> inline static void append_accu(std::basic_string<To>& result, uint64_t& accu, int& bytes_in_accu) { #if 1 @@ -265,7 +265,7 @@ namespace unicode { // Little Endian optimized version for UTF-16 // In block_mode, at least 4 bytes are in accu. On first call, even 8. // otherwise, at least one code unit is in accu - template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<(sizeof(From) == 2), bool> = true> + template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<is_utf_16_v<From>, bool> = true> inline static void append_accu(std::basic_string<To>& result, uint64_t& accu, int& bytes_in_accu) { #if 1 @@ -282,7 +282,7 @@ namespace unicode { if ((accu & 0xFC00FC00FC00FC00) == 0xDC00D800DC00D800) { // found 4 code units forming 3 code points in UTF-16; // by definition of UTF-16, we have valid unicode values at this point - if constexpr(sizeof(To) == 4) { + if constexpr(is_utf_32_v<To>) { //result.resize(result.size() + 2); //*reinterpret_cast<uint64_t*>(&result[result.size() - 2]) = (((accu & 0x03FF000003FF) << 10) | ((accu >> 16) & 0x03FF000003FF)) + 0x0001000000010000; result.append({ @@ -316,7 +316,7 @@ namespace unicode { typename To::string_type convert_optimized_utf(const typename From::string_type& s) { typename To::string_type result; - if constexpr(sizeof(typename From::value_type) == 4) { + if constexpr(is_utf_32_v<typename From::value_type>) { for (const auto value: s) { if (is_valid_unicode(value)) append_utf(result, value); @@ -324,7 +324,7 @@ namespace unicode { throw std::invalid_argument("Invalid Unicode character in UTF-32"); } #if 0 - } else if constexpr(sizeof(typename From::value_type) == 2) { + } else if constexpr(is_utf_16_v<typename From::value_type>) { for (int i = 0; i < s.size(); i++) { typename From::value_type unit0{s[i]}; if (is_valid_unicode(unit0)) { @@ -388,7 +388,7 @@ namespace unicode { } else { throw std::invalid_argument("Invalid UTF input"); } - } else if constexpr(accu_size == 8 && is_little_endian() && sizeof(typename From::value_type) == 1 && + } else if constexpr(accu_size == 8 && is_little_endian() && is_utf_8_v<typename From::value_type> && is_utf_encoding_v<From> && is_utf_encoding_v<To>) { // endian specific optimization return convert_optimized_utf<From, To>(s); } else if constexpr(accu_size == 4 || accu_size == 8) { // accu size specific optimization with speedup for 7bit input |