From a138fe998b04693ca350cbc9cd144a4116b4400f Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Wed, 5 Jan 2022 20:43:41 +0100 Subject: Simplify UTF-8 decoding: 2 byte sequences always contain valid Unicode values --- include/unicode/optimization.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/unicode/optimization.h b/include/unicode/optimization.h index d7b054d..412c8ab 100644 --- a/include/unicode/optimization.h +++ b/include/unicode/optimization.h @@ -248,13 +248,7 @@ namespace unicode { char32_t value {static_cast<char32_t>(((accu & 0x1F) << 6) | ((accu >> 8) & 0x3f))}; accu >>= 16; bytes_in_accu -= 2; - if (is_valid_unicode<11>(value)) - append_utf<11>(result, value); - else -#if __cplusplus >= 202002L - [[unlikely]] -#endif - throw std::invalid_argument("Invalid Unicode character in 2 byte UTF-8 sequence"); + append_utf<11>(result, value); // 11 bit Unicode values are always valid Unicode } else if ((block_mode || bytes_in_accu >= 3) && (accu & 0xC0C0F0) == 0x8080E0) { // 3 byte sequence char32_t value {static_cast<char32_t>(((accu & 0x0F) << 12) | ((accu >> 2) & 0x0FC0) | ((accu >> 16) & 0x3f))}; accu >>= 24; -- cgit v1.2.3