diff options
author | Roland Reichwein <mail@reichwein.it> | 2022-01-05 20:43:41 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2022-01-05 20:43:41 +0100 |
commit | a138fe998b04693ca350cbc9cd144a4116b4400f (patch) | |
tree | ae6e4eb81d3b1a86cee47970bd1e0fcc1668b8d0 /include | |
parent | d75cefda8a5ea08976d6bb512150d7c6891ac73e (diff) |
Simplify UTF-8 decoding: 2 byte sequences always contain valid Unicode values
Diffstat (limited to 'include')
-rw-r--r-- | include/unicode/optimization.h | 8 |
1 file changed, 1 insertion, 7 deletions
diff --git a/include/unicode/optimization.h b/include/unicode/optimization.h index d7b054d..412c8ab 100644 --- a/include/unicode/optimization.h +++ b/include/unicode/optimization.h @@ -248,13 +248,7 @@ namespace unicode { char32_t value {static_cast<char32_t>(((accu & 0x1F) << 6) | ((accu >> 8) & 0x3f))}; accu >>= 16; bytes_in_accu -= 2; - if (is_valid_unicode<11>(value)) - append_utf<11>(result, value); - else -#if __cplusplus >= 202002L - [[unlikely]] -#endif - throw std::invalid_argument("Invalid Unicode character in 2 byte UTF-8 sequence"); + append_utf<11>(result, value); // 11 bit Unicode values are always valid Unicode } else if ((block_mode || bytes_in_accu >= 3) && (accu & 0xC0C0F0) == 0x8080E0) { // 3 byte sequence char32_t value {static_cast<char32_t>(((accu & 0x0F) << 12) | ((accu >> 2) & 0x0FC0) | ((accu >> 16) & 0x3f))}; accu >>= 24; |