diff options
author | Roland Reichwein <mail@reichwein.it> | 2022-01-05 20:44:48 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2022-01-05 20:44:48 +0100 |
commit | 9caef0ea244504a189dfdd81c4db7792b193e14f (patch) | |
tree | ccbafd19733a841c083f3d32f583ba10ef7db97b /include/unicode/utf.h | |
parent | a138fe998b04693ca350cbc9cd144a4116b4400f (diff) |
Fix UTF-8 decoding: Check that enough bytes remain before reading the next code unit
Diffstat (limited to 'include/unicode/utf.h')
-rw-r--r-- | include/unicode/utf.h | 18 |
1 file changed, 8 insertions, 10 deletions
diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 5db9cac..691d4ba 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -197,14 +197,6 @@ namespace unicode::detail { size_t constexpr sequence_length{sizeof...(Tbytes)}; static_assert(sequence_length >= 1 && sequence_length <= 4); - if constexpr(sequence_length > 1) { - if (remaining_code_units() < sequence_length) -#if __cplusplus >= 202002L - [[unlikely]] -#endif - throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); - } - if (is_utf8_sequence(bytes...)) { std::advance(iterator, sequence_length); internal_type result{decode_utf8_sequence(bytes...)}; @@ -215,9 +207,15 @@ namespace unicode::detail { throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result))); return result; } else { - if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units + if constexpr(sequence_length <= 3) { // template recursion break condition: UTF-8 has 1..4 code units + if (remaining_code_units() < sequence_length + 1) +#if __cplusplus >= 202002L + [[unlikely]] +#endif + throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); + return calculate_utf8_value(bytes..., static_cast<utf8_t>(get_code_unit<sequence_length>())); - else + } else throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence"); } } |