diff options
| author | Roland Reichwein <mail@reichwein.it> | 2022-01-05 20:44:48 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2022-01-05 20:44:48 +0100 | 
| commit | 9caef0ea244504a189dfdd81c4db7792b193e14f (patch) | |
| tree | ccbafd19733a841c083f3d32f583ba10ef7db97b | |
| parent | a138fe998b04693ca350cbc9cd144a4116b4400f (diff) | |
Fix UTF-8 decoding: Guard against remaining bytes before iteration
| -rw-r--r-- | include/unicode/utf.h | 18 | 
1 file changed, 8 insertions, 10 deletions
| diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 5db9cac..691d4ba 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -197,14 +197,6 @@ namespace unicode::detail {     size_t constexpr sequence_length{sizeof...(Tbytes)};     static_assert(sequence_length >= 1 && sequence_length <= 4); -   if constexpr(sequence_length > 1) { -    if (remaining_code_units() < sequence_length) -#if __cplusplus >= 202002L -     [[unlikely]] -#endif -     throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); -   } -     if (is_utf8_sequence(bytes...)) {      std::advance(iterator, sequence_length);      internal_type result{decode_utf8_sequence(bytes...)}; @@ -215,9 +207,15 @@ namespace unicode::detail {       throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result)));      return result;     } else { -    if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units +    if constexpr(sequence_length <= 3) { // template recursion break condition: UTF-8 has 1..4 code units +     if (remaining_code_units() < sequence_length + 1) +#if __cplusplus >= 202002L +      [[unlikely]] +#endif +      throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); +       return calculate_utf8_value(bytes..., static_cast<utf8_t>(get_code_unit<sequence_length>())); -    else +    } else       throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence");     }    } | 
