summary | refs | log | tree | commit | diff | homepage
path: root/include
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it> 2022-01-05 20:44:48 +0100
committer: Roland Reichwein <mail@reichwein.it> 2022-01-05 20:44:48 +0100
commit: 9caef0ea244504a189dfdd81c4db7792b193e14f (patch)
tree: ccbafd19733a841c083f3d32f583ba10ef7db97b /include
parent: a138fe998b04693ca350cbc9cd144a4116b4400f (diff)
Fix UTF-8 decoding: Guard against remaining bytes before iteration
Diffstat (limited to 'include')
-rw-r--r-- include/unicode/utf.h 18
1 file changed, 8 insertions, 10 deletions
diff --git a/include/unicode/utf.h b/include/unicode/utf.h
index 5db9cac..691d4ba 100644
--- a/include/unicode/utf.h
+++ b/include/unicode/utf.h
@@ -197,14 +197,6 @@ namespace unicode::detail {
size_t constexpr sequence_length{sizeof...(Tbytes)};
static_assert(sequence_length >= 1 && sequence_length <= 4);
- if constexpr(sequence_length > 1) {
- if (remaining_code_units() < sequence_length)
-#if __cplusplus >= 202002L
- [[unlikely]]
-#endif
- throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence");
- }
-
if (is_utf8_sequence(bytes...)) {
std::advance(iterator, sequence_length);
internal_type result{decode_utf8_sequence(bytes...)};
@@ -215,9 +207,15 @@ namespace unicode::detail {
throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result)));
return result;
} else {
- if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units
+ if constexpr(sequence_length <= 3) { // template recursion break condition: UTF-8 has 1..4 code units
+ if (remaining_code_units() < sequence_length + 1)
+#if __cplusplus >= 202002L
+ [[unlikely]]
+#endif
+ throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence");
+
return calculate_utf8_value(bytes..., static_cast<utf8_t>(get_code_unit<sequence_length>()));
- else
+ } else
throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence");
}
}