diff options
Diffstat (limited to 'include/unicode')
-rw-r--r-- | include/unicode/predicate.h | 4 | ||||
-rw-r--r-- | include/unicode/utf.h | 27 |
2 files changed, 27 insertions, 4 deletions
diff --git a/include/unicode/predicate.h b/include/unicode/predicate.h index 5f8c6a4..e326529 100644 --- a/include/unicode/predicate.h +++ b/include/unicode/predicate.h @@ -1,3 +1,7 @@ +// +// Unicode library - predicates for Unicode characters +// + #pragma once namespace unicode { diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 046d9c6..6d7c2f7 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -58,9 +58,11 @@ namespace unicode::detail { if ((((s[i] & 7) << 2) | ((s[i + 1] >> 4) & 3)) >= 0x11) return false; // Unicode too big above 0x10FFFF i += 4; - } else { + } else +#if __cplusplus >= 202002L + [[unlikely]] +#endif return false; - } } return true; } @@ -91,9 +93,11 @@ namespace unicode::detail { i++; } else if ((i < size - 1) && is_utf16_sequence(s[i], s[i + 1])) { i += 2; - } else { + } else +#if __cplusplus >= 202002L + [[unlikely]] +#endif return false; - } } return true; } @@ -185,6 +189,9 @@ namespace unicode::detail { if constexpr(sequence_length > 1) { if (remaining_code_units() < sequence_length) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); } @@ -192,6 +199,9 @@ namespace unicode::detail { std::advance(iterator, sequence_length); internal_type result{decode_utf8_sequence(bytes...)}; if (!unicode::is_valid_unicode<sequence_length * 6>(result)) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result))); return result; } else { @@ -218,10 +228,16 @@ namespace unicode::detail { return unit0; } else { if (remaining_code_units() < 2) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Bad input: Continuation of first UTF-16 unit missing"); char16_t unit1 {static_cast<char16_t>(get_code_unit<1>())}; if ((unit0 & 0xFC00) != 0xD800 || (unit1 & 0xFC00) != 0xDC00) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Bad input: 2 malformed UTF-16 surrogates"); std::advance(iterator, 2); @@ -235,6 +251,9 @@ namespace unicode::detail { internal_type result {static_cast<internal_type>(get_code_unit<0>())}; if (!unicode::is_valid_unicode(result)) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result))); std::advance(iterator, 1); |