From 7241d48ec4d0a0a59325d7a9208bdf5e076ba002 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 2 Jan 2022 16:44:47 +0100 Subject: Use [[likely]] and [[unlikely]] --- include/unicode.h | 24 ++++++++++++++++++++++-- include/unicode/predicate.h | 4 ++++ include/unicode/utf.h | 27 +++++++++++++++++++++++---- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/include/unicode.h b/include/unicode.h index 2bf17f4..34812df 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -144,7 +144,11 @@ namespace unicode { if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) { while (input_distance_bytes(begin, end) >= accu_size) { typename arch_optimizer::accu_type data{*addr}; - if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { + if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) +#if __cplusplus >= 202002L + [[likely]] +#endif + { arch_optimizer::template append(reinterpret_cast(addr), result); begin += accu_size / sizeof(typename From::value_type); ++addr; @@ -214,7 +218,11 @@ namespace unicode { template, bool> = true> inline static void append_accu(std::basic_string& result, uint64_t& accu, int& bytes_in_accu) { - if (block_mode && bytes_in_accu == 8 && (accu & 0x8080808080808080) == 0) { + if (block_mode && bytes_in_accu == 8 && (accu & 0x8080808080808080) == 0) +#if __cplusplus >= 202002L + [[likely]] +#endif + { result.append({ static_cast(accu & 0x7F), static_cast((accu >> 8) & 0x7F), @@ -239,6 +247,9 @@ namespace unicode { if (is_valid_unicode<11>(value)) append_utf<11>(result, value); else +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character in 2 byte UTF-8 sequence"); } else if ((block_mode || bytes_in_accu >= 3) && (accu & 0xC0C0F0) == 0x8080E0) { // 3 byte sequence char32_t value {static_cast(((accu & 0x0F) << 12) | ((accu >> 2) & 0x0FC0) | ((accu >> 16) & 0x3f))}; @@ -247,6 +258,9 @@ namespace unicode { if (is_valid_unicode<16>(value)) append_utf<16>(result, value); else +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character in 3 byte UTF-8 sequence"); } else if ((block_mode || bytes_in_accu >= 4) && (accu & 0xC0C0C0F8) == 0x808080F0) { // 4 byte sequence char32_t value {static_cast(((accu & 0x07) << 18) | ((accu << 4) & 0x3f000) | ((accu >> 10) & 0xFC0) | ((accu >> 24) & 0x3f))}; @@ -255,8 +269,14 @@ namespace unicode { if (is_valid_unicode<21>(value)) append_utf(result, value); else +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character in 4 byte UTF-8 sequence"); } else +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid UTF-8 byte sequence"); } diff --git a/include/unicode/predicate.h b/include/unicode/predicate.h index 5f8c6a4..e326529 100644 --- a/include/unicode/predicate.h +++ b/include/unicode/predicate.h @@ -1,3 +1,7 @@ +// +// Unicode library - predicates for Unicode characters +// + #pragma once namespace unicode { diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 046d9c6..6d7c2f7 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -58,9 +58,11 @@ namespace unicode::detail { if ((((s[i] & 7) << 2) | ((s[i + 1] >> 4) & 3)) >= 0x11) return false; // Unicode too big above 0x10FFFF i += 4; - } else { + } else +#if __cplusplus >= 202002L + [[unlikely]] +#endif return false; - } } return true; } @@ -91,9 +93,11 @@ namespace unicode::detail { i++; } else if ((i < size - 1) && is_utf16_sequence(s[i], s[i + 1])) { i += 2; - } else { + } else +#if __cplusplus >= 202002L + [[unlikely]] +#endif return false; - } } return true; } @@ -185,6 +189,9 @@ namespace unicode::detail { if constexpr(sequence_length > 1) { if (remaining_code_units() < sequence_length) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); } @@ -192,6 +199,9 @@ namespace unicode::detail { std::advance(iterator, sequence_length); internal_type result{decode_utf8_sequence(bytes...)}; if (!unicode::is_valid_unicode(result)) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast(result))); return result; } else { @@ -218,10 +228,16 @@ namespace unicode::detail { return unit0; } else { if (remaining_code_units() < 2) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Bad input: Continuation of first UTF-16 unit missing"); char16_t unit1 {static_cast(get_code_unit<1>())}; if ((unit0 & 0xFC00) != 0xD800 || (unit1 & 0xFC00) != 0xDC00) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Bad input: 2 malformed UTF-16 surrogates"); std::advance(iterator, 2); @@ -235,6 +251,9 @@ namespace unicode::detail { internal_type result {static_cast(get_code_unit<0>())}; if (!unicode::is_valid_unicode(result)) +#if __cplusplus >= 202002L + [[unlikely]] +#endif throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast(result))); std::advance(iterator, 1); -- cgit v1.2.3