diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-28 19:56:30 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-28 19:56:30 +0100 |
commit | a5dc41291537d9518fbbd795d118a3b4bcb9764e (patch) | |
tree | 1ad98b5c9b6f7e750dbf9c624469f0b00ac0c8a4 /include | |
parent | 2b27deb54fec75ed529776f30be8eeb4ea239257 (diff) |
Use fold expressions to simplify code
Diffstat (limited to 'include')
-rw-r--r-- | include/unicode.h | 90 |
1 files changed, 35 insertions, 55 deletions
diff --git a/include/unicode.h b/include/unicode.h index 2d7bf71..8ac9f55 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -27,13 +27,14 @@ typedef char iso_t; namespace unicode { - // usually, char32_t, uint32_t etc. - template<typename T> + // bits_to_compare: limit bits to consider even further than defined by T + // T: usually, char32_t, uint32_t etc. + template<size_t bits_to_compare = 32, typename T> static inline bool is_valid_unicode(const T& value) noexcept { - if constexpr(sizeof(T) == 1) + if constexpr(sizeof(T) == 1 || bits_to_compare <= 15) return true; - else if constexpr(sizeof(T) == 2) + else if constexpr(sizeof(T) == 2 || bits_to_compare <= 20) //return value <= 0xD7FF || value >= 0xE000; return (value & 0xF800) != 0xD800; else @@ -213,45 +214,35 @@ namespace unicode::detail { } } - template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 1), bool> = true> - inline internal_type calculate_value() + template<typename... Tbytes> + inline internal_type calculate_utf8_value(Tbytes... bytes) { - utf8_t byte0 {static_cast<utf8_t>(get_code_unit<0>())}; - if (is_utf8_sequence(byte0)) { // 1 byte: 7 bit ASCII - std::advance(iterator, 1); - return decode_utf8_sequence(byte0); - } else { - internal_type value{}; - if (size_t remaining{remaining_code_units()}; remaining >= 2) { - utf8_t byte1 {static_cast<utf8_t>(get_code_unit<1>())}; - if (is_utf8_sequence(byte0, byte1)) { // 2 bytes - value = decode_utf8_sequence(byte0, byte1); - std::advance(iterator, 2); - } else if (remaining >= 3) { - utf8_t byte2 {static_cast<utf8_t>(get_code_unit<2>())}; - if (is_utf8_sequence(byte0, byte1, byte2)) { // 3 bytes - value = decode_utf8_sequence(byte0, byte1, byte2); - std::advance(iterator, 3); - } else if (remaining >= 4) { - utf8_t byte3 {static_cast<utf8_t>(get_code_unit<3>())}; - if (is_utf8_sequence(byte0, byte1, byte2, byte3)) { // 4 bytes - value = decode_utf8_sequence(byte0, byte1, byte2, byte3); - std::advance(iterator, 4); - } else - throw std::invalid_argument("Bad input: Invalid 4 byte sequence"); - } else - throw std::invalid_argument("Bad UTF-8 input: 4th byte expected, none found (end of sequence)"); - } else - throw std::invalid_argument("Bad UTF-8 input: 3rd byte expected, none found (end of sequence)"); - } else - throw std::invalid_argument("Bad UTF-8 input: 2nd byte expected, none found (end of sequence)"); - - // check only for sequences >= 2 bytes (ASCII is always compliant) - if (!unicode::is_valid_unicode(value)) - throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(value))); + size_t constexpr sequence_length{sizeof...(Tbytes)}; + static_assert(sequence_length >= 1 && sequence_length <= 4); - return value; + if constexpr(sequence_length > 1) { + if (remaining_code_units() < sequence_length) + throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); } + + if (is_utf8_sequence(bytes...)) { + std::advance(iterator, sequence_length); + internal_type result{decode_utf8_sequence(bytes...)}; + if (!unicode::is_valid_unicode<sequence_length * 6>(result)) + throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result))); + return result; + } else { + if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units + return calculate_utf8_value(bytes..., static_cast<utf8_t>(get_code_unit<sequence_length>())); + else + throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence"); + } + } + + template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 1), bool> = true> + inline internal_type calculate_value() + { + return calculate_utf8_value(static_cast<utf8_t>(get_code_unit<0>())); } template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 2), bool> = true> @@ -380,24 +371,13 @@ namespace unicode::detail { return trailing_byte<m - n - 1>(value); } - template<typename Arg> - inline void append(Arg&& arg) - { - if constexpr (std::is_same<Container, typename std::basic_string<value_type>>::value) { - s.append({arg}); - } else { - s.emplace_back(arg); - } - } - - template<typename Arg, typename... Args> - inline void append(Arg&& arg, Args&&... args) + template<typename... Args> + inline void append(Args&&... args) { if constexpr (std::is_same<Container, typename std::basic_string<value_type>>::value) { - s.append({arg, args...}); + s.append({args...}); } else { - s.emplace_back(arg); - append(args...); + (s.emplace_back(args), ...); } } |