diff options
| -rw-r--r-- | include/unicode.h | 69 | 
1 files changed, 33 insertions, 36 deletions
| diff --git a/include/unicode.h b/include/unicode.h index 7965a6e..43dc44e 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -115,16 +115,10 @@ namespace unicode::detail {    template<class X = T, typename std::enable_if<(sizeof(X) == 1), bool>::type = true>    inline value_type calculate_value()    { -   size_t remaining{remaining_code_units()}; -    -   if (!remaining) -    return {}; - -   value_type value{}; -     utf8_t byte0 {static_cast<utf8_t>(get_code_unit<0>())};     if (byte0 & 0x80) { // 2-4 bytes -    if (remaining >= 2) { +    value_type value{}; +    if (size_t remaining{remaining_code_units()}; remaining >= 2) {       utf8_t byte1 {static_cast<utf8_t>(get_code_unit<1>())};       if (is_byte0_of<2>(byte0) && is_continuation_byte(byte1)) { // 2 bytes        value = value_byte0_of<2>(byte0) | continuation_value(byte1); @@ -152,29 +146,23 @@ namespace unicode::detail {      if (!unicode::is_valid_unicode(value))       throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(value))); +    return value;     } else { // 1 byte: 7 bit ASCII -    value = byte0;      std::advance(iterator, 1); +    return byte0;     } - -   return value;    }    template<class X = T, typename std::enable_if<(sizeof(X) == 2), bool>::type = true>    inline value_type calculate_value()    { -   size_t remaining{remaining_code_units()}; -    -   if (!remaining) -    return {}; -     char16_t unit0 {static_cast<char16_t>(get_code_unit<0>())};     if (unit0 <= 0xD7FF || unit0 >= 0xE000) { // 1 unit (BMP Basic Multilingual Plane)      std::advance(iterator, 1);      return unit0;     } else { -    if (remaining < 2) +    if (remaining_code_units() < 2)       throw std::invalid_argument("Bad input: Continuation of first UTF-16 unit missing");      char16_t unit1 {static_cast<char16_t>(get_code_unit<1>())}; @@ -189,11 +177,6 @@ namespace unicode::detail {    template<class X = T, typename std::enable_if<(sizeof(X) == 4), bool>::type = true>    inline value_type calculate_value()    { -   size_t remaining{remaining_code_units()}; - -   if (!remaining) -    return {}; -     value_type result {static_cast<char32_t>(get_code_unit<0>())};     if (!unicode::is_valid_unicode(result)) @@ -284,23 +267,38 @@ namespace unicode::detail {      return trailing_byte<m - n - 1>(value);    } +  template<typename Arg> +  inline void append(Arg&& arg) +  { +   if constexpr (std::is_same<Container, typename std::basic_string<T>>::value) { +    s.append({arg}); +   } else { +    s.emplace_back(arg); +   } +  } + +  template<typename Arg, typename... Args> +  inline void append(Arg&& arg, Args&&... args) +  { +   if constexpr (std::is_same<Container, typename std::basic_string<T>>::value) { +    s.append({arg, args...}); +   } else { +    s.emplace_back(arg); +    append(args...); +   } +  } +    template<class X = T, typename std::enable_if<(sizeof(X) == 1), bool>::type = true>    inline void append_utf(const char32_t& value)    {     if (value < 0x80) { // 1 byte -    s.push_back(static_cast<value_type>(value)); +    append(static_cast<value_type>(value));     } else if (value < 0x800) { // 2 bytes -    s.push_back(byte_n_of_m<0,2>(value)); -    s.push_back(byte_n_of_m<1,2>(value)); +    append(byte_n_of_m<0,2>(value), byte_n_of_m<1,2>(value));     } else if (value < 0x10000) { // 3 bytes -    s.push_back(byte_n_of_m<0,3>(value)); -    s.push_back(byte_n_of_m<1,3>(value)); -    s.push_back(byte_n_of_m<2,3>(value)); +    append(byte_n_of_m<0,3>(value), byte_n_of_m<1,3>(value), byte_n_of_m<2,3>(value));     } else if (value < 0x110000) { // 4 bytes -    s.push_back(byte_n_of_m<0,4>(value)); -    s.push_back(byte_n_of_m<1,4>(value)); -    s.push_back(byte_n_of_m<2,4>(value)); -    s.push_back(byte_n_of_m<3,4>(value)); +    append(byte_n_of_m<0,4>(value), byte_n_of_m<1,4>(value), byte_n_of_m<2,4>(value), byte_n_of_m<3,4>(value));     } else      throw std::runtime_error("Invalid internal Unicode value: "s + std::to_string(static_cast<uint32_t>(value)));    } @@ -309,11 +307,10 @@ namespace unicode::detail {    inline void append_utf(const char32_t& value)    {     if (value <= 0xFFFF) { // expect value to be already valid Unicode values (checked in input iterator) -    s.push_back(static_cast<value_type>(value)); +    append(static_cast<value_type>(value));     } else {      char32_t value_reduced{value - 0x10000}; -    s.push_back((value_reduced >> 10) + 0xD800); -    s.push_back((value_reduced & 0x3FF) + 0xDC00); +    append(static_cast<T>((value_reduced >> 10) + 0xD800), static_cast<T>((value_reduced & 0x3FF) + 0xDC00));     }    } @@ -321,7 +318,7 @@ namespace unicode::detail {    inline void append_utf(const char32_t& value)    {     // expect value to be already valid Unicode values (checked in input iterator) -   s.push_back(value); +   append(static_cast<value_type>(value));    }    reference operator=(const char32_t& value) | 
