diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/unicode.h | 49 | 
1 files changed, 41 insertions, 8 deletions
| diff --git a/include/unicode.h b/include/unicode.h index 4b676bf..296ba1d 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -3,6 +3,7 @@  #pragma once  #include <algorithm> +#include <iterator>  #include <memory>  #include <stdexcept>  #include <string> @@ -33,8 +34,12 @@ namespace unicode::detail {   template<typename T>   struct utf_iterator   { -  typedef T value_type; +  typedef T input_type; +  typedef char32_t value_type;    typedef char32_t& reference; +  typedef char32_t* pointer; +  typedef size_t difference_type; +  typedef std::input_iterator_tag iterator_category;    typedef std::basic_string<T> string_type;    utf_iterator(const typename string_type::const_iterator& cbegin, const typename string_type::const_iterator& cend): @@ -217,9 +222,20 @@ namespace unicode::detail {    typedef T value_type;    typedef std::basic_string<T> string_type;    typedef utf_back_insert_iterator& reference; +  typedef utf_back_insert_iterator* pointer; +  typedef size_t difference_type; +  typedef std::output_iterator_tag iterator_category;    utf_back_insert_iterator(string_type& s): s(s) {} +  utf_back_insert_iterator<T>& operator=(const utf_back_insert_iterator<T>& other) +  { +   if (std::addressof(other.s) != std::addressof(s)) +    throw std::runtime_error("utf_back_insert_iterator assignment operator actually called! Iterator should not be assigned to."); + +   return *this; +  } +    // no-op    reference operator++()    { @@ -273,7 +289,7 @@ namespace unicode::detail {    reference operator=<utf8_t>(const char32_t& value)    {     if (value < 0x80) { // 1 byte -    s.push_back(value); +    s.push_back(static_cast<value_type>(value));     } else if (value < 0x800) { // 2 bytes      s.push_back(byte_n_of_m<0,2>(value));      s.push_back(byte_n_of_m<1,2>(value)); @@ -297,7 +313,7 @@ namespace unicode::detail {    reference operator=<char16_t>(const char32_t& value)    {     if (value <= 0xFFFF) { // expect value to be already valid Unicode values -    s.push_back(value); +    s.push_back(static_cast<value_type>(value));     } else {      char32_t value_reduced{value - 0x10000};      s.push_back((value_reduced >> 10) + 0xD800); @@ -349,8 +365,12 @@ namespace unicode {   template<unicode::detail::iso_map_type& Map=iso_8859_1_map>   struct iso_iterator { +  typedef utf8_t input_type;    typedef char32_t value_type;    typedef char32_t& reference; +  typedef char32_t* pointer; +  typedef size_t difference_type; +  typedef std::input_iterator_tag iterator_category;    typedef std::basic_string<utf8_t>::const_iterator iterator;    iso_iterator(const iterator& it): m_it(it) {} @@ -388,10 +408,22 @@ namespace unicode {   template<unicode::detail::iso_map_type_reverse& Map=iso_8859_1_map_reverse>   struct iso_back_insert_iterator {    typedef iso_back_insert_iterator& reference; +  typedef iso_back_insert_iterator* pointer; +  typedef size_t difference_type; +  typedef utf8_t value_type; +  typedef std::output_iterator_tag iterator_category;    typedef std::basic_string<utf8_t> string_type;    iso_back_insert_iterator(string_type& s): s(s) {} +  iso_back_insert_iterator& operator=(const iso_back_insert_iterator& other) +  { +   if (std::addressof(other.s) != std::addressof(s)) +    throw std::runtime_error("iso_back_insert_iterator assignment operator actually called! Iterator should not be assigned to."); + +   return *this; +  } +    // no-op    reference operator++()    { @@ -452,14 +484,15 @@ namespace unicode {   template<typename InputIt, typename OutputIt>   struct UTF   { -  typedef typename InputIt::value_type value_type; // OutputIt::value_type is the same +  typedef typename InputIt::input_type input_type; +  typedef typename OutputIt::value_type value_type; -  static InputIt begin(const std::basic_string<value_type>& s) +  static InputIt begin(const std::basic_string<input_type>& s)    {     return InputIt{s.cbegin(), s.cend()};    } -  static InputIt end(const std::basic_string<value_type>& s) +  static InputIt end(const std::basic_string<input_type>& s)    {     return InputIt{s.cend(), s.cend()};    } @@ -528,8 +561,8 @@ namespace unicode {   bool is_valid_utf(const std::basic_string<T>& s)   {    try { -   std::for_each(Encoding<T>::Facet::begin(s), Encoding<T>::Facet::end(s), [](const T& c){}); -  } catch(...) { +   std::for_each(Encoding<T>::Facet::begin(s), Encoding<T>::Facet::end(s), [](const char32_t& c){}); +  } catch (const std::invalid_argument&) {     return false;    }    return true; | 
