diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-02-03 13:18:25 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-02-03 13:18:25 +0100 |
commit | 5572e23e8e2109abd73b916f4f0d278e1aa21f34 (patch) | |
tree | a31b61aa9381bef63be637b203ae6f030f33027d /include/unicode.h | |
parent | 9dfd49e5def7357c59f8bc676981f5466bdb2d2c (diff) |
Add msbuild files
Diffstat (limited to 'include/unicode.h')
-rw-r--r-- | include/unicode.h | 49 |
1 file changed, 41 insertions, 8 deletions
diff --git a/include/unicode.h b/include/unicode.h index 4b676bf..296ba1d 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -3,6 +3,7 @@ #pragma once #include <algorithm> +#include <iterator> #include <memory> #include <stdexcept> #include <string> @@ -33,8 +34,12 @@ namespace unicode::detail { template<typename T> struct utf_iterator { - typedef T value_type; + typedef T input_type; + typedef char32_t value_type; typedef char32_t& reference; + typedef char32_t* pointer; + typedef size_t difference_type; + typedef std::input_iterator_tag iterator_category; typedef std::basic_string<T> string_type; utf_iterator(const typename string_type::const_iterator& cbegin, const typename string_type::const_iterator& cend): @@ -217,9 +222,20 @@ namespace unicode::detail { typedef T value_type; typedef std::basic_string<T> string_type; typedef utf_back_insert_iterator& reference; + typedef utf_back_insert_iterator* pointer; + typedef size_t difference_type; + typedef std::output_iterator_tag iterator_category; utf_back_insert_iterator(string_type& s): s(s) {} + utf_back_insert_iterator<T>& operator=(const utf_back_insert_iterator<T>& other) + { + if (std::addressof(other.s) != std::addressof(s)) + throw std::runtime_error("utf_back_insert_iterator assignment operator actually called! 
Iterator should not be assigned to."); + + return *this; + } + // no-op reference operator++() { @@ -273,7 +289,7 @@ namespace unicode::detail { reference operator=<utf8_t>(const char32_t& value) { if (value < 0x80) { // 1 byte - s.push_back(value); + s.push_back(static_cast<value_type>(value)); } else if (value < 0x800) { // 2 bytes s.push_back(byte_n_of_m<0,2>(value)); s.push_back(byte_n_of_m<1,2>(value)); @@ -297,7 +313,7 @@ namespace unicode::detail { reference operator=<char16_t>(const char32_t& value) { if (value <= 0xFFFF) { // expect value to be already valid Unicode values - s.push_back(value); + s.push_back(static_cast<value_type>(value)); } else { char32_t value_reduced{value - 0x10000}; s.push_back((value_reduced >> 10) + 0xD800); @@ -349,8 +365,12 @@ namespace unicode { template<unicode::detail::iso_map_type& Map=iso_8859_1_map> struct iso_iterator { + typedef utf8_t input_type; typedef char32_t value_type; typedef char32_t& reference; + typedef char32_t* pointer; + typedef size_t difference_type; + typedef std::input_iterator_tag iterator_category; typedef std::basic_string<utf8_t>::const_iterator iterator; iso_iterator(const iterator& it): m_it(it) {} @@ -388,10 +408,22 @@ namespace unicode { template<unicode::detail::iso_map_type_reverse& Map=iso_8859_1_map_reverse> struct iso_back_insert_iterator { typedef iso_back_insert_iterator& reference; + typedef iso_back_insert_iterator* pointer; + typedef size_t difference_type; + typedef utf8_t value_type; + typedef std::output_iterator_tag iterator_category; typedef std::basic_string<utf8_t> string_type; iso_back_insert_iterator(string_type& s): s(s) {} + iso_back_insert_iterator& operator=(const iso_back_insert_iterator& other) + { + if (std::addressof(other.s) != std::addressof(s)) + throw std::runtime_error("iso_back_insert_iterator assignment operator actually called! 
Iterator should not be assigned to."); + + return *this; + } + // no-op reference operator++() { @@ -452,14 +484,15 @@ namespace unicode { template<typename InputIt, typename OutputIt> struct UTF { - typedef typename InputIt::value_type value_type; // OutputIt::value_type is the same + typedef typename InputIt::input_type input_type; + typedef typename OutputIt::value_type value_type; - static InputIt begin(const std::basic_string<value_type>& s) + static InputIt begin(const std::basic_string<input_type>& s) { return InputIt{s.cbegin(), s.cend()}; } - static InputIt end(const std::basic_string<value_type>& s) + static InputIt end(const std::basic_string<input_type>& s) { return InputIt{s.cend(), s.cend()}; } @@ -528,8 +561,8 @@ namespace unicode { bool is_valid_utf(const std::basic_string<T>& s) { try { - std::for_each(Encoding<T>::Facet::begin(s), Encoding<T>::Facet::end(s), [](const T& c){}); - } catch(...) { + std::for_each(Encoding<T>::Facet::begin(s), Encoding<T>::Facet::end(s), [](const char32_t& c){}); + } catch (const std::invalid_argument&) { return false; } return true; |