summary | refs | log | tree | commit | diff | homepage
path: root/include/unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/unicode.h')
-rw-r--r-- include/unicode.h 49
1 files changed, 41 insertions, 8 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 4b676bf..296ba1d 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -3,6 +3,7 @@
#pragma once
#include <algorithm>
+#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
@@ -33,8 +34,12 @@ namespace unicode::detail {
template<typename T>
struct utf_iterator
{
- typedef T value_type;
+ typedef T input_type;
+ typedef char32_t value_type;
typedef char32_t& reference;
+ typedef char32_t* pointer;
+ typedef size_t difference_type;
+ typedef std::input_iterator_tag iterator_category;
typedef std::basic_string<T> string_type;
utf_iterator(const typename string_type::const_iterator& cbegin, const typename string_type::const_iterator& cend):
@@ -217,9 +222,20 @@ namespace unicode::detail {
typedef T value_type;
typedef std::basic_string<T> string_type;
typedef utf_back_insert_iterator& reference;
+ typedef utf_back_insert_iterator* pointer;
+ typedef size_t difference_type;
+ typedef std::output_iterator_tag iterator_category;
utf_back_insert_iterator(string_type& s): s(s) {}
+ utf_back_insert_iterator<T>& operator=(const utf_back_insert_iterator<T>& other) // Copy assignment: never copies any state from `other`.
+ { // NOTE(review): presumably provided so the iterator stays copy-assignable although `s` looks like a reference member (ctor does s(s)) - confirm against the member declaration.
+ if (std::addressof(other.s) != std::addressof(s)) // `other.s` and `s` are distinct objects: refuse instead of silently keeping the old `s`.
+ throw std::runtime_error("utf_back_insert_iterator assignment operator actually called! Iterator should not be assigned to.");
+
+ return *this; // Both sides already use the same `s`: nothing to do, return this iterator unchanged.
+ }
+
// no-op
reference operator++()
{
@@ -273,7 +289,7 @@ namespace unicode::detail {
reference operator=<utf8_t>(const char32_t& value)
{
if (value < 0x80) { // 1 byte
- s.push_back(value);
+ s.push_back(static_cast<value_type>(value));
} else if (value < 0x800) { // 2 bytes
s.push_back(byte_n_of_m<0,2>(value));
s.push_back(byte_n_of_m<1,2>(value));
@@ -297,7 +313,7 @@ namespace unicode::detail {
reference operator=<char16_t>(const char32_t& value)
{
if (value <= 0xFFFF) { // expect value to be already valid Unicode values
- s.push_back(value);
+ s.push_back(static_cast<value_type>(value));
} else {
char32_t value_reduced{value - 0x10000};
s.push_back((value_reduced >> 10) + 0xD800);
@@ -349,8 +365,12 @@ namespace unicode {
template<unicode::detail::iso_map_type& Map=iso_8859_1_map>
struct iso_iterator {
+ typedef utf8_t input_type;
typedef char32_t value_type;
typedef char32_t& reference;
+ typedef char32_t* pointer;
+ typedef size_t difference_type;
+ typedef std::input_iterator_tag iterator_category;
typedef std::basic_string<utf8_t>::const_iterator iterator;
iso_iterator(const iterator& it): m_it(it) {}
@@ -388,10 +408,22 @@ namespace unicode {
template<unicode::detail::iso_map_type_reverse& Map=iso_8859_1_map_reverse>
struct iso_back_insert_iterator {
typedef iso_back_insert_iterator& reference;
+ typedef iso_back_insert_iterator* pointer;
+ typedef size_t difference_type;
+ typedef utf8_t value_type;
+ typedef std::output_iterator_tag iterator_category;
typedef std::basic_string<utf8_t> string_type;
iso_back_insert_iterator(string_type& s): s(s) {}
+ iso_back_insert_iterator& operator=(const iso_back_insert_iterator& other) // Copy assignment: never copies any state from `other`.
+ { // NOTE(review): presumably provided so the iterator stays copy-assignable although `s` looks like a reference member (ctor does s(s)) - confirm against the member declaration.
+ if (std::addressof(other.s) != std::addressof(s)) // `other.s` and `s` are distinct objects: refuse instead of silently keeping the old `s`.
+ throw std::runtime_error("iso_back_insert_iterator assignment operator actually called! Iterator should not be assigned to.");
+
+ return *this; // Both sides already use the same `s`: nothing to do, return this iterator unchanged.
+ }
+
// no-op
reference operator++()
{
@@ -452,14 +484,15 @@ namespace unicode {
template<typename InputIt, typename OutputIt>
struct UTF
{
- typedef typename InputIt::value_type value_type; // OutputIt::value_type is the same
+ typedef typename InputIt::input_type input_type;
+ typedef typename OutputIt::value_type value_type;
- static InputIt begin(const std::basic_string<value_type>& s)
+ static InputIt begin(const std::basic_string<input_type>& s)
{
return InputIt{s.cbegin(), s.cend()};
}
- static InputIt end(const std::basic_string<value_type>& s)
+ static InputIt end(const std::basic_string<input_type>& s)
{
return InputIt{s.cend(), s.cend()};
}
@@ -528,8 +561,8 @@ namespace unicode {
bool is_valid_utf(const std::basic_string<T>& s)
{
try {
- std::for_each(Encoding<T>::Facet::begin(s), Encoding<T>::Facet::end(s), [](const T& c){});
- } catch(...) {
+ std::for_each(Encoding<T>::Facet::begin(s), Encoding<T>::Facet::end(s), [](const char32_t& c){});
+ } catch (const std::invalid_argument&) {
return false;
}
return true;