// libunicode
// Copyright (C) 2021 Roland Reichwein

#pragma once

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>

namespace {

// U+FFFD REPLACEMENT CHARACTER: substituted for every malformed UTF-8
// sequence on input and for every non-scalar value on output, so that the
// conversion never has to throw.
constexpr char32_t replacement_character = 0xFFFD;

// Returns true if cp is a Unicode scalar value, i.e. it lies in
// [0, 0x10FFFF] and is not a UTF-16 surrogate (0xD800..0xDFFF).
constexpr bool is_scalar_value(char32_t cp)
{
    return cp <= 0x10FFFF && (cp < 0xD800 || cp > 0xDFFF);
}

// Input iterator that walks a range of UTF-8 code units and yields one
// char32_t code point per step.
//
// CodeUnitIterator is the const_iterator of the underlying string; its
// elements only have to be convertible to unsigned char, so both char and
// char8_t based strings work.
//
// Malformed input is not an error: the lead byte plus any well-formed
// continuation bytes read before the defect was detected are consumed and
// yielded as a single replacement_character.
template <typename CodeUnitIterator>
struct basic_utf8_iterator
{
    typedef std::input_iterator_tag iterator_category;
    typedef char32_t value_type;
    typedef std::ptrdiff_t difference_type;
    typedef char32_t* pointer;
    typedef char32_t& reference;

    // Decodes the sequence starting at `iterator` into `value` and stores the
    // number of code units it occupies in `sequence_length`. The result is
    // cached until the next increment, so repeated calls are cheap.
    void get_value()
    {
        if (sequence_length != 0)
            return; // already decoded for the current position

        if (iterator == end_iterator) {
            // Nothing to decode; never read past the end of the range.
            value = 0;
            return;
        }

        CodeUnitIterator position = iterator;
        const unsigned char lead = static_cast<unsigned char>(*position);
        ++position;

        std::size_t continuation_bytes = 0;
        char32_t smallest_valid = 0; // anything below this is an overlong form
        char32_t code_point = 0;

        if (lead < 0x80) { // 0xxxxxxx
            code_point = lead;
        } else if ((lead & 0xE0) == 0xC0) { // 110xxxxx 10xxxxxx
            code_point = static_cast<char32_t>(lead & 0x1F);
            continuation_bytes = 1;
            smallest_valid = 0x80;
        } else if ((lead & 0xF0) == 0xE0) { // 1110xxxx + 2 continuation bytes
            code_point = static_cast<char32_t>(lead & 0x0F);
            continuation_bytes = 2;
            smallest_valid = 0x800;
        } else if ((lead & 0xF8) == 0xF0) { // 11110xxx + 3 continuation bytes
            code_point = static_cast<char32_t>(lead & 0x07);
            continuation_bytes = 3;
            smallest_valid = 0x10000;
        } else {
            // Stray continuation byte (10xxxxxx) or a byte that can never
            // start a sequence (11111xxx).
            value = replacement_character;
            sequence_length = 1;
            return;
        }

        std::size_t consumed = 1;
        for (std::size_t i = 0; i < continuation_bytes; ++i) {
            if (position == end_iterator)
                break; // truncated sequence
            const unsigned char unit = static_cast<unsigned char>(*position);
            if ((unit & 0xC0) != 0x80)
                break; // not a continuation byte; leave it for the next step
            code_point = (code_point << 6) | static_cast<char32_t>(unit & 0x3F);
            ++position;
            ++consumed;
        }

        const bool complete = consumed == continuation_bytes + 1;
        const bool well_formed =
            complete && code_point >= smallest_valid && is_scalar_value(code_point);

        value = well_formed ? code_point : replacement_character;
        sequence_length = consumed;
    }

    // Number of code units occupied by the sequence at the current position
    // (0 only when the iterator is at the end of the range).
    std::size_t get_number_of_utf8_bytes()
    {
        get_value();
        return sequence_length;
    }

    // pre-increment: skip the whole sequence at the current position
    basic_utf8_iterator& operator++()
    {
        typedef typename std::iterator_traits<CodeUnitIterator>::difference_type distance_type;
        std::advance(iterator, static_cast<distance_type>(get_number_of_utf8_bytes()));
        sequence_length = 0; // invalidate the cache for the new position
        return *this;
    }

    // post-increment
    basic_utf8_iterator operator++(int)
    {
        basic_utf8_iterator previous = *this;
        ++*this;
        return previous;
    }

    // Two iterators are equal when they point at the same code unit.
    bool operator==(const basic_utf8_iterator& other) const
    {
        return iterator == other.iterator;
    }

    bool operator!=(const basic_utf8_iterator& other) const
    {
        return iterator != other.iterator;
    }

    // Returns the code point at the current position. Must not be called on
    // an end iterator.
    reference operator*()
    {
        get_value();
        return value;
    }

    CodeUnitIterator iterator;     // current position
    CodeUnitIterator end_iterator; // end of the underlying range
    value_type value{};            // decoded code point (valid if sequence_length != 0)
    std::size_t sequence_length{}; // cached length of current sequence, 0 = not decoded
};

#if defined(__cpp_lib_char8_t)
typedef basic_utf8_iterator<std::u8string::const_iterator> utf8_iterator;
#endif

// Output iterator that appends each assigned code point to a std::u16string
// as UTF-16 (one code unit, or a surrogate pair above U+FFFF).
struct utf16_back_insert_iterator
{
    typedef std::output_iterator_tag iterator_category;
    typedef void value_type;
    typedef std::ptrdiff_t difference_type;
    typedef void pointer;
    typedef utf16_back_insert_iterator& reference;

    utf16_back_insert_iterator(std::u16string& s): target(&s) {}

    // no-op
    utf16_back_insert_iterator& operator++() { return *this; }

    // no-op
    utf16_back_insert_iterator operator++(int) { return *this; }

    // support *x = value, together with operator=()
    reference operator*() { return *this; }

    // append utf-16 word sequence
    reference operator=(const char32_t& value)
    {
        // Surrogates and values above U+10FFFF cannot be represented.
        const char32_t code_point = is_scalar_value(value) ? value : replacement_character;

        if (code_point < 0x10000) {
            target->push_back(static_cast<char16_t>(code_point));
        } else {
            const char32_t offset = code_point - 0x10000; // 20 significant bits
            target->push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));   // high surrogate
            target->push_back(static_cast<char16_t>(0xDC00 + (offset & 0x3FF))); // low surrogate
        }
        return *this;
    }

    // Destination string; stored as a pointer so that the iterator stays
    // copy-assignable.
    std::u16string* target;
};

// Creates an output iterator appending to s.
inline utf16_back_insert_iterator utf16_back_inserter(std::u16string& s)
{
    return utf16_back_insert_iterator(s);
}

// Code point iterator positioned at the first code unit of s.
// s must outlive the returned iterator.
template <typename String>
basic_utf8_iterator<typename String::const_iterator> utf8_begin(const String& s)
{
    return basic_utf8_iterator<typename String::const_iterator>{s.cbegin(), s.cend()};
}

// Code point iterator positioned past the last code unit of s.
template <typename String>
basic_utf8_iterator<typename String::const_iterator> utf8_end(const String& s)
{
    return basic_utf8_iterator<typename String::const_iterator>{s.cend(), s.cend()};
}

// Shared implementation of the public utf8_to_utf16() overloads.
template <typename String>
std::u16string convert_utf8_to_utf16(const String& s)
{
    std::u16string result;
    // Every decoded sequence consumes at least as many UTF-8 code units as it
    // produces UTF-16 code units, so this is an upper bound.
    result.reserve(s.size());

    std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result));

    return result;
}

} // namespace

namespace unicode {

// returns number of bytes in UTF-8 byte sequence of first found code point,
// if found. 0 if none found or sequence empty.
//size_t utf8_start()
//{
//}

// Converts s, interpreted as a sequence of UTF-8 code units, to UTF-16.
// Malformed sequences are replaced by U+FFFD; the function does not reject
// any input.
inline std::u16string utf8_to_utf16(const std::string& s)
{
    return convert_utf8_to_utf16(s);
}

#if defined(__cpp_lib_char8_t)
// Converts the UTF-8 string s to UTF-16.
// Malformed sequences are replaced by U+FFFD; the function does not reject
// any input.
inline std::u16string utf8_to_utf16(const std::u8string& s)
{
    return convert_utf8_to_utf16(s);
}
#endif

//std::u8string utf16_to_utf8(const std::u16string& s)
//{
//    std::u8string result;
//
//    std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result));
//
//    return result;
//}

} // namespace unicode