From 5f7ae62649c79683597e33af673ae1dcf5267917 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 24 Jan 2021 18:48:42 +0100 Subject: Initial commit: Non working initial code --- include/unicode.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/unicode.h (limited to 'include/unicode.h') diff --git a/include/unicode.h b/include/unicode.h new file mode 100644 index 0000000..2969aa0 --- /dev/null +++ b/include/unicode.h @@ -0,0 +1,123 @@ +// libunicode +// Copyright (C) 2021 Roland Reichwein + +#pragma once + +#include +#include + +namespace { + + struct utf8_iterator + { + typedef char32_t value_type; + typedef char32_t& reference; + + void get_value() + { + // TODO: set value to current data in *iterator ... + value = 'X'; + } + + size_t get_number_of_utf8_bytes() + { + // TODO: how many bytes + return 1; + } + + // pre-increment + utf8_iterator& operator++() + { + iterator += get_number_of_utf8_bytes(); + return *this; + } + + bool operator!=(const utf8_iterator& other) const + { + return iterator != other.iterator; + } + + reference operator*() + { + get_value(); + return value; + } + + std::u8string::iterator iterator; + + std::u8string::iterator end_iterator; + value_type value{}; + }; + + struct utf16_back_insert_iterator + { + typedef utf16_back_insert_iterator& reference; + + utf16_back_insert_iterator(std::u16string& s): s(s) {} + + // no-op + utf16_back_insert_iterator& operator++() + { + return *this; + } + + // support *x = value, together with operator=() + reference operator*() + { + return *this; + } + + // append utf-16 word sequence + reference operator=(const char32_t& value) + { + s.push_back(0); // TODO + } + + std::u16string& s; + }; + + utf16_back_insert_iterator utf16_back_inserter(std::u16string& s) + { + return utf16_back_insert_iterator(s); + } + + utf8_iterator utf8_begin(std::u8string& s) + { + return utf8_iterator{s.begin(), s.end()}; + } + + utf8_iterator utf8_end(std::u8string& s) + { + return utf8_iterator{s.end(), s.end()}; + } + +} // namespace + +namespace unicode { + +// returns number of bytes in UTF-8 byte sequence of first found code point, +// if found. 0 if none found or sequence empty. +//size_t utf8_start() +//{ +//} + +std::u16string utf8_to_utf16(const std::u8string& s) +{ + std::u16string result; + + std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result)); + + return result; +} + +//std::u8string utf16_to_utf8(const std::u16string& s) +//{ +// std::u8string result; +// +// std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result)); +// +// return result; +//} + +} // namespace unicode + -- cgit v1.2.3