blob: b90ed15a64d840767d548accee4494973c755987 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
// libunicode
// Copyright (C) 2021 Roland Reichwein
#pragma once
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
namespace {
struct utf8_iterator
{
typedef char32_t value_type;
typedef char32_t& reference;
void get_value()
{
// TODO: set value to current data in *iterator ...
value = 'X';
}
size_t get_number_of_utf8_bytes()
{
// TODO: how many bytes
return 1;
}
// pre-increment
utf8_iterator& operator++()
{
iterator += get_number_of_utf8_bytes();
return *this;
}
bool operator!=(const utf8_iterator& other) const
{
return iterator != other.iterator;
}
reference operator*()
{
get_value();
return value;
}
std::u8string::const_iterator iterator;
std::u8string::const_iterator end_iterator;
value_type value{};
};
// Output iterator that appends Unicode code points to a std::u16string,
// encoding each one as UTF-16.
//
// Usage follows std::back_insert_iterator: `*it = code_point;` appends, and
// incrementing is a no-op. Code points that are not Unicode scalar values
// (surrogates U+D800..U+DFFF, or anything above U+10FFFF) are written as
// U+FFFD so the output is always well-formed UTF-16.
struct utf16_back_insert_iterator
{
    typedef utf16_back_insert_iterator& reference;
    typedef void value_type;
    typedef void pointer;
    typedef std::ptrdiff_t difference_type;
    typedef std::output_iterator_tag iterator_category;

    // Binds the iterator to the string that receives the output; `s` must
    // outlive the iterator because only a reference is stored.
    utf16_back_insert_iterator(std::u16string& s): s(s) {}

    // no-op
    utf16_back_insert_iterator& operator++()
    {
        return *this;
    }

    // no-op; the copy refers to the same target string
    utf16_back_insert_iterator operator++(int)
    {
        return *this;
    }

    // support *x = value, together with operator=()
    reference operator*()
    {
        return *this;
    }

    // append utf-16 word sequence
    reference operator=(const char32_t& value)
    {
        const bool surrogate = value >= 0xD800 && value <= 0xDFFF;
        if (surrogate || value > 0x10FFFF) {
            s.push_back(static_cast<char16_t>(0xFFFD)); // not a scalar value
        } else if (value < 0x10000) {
            // Basic Multilingual Plane: one code unit, numerically identical
            s.push_back(static_cast<char16_t>(value));
        } else {
            // Supplementary plane: split the 20-bit offset into a
            // high (leading) and low (trailing) surrogate.
            const char32_t offset = value - 0x10000;
            s.push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));
            s.push_back(static_cast<char16_t>(0xDC00 + (offset & 0x3FF)));
        }
        return *this;
    }

    std::u16string& s;
};
// Factory: builds a utf16_back_insert_iterator bound to `s`.
utf16_back_insert_iterator utf16_back_inserter(std::u16string& s)
{
    utf16_back_insert_iterator inserter{s};
    return inserter;
}
// Builds a utf8_iterator positioned at the first byte of `s`, carrying the
// end of `s` as its end position.
utf8_iterator utf8_begin(const std::u8string& s)
{
    utf8_iterator first{s.begin(), s.end()};
    return first;
}
// Builds the past-the-end utf8_iterator for `s`: both the current position
// and the end position are the end of the string.
utf8_iterator utf8_end(const std::u8string& s)
{
    utf8_iterator last{s.end(), s.end()};
    return last;
}
} // namespace
namespace unicode {
// returns number of bytes in UTF-8 byte sequence of first found code point,
// if found. 0 if none found or sequence empty.
//size_t utf8_start()
//{
//}
std::u16string utf8_to_utf16(const std::u8string& s)
{
std::u16string result;
std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result));
return result;
}
//std::u8string utf16_to_utf8(const std::u16string& s)
//{
// std::u8string result;
//
// std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result));
//
// return result;
//}
} // namespace unicode
|