summary refs log tree commit diff homepage
path: root/include/unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/unicode.h')
-rw-r--r-- include/unicode.h 34
1 files changed, 20 insertions, 14 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 908c75f..9e0132b 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -8,6 +8,9 @@
#ifdef __cpp_char8_t
// char8_t available
+ typedef char8_t utf8_t;
+#else
+ typedef char utf8_t;
#endif
namespace unicode {
@@ -21,7 +24,7 @@ namespace unicode {
}
-namespace {
+namespace unicode::detail {
using namespace std::string_literals;
@@ -107,29 +110,30 @@ namespace {
return static_cast<char32_t>(b & (0b1111111 >> n)) << ((n - 1) * 6);
}
+ // GCC bug 85282 (CWG 727: explicit specialization at class scope is rejected by affected GCC versions): https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282
// specialization for UTF-8
template<>
- void calculate_value<char8_t>()
+ void calculate_value<utf8_t>()
{
size_t remaining{remaining_code_units()};
if (!remaining)
return;
- char8_t byte0 {get_code_unit<0>()};
+ utf8_t byte0 {get_code_unit<0>()};
if (byte0 & 0x80) { // 2-4 bytes
if (remaining >= 2) {
- char8_t byte1 {get_code_unit<1>()};
+ utf8_t byte1 {get_code_unit<1>()};
if (is_byte0_of<2>(byte0) && is_continuation_byte(byte1)) { // 2 bytes
value = value_byte0_of<2>(byte0) | continuation_value(byte1);
sequence_length = 2;
} else if (remaining >= 3) {
- char8_t byte2 {get_code_unit<2>()};
+ utf8_t byte2 {get_code_unit<2>()};
if (is_byte0_of<3>(byte0) && is_continuation_byte(byte1, byte2)) { // 3 bytes
value = value_byte0_of<3>(byte0) | continuation_value(byte1, byte2);
sequence_length = 3;
} else if (remaining >= 4) {
- char8_t byte3 {get_code_unit<3>()};
+ utf8_t byte3 {get_code_unit<3>()};
if (is_byte0_of<4>(byte0) && is_continuation_byte(byte1, byte2, byte3)) { // 4 bytes
value = value_byte0_of<4>(byte0) | continuation_value(byte1, byte2, byte3);
sequence_length = 4;
@@ -262,7 +266,7 @@ namespace {
// specialization for UTF-8
// append utf-8 byte sequence
template<>
- reference operator=<char8_t>(const char32_t& value)
+ reference operator=<utf8_t>(const char32_t& value)
{
if (value < 0x80) { // 1 byte
s.push_back(value);
@@ -323,15 +327,17 @@ namespace {
namespace unicode {
-template<typename From, typename To>
-std::basic_string<To> utf_to_utf(const std::basic_string<From>& s)
-{
- std::basic_string<To> result;
+ using namespace detail;
- std::copy(utf_begin<From>(s), utf_end<From>(s), utf_back_inserter<To>(result));
+ template<typename From, typename To>
+ std::basic_string<To> utf_to_utf(const std::basic_string<From>& s)
+ {
+ std::basic_string<To> result;
- return result;
-}
+ std::copy(utf_begin<From>(s), utf_end<From>(s), utf_back_inserter<To>(result));
+
+ return result;
+ }
} // namespace unicode