diff options
| -rw-r--r-- | Makefile | 11 | ||||
| -rw-r--r-- | include/unicode.h | 34 | ||||
| -rw-r--r-- | src/test-unicode.cpp | 6 | 
3 files changed, 32 insertions, 19 deletions
| @@ -1,12 +1,19 @@  CXX=clang++-11  #CXX=g++-10 +STANDARD=c++17 +#STANDARD=c++20 +  CXXFLAGS=-O0 -g -D_DEBUG  #CXXFLAGS=-O2 -DNDEBUG -CXXFLAGS+=-Wall -Iinclude -std=c++20 +CXXFLAGS+=-Wall -Iinclude -std=$(STANDARD)  ifeq ($(CXX),clang++-11) +COMPILER_SUITE=clang +endif + +ifeq ($(COMPILER_SUITE),clang)  CXXFLAGS+=-stdlib=libc++  endif @@ -16,7 +23,7 @@ LDLIBS+=\  -lboost_timer \  -lboost_system \ -ifeq ($(CXX),clang++-11) +ifeq ($(COMPILER_SUITE),clang)  LIBS+= \  -fuse-ld=lld-11 \  -lc++ \ diff --git a/include/unicode.h b/include/unicode.h index 908c75f..9e0132b 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -8,6 +8,9 @@  #ifdef __cpp_char8_t  // char8_t available + typedef char8_t utf8_t; +#else + typedef char utf8_t;  #endif  namespace unicode { @@ -21,7 +24,7 @@ namespace unicode {  } -namespace { +namespace unicode::detail {   using namespace std::string_literals; @@ -107,29 +110,30 @@ namespace {     return static_cast<char32_t>(b & (0b1111111 >> n)) << ((n - 1) * 6);    } +  // GCC Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282    // specialization for UTF-8    template<> -  void calculate_value<char8_t>() +  void calculate_value<utf8_t>()    {     size_t remaining{remaining_code_units()};     if (!remaining)      return; -   char8_t byte0 {get_code_unit<0>()}; +   utf8_t byte0 {get_code_unit<0>()};     if (byte0 & 0x80) { // 2-4 bytes      if (remaining >= 2) { -     char8_t byte1 {get_code_unit<1>()}; +     utf8_t byte1 {get_code_unit<1>()};       if (is_byte0_of<2>(byte0) && is_continuation_byte(byte1)) { // 2 bytes        value = value_byte0_of<2>(byte0) | continuation_value(byte1);        sequence_length = 2;       } else if (remaining >= 3) { -      char8_t byte2 {get_code_unit<2>()}; +      utf8_t byte2 {get_code_unit<2>()};        if (is_byte0_of<3>(byte0) && is_continuation_byte(byte1, byte2)) { // 3 bytes         value = value_byte0_of<3>(byte0) | continuation_value(byte1, byte2);         sequence_length = 3;        } else if (remaining >= 4) { -       char8_t byte3 {get_code_unit<3>()}; +       utf8_t byte3 {get_code_unit<3>()};         if (is_byte0_of<4>(byte0) && is_continuation_byte(byte1, byte2, byte3)) { // 4 bytes          value = value_byte0_of<4>(byte0) | continuation_value(byte1, byte2, byte3);          sequence_length = 4; @@ -262,7 +266,7 @@ namespace {    // specialization for UTF-8    // append utf-8 byte sequence    template<> -  reference operator=<char8_t>(const char32_t& value) +  reference operator=<utf8_t>(const char32_t& value)    {     if (value < 0x80) { // 1 byte      s.push_back(value); @@ -323,15 +327,17 @@ namespace {  namespace unicode { -template<typename From, typename To> -std::basic_string<To> utf_to_utf(const std::basic_string<From>& s) -{ - std::basic_string<To> result; + using namespace detail; - std::copy(utf_begin<From>(s), utf_end<From>(s), utf_back_inserter<To>(result)); + template<typename From, typename To> + std::basic_string<To> utf_to_utf(const std::basic_string<From>& s) + { +  std::basic_string<To> result; - return result; -} +  std::copy(utf_begin<From>(s), utf_end<From>(s), utf_back_inserter<To>(result)); + +  return result; + }  } // namespace unicode diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 2dfabef..05370c7 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -18,7 +18,7 @@  using namespace std::chrono_literals; -typedef std::tuple<std::basic_string<char8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>> types_collection_type; +typedef std::tuple<std::basic_string<utf8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>> types_collection_type;  // create tuple of the same string, in UTF-8, UTF-16 and UTF-32  #define SUCCESS_TUPLE(x) {u8 ## x, u ## x, U ## x} @@ -34,7 +34,7 @@ std::vector<types_collection_type> success_sets {  };  // Error cases: throwing upon convert to all other types -std::vector<std::basic_string<char8_t>> failure_strings_char8_t { +std::vector<std::basic_string<utf8_t>> failure_strings_char8_t {   u8"\x80", // utf-8 continuation byte   u8"\x81", // utf-8 continuation byte   u8"\xc3ä", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" @@ -56,7 +56,7 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t {  // output operators must be in same namespace as the type itself  namespace std { -std::ostream& operator<<(std::ostream& os, std::basic_string<char8_t> const& s) +std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s)  {   os << "[";   for (auto& c: s) | 
