diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | include/unicode.h | 5 | ||||
-rw-r--r-- | include/unicode/iso.h | 1 | ||||
-rw-r--r-- | include/unicode/utf.h | 8 | ||||
-rw-r--r-- | src/test-unicode.cpp | 9 |
5 files changed, 13 insertions, 12 deletions
@@ -45,7 +45,7 @@ ifeq ($(CXX),clang++-13) ifeq ($(ONDEBIAN),yes) COMPILER_SUITE=clang LIBS+=-fuse-ld=lld-13 -STANDARD=c++20 +#STANDARD=c++20 endif endif diff --git a/include/unicode.h b/include/unicode.h index 34812df..feb012a 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -235,8 +235,7 @@ namespace unicode { }); accu = 0; bytes_in_accu = 0; - } else - if ((accu & 0x80) == 0) { // 1 byte sequence + } else if ((accu & 0x80) == 0) { // 1 byte sequence append_utf<7>(result, static_cast<char32_t>(accu & 0x7F)); accu >>= 8; bytes_in_accu -= 1; @@ -307,7 +306,7 @@ namespace unicode { while (s_index < s_size || bytes_in_accu > 0) { while (s_index < s_size && bytes_in_accu < 8) { - accu |= static_cast<uint64_t>(*reinterpret_cast<const typename From::value_type*>(&(s.data()[s_index]))) << (bytes_in_accu * 8); + accu |= static_cast<uint64_t>(*reinterpret_cast<const uint8_t*>(&(s.data()[s_index]))) << (bytes_in_accu * 8); ++s_index; bytes_in_accu += sizeof(typename From::value_type); } diff --git a/include/unicode/iso.h b/include/unicode/iso.h index 9b20afd..1f5f007 100644 --- a/include/unicode/iso.h +++ b/include/unicode/iso.h @@ -2,6 +2,7 @@ #include "types.h" +#include <algorithm> #include <stdexcept> #include <string> #include <unordered_map> diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 6d7c2f7..1d2f28e 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -43,8 +43,8 @@ namespace unicode::detail { template<typename T, typename std::enable_if_t<is_utf_8_v<T>, bool> = true> inline bool validate_utf(const std::basic_string<T>& s) { - int i{}; - auto size{s.size()}; + size_t i{}; + size_t size{s.size()}; while (i < size) { if (is_utf8_sequence(s[i])) { i++; @@ -86,8 +86,8 @@ namespace unicode::detail { template<typename T, typename std::enable_if_t<is_utf_16_v<T>, bool> = true> inline bool validate_utf(const std::basic_string<T>& s) { - int i{}; - auto size{s.size()}; + size_t i{}; + size_t size{s.size()}; while (i < size) { if (is_utf16_sequence(s[i])) { i++; diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 7216cff..6eb523e 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -49,6 +49,7 @@ std::vector<types_collection_type> success_sets { // separators SUCCESS_TUPLE("abc\r\ndef"), SUCCESS_TUPLE("äöü\0\u20ac"), + SUCCESS_TUPLE("äöü0\u20ac"), // optimization relevant strings SUCCESS_TUPLE("01234567\u20ac01234567"), @@ -306,7 +307,7 @@ void test_utf_to_utf(std::tuple<Ts...>& t) result = unicode::convert<From, To>(std::get<i>(t)); BOOST_CHECK_MESSAGE(std::get<j>(t) == result, "Container: From " << typeid(From).name() << "(" << i << ", " << std::get<i>(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get<j>(t) << "), got " << result); - // test facet interface + // test encoding interface result = unicode::convert<typename unicode::Encoding_t<typename From::value_type>, typename unicode::Encoding_t<typename To::value_type>>(std::get<i>(t)); BOOST_CHECK_MESSAGE(std::get<j>(t) == result, "Encoding: From " << typeid(From).name() << "(" << i << ", " << std::get<i>(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get<j>(t) << "), got " << result); @@ -384,7 +385,7 @@ void test_utf_to_utf_failure(std::basic_string<From>& s) BOOST_ERROR("Unexpected error on convert(): " << ex.what()); }; - // via facet + // via encoding try { (void) unicode::convert<typename unicode::Encoding_t<From>,typename unicode::Encoding_t<To>>(s); BOOST_ERROR("Encoding: Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); @@ -530,7 +531,7 @@ void test_random_invalid(random_context& rc, size_t length) BOOST_ERROR("Unexpected error on convert(): " << ex.what()); } - // facet interface + // encoding interface try { To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r)}; @@ -602,7 +603,7 @@ void test_random_valid(random_context& rc, size_t length, const std::string& des return unicode::convert<unicode::UTF_32, typename unicode::Encoding_t<typename From::value_type>>(s); }); - for (int i = 0; i < list.size(); i++) { + for (size_t i = 0; i < list.size(); i++) { BOOST_CHECK(list[i].size() >= u32list[i].size()); To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(list[i])}; BOOST_CHECK(result.size() >= u32list[i].size()); |