diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-28 12:46:30 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-28 12:46:30 +0100 |
commit | 403c885d67f79c637ebcb303722adfd6a4b8195e (patch) | |
tree | d8f40c674a5c65176e028a1c7bb9122baa2e7756 /src | |
parent | 970ba4111160fbf78351b21a024c46c0978e0440 (diff) |
Optimize UTF validation
Diffstat (limited to 'src')
-rw-r--r-- | src/test-unicode.cpp | 77 |
1 files changed, 58 insertions, 19 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 2675989..99e164b 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -283,6 +283,9 @@ void test_utf_to_utf(std::tuple<Ts...>& t) // test facet interface result = unicode::convert<typename unicode::Encoding<typename From::value_type>::Facet, typename unicode::Encoding<typename To::value_type>::Facet>(std::get<i>(t)); BOOST_CHECK_MESSAGE(std::get<j>(t) == result, "Facet: From " << typeid(From).name() << "(" << i << ", " << std::get<i>(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get<j>(t) << "), got " << result); + + // test actual results by comparing with boost::locale::conv results + BOOST_CHECK_EQUAL(result, (boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(std::get<i>(t)))); // iterate over other combinations if constexpr (i + 1 < std::tuple_size<typename std::remove_reference<decltype(t)>::type>::value) @@ -650,26 +653,10 @@ BOOST_AUTO_TEST_CASE(convert) BOOST_CHECK((unicode::convert<char, char32_t>("äöü")) == std::u32string{U"äöü"}); -#ifdef _WIN32 - BOOST_CHECK(sizeof(wchar_t) == 2); -#else // Unix like - BOOST_CHECK(sizeof(wchar_t) == 4); -#endif - - // For the following checks, wchar_t size and encoding is system dependent: - // Windows: UTF-16 - // Linux: UTF-32 - BOOST_CHECK((unicode::convert<char, wchar_t>("äöü")) == std::wstring{L"äöü"}); - BOOST_CHECK((unicode::convert<char, wchar_t>("\u732b")) == std::wstring{L"\u732b"}); - BOOST_CHECK((unicode::convert<char, wchar_t>("\U0001F63A")) == std::wstring{L"\U0001F63A"}); - BOOST_CHECK((unicode::convert<wchar_t, char32_t>(L"\U0001F63A")) == std::u32string{U"\U0001F63A"}); - BOOST_CHECK((unicode::convert<wchar_t, utf8_t>(L"\U0001F63A")) == std::basic_string<utf8_t>{(utf8_t*)"\U0001F63A"}); + // vector + BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<char16_t>>(std::vector<char>{})) == std::vector<char16_t>{}); + BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<char16_t>>(std::vector<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::vector<char16_t>{u'ä', u'ö', u'ü'})); - BOOST_CHECK((unicode::convert<std::string, std::wstring>(std::string{"äöü"})) == std::wstring{L"äöü"}); - - BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{})) == std::vector<wchar_t>{}); - BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::vector<wchar_t>{L'ä', L'ö', L'ü'})); - // deque BOOST_CHECK((unicode::convert<std::deque<char>, std::deque<wchar_t>>(std::deque<char>{})) == std::deque<wchar_t>{}); BOOST_CHECK((unicode::convert<std::deque<char>, std::deque<wchar_t>>(std::deque<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::deque<wchar_t>{L'ä', L'ö', L'ü'})); @@ -703,6 +690,58 @@ BOOST_AUTO_TEST_CASE(convert) BOOST_CHECK((unicode::convert<std::array<uint8_t, 6>, std::list<uint16_t>>(std::array<uint8_t, 6>{0xc3, 0xa4, 0xc3, 0xb6, 0xc3, 0xbc})) == (std::list<uint16_t>{L'ä', L'ö', L'ü'})); } +// wchar_t specific tests: system dependent +BOOST_AUTO_TEST_CASE(convert_wstring) +{ +#ifdef _WIN32 + BOOST_CHECK(sizeof(wchar_t) == 2); +#else // Unix like + BOOST_CHECK(sizeof(wchar_t) == 4); +#endif + + // For the following checks, wchar_t size and encoding is system dependent: + // Windows: UTF-16 + // Linux: UTF-32 + BOOST_CHECK((unicode::convert<char, wchar_t>("äöü")) == std::wstring{L"äöü"}); + BOOST_CHECK((unicode::convert<char, wchar_t>("\u732b")) == std::wstring{L"\u732b"}); + BOOST_CHECK((unicode::convert<char, wchar_t>("\U0001F63A")) == std::wstring{L"\U0001F63A"}); + BOOST_CHECK((unicode::convert<wchar_t, char32_t>(L"\U0001F63A")) == std::u32string{U"\U0001F63A"}); + BOOST_CHECK((unicode::convert<wchar_t, utf8_t>(L"\U0001F63A")) == std::basic_string<utf8_t>{(utf8_t*)"\U0001F63A"}); + + BOOST_CHECK((unicode::convert<std::string, std::wstring>(std::string{"äöü"})) == std::wstring{L"äöü"}); + + BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{})) == std::vector<wchar_t>{}); + BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::vector<wchar_t>{L'ä', L'ö', L'ü'})); + + std::u16string u16_value{u"\U0001F63A"}; + std::u32string u32_value{U"\U0001F63A"}; + std::wstring w_value{L"\U0001F63A"}; + + std::u16string result_u16_value{unicode::convert<std::wstring, std::u16string>(w_value)}; + std::u32string result_u32_value{unicode::convert<std::wstring, std::u32string>(w_value)}; + std::wstring result_w_value_1{unicode::convert<std::u16string, std::wstring>(u16_value)}; + std::wstring result_w_value_2{unicode::convert<std::u32string, std::wstring>(u32_value)}; + + BOOST_CHECK_EQUAL(u16_value.size(), 2); + BOOST_CHECK_EQUAL(u32_value.size(), 1); + BOOST_CHECK_EQUAL(result_u16_value.size(), 2); + BOOST_CHECK_EQUAL(result_u32_value.size(), 1); + BOOST_CHECK_EQUAL(u16_value, result_u16_value); + BOOST_CHECK_EQUAL(u32_value, result_u32_value); + BOOST_CHECK(w_value == result_w_value_1); + BOOST_CHECK(w_value == result_w_value_2); +#ifdef _WIN32 + BOOST_CHECK_EQUAL(w_value.size(), 2); + BOOST_CHECK_EQUAL(result_w_value_1.size(), 2); + BOOST_CHECK_EQUAL(result_w_value_2.size(), 2); +#else // Unix like + BOOST_CHECK_EQUAL(w_value.size(), 1); + BOOST_CHECK_EQUAL(result_w_value_1.size(), 1); + BOOST_CHECK_EQUAL(result_w_value_2.size(), 1); +#endif + +} + BOOST_AUTO_TEST_CASE(is_valid_utf) { BOOST_CHECK(unicode::is_valid_utf<char16_t>(u"äöü")); |