From cd4fad54c0be9fb7fca57e8e03228b8b649b5b51 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Wed, 27 Jan 2021 22:21:04 +0100 Subject: Bugfixes, tests --- src/test-unicode.cpp | 129 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 0560c1b..2cc8393 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -1,17 +1,83 @@ #define BOOST_TEST_MODULE unicode_test #include +#include +#include +#include +#include #include #include #include +#include #include -std::tuple, std::basic_string, std::basic_string> t { - u8"Täst", u"Täst", U"Täst" +typedef std::tuple, std::basic_string, std::basic_string> types_collection_type; + +// create tuple of the same string, in UTF-8, UTF-16 and UTF-32 +#define SUCCESS_TUPLE(x) {u8 ## x, u ## x, U ## x} + +// Success cases: convert string to all other types, respectively +std::vector success_sets { + SUCCESS_TUPLE(""), + SUCCESS_TUPLE("ASCII string1"), + SUCCESS_TUPLE("Täst just looks like German"), + SUCCESS_TUPLE("\u732b is chinese for cat"), + SUCCESS_TUPLE("\U0001F63A"), + SUCCESS_TUPLE("\U0001F63A is a smiling cat"), +}; + +// Error cases: throwing upon convert to all other types +std::vector> failure_strings_char8_t { + u8"\x80", + u8"\x81" +}; + +std::vector> failure_strings_char16_t { + u"\xD801", +}; + +std::vector> failure_strings_char32_t { + U"\xD801", + U"\x10000000", }; +// output operators must be in same namespace as the type itself +namespace std { + +std::ostream& operator<<(std::ostream& os, std::basic_string const& s) +{ + os << "["; + for (auto& c: s) + os << " " << std::to_string(static_cast(c)); + os << "]"; + + return os; +} + +std::ostream& operator<<(std::ostream& os, std::basic_string const& s) +{ + os << "["; + for (auto& c: s) + os << " " << std::to_string(static_cast(c)); + os << "]"; + + return os; +} + +std::ostream& operator<<(std::ostream& os, std::basic_string const& s) +{ + os << "["; + for (auto& c: s) + os << " " << std::to_string(static_cast(c)); + os << "]"; + + return os; +} + +} + template void test_utf_to_utf(std::tuple& t) { @@ -21,7 +87,7 @@ void test_utf_to_utf(std::tuple& t) // test To result { unicode::utf_to_utf(std::get(t)) }; - BOOST_CHECK(std::get(t) == result); + BOOST_CHECK_MESSAGE(std::get(t) == result, "From " << typeid(From).name() << "(" << i << ", " << std::get(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get(t) << "), got " << result); //std::cout << std::to_string(std::tuple_size::type>::value) << "," << std::to_string(i) << "," << std::to_string(j) << std::endl; @@ -32,27 +98,62 @@ void test_utf_to_utf(std::tuple& t) test_utf_to_utf<0, j + 1>(t); } -BOOST_AUTO_TEST_CASE(utf_to_utf) +// We don't use BOOST_DATA_TEST_CASE here because boost::test tries to assign +// a new variable to each tuple element which we don't want +// https://lists.boost.org/boost-bugs/2016/05/45214.php + +BOOST_AUTO_TEST_CASE(utf_to_utf_success) { - test_utf_to_utf(t); + for (auto& t: success_sets) + test_utf_to_utf(t); } -BOOST_AUTO_TEST_CASE(utf8_to_utf16) +// iterate over std::tuple T types +template +void test_utf_to_utf_failure(std::basic_string& s) { - std::u8string u8{u8"ascii string1"}; - - std::u16string u16{unicode::utf_to_utf(u8)}; + typedef typename std::tuple_element::type::value_type To; - BOOST_CHECK(u16 == u"ascii string1"); + try { + unicode::utf_to_utf(s); + BOOST_FAIL("Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); + } catch (...) { + // OK + }; + + // iterate over remaining types + if constexpr (index + 1 < std::tuple_size::value) + test_utf_to_utf_failure(s); } -BOOST_AUTO_TEST_CASE(utf16_to_utf8) +BOOST_AUTO_TEST_CASE(utf_to_utf_failure) { - std::u16string u16{u"ascii string1"}; + for (auto& s: failure_strings_char8_t) + test_utf_to_utf_failure::type::value_type, types_collection_type>(s); - std::u8string u8{unicode::utf_to_utf(u16)}; + for (auto& s: failure_strings_char16_t) + test_utf_to_utf_failure::type::value_type, types_collection_type>(s); + + for (auto& s: failure_strings_char32_t) + test_utf_to_utf_failure::type::value_type, types_collection_type>(s); +} + +BOOST_AUTO_TEST_CASE(is_valid_unicode) +{ + BOOST_CHECK(unicode::is_valid_unicode('\0')); + BOOST_CHECK(unicode::is_valid_unicode(U'a')); + BOOST_CHECK(unicode::is_valid_unicode(U'ä')); + BOOST_CHECK(unicode::is_valid_unicode(U'\u732b')); // cat chinese + BOOST_CHECK(unicode::is_valid_unicode(U'\U0001F63A')); // cat chinese + BOOST_CHECK(unicode::is_valid_unicode(0x0001F63A)); // cat smiley - BOOST_CHECK(u8 == u8"ascii string1"); + BOOST_CHECK(!unicode::is_valid_unicode(0x00110000)); + BOOST_CHECK(!unicode::is_valid_unicode(0xFFFFFFFF)); // U"\UFFFFFFFF" is invalid C++ + BOOST_CHECK(!unicode::is_valid_unicode(0x01234567)); + BOOST_CHECK(!unicode::is_valid_unicode(0x12345678)); + BOOST_CHECK(!unicode::is_valid_unicode(0xD800)); + BOOST_CHECK(!unicode::is_valid_unicode(0xD987)); + BOOST_CHECK(!unicode::is_valid_unicode(0xDFFF)); } // TODO: -- cgit v1.2.3