diff options
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r-- | src/test-unicode.cpp | 313 |
1 files changed, 2 insertions, 311 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 6eb523e..1ea704b 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -27,6 +27,8 @@ #include <unicode.h> +#include "test-helper.h" + using namespace std::chrono_literals; using namespace std::string_literals; @@ -95,123 +97,6 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t { U"\x10000000", // invalid unicode (number too big) }; -// output operators must be in same namespace as the type itself -namespace std { - -#ifdef __cpp_char8_t - std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s) - { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast<uint8_t>(c)); - os << "]"; - - return os; - } -#endif - - std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s) - { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast<uint16_t>(c)); - os << "]"; - - return os; - } - - std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s) - { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast<uint32_t>(c)); - os << "]"; - - return os; - } - -} // namespace std - -namespace { - - // utility wrapper to adapt locale-bound facets for wstring/wbuffer convert - template<class Facet> - struct deletable_facet : Facet - { - template<class ...Args> - deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {} - ~deletable_facet() {} - }; - - // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?) - std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16; - std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32; - - template<typename From, typename To> - std::basic_string<To> std_convert(const std::basic_string<From>& s); - - template<> - std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s) - { - std::string a{s.begin(), s.end()}; - a = conv32.to_bytes(conv32.from_bytes(a)); - return std::basic_string<utf8_t>{a.begin(), a.end()}; - } - - template<> - std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s) - { - std::string a{s.begin(), s.end()}; - return conv16.from_bytes(a); - } - - template<> - std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s) - { - std::string a{s.begin(), s.end()}; - return conv32.from_bytes(a); - } - - template<> - std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s) - { - auto result{conv16.to_bytes(s)}; - return std::basic_string<utf8_t>(result.begin(), result.end()); - } - - template<> - std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s) - { - return conv16.from_bytes(conv16.to_bytes(s)); - } - - template<> - std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s) - { - return conv32.from_bytes(conv16.to_bytes(s)); - } - - template<> - std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s) - { - auto result{conv32.to_bytes(s)}; - return std::basic_string<utf8_t>(result.begin(), result.end()); - } - - template<> - std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s) - { - return conv16.from_bytes(conv32.to_bytes(s)); - } - - template<> - std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s) - { - return conv32.from_bytes(conv32.to_bytes(s)); - } - -} // namespace - // check assumptions about environment BOOST_AUTO_TEST_CASE(string_u8string) { @@ -457,200 +342,6 @@ BOOST_AUTO_TEST_CASE(is_valid_unicode) BOOST_CHECK(!unicode::is_valid_unicode(0xDFFF)); } -struct random_context { - random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} - std::random_device rd; // OS random number engine to seed RNG (below) - std::mt19937 gen{rd()}; - std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units - std::uniform_int_distribution<unsigned long> code_point_distribution; -}; - -// generates valid and invalid strings of different type -template<typename T> -T generate_random_invalid(random_context& rc, size_t length) -{ - // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC - std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<typename T::value_type>::max()}; // code unit value - T result; - std::generate_n(std::back_inserter(result), length, [&](){return static_cast<typename T::value_type>(code_unit(rc.gen));}); - - return result; -} - -char32_t generate_random_char(random_context& rc) -{ - auto result {rc.code_point_distribution(rc.gen)}; - if (result >= 0xD800) - result += 0x800; - return static_cast<char32_t>(result); -} - -std::u32string generate_random_string(random_context& rc, size_t length) -{ - std::u32string result; - std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); - - return result; -} - -template<typename From, typename ToTypesCollectionType, size_t i = 0> -void test_random_invalid(random_context& rc, size_t length) -{ - //std::cerr << "LENGTH: " << length << std::endl; - typedef typename std::tuple_element<i,ToTypesCollectionType>::type To; - - From r {static_cast<From>(generate_random_invalid<From>(rc, length))}; - - // base type interface - try { - To result{unicode::convert<typename From::value_type,typename To::value_type>(r)}; - - if (r.empty()) { - BOOST_CHECK(result.empty()); - } else { - BOOST_CHECK(!result.empty()); - } - } catch (const std::invalid_argument&) { - // OK: this is an expected exception for convert() on bad input - } catch (const std::exception& ex) { - BOOST_ERROR("Unexpected error on convert(): " << ex.what()); - } - - // container type interface - try { - To result{unicode::convert<From, To>(r)}; - - if (r.empty()) { - BOOST_CHECK(result.empty()); - } else { - BOOST_CHECK(!result.empty()); - } - } catch (const std::invalid_argument&) { - // OK: this is an expected exception for convert() on bad input - } catch (const std::exception& ex) { - BOOST_ERROR("Unexpected error on convert(): " << ex.what()); - } - - // encoding interface - try { - To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r)}; - - if (r.empty()) { - BOOST_CHECK(result.empty()); - } else { - BOOST_CHECK(!result.empty()); - } - } catch (const std::invalid_argument&) { - // OK: this is an expected exception for convert() on bad input - } catch (const std::exception& ex) { - BOOST_ERROR("Unexpected error on convert(): " << ex.what()); - } - - // iterate over remaining To types - if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value) - test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) -{ - random_context rc; - - for (int i = 0; i < 10; i++) { - test_random_invalid<T,types_collection_type>(rc, rc.sequence_length(rc.gen)); - } -} - -class CPUTimer -{ -public: - CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now()) - { - } - - ~CPUTimer() - { -#if BOOST_VERSION > 106700 - auto elapsed_cpu{mCPUTimer.elapsed()}; -#endif - std::cout << mName << ": " << std::chrono::duration<double>(std::chrono::steady_clock::now() - mWallTime0).count() << - "s" << -#if BOOST_VERSION > 106700 - " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" << -#endif - std::endl; - } - -private: - std::string mName; - std::chrono::time_point<std::chrono::steady_clock> mWallTime0; -#if BOOST_VERSION > 106700 - boost::timer::cpu_timer mCPUTimer; -#endif -}; - -template<typename From, typename ToTypesCollectionType, size_t index = 0> -void test_random_valid(random_context& rc, size_t length, const std::string& description) -{ - typedef typename std::tuple_element<index,ToTypesCollectionType>::type To; - - // Fill UTF-32 data list: source for tests - std::vector<std::u32string> u32list; - std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); - - // Fill From data list - std::vector<From> list; - std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){ - return unicode::convert<unicode::UTF_32, typename unicode::Encoding_t<typename From::value_type>>(s); - }); - - for (size_t i = 0; i < list.size(); i++) { - BOOST_CHECK(list[i].size() >= u32list[i].size()); - To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(list[i])}; - BOOST_CHECK(result.size() >= u32list[i].size()); - auto boost_result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(list[i])}; - BOOST_CHECK_EQUAL(result, boost_result); - } - - { - CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) + - " "s + description + - " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + - " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8)); - for (const auto& i: list) - To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(i)}; - } - - { - CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf"); - for (const auto& i: list) - To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(i)}; - } - - { - CPUTimer timer(" -> Compare to std::wstring_convert"); - for (const auto& i: list) - To result{std_convert<typename From::value_type, typename To::value_type>(i)}; - } - - // iterate over remaining To types - if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value) - test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) -{ - random_context rc{127}; - - test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings"); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) -{ - random_context rc; - - test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings"); -} - // Test ISO encodings BOOST_AUTO_TEST_CASE(convert_iso) { |