From aa2edab739e3daac42cd5dbb44d10234ad880626 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sun, 2 Jan 2022 19:02:38 +0100 Subject: Separated unit tests and performance tests --- src/test-unicode.cpp | 313 +-------------------------------------------------- 1 file changed, 2 insertions(+), 311 deletions(-) (limited to 'src/test-unicode.cpp') diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 6eb523e..1ea704b 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -27,6 +27,8 @@ #include +#include "test-helper.h" + using namespace std::chrono_literals; using namespace std::string_literals; @@ -95,123 +97,6 @@ std::vector> failure_strings_char32_t { U"\x10000000", // invalid unicode (number too big) }; -// output operators must be in same namespace as the type itself -namespace std { - -#ifdef __cpp_char8_t - std::ostream& operator<<(std::ostream& os, std::basic_string const& s) - { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast(c)); - os << "]"; - - return os; - } -#endif - - std::ostream& operator<<(std::ostream& os, std::basic_string const& s) - { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast(c)); - os << "]"; - - return os; - } - - std::ostream& operator<<(std::ostream& os, std::basic_string const& s) - { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast(c)); - os << "]"; - - return os; - } - -} // namespace std - -namespace { - - // utility wrapper to adapt locale-bound facets for wstring/wbuffer convert - template - struct deletable_facet : Facet - { - template - deletable_facet(Args&& ...args) : Facet(std::forward(args)...) {} - ~deletable_facet() {} - }; - - // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?) - std::wstring_convert>, char16_t> conv16; - std::wstring_convert>, char32_t> conv32; - - template - std::basic_string std_convert(const std::basic_string& s); - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - std::string a{s.begin(), s.end()}; - a = conv32.to_bytes(conv32.from_bytes(a)); - return std::basic_string{a.begin(), a.end()}; - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - std::string a{s.begin(), s.end()}; - return conv16.from_bytes(a); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - std::string a{s.begin(), s.end()}; - return conv32.from_bytes(a); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - auto result{conv16.to_bytes(s)}; - return std::basic_string(result.begin(), result.end()); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - return conv16.from_bytes(conv16.to_bytes(s)); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - return conv32.from_bytes(conv16.to_bytes(s)); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - auto result{conv32.to_bytes(s)}; - return std::basic_string(result.begin(), result.end()); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - return conv16.from_bytes(conv32.to_bytes(s)); - } - - template<> - std::basic_string std_convert(const std::basic_string& s) - { - return conv32.from_bytes(conv32.to_bytes(s)); - } - -} // namespace - // check assumptions about environment BOOST_AUTO_TEST_CASE(string_u8string) { @@ -457,200 +342,6 @@ BOOST_AUTO_TEST_CASE(is_valid_unicode) BOOST_CHECK(!unicode::is_valid_unicode(0xDFFF)); } -struct random_context { - random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} - std::random_device rd; // OS random number engine to seed RNG (below) - std::mt19937 gen{rd()}; - std::uniform_int_distribution sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units - std::uniform_int_distribution code_point_distribution; -}; - -// generates valid and invalid strings of different type -template -T generate_random_invalid(random_context& rc, size_t length) -{ - // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC - std::uniform_int_distribution code_unit{0, std::numeric_limits::max()}; // code unit value - T result; - std::generate_n(std::back_inserter(result), length, [&](){return static_cast(code_unit(rc.gen));}); - - return result; -} - -char32_t generate_random_char(random_context& rc) -{ - auto result {rc.code_point_distribution(rc.gen)}; - if (result >= 0xD800) - result += 0x800; - return static_cast(result); -} - -std::u32string generate_random_string(random_context& rc, size_t length) -{ - std::u32string result; - std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); - - return result; -} - -template -void test_random_invalid(random_context& rc, size_t length) -{ - //std::cerr << "LENGTH: " << length << std::endl; - typedef typename std::tuple_element::type To; - - From r {static_cast(generate_random_invalid(rc, length))}; - - // base type interface - try { - To result{unicode::convert(r)}; - - if (r.empty()) { - BOOST_CHECK(result.empty()); - } else { - BOOST_CHECK(!result.empty()); - } - } catch (const std::invalid_argument&) { - // OK: this is an expected exception for convert() on bad input - } catch (const std::exception& ex) { - BOOST_ERROR("Unexpected error on convert(): " << ex.what()); - } - - // container type interface - try { - To result{unicode::convert(r)}; - - if (r.empty()) { - BOOST_CHECK(result.empty()); - } else { - BOOST_CHECK(!result.empty()); - } - } catch (const std::invalid_argument&) { - // OK: this is an expected exception for convert() on bad input - } catch (const std::exception& ex) { - BOOST_ERROR("Unexpected error on convert(): " << ex.what()); - } - - // encoding interface - try { - To result{unicode::convert,typename unicode::Encoding_t>(r)}; - - if (r.empty()) { - BOOST_CHECK(result.empty()); - } else { - BOOST_CHECK(!result.empty()); - } - } catch (const std::invalid_argument&) { - // OK: this is an expected exception for convert() on bad input - } catch (const std::exception& ex) { - BOOST_ERROR("Unexpected error on convert(): " << ex.what()); - } - - // iterate over remaining To types - if constexpr (i + 1 < std::tuple_size::value) - test_random_invalid(rc, length); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) -{ - random_context rc; - - for (int i = 0; i < 10; i++) { - test_random_invalid(rc, rc.sequence_length(rc.gen)); - } -} - -class CPUTimer -{ -public: - CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now()) - { - } - - ~CPUTimer() - { -#if BOOST_VERSION > 106700 - auto elapsed_cpu{mCPUTimer.elapsed()}; -#endif - std::cout << mName << ": " << std::chrono::duration(std::chrono::steady_clock::now() - mWallTime0).count() << - "s" << -#if BOOST_VERSION > 106700 - " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" << -#endif - std::endl; - } - -private: - std::string mName; - std::chrono::time_point mWallTime0; -#if BOOST_VERSION > 106700 - boost::timer::cpu_timer mCPUTimer; -#endif -}; - -template -void test_random_valid(random_context& rc, size_t length, const std::string& description) -{ - typedef typename std::tuple_element::type To; - - // Fill UTF-32 data list: source for tests - std::vector u32list; - std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); - - // Fill From data list - std::vector list; - std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){ - return unicode::convert>(s); - }); - - for (size_t i = 0; i < list.size(); i++) { - BOOST_CHECK(list[i].size() >= u32list[i].size()); - To result{unicode::convert,typename unicode::Encoding_t>(list[i])}; - BOOST_CHECK(result.size() >= u32list[i].size()); - auto boost_result{boost::locale::conv::utf_to_utf(list[i])}; - BOOST_CHECK_EQUAL(result, boost_result); - } - - { - CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) + - " "s + description + - " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + - " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8)); - for (const auto& i: list) - To result{unicode::convert,typename unicode::Encoding_t>(i)}; - } - - { - CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf"); - for (const auto& i: list) - To result{boost::locale::conv::utf_to_utf(i)}; - } - - { - CPUTimer timer(" -> Compare to std::wstring_convert"); - for (const auto& i: list) - To result{std_convert(i)}; - } - - // iterate over remaining To types - if constexpr (index + 1 < std::tuple_size::value) - test_random_valid(rc, length, description); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) -{ - random_context rc{127}; - - test_random_valid(rc, rc.sequence_length(rc.gen), "ASCII only strings"); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) -{ - random_context rc; - - test_random_valid(rc, rc.sequence_length(rc.gen), "All Unicode strings"); -} - // Test ISO encodings BOOST_AUTO_TEST_CASE(convert_iso) { -- cgit v1.2.3