diff options
| author | Roland Reichwein <mail@reichwein.it> | 2021-12-17 20:04:09 +0100 | 
|---|---|---|
| committer | Roland Reichwein <mail@reichwein.it> | 2021-12-17 20:04:09 +0100 | 
| commit | 7aff3a1a8439e1465e4e5ca99fa4d1e18fe3df38 (patch) | |
| tree | e5434a5763681979d871b615b62ad226a687b78f | |
| parent | 3e5d303e59b66a1c52b14c88b24a59eb07209e5d (diff) | |
Added tests
| -rw-r--r-- | src/test-unicode.cpp | 175 | 
1 files changed, 158 insertions, 17 deletions
| diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index c793399..29e5c2e 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -53,7 +53,9 @@ std::vector<std::basic_string<char16_t>> failure_strings_char16_t {  };  std::vector<std::basic_string<char32_t>> failure_strings_char32_t { + U"\xD800 and more text", // invalid unicode (surrogate half)   U"blabla \xD801", // invalid unicode (surrogate half) + U"moreblabla \xDFFF", // invalid unicode (surrogate half)   U"\x10000000", // invalid unicode (number too big)  }; @@ -259,26 +261,44 @@ struct random_context {   std::random_device rd;  // OS random number engine to seed RNG (below)   std::mt19937 gen{rd()};   std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units + std::uniform_int_distribution<unsigned long> code_point_distribution{0, 0x10FFFF - 0x800};  }; +// generates valid and invalid strings of different type  template<typename T> -T generate_random(random_context& rc, size_t length) +T generate_random_invalid(random_context& rc, size_t length)  {   // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC - std::uniform_int_distribution<unsigned long> code_unit(std::numeric_limits<typename T::value_type>::max()); // code unit value + std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<typename T::value_type>::max()}; // code unit value   T result;   std::generate_n(std::back_inserter(result), length, [&](){return static_cast<typename T::value_type>(code_unit(rc.gen));});   return result;  } +char32_t generate_random_char(random_context& rc) +{ + auto result {rc.code_point_distribution(rc.gen)}; + if (result >= 0xD800) +  result += 0x800; + return static_cast<char32_t>(result); +} + +std::u32string generate_random_string(random_context& rc, size_t length) +{ + std::u32string result; + std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); + + return result; +} +  template<typename From, typename ToTypesCollectionType, size_t i = 0>  void test_random(random_context& rc, size_t length)  {   //std::cerr << "LENGTH: " << length << std::endl;   typedef typename std::tuple_element<i,ToTypesCollectionType>::type To; - From r {static_cast<From>(generate_random<From>(rc, length))}; + From r {static_cast<From>(generate_random_invalid<From>(rc, length))};   // base type interface   try { @@ -330,28 +350,139 @@ void test_random(random_context& rc, size_t length)    test_random<From, ToTypesCollectionType, i + 1>(rc, length);  } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type)  {   random_context rc; - int i{}; - // run for 1s (debug) 10s (release) = total time for all random_sequences types! -#ifdef _DEBUG - const auto timeout{1.0s}; -#else - const auto timeout{10.0s}; -#endif + for (int i = 0; i < 10; i++) { +  test_random<T,types_collection_type>(rc, rc.sequence_length(rc.gen)); + } +} - auto timeout_stamp { std::chrono::steady_clock::now() + (timeout / std::tuple_size<types_collection_type>::value)}; +BOOST_AUTO_TEST_CASE(random_sequences_valid) +{ + random_context rc; - while (!(std::chrono::steady_clock::now() > timeout_stamp)) { -  test_random<T,types_collection_type>(rc, rc.sequence_length(rc.gen)); -  i++; + // Fill UTF-32 data list + std::vector<std::u32string> u32list; + std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); + + // Fill UTF-16 data list + std::vector<std::u16string> u16list; + std::transform(u32list.begin(), u32list.end(), std::back_inserter(u16list), [](const std::u32string& s){return unicode::convert<unicode::UTF_32, unicode::UTF_16>(s);}); + + // Fill UTF-8 data list + std::vector<std::u8string> u8list; + std::transform(u32list.begin(), u32list.end(), std::back_inserter(u8list), [](const std::u32string& s){return unicode::convert<unicode::UTF_32, unicode::UTF_8>(s);}); + + for (const auto& i : u32list) { +  std::u32string s32{unicode::convert<unicode::UTF_32, unicode::UTF_32>(i)}; +  BOOST_CHECK(s32.size() == i.size()); +  std::u16string s16{unicode::convert<unicode::UTF_32, unicode::UTF_16>(i)}; +  BOOST_CHECK(s16.size() >= i.size()); +  std::u8string s8{unicode::convert<unicode::UTF_32, unicode::UTF_8>(i)}; +  BOOST_CHECK(s8.size() >= i.size()); + } + + for (const auto& i : u16list) { +  std::u32string s32{unicode::convert<unicode::UTF_16, unicode::UTF_32>(i)}; +  BOOST_CHECK(s32.size() > 0 || i.size() == 0); +  std::u16string s16{unicode::convert<unicode::UTF_16, unicode::UTF_16>(i)}; +  BOOST_CHECK(s16.size() == i.size()); +  std::u8string s8{unicode::convert<unicode::UTF_16, unicode::UTF_8>(i)}; +  BOOST_CHECK(s8.size() >= i.size()); + } + + for (const auto& i : u8list) { +  std::u32string s32{unicode::convert<unicode::UTF_8, unicode::UTF_32>(i)}; +  BOOST_CHECK(s32.size() > 0 || i.size() == 0); +  std::u16string s16{unicode::convert<unicode::UTF_8, unicode::UTF_16>(i)}; +  BOOST_CHECK(s16.size() > 0 || i.size() == 0); +  std::u8string s8{unicode::convert<unicode::UTF_8, unicode::UTF_8>(i)}; +  BOOST_CHECK(s8.size() == i.size()); + } + + { +  // Performance test UTF-32 -> UTF-32 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u32list) { +   std::u32string s{unicode::convert<unicode::UTF_32, unicode::UTF_32>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-32: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl;   } - BOOST_CHECK_MESSAGE(i > 1, "Not enough iterations done!"); + { +  // Performance test UTF-32 -> UTF-16 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u32list) { +   std::u16string s{unicode::convert<unicode::UTF_32, unicode::UTF_16>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-16: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-32 -> UTF-8 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u32list) { +   std::u8string s{unicode::convert<unicode::UTF_32, unicode::UTF_8>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-8: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-16 -> UTF-32 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u16list) { +   std::u32string s{unicode::convert<unicode::UTF_16, unicode::UTF_32>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-32: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-16 -> UTF-16 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u16list) { +   std::u16string s{unicode::convert<unicode::UTF_16, unicode::UTF_16>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-16: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-16 -> UTF-8 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u16list) { +   std::u8string s{unicode::convert<unicode::UTF_16, unicode::UTF_8>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-8: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-8 -> UTF-32 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u8list) { +   std::u32string s{unicode::convert<unicode::UTF_8, unicode::UTF_32>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-32: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-8 -> UTF-16 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u8list) { +   std::u16string s{unicode::convert<unicode::UTF_8, unicode::UTF_16>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-16: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { +  // Performance test UTF-8 -> UTF-8 +  auto t0{std::chrono::steady_clock::now()}; +  for (const auto& i : u8list) { +   std::u8string s{unicode::convert<unicode::UTF_8, unicode::UTF_8>(i)}; +  } +  std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-8: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + } - std::cout << "random_sequences: Completed " << i << " iterations for long random code unit sequences for " << typeid(typename T::value_type).name() << std::endl;  }  // Test ISO and UTF encodings @@ -366,6 +497,15 @@ BOOST_AUTO_TEST_CASE(convert)   BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::ISO_8859_15,unicode::ISO_8859_1>("\xa4")}), std::invalid_argument); // € not available in ISO-8859-1 + BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_1>(u8"\u20ac")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_16,unicode::ISO_8859_1>(u"\u20ac")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_32,unicode::ISO_8859_1>(U"\u20ac")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_15>(u8"\u732b")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_16,unicode::ISO_8859_15>(u"\u732b")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_32,unicode::ISO_8859_15>(U"\u732b")}), std::invalid_argument); + + BOOST_CHECK_THROW((unicode::convert<unicode::UTF_32,unicode::UTF_8>(std::u32string{(char32_t*)"\x00\xD8\x00\x00\x00\x00\x00\x00"})) , std::invalid_argument); +   BOOST_CHECK((unicode::convert<unicode::UTF_8,unicode::UTF_16>(u8"abc")) == std::u16string{u"abc"});   BOOST_CHECK((unicode::convert<unicode::UTF_32,unicode::UTF_16>(U"abc")) == std::u16string{u"abc"}); @@ -431,6 +571,7 @@ BOOST_AUTO_TEST_CASE(is_valid_utf)   BOOST_CHECK(unicode::is_valid_utf<unicode::UTF_8>(u8"äöü"));  } +// check assumptions about environment  BOOST_AUTO_TEST_CASE(string_u8string)  {   std::string a{"\xc3\xa4"}; | 
