summary refs log tree commit diff homepage
path: root/src/test-performance.cpp
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2022-01-02 19:02:38 +0100
committer Roland Reichwein <mail@reichwein.it> 2022-01-02 19:02:38 +0100
commit aa2edab739e3daac42cd5dbb44d10234ad880626 (patch)
tree a89ff831861346ffb4ece6b0ca770c8be8315770 /src/test-performance.cpp
parent c3124ce89b53e0b1ff3b666aeee9d1829b74229c (diff)
Separated unit tests and performance tests
Diffstat (limited to 'src/test-performance.cpp')
-rw-r--r-- src/test-performance.cpp 230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/test-performance.cpp b/src/test-performance.cpp
new file mode 100644
index 0000000..629aadd
--- /dev/null
+++ b/src/test-performance.cpp
@@ -0,0 +1,230 @@
+#define BOOST_TEST_MODULE unicode_test
+
+#include <boost/locale.hpp>
+#include <boost/test/included/unit_test.hpp>
+#include <boost/test/data/dataset.hpp>
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/version.hpp>
+#if BOOST_VERSION > 106700
+// CPU Timer in Debian 10 boost is broken, so leave it to std::chrono wall clock
+#include <boost/timer/timer.hpp>
+#endif
+
+#include <array>
+#include <chrono>
+#include <codecvt>
+#include <deque>
+#include <exception>
+#include <limits>
+#include <list>
+#include <locale>
+#include <random>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#include <unicode.h>
+
+#include "test-helper.h"
+
+using namespace std::chrono_literals;
+using namespace std::string_literals;
+
+// String types the templated test cases iterate over: one per code unit type (utf8_t, char16_t, char32_t)
+using types_collection_type = std::tuple<std::basic_string<utf8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>>;
+
+struct random_context { // random engine plus the distributions the tests below draw lengths and code points from
+ random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} // max_value: inclusive upper bound of code_point_distribution; the default leaves room for the 0x800 that generate_random_char() adds to step over 0xD800 and above
+ std::random_device rd; // OS random number engine to seed RNG (below)
+ std::mt19937 gen{rd()}; // seeded once from rd; must stay declared after rd because members are initialized in declaration order
+ std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 elements (code units for generate_random_invalid(), code points for generate_random_string())
+ std::uniform_int_distribution<unsigned long> code_point_distribution; // range 0 ... max_value, set up by the constructor
+};
+
+// Generates a string of `length` uniformly random code units of T's character
+// type. Nothing constrains the code units to form well-formed text, so the
+// result may be a valid or an invalid encoding.
+//
+// rc:     supplies the random engine (rc.gen)
+// length: number of code units to generate
+// returns the generated string of type T
+template<typename T>
+T generate_random_invalid(random_context& rc, size_t length)
+{
+ typedef typename T::value_type code_unit_type;
+
+ // Draw from the full bit range of the code unit. The maximum of the character
+ // type itself would stop at 127 wherever the code unit is a signed type (e.g.
+ // plain char on many platforms); bytes >= 0x80 would then never be produced
+ // and 8 bit input would always be plain ASCII, i.e. never invalid.
+ typedef typename std::make_unsigned<code_unit_type>::type unsigned_code_unit_type;
+
+ // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC
+ std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<unsigned_code_unit_type>::max()}; // code unit value
+ T result;
+ std::generate_n(std::back_inserter(result), length, [&](){return static_cast<code_unit_type>(code_unit(rc.gen));});
+
+ return result;
+}
+
+// Draws one value from rc.code_point_distribution and maps it to a code point:
+// values below 0xD800 are used as they are, all others are shifted up by 0x800
+// so that nothing in 0xD800 ... 0xDFFF (the surrogate range) is returned.
+char32_t generate_random_char(random_context& rc)
+{
+ const auto drawn{rc.code_point_distribution(rc.gen)};
+ const auto code_point{drawn < 0xD800 ? drawn : drawn + 0x800};
+
+ return static_cast<char32_t>(code_point);
+}
+
+// Builds a UTF-32 string of `length` code points, each one obtained from
+// generate_random_char().
+std::u32string generate_random_string(random_context& rc, size_t length)
+{
+ std::u32string text;
+ for (size_t remaining{length}; remaining > 0; --remaining) {
+  text.push_back(generate_random_char(rc));
+ }
+
+ return text;
+}
+
+// Feeds one random, possibly ill-formed string of type From through the three
+// unicode::convert() interfaces (base type, container type, encoding) for the
+// target type at position i of ToTypesCollectionType, then recurses over the
+// remaining target types. Every recursion step generates its own input.
+//
+// For each interface the conversion must either throw std::invalid_argument or
+// yield a result that is empty exactly when the input is empty. Any other
+// exception derived from std::exception is reported as a test error.
+//
+// rc:     random context used to generate the input
+// length: number of code units of the generated input
+template<typename From, typename ToTypesCollectionType, size_t i = 0>
+void test_random_invalid(random_context& rc, size_t length)
+{
+ typedef typename std::tuple_element<i,ToTypesCollectionType>::type To;
+
+ From r {generate_random_invalid<From>(rc, length)};
+
+ // Expectation shared by all interfaces. `conversion` is a callable without
+ // arguments that performs one specific unicode::convert() call on r.
+ const auto check_conversion = [&r](const auto& conversion) {
+  try {
+   To result{conversion()};
+
+   BOOST_CHECK_EQUAL(result.empty(), r.empty());
+  } catch (const std::invalid_argument&) {
+   // OK: this is an expected exception for convert() on bad input
+  } catch (const std::exception& ex) {
+   BOOST_ERROR("Unexpected error on convert(): " << ex.what());
+  }
+ };
+
+ // base type interface
+ check_conversion([&r]() {
+  return unicode::convert<typename From::value_type,typename To::value_type>(r);
+ });
+
+ // container type interface
+ check_conversion([&r]() {
+  return unicode::convert<From, To>(r);
+ });
+
+ // encoding interface
+ check_conversion([&r]() {
+  return unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r);
+ });
+
+ // iterate over remaining To types
+ if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value)
+  test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length);
+}
+
+// For each type T of types_collection_type: runs test_random_invalid() ten
+// times, each time with a freshly drawn sequence length.
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type)
+{
+ constexpr int rounds{10};
+ random_context rc;
+
+ for (int round{0}; round != rounds; ++round) {
+  const size_t length{rc.sequence_length(rc.gen)};
+  test_random_invalid<T,types_collection_type>(rc, length);
+ }
+}
+
+// Scope timer: measures from construction to destruction and prints
+// "<name>: <wall clock seconds>s" to std::cout when destroyed. If
+// BOOST_VERSION > 106700, the CPU time (user + system) measured by
+// boost::timer::cpu_timer is appended as " (<seconds>s CPU)".
+class CPUTimer
+{
+public:
+ // name: label printed in front of the measured times
+ CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now())
+ {
+ }
+
+ ~CPUTimer()
+ {
+  // Take all readings before the first output operation, so that writing the
+  // label to std::cout is not part of the measured interval.
+#if BOOST_VERSION > 106700
+  const auto elapsed_cpu{mCPUTimer.elapsed()};
+#endif
+  const std::chrono::duration<double> elapsed_wall{std::chrono::steady_clock::now() - mWallTime0};
+
+  std::cout << mName << ": " << elapsed_wall.count() <<
+   "s" <<
+#if BOOST_VERSION > 106700
+   // cpu_timer reports nanoseconds; convert to seconds
+   " (" << (static_cast<double>(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" <<
+#endif
+   std::endl;
+ }
+
+private:
+ std::string mName; // label for the output line
+ std::chrono::time_point<std::chrono::steady_clock> mWallTime0; // wall clock at construction
+#if BOOST_VERSION > 106700
+ boost::timer::cpu_timer mCPUTimer; // default constructed together with this object
+#endif
+};
+
+// Correctness check and timing for conversions of valid random strings from
+// type From to the target type at position `index` of ToTypesCollectionType;
+// recurses over the remaining target types afterwards.
+//
+// Steps per target type:
+//  1. generate 1000 random UTF-32 strings and convert them to From
+//  2. convert every From string with unicode::convert() and compare the result
+//     with boost::locale::conv::utf_to_utf()
+//  3. time unicode::convert(), boost::locale::conv::utf_to_utf() and
+//     std_convert() over the whole list, each reported by a CPUTimer
+//
+// rc:          random context used to generate the strings
+// length:      not used by the body itself; only passed on to the recursive call
+// description: text inserted into the label of the first timing line
+template<typename From, typename ToTypesCollectionType, size_t index = 0>
+void test_random_valid(random_context& rc, size_t length, const std::string& description)
+{
+ using To = typename std::tuple_element<index,ToTypesCollectionType>::type;
+ using FromEncoding = typename unicode::Encoding_t<typename From::value_type>;
+ using ToEncoding = typename unicode::Encoding_t<typename To::value_type>;
+
+ // UTF-32 reference data: source for tests
+ std::vector<std::u32string> sources;
+ for (int n{0}; n < 1000; ++n) {
+  sources.push_back(generate_random_string(rc, rc.sequence_length(rc.gen)));
+ }
+
+ // The same strings in the encoding of From
+ std::vector<From> inputs;
+ for (const std::u32string& source: sources) {
+  inputs.push_back(unicode::convert<unicode::UTF_32, FromEncoding>(source));
+ }
+
+ // Correctness: sizes are plausible and the result equals the one of boost
+ for (size_t n = 0; n < inputs.size(); n++) {
+  const size_t code_points{sources[n].size()};
+
+  BOOST_CHECK(inputs[n].size() >= code_points);
+  To result{unicode::convert<FromEncoding, ToEncoding>(inputs[n])};
+  BOOST_CHECK(result.size() >= code_points);
+  auto boost_result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(inputs[n])};
+  BOOST_CHECK_EQUAL(result, boost_result);
+ }
+
+ // Timing: unicode::convert()
+ {
+  CPUTimer timer("Performance test for converting "s + std::to_string(inputs.size()) +
+                 " "s + description +
+                 " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) +
+                 " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8));
+  for (const From& input: inputs) {
+   To result{unicode::convert<FromEncoding, ToEncoding>(input)};
+  }
+ }
+
+ // Timing: boost
+ {
+  CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf");
+  for (const From& input: inputs) {
+   To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(input)};
+  }
+ }
+
+ // Timing: std_convert()
+ {
+  CPUTimer timer(" -> Compare to std::wstring_convert");
+  for (const From& input: inputs) {
+   To result{std_convert<typename From::value_type, typename To::value_type>(input)};
+  }
+ }
+
+ // iterate over remaining To types
+ if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value)
+  test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description);
+}
+
+// For each type T of types_collection_type: runs test_random_valid() on
+// strings whose code points are limited to 0 ... 127.
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type)
+{
+ constexpr int highest_ascii_code_point{127};
+ random_context rc{highest_ascii_code_point};
+
+ const size_t length{rc.sequence_length(rc.gen)};
+ test_random_valid<T,types_collection_type>(rc, length, "ASCII only strings");
+}
+
+// For each type T of types_collection_type: runs test_random_valid() on
+// strings drawn with the default code point range of random_context.
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type)
+{
+ random_context rc;
+
+ const size_t length{rc.sequence_length(rc.gen)};
+ test_random_valid<T,types_collection_type>(rc, length, "All Unicode strings");
+}
+