diff options
Diffstat (limited to 'src/test-performance.cpp')
-rw-r--r-- | src/test-performance.cpp | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/src/test-performance.cpp b/src/test-performance.cpp new file mode 100644 index 0000000..629aadd --- /dev/null +++ b/src/test-performance.cpp @@ -0,0 +1,230 @@ +#define BOOST_TEST_MODULE unicode_test + +#include <boost/locale.hpp> +#include <boost/test/included/unit_test.hpp> +#include <boost/test/data/dataset.hpp> +#include <boost/test/data/monomorphic.hpp> +#include <boost/test/data/test_case.hpp> +#include <boost/version.hpp> +#if BOOST_VERSION > 106700 +// CPU Timer in Debian 10 boost is broken, so leave it to std::chrono wall clock +#include <boost/timer/timer.hpp> +#endif + +#include <array> +#include <chrono> +#include <codecvt> +#include <deque> +#include <exception> +#include <limits> +#include <list> +#include <locale> +#include <random> +#include <string> +#include <tuple> +#include <type_traits> +#include <vector> + +#include <unicode.h> + +#include "test-helper.h" + +using namespace std::chrono_literals; +using namespace std::string_literals; + +typedef std::tuple<std::basic_string<utf8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>> types_collection_type; + +struct random_context { + random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} + std::random_device rd; // OS random number engine to seed RNG (below) + std::mt19937 gen{rd()}; + std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units + std::uniform_int_distribution<unsigned long> code_point_distribution; +}; + +// generates valid and invalid strings of different type +template<typename T> +T generate_random_invalid(random_context& rc, size_t length) +{ + // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC + std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<typename T::value_type>::max()}; // code unit value + T result; + std::generate_n(std::back_inserter(result), length, [&](){return static_cast<typename T::value_type>(code_unit(rc.gen));}); + + return result; +} + +char32_t generate_random_char(random_context& rc) +{ + auto result {rc.code_point_distribution(rc.gen)}; + if (result >= 0xD800) + result += 0x800; + return static_cast<char32_t>(result); +} + +std::u32string generate_random_string(random_context& rc, size_t length) +{ + std::u32string result; + std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); + + return result; +} + +template<typename From, typename ToTypesCollectionType, size_t i = 0> +void test_random_invalid(random_context& rc, size_t length) +{ + //std::cerr << "LENGTH: " << length << std::endl; + typedef typename std::tuple_element<i,ToTypesCollectionType>::type To; + + From r {static_cast<From>(generate_random_invalid<From>(rc, length))}; + + // base type interface + try { + To result{unicode::convert<typename From::value_type,typename To::value_type>(r)}; + + if (r.empty()) { + BOOST_CHECK(result.empty()); + } else { + BOOST_CHECK(!result.empty()); + } + } catch (const std::invalid_argument&) { + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); + } + + // container type interface + try { + To result{unicode::convert<From, To>(r)}; + + if (r.empty()) { + BOOST_CHECK(result.empty()); + } else { + BOOST_CHECK(!result.empty()); + } + } catch (const std::invalid_argument&) { + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); + } + + // encoding interface + try { + To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r)}; + + if (r.empty()) { + BOOST_CHECK(result.empty()); + } else { + BOOST_CHECK(!result.empty()); + } + } catch (const std::invalid_argument&) { + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); + } + + // iterate over remaining To types + if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value) + test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) +{ + random_context rc; + + for (int i = 0; i < 10; i++) { + test_random_invalid<T,types_collection_type>(rc, rc.sequence_length(rc.gen)); + } +} + +class CPUTimer +{ +public: + CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now()) + { + } + + ~CPUTimer() + { +#if BOOST_VERSION > 106700 + auto elapsed_cpu{mCPUTimer.elapsed()}; +#endif + std::cout << mName << ": " << std::chrono::duration<double>(std::chrono::steady_clock::now() - mWallTime0).count() << + "s" << +#if BOOST_VERSION > 106700 + " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" << +#endif + std::endl; + } + +private: + std::string mName; + std::chrono::time_point<std::chrono::steady_clock> mWallTime0; +#if BOOST_VERSION > 106700 + boost::timer::cpu_timer mCPUTimer; +#endif +}; + +template<typename From, typename ToTypesCollectionType, size_t index = 0> +void test_random_valid(random_context& rc, size_t length, const std::string& description) +{ + typedef typename std::tuple_element<index,ToTypesCollectionType>::type To; + + // Fill UTF-32 data list: source for tests + std::vector<std::u32string> u32list; + std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); + + // Fill From data list + std::vector<From> list; + std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){ + return unicode::convert<unicode::UTF_32, typename unicode::Encoding_t<typename From::value_type>>(s); + }); + + for (size_t i = 0; i < list.size(); i++) { + BOOST_CHECK(list[i].size() >= u32list[i].size()); + To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(list[i])}; + BOOST_CHECK(result.size() >= u32list[i].size()); + auto boost_result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(list[i])}; + BOOST_CHECK_EQUAL(result, boost_result); + } + + { + CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) + + " "s + description + + " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + + " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8)); + for (const auto& i: list) + To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(i)}; + } + + { + CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf"); + for (const auto& i: list) + To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(i)}; + } + + { + CPUTimer timer(" -> Compare to std::wstring_convert"); + for (const auto& i: list) + To result{std_convert<typename From::value_type, typename To::value_type>(i)}; + } + + // iterate over remaining To types + if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value) + test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) +{ + random_context rc{127}; + + test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings"); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) +{ + random_context rc; + + test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings"); +} + |