diff options
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | include/unicode.h | 4 | ||||
-rw-r--r-- | src/recode.cpp | 21 | ||||
-rw-r--r-- | src/test-unicode.cpp | 22 |
4 files changed, 39 insertions, 14 deletions
diff --git a/debian/changelog b/debian/changelog index 82b8c15..b6d362d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +unicode (1.5) unstable; urgency=medium + + * Fixed C++20 version and tests + + -- Roland Reichwein <mail@reichwein.it> Sun, 05 Dec 2021 20:20:35 +0100 + unicode (1.3) unstable; urgency=medium * Include msbuild project files diff --git a/include/unicode.h b/include/unicode.h index 33b3199..36fc6ae 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -351,8 +351,8 @@ namespace unicode::detail { typename utf_back_insert_iterator::string_type& s; }; - typedef std::unordered_map<utf8_t, char32_t> iso_map_type; - typedef std::unordered_map<char32_t, utf8_t> iso_map_type_reverse; + typedef std::unordered_map<iso_t, char32_t> iso_map_type; + typedef std::unordered_map<char32_t, iso_t> iso_map_type_reverse; // ISO-8859-1 is lower 8-bit of Unicode, so no exceptions necessary static inline iso_map_type iso_8859_1_map; diff --git a/src/recode.cpp b/src/recode.cpp index 8145fb8..89bd69b 100644 --- a/src/recode.cpp +++ b/src/recode.cpp @@ -4,7 +4,9 @@ #include <boost/algorithm/string/predicate.hpp> #include <boost/endian/conversion.hpp> +#include <boost/version.hpp> +#include <algorithm> #include <filesystem> #include <functional> #include <iostream> @@ -58,10 +60,27 @@ std::string get_id() return get_id(std::string{typeid(From).name()}, typeid(To).name()); } +// workaround for broken boost::endian::endian_reverse_inplace for C++20 in boost 1.74 +template<typename T> +void reverse_endian_inplace(T& c) +{ + size_t size{sizeof(T)}; + uint8_t* p{reinterpret_cast<uint8_t*>(&c)}; + for (int i = 0; i < size / 2; i++) { + std::swap(p[i], p[size - 1 - i]); + } +} + template<typename T> void reverse_endian(std::basic_string<T>& s) { - std::for_each(s.begin(), s.end(), [](T& c){boost::endian::endian_reverse_inplace(c);}); + std::for_each(s.begin(), s.end(), [](T& c){ +#if BOOST_VERSION > 107400 + boost::endian::endian_reverse_inplace(c); +#else + reverse_endian_inplace(c); +#endif + }); } std::unordered_map<std::string, std::function<std::string(const std::string&, bool, bool)>> convert_map {}; diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 9c68c59..b5b48f0 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -37,13 +37,13 @@ std::vector<types_collection_type> success_sets { }; // Error cases: throwing upon convert to all other types -std::vector<std::basic_string<iso_t>> failure_strings_char8_t { +std::vector<std::basic_string<utf8_t>> failure_strings_char8_t { // using u8"" here doesn't work on MSVC - "\x80", // utf-8 continuation byte - "\x81", // utf-8 continuation byte - "\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" - "\xF8\x80\x80\x80\x80", // overlong encoding - "\xF7\xBF\xBF\xBF", // valid encoding of invalid code point + u8"\x80", // utf-8 continuation byte + u8"\x81", // utf-8 continuation byte + u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" + u8"\xF8\x80\x80\x80\x80", // overlong encoding + u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point }; std::vector<std::basic_string<char16_t>> failure_strings_char16_t { @@ -367,10 +367,10 @@ BOOST_AUTO_TEST_CASE(convert) BOOST_CHECK((unicode::convert<unicode::UTF_8,unicode::UTF_16>(u8"abc")) == std::u16string{u"abc"}); BOOST_CHECK((unicode::convert<unicode::UTF_32,unicode::UTF_16>(U"abc")) == std::u16string{u"abc"}); - BOOST_CHECK((unicode::convert<utf8_t,char16_t>("abc")) == std::u16string{u"abc"}); + BOOST_CHECK((unicode::convert<utf8_t,char16_t>(u8"abc")) == std::u16string{u"abc"}); BOOST_CHECK((unicode::convert<char32_t,char16_t>(U"abc")) == std::u16string{u"abc"}); - BOOST_CHECK((unicode::convert<char, char32_t>(u8"äöü")) == std::u32string{U"äöü"}); + BOOST_CHECK((unicode::convert<char, char32_t>("äöü")) == std::u32string{U"äöü"}); #ifdef _WIN32 BOOST_CHECK(sizeof(wchar_t) == 2); @@ -381,9 +381,9 @@ BOOST_AUTO_TEST_CASE(convert) // For the following checks, wchar_t size and encoding is system dependent: // Windows: UTF-16 // Linux: UTF-32 - BOOST_CHECK((unicode::convert<char, wchar_t>(u8"äöü")) == std::wstring{L"äöü"}); - BOOST_CHECK((unicode::convert<char, wchar_t>(u8"\u732b")) == std::wstring{L"\u732b"}); - BOOST_CHECK((unicode::convert<char, wchar_t>(u8"\U0001F63A")) == std::wstring{L"\U0001F63A"}); + BOOST_CHECK((unicode::convert<char, wchar_t>("äöü")) == std::wstring{L"äöü"}); + BOOST_CHECK((unicode::convert<char, wchar_t>("\u732b")) == std::wstring{L"\u732b"}); + BOOST_CHECK((unicode::convert<char, wchar_t>("\U0001F63A")) == std::wstring{L"\U0001F63A"}); BOOST_CHECK((unicode::convert<wchar_t, char32_t>(L"\U0001F63A")) == std::u32string{U"\U0001F63A"}); BOOST_CHECK((unicode::convert<wchar_t, utf8_t>(L"\U0001F63A")) == std::u8string{u8"\U0001F63A"}); |