diff options
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | debian/control | 3 | ||||
-rw-r--r-- | include/unicode.h | 11 | ||||
-rw-r--r-- | src/test-unicode.cpp | 50 |
4 files changed, 80 insertions, 4 deletions
@@ -1,22 +1,38 @@ PROJECTNAME=unicode VERSION=$(shell dpkg-parsechangelog --show-field Version) -DISTROS=base #debian10 +DISTROS=base debian10 ubuntu2004 ubuntu2010 +ifeq ($(wildcard $(shell which clang++-11)),) +ifeq ($(wildcard $(shell which clang++)),) +$(error No clang++-11 nor clang++ available!) +else +CXX=clang++ +endif +else CXX=clang++-11 # GCC is buggy: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282 #CXX=g++-10 +endif STANDARD=c++17 #STANDARD=c++20 +ifeq ($(CXXFLAGS),) CXXFLAGS=-O0 -g -D_DEBUG #CXXFLAGS=-O2 -DNDEBUG +endif CXXFLAGS+=-Wall -Iinclude -std=$(STANDARD) ifeq ($(CXX),clang++-11) COMPILER_SUITE=clang +LIBS+=-fuse-ld=lld-11 +endif + +ifeq ($(CXX),clang++) +COMPILER_SUITE=clang +LIBS+=-fuse-ld=lld endif ifeq ($(COMPILER_SUITE),clang) @@ -31,7 +47,6 @@ LDLIBS+=\ ifeq ($(COMPILER_SUITE),clang) LIBS+= \ --fuse-ld=lld-11 \ -lc++ \ -lc++abi #-lc++fs @@ -68,6 +83,7 @@ dep: $(SRC:.cpp=.d) clean: -rm -f src/recode src/test-unicode + -rm -rf result -find . -name '*.o' -o -name '*.d' -o -name '*.gcno' -o -name '*.gcda' | xargs rm -f install: diff --git a/debian/control b/debian/control index 42b6c22..1572512 100644 --- a/debian/control +++ b/debian/control @@ -16,3 +16,6 @@ Description: Unicode conversion library . Features: - Additional support for ISO-8859-1 encoding (Latin-1) as subset of Unicode + - Additional support for ISO-8859-15 + - Tested on Debian 10, Ubuntu 2004, Ubuntu 2010 + - C++17 and C++20 compatible diff --git a/include/unicode.h b/include/unicode.h index 9e0132b..f31cbac 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -339,5 +339,16 @@ namespace unicode { return result; } + template<typename T> + bool is_valid_utf(const std::basic_string<T>& s) + { + try { + std::for_each(utf_begin<T>(s), utf_end<T>(s), [](const T& c){}); + } catch(...) { + return false; + } + return true; + } + } // namespace unicode diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 05370c7..3d67124 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -37,7 +37,7 @@ std::vector<types_collection_type> success_sets { std::vector<std::basic_string<utf8_t>> failure_strings_char8_t { u8"\x80", // utf-8 continuation byte u8"\x81", // utf-8 continuation byte - u8"\xc3ä", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" + u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" u8"\xF8\x80\x80\x80\x80", // overlong encoding u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point }; @@ -56,6 +56,7 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t { // output operators must be in same namespace as the type itself namespace std { +#ifdef __cpp_char8_t std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s) { os << "["; @@ -65,6 +66,7 @@ std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s) return os; } +#endif std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s) { @@ -118,6 +120,27 @@ BOOST_AUTO_TEST_CASE(utf_to_utf_success) test_utf_to_utf(t); } +template<size_t i = 0, typename... Ts> +void test_is_valid_utf(std::tuple<Ts...>& t) +{ + typedef typename std::tuple_element<i,typename std::remove_reference<decltype(t)>::type>::type T; + + // test + bool result { unicode::is_valid_utf<typename T::value_type>(std::get<i>(t)) }; + + BOOST_CHECK_MESSAGE(result == true, "is_valid_utf w/ " << typeid(T).name() << "(" << i << ", " << std::get<i>(t) << "), got " << result); + + // iterate over other combinations + if constexpr (i + 1 < std::tuple_size<typename std::remove_reference<decltype(t)>::type>::value) + test_is_valid_utf<i + 1>(t); +} + +BOOST_AUTO_TEST_CASE(is_valid_utf_success) +{ + for (auto& t: success_sets) + test_is_valid_utf(t); +} + // iterate over std::tuple T types template<typename From, typename Collection, size_t index = 0> void test_utf_to_utf_failure(std::basic_string<From>& s) @@ -126,7 +149,7 @@ void test_utf_to_utf_failure(std::basic_string<From>& s) try { unicode::utf_to_utf<From,To>(s); - BOOST_FAIL("Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); + BOOST_ERROR("Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); } catch (...) { // OK }; @@ -148,6 +171,29 @@ BOOST_AUTO_TEST_CASE(utf_to_utf_failure) test_utf_to_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s); } +// iterate over std::tuple T types +template<typename T, typename Collection, size_t index = 0> +void test_is_valid_utf_failure(std::basic_string<T>& s) +{ + BOOST_CHECK_MESSAGE(unicode::is_valid_utf<T>(s) == false, "Expected bad UTF at index: " << index << ", " << typeid(T).name()); + + // iterate over remaining types + if constexpr (index + 1 < std::tuple_size<Collection>::value) + test_is_valid_utf_failure<T, Collection, index + 1>(s); +} + +BOOST_AUTO_TEST_CASE(is_valid_utf_failure) +{ + for (auto& s: failure_strings_char8_t) + test_is_valid_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s); + + for (auto& s: failure_strings_char16_t) + test_is_valid_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s); + + for (auto& s: failure_strings_char32_t) + test_is_valid_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s); +} + BOOST_AUTO_TEST_CASE(is_valid_unicode) { BOOST_CHECK(unicode::is_valid_unicode('\0')); |