diff options
author | Roland Reichwein <mail@reichwein.it> | 2022-12-15 11:01:37 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2022-12-15 11:01:37 +0100 |
commit | 55d1d3612141ef1fe858b2bccb950da51cfe7a17 (patch) | |
tree | 50cc3a90dd5d3eea8e4f1020da165e2bc31e0556 | |
parent | 53e50b9fd7f051d039e40f77fa00498dd9a8e2b0 (diff) |
-rw-r--r-- | Makefile | 12 | ||||
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | include/unicode/utf.h | 8 | ||||
-rw-r--r-- | src/test-unicode.cpp | 9 |
4 files changed, 31 insertions, 4 deletions
@@ -14,6 +14,7 @@ ifeq ($(DEBIANVERSION),10) CXX=g++-8 else +ifeq ($(wildcard $(shell which clang++-14)),) ifeq ($(wildcard $(shell which clang++-13)),) ifeq ($(wildcard $(shell which clang++-12)),) ifeq ($(wildcard $(shell which clang++-11)),) @@ -31,6 +32,9 @@ endif else CXX=clang++-13 endif +else +CXX=clang++-14 +endif endif @@ -41,6 +45,14 @@ ifeq ($(CXXFLAGS),) CXXFLAGS=-O2 -DNDEBUG endif +ifeq ($(CXX),clang++-14) +ifeq ($(ONDEBIAN),yes) +COMPILER_SUITE=clang +LIBS+=-fuse-ld=lld-14 +STANDARD=c++20 +endif +endif + ifeq ($(CXX),clang++-13) ifeq ($(ONDEBIAN),yes) COMPILER_SUITE=clang diff --git a/debian/changelog b/debian/changelog index e886d47..114bd57 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +unicode (1.10) unstable; urgency=medium + + * Validation bugfix, tests + + -- Roland Reichwein <mail@reichwein.it> Thu, 15 Dec 2022 10:54:15 +0100 + unicode (1.9) unstable; urgency=medium * Optimizations for validation diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 691d4ba..0738242 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -58,13 +58,13 @@ namespace unicode::detail { while (i < size) { if (is_utf8_sequence(s[i])) { i++; - } else if ((i < size - 1) && is_utf8_sequence(s[i], s[i + 1])) { + } else if ((i + 1 < size) && is_utf8_sequence(s[i], s[i + 1])) { i += 2; - } else if ((i < size - 2) && is_utf8_sequence(s[i], s[i + 1], s[i + 2])) { + } else if ((i + 2 < size) && is_utf8_sequence(s[i], s[i + 1], s[i + 2])) { if (((s[i] & 0xF) == 0xD) && ((s[i + 1] & 0x20) == 0x20)) return false; // Reserved for UTF-16 surrogates: 0xD800..0xDFFF i += 3; - } else if ((i < size - 3) && is_utf8_sequence(s[i], s[i + 1], s[i + 2], s[i + 3])) { + } else if ((i + 3 < size) && is_utf8_sequence(s[i], s[i + 1], s[i + 2], s[i + 3])) { if ((((s[i] & 7) << 2) | ((s[i + 1] >> 4) & 3)) >= 0x11) return false; // Unicode too big above 0x10FFFF i += 4; @@ -101,7 +101,7 @@ namespace unicode::detail { while (i < size) { if (is_utf16_sequence(s[i])) { i++; - } else if ((i < size - 1) && is_utf16_sequence(s[i], s[i + 1])) { + } else if ((i + 1 < size) && is_utf16_sequence(s[i], s[i + 1])) { i += 2; } else #if __cplusplus >= 202002L diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 948dbcc..0b5ced7 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -141,6 +141,15 @@ std::vector<std::basic_string<utf8_t>> failure_strings_char8_t { (utf8_t*)"text1\xc3\xc3\xa4text3", (utf8_t*)"text1\xc3text2\xc3\xa4", + (utf8_t*)"\xff", + (utf8_t*)"\xff\xff", + (utf8_t*)"\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff\xff\xff\xff", + (utf8_t*)"\xF8\x80\x80\x80\x80", // overlong encoding of valid code point (utf8_t*)"text1\xF8\x80\x80\x80\x80text2", (utf8_t*)"\xF8\x80\x80\x80\x80text2", |