diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-01-25 18:54:25 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-01-25 18:54:25 +0100 |
commit | 918d015302a004755ce0cf4968793cdf6a61bca8 (patch) | |
tree | a5c962b33a46249dfebf2a8506d9acedefac523c /src | |
parent | d8bddb9dc248bb3cc04116c97259ea6f5c13e6d0 (diff) |
Add first working conversion UTF-8 -> UTF-16
Diffstat (limited to 'src')
-rw-r--r-- | src/test-unicode.cpp | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 4576d06..41fcd20 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -4,14 +4,23 @@
 
 #include <string>
 
-//#include <unicode.h>
+#include <unicode.h>
 
 BOOST_AUTO_TEST_CASE(utf8_to_utf16)
 {
  std::u8string u8{u8"ascii string1"};
 
- //std::u16string u16{unicode::utf8_to_utf16(u8)};
+ std::u16string u16{unicode::utf8_to_utf16(u8)};
 
- //BOOST_CHECK_EQUAL(u16, u"ascii string1");
+ BOOST_CHECK(u16 == u"ascii string1");
 }
 
+// TODO:
+// invalid bytes
+// an unexpected continuation byte
+// a non-continuation byte before the end of the character
+// the string ending before the end of the character (which can happen in simple string truncation)
+// an overlong encoding
+// a sequence that decodes to an invalid code point
+//
+// high and low surrogate halves used by UTF-16 (U+D800 through U+DFFF) and code points not encodable by UTF-16 (those after U+10FFFF)