Add first working conversion UTF-8 -> UTF-16

author: Roland Reichwein <mail@reichwein.it> 2021-01-25 18:54:25 +0100
committer: Roland Reichwein <mail@reichwein.it> 2021-01-25 18:54:25 +0100
commit: 918d015302a004755ce0cf4968793cdf6a61bca8 (patch)
tree: a5c962b33a46249dfebf2a8506d9acedefac523c /src
parent: d8bddb9dc248bb3cc04116c97259ea6f5c13e6d0 (diff)
1 file changed, 12 insertions, 3 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 4576d06..41fcd20 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -4,14 +4,23 @@
 
 #include <string>
 
-//#include <unicode.h>
+#include <unicode.h>
 
 BOOST_AUTO_TEST_CASE(utf8_to_utf16)
 {
  std::u8string u8{u8"ascii string1"};
  
- //std::u16string u16{unicode::utf8_to_utf16(u8)};
+ std::u16string u16{unicode::utf8_to_utf16(u8)};
 
- //BOOST_CHECK_EQUAL(u16, u"ascii string1");
+ BOOST_CHECK(u16 == u"ascii string1");
 }
 
+// TODO: add test cases for malformed UTF-8 input:
+//  invalid bytes
+//  an unexpected continuation byte
+//  a non-continuation byte before the end of the character
+//  the string ending before the end of the character (which can happen in simple string truncation)
+//  an overlong encoding
+//  a sequence that decodes to an invalid code point
+//
+//  Invalid code points are the high and low surrogate halves used by UTF-16 (U+D800 through U+DFFF) and code points not encodable by UTF-16 (those above U+10FFFF)
author	Roland Reichwein <mail@reichwein.it>	2021-01-25 18:54:25 +0100
committer	Roland Reichwein <mail@reichwein.it>	2021-01-25 18:54:25 +0100
commit	918d015302a004755ce0cf4968793cdf6a61bca8 (patch)
tree	a5c962b33a46249dfebf2a8506d9acedefac523c /src
parent	d8bddb9dc248bb3cc04116c97259ea6f5c13e6d0 (diff)