From b9833fd7e6c6f3373978d56f7a7b00ebdb02a94b Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Thu, 30 Dec 2021 17:10:49 +0100 Subject: Extended tests --- src/test-unicode.cpp | 58 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 70ec453..7216cff 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -38,11 +38,37 @@ typedef std::tuple, std::basic_string, std:: // Success cases: convert string to all other types, respectively std::vector success_sets { SUCCESS_TUPLE(""), + + // various string SUCCESS_TUPLE("ASCII string1"), SUCCESS_TUPLE("Täst just looks like German"), SUCCESS_TUPLE("\u732b is chinese for cat"), SUCCESS_TUPLE("\U0001F63A"), SUCCESS_TUPLE("\U0001F63A is a smiling cat"), + + // separators + SUCCESS_TUPLE("abc\r\ndef"), + SUCCESS_TUPLE("äöü\0\u20ac"), + + // optimization relevant strings + SUCCESS_TUPLE("01234567\u20ac01234567"), + SUCCESS_TUPLE("0123456\u20ac01234567"), + SUCCESS_TUPLE("012345\u20ac01234567"), + SUCCESS_TUPLE("01234\u20ac01234567"), + SUCCESS_TUPLE("0123\u20ac01234567"), + SUCCESS_TUPLE("012\u20ac01234567"), + SUCCESS_TUPLE("01\u20ac01234567"), + SUCCESS_TUPLE("0\u20ac01234567"), + SUCCESS_TUPLE("\u20ac01234567"), + SUCCESS_TUPLE("0123456701234567\u20ac0123456701234567"), + SUCCESS_TUPLE("012345670123456\u20ac0123456701234567"), + SUCCESS_TUPLE("01234567012345\u20ac0123456701234567"), + SUCCESS_TUPLE("0123456701234\u20ac0123456701234567"), + SUCCESS_TUPLE("012345670123\u20ac0123456701234567"), + SUCCESS_TUPLE("01234567012\u20ac0123456701234567"), + SUCCESS_TUPLE("0123456701\u20ac0123456701234567"), + SUCCESS_TUPLE("012345670\u20ac0123456701234567"), + SUCCESS_TUPLE("01234567\u20ac0123456701234567"), }; // Error cases: throwing upon convert to all other types @@ -624,29 +650,51 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_colle test_random_valid(rc, rc.sequence_length(rc.gen), "All Unicode strings"); } -// Test ISO and UTF encodings -BOOST_AUTO_TEST_CASE(convert) +// Test ISO encodings +BOOST_AUTO_TEST_CASE(convert_iso) { BOOST_CHECK((std::string{unicode::convert({})}) == std::string{}); BOOST_CHECK((std::string{unicode::convert("abc")}) == std::string{"abc"}); - BOOST_CHECK((std::string{unicode::convert("äöü")}) == std::string{"äöü"}); + BOOST_CHECK((std::string{unicode::convert("\xe4\xf6\xfc")}) == std::string{"\xe4\xf6\xfc"}); // Latin-1 äöü BOOST_CHECK((std::string{unicode::convert("\xa4")}) == std::string{"\xa4"}); // € - BOOST_CHECK((std::string{unicode::convert("\xa4")}) == std::string{"\xa4"}); // € + BOOST_CHECK((std::string{unicode::convert({})}) == std::string{}); + BOOST_CHECK((std::string{unicode::convert("abc")}) == std::string{"abc"}); + BOOST_CHECK((std::string{unicode::convert("\xe4\xf6\xfc")}) == std::string{"\xe4\xf6\xfc"}); // Latin-1 äöü + BOOST_CHECK((std::string{unicode::convert("\xa4")}) == std::string{"\xa4"}); // € + BOOST_CHECK_THROW(((void)std::string{unicode::convert("\xa4")}), std::invalid_argument); BOOST_CHECK_THROW(((void)std::string{unicode::convert("\xa4")}), std::invalid_argument); // € not available in ISO-8859-1 - +} + +// Test conversion between ISO and UTF encodings +BOOST_AUTO_TEST_CASE(convert_iso_utf) +{ BOOST_CHECK_THROW(((void)std::string{unicode::convert(u8"\u20ac")}), std::invalid_argument); BOOST_CHECK_THROW(((void)std::string{unicode::convert(u"\u20ac")}), std::invalid_argument); BOOST_CHECK_THROW(((void)std::string{unicode::convert(U"\u20ac")}), std::invalid_argument); + + BOOST_CHECK((std::string{unicode::convert(u8"\u20ac")}) == std::string{"\xa4"}); // € + BOOST_CHECK((std::string{unicode::convert(u"\u20ac")}) == std::string{"\xa4"}); // € + BOOST_CHECK((std::string{unicode::convert(U"\u20ac")}) == std::string{"\xa4"}); // € + + BOOST_CHECK_THROW(((void)std::string{unicode::convert(u8"\u00A4")}), std::invalid_argument); // currency sign: Latin-1, but not Latin-15 + BOOST_CHECK((std::string{unicode::convert(u8"\u00A4")}) == std::string{"\xa4"}); + BOOST_CHECK_THROW(((void)std::string{unicode::convert(u8"\u732b")}), std::invalid_argument); BOOST_CHECK_THROW(((void)std::string{unicode::convert(u"\u732b")}), std::invalid_argument); BOOST_CHECK_THROW(((void)std::string{unicode::convert(U"\u732b")}), std::invalid_argument); +} +// Test UTF encodings +BOOST_AUTO_TEST_CASE(convert_utf) +{ BOOST_CHECK_THROW((unicode::convert(std::u32string{(char32_t*)"\x00\xD8\x00\x00\x00\x00\x00\x00"})) , std::invalid_argument); BOOST_CHECK((unicode::convert(u8"abc")) == std::u16string{u"abc"}); BOOST_CHECK((unicode::convert(U"abc")) == std::u16string{u"abc"}); + + BOOST_CHECK((unicode::convert(u8"a\0bc")) == std::u16string{u"a\0bc"}); BOOST_CHECK((unicode::convert(u8"abc")) == std::u16string{u"abc"}); BOOST_CHECK((unicode::convert(U"abc")) == std::u16string{u"abc"}); -- cgit v1.2.3