From 5572e23e8e2109abd73b916f4f0d278e1aa21f34 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Wed, 3 Feb 2021 13:18:25 +0100 Subject: Add msbuild files --- include/unicode.h | 49 +++++++++++++++--- msbuild/compiler.props | 11 ++++ msbuild/libunicode.vcxproj | 93 +++++++++++++++++++++++++++++++++ msbuild/libunicode.vcxproj.filters | 26 ++++++++++ msbuild/recode.vcxproj | 99 ++++++++++++++++++++++++++++++++++++ msbuild/recode.vcxproj.filters | 14 +++++ msbuild/test-unicode.vcxproj | 93 +++++++++++++++++++++++++++++++++ msbuild/test-unicode.vcxproj.filters | 14 +++++ msbuild/unicode.sln | 46 +++++++++++++++++ msbuild/validate.vcxproj | 99 ++++++++++++++++++++++++++++++++++++ msbuild/validate.vcxproj.filters | 14 +++++ src/recode.cpp | 2 - src/test-unicode.cpp | 47 +++++++++-------- 13 files changed, 576 insertions(+), 31 deletions(-) create mode 100644 msbuild/compiler.props create mode 100644 msbuild/libunicode.vcxproj create mode 100644 msbuild/libunicode.vcxproj.filters create mode 100644 msbuild/recode.vcxproj create mode 100644 msbuild/recode.vcxproj.filters create mode 100644 msbuild/test-unicode.vcxproj create mode 100644 msbuild/test-unicode.vcxproj.filters create mode 100644 msbuild/unicode.sln create mode 100644 msbuild/validate.vcxproj create mode 100644 msbuild/validate.vcxproj.filters diff --git a/include/unicode.h b/include/unicode.h index 4b676bf..296ba1d 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include #include @@ -33,8 +34,12 @@ namespace unicode::detail { template struct utf_iterator { - typedef T value_type; + typedef T input_type; + typedef char32_t value_type; typedef char32_t& reference; + typedef char32_t* pointer; + typedef size_t difference_type; + typedef std::input_iterator_tag iterator_category; typedef std::basic_string string_type; utf_iterator(const typename string_type::const_iterator& cbegin, const typename string_type::const_iterator& cend): @@ -217,9 +222,20 @@ namespace unicode::detail { typedef T value_type; typedef std::basic_string string_type; typedef utf_back_insert_iterator& reference; + typedef utf_back_insert_iterator* pointer; + typedef size_t difference_type; + typedef std::output_iterator_tag iterator_category; utf_back_insert_iterator(string_type& s): s(s) {} + utf_back_insert_iterator& operator=(const utf_back_insert_iterator& other) + { + if (std::addressof(other.s) != std::addressof(s)) + throw std::runtime_error("utf_back_insert_iterator assignment operator actually called! Iterator should not be assigned to."); + + return *this; + } + // no-op reference operator++() { @@ -273,7 +289,7 @@ namespace unicode::detail { reference operator=(const char32_t& value) { if (value < 0x80) { // 1 byte - s.push_back(value); + s.push_back(static_cast(value)); } else if (value < 0x800) { // 2 bytes s.push_back(byte_n_of_m<0,2>(value)); s.push_back(byte_n_of_m<1,2>(value)); @@ -297,7 +313,7 @@ namespace unicode::detail { reference operator=(const char32_t& value) { if (value <= 0xFFFF) { // expect value to be already valid Unicode values - s.push_back(value); + s.push_back(static_cast(value)); } else { char32_t value_reduced{value - 0x10000}; s.push_back((value_reduced >> 10) + 0xD800); @@ -349,8 +365,12 @@ namespace unicode { template struct iso_iterator { + typedef utf8_t input_type; typedef char32_t value_type; typedef char32_t& reference; + typedef char32_t* pointer; + typedef size_t difference_type; + typedef std::input_iterator_tag iterator_category; typedef std::basic_string::const_iterator iterator; iso_iterator(const iterator& it): m_it(it) {} @@ -388,10 +408,22 @@ namespace unicode { template struct iso_back_insert_iterator { typedef iso_back_insert_iterator& reference; + typedef iso_back_insert_iterator* pointer; + typedef size_t difference_type; + typedef utf8_t value_type; + typedef std::output_iterator_tag iterator_category; typedef std::basic_string string_type; iso_back_insert_iterator(string_type& s): s(s) {} + iso_back_insert_iterator& operator=(const iso_back_insert_iterator& other) + { + if (std::addressof(other.s) != std::addressof(s)) + throw std::runtime_error("iso_back_insert_iterator assignment operator actually called! Iterator should not be assigned to."); + + return *this; + } + // no-op reference operator++() { @@ -452,14 +484,15 @@ namespace unicode { template struct UTF { - typedef typename InputIt::value_type value_type; // OutputIt::value_type is the same + typedef typename InputIt::input_type input_type; + typedef typename OutputIt::value_type value_type; - static InputIt begin(const std::basic_string& s) + static InputIt begin(const std::basic_string& s) { return InputIt{s.cbegin(), s.cend()}; } - static InputIt end(const std::basic_string& s) + static InputIt end(const std::basic_string& s) { return InputIt{s.cend(), s.cend()}; } @@ -528,8 +561,8 @@ namespace unicode { bool is_valid_utf(const std::basic_string& s) { try { - std::for_each(Encoding::Facet::begin(s), Encoding::Facet::end(s), [](const T& c){}); - } catch(...) { + std::for_each(Encoding::Facet::begin(s), Encoding::Facet::end(s), [](const char32_t& c){}); + } catch (const std::invalid_argument&) { return false; } return true; diff --git a/msbuild/compiler.props b/msbuild/compiler.props new file mode 100644 index 0000000..c455de8 --- /dev/null +++ b/msbuild/compiler.props @@ -0,0 +1,11 @@ + + + + + D:\ext\winnt\boost\boost-1_72_0;..\include;%(AdditionalIncludeDirectories) + /utf-8 /D NOMINMAX /FS %(AdditionalOptions) + stdcpp17 + + + + \ No newline at end of file diff --git a/msbuild/libunicode.vcxproj b/msbuild/libunicode.vcxproj new file mode 100644 index 0000000..22b75c1 --- /dev/null +++ b/msbuild/libunicode.vcxproj @@ -0,0 +1,93 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {c53e2afc-ea45-4798-be36-e959bb5c5a53} + libunicode + 10.0 + + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + false + v142 + true + Unicode + + + + + + + + + + + + + + + true + + + false + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msbuild/libunicode.vcxproj.filters b/msbuild/libunicode.vcxproj.filters new file mode 100644 index 0000000..7b2e69a --- /dev/null +++ b/msbuild/libunicode.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + + + API + + + Implementation + + + + + Implementation + + + \ No newline at end of file diff --git a/msbuild/recode.vcxproj b/msbuild/recode.vcxproj new file mode 100644 index 0000000..4f3de3f --- /dev/null +++ b/msbuild/recode.vcxproj @@ -0,0 +1,99 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {7b0892e8-1a6f-442e-b38a-c4a07dfb1a1e} + recode + 10.0 + recode + + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + + + + + + + + + + + + + true + + + false + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + + + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + true + true + + + + + + + + + + + {c53e2afc-ea45-4798-be36-e959bb5c5a53} + + + + + + \ No newline at end of file diff --git a/msbuild/recode.vcxproj.filters b/msbuild/recode.vcxproj.filters new file mode 100644 index 0000000..e8375a2 --- /dev/null +++ b/msbuild/recode.vcxproj.filters @@ -0,0 +1,14 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + + + Implementation + + + \ No newline at end of file diff --git a/msbuild/test-unicode.vcxproj b/msbuild/test-unicode.vcxproj new file mode 100644 index 0000000..ff010bd --- /dev/null +++ b/msbuild/test-unicode.vcxproj @@ -0,0 +1,93 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {F1CDB899-0BD2-44F3-A364-F77D19457328} + test-unicode + 10.0 + test-unicode + + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + + + + + + + + + + + + + true + + + false + + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + + {c53e2afc-ea45-4798-be36-e959bb5c5a53} + + + + + + \ No newline at end of file diff --git a/msbuild/test-unicode.vcxproj.filters b/msbuild/test-unicode.vcxproj.filters new file mode 100644 index 0000000..ac23eb8 --- /dev/null +++ b/msbuild/test-unicode.vcxproj.filters @@ -0,0 +1,14 @@ + + + + + {A03B3E05-0246-435D-8DE4-B3B8BB72CD12} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + + + Implementation + + + \ No newline at end of file diff --git a/msbuild/unicode.sln b/msbuild/unicode.sln new file mode 100644 index 0000000..5b049ff --- /dev/null +++ b/msbuild/unicode.sln @@ -0,0 +1,46 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30717.126 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libunicode", "libunicode.vcxproj", "{C53E2AFC-EA45-4798-BE36-E959BB5C5A53}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "recode", "recode.vcxproj", "{7B0892E8-1A6F-442E-B38A-C4A07DFB1A1E}" + ProjectSection(ProjectDependencies) = postProject + {C53E2AFC-EA45-4798-BE36-E959BB5C5A53} = {C53E2AFC-EA45-4798-BE36-E959BB5C5A53} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "validate", "validate.vcxproj", "{D4A2EB75-D845-4B8F-AFFB-0F764013247E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-unicode", "test-unicode.vcxproj", "{F1CDB899-0BD2-44F3-A364-F77D19457328}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {C53E2AFC-EA45-4798-BE36-E959BB5C5A53}.Debug|x64.ActiveCfg = Debug|x64 + {C53E2AFC-EA45-4798-BE36-E959BB5C5A53}.Debug|x64.Build.0 = Debug|x64 + {C53E2AFC-EA45-4798-BE36-E959BB5C5A53}.Release|x64.ActiveCfg = Release|x64 + {C53E2AFC-EA45-4798-BE36-E959BB5C5A53}.Release|x64.Build.0 = Release|x64 + {7B0892E8-1A6F-442E-B38A-C4A07DFB1A1E}.Debug|x64.ActiveCfg = Debug|x64 + {7B0892E8-1A6F-442E-B38A-C4A07DFB1A1E}.Debug|x64.Build.0 = Debug|x64 + {7B0892E8-1A6F-442E-B38A-C4A07DFB1A1E}.Release|x64.ActiveCfg = Release|x64 + {7B0892E8-1A6F-442E-B38A-C4A07DFB1A1E}.Release|x64.Build.0 = Release|x64 + {D4A2EB75-D845-4B8F-AFFB-0F764013247E}.Debug|x64.ActiveCfg = Debug|x64 + {D4A2EB75-D845-4B8F-AFFB-0F764013247E}.Debug|x64.Build.0 = Debug|x64 + {D4A2EB75-D845-4B8F-AFFB-0F764013247E}.Release|x64.ActiveCfg = Release|x64 + {D4A2EB75-D845-4B8F-AFFB-0F764013247E}.Release|x64.Build.0 = Release|x64 + {F1CDB899-0BD2-44F3-A364-F77D19457328}.Debug|x64.ActiveCfg = Debug|x64 + {F1CDB899-0BD2-44F3-A364-F77D19457328}.Debug|x64.Build.0 = Debug|x64 + {F1CDB899-0BD2-44F3-A364-F77D19457328}.Release|x64.ActiveCfg = Release|x64 + {F1CDB899-0BD2-44F3-A364-F77D19457328}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {C031B3A6-3D01-49B9-B07B-331A618EC2B1} + EndGlobalSection +EndGlobal diff --git a/msbuild/validate.vcxproj b/msbuild/validate.vcxproj new file mode 100644 index 0000000..febeb56 --- /dev/null +++ b/msbuild/validate.vcxproj @@ -0,0 +1,99 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {D4A2EB75-D845-4B8F-AFFB-0F764013247E} + validate + 10.0 + validate + + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + + + + + + + + + + + + + true + + + false + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + + + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + true + true + + + + + + + + + + + {c53e2afc-ea45-4798-be36-e959bb5c5a53} + + + + + + \ No newline at end of file diff --git a/msbuild/validate.vcxproj.filters b/msbuild/validate.vcxproj.filters new file mode 100644 index 0000000..cdd8b32 --- /dev/null +++ b/msbuild/validate.vcxproj.filters @@ -0,0 +1,14 @@ + + + + + {3F566DF4-E5BC-4831-829B-C1C22783EC65} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + + + Implementation + + + \ No newline at end of file diff --git a/src/recode.cpp b/src/recode.cpp index b8ada69..517b381 100644 --- a/src/recode.cpp +++ b/src/recode.cpp @@ -150,8 +150,6 @@ int main(int argc, char* argv[]) std::string id{get_id(it_from->second, it_to->second)}; - std::cout << "DEBUG: " << id << std::endl; - auto it { convert_map.find(id) }; if (it == convert_map.end()) { std::cerr << "Error: Conversion ID " << id << " not supported." << std::endl; diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index e1aa23d..c169fc9 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -35,11 +35,13 @@ std::vector success_sets { // Error cases: throwing upon convert to all other types std::vector> failure_strings_char8_t { - u8"\x80", // utf-8 continuation byte - u8"\x81", // utf-8 continuation byte - u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" - u8"\xF8\x80\x80\x80\x80", // overlong encoding - u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point + // Note: don't encode this as u8"" since MSVC will interpret \x80 as \u0080, +// yet to be encoded to UTF-8 for execution encoding + "\x80", // utf-8 continuation byte + "\x81", // utf-8 continuation byte + "\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä" + "\xF8\x80\x80\x80\x80", // overlong encoding + "\xF7\xBF\xBF\xBF", // valid encoding of invalid code point }; std::vector> failure_strings_char16_t { @@ -156,16 +158,20 @@ void test_utf_to_utf_failure(std::basic_string& s) try { (void) unicode::convert(s); BOOST_ERROR("Base: Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); - } catch (...) { - // OK + } catch (const std::invalid_argument&) { + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); }; // via facet try { (void) unicode::convert::Facet,typename unicode::Encoding::Facet>(s); BOOST_ERROR("Facet: Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); - } catch (...) { - // OK + } catch (const std::invalid_argument&) { + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); }; // iterate over remaining types @@ -229,15 +235,16 @@ BOOST_AUTO_TEST_CASE(is_valid_unicode) struct random_context { std::random_device rd; // OS random number engine to seed RNG (below) std::mt19937 gen{rd()}; - std::uniform_int_distribution<> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units + std::uniform_int_distribution sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units }; template T generate_random(random_context& rc, size_t length) { - std::uniform_int_distribution<> code_unit(0, std::numeric_limits::max()); // code unit value + // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC + std::uniform_int_distribution code_unit(std::numeric_limits::max()); // code unit value T result; - std::generate_n(std::back_inserter(result), length, [&](){return code_unit(rc.gen);}); + std::generate_n(std::back_inserter(result), length, [&](){return static_cast(code_unit(rc.gen));}); return result; } @@ -248,7 +255,7 @@ void test_random(random_context& rc, size_t length) //std::cerr << "LENGTH: " << length << std::endl; typedef typename std::tuple_element::type To; - From r {generate_random(rc, length)}; + From r {static_cast(generate_random(rc, length))}; // base type interface try { @@ -259,10 +266,10 @@ void test_random(random_context& rc, size_t length) } else { BOOST_CHECK(!result.empty()); } - } catch (const std::runtime_error&) { - // OK: this is an expected exception for utf_to_utf on bad input } catch (const std::invalid_argument&) { - // OK: this is an expected exception for utf_to_utf on bad input + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); } // facet interface @@ -274,13 +281,11 @@ void test_random(random_context& rc, size_t length) } else { BOOST_CHECK(!result.empty()); } - } catch (const std::runtime_error&) { - // OK: this is an expected exception for utf_to_utf on bad input } catch (const std::invalid_argument&) { - // OK: this is an expected exception for utf_to_utf on bad input + // OK: this is an expected exception for convert() on bad input + } catch (const std::exception& ex) { + BOOST_ERROR("Unexpected error on convert(): " << ex.what()); } - //std::cerr << "DEBUG: " << typeid(From).name() << std::endl; - //std::cerr << " DEBUG2: " << typeid(To).name() << std::endl; // iterate over remaining To types if constexpr (i + 1 < std::tuple_size::value) -- cgit v1.2.3