From bf8b679555129d30906b9135bd1d2cb10b8d57e9 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 8 Jan 2020 12:50:54 +0100 Subject: [PATCH 1/2] Improve conversion tests - Move common test cases into test_sets - Run test_sets with all variants of conversions - Handle incomplete input in codecvt conversion test --- test/test_codecvt.cpp | 83 +++++++++++++++++-------------- test/test_convert.cpp | 84 +++++++++++++++++-------------- test/test_sets.hpp | 101 +++++++++++++++++++++++++------------- test/test_stackstring.cpp | 22 ++++++++- 4 files changed, 182 insertions(+), 108 deletions(-) diff --git a/test/test_codecvt.cpp b/test/test_codecvt.cpp index 29f48fa..f82baa0 100644 --- a/test/test_codecvt.cpp +++ b/test/test_codecvt.cpp @@ -20,18 +20,6 @@ static const char* utf8_name = "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB static const std::wstring wide_name_str = boost::nowide::widen(utf8_name); static const wchar_t* wide_name = wide_name_str.c_str(); -const char* res(std::codecvt_base::result r) -{ - switch(r) - { - case std::codecvt_base::ok: return "ok"; - case std::codecvt_base::partial: return "partial"; - case std::codecvt_base::error: return "error"; - case std::codecvt_base::noconv: return "noconv"; - default: return "error"; - } -} - typedef std::codecvt cvt_type; void test_codecvt_in_n_m(const cvt_type& cvt, int n, int m) @@ -60,8 +48,6 @@ void test_codecvt_in_n_m(const cvt_type& cvt, int n, int m) std::mbstate_t mb2 = mb; std::codecvt_base::result r = cvt.in(mb, from, end, from_next, to, to_end, to_next); - // std::cout << "In from_size=" << (end-from) << " from move=" << (from_next - from) << " to move= " << to_next - to << " state = " - // << res(r) << std::endl; int count = cvt.length(mb2, from, end, to_end - to); #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST @@ -124,8 +110,6 @@ void test_codecvt_out_n_m(const cvt_type& cvt, int n, int m) } std::codecvt_base::result r = cvt.out(mb, from, from_end, from_next, to, to_end, to_next); - // std::cout << "In from_size=" << (end-from) << " from move=" << (from_next - from) << " to move= " << to_next - to << " state = " - // << res(r) << std::endl; if(r == cvt_type::partial) { TEST(to_end - to_next < cvt.max_length()); @@ -184,11 +168,11 @@ void test_codecvt_err() std::cout << "- UTF-8" << std::endl; { - wchar_t buf[4]; - wchar_t* const to = buf; - wchar_t* const to_end = buf + 4; - const char* err_utf = "1\xFF\xFF\xd7\xa9"; { + wchar_t buf[4]; + wchar_t* const to = buf; + wchar_t* const to_end = buf + 4; + const char* err_utf = "1\xFF\xFF\xd7\xa9"; std::mbstate_t mb = std::mbstate_t(); const char* from = err_utf; const char* from_end = from + std::strlen(from); @@ -199,6 +183,21 @@ void test_codecvt_err() TEST(to_next == to + 4); TEST(std::wstring(to, to_end) == boost::nowide::widen(err_utf)); } + { + wchar_t buf[4]; + wchar_t* const to = buf; + wchar_t* const to_end = buf + 4; + const char* err_utf = "1\xd7"; // 1 valid, 1 incomplete UTF-8 char + std::mbstate_t mb = std::mbstate_t(); + const char* from = err_utf; + const char* from_end = from + std::strlen(from); + const char* from_next = from; + wchar_t* to_next = to; + TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::partial); + TEST(from_next == from + 1); + TEST(to_next == to + 1); + TEST(std::wstring(to, to_next) == std::wstring(L"1")); + } } std::cout << "- UTF-16/32" << std::endl; @@ -217,7 +216,7 @@ void test_codecvt_err() TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); TEST(from_next == from + 2); TEST(to_next == to + 4); - TEST(std::memcmp(to, "1\xEF\xBF\xBD", 4) == 0); + TEST(std::string(to, to_next) == "1" + boost::nowide::narrow(wreplacement_str)); } } } @@ -229,19 +228,24 @@ std::wstring codecvt_to_wide(const std::string& s) const cvt_type& cvt = std::use_facet(l); std::mbstate_t mb = std::mbstate_t(); - const char* from = s.c_str(); - const char* from_end = from + s.size(); + const char* const from = s.c_str(); + const char* const from_end = from + s.size(); const char* from_next = from; - std::vector buf(s.size() + 1); - wchar_t* to = &buf[0]; - wchar_t* to_end = to + buf.size(); + std::vector buf(s.size() + 2); // +1 for possible incomplete char, +1 for NULL + wchar_t* const to = &buf[0]; + wchar_t* const to_end = to + buf.size(); wchar_t* to_next = to; - TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); + cvt_type::result res = cvt.in(mb, from, from_end, from_next, to, to_end, to_next); + if(res == cvt_type::partial) + { + TEST(to_next < to_end); + *(to_next++) = BOOST_NOWIDE_REPLACEMENT_CHARACTER; + } else + TEST(res == cvt_type::ok); - std::wstring res(to, to_next); - return res; + return std::wstring(to, to_next); } std::string codecvt_to_narrow(const std::wstring& s) @@ -251,19 +255,24 @@ std::string codecvt_to_narrow(const std::wstring& s) const cvt_type& cvt = std::use_facet(l); std::mbstate_t mb = std::mbstate_t(); - const wchar_t* from = s.c_str(); - const wchar_t* from_end = from + s.size(); + const wchar_t* const from = s.c_str(); + const wchar_t* const from_end = from + s.size(); const wchar_t* from_next = from; - std::vector buf(s.size() * 4 + 1); - char* to = &buf[0]; - char* to_end = to + buf.size(); + std::vector buf((s.size() + 1) * 4 + 1); // +1 for possible incomplete char, +1 for NULL + char* const to = &buf[0]; + char* const to_end = to + buf.size(); char* to_next = to; - TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); + cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next); + if(res == cvt_type::partial) + { + TEST(to_next < to_end); + return std::string(to, to_next) + boost::nowide::narrow(wreplacement_str); + } else + TEST(res == cvt_type::ok); - std::string res(to, to_next); - return res; + return std::string(to, to_next); } void test_codecvt_subst() diff --git a/test/test_convert.cpp b/test/test_convert.cpp index dcb3d10..ccf3f2f 100644 --- a/test/test_convert.cpp +++ b/test/test_convert.cpp @@ -15,6 +15,44 @@ #pragma warning(disable : 4428) // universal-character-name encountered in source #endif +std::wstring widen_buf_ptr(const std::string& s) +{ + wchar_t buf[50]; + TEST(boost::nowide::widen(buf, 50, s.c_str()) == buf); + return buf; +} + +std::string narrow_buf_ptr(const std::wstring& s) +{ + char buf[50]; + TEST(boost::nowide::narrow(buf, 50, s.c_str()) == buf); + return buf; +} + +std::wstring widen_buf_range(const std::string& s) +{ + wchar_t buf[50]; + TEST(boost::nowide::widen(buf, 50, s.c_str(), s.c_str() + s.size()) == buf); + return buf; +} + +std::string narrow_buf_range(const std::wstring& s) +{ + char buf[50]; + TEST(boost::nowide::narrow(buf, 50, s.c_str(), s.c_str() + s.size()) == buf); + return buf; +} + +std::wstring widen_raw_string(const std::string& s) +{ + return boost::nowide::widen(s.c_str()); +} + +std::string narrow_raw_string(const std::wstring& s) +{ + return boost::nowide::narrow(s.c_str()); +} + int main() { try @@ -23,14 +61,11 @@ int main() std::wstring whello = L"\u05e9\u05dc\u05d5\u05dd"; std::wstring whello_3e = L"\u05e9\u05dc\u05d5\ufffd"; std::wstring whello_3 = L"\u05e9\u05dc\u05d5"; - // Example filenames used in tests - std::string example = "\xd7\xa9-\xd0\xbc-\xce\xbd.txt"; - std::wstring wexample = L"\u05e9-\u043c-\u03bd.txt"; std::cout << "- boost::nowide::widen" << std::endl; { const char* b = hello.c_str(); - const char* e = b + 8; + const char* e = b + hello.size(); wchar_t buf[6] = {0, 0, 0, 0, 0, 1}; TEST(boost::nowide::widen(buf, 5, b, e) == buf); TEST(buf == whello); @@ -42,26 +77,11 @@ int main() TEST(buf == whello_3); TEST(boost::nowide::widen(buf, 5, b, b) == buf && buf[0] == 0); TEST(boost::nowide::widen(buf, 5, b, b + 2) == buf && buf[1] == 0 && buf[0] == whello[0]); - b = "\xFF\xFF"; - e = b + 2; - TEST(boost::nowide::widen(buf, 5, b, e) == buf); - TEST(buf == std::wstring(L"\ufffd\ufffd")); - b = "\xd7\xa9\xFF"; - e = b + 3; - TEST(boost::nowide::widen(buf, 5, b, e) == buf); - TEST(buf == std::wstring(L"\u05e9\ufffd")); - TEST(boost::nowide::widen(buf, 5, b, b + 1) == buf); - TEST(buf == std::wstring(L"\ufffd")); - b = "\xFF\xd7\xa9"; - e = b + 3; - TEST(boost::nowide::widen(buf, 5, b, e) == buf); - TEST(buf == std::wstring(L"\ufffd\u05e9")); - TEST(boost::nowide::widen(example) == wexample); } std::cout << "- boost::nowide::narrow" << std::endl; { const wchar_t* b = whello.c_str(); - const wchar_t* e = b + 4; + const wchar_t* e = b + whello.size(); char buf[10] = {0}; buf[9] = 1; TEST(boost::nowide::narrow(buf, 9, b, e) == buf); @@ -70,23 +90,15 @@ int main() TEST(boost::nowide::narrow(buf, 8, b, e) == 0); TEST(boost::nowide::narrow(buf, 7, b, e - 1) == buf); TEST(buf == hello.substr(0, 6)); - wchar_t tmp[3] = {0xDC01, 0x05e9, 0}; - b = tmp; - TEST(boost::nowide::narrow(buf, 10, b, b + 2) == buf); - TEST(buf == std::string("\xEF\xBF\xBD\xd7\xa9")); - wchar_t tmp2[3] = {0x05e9, 0xD800, 0}; - b = tmp2; - TEST(boost::nowide::narrow(buf, 10, b, b + 2) == buf); - TEST(buf == std::string("\xd7\xa9\xEF\xBF\xBD")); - TEST(boost::nowide::narrow(wexample) == example); } - { - char buf[3]; - wchar_t wbuf[3]; - TEST(boost::nowide::narrow(buf, 3, L"xy") == std::string("xy")); - TEST(boost::nowide::widen(wbuf, 3, "xy") == std::wstring(L"xy")); - } - std::cout << "- Substitutions" << std::endl; + + std::cout << "- (output_buffer, buffer_size, input_raw_string)" << std::endl; + run_all(widen_buf_ptr, narrow_buf_ptr); + std::cout << "- (output_buffer, buffer_size, input_raw_string, string_len)" << std::endl; + run_all(widen_buf_range, narrow_buf_range); + std::cout << "- (input_raw_string)" << std::endl; + run_all(widen_raw_string, narrow_raw_string); + std::cout << "- (const std::string&)" << std::endl; run_all(boost::nowide::widen, boost::nowide::narrow); } catch(const std::exception& e) { diff --git a/test/test_sets.hpp b/test/test_sets.hpp index 559e699..10243e9 100644 --- a/test/test_sets.hpp +++ b/test/test_sets.hpp @@ -8,7 +8,7 @@ #ifndef BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED #define BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED -#include +#include #include #include @@ -28,67 +28,102 @@ struct wide_to_utf8 #pragma warning(disable : 4428) // universal-character-name encountered in source #endif -utf8_to_wide n2w_tests[] = {{"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt", - L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt"}, - {"\xFF\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82", L"\uFFFD\u043F\u0440\u0438\u0432\u0435\u0442"}, - {"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\xFF", L"\u043F\u0440\u0438\u0432\u0435\u0442\uFFFD"}, - {"\xE3\x82\xFF\xE3\x81\x82", L"\ufffd\u3042"}, - {"\xE3\xFF\x84\xE3\x81\x82", L"\ufffd\ufffd\u3042"}}; +const std::wstring wreplacement_str(1, wchar_t(BOOST_NOWIDE_REPLACEMENT_CHARACTER)); -wide_to_utf8 w2n_tests_utf16[] = { - { - L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt", - "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt", - }, +// clang-format off +const utf8_to_wide roundtrip_tests[] = { + {"", L""}, + {"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt", + L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt"}, + {"\xd7\xa9-\xd0\xbc-\xce\xbd.txt", + L"\u05e9-\u043c-\u03bd.txt"}, + {"\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d", + L"\u05e9\u05dc\u05d5\u05dd"}, +}; + +const utf8_to_wide invalid_utf8_tests[] = { + {"\xFF\xFF", L"\ufffd\ufffd"}, + {"\xd7\xa9\xFF", L"\u05e9\ufffd"}, + {"\xd7", L"\ufffd"}, + {"\xFF\xd7\xa9", L"\ufffd\u05e9"}, + {"\xFF\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82", L"\uFFFD\u043F\u0440\u0438\u0432\u0435\u0442"}, + {"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\xFF", L"\u043F\u0440\u0438\u0432\u0435\u0442\uFFFD"}, + {"\xE3\x82\xFF\xE3\x81\x82", L"\ufffd\u3042"}, + {"\xE3\xFF\x84\xE3\x81\x82", L"\ufffd\ufffd\u3042"}, +}; + +const wide_to_utf8 invalid_wide_tests[] = { + {L"\xDC01\x05e9", "\xEF\xBF\xBD\xd7\xa9"}, + {L"\x05e9\xD800", "\xd7\xa9\xEF\xBF\xBD"}, + {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", + "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, + {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", + "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, +}; + + +const wide_to_utf8 invalid_utf16_tests[] = { {L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", "\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, - {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", - "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, {L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, - {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", - "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}}; +}; -wide_to_utf8 w2n_tests_utf32[] = { - { - L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt", - "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt", - }, +const wide_to_utf8 invalid_utf32_tests[] = { {L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, - {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", - "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, {L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, - {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", - "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}}; +}; + +// clang-format on #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable : 4127) // Constant expression detected #endif +template +size_t array_size(const T (&)[N]) +{ + return N; +} + void run_all(std::wstring (*to_wide)(const std::string&), std::string (*to_narrow)(const std::wstring&)) { - for(size_t i = 0; i < sizeof(n2w_tests) / sizeof(n2w_tests[0]); i++) + for(size_t i = 0; i < array_size(roundtrip_tests); i++) { - std::cout << " N2W " << i << std::endl; - TEST(to_wide(n2w_tests[i].utf8) == n2w_tests[i].wide); + std::cout << " Roundtrip " << i << std::endl; + TEST(roundtrip_tests[i].utf8 == to_narrow(roundtrip_tests[i].wide)); + TEST(to_wide(roundtrip_tests[i].utf8) == roundtrip_tests[i].wide); } + + for(size_t i = 0; i < array_size(invalid_utf8_tests); i++) + { + std::cout << " Invalid UTF8 " << i << std::endl; + TEST(to_wide(invalid_utf8_tests[i].utf8) == invalid_utf8_tests[i].wide); + } + + for(size_t i = 0; i < array_size(invalid_wide_tests); i++) + { + std::cout << " Invalid Wide " << i << std::endl; + TEST(to_narrow(invalid_wide_tests[i].wide) == invalid_wide_tests[i].utf8); + } + size_t total = 0; const wide_to_utf8* ptr = 0; if(sizeof(wchar_t) == 2) { - ptr = w2n_tests_utf16; - total = sizeof(w2n_tests_utf16) / sizeof(w2n_tests_utf16[0]); + ptr = invalid_utf16_tests; + total = array_size(invalid_utf16_tests); } else { - ptr = w2n_tests_utf32; - total = sizeof(w2n_tests_utf32) / sizeof(w2n_tests_utf32[0]); + ptr = invalid_utf32_tests; + total = array_size(invalid_utf32_tests); } for(size_t i = 0; i < total; i++) { - std::cout << " W2N " << i << std::endl; + std::cout << " Invalid UTF16/32 " << i << std::endl; TEST(to_narrow(ptr[i].wide) == ptr[i].utf8); } } diff --git a/test/test_stackstring.cpp b/test/test_stackstring.cpp index 6d1c4ee..4046f11 100644 --- a/test/test_stackstring.cpp +++ b/test/test_stackstring.cpp @@ -27,12 +27,24 @@ std::string stackstring_to_narrow(const std::wstring& s) return ss.get(); } +std::wstring heap_stackstring_to_wide(const std::string& s) +{ + const boost::nowide::basic_stackstring ss(s.c_str()); + return ss.get(); +} + +std::string heap_stackstring_to_narrow(const std::wstring& s) +{ + const boost::nowide::basic_stackstring ss(s.c_str()); + return ss.get(); +} + int main() { try { std::string hello = "\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d"; - std::wstring whello = L"\u05e9\u05dc\u05d5\u05dd"; + std::wstring whello = boost::nowide::widen(hello); const wchar_t* wempty = L""; { @@ -76,6 +88,7 @@ int main() TEST(s2.get() == std::string()); } { + // Will be put on heap TEST(whello.size() >= 3); boost::nowide::basic_stackstring sw; TEST(sw.convert(hello.c_str())); @@ -84,6 +97,7 @@ int main() TEST(sw.get() == whello); } { + // Will be put on stack TEST(whello.size() < 5); boost::nowide::basic_stackstring sw; TEST(sw.convert(hello.c_str())); @@ -92,6 +106,7 @@ int main() TEST(sw.get() == whello); } { + // Will be put on heap TEST(hello.size() >= 5); boost::nowide::basic_stackstring sw; TEST(sw.convert(whello.c_str())); @@ -100,6 +115,7 @@ int main() TEST(sw.get() == hello); } { + // Will be put on stack TEST(hello.size() < 10); boost::nowide::basic_stackstring sw; TEST(sw.convert(whello.c_str())); @@ -168,8 +184,10 @@ int main() TEST(stack.get() == stackVal); TEST(heap.get() == heapVal); } - std::cout << "- Substitutions" << std::endl; + std::cout << "- Stackstring" << std::endl; run_all(stackstring_to_wide, stackstring_to_narrow); + std::cout << "- Heap Stackstring" << std::endl; + run_all(heap_stackstring_to_wide, heap_stackstring_to_narrow); } catch(const std::exception& e) { std::cerr << "Failed :" << e.what() << std::endl; From 14675cd82290fae97da7bfb1cd3aa001679c6850 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 8 Jan 2020 16:45:20 +0100 Subject: [PATCH 2/2] Fix utf8_codecvt conversion of UTF-16 string with trailing surrogate The correct result is `partial`, not `ok` --- include/boost/nowide/utf8_codecvt.hpp | 2 +- test/test_codecvt.cpp | 42 ++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/include/boost/nowide/utf8_codecvt.hpp b/include/boost/nowide/utf8_codecvt.hpp index ff90684..9419295 100644 --- a/include/boost/nowide/utf8_codecvt.hpp +++ b/include/boost/nowide/utf8_codecvt.hpp @@ -282,7 +282,7 @@ namespace nowide { } from_next = from; to_next = to; - if(r == std::codecvt_base::ok && from != from_end) + if(r == std::codecvt_base::ok && (from != from_end || state != 0)) r = std::codecvt_base::partial; detail::write_state(std_state, state); return r; diff --git a/test/test_codecvt.cpp b/test/test_codecvt.cpp index f82baa0..593ed4c 100644 --- a/test/test_codecvt.cpp +++ b/test/test_codecvt.cpp @@ -112,10 +112,15 @@ void test_codecvt_out_n_m(const cvt_type& cvt, int n, int m) std::codecvt_base::result r = cvt.out(mb, from, from_end, from_next, to, to_end, to_next); if(r == cvt_type::partial) { - TEST(to_end - to_next < cvt.max_length()); - to_end += n; - if(to_end > real_to_end) - to_end = real_to_end; + // If those are equal, then "partial" probably means: Need more input + // Otherwise "Need more output" + if(from_next != from_end) + { + TEST(to_end - to_next < cvt.max_length()); + to_end += n; + if(to_end > real_to_end) + to_end = real_to_end; + } } else { TEST(r == cvt_type::ok); @@ -198,6 +203,35 @@ void test_codecvt_err() TEST(to_next == to + 1); TEST(std::wstring(to, to_next) == std::wstring(L"1")); } + { + char buf[4] = {}; + char* const to = buf; + char* const to_end = buf + 4; + char* to_next = to; + const wchar_t* err_utf = L"\xD800"; // Trailing UTF-16 surrogate + std::mbstate_t mb = std::mbstate_t(); + const wchar_t* from = err_utf; + const wchar_t* from_end = from + 1; + const wchar_t* from_next = from; + cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next); +#ifdef BOOST_MSVC +#pragma warning(disable : 4127) // Constant expression detected +#endif + if(sizeof(wchar_t) == 2) + { + TEST(res == cvt_type::partial); + TEST(from_next == from_end); + TEST(to_next == to); + TEST(buf[0] == 0); + } else + { + TEST(res == cvt_type::ok); + TEST(from_next == from_end); + TEST(to_next == to + 3); + // surrogate is invalid + TEST(std::string(to, to_next) == boost::nowide::narrow(wreplacement_str)); + } + } } std::cout << "- UTF-16/32" << std::endl;