// // Copyright (c) 2015 Artyom Beilis (Tonkikh) // // Distributed under the Boost Software License, Version 1.0. (See // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // #include "test.hpp" #include "test_sets.hpp" #include #include #include #include #include #include #include static char const *utf8_name = "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt"; static const std::wstring wide_name_str = boost::nowide::widen(utf8_name); static wchar_t const *wide_name = wide_name_str.c_str(); char const *res(std::codecvt_base::result r) { switch(r) { case std::codecvt_base::ok: return "ok"; case std::codecvt_base::partial: return "partial"; case std::codecvt_base::error: return "error"; case std::codecvt_base::noconv: return "noconv"; default: return "error"; } } typedef std::codecvt cvt_type; void test_codecvt_in_n_m(cvt_type const &cvt, int n, int m) { wchar_t const *wptr = wide_name; size_t wlen = std::wcslen(wide_name); size_t u8len = std::strlen(utf8_name); char const *from = utf8_name; char const *end = from; char const *real_end = utf8_name + u8len; char const *from_next = from; std::mbstate_t mb = std::mbstate_t(); while(from_next < real_end) { if(from == end) { end = from + n; if(end > real_end) end = real_end; } wchar_t buf[128]; wchar_t *to = buf; wchar_t *to_end = to + m; wchar_t *to_next = to; std::mbstate_t mb2 = mb; std::codecvt_base::result r = cvt.in(mb, from, end, from_next, to, to_end, to_next); // std::cout << "In from_size=" << (end-from) << " from move=" << (from_next - from) << " to move= " << to_next - to << " state = " // << res(r) << std::endl; int count = cvt.length(mb2, from, end, to_end - to); #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST TEST(std::memcmp(&mb, &mb2, sizeof(mb)) == 0); if(count != from_next - from) { std::cout << count << " " << from_next - from << std::endl; } TEST(count == from_next - from); #else TEST(count == to_next - to); #endif if(r == cvt_type::partial) { end += n; if(end > real_end) end = real_end; } else TEST(r == cvt_type::ok); while(to != to_next) { TEST(*wptr == *to); wptr++; to++; } to = to_next; from = from_next; } TEST(wptr == wide_name + wlen); TEST(from == real_end); } void test_codecvt_out_n_m(cvt_type const &cvt, int n, int m) { char const *nptr = utf8_name; size_t wlen = std::wcslen(wide_name); size_t u8len = std::strlen(utf8_name); std::mbstate_t mb = std::mbstate_t(); wchar_t const *from_next = wide_name; wchar_t const *real_from_end = wide_name + wlen; char buf[256]; char *to = buf; char *to_next = to; char *to_end = to + n; char *real_to_end = buf + sizeof(buf); while(from_next < real_from_end) { wchar_t const *from = from_next; wchar_t const *from_end = from + m; if(from_end > real_from_end) from_end = real_from_end; if(to_end == to) { to_end = to + n; } std::codecvt_base::result r = cvt.out(mb, from, from_end, from_next, to, to_end, to_next); // std::cout << "In from_size=" << (end-from) << " from move=" << (from_next - from) << " to move= " << to_next - to << " state = " // << res(r) << std::endl; if(r == cvt_type::partial) { TEST(to_end - to_next < cvt.max_length()); to_end += n; if(to_end > real_to_end) to_end = real_to_end; } else { TEST(r == cvt_type::ok); } while(to != to_next) { TEST(*nptr == *to); nptr++; to++; } from = from_next; } TEST(nptr == utf8_name + u8len); TEST(from_next == real_from_end); TEST(cvt.unshift(mb, to, to + n, to_next) == cvt_type::ok); TEST(to_next == to); } void test_codecvt_conv() { std::cout << "Conversions " << std::endl; std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt()); cvt_type const &cvt = std::use_facet(l); for(int i = 1; i <= (int)std::strlen(utf8_name) + 1; i++) { for(int j = 1; j <= (int)std::wcslen(wide_name) + 1; j++) { try { test_codecvt_in_n_m(cvt, i, j); test_codecvt_out_n_m(cvt, i, j); } catch(...) { std::cerr << "Wlen=" << j << " Nlen=" << i << std::endl; throw; } } } } void test_codecvt_err() { std::cout << "Errors " << std::endl; std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt()); cvt_type const &cvt = std::use_facet(l); std::cout << "- UTF-8" << std::endl; { wchar_t buf[4]; wchar_t *const to = buf; wchar_t *const to_end = buf + 4; wchar_t *to_next = to; char const *err_utf = "1\xFF\xFF\xd7\xa9"; { std::mbstate_t mb = std::mbstate_t(); char const *from = err_utf; char const *from_end = from + std::strlen(from); char const *from_next = from; to_next = to; TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); TEST(from_next == from + 5); TEST(to_next == to + 4); TEST(std::wstring(to, to_end) == boost::nowide::widen(err_utf)); } } std::cout << "- UTF-16/32" << std::endl; { char buf[32]; char *to = buf; char *to_end = buf + 32; char *to_next = to; wchar_t err_buf[3] = {'1', 0xDC9E, 0}; // second surrogate not works both for UTF-16 and 32 wchar_t const *err_utf = err_buf; { std::mbstate_t mb = std::mbstate_t(); wchar_t const *from = err_utf; wchar_t const *from_end = from + std::wcslen(from); wchar_t const *from_next = from; TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); TEST(from_next == from + 2); TEST(to_next == to + 4); TEST(std::memcmp(to, "1\xEF\xBF\xBD", 4) == 0); } } } std::wstring codecvt_to_wide(std::string const &s) { std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt()); cvt_type const &cvt = std::use_facet(l); std::vector output(s.size() + 1); std::mbstate_t mb = std::mbstate_t(); char const *from = s.c_str(); char const *from_end = from + s.size(); char const *from_next = from; std::vector buf(s.size() + 1); wchar_t *to = &buf[0]; wchar_t *to_end = to + buf.size(); wchar_t *to_next = to; TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); std::wstring res(to, to_next); return res; } std::string codecvt_to_narrow(std::wstring const &s) { std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt()); cvt_type const &cvt = std::use_facet(l); std::vector output(s.size() + 1); std::mbstate_t mb = std::mbstate_t(); wchar_t const *from = s.c_str(); wchar_t const *from_end = from + s.size(); wchar_t const *from_next = from; std::vector buf(s.size() * 4 + 1); char *to = &buf[0]; char *to_end = to + buf.size(); char *to_next = to; TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok); std::string res(to, to_next); return res; } void test_codecvt_subst() { std::cout << "Substitutions " << std::endl; run_all(codecvt_to_wide, codecvt_to_narrow); } int main() { try { test_codecvt_conv(); test_codecvt_err(); test_codecvt_subst(); } catch(std::exception const &e) { std::cerr << "Failed : " << e.what() << std::endl; BOOST_NOWIDE_TEST_RETURN_FAILURE; } return boost::report_errors(); } /// // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4