2
0
mirror of https://github.com/boostorg/nowide.git synced 2026-02-14 12:52:17 +00:00
Files
nowide/test/test_codecvt.cpp
Alexander Grund 6afb3f15ab Deduplicate tests and improve coverage
Every test had the same pattern of a main catching exceptions and
returning 0 or 1 based on that. Factor that into test.hpp.
As most code in test.hpp is only executed on failure, and failures should
not occur, this file is excluded from coverage.

Finally test.hpp is included last and the header to test first
consistently to check for self-sufficient includes.
2020-04-03 16:28:28 +02:00

329 lines
10 KiB
C++

//
// Copyright (c) 2015 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
#include <boost/nowide/utf8_codecvt.hpp>
#include <boost/nowide/convert.hpp>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <locale>
#include <vector>
#include "test.hpp"
#include "test_sets.hpp"
// UTF-8 encoded sample file name used as the narrow reference string for all tests.
// The literal mixes a 4-byte sequence, several 2-byte and 3-byte sequences and plain
// ASCII characters, so chunked conversions hit every sequence length.
static const char* utf8_name =
"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt";
// Wide counterpart of utf8_name as produced by boost::nowide::widen.
// This std::wstring owns the storage wide_name points into, so it has to be defined first.
static const std::wstring wide_name_str = boost::nowide::widen(utf8_name);
// Raw pointer view of wide_name_str, used as the wide reference string.
static const wchar_t* wide_name = wide_name_str.c_str();
// Facet interface exercised by the tests: wchar_t is the internal, char the external type.
typedef std::codecvt<wchar_t, char, std::mbstate_t> cvt_type;
// Decodes utf8_name to wide characters piece by piece and compares the output with wide_name.
//
// cvt: facet under test
// n:   number of bytes by which the visible input window [from, end) is opened/grown
// m:   number of wchar_t offered as output room to every cvt.in() call
//
// Every cvt.in() call is accompanied by a cvt.length() call on a copy of the state taken
// before the conversion, so the results of both can be cross-checked.
void test_codecvt_in_n_m(const cvt_type& cvt, size_t n, size_t m)
{
    const wchar_t* wptr = wide_name; // Next expected wide character
    const size_t wlen = std::wcslen(wide_name);
    const size_t u8len = std::strlen(utf8_name);
    const char* from = utf8_name;
    const char* end = from;
    const char* const real_end = utf8_name + u8len;
    const char* from_next = from;
    std::mbstate_t mb = std::mbstate_t();
    while(from_next < real_end)
    {
        if(from == end)
        {
            // Current window is used up: open the next one.
            // The step is clamped *before* the addition so that no pointer beyond
            // the end of the string is ever formed (that would be undefined behavior).
            const size_t remaining = static_cast<size_t>(real_end - from);
            end = from + (n < remaining ? n : remaining);
        }
        wchar_t buf[128];
        wchar_t* to = buf;
        wchar_t* to_end = to + m;
        wchar_t* to_next = to;
        // Snapshot of the state before in() so length() starts from the same state
        std::mbstate_t mb2 = mb;
        std::codecvt_base::result r = cvt.in(mb, from, end, from_next, to, to_end, to_next);
        int count = cvt.length(mb2, from, end, to_end - to);
#ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
        // length() must leave the state exactly as in() did and report the bytes in() consumed
        TEST(std::memcmp(&mb, &mb2, sizeof(mb)) == 0);
        if(count != from_next - from)
        {
            std::cout << count << " " << from_next - from << std::endl;
        }
        TEST(count == from_next - from);
#else
        // NOTE(review): in this configuration the count is compared against the number of
        // wide characters written; presumably this mirrors the standard library's do_length
        // taking a const state -- confirm against utf8_codecvt.hpp
        TEST(count == to_next - to);
#endif
        if(r == cvt_type::partial)
        {
            // Grow the input window, again clamping before the addition
            const size_t remaining = static_cast<size_t>(real_end - end);
            end += (n < remaining ? n : remaining);
        } else
        {
            TEST(r == cvt_type::ok);
        }
        // Verify everything produced by this step
        while(to != to_next)
        {
            TEST(*wptr == *to);
            wptr++;
            to++;
        }
        from = from_next;
    }
    // All wide characters must have been produced and all input consumed
    TEST(wptr == wide_name + wlen);
    TEST(from == real_end);
}
// Returns begin advanced by n elements, but never beyond limit.
// The distance is clamped before the addition so no out-of-range pointer is formed.
// Requires begin <= limit.
template<typename T>
static T* advance_within(T* begin, size_t n, T* limit)
{
    const size_t room = static_cast<size_t>(limit - begin);
    return begin + (n < room ? n : room);
}

// Encodes wide_name to UTF-8 piece by piece and compares the output with utf8_name.
//
// cvt: facet under test
// n:   number of bytes by which the output window [to, to_end) is opened/grown
// m:   maximum number of wide characters offered as input to every cvt.out() call
void test_codecvt_out_n_m(const cvt_type& cvt, size_t n, size_t m)
{
    const char* nptr = utf8_name; // Next expected output byte
    const size_t wlen = std::wcslen(wide_name);
    const size_t u8len = std::strlen(utf8_name);
    std::mbstate_t mb = std::mbstate_t();
    const wchar_t* from_next = wide_name;
    const wchar_t* const real_from_end = wide_name + wlen;
    char buf[256];
    char* const real_to_end = buf + sizeof(buf);
    char* to = buf;
    char* to_next = to;
    char* to_end = advance_within(to, n, real_to_end);
    while(from_next < real_from_end)
    {
        // Offer at most m wide characters, limited to what is left of the input
        const wchar_t* const from = from_next;
        const wchar_t* const from_end = advance_within(from, m, real_from_end);
        if(to_end == to)
        {
            // Output window is used up: open the next one
            to_end = advance_within(to, n, real_to_end);
        }
        const std::codecvt_base::result r = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
        if(r == cvt_type::partial)
        {
            // If those are equal, then "partial" probably means: Need more input
            // Otherwise "Need more output"
            if(from_next != from_end)
            {
                // The facet may only give up when less than one full character fits
                TEST(to_end - to_next < cvt.max_length());
                to_end = advance_within(to_end, n, real_to_end);
            }
        } else
        {
            TEST(r == cvt_type::ok);
        }
        // Verify everything produced by this step
        while(to != to_next)
        {
            TEST(*nptr == *to);
            nptr++;
            to++;
        }
    }
    // All bytes must have been produced and all input consumed
    TEST(nptr == utf8_name + u8len);
    TEST(from_next == real_from_end);
    // unshift() is expected to succeed without writing anything
    TEST(cvt.unshift(mb, to, advance_within(to, n, real_to_end), to_next) == cvt_type::ok);
    TEST(to_next == to);
}
// Runs the chunked decode and encode checks for every combination of narrow step size
// (1 .. strlen(utf8_name) + 1) and wide step size (1 .. wcslen(wide_name) + 1).
// If a combination throws, the step sizes are printed to std::cerr and the exception is rethrown.
void test_codecvt_conv()
{
    std::cout << "Conversions " << std::endl;
    std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
    const cvt_type& cvt = std::use_facet<cvt_type>(l);

    const size_t max_narrow_step = std::strlen(utf8_name) + 1;
    const size_t max_wide_step = std::wcslen(wide_name) + 1;

    for(size_t narrow_step = 1; narrow_step <= max_narrow_step; ++narrow_step)
    {
        for(size_t wide_step = 1; wide_step <= max_wide_step; ++wide_step)
        {
            try
            {
                test_codecvt_in_n_m(cvt, narrow_step, wide_step);
                test_codecvt_out_n_m(cvt, narrow_step, wide_step);
            } catch(...)
            {
                // Identify the failing combination, then let the failure propagate
                std::cerr << "Wlen=" << wide_step << " Nlen=" << narrow_step << std::endl;
                throw;
            }
        }
    }
}
// Checks how the facet reacts to malformed or incomplete input in both directions:
// invalid UTF-8 bytes, a truncated UTF-8 sequence and lone UTF-16 surrogates.
void test_codecvt_err()
{
    std::cout << "Errors " << std::endl;
    std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
    const cvt_type& cvt = std::use_facet<cvt_type>(l);

    std::cout << "- UTF-8" << std::endl;
    {
        {
            // Two invalid bytes between valid characters:
            // conversion reports ok, consumes all 5 bytes and yields the same 4 wide chars as widen()
            wchar_t buf[4];
            wchar_t* const to = buf;
            wchar_t* const to_end = buf + 4;
            const char* err_utf = "1\xFF\xFF\xd7\xa9";
            std::mbstate_t mb = std::mbstate_t();
            const char* from = err_utf;
            const char* from_end = from + std::strlen(from);
            const char* from_next = from;
            wchar_t* to_next = to;
            TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok);
            TEST(from_next == from + 5);
            TEST(to_next == to + 4);
            TEST(std::wstring(to, to_end) == boost::nowide::widen(err_utf));
        }
        {
            // Input ends in the middle of a sequence:
            // only the complete character is converted and partial is reported
            wchar_t buf[4];
            wchar_t* const to = buf;
            wchar_t* const to_end = buf + 4;
            const char* err_utf = "1\xd7"; // 1 valid, 1 incomplete UTF-8 char
            std::mbstate_t mb = std::mbstate_t();
            const char* from = err_utf;
            const char* from_end = from + std::strlen(from);
            const char* from_next = from;
            wchar_t* to_next = to;
            TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::partial);
            TEST(from_next == from + 1);
            TEST(to_next == to + 1);
            TEST(std::wstring(to, to_next) == std::wstring(L"1"));
        }
        {
            char buf[4] = {};
            char* const to = buf;
            char* const to_end = buf + 4;
            char* to_next = to;
            const wchar_t* err_utf = L"\xD800"; // Lone leading (high) UTF-16 surrogate at the end of the input
            std::mbstate_t mb = std::mbstate_t();
            const wchar_t* from = err_utf;
            const wchar_t* from_end = from + 1;
            const wchar_t* from_next = from;
            cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
            // Suppress the MSVC warning only for the compile-time constant condition below
            // and restore the previous warning state afterwards.
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable : 4127) // Constant expression detected
#endif
            if(sizeof(wchar_t) == 2)
            {
                // UTF-16: the surrogate is consumed but nothing is written yet
                TEST(res == cvt_type::partial);
                TEST(from_next == from_end);
                TEST(to_next == to);
                TEST(buf[0] == 0);
            } else
            {
                TEST(res == cvt_type::ok);
                TEST(from_next == from_end);
                TEST(to_next == to + 3);
                // surrogate is invalid
                TEST(std::string(to, to_next) == boost::nowide::narrow(wreplacement_str));
            }
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
        }
    }

    std::cout << "- UTF-16/32" << std::endl;
    {
        char buf[32];
        char* to = buf;
        char* to_end = buf + 32;
        char* to_next = to;
        // A lone second (low) surrogate is invalid in both UTF-16 and UTF-32
        wchar_t err_buf[3] = {'1', 0xDC9E, 0};
        const wchar_t* err_utf = err_buf;
        {
            // Expected output: "1" followed by the narrowed replacement string (4 bytes in total)
            std::mbstate_t mb = std::mbstate_t();
            const wchar_t* from = err_utf;
            const wchar_t* from_end = from + std::wcslen(from);
            const wchar_t* from_next = from;
            TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok);
            TEST(from_next == from + 2);
            TEST(to_next == to + 4);
            TEST(std::string(to, to_next) == "1" + boost::nowide::narrow(wreplacement_str));
        }
    }
}
// Converts the UTF-8 string s to a wide string with a single utf8_codecvt in() call.
// When in() reports partial, BOOST_NOWIDE_REPLACEMENT_CHARACTER is appended to the result;
// any result other than ok/partial is a test failure.
std::wstring codecvt_to_wide(const std::string& s)
{
    std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
    const cvt_type& cvt = std::use_facet<cvt_type>(l);

    // One element per input byte, plus room for the replacement appended below and one spare
    std::vector<wchar_t> out(s.size() + 2);
    wchar_t* const out_begin = &out[0];
    wchar_t* const out_limit = out_begin + out.size();
    wchar_t* out_pos = out_begin;

    const char* const in_begin = s.c_str();
    const char* const in_limit = in_begin + s.size();
    const char* in_pos = in_begin;

    std::mbstate_t state = std::mbstate_t();
    const cvt_type::result res = cvt.in(state, in_begin, in_limit, in_pos, out_begin, out_limit, out_pos);
    if(res != cvt_type::partial)
    {
        TEST(res == cvt_type::ok);
    } else
    {
        // Incomplete trailing sequence: represent it by the replacement character
        TEST(out_pos < out_limit);
        *(out_pos++) = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
    }
    return std::wstring(out_begin, out_pos);
}
// Converts the wide string s to UTF-8 with a single utf8_codecvt out() call.
// When out() reports partial, the narrowed wreplacement_str is appended to the result;
// any result other than ok/partial is a test failure.
std::string codecvt_to_narrow(const std::wstring& s)
{
    std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
    const cvt_type& cvt = std::use_facet<cvt_type>(l);

    // 4 bytes per wide character (with one extra character of head room) plus one spare byte
    std::vector<char> out((s.size() + 1) * 4 + 1);
    char* const out_begin = &out[0];
    char* const out_limit = out_begin + out.size();
    char* out_pos = out_begin;

    const wchar_t* const in_begin = s.c_str();
    const wchar_t* const in_limit = in_begin + s.size();
    const wchar_t* in_pos = in_begin;

    std::mbstate_t state = std::mbstate_t();
    const cvt_type::result res = cvt.out(state, in_begin, in_limit, in_pos, out_begin, out_limit, out_pos);
    if(res != cvt_type::partial)
    {
        TEST(res == cvt_type::ok);
        return std::string(out_begin, out_pos);
    }
    // Incomplete trailing input: represent it by the replacement string
    TEST(out_pos < out_limit);
    return std::string(out_begin, out_pos) + boost::nowide::narrow(wreplacement_str);
}
// Announces the section and hands the two codecvt-based converters to run_all().
void test_codecvt_subst()
{
    std::cout << "Substitutions ";
    std::cout << std::endl;
    run_all(codecvt_to_wide, codecvt_to_narrow);
}
// Test entry point: runs the conversion, error and substitution tests in that order.
// The three parameters are unused.
void test_main(int, char**, char**)
{
    typedef void (*test_fn)();
    const test_fn tests[] = {test_codecvt_conv, test_codecvt_err, test_codecvt_subst};
    for(size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); ++i)
        tests[i]();
}