mirror of
https://github.com/boostorg/locale.git
synced 2026-01-19 04:22:08 +00:00
Merge pull request #267 from boostorg/test-improvement
Fix cygwin tests
This commit is contained in:
@@ -37,7 +37,6 @@ Almost every(!) facet has design flaws:
|
||||
|
||||
- \c std::ctype, which is responsible for case conversion, assumes that all conversions can be done on a per-character basis. This is
|
||||
probably correct for many languages but it isn't correct in general.
|
||||
\n
|
||||
-# Case conversion may change a string's length. For example, the German word "grüßen" should be converted to "GRÜSSEN" in upper
|
||||
case: the letter "ß" should be converted to "SS", but the \c toupper function works on a single-character basis.
|
||||
-# Case conversion is context-sensitive. For example, the Greek word "ὈΔΥΣΣΕΎΣ" should be converted to "ὀδυσσεύς", where the Greek letter
|
||||
@@ -48,11 +47,9 @@ Almost every(!) facet has design flaws:
|
||||
- \c std::numpunct and \c std::moneypunct do not specify the code points for digit representation at all,
|
||||
so they cannot format numbers with the digits used under Arabic locales. For example,
|
||||
the number "103" is expected to be displayed as "١٠٣" in the \c ar_EG locale.
|
||||
\n
|
||||
\c std::numpunct and \c std::moneypunct assume that the thousands separator is a single character. This is untrue
|
||||
for the UTF-8 encoding where only Unicode 0-0x7F range can be represented as a single character. As a result, localized numbers can't be
|
||||
represented correctly under locales that use the Unicode "EN SPACE" character for the thousands separator, such as Russian.
|
||||
\n
|
||||
This actually causes real problems under GCC and SunStudio compilers, where formatting numbers under a Russian locale creates invalid
|
||||
UTF-8 sequences.
|
||||
- \c std::time_put and \c std::time_get have several flaws:
|
||||
@@ -60,8 +57,6 @@ Almost every(!) facet has design flaws:
|
||||
countries dates may be displayed using different calendars.
|
||||
-# They always use a global time zone, not allowing specification of the time zone for formatting. The standard \c std::tm doesn't
|
||||
even include a timezone field at all.
|
||||
-# \c std::time_get is not symmetric with \c std::time_put, so you cannot parse dates and times created with \c std::time_put .
|
||||
(This issue is addressed in C++11 and some STL implementations like the Apache standard C++ library.)
|
||||
- \c std::messages does not provide support for plural forms, making it impossible to correctly localize such simple strings as
|
||||
"There are X files in the directory".
|
||||
|
||||
@@ -75,13 +70,13 @@ ICU is a very good localization library, but it has several serious flaws:
|
||||
- It is absolutely unfriendly to C++ developers. It ignores popular C++ idioms (the STL, RTTI, exceptions, etc), instead
|
||||
mostly mimicking the Java API.
|
||||
- It provides support for only one kind of string, UTF-16, when some users may want other Unicode encodings.
|
||||
For example, for XML or HTML processing UTF-8 is much more convenient and UTF-32 easier to use. Also there is no support for
|
||||
For example, for XML or HTML processing UTF-8 is much more convenient and UTF-32 easier to use. Also, there is no support for
|
||||
"narrow" encodings that are still very popular, such as the ISO-8859 encodings.
|
||||
|
||||
Boost.Locale provides direct integration with \c iostream, allowing a more natural way of data formatting. For example:
|
||||
|
||||
\code
|
||||
cout << "You have "<<as::currency << 134.45 << " in your account as of "<<as::datetime << std::time(0) << endl;
|
||||
cout << "You have "<<as::currency << 134.45 << " in your account as of "<< as::datetime << std::time(0) << endl;
|
||||
\endcode
|
||||
|
||||
\section why_icu_wrapper Why an ICU wrapper and not an implementation-from-scratch?
|
||||
@@ -145,21 +140,16 @@ There are several reasons:
|
||||
-# A Gregorian Date by definition can't be used to represent locale-independent dates, because not all
|
||||
calendars are Gregorian.
|
||||
-# \c ptime -- definitely could be used, but it has several problems:
|
||||
\n
|
||||
- It is created in GMT or Local time clock, when `time()` gives a representation that is independent of time zones
|
||||
(usually GMT time), and only later should it be represented in a time zone that the user requests.
|
||||
\n
|
||||
The timezone is not a property of time itself, but it is rather a property of time formatting.
|
||||
\n
|
||||
- \c ptime already defines \c operator<< and \c operator>> for time formatting and parsing.
|
||||
- The existing facets for \c ptime formatting and parsing were not designed in a way that the user can override.
|
||||
The major formatting and parsing functions are not virtual. This makes it impossible to reimplement the formatting and
|
||||
parsing functions of \c ptime unless the developers of the Boost.DateTime library decide to change them.
|
||||
\n
|
||||
Also, the facets of \c ptime are not "correctly" designed in terms of division of formatting information and
|
||||
locale information. Formatting information should be stored within \c std::ios_base and information about
|
||||
locale-specific formatting should be stored in the facet itself.
|
||||
\n
|
||||
The user of the library should not have to create new facets to change simple formatting information like "display only
|
||||
the date" or "display both date and time."
|
||||
|
||||
@@ -174,30 +164,28 @@ do not actually know how the text should be encoded -- UTF-8, ISO-8859-1, ISO-88
|
||||
This may vary between different operating systems and depends on the current installation. So it is critical
|
||||
to provide all the required information.
|
||||
- ICU fully understands POSIX locales and knows how to treat them correctly.
|
||||
- They are native locale names for most operating system APIs (with the exception of Windows)
|
||||
- They are native locale names for most operating system APIs (except for Windows)
|
||||
|
||||
\section why_linear_chunks Why do most parts of Boost.Locale work only on linear/contiguous chunks of text?
|
||||
|
||||
There are two reasons:
|
||||
|
||||
- Boost.Locale relies heavily on the third-party APIs like ICU, POSIX or Win32 API, all of them
|
||||
work only on linear chunks of text, so providing non-linear API would just hide the
|
||||
- Boost.Locale relies heavily on third-party APIs like ICU, POSIX or Win32 API, all of them
|
||||
work only on linear chunks of text, so providing a non-linear API would just hide the
|
||||
real situation and would hurt performance.
|
||||
- In fact, all known libraries that work with Unicode: ICU, Qt, Glib, Win32 API, POSIX API
|
||||
and others accept an input as single linear chunks of text and there is a good reason for this:
|
||||
\n
|
||||
-# Most supported operations on text like collation, case handling usually work on small
|
||||
chunks of text. For example: you probably would never want to compare two chapters of a book, but rather
|
||||
their titles.
|
||||
-# We should remember that even very large texts require quite a small amount of memory, for example
|
||||
the entire book "War and Peace" takes only about 3MB of memory.
|
||||
\n
|
||||
|
||||
However:
|
||||
|
||||
- There are API's that support stream processing. For example: character set conversion using
|
||||
- There are APIs that support stream processing. For example: character set conversion using the
|
||||
\c std::codecvt API works on streams of any size without problems.
|
||||
- When new API is introduced into Boost.Locale in future, such that it likely works
|
||||
- When a new API is introduced into Boost.Locale in the future, such that it likely works
|
||||
on large chunks of text, it will provide an interface for non-linear text handling.
|
||||
|
||||
|
||||
@@ -207,27 +195,9 @@ There are several major reasons:
|
||||
|
||||
- This is how C++'s \c std::locale class is built. Each feature is represented using a subclass of
|
||||
\c std::locale::facet that provides an abstract API for specific operations it works on, see \ref std_locales.
|
||||
- This approach allows to switch underlying API without changing the actual application code even in run-time depending
|
||||
- This approach allows switching the underlying API without changing the actual application code, even at run time, depending
|
||||
on performance and localization requirements.
|
||||
- This approach reduces compilation times significantly. This is very important for library that may be
|
||||
- This approach reduces compilation times significantly. This is very important for a library that may be
|
||||
used in almost every part of a specific program.
|
||||
|
||||
\section why_no_special_character_type Why doesn't Boost.Locale provide char16_t/char32_t for non-C++11 platforms?
|
||||
|
||||
There are several reasons:
|
||||
|
||||
- C++11 defines \c char16_t and \c char32_t as distinct types, so substituting it with something like \c uint16_t or \c uint32_t
|
||||
would not work as for example writing \c uint16_t to \c uint32_t stream would write a number to stream.
|
||||
- The C++ locales system would work only if standard facets like \c std::num_put are installed into the
|
||||
existing instance of \c std::locale, however in the many standard C++ libraries these facets are specialized for each
|
||||
specific character that the standard library supports, so an attempt to create a new facet would
|
||||
fail as it is not specialized.
|
||||
|
||||
These are exactly the reasons why Boost.Locale fails with current limited C++11 characters support on GCC-4.5 (the second reason)
|
||||
and MSVC-2010 (the first reason)
|
||||
|
||||
Basically it is impossible to use non-C++ characters with the C++'s locales framework.
|
||||
|
||||
The best and the most portable solution is to use the C++'s \c char type and UTF-8 encodings.
|
||||
|
||||
*/
|
||||
|
||||
@@ -7,32 +7,30 @@
|
||||
/*!
|
||||
\page status_of_cpp0x_characters_support Status of C++11 char16_t/char32_t support
|
||||
|
||||
The support of C++11 \c char16_t and \c char32_t is experimental, mostly does not work, and is not
|
||||
intended to be used in production with the latest compilers: GCC-4.5, MSVC10 until major
|
||||
compiler flaws are fixed.
|
||||
The support of C++11 \c char16_t and \c char32_t is experimental and is not
|
||||
intended to be used in production until various compiler/standard library flaws are fixed.
|
||||
|
||||
\section status_of_cpp0x_characters_support_gnu GNU GCC 4.5/C++11 Status
|
||||
Many recent C++ compilers provide decent support of C++11 characters, however often:
|
||||
|
||||
GNU C++ compiler provides decent support of C++11 characters however:
|
||||
|
||||
-# Standard library does not install any std::locale::facets for this support so any attempt
|
||||
-# The standard library does not install any std::locale::facets for this support so any attempt
|
||||
to format numbers using \c char16_t or \c char32_t streams would just fail.
|
||||
-# Standard library misses specialization for required \c char16_t/char32_t locale facets,
|
||||
-# The standard library misses specialization for required \c char16_t/char32_t locale facets,
|
||||
so the "std" backend is not buildable as essential symbols are missing; also, the \c codecvt facet
|
||||
can't be created either.
|
||||
|
||||
\section status_of_cpp0x_characters_support_msvc Visual Studio 2010 (MSVC10)/C++11 Status
|
||||
\section status_of_cpp0x_characters_support_msvc Visual Studio
|
||||
|
||||
MSVC provides all required facets however:
|
||||
MSVC provides all required facets since VS 2010 however:
|
||||
|
||||
-# Standard library does not provide installations of std::locale::id for these facets
|
||||
-# The standard library does not provide installations of std::locale::id for these facets
|
||||
in DLL so it is not usable with \c /MD, \c /MDd compiler flags and requires static link of the runtime
|
||||
library.
|
||||
-# \c char16_t and \c char32_t are not distinct types but rather aliases of unsigned short and unsigned
|
||||
types, which contradicts the C++11 requirements, making it impossible to write \c char16_t/char32_t to a stream
|
||||
and causing multiple faults.
|
||||
|
||||
If you want to build or test Boost.Locale with C++11 char16_t and char32_t support you should pass `cxxflags="-DBOOST_LOCALE_ENABLE_CHAR32_T -DBOOST_LOCALE_ENABLE_CHAR16_T"` to `b2` during build and define `BOOST_LOCALE_ENABLE_CHAR32_T` and `BOOST_LOCALE_ENABLE_CHAR16_T` when using Boost.Locale
|
||||
If you want to build or test Boost.Locale with C++11 char16_t and char32_t support
|
||||
you should pass `define=BOOST_LOCALE_ENABLE_CHAR32_T define=BOOST_LOCALE_ENABLE_CHAR16_T` to `b2` during build and define `BOOST_LOCALE_ENABLE_CHAR32_T` and `BOOST_LOCALE_ENABLE_CHAR16_T` when using Boost.Locale
|
||||
|
||||
*/
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ problems with this.
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Non UTF-8 encodings</th>
|
||||
<td>Yes</td><td>Yes</td><td>No</td><td>Yes</td>
|
||||
<td>Yes</td><td>Yes</td><td>Yes</td><td>Yes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Date/Time Formatting/Parsing</th>
|
||||
@@ -132,10 +132,6 @@ problems with this.
|
||||
<th>Unicode Normalization</th>
|
||||
<td>Yes</td><td>No</td><td>Vista and above</td><td>No</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>C++11 characters</th>
|
||||
<td>Yes</td><td>No</td><td>No</td><td>Yes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>OS Support</th>
|
||||
<td>Any</td><td>Linux, Mac OS X</td><td>Windows, Cygwin</td><td>Any</td>
|
||||
|
||||
@@ -214,9 +214,8 @@ namespace boost { namespace locale {
|
||||
// mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT())
|
||||
// according to standard. We use it to keep a flag 0/1 for surrogate pair writing
|
||||
//
|
||||
// if 0/false no codepoint above >0xFFFF observed, else a codepoint above 0xFFFF was observed
|
||||
// and first pair is written, but no input consumed
|
||||
bool state = *reinterpret_cast<char*>(&std_state) != 0;
|
||||
// If true then only the high surrogate of a codepoint > 0xFFFF was written, but no input consumed.
|
||||
bool low_surrogate_pending = *reinterpret_cast<char*>(&std_state) != 0;
|
||||
auto cvt_state = implementation().initial_state(to_unicode_state);
|
||||
while(to < to_end && from < from_end) {
|
||||
const char* from_saved = from;
|
||||
@@ -237,31 +236,29 @@ namespace boost { namespace locale {
|
||||
if(ch <= 0xFFFF)
|
||||
*to++ = static_cast<uchar>(ch);
|
||||
else {
|
||||
// For other codepoints we do the following
|
||||
// For other codepoints we can't consume our input as we may find ourselves in a state
|
||||
// where all input is consumed but not all output written, i.e. only the high surrogate is written.
|
||||
//
|
||||
// 1. We can't consume our input as we may find ourselves
|
||||
// in state where all input consumed but not all output written,i.e. only
|
||||
// 1st pair is written
|
||||
// 2. We only write first pair and mark this in the state, we also revert back
|
||||
// the from pointer in order to make sure this codepoint would be read
|
||||
// once again and then we would consume our input together with writing
|
||||
// second surrogate pair
|
||||
// So we write only the high surrogate and mark this in the state.
|
||||
// We also set the from pointer to the previous position, i.e. don't consume the input, so this
|
||||
// codepoint will be read again and then we will consume our input together with writing the low
|
||||
// surrogate.
|
||||
ch -= 0x10000;
|
||||
std::uint16_t w1 = static_cast<std::uint16_t>(0xD800 | (ch >> 10));
|
||||
std::uint16_t w2 = static_cast<std::uint16_t>(0xDC00 | (ch & 0x3FF));
|
||||
if(!state) {
|
||||
const std::uint16_t w1 = static_cast<std::uint16_t>(0xD800 | (ch >> 10));
|
||||
const std::uint16_t w2 = static_cast<std::uint16_t>(0xDC00 | (ch & 0x3FF));
|
||||
if(!low_surrogate_pending) {
|
||||
from = from_saved;
|
||||
*to++ = w1;
|
||||
} else
|
||||
*to++ = w2;
|
||||
state = !state;
|
||||
low_surrogate_pending = !low_surrogate_pending;
|
||||
}
|
||||
}
|
||||
from_next = from;
|
||||
to_next = to;
|
||||
if(r == std::codecvt_base::ok && (from != from_end || state))
|
||||
if(r == std::codecvt_base::ok && (from != from_end || low_surrogate_pending))
|
||||
r = std::codecvt_base::partial;
|
||||
*reinterpret_cast<char*>(&std_state) = state;
|
||||
*reinterpret_cast<char*>(&std_state) = low_surrogate_pending;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,11 +18,14 @@ namespace boost { namespace locale { namespace impl_std {
|
||||
std::locale
|
||||
create_codecvt(const std::locale& in, const std::string& locale_name, char_facet_t type, utf8_support utf)
|
||||
{
|
||||
#if defined(BOOST_WINDOWS)
|
||||
#if defined(BOOST_WINDOWS) || defined(__CYGWIN__)
|
||||
// This isn't fully correct:
|
||||
// It will treat the 2-Byte wchar_t as UTF-16 encoded while it may be UCS-2
|
||||
// std::basic_filebuf explicitly disallows using such multi-byte codecvts
|
||||
// but it works in practice so far, so use it instead of failing for codepoints above U+FFFF
|
||||
//
|
||||
// Additionally, the stdlib in Cygwin has issues converting long UTF-8 sequences likely due to left-over
|
||||
// state across buffer boundaries. E.g. the low surrogate after a sequence of 255 UTF-16 pairs gets corrupted.
|
||||
if(utf != utf8_support::none)
|
||||
return util::create_utf8_codecvt(in, type);
|
||||
#endif
|
||||
|
||||
@@ -121,6 +121,7 @@ namespace boost { namespace locale { namespace impl_std {
|
||||
|
||||
if(!data_.is_utf8()) {
|
||||
utf_mode_ = utf8_support::none;
|
||||
name_ = "C";
|
||||
if(loadable(lid))
|
||||
name_ = lid;
|
||||
else if(l_win && loadable(l_win.name)) {
|
||||
@@ -128,15 +129,19 @@ namespace boost { namespace locale { namespace impl_std {
|
||||
name_ = l_win.name;
|
||||
else {
|
||||
int codepage_int;
|
||||
if(util::try_to_int(l_win.codepage, codepage_int)
|
||||
&& codepage_int == util::encoding_to_windows_codepage(data_.encoding()))
|
||||
{
|
||||
name_ = l_win.name;
|
||||
} else
|
||||
name_ = "C";
|
||||
if(util::try_to_int(l_win.codepage, codepage_int)) {
|
||||
if(codepage_int == util::encoding_to_windows_codepage(data_.encoding()))
|
||||
name_ = l_win.name;
|
||||
else if(codepage_int == util::encoding_to_windows_codepage("windows-1252")
|
||||
&& util::are_encodings_equal(data_.encoding(), "ISO8859-1"))
|
||||
name_ = l_win.name; // windows-1252 is superset of ISO8859-1
|
||||
else if(codepage_int == util::encoding_to_windows_codepage("windows-1255")
|
||||
&& util::are_encodings_equal(data_.encoding(), "ISO8859-8"))
|
||||
name_ = l_win.name; // windows-1255 is superset of ISO8859-8
|
||||
}
|
||||
}
|
||||
} else
|
||||
name_ = "C";
|
||||
}
|
||||
|
||||
} else {
|
||||
if(loadable(lid)) {
|
||||
name_ = lid;
|
||||
|
||||
@@ -207,6 +207,7 @@ std::string get_std_name(const std::string& name, std::string* real_name = nullp
|
||||
|
||||
#if BOOST_LOCALE_USE_WIN32_API
|
||||
const bool utf8 = name.find("UTF-8") != std::string::npos;
|
||||
const char* alt_name = nullptr;
|
||||
|
||||
if(name == "en_US.UTF-8" || name == "en_US.ISO8859-1") {
|
||||
if(has_std_locale("English_United States.1252")) {
|
||||
@@ -214,33 +215,23 @@ std::string get_std_name(const std::string& name, std::string* real_name = nullp
|
||||
*real_name = "English_United States.1252";
|
||||
return utf8 ? name : "en_US.windows-1252";
|
||||
}
|
||||
return "";
|
||||
} else if(name == "he_IL.UTF-8" || name == "he_IL.ISO8859-8") {
|
||||
if(has_std_locale("Hebrew_Israel.1255")) {
|
||||
if(real_name)
|
||||
*real_name = "Hebrew_Israel.1255";
|
||||
return utf8 ? name : "he_IL.windows-1255";
|
||||
}
|
||||
} else if(name == "ru_RU.UTF-8") {
|
||||
if(has_std_locale("Russian_Russia.1251")) {
|
||||
if(real_name)
|
||||
*real_name = "Russian_Russia.1251";
|
||||
return name;
|
||||
}
|
||||
} else if(name == "tr_TR.UTF-8") {
|
||||
if(has_std_locale("Turkish_Turkey.1254")) {
|
||||
if(real_name)
|
||||
*real_name = "Turkish_Turkey.1254";
|
||||
return name;
|
||||
}
|
||||
}
|
||||
if(name == "ja_JP.SJIS") {
|
||||
if(has_std_locale("Japanese_Japan.932")) {
|
||||
if(real_name)
|
||||
*real_name = "Japanese_Japan.932";
|
||||
return name;
|
||||
}
|
||||
return "";
|
||||
} else if(name == "ru_RU.UTF-8")
|
||||
alt_name = "Russian_Russia.1251";
|
||||
else if(name == "tr_TR.UTF-8")
|
||||
alt_name = "Turkish_Turkey.1254";
|
||||
else if(name == "ja_JP.SJIS")
|
||||
alt_name = "Japanese_Japan.932";
|
||||
|
||||
if(alt_name && has_std_locale(alt_name)) {
|
||||
if(real_name)
|
||||
*real_name = alt_name;
|
||||
return name;
|
||||
}
|
||||
#endif
|
||||
return "";
|
||||
|
||||
@@ -88,6 +88,28 @@ std::ostream& operator<<(std::ostream& s, boost::locale::conv::detail::conv_back
|
||||
return s; // LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
// Returns a human-readable name of the character type `Char` for use in test output.
// char8_t, char16_t and char32_t are only recognized when the respective macro
// (__cpp_lib_char8_t, BOOST_LOCALE_ENABLE_CHAR16_T, BOOST_LOCALE_ENABLE_CHAR32_T) is defined;
// any other type yields "unknown char type".
template<typename Char>
|
||||
std::string char_name()
|
||||
{
|
||||
if(std::is_same<Char, char>::value)
|
||||
return "char";
|
||||
else if(std::is_same<Char, wchar_t>::value)
|
||||
return "wchar_t";
|
||||
#ifdef __cpp_lib_char8_t
|
||||
else if(std::is_same<Char, char8_t>::value)
|
||||
return "char8_t";
|
||||
#endif
|
||||
#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
|
||||
else if(std::is_same<Char, char16_t>::value)
|
||||
return "char16_t";
|
||||
#endif
|
||||
#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
|
||||
else if(std::is_same<Char, char32_t>::value)
|
||||
return "char32_t";
|
||||
#endif
|
||||
return "unknown char type"; // LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
#define TEST_FAIL_CONVERSION(X) TEST_THROWS(X, boost::locale::conv::conversion_error)
|
||||
|
||||
template<typename Char>
|
||||
@@ -97,12 +119,13 @@ void test_to_utf_for_impls(const std::string& source,
|
||||
const bool expectSuccess = true,
|
||||
const bool test_default = true)
|
||||
{
|
||||
TEST_CONTEXT(encoding << '/' << char_name<Char>());
|
||||
if(test_default) {
|
||||
boost::locale::conv::utf_encoder<Char> conv(encoding);
|
||||
TEST_EQ(conv(source), target);
|
||||
}
|
||||
for(const auto impl : all_conv_backends) {
|
||||
std::cout << "----- " << impl << '\n';
|
||||
std::cout << "----- Convert to UTF w/ " << impl << '\n';
|
||||
using boost::locale::conv::invalid_charset_error;
|
||||
try {
|
||||
auto convPtr =
|
||||
@@ -135,12 +158,13 @@ void test_from_utf_for_impls(const std::basic_string<Char>& source,
|
||||
const bool expectSuccess = true,
|
||||
const bool test_default = true)
|
||||
{
|
||||
TEST_CONTEXT(encoding << '/' << char_name<Char>());
|
||||
if(test_default) {
|
||||
boost::locale::conv::utf_decoder<Char> conv(encoding);
|
||||
TEST_EQ(conv(source), target);
|
||||
}
|
||||
for(const auto impl : all_conv_backends) {
|
||||
std::cout << "----- " << impl << '\n';
|
||||
std::cout << "----- Convert from UTF w/ " << impl << '\n';
|
||||
using boost::locale::conv::invalid_charset_error;
|
||||
try {
|
||||
auto convPtr =
|
||||
@@ -172,6 +196,7 @@ void test_to_from_utf(const std::string& source,
|
||||
const std::string& encoding,
|
||||
const bool test_default = true)
|
||||
{
|
||||
TEST_CONTEXT(__func__ << ':' << encoding << '/' << char_name<Char>());
|
||||
std::cout << "-- " << encoding << std::endl;
|
||||
|
||||
if(test_default) {
|
||||
@@ -185,10 +210,11 @@ void test_to_from_utf(const std::string& source,
|
||||
template<typename Char>
|
||||
void test_error_to_utf(const std::string& source, const std::basic_string<Char>& target, const std::string& encoding)
|
||||
{
|
||||
TEST_CONTEXT(__func__ << ':' << encoding << '/' << char_name<Char>());
|
||||
using boost::locale::conv::to_utf;
|
||||
using boost::locale::conv::stop;
|
||||
|
||||
// Default: Replace, no error
|
||||
// Default: Skip, no error
|
||||
TEST_EQ(to_utf<Char>(source, encoding), target);
|
||||
// Test all overloads with method=stop -> error
|
||||
// source as string, C-String, range
|
||||
@@ -206,10 +232,11 @@ void test_error_to_utf(const std::string& source, const std::basic_string<Char>&
|
||||
template<typename Char>
|
||||
void test_error_from_utf(const std::basic_string<Char>& source, const std::string& target, const std::string& encoding)
|
||||
{
|
||||
TEST_CONTEXT(__func__ << ':' << encoding << '/' << char_name<Char>());
|
||||
using boost::locale::conv::from_utf;
|
||||
using boost::locale::conv::stop;
|
||||
|
||||
// Default: Replace, no error
|
||||
// Default: Skip, no error
|
||||
TEST_EQ(from_utf<Char>(source, encoding), target);
|
||||
// Test all overloads with method=stop -> error
|
||||
// source as string, C-String, range
|
||||
@@ -359,6 +386,7 @@ void test_all_combinations()
|
||||
template<typename Char>
|
||||
void test_utf_for()
|
||||
{
|
||||
std::cout << "- Testing to/from UTF for " << char_name<Char>() << '\n';
|
||||
using boost::locale::conv::invalid_charset_error;
|
||||
|
||||
{
|
||||
@@ -389,7 +417,7 @@ void test_utf_for()
|
||||
if(iconvIssue != MacOSIconvIssue::No_CN_Support)
|
||||
test_to_from_utf<Char>("\x1b\x24\x29\x41\x0e\x4a\x35\xf", utf<Char>("实"), "ISO-2022-CN", false);
|
||||
|
||||
std::cout << "- Testing correct invalid bytes skipping\n";
|
||||
std::cout << "- Testing correct invalid bytes skipping for " << char_name<Char>() << '\n';
|
||||
{
|
||||
std::cout << "-- UTF-8" << std::endl;
|
||||
|
||||
@@ -428,7 +456,7 @@ void test_utf_for()
|
||||
}
|
||||
std::cout << "-- Error for encoding at start" << std::endl;
|
||||
test_error_from_utf<Char>(utf<Char>("שלום hello"), " hello", "ISO8859-1");
|
||||
std::cout << "-- Error for encoding at middle and end" << std::endl;
|
||||
std::cout << "-- Error for encoding at middle" << std::endl;
|
||||
test_error_from_utf<Char>(utf<Char>("hello שלום world"), "hello world", "ISO8859-1");
|
||||
std::cout << "-- Error for encoding at end" << std::endl;
|
||||
test_error_from_utf<Char>(utf<Char>("hello שלום"), "hello ", "ISO8859-1");
|
||||
@@ -461,6 +489,7 @@ void test_utf_for()
|
||||
template<typename Char1, typename Char2>
|
||||
void test_utf_to_utf_for(const std::string& utf8_string)
|
||||
{
|
||||
std::cout << "---- " << char_name<Char1>() << "<->" << char_name<Char1>() << "\n";
|
||||
const auto utf_string1 = utf<Char1>(utf8_string);
|
||||
const auto utf_string2 = utf<Char2>(utf8_string);
|
||||
using boost::locale::conv::utf_to_utf;
|
||||
@@ -474,22 +503,17 @@ template<typename Char>
|
||||
void test_utf_to_utf_for()
|
||||
{
|
||||
const std::string& utf8_string = "A-Za-z0-9grüße'\xf0\xa0\x82\x8a'\xf4\x8f\xbf\xbf";
|
||||
std::cout << "---- char\n";
|
||||
test_utf_to_utf_for<Char, char>(utf8_string);
|
||||
test_to_utf_for_impls(utf8_string, utf<Char>(utf8_string), "UTF-8");
|
||||
test_from_utf_for_impls(utf<Char>(utf8_string), utf8_string, "UTF-8");
|
||||
std::cout << "---- wchar_t\n";
|
||||
test_utf_to_utf_for<Char, wchar_t>(utf8_string);
|
||||
#ifdef __cpp_lib_char8_t
|
||||
std::cout << "---- char8_t\n";
|
||||
test_utf_to_utf_for<Char, char8_t>(utf8_string);
|
||||
#endif
|
||||
#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
|
||||
std::cout << "---- char16_t\n";
|
||||
test_utf_to_utf_for<Char, char16_t>(utf8_string);
|
||||
#endif
|
||||
#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
|
||||
std::cout << "---- char32_t\n";
|
||||
test_utf_to_utf_for<Char, char32_t>(utf8_string);
|
||||
#endif
|
||||
}
|
||||
@@ -832,20 +856,15 @@ void test_main(int /*argc*/, char** /*argv*/)
|
||||
test_utf_to_utf_allocator_support();
|
||||
|
||||
std::cout << "Testing charset to/from UTF conversion functions\n";
|
||||
std::cout << " char" << std::endl;
|
||||
test_utf_for<char>();
|
||||
std::cout << " wchar_t" << std::endl;
|
||||
test_utf_for<wchar_t>();
|
||||
#ifdef __cpp_lib_char8_t
|
||||
std::cout << " char8_t" << std::endl;
|
||||
test_utf_for<char8_t>();
|
||||
#endif
|
||||
#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
|
||||
std::cout << " char16_t" << std::endl;
|
||||
test_utf_for<char16_t>();
|
||||
#endif
|
||||
#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
|
||||
std::cout << " char32_t" << std::endl;
|
||||
test_utf_for<char32_t>();
|
||||
#endif
|
||||
|
||||
|
||||
@@ -907,8 +907,8 @@ void test_uint64_format()
|
||||
TEST_NE(ss.str(), posix_value);
|
||||
|
||||
uint64_t parsed_value{};
|
||||
TEST(ss >> parsed_value);
|
||||
TEST_EQ(parsed_value, value);
|
||||
if TEST(ss >> parsed_value)
|
||||
TEST_EQ(parsed_value, value);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -61,8 +61,8 @@ void test_by_char(const std::locale& l, locale_t lreal)
|
||||
|
||||
TEST(ss << 1045.45);
|
||||
double n;
|
||||
TEST(ss >> n);
|
||||
TEST_EQ(n, 1045.45);
|
||||
if TEST(ss >> n)
|
||||
TEST_EQ(n, 1045.45);
|
||||
TEST_EQ(ss.str(), ascii_to<CharType>("1045.45"));
|
||||
}
|
||||
|
||||
@@ -74,8 +74,8 @@ void test_by_char(const std::locale& l, locale_t lreal)
|
||||
ss << as::number;
|
||||
TEST(ss << 1045.45);
|
||||
double n;
|
||||
TEST(ss >> n);
|
||||
TEST_EQ(n, 1045.45);
|
||||
if TEST(ss >> n)
|
||||
TEST_EQ(n, 1045.45);
|
||||
|
||||
if(std::use_facet<boost::locale::info>(l).country() == "US")
|
||||
TEST_EQ(ss.str(), from_narrow<CharType>("1,045.45", lreal));
|
||||
|
||||
@@ -22,51 +22,52 @@ void test_char()
|
||||
boost::locale::generator gen;
|
||||
|
||||
std::cout << "- Testing at least C" << std::endl;
|
||||
std::locale l = gen("en_US.UTF-8");
|
||||
std::locale l = gen("C");
|
||||
test_one<CharType>(l, "Hello World i", "hello world i", "HELLO WORLD I");
|
||||
boost::locale::case_convert_test::test_no_op_title_case<CharType>(l, "Hello world i");
|
||||
|
||||
std::string name;
|
||||
std::string name, real_name;
|
||||
|
||||
name = get_std_name("en_US.UTF-8");
|
||||
if(!name.empty()) {
|
||||
name = "en_US.UTF-8";
|
||||
if(get_std_name(name, &real_name).empty())
|
||||
std::cout << "- " << name << " is not supported, skipping" << std::endl; // LCOV_EXCL_LINE
|
||||
else {
|
||||
std::cout << "- Testing " << name << std::endl;
|
||||
l = gen(name);
|
||||
test_one<CharType>(l, "Façade", "façade", "FAÇADE");
|
||||
boost::locale::case_convert_test::test_no_op_title_case<CharType>(l, "Hello world i");
|
||||
} else
|
||||
std::cout << "- en_US.UTF-8 is not supported, skipping" << std::endl; // LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
name = get_std_name("en_US.ISO8859-1");
|
||||
if(!name.empty()) {
|
||||
std::cout << "Testing " << name << std::endl;
|
||||
name = "en_US.ISO8859-1";
|
||||
if(get_std_name(name, &real_name).empty())
|
||||
std::cout << "- " << name << " is not supported, skipping" << std::endl; // LCOV_EXCL_LINE
|
||||
else {
|
||||
std::cout << "- Testing " << name << std::endl;
|
||||
l = gen(name);
|
||||
test_one<CharType>(l, "Hello World", "hello world", "HELLO WORLD");
|
||||
#if BOOST_LOCALE_USE_WIN32_API
|
||||
name = "English_United States";
|
||||
#endif
|
||||
// Check that ç can be converted to Ç by the stdlib (fails on e.g. FreeBSD libstd++)
|
||||
if(std::toupper('\xe7', std::locale(name)) == '\xc7')
|
||||
if(std::toupper('\xe7', std::locale(real_name)) == '\xc7')
|
||||
test_one<CharType>(l, "Façade", "façade", "FAÇADE");
|
||||
else {
|
||||
std::cout << "- en_US.ISO8859-1 (" << name << ") not well supported. "; // LCOV_EXCL_LINE
|
||||
std::cout << "Skipping conv test" << std::endl; // LCOV_EXCL_LINE
|
||||
std::cout << "- " << name << " (" << real_name << ") is not well supported. "; // LCOV_EXCL_LINE
|
||||
std::cout << " Skipping conv test" << std::endl; // LCOV_EXCL_LINE
|
||||
}
|
||||
boost::locale::case_convert_test::test_no_op_title_case<CharType>(l, "Hello world i");
|
||||
} else
|
||||
std::cout << "- en_US.ISO8859-1 is not supported, skipping" << std::endl; // LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
std::string real_name;
|
||||
name = get_std_name("tr_TR.UTF-8", &real_name);
|
||||
if(!name.empty()) {
|
||||
std::cout << "Testing " << name << std::endl;
|
||||
name = "tr_TR.UTF-8";
|
||||
if(get_std_name(name, &real_name).empty())
|
||||
std::cout << "- " << name << " is not supported, skipping" << std::endl; // LCOV_EXCL_LINE
|
||||
else {
|
||||
std::cout << "- Testing " << name << std::endl;
|
||||
if(std::use_facet<std::ctype<wchar_t>>(std::locale(real_name)).toupper(L'i') != L'I') {
|
||||
l = gen(name);
|
||||
test_one<CharType>(l, "i", "i", "İ");
|
||||
} else
|
||||
std::cout << "Standard library does not support this locale's case conversion correctly" << std::endl;
|
||||
} else
|
||||
std::cout << "- tr_TR.UTF-8 is not supported, skipping" << std::endl; // LCOV_EXCL_LINE
|
||||
} else {
|
||||
std::cout << "- " << name << " (" << real_name << ") is not well supported. "; // LCOV_EXCL_LINE
|
||||
std::cout << " Skipping conv test" << std::endl; // LCOV_EXCL_LINE
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_LOCALE_DISABLE_UNREACHABLE_CODE_WARNING
|
||||
|
||||
@@ -31,8 +31,8 @@ void test_by_char(const std::locale& l, const std::locale& lreal)
|
||||
|
||||
TEST(ss << 1045.45);
|
||||
double n;
|
||||
TEST(ss >> n);
|
||||
TEST_EQ(n, 1045.45);
|
||||
if TEST(ss >> n)
|
||||
TEST_EQ(n, 1045.45);
|
||||
TEST_EQ(ss.str(), ascii_to<CharType>("1045.45"));
|
||||
ss_ref_type ss_ref;
|
||||
ss_ref.imbue(std::locale::classic());
|
||||
@@ -51,8 +51,8 @@ void test_by_char(const std::locale& l, const std::locale& lreal)
|
||||
TEST(ss << as::number);
|
||||
TEST(ss << 1045.45);
|
||||
double n;
|
||||
TEST(ss >> n);
|
||||
TEST_EQ(n, 1045.45);
|
||||
if TEST(ss >> n)
|
||||
TEST_EQ(n, 1045.45);
|
||||
|
||||
ss_ref_type ss_ref;
|
||||
ss_ref.imbue(lreal);
|
||||
@@ -62,56 +62,58 @@ void test_by_char(const std::locale& l, const std::locale& lreal)
|
||||
TEST_EQ(to_utf8(ss.str()), to_utf8(ss_ref.str()));
|
||||
}
|
||||
|
||||
{
|
||||
std::cout << "- Testing as::currency national " << std::endl;
|
||||
// workaround MSVC library issues
|
||||
const bool bad_parsing = [&]() {
|
||||
ss_ref_type ss_ref;
|
||||
ss_ref.imbue(lreal);
|
||||
ss_ref << std::showbase << std::put_money(104334, false);
|
||||
std::ios_base::iostate err = std::ios_base::iostate();
|
||||
typename std::money_get<RefCharType>::iter_type end;
|
||||
long double tmp;
|
||||
std::use_facet<std::money_get<RefCharType>>(lreal).get(ss_ref, end, false, ss_ref, err, tmp);
|
||||
if(err & std::ios_base::failbit) {
|
||||
std::cout << "-- Looks like standard library does not support parsing well" << std::endl;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}();
|
||||
|
||||
bool bad_parsing = false;
|
||||
{
|
||||
ss_ref_type ss_ref;
|
||||
ss_ref.imbue(lreal);
|
||||
ss_ref << std::showbase;
|
||||
std::use_facet<std::money_put<RefCharType>>(lreal).put(ss_ref, false, ss_ref, RefCharType(' '), 104334);
|
||||
{ // workaround MSVC library issues
|
||||
std::ios_base::iostate err = std::ios_base::iostate();
|
||||
typename std::money_get<RefCharType>::iter_type end;
|
||||
long double tmp;
|
||||
std::use_facet<std::money_get<RefCharType>>(lreal).get(ss_ref, end, false, ss_ref, err, tmp);
|
||||
if(err & std::ios_base::failbit) {
|
||||
std::cout << "-- Looks like standard library does not support parsing well" << std::endl;
|
||||
bad_parsing = true;
|
||||
{
|
||||
std::cout << "- Testing as::currency national " << std::endl;
|
||||
ss_type ss;
|
||||
ss.imbue(l);
|
||||
|
||||
TEST(ss << as::currency);
|
||||
TEST(ss << 1043.34);
|
||||
if(!bad_parsing) {
|
||||
double v1;
|
||||
if TEST(ss >> v1)
|
||||
TEST_EQ(v1, 1043.34);
|
||||
}
|
||||
|
||||
empty_stream(ss_ref) << std::put_money(104334, false);
|
||||
TEST_EQ(to_utf8(ss.str()), to_utf8(ss_ref.str()));
|
||||
}
|
||||
{
|
||||
std::cout << "- Testing as::currency iso" << std::endl;
|
||||
ss_type ss;
|
||||
ss.imbue(l);
|
||||
|
||||
ss_type ss;
|
||||
ss.imbue(l);
|
||||
ss << as::currency << as::currency_iso;
|
||||
TEST(ss << 1043.34);
|
||||
if(!bad_parsing) {
|
||||
double v1;
|
||||
if TEST(ss >> v1)
|
||||
TEST_EQ(v1, 1043.34);
|
||||
}
|
||||
|
||||
TEST(ss << as::currency);
|
||||
TEST(ss << 1043.34);
|
||||
if(!bad_parsing) {
|
||||
double v1;
|
||||
TEST(ss >> v1);
|
||||
TEST_EQ(v1, 1043.34);
|
||||
empty_stream(ss_ref) << std::put_money(104334, true);
|
||||
TEST_EQ(to_utf8(ss.str()), to_utf8(ss_ref.str()));
|
||||
}
|
||||
|
||||
TEST_EQ(to_utf8(ss.str()), to_utf8(ss_ref.str()));
|
||||
}
|
||||
|
||||
{
|
||||
std::cout << "- Testing as::currency iso" << std::endl;
|
||||
ss_type ss;
|
||||
ss.imbue(l);
|
||||
|
||||
ss << as::currency << as::currency_iso;
|
||||
TEST(ss << 1043.34);
|
||||
double v1;
|
||||
TEST(ss >> v1);
|
||||
TEST_EQ(v1, 1043.34);
|
||||
|
||||
ss_ref_type ss_ref;
|
||||
ss_ref.imbue(lreal);
|
||||
ss_ref << std::showbase;
|
||||
std::use_facet<std::money_put<RefCharType>>(lreal).put(ss_ref, true, ss_ref, RefCharType(' '), 104334);
|
||||
|
||||
TEST_EQ(to_utf8(ss.str()), to_utf8(ss_ref.str()));
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
@@ -33,6 +33,7 @@ std::basic_string<Char> read_file(std::basic_istream<Char>& in)
|
||||
Char c;
|
||||
while(in.get(c))
|
||||
res += c;
|
||||
TEST(in.eof());
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -45,10 +46,11 @@ void test_ok(const std::string& content, const std::locale& l, std::basic_string
|
||||
|
||||
{
|
||||
const std::string file_path = boost::locale::test::exe_name + "-test_read.txt";
|
||||
TEST_CONTEXT("File: " << file_path);
|
||||
remove_file_on_exit _(file_path);
|
||||
{
|
||||
std::ofstream out_file(file_path, std::ios::binary);
|
||||
out_file << content;
|
||||
TEST(out_file << content);
|
||||
}
|
||||
stream_type in_file(file_path, stream_type::in);
|
||||
in_file.imbue(l);
|
||||
@@ -57,11 +59,12 @@ void test_ok(const std::string& content, const std::locale& l, std::basic_string
|
||||
|
||||
{
|
||||
const std::string file_path = boost::locale::test::exe_name + "-test_write.txt";
|
||||
TEST_CONTEXT("File: " << file_path);
|
||||
remove_file_on_exit _(file_path);
|
||||
{
|
||||
stream_type out_file(file_path, stream_type::out);
|
||||
out_file.imbue(l);
|
||||
out_file << cmp;
|
||||
TEST(out_file << cmp);
|
||||
}
|
||||
std::ifstream in_file(file_path);
|
||||
TEST_EQ(read_file<char>(in_file), content);
|
||||
@@ -117,20 +120,23 @@ void test_for_char()
|
||||
{
|
||||
boost::locale::generator g;
|
||||
if(test_utf) {
|
||||
auto l = g("en_US.UTF-8");
|
||||
std::cout << " UTF-8" << std::endl;
|
||||
test_ok<Char>("grüße\nn i", g("en_US.UTF-8"));
|
||||
test_read_fail<Char>("abc\xFF\xFF", g("en_US.UTF-8"), 3);
|
||||
test_ok<Char>("grüße\nn i", l);
|
||||
test_read_fail<Char>("abc\xFF\xFF", l, 3);
|
||||
std::cout << " Testing codepoints above 0xFFFF" << std::endl;
|
||||
std::cout << " Single U+2008A" << std::endl;
|
||||
test_ok<Char>("\xf0\xa0\x82\x8a", g("en_US.UTF-8")); // U+2008A
|
||||
test_ok<Char>("\xf0\xa0\x82\x8a", l); // U+2008A
|
||||
std::cout << " Single U+2008A within text" << std::endl;
|
||||
test_ok<Char>("abc\"\xf0\xa0\x82\x8a\"", g("en_US.UTF-8")); // U+2008A
|
||||
test_ok<Char>("abc\"\xf0\xa0\x82\x8a\"", l); // U+2008A
|
||||
constexpr auto repeats = 1000;
|
||||
std::string one = "\xf0\xa0\x82\x8a";
|
||||
std::string res;
|
||||
for(unsigned i = 0; i < 1000; i++)
|
||||
res.reserve(one.size() * repeats);
|
||||
for(unsigned i = 0; i < repeats; i++)
|
||||
res += one;
|
||||
std::cout << " U+2008A x 1000" << std::endl;
|
||||
test_ok<Char>(res.c_str(), g("en_US.UTF-8")); // U+2008A
|
||||
std::cout << " U+2008A x " << repeats << std::endl;
|
||||
test_ok<Char>(res, l); // U+2008A
|
||||
}
|
||||
|
||||
if(test_iso) {
|
||||
|
||||
@@ -39,8 +39,8 @@ void test_by_char(const std::locale& l, std::string name, int lcid)
|
||||
|
||||
TEST(ss << 1045.45);
|
||||
double n;
|
||||
TEST(ss >> n);
|
||||
TEST_EQ(n, 1045.45);
|
||||
if TEST(ss >> n)
|
||||
TEST_EQ(n, 1045.45);
|
||||
TEST_EQ(to_utf8(ss.str()), "1045.45");
|
||||
}
|
||||
|
||||
@@ -52,8 +52,8 @@ void test_by_char(const std::locale& l, std::string name, int lcid)
|
||||
ss << as::number;
|
||||
TEST(ss << 1045.45);
|
||||
double n;
|
||||
TEST(ss >> n);
|
||||
TEST_EQ(n, 1045.45);
|
||||
if TEST(ss >> n)
|
||||
TEST_EQ(n, 1045.45);
|
||||
|
||||
if(name == "ru_RU.UTF-8") {
|
||||
BOOST_LOCALE_START_CONST_CONDITION
|
||||
|
||||
Reference in New Issue
Block a user