// // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt #include #include #include "boostLocale/test/tools.hpp" #include "boostLocale/test/unit_test.hpp" #include #include using namespace boost::locale::utf; const std::uint32_t* u32_seq(std::uint32_t a) { static std::uint32_t buf[2]; buf[0] = a; buf[1] = 0; return buf; } const std::uint16_t* u16_seq(std::uint16_t a) { static std::uint16_t buf[2]; buf[0] = a; buf[1] = 0; return buf; } const std::uint16_t* u16_seq(std::uint16_t a, std::uint16_t b) { static std::uint16_t buf[3]; buf[0] = a; buf[1] = b; buf[2] = 0; return buf; } const char16_t* c16_seq(std::uint16_t a) { static char16_t buf[2]; buf[0] = static_cast(a); buf[1] = 0; return buf; } const char32_t* c32_seq(std::uint32_t a) { static char32_t buf[2]; buf[0] = static_cast(a); buf[1] = 0; return buf; } template void test_from_utf(const CharType* const s, unsigned codepoint) { const CharType* cur = s; const CharType* const end = boost::locale::util::str_end(s); typedef utf_traits tr; static_assert(tr::max_width == 4 / sizeof(CharType), "Wrong max_width"); TEST_EQ(tr::decode(cur, end), codepoint); if(codepoint != illegal) TEST(cur == end); if(codepoint == incomplete) { TEST(*s == 0 || tr::trail_length(*s) > 0); TEST_GE(tr::trail_length(*s), end - s); } if(codepoint != incomplete && codepoint != illegal) { TEST(tr::is_lead(*s)); TEST(!tr::is_trail(*s)); cur = s; while(++cur != end) { TEST(tr::is_trail(*cur)); TEST(!tr::is_lead(*cur)); } TEST_EQ(tr::width(codepoint), end - s); TEST_EQ(tr::trail_length(*s), tr::width(codepoint) - 1); cur = s; TEST_EQ(tr::decode_valid(cur), codepoint); TEST(cur == end); } } template void test_to_utf(const CharType* str, unsigned codepoint) { CharType buf[5] = {1, 1, 1, 1, 1}; CharType* p = buf; p = utf_traits::encode(codepoint, p); const CharType* const end = boost::locale::util::str_end(str); TEST_EQ(end - str, p - buf); TEST(*p); *p = 0; TEST_EQ(memcmp(str, buf, sizeof(CharType) * (end - str)), 0); } template void test_valid_utf(const CharType* str, unsigned codepoint) { test_from_utf(str, codepoint); test_to_utf(str, codepoint); } void test_utf8() { std::cout << "- Test UTF-8\n"; std::cout << "-- Correct" << std::endl; test_valid_utf("\x7f", 0x7f); test_valid_utf("\xc2\x80", 0x80); test_valid_utf("\xdf\xbf", 0x7ff); test_valid_utf("\xe0\xa0\x80", 0x800); test_valid_utf("\xef\xbf\xbf", 0xffff); test_valid_utf("\xf0\x90\x80\x80", 0x10000); test_valid_utf("\xf4\x8f\xbf\xbf", 0x10ffff); // test that this actually works test_from_utf(make2(0x80), 0x80); test_from_utf(make2(0x7ff), 0x7ff); test_from_utf(make3(0x800), 0x800); test_from_utf(make3(0xffff), 0xffff); test_from_utf(make4(0x10000), 0x10000); test_from_utf(make4(0x10ffff), 0x10ffff); std::cout << "-- Too big" << std::endl; test_from_utf("\xf4\x9f\x80\x80", illegal); // 11 0000 test_from_utf("\xfb\xbf\xbf\xbf", illegal); // 3ff ffff test_from_utf("\xf8\x90\x80\x80\x80", illegal); // 400 0000 test_from_utf("\xfd\xbf\xbf\xbf\xbf\xbf", illegal); // 7fff ffff std::cout << "-- Invalid length" << std::endl; test_from_utf(make2(0), illegal); test_from_utf(make3(0), illegal); test_from_utf(make4(0), illegal); test_from_utf(make2(0x7f), illegal); test_from_utf(make3(0x7f), illegal); test_from_utf(make4(0x7f), illegal); test_from_utf(make3(0x80), illegal); test_from_utf(make4(0x80), illegal); test_from_utf(make3(0x7ff), illegal); test_from_utf(make4(0x7ff), illegal); test_from_utf(make4(0x8000), illegal); test_from_utf(make4(0xffff), illegal); test_from_utf(make4(0x110000), illegal); test_from_utf(make4(0x1fffff), illegal); std::cout << "-- Invalid surrogate" << std::endl; test_from_utf(make3(0xd800), illegal); test_from_utf(make3(0xdbff), illegal); test_from_utf(make3(0xdc00), illegal); test_from_utf(make3(0xdfff), illegal); test_from_utf(make4(0xd800), illegal); test_from_utf(make4(0xdbff), illegal); test_from_utf(make4(0xdc00), illegal); test_from_utf(make4(0xdfff), illegal); std::cout << "-- Incomplete" << std::endl; test_from_utf("", incomplete); test_from_utf("\x80", illegal); test_from_utf("\xc2", incomplete); test_from_utf("\xdf", incomplete); test_from_utf("\xe0", incomplete); test_from_utf("\xe0\xa0", incomplete); test_from_utf("\xef\xbf", incomplete); test_from_utf("\xef", incomplete); test_from_utf("\xf0\x90\x80", incomplete); test_from_utf("\xf0\x90", incomplete); test_from_utf("\xf0", incomplete); test_from_utf("\xf4\x8f\xbf", incomplete); test_from_utf("\xf4\x8f", incomplete); test_from_utf("\xf4", incomplete); } void test_utf16() { std::cout << "- Test UTF-16\n"; std::cout << "-- Correct" << std::endl; test_valid_utf(u16_seq(0x10), 0x10); test_valid_utf(u16_seq(0xffff), 0xffff); test_valid_utf(u16_seq(0xD800, 0xDC00), 0x10000); test_valid_utf(u16_seq(0xDBFF, 0xDFFF), 0x10FFFF); std::cout << "-- Invalid surrogate" << std::endl; test_from_utf(u16_seq(0xDFFF), illegal); test_from_utf(u16_seq(0xDC00), illegal); std::cout << "-- Incomplete" << std::endl; test_from_utf(u16_seq(0), incomplete); test_from_utf(u16_seq(0xD800), incomplete); test_from_utf(u16_seq(0xDBFF), incomplete); std::cout << "-- Test char16_t" << std::endl; #if BOOST_WORKAROUND(BOOST_GCC_VERSION, < 50000) test_valid_utf(u"\x0010", 0x10); test_valid_utf(u"\xffff", 0xffff); #else test_valid_utf(u"\u0010", 0x10); test_valid_utf(u"\uffff", 0xffff); #endif test_valid_utf(u"\U00010000", 0x10000); test_valid_utf(u"\U0010FFFF", 0x10FFFF); test_from_utf(c16_seq(0xDFFF), illegal); test_from_utf(c16_seq(0xDC00), illegal); } void test_utf32() { std::cout << "- Test UTF-32\n"; std::cout << "-- Correct" << std::endl; test_valid_utf(u32_seq(0x10), 0x10); test_valid_utf(u32_seq(0xffff), 0xffff); test_valid_utf(u32_seq(0x10000), 0x10000); test_valid_utf(u32_seq(0x10ffff), 0x10ffff); std::cout << "-- Invalid surrogate" << std::endl; test_from_utf(u32_seq(0xD800), illegal); test_from_utf(u32_seq(0xDBFF), illegal); test_from_utf(u32_seq(0xDFFF), illegal); test_from_utf(u32_seq(0xDC00), illegal); test_from_utf(u32_seq(0x110000), illegal); std::cout << "-- Incomplete" << std::endl; test_from_utf(u32_seq(0), incomplete); std::cout << "-- Test char32_t" << std::endl; #if BOOST_WORKAROUND(BOOST_GCC_VERSION, < 50000) test_valid_utf(U"\x0010", 0x10); #else test_valid_utf(U"\U00000010", 0x10); #endif test_valid_utf(U"\U0000ffff", 0xffff); test_valid_utf(U"\U00010000", 0x10000); test_valid_utf(U"\U0010ffff", 0x10ffff); test_from_utf(c32_seq(0xD800), illegal); test_from_utf(c32_seq(0xDBFF), illegal); test_from_utf(c32_seq(0xDFFF), illegal); test_from_utf(c32_seq(0xDC00), illegal); test_from_utf(c32_seq(0x110000), illegal); } void test_main(int /*argc*/, char** /*argv*/) { test_utf8(); test_utf16(); test_utf32(); } // boostinspect:noascii