2
0
mirror of https://github.com/boostorg/uuid.git synced 2026-01-19 04:42:16 +00:00

Added x86 SIMD implementation of to_chars.

Moved the generic to_chars implementation to a separate header and made
to_chars.hpp select the implementation based on the enabled SIMD ISA
extensions. Added an x86 implementation leveraging SSSE3 and later
vector extensions. Added detection of these extensions to config.hpp.

The performance effect on Intel Golden Cove (Core i7-12700K), gcc 13.3,
in millions of to_chars() calls per second with a 16-byte aligned output buffer:

Char     | Generic | SSE4.1           | AVX2             | AVX-512
=========+=========+==================+==================+=================
char     | 203.190 | 1059.322 (5.21x) | 1053.352 (5.18x) | 1058.089 (5.21x)
char16_t | 184.003 |  848.356 (4.61x) | 1009.489 (5.49x) | 1011.122 (5.50x)
char32_t | 202.425 |  484.801 (2.39x) |  676.338 (3.34x) |  462.770 (2.29x)

The core of the SIMD implementation uses 128-bit vectors; larger vectors
are only used to convert to the target character types. This means that for
1-byte character types all vector implementations are basically the same
(barring the extra ISA flexibility added by AVX) and for 2-byte character
types AVX2 and AVX-512 are basically the same.

For 4-byte character types, AVX-512 showed worse performance than SSE4.1 and
AVX2 on the test system. It isn't clear why that is, but it is possible that
the CPU throttles 512-bit instructions so much that the performance drops
below that of a 256-bit equivalent. Perhaps there are just not enough 512-bit
instructions for the CPU to power up the full 512-bit pipeline. Therefore,
the AVX-512 code path for 4-byte character types is currently disabled and
the AVX2 path is used instead (which makes AVX2 and AVX-512 versions basically
equivalent). The AVX-512 path can be enabled again if new CPU microarchitectures
appear that will benefit from it.

Higher alignment values of the output buffer were also tested, but they did not
meaningfully improve performance.
This commit is contained in:
Andrey Semashev
2025-12-16 02:34:16 +03:00
parent f797b2617f
commit 839c431152
5 changed files with 397 additions and 63 deletions

View File

@@ -5,14 +5,19 @@
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/core/lightweight_test.hpp>
#include <boost/config/pragma_message.hpp>
#include <boost/array.hpp>
#include <boost/config.hpp>
#include <string>
#if defined(BOOST_UUID_NO_CXX14_CONSTEXPR_RT)
BOOST_PRAGMA_MESSAGE( "Test is not constexpr because BOOST_UUID_NO_CXX14_CONSTEXPR_RT is defined" )
#endif
using namespace boost::uuids;
template<class Ch>
BOOST_CXX14_CONSTEXPR boost::array<Ch, 36> uuid_to_string( uuid const& u )
BOOST_UUID_CXX14_CONSTEXPR_RT boost::array<Ch, 36> uuid_to_string( uuid const& u )
{
boost::array<Ch, 36> r = {{}};
to_chars( u, r.begin(), r.end() );
@@ -25,7 +30,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u;
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "00000000-0000-0000-0000-000000000000" );
@@ -33,7 +38,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"00000000-0000-0000-0000-000000000000" );
@@ -41,7 +46,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"00000000-0000-0000-0000-000000000000" );
@@ -49,7 +54,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"00000000-0000-0000-0000-000000000000" );
@@ -59,7 +64,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"00000000-0000-0000-0000-000000000000" );
@@ -73,7 +78,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u = {{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }};
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -81,7 +86,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -89,7 +94,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -97,7 +102,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -107,7 +112,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -121,7 +126,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u = {{ 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef, 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }};
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "12345678-90ab-cdef-1234-567890abcdef" );
@@ -129,7 +134,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"12345678-90ab-cdef-1234-567890abcdef" );
@@ -137,7 +142,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"12345678-90ab-cdef-1234-567890abcdef" );
@@ -145,7 +150,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"12345678-90ab-cdef-1234-567890abcdef" );
@@ -155,7 +160,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"12345678-90ab-cdef-1234-567890abcdef" );