2
0
mirror of https://github.com/boostorg/uuid.git synced 2026-01-19 04:42:16 +00:00

Added x86 SIMD implementation of to_chars.

Moved the generic to_chars implementation to a separate header and made
to_chars.hpp select the implementation based on the enabled SIMD ISA
extensions. Added an x86 implementation leveraging SSSE3 and later
vector extensions. Added detection of these extensions to config.hpp.

The performance effect on Intel Golden Cove (Core i7-12700K), gcc 13.3,
in millions of to_chars() calls per second with a 16-byte aligned output buffer:

Char     | Generic | SSE4.1           | AVX2             | AVX-512
=========+=========+==================+==================+=================
char     | 203.190 | 1059.322 (5.21x) | 1053.352 (5.18x) | 1058.089 (5.21x)
char16_t | 184.003 |  848.356 (4.61x) | 1009.489 (5.49x) | 1011.122 (5.50x)
char32_t | 202.425 |  484.801 (2.39x) |  676.338 (3.34x) |  462.770 (2.29x)

The core of the SIMD implementation uses 128-bit vectors; larger vectors
are only used to convert to the target character types. This means that for
1-byte character types all vector implementations are basically the same
(barring the extra ISA flexibility added by AVX) and for 2-byte character
types AVX2 and AVX-512 are basically the same.

For 4-byte character types, AVX-512 showed worse performance than SSE4.1 and
AVX2 on the test system. It isn't clear why that is, but it is possible that
the CPU throttles 512-bit instructions so much that the performance drops
below a 256-bit equivalent. Perhaps, there are just not enough 512-bit
instructions for the CPU to power up the full 512-bit pipeline. Therefore,
the AVX-512 code path for 4-byte character types is currently disabled and
the AVX2 path is used instead (which makes AVX2 and AVX-512 versions basically
equivalent). The AVX-512 path can be enabled again if new CPU microarchitectures
appear that will benefit from it.

Higher alignment values of the output buffer were also tested, but they did not
meaningfully improve performance.
This commit is contained in:
Andrey Semashev
2025-12-16 02:34:16 +03:00
parent f797b2617f
commit 839c431152
5 changed files with 397 additions and 63 deletions

View File

@@ -32,6 +32,10 @@
#define BOOST_UUID_USE_SSE3
#endif
#if defined(__SSSE3__) && !defined(BOOST_UUID_USE_SSSE3)
#define BOOST_UUID_USE_SSSE3
#endif
#if defined(__SSE4_1__) && !defined(BOOST_UUID_USE_SSE41)
#define BOOST_UUID_USE_SSE41
#endif
@@ -40,6 +44,10 @@
#define BOOST_UUID_USE_AVX
#endif
#if defined(__AVX2__) && !defined(BOOST_UUID_USE_AVX2)
#define BOOST_UUID_USE_AVX2
#endif
#if ((defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512BW__)) || defined(__AVX10_1__)) && !defined(BOOST_UUID_USE_AVX10_1)
#define BOOST_UUID_USE_AVX10_1
#endif
@@ -54,6 +62,10 @@
#define BOOST_UUID_USE_AVX
#endif
#if defined(__AVX2__) && !defined(BOOST_UUID_USE_AVX2)
#define BOOST_UUID_USE_AVX2
#endif
#if ((defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512BW__)) || defined(__AVX10_1__)) && !defined(BOOST_UUID_USE_AVX10_1)
#define BOOST_UUID_USE_AVX10_1
#endif
@@ -61,7 +73,11 @@
#endif
// More advanced ISA extensions imply less advanced are also available
#if !defined(BOOST_UUID_USE_AVX) && defined(BOOST_UUID_USE_AVX10_1)
#if !defined(BOOST_UUID_USE_AVX2) && defined(BOOST_UUID_USE_AVX10_1)
#define BOOST_UUID_USE_AVX2
#endif
#if !defined(BOOST_UUID_USE_AVX) && defined(BOOST_UUID_USE_AVX2)
#define BOOST_UUID_USE_AVX
#endif
@@ -69,7 +85,11 @@
#define BOOST_UUID_USE_SSE41
#endif
#if !defined(BOOST_UUID_USE_SSE3) && defined(BOOST_UUID_USE_SSE41)
#if !defined(BOOST_UUID_USE_SSSE3) && defined(BOOST_UUID_USE_SSE41)
#define BOOST_UUID_USE_SSSE3
#endif
#if !defined(BOOST_UUID_USE_SSE3) && defined(BOOST_UUID_USE_SSSE3)
#define BOOST_UUID_USE_SSE3
#endif
@@ -79,8 +99,10 @@
#if !defined(BOOST_UUID_NO_SIMD) && \
!defined(BOOST_UUID_USE_AVX10_1) && \
!defined(BOOST_UUID_USE_AVX2) && \
!defined(BOOST_UUID_USE_AVX) && \
!defined(BOOST_UUID_USE_SSE41) && \
!defined(BOOST_UUID_USE_SSSE3) && \
!defined(BOOST_UUID_USE_SSE3) && \
!defined(BOOST_UUID_USE_SSE2)
#define BOOST_UUID_NO_SIMD

View File

@@ -7,59 +7,31 @@
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/uuid/uuid.hpp>
#include <boost/config.hpp>
#include <boost/uuid/detail/config.hpp>
#include <boost/uuid/detail/is_constant_evaluated.hpp>
#include <boost/uuid/detail/to_chars_generic.hpp>
#if defined(BOOST_UUID_USE_SSSE3)
#include <boost/uuid/detail/to_chars_x86.hpp>
#endif
namespace boost {
namespace uuids {
namespace detail {
constexpr char const* to_chars_digits( char const* ) noexcept
template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
{
return "0123456789abcdef-";
}
constexpr wchar_t const* to_chars_digits( wchar_t const* ) noexcept
{
return L"0123456789abcdef-";
}
constexpr char16_t const* to_chars_digits( char16_t const* ) noexcept
{
return u"0123456789abcdef-";
}
constexpr char32_t const* to_chars_digits( char32_t const* ) noexcept
{
return U"0123456789abcdef-";
}
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
constexpr char8_t const* to_chars_digits( char8_t const* ) noexcept
{
return u8"0123456789abcdef-";
}
#endif
template<class Ch> BOOST_CXX14_CONSTEXPR inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
{
constexpr Ch const* digits = to_chars_digits( static_cast<Ch const*>( nullptr ) );
for( std::size_t i = 0; i < 16; ++i )
#if defined(BOOST_UUID_USE_SSSE3)
if( detail::is_constant_evaluated_rt() )
{
std::uint8_t ch = u.data()[ i ];
*out++ = digits[ (ch >> 4) & 0x0F ];
*out++ = digits[ ch & 0x0F ];
if( i == 3 || i == 5 || i == 7 || i == 9 )
{
*out++ = digits[ 16 ];
}
return detail::to_chars_generic( u, out );
}
return out;
else
{
return detail::to_chars_simd( u, out );
}
#else
return detail::to_chars_generic( u, out );
#endif
}
} // namespace detail

View File

@@ -0,0 +1,74 @@
#ifndef BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED
#define BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED
// Copyright 2009 Andy Tompkins
// Copyright 2024 Peter Dimov
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/uuid/uuid.hpp>
#include <boost/config.hpp>
#if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
#include <boost/config/pragma_message.hpp>
BOOST_PRAGMA_MESSAGE( "Using to_chars_generic.hpp" )
#endif
namespace boost {
namespace uuids {
namespace detail {
// Digit alphabets for the generic UUID formatter.
//
// Each overload is chosen by the static type of its pointer argument (the
// argument value itself is never read) and returns a 17-character table in
// the corresponding character type: indices 0-15 are the lowercase
// hexadecimal digits, index 16 is the dash separator.

constexpr auto to_chars_digits( char const* ) noexcept -> char const* { return "0123456789abcdef-"; }

constexpr auto to_chars_digits( wchar_t const* ) noexcept -> wchar_t const* { return L"0123456789abcdef-"; }

constexpr auto to_chars_digits( char16_t const* ) noexcept -> char16_t const* { return u"0123456789abcdef-"; }

constexpr auto to_chars_digits( char32_t const* ) noexcept -> char32_t const* { return U"0123456789abcdef-"; }

#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L

// Only available when the compiler provides a distinct char8_t type.
constexpr auto to_chars_digits( char8_t const* ) noexcept -> char8_t const* { return u8"0123456789abcdef-"; }

#endif
// Portable (non-SIMD) UUID formatter.
//
// Writes the 16 bytes of `u` as two lowercase hexadecimal characters each,
// with a dash after bytes 3, 5, 7 and 9, i.e. 36 characters in total.
// No terminating null character is written.
//
// `out` must point to storage for at least 36 characters of type Ch.
// Returns a pointer one past the last character written.
template<class Ch> BOOST_CXX14_CONSTEXPR inline Ch* to_chars_generic( uuid const& u, Ch* out ) noexcept
{
    // Indices 0-15: hexadecimal digits, index 16: the dash
    constexpr Ch const* alphabet = to_chars_digits( static_cast<Ch const*>( nullptr ) );

    Ch* cursor = out;

    for( std::size_t pos = 0; pos != 16; ++pos )
    {
        std::uint8_t const octet = u.data()[ pos ];

        // High half-byte first, then the low one
        cursor[ 0 ] = alphabet[ ( octet >> 4 ) & 0x0F ];
        cursor[ 1 ] = alphabet[ octet & 0x0F ];
        cursor += 2;

        switch( pos )
        {
        case 3: case 5: case 7: case 9:
            // Group boundary
            *cursor++ = alphabet[ 16 ];
            break;

        default:
            break;
        }
    }

    return cursor;
}
}}} //namespace boost::uuids::detail
#endif // BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED

View File

@@ -0,0 +1,261 @@
#ifndef BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
#define BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
// Copyright 2025 Andrey Semashev
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/uuid/detail/config.hpp>
#if defined(BOOST_UUID_USE_SSSE3)
#include <cstdint>
#include <cstring>
#include <boost/uuid/uuid.hpp>
#if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
#include <boost/config/pragma_message.hpp>
#if defined(BOOST_UUID_USE_AVX10_1)
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, AVX10.1" )
#elif defined(BOOST_UUID_USE_AVX2)
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, AVX2" )
#elif defined(BOOST_UUID_USE_SSE41)
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE4.1" )
#else
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
#endif
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
#if defined(BOOST_UUID_USE_AVX2)
#include <immintrin.h>
#elif defined(BOOST_UUID_USE_SSE41)
#include <smmintrin.h>
#else
#include <tmmintrin.h>
#endif
namespace boost {
namespace uuids {
namespace detail {
// Per-character-type byte tables used by the SIMD formatter.
//
// This primary template holds ASCII codes. The two defaulted boolean
// parameters record whether the narrow and wide execution character sets
// encode the hexadecimal digits and the dash as ASCII does; partial
// specializations may take over when they are false.
template<
    typename CharT,
    bool NarrowIsASCII = ('0' == 0x30 && '9' == 0x39 && 'a' == 0x61 && 'f' == 0x66 && '-' == 0x2D),
    bool WideIsASCII = (L'0' == 0x30 && L'9' == 0x39 && L'a' == 0x61 && L'f' == 0x66 && L'-' == 0x2D)
>
struct to_chars_simd_char_constants
{
    // Character code for each half-byte value 0-15
    alignas(16) static const std::uint8_t mm_char_table[16];
    // The dash character code repeated in every byte
    alignas(16) static const std::uint8_t mm_char_dash[16];
};

template< typename CharT, bool NarrowIsASCII, bool WideIsASCII >
alignas(16) const std::uint8_t to_chars_simd_char_constants< CharT, NarrowIsASCII, WideIsASCII >::mm_char_table[16] =
{
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567 in ASCII
    0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66  // 89abcdef in ASCII
};

template< typename CharT, bool NarrowIsASCII, bool WideIsASCII >
alignas(16) const std::uint8_t to_chars_simd_char_constants< CharT, NarrowIsASCII, WideIsASCII >::mm_char_dash[16] =
{
    0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, // -------- in ASCII
    0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D  // -------- in ASCII
};
// Tables for `char` when the narrow execution character set does not encode
// the hexadecimal digits and the dash as ASCII does. The byte values are
// taken from the narrow character literals themselves.
template< bool WideIsASCII >
struct to_chars_simd_char_constants< char, false, WideIsASCII >
{
    // to_chars_simd_core merges the dash in with _mm_max_epu8, which only works
    // if the dash code does not exceed the hexadecimal character codes
    static_assert(
        static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('0') &&
        static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('a'),
        "Boost.UUID: Unsupported char encoding, '-' character code is expected to be less than any hexadecimal characters");

    // Character code for each half-byte value 0-15
    alignas(16) static const std::uint8_t mm_char_table[16];
    // The dash character code repeated in every byte
    alignas(16) static const std::uint8_t mm_char_dash[16];
};

template< bool WideIsASCII >
alignas(16) const std::uint8_t to_chars_simd_char_constants< char, false, WideIsASCII >::mm_char_table[16] =
{
    static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('1'),
    static_cast< std::uint8_t >('2'), static_cast< std::uint8_t >('3'),
    static_cast< std::uint8_t >('4'), static_cast< std::uint8_t >('5'),
    static_cast< std::uint8_t >('6'), static_cast< std::uint8_t >('7'),
    static_cast< std::uint8_t >('8'), static_cast< std::uint8_t >('9'),
    static_cast< std::uint8_t >('a'), static_cast< std::uint8_t >('b'),
    static_cast< std::uint8_t >('c'), static_cast< std::uint8_t >('d'),
    static_cast< std::uint8_t >('e'), static_cast< std::uint8_t >('f')
};

template< bool WideIsASCII >
alignas(16) const std::uint8_t to_chars_simd_char_constants< char, false, WideIsASCII >::mm_char_dash[16] =
{
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-')
};
// Tables for `wchar_t` when the wide execution character set does not encode
// the hexadecimal digits and the dash as ASCII does. The byte values are
// taken from the wide character literals, which must fit in a single byte.
template< bool NarrowIsASCII >
struct to_chars_simd_char_constants< wchar_t, NarrowIsASCII, false >
{
    // The tables hold one byte per character, so each code must survive a round trip through std::uint8_t
    static_assert(
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'0')) == L'0' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'9')) == L'9' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'a')) == L'a' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'f')) == L'f' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'-')) == L'-',
        "Boost.UUID: Unsupported wchar_t encoding, hexadecimal and dash character codes are expected to be representable by a single byte");

    // to_chars_simd_core merges the dash in with _mm_max_epu8, which only works
    // if the dash code does not exceed the hexadecimal character codes
    static_assert(
        static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'0') &&
        static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'a'),
        "Boost.UUID: Unsupported wchar_t encoding, L'-' character code is expected to be less than any hexadecimal characters");

    // Character code for each half-byte value 0-15
    alignas(16) static const std::uint8_t mm_char_table[16];
    // The dash character code repeated in every byte
    alignas(16) static const std::uint8_t mm_char_dash[16];
};

template< bool NarrowIsASCII >
alignas(16) const std::uint8_t to_chars_simd_char_constants< wchar_t, NarrowIsASCII, false >::mm_char_table[16] =
{
    static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'1'),
    static_cast< std::uint8_t >(L'2'), static_cast< std::uint8_t >(L'3'),
    static_cast< std::uint8_t >(L'4'), static_cast< std::uint8_t >(L'5'),
    static_cast< std::uint8_t >(L'6'), static_cast< std::uint8_t >(L'7'),
    static_cast< std::uint8_t >(L'8'), static_cast< std::uint8_t >(L'9'),
    static_cast< std::uint8_t >(L'a'), static_cast< std::uint8_t >(L'b'),
    static_cast< std::uint8_t >(L'c'), static_cast< std::uint8_t >(L'd'),
    static_cast< std::uint8_t >(L'e'), static_cast< std::uint8_t >(L'f')
};

template< bool NarrowIsASCII >
alignas(16) const std::uint8_t to_chars_simd_char_constants< wchar_t, NarrowIsASCII, false >::mm_char_dash[16] =
{
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-')
};
// Character-type-independent byte tables used by to_chars_simd_core.
//
// The template parameter is not used by the members; the visible code
// instantiates the template with void. Presumably it exists only so that the
// static data definitions can live in a header - confirm with the author.
template< typename Tag >
struct to_chars_simd_constants
{
    // Mask selecting the low half of every byte
    alignas(16) static const std::uint8_t mm_15[16];
    // Byte shuffle producing output characters 0-15 (0x80 marks a dash slot)
    alignas(16) static const std::uint8_t mm_shuffle_pattern1[16];
    // Byte shuffle producing output characters 16-31 (0x80 marks a dash slot)
    alignas(16) static const std::uint8_t mm_shuffle_pattern2[16];
};

template< typename Tag >
alignas(16) const std::uint8_t to_chars_simd_constants< Tag >::mm_15[16] =
{
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
};

template< typename Tag >
alignas(16) const std::uint8_t to_chars_simd_constants< Tag >::mm_shuffle_pattern1[16] =
{
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 8 characters
    0x80,                                           // dash slot
    0x08, 0x09, 0x0A, 0x0B,                         // 4 characters
    0x80,                                           // dash slot
    0x0C, 0x0D                                      // 2 characters
};

template< typename Tag >
alignas(16) const std::uint8_t to_chars_simd_constants< Tag >::mm_shuffle_pattern2[16] =
{
    0x00, 0x01,                                     // 2 characters
    0x80,                                           // dash slot
    0x02, 0x03, 0x04, 0x05,                         // 4 characters
    0x80,                                           // dash slot
    0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D  // 8 characters
};
//! Converts UUID to a string of 36 characters, one byte per character, where the first 32 characters
//! are returned in mm_chars1 and mm_chars2 and the last 4 in the highest 32 bits of mm_chars3.
//!
//! data is passed to load_unaligned_si128, which is defined elsewhere (presumably an unaligned 16-byte load).
//! mm_char_table maps a half-byte value (0-15) to its character code, mm_char_dash holds the dash
//! character code in every byte. The lower 96 bits of mm_chars3 are not part of the result.
BOOST_FORCEINLINE void to_chars_simd_core
(
const std::uint8_t* data,
__m128i const& mm_char_table, __m128i const& mm_char_dash,
__m128i& mm_chars1, __m128i& mm_chars2, __m128i& mm_chars3
) noexcept
{
// The constant tables are declared alignas(16), which is what allows viewing them as __m128i in place
__m128i const& mm_15 = *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_constants< void >::mm_15);
__m128i const& mm_shuffle_pattern1 = *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_constants< void >::mm_shuffle_pattern1);
__m128i const& mm_shuffle_pattern2 = *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_constants< void >::mm_shuffle_pattern2);
__m128i mm_input = uuids::detail::load_unaligned_si128(data);
// Split half-bytes
// The shift operates on 32-bit lanes, so bits from the neighboring byte move into the upper half of each byte; the mask removes them
__m128i mm_input_hi = _mm_and_si128(_mm_srli_epi32(mm_input, 4), mm_15);
__m128i mm_input_lo = _mm_and_si128(mm_input, mm_15);
// Stringize each of the halves
// Each half-byte value is used as an index into the 16-entry character table
mm_input_hi = _mm_shuffle_epi8(mm_char_table, mm_input_hi);
mm_input_lo = _mm_shuffle_epi8(mm_char_table, mm_input_lo);
// Join them back together
// Interleaving puts the high half-byte character before the low one for every input byte
__m128i mm_1 = _mm_unpacklo_epi8(mm_input_hi, mm_input_lo);
__m128i mm_2 = _mm_unpackhi_epi8(mm_input_hi, mm_input_lo);
// Insert dashes at positions 8, 13, 18 and 23
// mm_1 mm_2
// |0123456789abcdef|0123456789abcdef|
// |01234567-89ab-cd|ef-0123-456789ab|
//
// Note that the last "cdef" characters are already available at the end of mm_2
//
// The 0x80 entries of the shuffle patterns produce zero bytes at the dash positions.
// _mm_alignr_epi8 by 14 forms a vector of the last 2 bytes of mm_1 followed by the first 14 bytes of mm_2.
mm_chars1 = _mm_shuffle_epi8(mm_1, mm_shuffle_pattern1);
mm_chars2 = _mm_shuffle_epi8(_mm_alignr_epi8(mm_2, mm_1, 14), mm_shuffle_pattern2);
// Unsigned maximum turns the zero bytes into dashes and keeps every other character,
// provided that the dash code is not greater than any of the hexadecimal character codes
mm_chars1 = _mm_max_epu8(mm_chars1, mm_char_dash);
mm_chars2 = _mm_max_epu8(mm_chars2, mm_char_dash);
mm_chars3 = mm_2;
}
//! Formats \a u as 36 characters of type \c Char (1, 2 or 4 bytes wide) using x86 vector instructions.
//!
//! The characters are produced one byte each by to_chars_simd_core and then zero-extended
//! to the width of \c Char while being stored. Exactly 36 characters are written starting at \a out;
//! no terminating null character is added. \a out does not need to be aligned.
//!
//! Returns a pointer one past the last character written (<tt>out + 36</tt>).
template< typename Char >
BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
{
    __m128i mm_chars1, mm_chars2, mm_chars3;
    uuids::detail::to_chars_simd_core
    (
        u.data,
        *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_char_constants< Char >::mm_char_table),
        *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_char_constants< Char >::mm_char_dash),
        mm_chars1, mm_chars2, mm_chars3
    );

    static_assert(sizeof(Char) == 1u || sizeof(Char) == 2u || sizeof(Char) == 4u, "Boost.UUID: Unsupported output character type for to_chars");

    BOOST_IF_CONSTEXPR (sizeof(Char) == 1u)
    {
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out), mm_chars1);
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), mm_chars2);

        // The last 4 characters are in the highest 32 bits of mm_chars3
#if defined(BOOST_UUID_USE_SSE41)
        const std::uint32_t tail = static_cast< std::uint32_t >(_mm_extract_epi32(mm_chars3, 3));
#else
        const std::uint32_t tail = static_cast< std::uint32_t >(_mm_cvtsi128_si32(_mm_srli_si128(mm_chars3, 12)));
#endif
        // out + 32 is not guaranteed to be 4-byte aligned, so the store must not go through
        // a std::uint32_t lvalue. Compilers turn a fixed-size memcpy into a plain unaligned store.
        std::memcpy(out + 32, &tail, sizeof(tail));
    }
    else BOOST_IF_CONSTEXPR (sizeof(Char) == 2u)
    {
        const __m128i mm_0 = _mm_setzero_si128();

        // Zero-extend 32 characters to 16 bits each
#if defined(BOOST_UUID_USE_AVX2)
        _mm256_storeu_si256(reinterpret_cast< __m256i* >(out), _mm256_cvtepu8_epi16(mm_chars1));
        _mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 16), _mm256_cvtepu8_epi16(mm_chars2));
#else
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out), _mm_unpacklo_epi8(mm_chars1, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 8), _mm_unpackhi_epi8(mm_chars1, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), _mm_unpacklo_epi8(mm_chars2, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpackhi_epi8(mm_chars2, mm_0));
#endif

        // The last 4 characters, widened, occupy the upper 64 bits of the unpacked vector
#if defined(BOOST_UUID_USE_SSE41) && (defined(__x86_64__) || defined(_M_X64))
        // out + 32 is only guaranteed to be 2-byte aligned, so copy the bytes
        // instead of storing through a std::uint64_t lvalue
        const std::uint64_t tail = static_cast< std::uint64_t >(_mm_extract_epi64(_mm_unpackhi_epi8(mm_chars3, mm_0), 1));
        std::memcpy(out + 32, &tail, sizeof(tail));
#else
        _mm_storeh_pd(reinterpret_cast< BOOST_MAY_ALIAS double* >(out + 32), _mm_castsi128_pd(_mm_unpackhi_epi8(mm_chars3, mm_0)));
#endif
    }
    else
    {
        const __m128i mm_0 = _mm_setzero_si128();

        // Zero-extend 32 characters to 32 bits each
#if 0 && defined(BOOST_UUID_USE_AVX10_1)
        // Slower than the AVX2 version below on Intel Golden Cove. Perhaps it will become beneficial on newer microarchitectures.
        _mm512_storeu_epi32(out, _mm512_cvtepu8_epi32(mm_chars1));
        _mm512_storeu_epi32(out + 16, _mm512_cvtepu8_epi32(mm_chars2));
#elif defined(BOOST_UUID_USE_AVX2)
        _mm256_storeu_si256(reinterpret_cast< __m256i* >(out), _mm256_cvtepu8_epi32(mm_chars1));
        _mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 8), _mm256_cvtepu8_epi32(_mm_unpackhi_epi64(mm_chars1, mm_chars1)));
        _mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 16), _mm256_cvtepu8_epi32(mm_chars2));
        _mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 24), _mm256_cvtepu8_epi32(_mm_unpackhi_epi64(mm_chars2, mm_chars2)));
#else
        __m128i mm = _mm_unpacklo_epi8(mm_chars1, mm_0);
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out), _mm_unpacklo_epi16(mm, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 4), _mm_unpackhi_epi16(mm, mm_0));
        mm = _mm_unpackhi_epi8(mm_chars1, mm_0);
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 8), _mm_unpacklo_epi16(mm, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 12), _mm_unpackhi_epi16(mm, mm_0));
        mm = _mm_unpacklo_epi8(mm_chars2, mm_0);
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), _mm_unpacklo_epi16(mm, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 20), _mm_unpackhi_epi16(mm, mm_0));
        mm = _mm_unpackhi_epi8(mm_chars2, mm_0);
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpacklo_epi16(mm, mm_0));
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 28), _mm_unpackhi_epi16(mm, mm_0));
#endif

        // The last 4 characters fill a whole 128-bit vector once widened to 32 bits
        _mm_storeu_si128(reinterpret_cast< __m128i* >(out + 32), _mm_unpackhi_epi16(_mm_unpackhi_epi8(mm_chars3, mm_0), mm_0));
    }

    return out + 36;
}
} // namespace detail
} // namespace uuids
} // namespace boost
#endif // defined(BOOST_UUID_USE_SSSE3)
#endif // BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED

View File

@@ -5,14 +5,19 @@
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/core/lightweight_test.hpp>
#include <boost/config/pragma_message.hpp>
#include <boost/array.hpp>
#include <boost/config.hpp>
#include <string>
#if defined(BOOST_UUID_NO_CXX14_CONSTEXPR_RT)
BOOST_PRAGMA_MESSAGE( "Test is not constexpr because BOOST_UUID_NO_CXX14_CONSTEXPR_RT is defined" )
#endif
using namespace boost::uuids;
template<class Ch>
BOOST_CXX14_CONSTEXPR boost::array<Ch, 36> uuid_to_string( uuid const& u )
BOOST_UUID_CXX14_CONSTEXPR_RT boost::array<Ch, 36> uuid_to_string( uuid const& u )
{
boost::array<Ch, 36> r = {{}};
to_chars( u, r.begin(), r.end() );
@@ -25,7 +30,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u;
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "00000000-0000-0000-0000-000000000000" );
@@ -33,7 +38,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"00000000-0000-0000-0000-000000000000" );
@@ -41,7 +46,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"00000000-0000-0000-0000-000000000000" );
@@ -49,7 +54,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"00000000-0000-0000-0000-000000000000" );
@@ -59,7 +64,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"00000000-0000-0000-0000-000000000000" );
@@ -73,7 +78,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u = {{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }};
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -81,7 +86,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -89,7 +94,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -97,7 +102,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -107,7 +112,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -121,7 +126,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u = {{ 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef, 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }};
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "12345678-90ab-cdef-1234-567890abcdef" );
@@ -129,7 +134,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"12345678-90ab-cdef-1234-567890abcdef" );
@@ -137,7 +142,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"12345678-90ab-cdef-1234-567890abcdef" );
@@ -145,7 +150,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"12345678-90ab-cdef-1234-567890abcdef" );
@@ -155,7 +160,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"12345678-90ab-cdef-1234-567890abcdef" );