2
0
mirror of https://github.com/boostorg/uuid.git synced 2026-01-19 04:42:16 +00:00

Merge pull request #184 from Lastique/feature/to_chars_simd

Add SIMD implementation of `to_chars`
This commit is contained in:
Peter Dimov
2025-12-28 12:09:20 +02:00
committed by GitHub
11 changed files with 444 additions and 77 deletions

View File

@@ -100,6 +100,14 @@ jobs:
container: ubuntu:24.04
install: g++-13-multilib
address-model: 32,64
- toolset: gcc-13
cxxstd: "11,14,17,20,2b"
instruction-set: haswell
cpu-requirements: [ avx2, bmi1, bmi2 ]
os: ubuntu-latest
container: ubuntu:24.04
install: g++-13-multilib
address-model: 32,64
- toolset: gcc-13
cxxstd: "11,14,17,20,2b"
instruction-set: skylake-avx512
@@ -230,6 +238,14 @@ jobs:
container: ubuntu:24.04
os: ubuntu-latest
install: clang-17
- toolset: clang
compiler: clang++-17
cxxstd: "11,14,17,20,2b"
instruction-set: haswell
cpu-requirements: [ avx2, bmi1, bmi2 ]
container: ubuntu:24.04
os: ubuntu-latest
install: clang-17
- toolset: clang
compiler: clang++-17
cxxstd: "11,14,17,20,2b"
@@ -239,14 +255,14 @@ jobs:
os: ubuntu-latest
install: clang-17
- toolset: clang
cxxstd: "11,14,17,20,2b"
os: macos-14
cxxstd: "11,14,17,20,2b"
- toolset: clang
cxxstd: "11,14,17,20,23,2c"
os: macos-15
- toolset: clang
cxxstd: "11,14,17,20,23,2c"
- toolset: clang
os: macos-26
cxxstd: "11,14,17,20,23,2c"
runs-on: ${{matrix.os}}

View File

@@ -9,6 +9,7 @@
* Added a `noexcept` `operator()` overload to `string_generator`.
* `string_generator` now supports the Unicode character types in addition to `char` and `wchar_t`.
* Most `uuid` accessors, operations, and `to_chars` are now `constexpr` when possible (on {cpp}14 and higher and on recent compilers).
* Added SIMD implementation of `to_chars`, which can offer up to 5.5x performance improvement in UUID formatting.
== Changes in Boost 1.90.0

View File

@@ -25,15 +25,22 @@ However, there are a few options that can be enabled by defining macros prior to
|`BOOST_UUID_USE_SSE3`
|If defined, enables optimizations for https://en.wikipedia.org/wiki/SSE3[SSE3] extensions available in x86 processors.
|`BOOST_UUID_USE_SSSE3`
|If defined, enables optimizations for https://en.wikipedia.org/wiki/SSSE3[SSSE3] extensions available in x86 processors.
|`BOOST_UUID_USE_SSE41`
|If defined, enables optimizations for https://en.wikipedia.org/wiki/SSE4#SSE4.1[SSE4.1] extensions available in x86 processors.
|`BOOST_UUID_USE_AVX`
|If defined, enables optimizations for https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX] extensions available in modern x86 processors.
|`BOOST_UUID_USE_AVX2`
|If defined, enables optimizations for https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#Advanced_Vector_Extensions_2[AVX2] extensions available in modern x86 processors.
|`BOOST_UUID_USE_AVX10_1`
|If defined, enables optimizations for https://en.wikipedia.org/wiki/AVX-512[AVX-512] and https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX10[AVX10.1] extensions available in modern x86 processors.
The library does not require 512-bit vectors and is compatible with CPUs implementing AVX-512F, CD, VL, BW and DQ instruction subsets (i.e. equivalent to Intel Skylake-X).
When defined by the user, this macro indicates support for the full set of instructions defined in AVX10.1. Currently, the library does not require 512-bit vectors and is compatible with CPUs implementing AVX-512F,
CD, VL, BW and DQ instruction subsets (i.e. equivalent to Intel Skylake-X), so it may auto-detect and use AVX-512 even if only those subsets are supported.
|===

View File

@@ -32,6 +32,10 @@
#define BOOST_UUID_USE_SSE3
#endif
#if defined(__SSSE3__) && !defined(BOOST_UUID_USE_SSSE3)
#define BOOST_UUID_USE_SSSE3
#endif
#if defined(__SSE4_1__) && !defined(BOOST_UUID_USE_SSE41)
#define BOOST_UUID_USE_SSE41
#endif
@@ -40,6 +44,10 @@
#define BOOST_UUID_USE_AVX
#endif
#if defined(__AVX2__) && !defined(BOOST_UUID_USE_AVX2)
#define BOOST_UUID_USE_AVX2
#endif
#if ((defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512BW__)) || defined(__AVX10_1__)) && !defined(BOOST_UUID_USE_AVX10_1)
#define BOOST_UUID_USE_AVX10_1
#endif
@@ -54,6 +62,10 @@
#define BOOST_UUID_USE_AVX
#endif
#if defined(__AVX2__) && !defined(BOOST_UUID_USE_AVX2)
#define BOOST_UUID_USE_AVX2
#endif
#if ((defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512BW__)) || defined(__AVX10_1__)) && !defined(BOOST_UUID_USE_AVX10_1)
#define BOOST_UUID_USE_AVX10_1
#endif
@@ -61,7 +73,11 @@
#endif
// More advanced ISA extensions imply less advanced are also available
#if !defined(BOOST_UUID_USE_AVX) && defined(BOOST_UUID_USE_AVX10_1)
#if !defined(BOOST_UUID_USE_AVX2) && defined(BOOST_UUID_USE_AVX10_1)
#define BOOST_UUID_USE_AVX2
#endif
#if !defined(BOOST_UUID_USE_AVX) && defined(BOOST_UUID_USE_AVX2)
#define BOOST_UUID_USE_AVX
#endif
@@ -69,7 +85,11 @@
#define BOOST_UUID_USE_SSE41
#endif
#if !defined(BOOST_UUID_USE_SSE3) && defined(BOOST_UUID_USE_SSE41)
#if !defined(BOOST_UUID_USE_SSSE3) && defined(BOOST_UUID_USE_SSE41)
#define BOOST_UUID_USE_SSSE3
#endif
#if !defined(BOOST_UUID_USE_SSE3) && defined(BOOST_UUID_USE_SSSE3)
#define BOOST_UUID_USE_SSE3
#endif
@@ -79,8 +99,10 @@
#if !defined(BOOST_UUID_NO_SIMD) && \
!defined(BOOST_UUID_USE_AVX10_1) && \
!defined(BOOST_UUID_USE_AVX2) && \
!defined(BOOST_UUID_USE_AVX) && \
!defined(BOOST_UUID_USE_SSE41) && \
!defined(BOOST_UUID_USE_SSSE3) && \
!defined(BOOST_UUID_USE_SSE3) && \
!defined(BOOST_UUID_USE_SSE2)
#define BOOST_UUID_NO_SIMD

View File

@@ -7,59 +7,31 @@
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/uuid/uuid.hpp>
#include <boost/config.hpp>
#include <boost/uuid/detail/config.hpp>
#include <boost/uuid/detail/is_constant_evaluated.hpp>
#include <boost/uuid/detail/to_chars_generic.hpp>
#if defined(BOOST_UUID_USE_SSSE3)
#include <boost/uuid/detail/to_chars_x86.hpp>
#endif
namespace boost {
namespace uuids {
namespace detail {
constexpr char const* to_chars_digits( char const* ) noexcept
template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
{
return "0123456789abcdef-";
}
constexpr wchar_t const* to_chars_digits( wchar_t const* ) noexcept
{
return L"0123456789abcdef-";
}
constexpr char16_t const* to_chars_digits( char16_t const* ) noexcept
{
return u"0123456789abcdef-";
}
constexpr char32_t const* to_chars_digits( char32_t const* ) noexcept
{
return U"0123456789abcdef-";
}
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
constexpr char8_t const* to_chars_digits( char8_t const* ) noexcept
{
return u8"0123456789abcdef-";
}
#endif
template<class Ch> BOOST_CXX14_CONSTEXPR inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
{
constexpr Ch const* digits = to_chars_digits( static_cast<Ch const*>( nullptr ) );
for( std::size_t i = 0; i < 16; ++i )
#if defined(BOOST_UUID_USE_SSSE3)
if( detail::is_constant_evaluated_rt() )
{
std::uint8_t ch = u.data()[ i ];
*out++ = digits[ (ch >> 4) & 0x0F ];
*out++ = digits[ ch & 0x0F ];
if( i == 3 || i == 5 || i == 7 || i == 9 )
{
*out++ = digits[ 16 ];
}
return detail::to_chars_generic( u, out );
}
return out;
else
{
return detail::to_chars_simd( u, out );
}
#else
return detail::to_chars_generic( u, out );
#endif
}
} // namespace detail

View File

@@ -0,0 +1,74 @@
#ifndef BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED
#define BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED
// Copyright 2009 Andy Tompkins
// Copyright 2024 Peter Dimov
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/uuid/uuid.hpp>
#include <boost/config.hpp>
#if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
#include <boost/config/pragma_message.hpp>
BOOST_PRAGMA_MESSAGE( "Using to_chars_generic.hpp" )
#endif
namespace boost {
namespace uuids {
namespace detail {
// Digit alphabet lookup, overloaded on the output character type.
//
// The pointer argument is never read; it only selects the overload. Every
// overload returns a literal of 17 characters: indices 0..15 are the lowercase
// hexadecimal digits, index 16 is the dash used as the group separator.

// Alphabet for char output.
constexpr auto to_chars_digits( char const* ) noexcept -> char const*
{
    return "0123456789abcdef-";
}

// Alphabet for wchar_t output.
constexpr auto to_chars_digits( wchar_t const* ) noexcept -> wchar_t const*
{
    return L"0123456789abcdef-";
}

// Alphabet for char16_t output.
constexpr auto to_chars_digits( char16_t const* ) noexcept -> char16_t const*
{
    return u"0123456789abcdef-";
}

// Alphabet for char32_t output.
constexpr auto to_chars_digits( char32_t const* ) noexcept -> char32_t const*
{
    return U"0123456789abcdef-";
}

#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L

// Alphabet for char8_t output; only available when the compiler has char8_t.
constexpr auto to_chars_digits( char8_t const* ) noexcept -> char8_t const*
{
    return u8"0123456789abcdef-";
}

#endif
// Portable formatter: writes the canonical 8-4-4-4-12 textual form of `u`
// (32 lowercase hex digits plus 4 dashes, 36 characters, no terminator) to
// `out` and returns the position one past the last character written.
// The caller must provide room for 36 characters.
template<class Ch> BOOST_CXX14_CONSTEXPR inline Ch* to_chars_generic( uuid const& u, Ch* out ) noexcept
{
    constexpr Ch const* alphabet = to_chars_digits( static_cast<Ch const*>( nullptr ) );

    Ch* dest = out;

    for( std::size_t index = 0; index != 16; ++index )
    {
        std::uint8_t const octet = u.data()[ index ];

        // High nibble first, then low nibble.
        dest[ 0 ] = alphabet[ ( octet >> 4 ) & 0x0F ];
        dest[ 1 ] = alphabet[ octet & 0x0F ];
        dest += 2;

        // A separator follows the 4th, 6th, 8th and 10th byte.
        switch( index )
        {
        case 3:
        case 5:
        case 7:
        case 9:
            *dest++ = alphabet[ 16 ];
            break;

        default:
            break;
        }
    }

    return dest;
}
}}} //namespace boost::uuids::detail
#endif // BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED

View File

@@ -0,0 +1,261 @@
#ifndef BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
#define BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
// Copyright 2025 Andrey Semashev
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/uuid/detail/config.hpp>
#if defined(BOOST_UUID_USE_SSSE3)
#include <cstdint>
#include <boost/uuid/uuid.hpp>
#if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
#include <boost/config/pragma_message.hpp>
#if defined(BOOST_UUID_USE_AVX10_1)
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, AVX10.1" )
#elif defined(BOOST_UUID_USE_AVX2)
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, AVX2" )
#elif defined(BOOST_UUID_USE_SSE41)
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE4.1" )
#else
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
#endif
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
#if defined(BOOST_UUID_USE_AVX2)
#include <immintrin.h>
#elif defined(BOOST_UUID_USE_SSE41)
#include <smmintrin.h>
#else
#include <tmmintrin.h>
#endif
namespace boost {
namespace uuids {
namespace detail {
// Byte tables consumed by the SIMD to_chars implementation, selected by the
// output character type.
//
// The two boolean parameters are computed by their default arguments and tell
// whether the execution encodings of char and wchar_t give '0', '9', 'a', 'f'
// and '-' their ASCII codes. This primary template supplies the ASCII codes.
template<
    class Char,
    bool NarrowIsAscii = ('0' == 0x30 && '9' == 0x39 && 'a' == 0x61 && 'f' == 0x66 && '-' == 0x2D),
    bool WideIsAscii = (L'0' == 0x30 && L'9' == 0x39 && L'a' == 0x61 && L'f' == 0x66 && L'-' == 0x2D)
>
struct to_chars_simd_char_constants
{
    // Maps a nibble value (the index) to the code of its hexadecimal digit.
    alignas(16) static const std::uint8_t mm_char_table[16];
    // The dash code replicated into all 16 bytes.
    alignas(16) static const std::uint8_t mm_char_dash[16];
};

template< class Char, bool NarrowIsAscii, bool WideIsAscii >
alignas(16) const std::uint8_t to_chars_simd_char_constants< Char, NarrowIsAscii, WideIsAscii >::mm_char_table[16] =
{
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, // 0123456789 in ASCII
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66                          // abcdef in ASCII
};

template< class Char, bool NarrowIsAscii, bool WideIsAscii >
alignas(16) const std::uint8_t to_chars_simd_char_constants< Char, NarrowIsAscii, WideIsAscii >::mm_char_dash[16] =
{
    0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, // -------- in ASCII
    0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D  // -------- in ASCII
};
// Specialization for char output when the narrow execution encoding does not
// use the ASCII codes: the tables are built from the character literals, so
// they hold whatever codes the compiler assigns to them.
template< bool WideIsAscii >
struct to_chars_simd_char_constants< char, false, WideIsAscii >
{
    // This requirement is necessary for the _mm_max_epu8 trick in to_chars_simd_core below to work
    static_assert(
        static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('0') &&
        static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('a'),
        "Boost.UUID: Unsupported char encoding, '-' character code is expected to be less than any hexadecimal characters");

    // Maps a nibble value (the index) to the code of its hexadecimal digit.
    alignas(16) static const std::uint8_t mm_char_table[16];
    // The dash code replicated into all 16 bytes.
    alignas(16) static const std::uint8_t mm_char_dash[16];
};

template< bool WideIsAscii >
alignas(16) const std::uint8_t to_chars_simd_char_constants< char, false, WideIsAscii >::mm_char_table[16] =
{
    static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('1'),
    static_cast< std::uint8_t >('2'), static_cast< std::uint8_t >('3'),
    static_cast< std::uint8_t >('4'), static_cast< std::uint8_t >('5'),
    static_cast< std::uint8_t >('6'), static_cast< std::uint8_t >('7'),
    static_cast< std::uint8_t >('8'), static_cast< std::uint8_t >('9'),
    static_cast< std::uint8_t >('a'), static_cast< std::uint8_t >('b'),
    static_cast< std::uint8_t >('c'), static_cast< std::uint8_t >('d'),
    static_cast< std::uint8_t >('e'), static_cast< std::uint8_t >('f')
};

template< bool WideIsAscii >
alignas(16) const std::uint8_t to_chars_simd_char_constants< char, false, WideIsAscii >::mm_char_dash[16] =
{
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-'),
    static_cast< std::uint8_t >('-'), static_cast< std::uint8_t >('-')
};
// Specialization for wchar_t output when the wide execution encoding does not
// use the ASCII codes: the tables are built from the wide character literals,
// truncated to one byte each. The first static_assert verifies that this
// truncation is lossless for the characters involved.
template< bool NarrowIsAscii >
struct to_chars_simd_char_constants< wchar_t, NarrowIsAscii, false >
{
    static_assert(
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'0')) == L'0' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'9')) == L'9' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'a')) == L'a' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'f')) == L'f' &&
        static_cast< wchar_t >(static_cast< std::uint8_t >(L'-')) == L'-',
        "Boost.UUID: Unsupported wchar_t encoding, hexadecimal and dash character codes are expected to be representable by a single byte");

    // This requirement is necessary for the _mm_max_epu8 trick in to_chars_simd_core below to work
    static_assert(
        static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'0') &&
        static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'a'),
        "Boost.UUID: Unsupported wchar_t encoding, L'-' character code is expected to be less than any hexadecimal characters");

    // Maps a nibble value (the index) to the low byte of its hexadecimal digit code.
    alignas(16) static const std::uint8_t mm_char_table[16];
    // The low byte of the dash code replicated into all 16 bytes.
    alignas(16) static const std::uint8_t mm_char_dash[16];
};

template< bool NarrowIsAscii >
alignas(16) const std::uint8_t to_chars_simd_char_constants< wchar_t, NarrowIsAscii, false >::mm_char_table[16] =
{
    static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'1'),
    static_cast< std::uint8_t >(L'2'), static_cast< std::uint8_t >(L'3'),
    static_cast< std::uint8_t >(L'4'), static_cast< std::uint8_t >(L'5'),
    static_cast< std::uint8_t >(L'6'), static_cast< std::uint8_t >(L'7'),
    static_cast< std::uint8_t >(L'8'), static_cast< std::uint8_t >(L'9'),
    static_cast< std::uint8_t >(L'a'), static_cast< std::uint8_t >(L'b'),
    static_cast< std::uint8_t >(L'c'), static_cast< std::uint8_t >(L'd'),
    static_cast< std::uint8_t >(L'e'), static_cast< std::uint8_t >(L'f')
};

template< bool NarrowIsAscii >
alignas(16) const std::uint8_t to_chars_simd_char_constants< wchar_t, NarrowIsAscii, false >::mm_char_dash[16] =
{
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-'),
    static_cast< std::uint8_t >(L'-'), static_cast< std::uint8_t >(L'-')
};
// Character-type-independent byte tables for the SIMD to_chars implementation.
// The template parameter is not used by the tables; the code in this file
// always refers to the < void > instantiation.
template< typename Unused >
struct to_chars_simd_constants
{
    // Mask that keeps the low nibble of every byte.
    alignas(16) static const std::uint8_t mm_15[16];
    // Byte-shuffle index vectors used to open gaps for the dashes.
    // An index of 0x80 marks a gap (lanes 8 and 13 in pattern1, lanes 2 and 7 in pattern2).
    alignas(16) static const std::uint8_t mm_shuffle_pattern1[16];
    alignas(16) static const std::uint8_t mm_shuffle_pattern2[16];
};

template< typename Unused >
alignas(16) const std::uint8_t to_chars_simd_constants< Unused >::mm_15[16] =
{
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
};

template< typename Unused >
alignas(16) const std::uint8_t to_chars_simd_constants< Unused >::mm_shuffle_pattern1[16] =
{
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x80,
    0x08, 0x09, 0x0A, 0x0B,
    0x80,
    0x0C, 0x0D
};

template< typename Unused >
alignas(16) const std::uint8_t to_chars_simd_constants< Unused >::mm_shuffle_pattern2[16] =
{
    0x00, 0x01,
    0x80,
    0x02, 0x03, 0x04, 0x05,
    0x80,
    0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D
};
//! Converts UUID to a string of 36 characters, where first 32 characters are returned in mm_chars1 and mm_chars2 and the last 4 in the highest 32 bits of mm_chars3
//!
//! Every character is produced as a single byte; widening to larger character types is left to the caller.
//! \param data           Pointer to the 16 UUID bytes to format.
//! \param mm_char_table  Lookup vector: byte i holds the character code of hexadecimal digit i.
//! \param mm_char_dash   Vector merged into the result with an unsigned byte maximum; the callers in this file pass
//!                       the dash character code in every byte.
//! \param mm_chars1      Receives characters 0..15 of the string.
//! \param mm_chars2      Receives characters 16..31 of the string.
//! \param mm_chars3      Receives hex digits 16..31 (no dashes); only its highest 4 bytes, which are characters
//!                       32..35 of the string, are meaningful to the caller.
BOOST_FORCEINLINE void to_chars_simd_core
(
const std::uint8_t* data,
__m128i const& mm_char_table, __m128i const& mm_char_dash,
__m128i& mm_chars1, __m128i& mm_chars2, __m128i& mm_chars3
) noexcept
{
// View the 16-byte aligned constant tables as vectors
__m128i const& mm_15 = *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_constants< void >::mm_15);
__m128i const& mm_shuffle_pattern1 = *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_constants< void >::mm_shuffle_pattern1);
__m128i const& mm_shuffle_pattern2 = *reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_constants< void >::mm_shuffle_pattern2);
// NOTE(review): load_unaligned_si128 is defined outside this file; presumably a 16-byte load with no alignment requirement - confirm
__m128i mm_input = uuids::detail::load_unaligned_si128(data);
// Split half-bytes
// The shift works on 32-bit lanes, so bits from the neighbouring byte leak into the top of each byte;
// masking with mm_15 discards them and leaves the high nibble of each byte in its low 4 bits.
__m128i mm_input_hi = _mm_and_si128(_mm_srli_epi32(mm_input, 4), mm_15);
__m128i mm_input_lo = _mm_and_si128(mm_input, mm_15);
// Stringize each of the halves
// Each nibble value is used as an index into mm_char_table.
mm_input_hi = _mm_shuffle_epi8(mm_char_table, mm_input_hi);
mm_input_lo = _mm_shuffle_epi8(mm_char_table, mm_input_lo);
// Join them back together
// Interleaving hi before lo yields the digits in reading order: mm_1 holds hex digits 0..15, mm_2 holds hex digits 16..31.
__m128i mm_1 = _mm_unpacklo_epi8(mm_input_hi, mm_input_lo);
__m128i mm_2 = _mm_unpackhi_epi8(mm_input_hi, mm_input_lo);
// Insert dashes at positions 8, 13, 18 and 23
// mm_1 mm_2
// |0123456789abcdef|0123456789abcdef|
// |01234567-89ab-cd|ef-0123-456789ab|
//
// Note that the last "cdef" characters are already available at the end of mm_2
//
// The shuffle patterns contain 0x80 at the dash positions, which makes _mm_shuffle_epi8 write a zero byte there.
mm_chars1 = _mm_shuffle_epi8(mm_1, mm_shuffle_pattern1);
// _mm_alignr_epi8(mm_2, mm_1, 14) yields the last 2 bytes of mm_1 followed by the first 14 bytes of mm_2, i.e. hex digits 14..29.
mm_chars2 = _mm_shuffle_epi8(_mm_alignr_epi8(mm_2, mm_1, 14), mm_shuffle_pattern2);
// The unsigned maximum replaces the zero bytes with the dash and keeps every digit byte,
// which requires the dash code to be no greater than any digit code.
mm_chars1 = _mm_max_epu8(mm_chars1, mm_char_dash);
mm_chars2 = _mm_max_epu8(mm_chars2, mm_char_dash);
mm_chars3 = mm_2;
}
//! Formats \a u as 36 characters of type \c Char starting at \a out and returns \c out + 36.
//!
//! The single-byte characters are produced by to_chars_simd_core and then stored as is (1-byte \c Char) or
//! zero-extended to 2 or 4 bytes per character. In every branch the stores add up to exactly 36 characters;
//! no terminator is written. All stores use unaligned store operations, so \a out needs no particular alignment
//! for the vector stores.
template< typename Char >
BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
{
__m128i mm_chars1, mm_chars2, mm_chars3;
// NOTE(review): relies on u.data being usable where a const std::uint8_t* is expected - confirm against uuid.hpp
uuids::detail::to_chars_simd_core
(
u.data,
*reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_char_constants< Char >::mm_char_table),
*reinterpret_cast< const __m128i* >(uuids::detail::to_chars_simd_char_constants< Char >::mm_char_dash),
mm_chars1, mm_chars2, mm_chars3
);
static_assert(sizeof(Char) == 1u || sizeof(Char) == 2u || sizeof(Char) == 4u, "Boost.UUID: Unsupported output character type for to_chars");
BOOST_IF_CONSTEXPR (sizeof(Char) == 1u)
{
// 1 byte per character: 16 + 16 + 4 bytes
_mm_storeu_si128(reinterpret_cast< __m128i* >(out), mm_chars1);
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), mm_chars2);
// The last 4 characters are the highest 32-bit element of mm_chars3
#if defined(BOOST_UUID_USE_SSE41)
*reinterpret_cast< BOOST_MAY_ALIAS std::uint32_t* >(out + 32) = _mm_extract_epi32(mm_chars3, 3);
#else
// Without SSE4.1, shift the highest 4 bytes down to the lowest element and extract that
*reinterpret_cast< BOOST_MAY_ALIAS std::uint32_t* >(out + 32) = _mm_cvtsi128_si32(_mm_srli_si128(mm_chars3, 12));
#endif
}
else BOOST_IF_CONSTEXPR (sizeof(Char) == 2u)
{
// 2 bytes per character: each byte is zero-extended to 16 bits
const __m128i mm_0 = _mm_setzero_si128();
#if defined(BOOST_UUID_USE_AVX2)
// One 256-bit store covers 16 characters
_mm256_storeu_si256(reinterpret_cast< __m256i* >(out), _mm256_cvtepu8_epi16(mm_chars1));
_mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 16), _mm256_cvtepu8_epi16(mm_chars2));
#else
// Interleaving with zero bytes zero-extends 8 characters per 128-bit store
_mm_storeu_si128(reinterpret_cast< __m128i* >(out), _mm_unpacklo_epi8(mm_chars1, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 8), _mm_unpackhi_epi8(mm_chars1, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), _mm_unpacklo_epi8(mm_chars2, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpackhi_epi8(mm_chars2, mm_0));
#endif
// The last 4 characters end up in the upper 64 bits after widening the high half of mm_chars3
#if defined(BOOST_UUID_USE_SSE41) && (defined(__x86_64__) || defined(_M_X64))
*reinterpret_cast< BOOST_MAY_ALIAS std::uint64_t* >(out + 32) = _mm_extract_epi64(_mm_unpackhi_epi8(mm_chars3, mm_0), 1);
#else
// _mm_storeh_pd stores the upper 64 bits of the vector; the double type is only a vehicle for the 8-byte store
_mm_storeh_pd(reinterpret_cast< BOOST_MAY_ALIAS double* >(out + 32), _mm_castsi128_pd(_mm_unpackhi_epi8(mm_chars3, mm_0)));
#endif
}
else
{
// 4 bytes per character: each byte is zero-extended to 32 bits
const __m128i mm_0 = _mm_setzero_si128();
#if 0 && defined(BOOST_UUID_USE_AVX10_1)
// Slower than the AVX2 version below on Intel Golden Cove. Perhaps, it will become beneficial on newer microarchitectures.
_mm512_storeu_epi32(out, _mm512_cvtepu8_epi32(mm_chars1));
_mm512_storeu_epi32(out + 16, _mm512_cvtepu8_epi32(mm_chars2));
#elif defined(BOOST_UUID_USE_AVX2)
// _mm256_cvtepu8_epi32 widens the lowest 8 bytes; _mm_unpackhi_epi64(x, x) moves the upper 8 bytes down for the second store
_mm256_storeu_si256(reinterpret_cast< __m256i* >(out), _mm256_cvtepu8_epi32(mm_chars1));
_mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 8), _mm256_cvtepu8_epi32(_mm_unpackhi_epi64(mm_chars1, mm_chars1)));
_mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 16), _mm256_cvtepu8_epi32(mm_chars2));
_mm256_storeu_si256(reinterpret_cast< __m256i* >(out + 24), _mm256_cvtepu8_epi32(_mm_unpackhi_epi64(mm_chars2, mm_chars2)));
#else
// Two rounds of interleaving with zero (8-bit, then 16-bit) widen 4 characters per 128-bit store
__m128i mm = _mm_unpacklo_epi8(mm_chars1, mm_0);
_mm_storeu_si128(reinterpret_cast< __m128i* >(out), _mm_unpacklo_epi16(mm, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 4), _mm_unpackhi_epi16(mm, mm_0));
mm = _mm_unpackhi_epi8(mm_chars1, mm_0);
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 8), _mm_unpacklo_epi16(mm, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 12), _mm_unpackhi_epi16(mm, mm_0));
mm = _mm_unpacklo_epi8(mm_chars2, mm_0);
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), _mm_unpacklo_epi16(mm, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 20), _mm_unpackhi_epi16(mm, mm_0));
mm = _mm_unpackhi_epi8(mm_chars2, mm_0);
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpacklo_epi16(mm, mm_0));
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 28), _mm_unpackhi_epi16(mm, mm_0));
#endif
// The last 4 characters: the highest 4 bytes of mm_chars3 widened to 32 bits each
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 32), _mm_unpackhi_epi16(_mm_unpackhi_epi8(mm_chars3, mm_0), mm_0));
}
return out + 36;
}
} // namespace detail
} // namespace uuids
} // namespace boost
#endif // defined(BOOST_UUID_USE_SSSE3)
#endif // BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED

View File

@@ -31,7 +31,7 @@ namespace uuids {
template<class OutputIterator>
BOOST_CXX14_CONSTEXPR OutputIterator to_chars( uuid const& u, OutputIterator out )
{
char tmp[ 36 ] = {};
alignas( 16 ) char tmp[ 36 ] = {};
detail::to_chars( u, tmp );
for( std::size_t i = 0; i < 36; ++i )
@@ -79,7 +79,7 @@ BOOST_CXX14_CONSTEXPR inline Ch* to_chars( uuid const& u, Ch (&buffer)[ 36 ] ) n
template<class Ch, class Traits>
std::basic_ostream<Ch, Traits>& operator<<( std::basic_ostream<Ch, Traits>& os, uuid const& u )
{
char tmp[ 37 ];
alignas( 16 ) char tmp[ 37 ];
to_chars( u, tmp );
os << tmp;

View File

@@ -29,10 +29,14 @@ boost_test(TYPE run SOURCES test_comparison.cpp COMPILE_DEFINITIONS BOOST_UUID_N
boost_test(TYPE run SOURCES test_include1.cpp test_include2.cpp)
boost_test(TYPE run SOURCES test_io.cpp LINK_LIBRARIES Boost::lexical_cast Boost::predef)
boost_test(TYPE run SOURCES test_io_2.cpp)
boost_test(TYPE run SOURCES test_to_chars.cpp)
boost_test(TYPE run SOURCES test_to_chars_2.cpp)
boost_test(TYPE run SOURCES test_io.cpp LINK_LIBRARIES Boost::lexical_cast Boost::predef COMPILE_DEFINITIONS BOOST_UUID_REPORT_IMPLEMENTATION=1)
boost_test(TYPE run SOURCES test_io.cpp LINK_LIBRARIES Boost::lexical_cast Boost::predef COMPILE_DEFINITIONS BOOST_UUID_NO_SIMD=1 BOOST_UUID_REPORT_IMPLEMENTATION=1 NAME test_io_no_simd)
boost_test(TYPE run SOURCES test_io_2.cpp COMPILE_DEFINITIONS BOOST_UUID_REPORT_IMPLEMENTATION=1)
boost_test(TYPE run SOURCES test_io_2.cpp COMPILE_DEFINITIONS BOOST_UUID_NO_SIMD=1 BOOST_UUID_REPORT_IMPLEMENTATION=1 NAME test_io_2_no_simd)
boost_test(TYPE run SOURCES test_to_chars.cpp COMPILE_DEFINITIONS BOOST_UUID_REPORT_IMPLEMENTATION=1)
boost_test(TYPE run SOURCES test_to_chars.cpp COMPILE_DEFINITIONS BOOST_UUID_NO_SIMD=1 BOOST_UUID_REPORT_IMPLEMENTATION=1 NAME test_to_chars_no_simd)
boost_test(TYPE run SOURCES test_to_chars_2.cpp COMPILE_DEFINITIONS BOOST_UUID_REPORT_IMPLEMENTATION=1)
boost_test(TYPE run SOURCES test_to_chars_2.cpp COMPILE_DEFINITIONS BOOST_UUID_NO_SIMD=1 BOOST_UUID_REPORT_IMPLEMENTATION=1 NAME test_to_chars_2_no_simd)
boost_test(TYPE run SOURCES test_uuid_clock.cpp)

View File

@@ -89,11 +89,16 @@ run test_comparison.cpp : : : <define>BOOST_UUID_NO_SIMD <define>BOOST_UUID_REPO
# test uuid_io.hpp
run test_io.cpp
: : : <library>/boost/lexical_cast//boost_lexical_cast <library>/boost/predef//boost_predef -$(WERROR) ;
run test_io_2.cpp ;
: : : <library>/boost/lexical_cast//boost_lexical_cast <library>/boost/predef//boost_predef <define>BOOST_UUID_REPORT_IMPLEMENTATION -$(WERROR) ;
run test_io.cpp
: : : <library>/boost/lexical_cast//boost_lexical_cast <library>/boost/predef//boost_predef <define>BOOST_UUID_NO_SIMD <define>BOOST_UUID_REPORT_IMPLEMENTATION -$(WERROR) : test_io_no_simd ;
run test_io_2.cpp : : : <define>BOOST_UUID_REPORT_IMPLEMENTATION ;
run test_io_2.cpp : : : <define>BOOST_UUID_NO_SIMD <define>BOOST_UUID_REPORT_IMPLEMENTATION : test_io_2_no_simd ;
run test_to_chars.cpp ;
run test_to_chars_2.cpp ;
run test_to_chars.cpp : : : <define>BOOST_UUID_REPORT_IMPLEMENTATION ;
run test_to_chars.cpp : : : <define>BOOST_UUID_NO_SIMD <define>BOOST_UUID_REPORT_IMPLEMENTATION : test_to_chars_no_simd ;
run test_to_chars_2.cpp : : : <define>BOOST_UUID_REPORT_IMPLEMENTATION ;
run test_to_chars_2.cpp : : : <define>BOOST_UUID_NO_SIMD <define>BOOST_UUID_REPORT_IMPLEMENTATION : test_to_chars_2_no_simd ;
run test_from_chars.cpp ;
run test_from_chars_2.cpp ;

View File

@@ -5,14 +5,19 @@
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/core/lightweight_test.hpp>
#include <boost/config/pragma_message.hpp>
#include <boost/array.hpp>
#include <boost/config.hpp>
#include <string>
#if defined(BOOST_UUID_NO_CXX14_CONSTEXPR_RT)
BOOST_PRAGMA_MESSAGE( "Test is not constexpr because BOOST_UUID_NO_CXX14_CONSTEXPR_RT is defined" )
#endif
using namespace boost::uuids;
template<class Ch>
BOOST_CXX14_CONSTEXPR boost::array<Ch, 36> uuid_to_string( uuid const& u )
BOOST_UUID_CXX14_CONSTEXPR_RT boost::array<Ch, 36> uuid_to_string( uuid const& u )
{
boost::array<Ch, 36> r = {{}};
to_chars( u, r.begin(), r.end() );
@@ -25,7 +30,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u;
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "00000000-0000-0000-0000-000000000000" );
@@ -33,7 +38,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"00000000-0000-0000-0000-000000000000" );
@@ -41,7 +46,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"00000000-0000-0000-0000-000000000000" );
@@ -49,7 +54,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"00000000-0000-0000-0000-000000000000" );
@@ -59,7 +64,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"00000000-0000-0000-0000-000000000000" );
@@ -73,7 +78,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u = {{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }};
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -81,7 +86,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -89,7 +94,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -97,7 +102,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -107,7 +112,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"00010203-0405-0607-0809-0a0b0c0d0e0f" );
@@ -121,7 +126,7 @@ int main()
BOOST_CXX14_CONSTEXPR uuid u = {{ 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef, 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }};
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char>( u );
std::string const w( "12345678-90ab-cdef-1234-567890abcdef" );
@@ -129,7 +134,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<wchar_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<wchar_t>( u );
std::wstring const w( L"12345678-90ab-cdef-1234-567890abcdef" );
@@ -137,7 +142,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char16_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char16_t>( u );
std::u16string const w( u"12345678-90ab-cdef-1234-567890abcdef" );
@@ -145,7 +150,7 @@ int main()
}
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char32_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char32_t>( u );
std::u32string const w( U"12345678-90ab-cdef-1234-567890abcdef" );
@@ -155,7 +160,7 @@ int main()
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
{
BOOST_CXX14_CONSTEXPR auto v = uuid_to_string<char8_t>( u );
BOOST_UUID_CXX14_CONSTEXPR_RT auto v = uuid_to_string<char8_t>( u );
std::u8string const w( u8"12345678-90ab-cdef-1234-567890abcdef" );