mirror of
https://github.com/boostorg/uuid.git
synced 2026-01-19 16:52:14 +00:00
Added SSE2 SIMD implementation of to_chars.
This adds SSE2 code paths to to_chars_x86.hpp. The performance effect on Intel Golden Cove (Core i7-12700K), gcc 13.3, in millions of to_chars() calls per second with a 16-byte aligned output buffer:

Char     | Generic | SSE2            | SSE4.1           | AVX2             | AVX10.1
=========+=========+=================+==================+==================+=================
char     | 202.314 | 564.857 (2.79x) | 1194.772 (5.91x) | 1192.094 (5.89x) | 1191.838 (5.89x)
char16_t | 188.532 | 457.281 (2.43x) | 795.798 (4.22x)  | 935.016 (4.96x)  | 938.368 (4.98x)
char32_t | 193.151 | 345.612 (1.79x) | 489.620 (2.53x)  | 688.829 (3.57x)  | 689.617 (3.57x)

Here, the Generic column was generated with BOOST_UUID_NO_SIMD defined and the SSE2 column with -march=x86-64. SSE2 support can be useful when users need to remain compatible with the base x86-64 ISA.
This commit is contained in:
@@ -11,7 +11,7 @@
|
||||
#include <boost/uuid/detail/is_constant_evaluated.hpp>
|
||||
#include <boost/uuid/detail/to_chars_generic.hpp>
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
#if defined(BOOST_UUID_USE_SSE2)
|
||||
# include <boost/uuid/detail/to_chars_x86.hpp>
|
||||
|
||||
#elif defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||
@@ -26,7 +26,7 @@ namespace detail {
|
||||
|
||||
template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
#if defined(BOOST_UUID_USE_SSE2)
|
||||
if( detail::is_constant_evaluated_rt() )
|
||||
{
|
||||
return detail::to_chars_generic( u, out );
|
||||
@@ -40,7 +40,6 @@ template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
}} //namespace boost::uuids
|
||||
}}} // namespace boost::uuids::detail
|
||||
|
||||
#endif // BOOST_UUID_DETAIL_TO_CHARS_HPP_INCLUDED
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include <boost/uuid/detail/config.hpp>
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
#if defined(BOOST_UUID_USE_SSE2)
|
||||
|
||||
#include <cstdint>
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
@@ -26,9 +26,12 @@ BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, AVX2" )
|
||||
#elif defined(BOOST_UUID_USE_SSE41)
|
||||
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE4.1" )
|
||||
|
||||
#else
|
||||
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
|
||||
|
||||
#else
|
||||
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE2" )
|
||||
|
||||
#endif
|
||||
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||
|
||||
@@ -36,8 +39,10 @@ BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
|
||||
#include <immintrin.h>
|
||||
#elif defined(BOOST_UUID_USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
#else
|
||||
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace boost {
|
||||
@@ -51,13 +56,31 @@ template<
|
||||
>
|
||||
struct to_chars_simd_char_constants
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
static const simd_vector128< std::uint8_t > mm_char_table;
|
||||
#else
|
||||
static constexpr std::uint8_t char_a_add = static_cast< std::uint8_t >((0x61 - 10) - 0x30); // ('a' - 10) - '0' in ASCII
|
||||
static const simd_vector128< std::uint8_t > mm_char_0_add;
|
||||
static const simd_vector128< std::uint8_t > mm_char_a_add;
|
||||
#endif
|
||||
static const simd_vector128< std::uint8_t > mm_char_dash;
|
||||
};
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_table =
|
||||
{{ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 }}; // 0123456789abcdef in ASCII
|
||||
#else
|
||||
// SSE2 path of the primary template: hexadecimal characters are produced arithmetically
// from half-byte values using the two additive constants below (ASCII codes are hard-coded here).
//
// mm_char_0_add holds the code of '0' in every byte; to_chars_simd_core adds it to every
// half-byte, which maps values 0..9 to '0'..'9'.
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_0_add =
|
||||
{{ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 }}; // 0x30 is '0' in ASCII
|
||||
// mm_char_a_add holds char_a_add, i.e. ('a' - 10) - '0', in every byte. to_chars_simd_core adds it
// on top of mm_char_0_add only for half-bytes greater than 9, which maps values 10..15 to 'a'..'f'.
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_a_add =
|
||||
{{
|
||||
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add,
|
||||
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add
|
||||
}};
|
||||
#endif
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_dash =
|
||||
{{ 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D }}; // ---------------- in ASCII
|
||||
@@ -69,10 +92,17 @@ struct to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >
|
||||
static_assert(static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('0') && static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('a'),
|
||||
"Boost.UUID: Unsupported char encoding, '-' character code is expected to be less than any hexadecimal characters");
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
static const simd_vector128< std::uint8_t > mm_char_table;
|
||||
#else
|
||||
static constexpr std::uint8_t char_a_add = static_cast< std::uint8_t >(('a' - 10) - '0');
|
||||
static const simd_vector128< std::uint8_t > mm_char_0_add;
|
||||
static const simd_vector128< std::uint8_t > mm_char_a_add;
|
||||
#endif
|
||||
static const simd_vector128< std::uint8_t > mm_char_dash;
|
||||
};
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_table =
|
||||
{{
|
||||
@@ -81,6 +111,22 @@ const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false,
|
||||
static_cast< std::uint8_t >('8'), static_cast< std::uint8_t >('9'), static_cast< std::uint8_t >('a'), static_cast< std::uint8_t >('b'),
|
||||
static_cast< std::uint8_t >('c'), static_cast< std::uint8_t >('d'), static_cast< std::uint8_t >('e'), static_cast< std::uint8_t >('f')
|
||||
}};
|
||||
#else
|
||||
// SSE2 additive constants for the specialization with Char = char and IsCharASCIICompatible = false.
// The values are taken from native character literals rather than hard-coded ASCII codes.
//
// mm_char_0_add: the code of '0' in every byte; added to every half-byte in to_chars_simd_core.
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_0_add =
|
||||
{{
|
||||
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'),
|
||||
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'),
|
||||
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'),
|
||||
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0')
|
||||
}};
|
||||
// mm_char_a_add: char_a_add, i.e. ('a' - 10) - '0', in every byte; added in to_chars_simd_core
// only for half-bytes greater than 9 so that values 10..15 become 'a'..'f'.
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_a_add =
|
||||
{{
|
||||
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add,
|
||||
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add
|
||||
}};
|
||||
#endif
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_dash =
|
||||
{{
|
||||
@@ -102,10 +148,17 @@ struct to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >
|
||||
static_assert(static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'0') && static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'a'),
|
||||
"Boost.UUID: Unsupported wchar_t encoding, L'-' character code is expected to be less than any hexadecimal characters");
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
static const simd_vector128< std::uint8_t > mm_char_table;
|
||||
#else
|
||||
static constexpr std::uint8_t char_a_add = static_cast< std::uint8_t >((L'a' - 10) - L'0');
|
||||
static const simd_vector128< std::uint8_t > mm_char_0_add;
|
||||
static const simd_vector128< std::uint8_t > mm_char_a_add;
|
||||
#endif
|
||||
static const simd_vector128< std::uint8_t > mm_char_dash;
|
||||
};
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_table =
|
||||
{{
|
||||
@@ -114,6 +167,22 @@ const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCh
|
||||
static_cast< std::uint8_t >(L'8'), static_cast< std::uint8_t >(L'9'), static_cast< std::uint8_t >(L'a'), static_cast< std::uint8_t >(L'b'),
|
||||
static_cast< std::uint8_t >(L'c'), static_cast< std::uint8_t >(L'd'), static_cast< std::uint8_t >(L'e'), static_cast< std::uint8_t >(L'f')
|
||||
}};
|
||||
#else
|
||||
// SSE2 additive constants for the specialization with Char = wchar_t and IsWCharASCIICompatible = false.
// The values are taken from native wide character literals, truncated to std::uint8_t.
//
// mm_char_0_add: the code of L'0' in every byte; added to every half-byte in to_chars_simd_core.
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_0_add =
|
||||
{{
|
||||
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'),
|
||||
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'),
|
||||
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'),
|
||||
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0')
|
||||
}};
|
||||
// mm_char_a_add: char_a_add, i.e. (L'a' - 10) - L'0', in every byte; added in to_chars_simd_core
// only for half-bytes greater than 9 so that values 10..15 become L'a'..L'f'.
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_a_add =
|
||||
{{
|
||||
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add,
|
||||
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add
|
||||
}};
|
||||
#endif
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_dash =
|
||||
{{
|
||||
@@ -127,25 +196,55 @@ template< typename >
|
||||
// Vector constants that do not depend on the output character type.
// to_chars_simd_core reads them as constants::mm_0F, constants::mm_9, etc.
// (the declaration of that `constants` alias is outside this view).
struct to_chars_simd_constants
|
||||
{
|
||||
// 0x0F in every byte: masks out the low 4 bits when splitting input bytes into half-bytes.
static const simd_vector128< std::uint8_t > mm_0F;
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
// SSSE3 and later: byte-shuffle index patterns (their use site is not visible in this view).
static const simd_vector128< std::uint8_t > mm_shuffle_pattern1;
|
||||
static const simd_vector128< std::uint8_t > mm_shuffle_pattern2;
|
||||
#else
|
||||
// SSE2 only: 9 in every byte, the _mm_cmpgt_epi8 threshold separating half-bytes 0..9 from 10..15.
static const simd_vector128< std::uint8_t > mm_9;
|
||||
// SSE2 only: byte masks that keep one shifted group of four characters each;
// the groups are OR-ed together and the remaining zero bytes become dashes.
static const simd_vector128< std::uint8_t > mm_group1_mask;
|
||||
static const simd_vector128< std::uint8_t > mm_group2_mask;
|
||||
static const simd_vector128< std::uint8_t > mm_group3_mask;
|
||||
#endif
|
||||
};
|
||||
|
||||
// 0x0F in every byte. to_chars_simd_core ANDs the input (and the input shifted right by 4 bits)
// with this mask to obtain the low and high half-bytes of each UUID byte.
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_0F =
|
||||
{{ 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F }};
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
// Byte-shuffle index patterns for the SSSE3 path; the code consuming them is not visible in this view.
// NOTE(review): the 0x80 lanes presumably produce zero bytes that _mm_max_epu8 later turns into
// dashes - confirm against the full source.
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_shuffle_pattern1 =
|
||||
{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x08, 0x09, 0x0A, 0x0B, 0x80, 0x0C, 0x0D }};
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_shuffle_pattern2 =
|
||||
{{ 0x00, 0x01, 0x80, 0x02, 0x03, 0x04, 0x05, 0x80, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D }};
|
||||
#else
|
||||
// SSE2 path: 9 in every byte. Half-bytes that compare greater than this (10..15) additionally
// receive mm_char_a_add so that they are rendered as letters.
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_9 =
|
||||
{{ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 }};
|
||||
// SSE2 path: masks applied to three differently shifted copies of mm_middle in to_chars_simd_core.
// Each keeps one group of four characters; bytes 0, 5, 10 and 15 stay zero in all of them and are
// filled with the dash character by _mm_max_epu8 afterwards.
//
// Keeps bytes 1..4 (layout |Z89abZZZZZZZZZZZ|), applied after a 1-byte shift within each 64-bit half.
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_group1_mask =
|
||||
{{ 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }};
|
||||
// Keeps bytes 6..9 (layout |ZZZZZZcdefZZZZZZ|), applied after a 2-byte shift of the whole register.
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_group2_mask =
|
||||
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }};
|
||||
// Keeps bytes 11..14 (layout |ZZZZZZZZZZZ0123Z|), applied after a 3-byte shift within each 64-bit half.
template< typename T >
|
||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_group3_mask =
|
||||
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00 }};
|
||||
#endif
|
||||
|
||||
//! Converts UUID to a string of 36 characters, where the first 32 characters are returned in mm_chars1 and mm_chars2 and the last 4 in the highest 32 bits of mm_chars3
|
||||
/*!
|
||||
* Converts UUID to a string of 36 characters, where the first 32 characters are returned in mm_chars1 and mm_chars2.
|
||||
* When SSSE3 is supported, last 4 characters are returned in the highest 32 bits of mm_chars3, otherwise in the lowest 32 bits.
|
||||
*/
|
||||
BOOST_FORCEINLINE void to_chars_simd_core
|
||||
(
|
||||
const std::uint8_t* data,
|
||||
__m128i const& mm_char_table, __m128i const& mm_char_dash,
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
__m128i const& mm_char_table,
|
||||
#else
|
||||
__m128i const& mm_char_0_add, __m128i const& mm_char_a_add,
|
||||
#endif
|
||||
__m128i const& mm_char_dash,
|
||||
__m128i& mm_chars1, __m128i& mm_chars2, __m128i& mm_chars3
|
||||
) noexcept
|
||||
{
|
||||
@@ -154,18 +253,31 @@ BOOST_FORCEINLINE void to_chars_simd_core
|
||||
__m128i mm_input = _mm_loadu_si128(reinterpret_cast< const __m128i* >(data));
|
||||
|
||||
// Split half-bytes
|
||||
__m128i const& mm_0F = constants::mm_0F;
|
||||
__m128i mm_input_hi = _mm_and_si128(_mm_srli_epi32(mm_input, 4), mm_0F);
|
||||
__m128i mm_input_lo = _mm_and_si128(mm_input, mm_0F);
|
||||
__m128i mm_input_hi = _mm_and_si128(_mm_srli_epi32(mm_input, 4), constants::mm_0F);
|
||||
__m128i mm_input_lo = _mm_and_si128(mm_input, constants::mm_0F);
|
||||
|
||||
// Stringize each of the halves
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
mm_input_hi = _mm_shuffle_epi8(mm_char_table, mm_input_hi);
|
||||
mm_input_lo = _mm_shuffle_epi8(mm_char_table, mm_input_lo);
|
||||
#else
|
||||
{
|
||||
__m128i mm_add_mask_hi = _mm_cmpgt_epi8(mm_input_hi, constants::mm_9);
|
||||
__m128i mm_add_mask_lo = _mm_cmpgt_epi8(mm_input_lo, constants::mm_9);
|
||||
|
||||
__m128i mm_add_hi = _mm_add_epi8(mm_char_0_add, _mm_and_si128(mm_add_mask_hi, mm_char_a_add));
|
||||
__m128i mm_add_lo = _mm_add_epi8(mm_char_0_add, _mm_and_si128(mm_add_mask_lo, mm_char_a_add));
|
||||
|
||||
mm_input_hi = _mm_add_epi8(mm_input_hi, mm_add_hi);
|
||||
mm_input_lo = _mm_add_epi8(mm_input_lo, mm_add_lo);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Join them back together
|
||||
__m128i mm_1 = _mm_unpacklo_epi8(mm_input_hi, mm_input_lo);
|
||||
__m128i mm_2 = _mm_unpackhi_epi8(mm_input_hi, mm_input_lo);
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
// Insert dashes at positions 8, 13, 18 and 23
|
||||
// mm_1 mm_2
|
||||
// |0123456789abcdef|0123456789abcdef|
|
||||
@@ -178,6 +290,32 @@ BOOST_FORCEINLINE void to_chars_simd_core
|
||||
mm_chars1 = _mm_max_epu8(mm_chars1, mm_char_dash);
|
||||
mm_chars2 = _mm_max_epu8(mm_chars2, mm_char_dash);
|
||||
mm_chars3 = mm_2;
|
||||
#else
|
||||
// Split groups of characters between dashes and shift them into their places
|
||||
// mm_middle: |89abcdef01234567|
|
||||
// mm_group1: |Z89abZZZZZZZZZZZ|
|
||||
// mm_group2: |ZZZZZZcdefZZZZZZ|
|
||||
// mm_group3: |ZZZZZZZZZZZ0123Z|
|
||||
__m128i mm_middle = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(mm_1), _mm_castsi128_pd(mm_2), _MM_SHUFFLE2(0, 1)));
|
||||
__m128i mm_group1 = _mm_slli_epi64(mm_middle, 8);
|
||||
__m128i mm_group2 = _mm_slli_si128(mm_middle, 2);
|
||||
__m128i mm_group3 = _mm_slli_epi64(mm_middle, 24);
|
||||
mm_group1 = _mm_and_si128(mm_group1, constants::mm_group1_mask);
|
||||
mm_group2 = _mm_and_si128(mm_group2, constants::mm_group2_mask);
|
||||
mm_group3 = _mm_and_si128(mm_group3, constants::mm_group3_mask);
|
||||
|
||||
// Merge them back and insert dashes
|
||||
// mm_middle: |-89ab-cdef-0123-|
|
||||
mm_middle = _mm_or_si128(_mm_or_si128(mm_group1, mm_group2), mm_group3);
|
||||
mm_middle = _mm_max_epu8(mm_middle, mm_char_dash);
|
||||
|
||||
// mm_2: |cdef0123456789ab|
|
||||
mm_2 = _mm_shuffle_epi32(mm_2, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
|
||||
mm_chars1 = _mm_unpacklo_epi64(mm_1, mm_middle);
|
||||
mm_chars2 = _mm_unpackhi_epi64(mm_middle, mm_2);
|
||||
mm_chars3 = mm_2;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(BOOST_MSVC)
|
||||
@@ -195,7 +333,12 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
||||
uuids::detail::to_chars_simd_core
|
||||
(
|
||||
u.data(),
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
char_constants::mm_char_table,
|
||||
#else
|
||||
char_constants::mm_char_0_add,
|
||||
char_constants::mm_char_a_add,
|
||||
#endif
|
||||
char_constants::mm_char_dash,
|
||||
mm_chars1, mm_chars2, mm_chars3
|
||||
);
|
||||
@@ -205,11 +348,17 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
||||
{
|
||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out), mm_chars1);
|
||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), mm_chars2);
|
||||
detail::store_native_u32
|
||||
(
|
||||
out + 32,
|
||||
#if defined(BOOST_UUID_USE_SSE41)
|
||||
detail::store_native_u32(out + 32, static_cast< std::uint32_t >(_mm_extract_epi32(mm_chars3, 3)));
|
||||
static_cast< std::uint32_t >(_mm_extract_epi32(mm_chars3, 3))
|
||||
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||
static_cast< std::uint32_t >(_mm_cvtsi128_si32(_mm_srli_si128(mm_chars3, 12)))
|
||||
#else
|
||||
detail::store_native_u32(out + 32, static_cast< std::uint32_t >(_mm_cvtsi128_si32(_mm_srli_si128(mm_chars3, 12))));
|
||||
static_cast< std::uint32_t >(_mm_cvtsi128_si32(mm_chars3))
|
||||
#endif
|
||||
);
|
||||
}
|
||||
else BOOST_IF_CONSTEXPR (sizeof(Char) == 2u)
|
||||
{
|
||||
@@ -225,8 +374,10 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
||||
#endif
|
||||
#if defined(BOOST_UUID_USE_SSE41) && (defined(__x86_64__) || defined(_M_X64))
|
||||
detail::store_native_u64(out + 32, static_cast< std::uint64_t >(_mm_extract_epi64(_mm_unpackhi_epi8(mm_chars3, mm_0), 1)));
|
||||
#else
|
||||
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||
_mm_storeh_pd(reinterpret_cast< BOOST_MAY_ALIAS double* >(out + 32), _mm_castsi128_pd(_mm_unpackhi_epi8(mm_chars3, mm_0)));
|
||||
#else
|
||||
_mm_storel_epi64(reinterpret_cast< __m128i* >(out + 32), _mm_unpacklo_epi8(mm_chars3, mm_0));
|
||||
#endif
|
||||
}
|
||||
else
|
||||
@@ -255,7 +406,15 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpacklo_epi16(mm, mm_0));
|
||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 28), _mm_unpackhi_epi16(mm, mm_0));
|
||||
#endif
|
||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 32), _mm_unpackhi_epi16(_mm_unpackhi_epi8(mm_chars3, mm_0), mm_0));
|
||||
_mm_storeu_si128
|
||||
(
|
||||
reinterpret_cast< __m128i* >(out + 32),
|
||||
#if defined(BOOST_UUID_USE_SSSE3)
|
||||
_mm_unpackhi_epi16(_mm_unpackhi_epi8(mm_chars3, mm_0), mm_0)
|
||||
#else
|
||||
_mm_unpacklo_epi16(_mm_unpacklo_epi8(mm_chars3, mm_0), mm_0)
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
return out + 36;
|
||||
@@ -269,6 +428,6 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
||||
} // namespace uuids
|
||||
} // namespace boost
|
||||
|
||||
#endif // defined(BOOST_UUID_USE_SSSE3)
|
||||
#endif // defined(BOOST_UUID_USE_SSE2)
|
||||
|
||||
#endif // BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
|
||||
|
||||
Reference in New Issue
Block a user