mirror of
https://github.com/boostorg/uuid.git
synced 2026-01-19 04:42:16 +00:00
Added x86 SIMD implementation of from_chars.
This adds SSE4.1, AVX2, AVX-512v1 and AVX10.1 implementations of the from_chars algorithm. The generic implementation is moved to its own header and constexpr is relaxed to only enabled when is_constant_evaluated is supported. The performance effect on Intel Golden Cove (Core i7-12700K), gcc 13.3, in millions of successful from_chars() calls per second: Char | Generic | SSE4.1 | AVX2 | AVX512v1 | AVX10.1 =========+=========+=================+=================+=================+================ char | 38.571 | 560.645 (14.5x) | 501.505 (13.0x) | 540.038 (14.0x) | 480.778 (12.5x) char16_t | 37.998 | 479.308 (12.6x) | 425.728 (11.2x) | 416.379 (11.0x) | 392.326 (10.3x) char32_t | 50.327 | 391.313 (7.78x) | 359.312 (7.14x) | 349.849 (6.95x) | 333.979 (6.64x) The AVX2 version is slightly slower than SSE4.1 because on Intel microarchitectures the VEX-coded vpblendvb instruction is slower than the legacy SSE4.1 pblendvb. The code contains workarounds for this, which have slight performance overhead compared to SSE4.1 version, but are still faster than using vpblendvb. Alternatively, the performance could be improved by using asm blocks to force using pblendvb in AVX2 code, but this may potentially cause SSE/AVX transition penalties if the target vector register happens to have "dirty" upper bits. There's no way to ensure this doesn't happen, so this is not implemented. AVX512v1 claws back some performance and uses less instructions (i.e. smaller code size). The AVX10.1 version is slower as it uses vpermi2b instruction from AVX512_VBMI, which is relatively slow on Intel. It allows for reducing the number of instructions even further and the number of vector constants as well. The instruction is faster on AMD Zen 4 and should offer better performance compared to AVX512v1 code path, although it wasn't tested. This code path is disabled by default, unless BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B is defined, which can be used to test and tune performance on AMD and newer Intel CPU microarchitectures. Thus, by default, AVX10.1 performance should be roughly equivalent to AVX512v1, barring compiler (mis)optimizations. The unsuccessful parsing case depends on where the error happens, as the generic version may terminate sooner if the error is detected at the beginning of the input string, while the SIMD version performs roughly the same amount of work but faster. Here are some examples for 8-bit character types (for larger types the numbers are more or less comparable): Error | Generic | SSE4.1 | AVX2 | AVX512v1 | AVX10.1 ===================+==========+=================+=================+=================+================ EOI at 35 chars | 43.629 | 356.562 (8.17x) | 326.311 (7.48x) | 322.377 (7.39x) | 308.155 (7.06x) EOI at 1 char | 2645.783 | 444.769 (0.17x) | 400.275 (0.15x) | 404.826 (0.15x) | 403.730 (0.15x) Missing dash at 23 | 73.878 | 514.303 (6.96x) | 474.694 (6.43x) | 507.949 (6.88x) | 474.077 (6.42x) Missing dash at 8 | 223.921 | 516.641 (2.31x) | 472.737 (2.11x) | 506.242 (2.26x) | 473.718 (2.12x) Illegal char at 35 | 47.373 | 368.002 (7.77x) | 333.233 (7.03x) | 318.242 (6.72x) | 301.659 (6.37x) Illegal char at 0 | 1729.087 | 421.511 (0.24x) | 385.217 (0.22x) | 374.047 (0.22x) | 351.944 (0.20x) The above table is collected with BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B defined. In general, only the very early errors tend to perform worse in the SIMD version and the majority of cases are still faster. Besides BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B, the implementation also has BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM control macro, which, if defined, enables usage of 512-bit registers for convertting from 32-bit character types to 8-bit integers. This code path is also slower than the 256-bit path on Golden Cove, and therefore is disabled. The macro is provided primarily to allow for tuning and experimentation with newer CPU microarchitectures, where the 512-bit path may become beneficial. All of the above performance numbers were produced without it.
This commit is contained in:
@@ -6,149 +6,33 @@
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/detail/config.hpp>
|
||||
#include <boost/uuid/detail/is_constant_evaluated.hpp>
|
||||
#include <boost/uuid/detail/from_chars_result.hpp>
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/uuid/detail/from_chars_generic.hpp>
|
||||
#if defined(BOOST_UUID_USE_SSE41)
|
||||
#include <boost/uuid/detail/from_chars_x86.hpp>
|
||||
#endif
|
||||
|
||||
namespace boost {
|
||||
namespace uuids {
|
||||
namespace detail {
|
||||
|
||||
// 0-9, A-F, a-f are consecutive in both ASCII and EBCDIC
|
||||
|
||||
constexpr char const* from_chars_digits( char const* ) noexcept
|
||||
{
|
||||
return "09AFaf-{}";
|
||||
}
|
||||
|
||||
constexpr wchar_t const* from_chars_digits( wchar_t const* ) noexcept
|
||||
{
|
||||
return L"09AFaf-{}";
|
||||
}
|
||||
|
||||
constexpr char16_t const* from_chars_digits( char16_t const* ) noexcept
|
||||
{
|
||||
return u"09AFaf-{}";
|
||||
}
|
||||
|
||||
constexpr char32_t const* from_chars_digits( char32_t const* ) noexcept
|
||||
{
|
||||
return U"09AFaf-{}";
|
||||
}
|
||||
|
||||
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
|
||||
|
||||
constexpr char8_t const* from_chars_digits( char8_t const* ) noexcept
|
||||
{
|
||||
return u8"09AFaf-{}";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
unsigned char from_chars_digit_value( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
|
||||
if( ch >= digits[ 0 ] && ch <= digits[ 1 ] )
|
||||
{
|
||||
return static_cast<unsigned char>( ch - digits[ 0 ] );
|
||||
}
|
||||
|
||||
if( ch >= digits[ 2 ] && ch <= digits[ 3 ] )
|
||||
{
|
||||
return static_cast<unsigned char>( ch - digits[ 2 ] + 10 );
|
||||
}
|
||||
|
||||
if( ch >= digits[ 4 ] && ch <= digits[ 5 ] )
|
||||
{
|
||||
return static_cast<unsigned char>( ch - digits[ 4 ] + 10 );
|
||||
}
|
||||
|
||||
return 255;
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
bool from_chars_is_dash( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
return ch == digits[ 6 ];
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
bool from_chars_is_opening_brace( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
return ch == digits[ 7 ];
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
bool from_chars_is_closing_brace( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
return ch == digits[ 8 ];
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
BOOST_UUID_CXX14_CONSTEXPR_RT inline
|
||||
from_chars_result<Ch> from_chars( Ch const* first, Ch const* last, uuid& u ) noexcept
|
||||
{
|
||||
u = {};
|
||||
|
||||
for( std::size_t i = 0; i < 16; ++i )
|
||||
#if defined(BOOST_UUID_USE_SSE41)
|
||||
if( detail::is_constant_evaluated_rt() )
|
||||
{
|
||||
if( first == last )
|
||||
{
|
||||
return { first, from_chars_error::unexpected_end_of_input };
|
||||
}
|
||||
|
||||
unsigned char v1 = detail::from_chars_digit_value( *first );
|
||||
|
||||
if( v1 == 255 )
|
||||
{
|
||||
return { first, from_chars_error::hex_digit_expected };
|
||||
}
|
||||
|
||||
++first;
|
||||
|
||||
if( first == last )
|
||||
{
|
||||
return { first, from_chars_error::unexpected_end_of_input };
|
||||
}
|
||||
|
||||
unsigned char v2 = detail::from_chars_digit_value( *first );
|
||||
|
||||
if( v2 == 255 )
|
||||
{
|
||||
return { first, from_chars_error::hex_digit_expected };
|
||||
}
|
||||
|
||||
++first;
|
||||
|
||||
u.data()[ i ] = static_cast<unsigned char>( ( v1 << 4 ) + v2 );
|
||||
|
||||
if( i == 3 || i == 5 || i == 7 || i == 9 )
|
||||
{
|
||||
if( first == last )
|
||||
{
|
||||
return { first, from_chars_error::unexpected_end_of_input };
|
||||
}
|
||||
|
||||
if( !detail::from_chars_is_dash( *first ) )
|
||||
{
|
||||
return { first, from_chars_error::dash_expected };
|
||||
}
|
||||
|
||||
++first;
|
||||
}
|
||||
return detail::from_chars_generic( first, last, u );
|
||||
}
|
||||
|
||||
return { first, from_chars_error::none };
|
||||
else
|
||||
{
|
||||
return detail::from_chars_simd( first, last, u );
|
||||
}
|
||||
#else
|
||||
return detail::from_chars_generic( first, last, u );
|
||||
#endif
|
||||
}
|
||||
|
||||
}} //namespace boost::uuids
|
||||
|
||||
162
include/boost/uuid/detail/from_chars_generic.hpp
Normal file
162
include/boost/uuid/detail/from_chars_generic.hpp
Normal file
@@ -0,0 +1,162 @@
|
||||
#ifndef BOOST_UUID_DETAIL_FROM_CHARS_GENERIC_HPP_INCLUDED
|
||||
#define BOOST_UUID_DETAIL_FROM_CHARS_GENERIC_HPP_INCLUDED
|
||||
|
||||
// Copyright 2025 Peter Dimov
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
#include <cstddef>
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/detail/from_chars_result.hpp>
|
||||
#include <boost/config.hpp>
|
||||
|
||||
#if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||
|
||||
#include <boost/config/pragma_message.hpp>
|
||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_generic.hpp" )
|
||||
|
||||
#endif
|
||||
|
||||
namespace boost {
|
||||
namespace uuids {
|
||||
namespace detail {
|
||||
|
||||
// 0-9, A-F, a-f are consecutive in both ASCII and EBCDIC
|
||||
|
||||
constexpr char const* from_chars_digits( char const* ) noexcept
|
||||
{
|
||||
return "09AFaf-{}";
|
||||
}
|
||||
|
||||
constexpr wchar_t const* from_chars_digits( wchar_t const* ) noexcept
|
||||
{
|
||||
return L"09AFaf-{}";
|
||||
}
|
||||
|
||||
constexpr char16_t const* from_chars_digits( char16_t const* ) noexcept
|
||||
{
|
||||
return u"09AFaf-{}";
|
||||
}
|
||||
|
||||
constexpr char32_t const* from_chars_digits( char32_t const* ) noexcept
|
||||
{
|
||||
return U"09AFaf-{}";
|
||||
}
|
||||
|
||||
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
|
||||
|
||||
constexpr char8_t const* from_chars_digits( char8_t const* ) noexcept
|
||||
{
|
||||
return u8"09AFaf-{}";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
unsigned char from_chars_digit_value( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
|
||||
if( ch >= digits[ 0 ] && ch <= digits[ 1 ] )
|
||||
{
|
||||
return static_cast<unsigned char>( ch - digits[ 0 ] );
|
||||
}
|
||||
|
||||
if( ch >= digits[ 2 ] && ch <= digits[ 3 ] )
|
||||
{
|
||||
return static_cast<unsigned char>( ch - digits[ 2 ] + 10 );
|
||||
}
|
||||
|
||||
if( ch >= digits[ 4 ] && ch <= digits[ 5 ] )
|
||||
{
|
||||
return static_cast<unsigned char>( ch - digits[ 4 ] + 10 );
|
||||
}
|
||||
|
||||
return 255;
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
bool from_chars_is_dash( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
return ch == digits[ 6 ];
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
bool from_chars_is_opening_brace( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
return ch == digits[ 7 ];
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
bool from_chars_is_closing_brace( Ch ch ) noexcept
|
||||
{
|
||||
constexpr Ch const* digits = detail::from_chars_digits( static_cast<Ch const*>( nullptr ) );
|
||||
return ch == digits[ 8 ];
|
||||
}
|
||||
|
||||
template<class Ch>
|
||||
BOOST_CXX14_CONSTEXPR inline
|
||||
from_chars_result<Ch> from_chars_generic( Ch const* first, Ch const* last, uuid& u ) noexcept
|
||||
{
|
||||
u = {};
|
||||
|
||||
for( std::size_t i = 0; i < 16; ++i )
|
||||
{
|
||||
if( first == last )
|
||||
{
|
||||
return { first, from_chars_error::unexpected_end_of_input };
|
||||
}
|
||||
|
||||
unsigned char v1 = detail::from_chars_digit_value( *first );
|
||||
|
||||
if( v1 == 255 )
|
||||
{
|
||||
return { first, from_chars_error::hex_digit_expected };
|
||||
}
|
||||
|
||||
++first;
|
||||
|
||||
if( first == last )
|
||||
{
|
||||
return { first, from_chars_error::unexpected_end_of_input };
|
||||
}
|
||||
|
||||
unsigned char v2 = detail::from_chars_digit_value( *first );
|
||||
|
||||
if( v2 == 255 )
|
||||
{
|
||||
return { first, from_chars_error::hex_digit_expected };
|
||||
}
|
||||
|
||||
++first;
|
||||
|
||||
u.data()[ i ] = static_cast<unsigned char>( ( v1 << 4 ) + v2 );
|
||||
|
||||
if( i == 3 || i == 5 || i == 7 || i == 9 )
|
||||
{
|
||||
if( first == last )
|
||||
{
|
||||
return { first, from_chars_error::unexpected_end_of_input };
|
||||
}
|
||||
|
||||
if( !detail::from_chars_is_dash( *first ) )
|
||||
{
|
||||
return { first, from_chars_error::dash_expected };
|
||||
}
|
||||
|
||||
++first;
|
||||
}
|
||||
}
|
||||
|
||||
return { first, from_chars_error::none };
|
||||
}
|
||||
|
||||
}}} //namespace boost::uuids::detail
|
||||
|
||||
#endif // BOOST_UUID_DETAIL_TO_CHARS_GENERIC_HPP_INCLUDED
|
||||
951
include/boost/uuid/detail/from_chars_x86.hpp
Normal file
951
include/boost/uuid/detail/from_chars_x86.hpp
Normal file
@@ -0,0 +1,951 @@
|
||||
#ifndef BOOST_UUID_DETAIL_FROM_CHARS_X86_HPP_INCLUDED
|
||||
#define BOOST_UUID_DETAIL_FROM_CHARS_X86_HPP_INCLUDED
|
||||
|
||||
// Copyright 2025 Andrey Semashev
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
#include <boost/uuid/detail/config.hpp>
|
||||
|
||||
#if defined(BOOST_UUID_USE_SSE41)
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/detail/from_chars_result.hpp>
|
||||
#include <boost/uuid/detail/simd_vector.hpp>
|
||||
|
||||
#if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||
#include <boost/config/pragma_message.hpp>
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX10_1)
|
||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, AVX10.1" )
|
||||
|
||||
#elif defined(BOOST_UUID_USE_AVX512_V1)
|
||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, AVX512v1" )
|
||||
|
||||
#elif defined(BOOST_UUID_USE_AVX)
|
||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, AVX" )
|
||||
|
||||
#else
|
||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, SSE4.1" )
|
||||
|
||||
#endif
|
||||
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX)
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#include <intrin.h>
|
||||
#pragma intrinsic(_BitScanForward)
|
||||
#endif
|
||||
|
||||
namespace boost {
|
||||
namespace uuids {
|
||||
namespace detail {
|
||||
|
||||
//! Returns the number of trailing zero bits in a non-zero input integer
|
||||
BOOST_FORCEINLINE std::uint32_t countr_zero_nz(std::uint32_t n) noexcept
|
||||
{
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
return __builtin_ctz(n);
|
||||
#elif defined(_MSC_VER) && !defined(__clang__)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, n);
|
||||
return static_cast< std::uint32_t >(index);
|
||||
#else
|
||||
std::uint32_t index = 0u;
|
||||
if ((n & 0xFFFF) == 0u)
|
||||
{
|
||||
n >>= 16u;
|
||||
index += 16u;
|
||||
}
|
||||
if ((n & 0xFF) == 0u)
|
||||
{
|
||||
n >>= 8u;
|
||||
index += 8u;
|
||||
}
|
||||
if ((n & 0xF) == 0u)
|
||||
{
|
||||
n >>= 4u;
|
||||
index += 4u;
|
||||
}
|
||||
if ((n & 0x3) == 0u)
|
||||
{
|
||||
n >>= 2u;
|
||||
index += 2u;
|
||||
}
|
||||
if ((n & 0x1) == 0u)
|
||||
{
|
||||
index += 1u;
|
||||
}
|
||||
return index;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<
|
||||
typename Char,
|
||||
bool IsCharASCIICompatible = ('0' == 0x30 && '9' == 0x39 && 'A' == 0x41 && 'F' == 0x46 && 'a' == 0x61 && 'f' == 0x66 && '-' == 0x2D),
|
||||
bool IsWCharASCIICompatible = (L'0' == 0x30 && L'9' == 0x39 && L'A' == 0x41 && L'F' == 0x46 && L'a' == 0x61 && L'f' == 0x66 && L'-' == 0x2D)
|
||||
>
|
||||
struct from_chars_simd_char_constants
|
||||
{
|
||||
static const simd_vector128< std::uint8_t > mm_expected_dashes;
|
||||
|
||||
static constexpr std::uint8_t char_code2 = 0x61; // 'a' in ASCII
|
||||
static constexpr std::uint8_t char_code2_sub = static_cast< std::uint8_t >(char_code2 - 10u);
|
||||
static constexpr std::uint8_t char_code1 = 0x41; // 'A' in ASCII
|
||||
static constexpr std::uint8_t char_code1_sub = static_cast< std::uint8_t >(char_code1 - 10u);
|
||||
static constexpr std::uint8_t char_code0 = 0x30; // '0' in ASCII
|
||||
static constexpr std::uint8_t char_code0_sub = char_code0;
|
||||
|
||||
static constexpr std::uint32_t char_code_sub =
|
||||
(static_cast< std::uint32_t >(char_code0_sub) << 16u) | (static_cast< std::uint32_t >(char_code1_sub) << 8u) | char_code2_sub;
|
||||
|
||||
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
};
|
||||
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_expected_dashes =
|
||||
{{ 0x2D, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x00, 0x2D }}; // 0x2D is '-' in ASCII
|
||||
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code2_cmp =
|
||||
{{
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u)
|
||||
}};
|
||||
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code1_cmp =
|
||||
{{
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
||||
}};
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code2_sub =
|
||||
{{
|
||||
char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub,
|
||||
char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub
|
||||
}};
|
||||
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code1_sub =
|
||||
{{
|
||||
char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub,
|
||||
char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub
|
||||
}};
|
||||
|
||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code0_sub =
|
||||
{{
|
||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub,
|
||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
||||
}};
|
||||
|
||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
struct from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >
|
||||
{
|
||||
static_assert(static_cast< std::int8_t >('0') > -128 && static_cast< std::int8_t >('A') > -128 && static_cast< std::int8_t >('a') > -128,
|
||||
"Boost.UUID: Unsupported char encoding, hexadecimal character codes are expected to be greater than -128");
|
||||
|
||||
static const simd_vector128< std::uint8_t > mm_expected_dashes;
|
||||
|
||||
static constexpr std::uint8_t char_code2 = static_cast< std::uint8_t >
|
||||
(
|
||||
static_cast< std::int8_t >('a') > static_cast< std::int8_t >('A') ?
|
||||
(
|
||||
static_cast< std::int8_t >('a') > static_cast< std::int8_t >('0') ? 'a' : '0'
|
||||
) :
|
||||
(
|
||||
static_cast< std::int8_t >('A') > static_cast< std::int8_t >('0') ? 'A' : '0'
|
||||
)
|
||||
);
|
||||
static constexpr std::uint8_t char_code2_sub = char_code2 == static_cast< std::uint8_t >('0') ?
|
||||
static_cast< std::uint8_t >('0') : static_cast< std::uint8_t >(char_code2 - 10u);
|
||||
|
||||
static constexpr std::uint8_t char_code1 = static_cast< std::uint8_t >
|
||||
(
|
||||
static_cast< std::int8_t >('a') > static_cast< std::int8_t >('A') ?
|
||||
(
|
||||
static_cast< std::int8_t >('a') < static_cast< std::int8_t >('0') ? 'a' : '0'
|
||||
) :
|
||||
(
|
||||
static_cast< std::int8_t >('A') < static_cast< std::int8_t >('0') ? 'A' : '0'
|
||||
)
|
||||
);
|
||||
static constexpr std::uint8_t char_code1_sub = char_code1 == static_cast< std::uint8_t >('0') ?
|
||||
static_cast< std::uint8_t >('0') : static_cast< std::uint8_t >(char_code1 - 10u);
|
||||
|
||||
static constexpr std::uint8_t char_code0 = static_cast< std::uint8_t >
|
||||
(
|
||||
static_cast< std::int8_t >('a') < static_cast< std::int8_t >('A') ?
|
||||
(
|
||||
static_cast< std::int8_t >('a') < static_cast< std::int8_t >('0') ? 'a' : '0'
|
||||
) :
|
||||
(
|
||||
static_cast< std::int8_t >('A') < static_cast< std::int8_t >('0') ? 'A' : '0'
|
||||
)
|
||||
);
|
||||
static constexpr std::uint8_t char_code0_sub = char_code0 == static_cast< std::uint8_t >('0') ?
|
||||
static_cast< std::uint8_t >('0') : static_cast< std::uint8_t >(char_code0 - 10u);
|
||||
|
||||
static constexpr std::uint32_t char_code_sub =
|
||||
(static_cast< std::uint32_t >(char_code0_sub) << 16u) | (static_cast< std::uint32_t >(char_code1_sub) << 8u) | char_code2_sub;
|
||||
|
||||
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
};
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_expected_dashes =
|
||||
{{
|
||||
static_cast< std::uint8_t >('-'), 0x00, 0x00, 0x00, 0x00, static_cast< std::uint8_t >('-'), 0x00, 0x00,
|
||||
0x00, 0x00, static_cast< std::uint8_t >('-'), 0x00, 0x00, 0x00, 0x00, static_cast< std::uint8_t >('-')
|
||||
}};
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code2_cmp =
|
||||
{{
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u)
|
||||
}};
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code1_cmp =
|
||||
{{
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
||||
}};
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code2_sub =
|
||||
{{
|
||||
char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub,
|
||||
char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub
|
||||
}};
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code1_sub =
|
||||
{{
|
||||
char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub,
|
||||
char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub
|
||||
}};
|
||||
|
||||
template< bool IsWCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code0_sub =
|
||||
{{
|
||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub,
|
||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
||||
}};
|
||||
|
||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
struct from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >
|
||||
{
|
||||
static_assert(static_cast< wchar_t >(static_cast< std::uint8_t >(L'0')) == L'0' && static_cast< wchar_t >(static_cast< std::uint8_t >(L'9')) == L'9' &&
|
||||
static_cast< wchar_t >(static_cast< std::uint8_t >(L'a')) == L'a' && static_cast< wchar_t >(static_cast< std::uint8_t >(L'f')) == L'f' &&
|
||||
static_cast< wchar_t >(static_cast< std::uint8_t >(L'-')) == L'-',
|
||||
"Boost.UUID: Unsupported wchar_t encoding, hexadecimal and dash character codes are expected to be representable by a single byte");
|
||||
|
||||
static_assert(static_cast< std::int8_t >(L'0') > -128 && static_cast< std::int8_t >(L'A') > -128 && static_cast< std::int8_t >(L'a') > -128,
|
||||
"Boost.UUID: Unsupported wchar_t encoding, hexadecimal character codes are expected to be greater than -128");
|
||||
|
||||
static const simd_vector128< std::uint8_t > mm_expected_dashes;
|
||||
|
||||
static constexpr std::uint8_t char_code2 = static_cast< std::uint8_t >
|
||||
(
|
||||
static_cast< std::int8_t >(L'a') > static_cast< std::int8_t >(L'A') ?
|
||||
(
|
||||
static_cast< std::int8_t >(L'a') > static_cast< std::int8_t >(L'0') ? L'a' : L'0'
|
||||
) :
|
||||
(
|
||||
static_cast< std::int8_t >(L'A') > static_cast< std::int8_t >(L'0') ? L'A' : L'0'
|
||||
)
|
||||
);
|
||||
static constexpr std::uint8_t char_code2_sub = char_code2 == static_cast< std::uint8_t >(L'0') ?
|
||||
static_cast< std::uint8_t >(L'0') : static_cast< std::uint8_t >(char_code2 - 10u);
|
||||
|
||||
static constexpr std::uint8_t char_code1 = static_cast< std::uint8_t >
|
||||
(
|
||||
static_cast< std::int8_t >(L'a') > static_cast< std::int8_t >(L'A') ?
|
||||
(
|
||||
static_cast< std::int8_t >(L'a') < static_cast< std::int8_t >(L'0') ? L'a' : L'0'
|
||||
) :
|
||||
(
|
||||
static_cast< std::int8_t >(L'A') < static_cast< std::int8_t >(L'0') ? L'A' : L'0'
|
||||
)
|
||||
);
|
||||
static constexpr std::uint8_t char_code1_sub = char_code1 == static_cast< std::uint8_t >(L'0') ?
|
||||
static_cast< std::uint8_t >(L'0') : static_cast< std::uint8_t >(char_code1 - 10u);
|
||||
|
||||
static constexpr std::uint8_t char_code0 = static_cast< std::uint8_t >
|
||||
(
|
||||
static_cast< std::int8_t >(L'a') < static_cast< std::int8_t >(L'A') ?
|
||||
(
|
||||
static_cast< std::int8_t >(L'a') < static_cast< std::int8_t >(L'0') ? L'a' : L'0'
|
||||
) :
|
||||
(
|
||||
static_cast< std::int8_t >(L'A') < static_cast< std::int8_t >(L'0') ? L'A' : L'0'
|
||||
)
|
||||
);
|
||||
static constexpr std::uint8_t char_code0_sub = char_code0 == static_cast< std::uint8_t >(L'0') ?
|
||||
static_cast< std::uint8_t >(L'0') : static_cast< std::uint8_t >(char_code0 - 10u);
|
||||
|
||||
static constexpr std::uint32_t char_code_sub =
|
||||
(static_cast< std::uint32_t >(char_code0_sub) << 16u) | (static_cast< std::uint32_t >(char_code1_sub) << 8u) | char_code2_sub;
|
||||
|
||||
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
||||
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
};
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_expected_dashes =
|
||||
{{
|
||||
static_cast< std::uint8_t >(L'-'), 0x00, 0x00, 0x00, 0x00, static_cast< std::uint8_t >(L'-'), 0x00, 0x00,
|
||||
0x00, 0x00, static_cast< std::uint8_t >(L'-'), 0x00, 0x00, 0x00, 0x00, static_cast< std::uint8_t >(L'-')
|
||||
}};
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code2_cmp =
|
||||
{{
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u),
|
||||
static_cast< std::uint8_t >(char_code2 - 1u), static_cast< std::uint8_t >(char_code2 - 1u)
|
||||
}};
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code1_cmp =
|
||||
{{
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u),
|
||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
||||
}};
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code2_sub =
|
||||
{{
|
||||
char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub,
|
||||
char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub, char_code2_sub
|
||||
}};
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code1_sub =
|
||||
{{
|
||||
char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub,
|
||||
char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub, char_code1_sub
|
||||
}};
|
||||
|
||||
template< bool IsCharASCIICompatible >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code0_sub =
|
||||
{{
|
||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub,
|
||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
||||
}};
|
||||
|
||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
|
||||
|
||||
template< typename >
|
||||
struct from_chars_simd_constants
|
||||
{
|
||||
static const simd_vector128< std::uint8_t > mm_dashes_mask;
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX10_1) && defined(BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B)
|
||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern1;
|
||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern2;
|
||||
#else
|
||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern1;
|
||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern2;
|
||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern3;
|
||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_blend_mask;
|
||||
#endif
|
||||
|
||||
static const simd_vector128< std::uint8_t > mm_F0;
|
||||
|
||||
#if defined(BOOST_UUID_USE_AVX) && !defined(BOOST_UUID_USE_AVX512_V1)
|
||||
static const simd_vector128< std::uint8_t > mm_2;
|
||||
#endif
|
||||
};
|
||||
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_dashes_mask =
|
||||
{{ 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF }};
|
||||
#if defined(BOOST_UUID_USE_AVX10_1) && defined(BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B)
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern1 =
|
||||
{{ 0x01, 0x03, 0x05, 0x07, 0x0A, 0x0C, 0x0F, 0x11, 0x00, 0x02, 0x04, 0x06, 0x09, 0x0B, 0x0E, 0x10 }};
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern2 =
|
||||
{{ 0x04, 0x06, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x03, 0x05, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12 }};
|
||||
#else
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern1 =
|
||||
{{ 0x01, 0x03, 0x05, 0x07, 0x0A, 0x0C, 0x0F, 0x80, 0x00, 0x02, 0x04, 0x06, 0x09, 0x0B, 0x0E, 0x80 }};
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern2 =
|
||||
{{ 0x04, 0x06, 0x09, 0x0B, 0x0D, 0x0F, 0x80, 0x01, 0x03, 0x05, 0x08, 0x0A, 0x0C, 0x0E, 0x80, 0x00 }};
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern3 =
|
||||
{{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x03, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x02 }};
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_blend_mask =
|
||||
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF }};
|
||||
#endif
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_F0 =
|
||||
{{ 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0 }};
|
||||
#if defined(BOOST_UUID_USE_AVX) && !defined(BOOST_UUID_USE_AVX512_V1)
|
||||
template< typename T >
|
||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_2 =
|
||||
{{ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 }};
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(BOOST_GCC) && (BOOST_GCC >= 40600)
|
||||
#pragma GCC diagnostic push
|
||||
// array subscript N is outside array bounds of '<array>'
|
||||
// In the partial loads below, masked loads may be used with pointers beyond the input array of characters.
|
||||
// In all such instances, the actual loads are prevented by the generated masks, don't generate
|
||||
// hardware faults and therefore are safe.
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
|
||||
template< typename Char, unsigned int Size = sizeof(Char) >
|
||||
struct from_chars_simd_load_traits;
|
||||
|
||||
template< typename Char >
|
||||
struct from_chars_simd_load_traits< Char, 1u >
|
||||
{
|
||||
static BOOST_FORCEINLINE __m128i load_packed_16(const Char* p) noexcept
|
||||
{
|
||||
return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE __m128i load_packed_4(const Char* p) noexcept
|
||||
{
|
||||
return _mm_cvtsi32_si128(*reinterpret_cast< BOOST_MAY_ALIAS int const* >(p));
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE __m128i load_packed_n(const Char* p, unsigned int n) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
return _mm_maskz_loadu_epi8(_cvtu32_mask16((static_cast< std::uint32_t >(1u) << n) - 1u), p);
|
||||
#else
|
||||
std::uint32_t chars = 0u;
|
||||
p += n;
|
||||
if ((n & 1u) != 0u)
|
||||
{
|
||||
p -= 1;
|
||||
chars = *reinterpret_cast< BOOST_MAY_ALIAS std::uint8_t const* >(p);
|
||||
}
|
||||
|
||||
if ((n & 2u) != 0u)
|
||||
{
|
||||
p -= 2;
|
||||
chars = (chars << 16u) | *reinterpret_cast< BOOST_MAY_ALIAS std::uint16_t const* >(p);
|
||||
}
|
||||
|
||||
__m128i mm_chars = _mm_cvtsi32_si128(chars);
|
||||
if ((n & 4u) != 0u)
|
||||
{
|
||||
p -= 4;
|
||||
mm_chars = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*reinterpret_cast< BOOST_MAY_ALIAS int const* >(p)), mm_chars);
|
||||
}
|
||||
|
||||
if ((n & 8u) != 0u)
|
||||
{
|
||||
p -= 8;
|
||||
mm_chars = _mm_unpacklo_epi64(_mm_loadl_epi64(reinterpret_cast< const __m128i* >(p)), mm_chars);
|
||||
}
|
||||
|
||||
return mm_chars;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template< typename Char >
|
||||
struct from_chars_simd_load_traits< Char, 2u >
|
||||
{
|
||||
static BOOST_FORCEINLINE __m128i load_packed_16(const Char* p) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
return _mm256_cvtusepi16_epi8(_mm256_loadu_si256(reinterpret_cast< const __m256i* >(p)));
|
||||
#else
|
||||
return _mm_packus_epi16(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 8)));
|
||||
#endif
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE __m128i load_packed_4(const Char* p) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
return _mm_cvtusepi16_epi8(_mm_loadl_epi64(reinterpret_cast< const __m128i* >(p)));
|
||||
#else
|
||||
return _mm_packus_epi16(_mm_loadl_epi64(reinterpret_cast< const __m128i* >(p)), _mm_setzero_si128());
|
||||
#endif
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE __m128i load_packed_n(const Char* p, unsigned int n) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
return _mm256_cvtusepi16_epi8(_mm256_maskz_loadu_epi16(_cvtu32_mask16((static_cast< std::uint32_t >(1u) << n) - 1u), p));
|
||||
#else
|
||||
__m128i mm_chars1 = _mm_setzero_si128();
|
||||
__m128i mm_chars2 = _mm_setzero_si128();
|
||||
p += n;
|
||||
if ((n & 1u) != 0u)
|
||||
{
|
||||
p -= 1;
|
||||
mm_chars1 = _mm_cvtsi32_si128(*reinterpret_cast< BOOST_MAY_ALIAS std::uint16_t const* >(p));
|
||||
}
|
||||
|
||||
if ((n & 2u) != 0u)
|
||||
{
|
||||
p -= 2;
|
||||
mm_chars1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*reinterpret_cast< BOOST_MAY_ALIAS int const* >(p)), mm_chars1);
|
||||
}
|
||||
|
||||
if ((n & 4u) != 0u)
|
||||
{
|
||||
p -= 4;
|
||||
mm_chars1 = _mm_unpacklo_epi64(_mm_loadl_epi64(reinterpret_cast< const __m128i* >(p)), mm_chars1);
|
||||
}
|
||||
|
||||
if ((n & 8u) != 0u)
|
||||
{
|
||||
p -= 8;
|
||||
mm_chars2 = mm_chars1;
|
||||
mm_chars1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||
}
|
||||
|
||||
return _mm_packus_epi16(mm_chars1, mm_chars2);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template< typename Char >
|
||||
struct from_chars_simd_load_traits< Char, 4u >
|
||||
{
|
||||
static BOOST_FORCEINLINE __m128i load_packed_16(const Char* p) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
#if defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||
// Slower than the 256-bit version below on Intel Golden Cove.
|
||||
return _mm512_cvtusepi32_epi8(_mm512_loadu_epi32(p));
|
||||
#else // defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||
__m128i mm1 = _mm256_cvtusepi32_epi8(_mm256_loadu_si256(reinterpret_cast< const __m256i* >(p)));
|
||||
__m128i mm2 = _mm256_cvtusepi32_epi8(_mm256_loadu_si256(reinterpret_cast< const __m256i* >(p + 8)));
|
||||
return _mm_unpacklo_epi64(mm1, mm2);
|
||||
#endif // defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||
#else
|
||||
__m128i mm1 = _mm_packus_epi32(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 4)));
|
||||
__m128i mm2 = _mm_packus_epi32(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 8)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 12)));
|
||||
return _mm_packus_epi16(mm1, mm2);
|
||||
#endif
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE __m128i load_packed_4(const Char* p) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
return _mm_cvtusepi32_epi8(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p)));
|
||||
#else
|
||||
__m128i mm1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||
__m128i mm2 = _mm_setzero_si128();
|
||||
return _mm_packus_epi16(_mm_packus_epi32(mm1, mm2), mm2);
|
||||
#endif
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE __m128i load_packed_n(const Char* p, unsigned int n) noexcept
|
||||
{
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
const std::uint32_t mask = (static_cast< std::uint32_t >(1u) << n) - 1u;
|
||||
#if defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||
// Slower than the 256-bit version below on Intel Golden Cove.
|
||||
return _mm512_cvtusepi32_epi8(_mm512_maskz_loadu_epi32(_cvtu32_mask16(mask), p));
|
||||
#else // defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||
__m128i mm1 = _mm256_cvtusepi32_epi8(_mm256_maskz_loadu_epi32(_cvtu32_mask8(mask & 0xFF), p));
|
||||
__m128i mm2 = _mm256_cvtusepi32_epi8(_mm256_maskz_loadu_epi32(_cvtu32_mask8(mask >> 8u), p + 8));
|
||||
return _mm_unpacklo_epi64(mm1, mm2);
|
||||
#endif // defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||
#else
|
||||
__m128i mm_chars1 = _mm_setzero_si128();
|
||||
__m128i mm_chars2 = _mm_setzero_si128();
|
||||
__m128i mm_chars3 = _mm_setzero_si128();
|
||||
__m128i mm_chars4 = _mm_setzero_si128();
|
||||
p += n;
|
||||
if ((n & 1u) != 0u)
|
||||
{
|
||||
p -= 1;
|
||||
mm_chars1 = _mm_cvtsi32_si128(*reinterpret_cast< BOOST_MAY_ALIAS int const* >(p));
|
||||
}
|
||||
|
||||
if ((n & 2u) != 0u)
|
||||
{
|
||||
p -= 2;
|
||||
mm_chars1 = _mm_unpacklo_epi64(_mm_loadl_epi64(reinterpret_cast< const __m128i* >(p)), mm_chars1);
|
||||
}
|
||||
|
||||
if ((n & 4u) != 0u)
|
||||
{
|
||||
p -= 4;
|
||||
mm_chars2 = mm_chars1;
|
||||
mm_chars1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||
}
|
||||
|
||||
if ((n & 8u) != 0u)
|
||||
{
|
||||
p -= 8;
|
||||
mm_chars4 = mm_chars3;
|
||||
mm_chars3 = mm_chars2;
|
||||
mm_chars1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||
mm_chars2 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 4));
|
||||
}
|
||||
mm_chars1 = _mm_packus_epi32(mm_chars1, mm_chars2);
|
||||
mm_chars3 = _mm_packus_epi32(mm_chars3, mm_chars4);
|
||||
return _mm_packus_epi16(mm_chars1, mm_chars3);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(BOOST_GCC) && (BOOST_GCC >= 40600)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* Converts a string of 36 hexadecimal UUID characters in `mm_charsN` (with the last 4 characters in the lowest 32 bits of `mm_chars3`)
|
||||
* to a 16-byte binary value and, if successful, stores it into `data`. If not successful, stores the failure character position
|
||||
* to `end_pos` and error code to `ec`.
|
||||
*/
|
||||
BOOST_FORCEINLINE void from_chars_simd_core
|
||||
(
|
||||
__m128i mm_chars1, __m128i mm_chars2, __m128i mm_chars3,
|
||||
__m128i const& mm_expected_dashes,
|
||||
__m128i const& mm_char_code1_cmp,
|
||||
__m128i const& mm_char_code2_cmp,
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
__m128i const& mm_char_code0_sub,
|
||||
__m128i const& mm_char_code1_sub,
|
||||
__m128i const& mm_char_code2_sub,
|
||||
#else
|
||||
std::uint32_t char_code_sub,
|
||||
#endif
|
||||
std::uint8_t* data, unsigned int& end_pos, from_chars_error& ec
|
||||
)
|
||||
{
|
||||
using constants = uuids::detail::from_chars_simd_constants< void >;
|
||||
|
||||
// mm_chars1 mm_chars2 mm_chars3
|
||||
// |01234567-89ab-cd|ef-0123-456789ab|cdefXXXXXXXXXXXX|
|
||||
//
|
||||
// Check if dashes are in the expected positions
|
||||
{
|
||||
// mm_dashes
|
||||
// |-89ab-cdef-0123-|
|
||||
__m128i mm_dashes = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(mm_chars1), _mm_castsi128_pd(mm_chars2), _MM_SHUFFLE2(0, 1)));
|
||||
if (BOOST_UNLIKELY(!_mm_test_all_zeros(_mm_xor_si128(mm_dashes, mm_expected_dashes), constants::mm_dashes_mask)))
|
||||
{
|
||||
// Some of the dashes are missing
|
||||
mm_dashes = _mm_and_si128(mm_dashes, constants::mm_dashes_mask);
|
||||
std::uint32_t dash_mask = _mm_movemask_epi8(_mm_cmpeq_epi8(mm_dashes, mm_expected_dashes));
|
||||
unsigned int pos = detail::countr_zero_nz(~dash_mask) + 8u;
|
||||
if (pos < end_pos)
|
||||
{
|
||||
end_pos = pos;
|
||||
ec = from_chars_error::dash_expected;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the dashes, deinterleave low and high half-byte digit characters
|
||||
#if defined(BOOST_UUID_USE_AVX10_1) && defined(BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B)
|
||||
// Note: This code path is disabled by default, unless BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B is defined, because vpermi2b/vpermt2b
|
||||
// instructions are slow on Intel Golden Cove and older microarchitectures, making the alternative version below faster.
|
||||
// This code path may still be beneficial on AMD CPUs or when Intel optimizes vpermi2b/vpermt2b.
|
||||
// mm_chars1: |02468ace13579bdf|
|
||||
// mm_chars2: |02468ace13579bdf|
|
||||
mm_chars1 = _mm_permutex2var_epi8(mm_chars1, constants::mm_split_half_bytes_pattern1, mm_chars2);
|
||||
mm_chars2 = _mm_permutex2var_epi8(mm_chars2, constants::mm_split_half_bytes_pattern2, mm_chars3);
|
||||
#else
|
||||
// mm_chars1: |02468acZ13579bdZ|
|
||||
// mm_chars2: |02468aZe13579bZf|
|
||||
// mm_chars3: |ZZZZZZceZZZZZZdf|
|
||||
mm_chars1 = _mm_shuffle_epi8(mm_chars1, constants::mm_split_half_bytes_pattern1);
|
||||
mm_chars2 = _mm_shuffle_epi8(mm_chars2, constants::mm_split_half_bytes_pattern2);
|
||||
mm_chars3 = _mm_shuffle_epi8(mm_chars3, constants::mm_split_half_bytes_pattern3);
|
||||
|
||||
// mm_chars1: |02468ace13579bdf|
|
||||
// mm_chars2: |02468ace13579bdf|
|
||||
// Avoid using vpblendvb, which is slow on Intel
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
mm_chars1 = _mm_ternarylogic_epi64(mm_chars1, mm_chars2, constants::mm_split_half_bytes_blend_mask, 0xD8); // (_MM_TERNLOG_A & ~_MM_TERNLOG_C) | (_MM_TERNLOG_B & _MM_TERNLOG_C)
|
||||
#elif defined(BOOST_UUID_USE_AVX)
|
||||
mm_chars1 = _mm_or_si128(mm_chars1, _mm_and_si128(mm_chars2, constants::mm_split_half_bytes_blend_mask));
|
||||
#else
|
||||
mm_chars1 = _mm_blendv_epi8(mm_chars1, mm_chars2, constants::mm_split_half_bytes_blend_mask);
|
||||
#endif
|
||||
mm_chars2 = _mm_blend_epi16(mm_chars2, mm_chars3, 0x88);
|
||||
#endif
|
||||
|
||||
// Group half-byte digits
|
||||
__m128i mm_lo = _mm_unpacklo_epi64(mm_chars1, mm_chars2);
|
||||
__m128i mm_hi = _mm_unpackhi_epi64(mm_chars1, mm_chars2);
|
||||
|
||||
// Convert characters to 8-bit integers. The algorithm is basically as follows:
|
||||
//
|
||||
// - Order the '0'-'9', 'A'-'F' and 'a'-'f' groups of characters in the order of increasing their character code values. From them, pick the two with
|
||||
// the highest character codes. E.g. in ASCII, that would be 'A'-'F' and 'a'-'f'. This is handled at compile time in from_chars_simd_char_constants.
|
||||
// - Let mm_char_code2_cmp be a vector of the smallest character code of the second picked group minus 1 (i.e. 'a' - 1), and mm_char_code1_cmp - that
|
||||
// of the first picked group ('A' - 1).
|
||||
// - Compare the input hex characters for being greater than mm_char_code2_cmp and mm_char_code1_cmp. This gives the masks where the input contains
|
||||
// hexadecimal characters of the two greatest character groups, with the mask for mm_char_code1_cmp always including the one for mm_char_code2_cmp.
|
||||
// Call those masks mm_char_code1_mask and mm_char_code2_mask.
|
||||
// - For each of the three groups of characters, have a corresponding vector of subtrahends, such that when it is subtracted from the input character codes,
|
||||
// the characters in the group are mapped onto the corresponding value in the range 0-15. I.e. these would be '0', 'A' + 10 and 'a' + 10. Those are called
|
||||
// mm_char_code0_sub, mm_char_code1_sub and mm_char_code2_sub, corresponding to the ordered list of groups of characters.
|
||||
// - Combine the subtrahends such that for elements where mm_char_code2_mask is non-zero, mm_char_code2_sub is used, otherwise where
|
||||
// mm_char_code1_mask is non-zero, mm_char_code1_sub is used, otherwise mm_char_code0_sub is used.
|
||||
// - Subtract the combined subtrahends from the input character codes.
|
||||
//
|
||||
// The result will be a vector of bytes, where the values 0-15 correspond the hexadecimal characters on input.
|
||||
//
|
||||
// Note that there is one caveat due to the fact that there are only signed byte comparisons until AVX-512. This is a problem if the character encoding has
|
||||
// hexadecimal character codes with the highest bit set to 1. This is handled in from_chars_simd_char_constants by constructing mm_char_code1 and
|
||||
// mm_char_code2 in such a way that signed comparisons work as described. We also use signed comparisons in AVX-512 to reuse the same constants.
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||
__mmask16 k_char_code2_mask_lo = _mm_cmpgt_epi8_mask(mm_lo, mm_char_code2_cmp);
|
||||
__mmask16 k_char_code2_mask_hi = _mm_cmpgt_epi8_mask(mm_hi, mm_char_code2_cmp);
|
||||
|
||||
__mmask16 k_char_code1_mask_lo = _mm_cmpgt_epi8_mask(mm_lo, mm_char_code1_cmp);
|
||||
__mmask16 k_char_code1_mask_hi = _mm_cmpgt_epi8_mask(mm_hi, mm_char_code1_cmp);
|
||||
|
||||
__m128i mm_char_code_sub_lo = _mm_mask_blend_epi8(k_char_code2_mask_lo, mm_char_code1_sub, mm_char_code2_sub);
|
||||
__m128i mm_char_code_sub_hi = _mm_mask_blend_epi8(k_char_code2_mask_hi, mm_char_code1_sub, mm_char_code2_sub);
|
||||
|
||||
mm_char_code_sub_lo = _mm_mask_blend_epi8(k_char_code1_mask_lo, mm_char_code0_sub, mm_char_code_sub_lo);
|
||||
mm_char_code_sub_hi = _mm_mask_blend_epi8(k_char_code1_mask_hi, mm_char_code0_sub, mm_char_code_sub_hi);
|
||||
#elif defined(BOOST_UUID_USE_AVX)
|
||||
// Unlike the legacy SSE4.1 pblendvb instruction, the VEX-coded vpblendvb is slow on Intel. Use a different approach:
|
||||
// - Each vpcmpgtb produces a mask, where 0 indicates false and -1 - true.
|
||||
// - mm_char_code1_mask_* always overlaps with the corresponding mm_char_code2_mask_*, which means adding them
|
||||
// produces a vector where 0 means none of the vpcmpgtb matched the value, -1 - where mm_char_code1_mask_* matched
|
||||
// and -2 - where mm_char_code2_mask_* matched.
|
||||
// - Shift that mask to the positive range by adding 2.
|
||||
// - Use it as a pattern for vpshufb to place one of the 3 lowest bytes in char_code_sub to the positions corresponding
|
||||
// to the matched characters. This will be the mm_char_code_sub_* subtrahends.
|
||||
__m128i mm_char_code2_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code2_cmp);
|
||||
__m128i mm_char_code2_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code2_cmp);
|
||||
|
||||
__m128i mm_char_code1_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code1_cmp);
|
||||
__m128i mm_char_code1_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code1_cmp);
|
||||
|
||||
__m128i mm_char_code_pattern_lo = _mm_add_epi8(mm_char_code1_mask_lo, mm_char_code2_mask_lo);
|
||||
__m128i mm_char_code_pattern_hi = _mm_add_epi8(mm_char_code1_mask_hi, mm_char_code2_mask_hi);
|
||||
|
||||
mm_char_code_pattern_lo = _mm_add_epi8(mm_char_code_pattern_lo, constants::mm_2);
|
||||
mm_char_code_pattern_hi = _mm_add_epi8(mm_char_code_pattern_hi, constants::mm_2);
|
||||
|
||||
const __m128i mm_char_code_sub = _mm_cvtsi32_si128(char_code_sub);
|
||||
__m128i mm_char_code_sub_lo = _mm_shuffle_epi8(mm_char_code_sub, mm_char_code_pattern_lo);
|
||||
__m128i mm_char_code_sub_hi = _mm_shuffle_epi8(mm_char_code_sub, mm_char_code_pattern_hi);
|
||||
#else
|
||||
__m128i mm_char_code2_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code2_cmp);
|
||||
__m128i mm_char_code2_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code2_cmp);
|
||||
|
||||
__m128i mm_char_code1_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code1_cmp);
|
||||
__m128i mm_char_code1_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code1_cmp);
|
||||
|
||||
__m128i mm_char_code_sub_lo = _mm_blendv_epi8(mm_char_code1_sub, mm_char_code2_sub, mm_char_code2_mask_lo);
|
||||
__m128i mm_char_code_sub_hi = _mm_blendv_epi8(mm_char_code1_sub, mm_char_code2_sub, mm_char_code2_mask_hi);
|
||||
|
||||
mm_char_code_sub_lo = _mm_blendv_epi8(mm_char_code0_sub, mm_char_code_sub_lo, mm_char_code1_mask_lo);
|
||||
mm_char_code_sub_hi = _mm_blendv_epi8(mm_char_code0_sub, mm_char_code_sub_hi, mm_char_code1_mask_hi);
|
||||
#endif
|
||||
|
||||
mm_lo = _mm_sub_epi8(mm_lo, mm_char_code_sub_lo);
|
||||
mm_hi = _mm_sub_epi8(mm_hi, mm_char_code_sub_hi);
|
||||
|
||||
// Check hexadecimal character validity. Proper hexadecimal characters always convert to values of 0-15 and any other characters convert
|
||||
// to values outside that range. Which means if the upper 4 bits of a resulting integer are non-zero then the corresponding character was invalid.
|
||||
if (BOOST_LIKELY(_mm_test_all_zeros(_mm_or_si128(mm_lo, mm_hi), constants::mm_F0)))
|
||||
{
|
||||
if (BOOST_LIKELY(ec == from_chars_error::none))
|
||||
{
|
||||
__m128i mm = _mm_or_si128(mm_lo, _mm_slli_epi32(mm_hi, 4));
|
||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(data), mm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Some of the hex digits are invalid
|
||||
const __m128i mm_0 = _mm_setzero_si128();
|
||||
__m128i mm_hi_bits_lo = _mm_and_si128(mm_lo, constants::mm_F0);
|
||||
__m128i mm_hi_bits_hi = _mm_and_si128(mm_hi, constants::mm_F0);
|
||||
mm_hi_bits_lo = _mm_cmpeq_epi8(mm_hi_bits_lo, mm_0);
|
||||
mm_hi_bits_hi = _mm_cmpeq_epi8(mm_hi_bits_hi, mm_0);
|
||||
|
||||
std::uint32_t digits_mask_lo = _mm_movemask_epi8(mm_hi_bits_lo);
|
||||
std::uint32_t digits_mask_hi = _mm_movemask_epi8(mm_hi_bits_hi);
|
||||
|
||||
unsigned int pos_lo = detail::countr_zero_nz(~digits_mask_lo) * 2u + 1u;
|
||||
unsigned int pos_hi = detail::countr_zero_nz(~digits_mask_hi) * 2u;
|
||||
unsigned int pos = pos_lo < pos_hi ? pos_lo : pos_hi;
|
||||
if (pos >= 8u)
|
||||
{
|
||||
unsigned int dash_count = (pos - 4u) / 4u;
|
||||
if (dash_count > 4u)
|
||||
dash_count = 4u;
|
||||
pos += dash_count;
|
||||
}
|
||||
|
||||
if (pos < end_pos)
|
||||
{
|
||||
end_pos = pos;
|
||||
ec = from_chars_error::hex_digit_expected;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template< typename Char >
|
||||
BOOST_FORCEINLINE from_chars_result< Char > from_chars_simd(const Char* begin, const Char* end, uuid& u) noexcept
|
||||
{
|
||||
static_assert(sizeof(Char) == 1u || sizeof(Char) == 2u || sizeof(Char) == 4u, "Boost.UUID: Unsupported output character type for from_chars");
|
||||
|
||||
using char_constants = uuids::detail::from_chars_simd_char_constants< Char >;
|
||||
|
||||
unsigned int end_pos = 36u;
|
||||
from_chars_error ec = from_chars_error::none;
|
||||
__m128i mm_chars1, mm_chars2, mm_chars3;
|
||||
if (BOOST_LIKELY((end - begin) >= 36))
|
||||
{
|
||||
mm_chars1 = from_chars_simd_load_traits< Char >::load_packed_16(begin);
|
||||
mm_chars2 = from_chars_simd_load_traits< Char >::load_packed_16(begin + 16);
|
||||
mm_chars3 = from_chars_simd_load_traits< Char >::load_packed_4(begin + 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
end_pos = static_cast< unsigned int >(end - begin);
|
||||
ec = from_chars_error::unexpected_end_of_input;
|
||||
|
||||
const Char* p = begin;
|
||||
unsigned int n = static_cast< unsigned int >(end - begin);
|
||||
if (n >= 16u)
|
||||
{
|
||||
mm_chars1 = from_chars_simd_load_traits< Char >::load_packed_16(p);
|
||||
p += 16;
|
||||
n -= 16u;
|
||||
}
|
||||
else
|
||||
{
|
||||
mm_chars1 = from_chars_simd_load_traits< Char >::load_packed_n(p, n);
|
||||
p += n;
|
||||
n = 0u;
|
||||
}
|
||||
|
||||
if (n >= 16u)
|
||||
{
|
||||
mm_chars2 = from_chars_simd_load_traits< Char >::load_packed_16(p);
|
||||
p += 16;
|
||||
n -= 16u;
|
||||
}
|
||||
else
|
||||
{
|
||||
mm_chars2 = from_chars_simd_load_traits< Char >::load_packed_n(p, n);
|
||||
p += n;
|
||||
n = 0u;
|
||||
}
|
||||
|
||||
mm_chars3 = from_chars_simd_load_traits< Char >::load_packed_n(p, n);
|
||||
}
|
||||
|
||||
from_chars_simd_core
|
||||
(
|
||||
mm_chars1, mm_chars2, mm_chars3,
|
||||
char_constants::mm_expected_dashes,
|
||||
char_constants::mm_char_code1_cmp,
|
||||
char_constants::mm_char_code2_cmp,
|
||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
||||
char_constants::mm_char_code0_sub,
|
||||
char_constants::mm_char_code1_sub,
|
||||
char_constants::mm_char_code2_sub,
|
||||
#else
|
||||
char_constants::char_code_sub,
|
||||
#endif
|
||||
u.data(), end_pos, ec
|
||||
);
|
||||
|
||||
return { begin + end_pos, ec };
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace uuids
|
||||
} // namespace boost
|
||||
|
||||
#endif // defined(BOOST_UUID_USE_SSE41)
|
||||
|
||||
#endif // BOOST_UUID_DETAIL_FROM_CHARS_X86_HPP_INCLUDED
|
||||
@@ -11,7 +11,7 @@
|
||||
using namespace boost::uuids;
|
||||
|
||||
template<class Ch, std::size_t N>
|
||||
BOOST_CXX14_CONSTEXPR uuid uuid_from_string( Ch const (&str)[ N ] )
|
||||
BOOST_UUID_CXX14_CONSTEXPR_RT uuid uuid_from_string( Ch const (&str)[ N ] )
|
||||
{
|
||||
Ch const* first = str;
|
||||
Ch const* last = first + N - 1;
|
||||
@@ -22,7 +22,7 @@ BOOST_CXX14_CONSTEXPR uuid uuid_from_string( Ch const (&str)[ N ] )
|
||||
return u;
|
||||
}
|
||||
|
||||
#define TEST(str) { BOOST_CXX14_CONSTEXPR auto u = uuid_from_string(str); BOOST_TEST_EQ(u, expected); }
|
||||
#define TEST(str) { BOOST_UUID_CXX14_CONSTEXPR_RT auto u = uuid_from_string(str); BOOST_TEST_EQ(u, expected); }
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
@@ -17,7 +17,7 @@ struct test_result
|
||||
};
|
||||
|
||||
template<class Ch, std::size_t N>
|
||||
BOOST_CXX14_CONSTEXPR test_result test( Ch const (&str)[ N ] )
|
||||
BOOST_UUID_CXX14_CONSTEXPR_RT test_result test( Ch const (&str)[ N ] )
|
||||
{
|
||||
Ch const* first = str;
|
||||
Ch const* last = first + N - 1;
|
||||
@@ -28,7 +28,7 @@ BOOST_CXX14_CONSTEXPR test_result test( Ch const (&str)[ N ] )
|
||||
return { r.ptr - first, r.ec };
|
||||
}
|
||||
|
||||
#define TEST(Str, Pos, Ec) { BOOST_CXX14_CONSTEXPR auto r = test(Str); BOOST_TEST_EQ(Pos, r.pos); BOOST_TEST_EQ(static_cast<int>(Ec), static_cast<int>(r.ec)); }
|
||||
#define TEST(Str, Pos, Ec) { BOOST_UUID_CXX14_CONSTEXPR_RT auto r = test(Str); BOOST_TEST_EQ(Pos, r.pos); BOOST_TEST_EQ(static_cast<int>(Ec), static_cast<int>(r.ec)); }
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user