mirror of
https://github.com/boostorg/uuid.git
synced 2026-01-19 04:42:16 +00:00
Merge pull request #190 from Lastique/feature/to_from_chars_sse2
Add SSE2 implementations of `to_chars` and `from_chars`
This commit is contained in:
66
.github/workflows/ci.yml
vendored
66
.github/workflows/ci.yml
vendored
@@ -92,6 +92,14 @@ jobs:
|
|||||||
os: ubuntu-latest
|
os: ubuntu-latest
|
||||||
install: g++-15-multilib
|
install: g++-15-multilib
|
||||||
address-model: 32,64
|
address-model: 32,64
|
||||||
|
- toolset: gcc-13
|
||||||
|
cxxstd: "11,14,17,20,2b"
|
||||||
|
instruction-set: core2
|
||||||
|
cpu-requirements: [ ssse3 ]
|
||||||
|
os: ubuntu-latest
|
||||||
|
container: ubuntu:24.04
|
||||||
|
install: g++-13-multilib
|
||||||
|
address-model: 32,64
|
||||||
- toolset: gcc-13
|
- toolset: gcc-13
|
||||||
cxxstd: "11,14,17,20,2b"
|
cxxstd: "11,14,17,20,2b"
|
||||||
instruction-set: nehalem
|
instruction-set: nehalem
|
||||||
@@ -100,6 +108,14 @@ jobs:
|
|||||||
container: ubuntu:24.04
|
container: ubuntu:24.04
|
||||||
install: g++-13-multilib
|
install: g++-13-multilib
|
||||||
address-model: 32,64
|
address-model: 32,64
|
||||||
|
- toolset: gcc-13
|
||||||
|
cxxstd: "11,14,17,20,2b"
|
||||||
|
instruction-set: sandy-bridge
|
||||||
|
cpu-requirements: [ avx ]
|
||||||
|
os: ubuntu-latest
|
||||||
|
container: ubuntu:24.04
|
||||||
|
install: g++-13-multilib
|
||||||
|
address-model: 32,64
|
||||||
- toolset: gcc-13
|
- toolset: gcc-13
|
||||||
cxxstd: "11,14,17,20,2b"
|
cxxstd: "11,14,17,20,2b"
|
||||||
instruction-set: haswell
|
instruction-set: haswell
|
||||||
@@ -124,6 +140,16 @@ jobs:
|
|||||||
container: ubuntu:24.04
|
container: ubuntu:24.04
|
||||||
install: g++-13-multilib
|
install: g++-13-multilib
|
||||||
address-model: 32,64
|
address-model: 32,64
|
||||||
|
# Experimental features
|
||||||
|
- toolset: gcc-13
|
||||||
|
cxxstd: "11,14,17,20,2b"
|
||||||
|
instruction-set: rocketlake
|
||||||
|
cpu-requirements: [ avx512f, avx512cd, avx512vl, avx512dq, avx512bw, avx512vbmi, avx512_vbmi2, avx512_bitalg, bmi1, bmi2 ]
|
||||||
|
defines: [ BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM, BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B ]
|
||||||
|
os: ubuntu-latest
|
||||||
|
container: ubuntu:24.04
|
||||||
|
install: g++-13-multilib
|
||||||
|
address-model: 32,64
|
||||||
- toolset: clang
|
- toolset: clang
|
||||||
compiler: clang++-3.9
|
compiler: clang++-3.9
|
||||||
cxxstd: "11,14"
|
cxxstd: "11,14"
|
||||||
@@ -238,6 +264,14 @@ jobs:
|
|||||||
container: ubuntu:25.10
|
container: ubuntu:25.10
|
||||||
os: ubuntu-latest
|
os: ubuntu-latest
|
||||||
install: clang-21
|
install: clang-21
|
||||||
|
- toolset: clang
|
||||||
|
compiler: clang++-17
|
||||||
|
cxxstd: "11,14,17,20,2b"
|
||||||
|
instruction-set: core2
|
||||||
|
cpu-requirements: [ ssse3 ]
|
||||||
|
container: ubuntu:24.04
|
||||||
|
os: ubuntu-latest
|
||||||
|
install: clang-17
|
||||||
- toolset: clang
|
- toolset: clang
|
||||||
compiler: clang++-17
|
compiler: clang++-17
|
||||||
cxxstd: "11,14,17,20,2b"
|
cxxstd: "11,14,17,20,2b"
|
||||||
@@ -246,6 +280,14 @@ jobs:
|
|||||||
container: ubuntu:24.04
|
container: ubuntu:24.04
|
||||||
os: ubuntu-latest
|
os: ubuntu-latest
|
||||||
install: clang-17
|
install: clang-17
|
||||||
|
- toolset: clang
|
||||||
|
compiler: clang++-17
|
||||||
|
cxxstd: "11,14,17,20,2b"
|
||||||
|
instruction-set: sandy-bridge
|
||||||
|
cpu-requirements: [ avx ]
|
||||||
|
container: ubuntu:24.04
|
||||||
|
os: ubuntu-latest
|
||||||
|
install: clang-17
|
||||||
- toolset: clang
|
- toolset: clang
|
||||||
compiler: clang++-17
|
compiler: clang++-17
|
||||||
cxxstd: "11,14,17,20,2b"
|
cxxstd: "11,14,17,20,2b"
|
||||||
@@ -270,6 +312,16 @@ jobs:
|
|||||||
container: ubuntu:24.04
|
container: ubuntu:24.04
|
||||||
os: ubuntu-latest
|
os: ubuntu-latest
|
||||||
install: clang-17
|
install: clang-17
|
||||||
|
# Experimental features
|
||||||
|
- toolset: clang
|
||||||
|
compiler: clang++-17
|
||||||
|
cxxstd: "11,14,17,20,2b"
|
||||||
|
instruction-set: rocketlake
|
||||||
|
cpu-requirements: [ avx512f, avx512cd, avx512vl, avx512dq, avx512bw, avx512vbmi, avx512_vbmi2, avx512_bitalg, bmi1, bmi2 ]
|
||||||
|
defines: [ BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM, BOOST_UUID_FROM_CHARS_X86_USE_VPERMI2B ]
|
||||||
|
container: ubuntu:24.04
|
||||||
|
os: ubuntu-latest
|
||||||
|
install: clang-17
|
||||||
- toolset: clang
|
- toolset: clang
|
||||||
os: macos-14
|
os: macos-14
|
||||||
cxxstd: "11,14,17,20,2b"
|
cxxstd: "11,14,17,20,2b"
|
||||||
@@ -346,6 +398,7 @@ jobs:
|
|||||||
cd ../boost-root
|
cd ../boost-root
|
||||||
ADDRMD=${{matrix.address-model}}
|
ADDRMD=${{matrix.address-model}}
|
||||||
INSTRUCTION_SET=${{matrix.instruction-set}}
|
INSTRUCTION_SET=${{matrix.instruction-set}}
|
||||||
|
b2_args=(-j2 libs/$LIBRARY/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} ${ADDRMD:+address-model=$ADDRMD} ${INSTRUCTION_SET:+instruction-set=$INSTRUCTION_SET} variant=debug,release)
|
||||||
if [ -n "${{matrix.cpu-requirements}}" ]
|
if [ -n "${{matrix.cpu-requirements}}" ]
|
||||||
then
|
then
|
||||||
cpu_flags="$(lscpu | grep -F "Flags:" | sed "s/^Flags:\\s*//")";
|
cpu_flags="$(lscpu | grep -F "Flags:" | sed "s/^Flags:\\s*//")";
|
||||||
@@ -355,12 +408,19 @@ jobs:
|
|||||||
if ! [[ "$cpu_flags" =~ $re ]]
|
if ! [[ "$cpu_flags" =~ $re ]]
|
||||||
then
|
then
|
||||||
echo "CPU lacks required feature: $requirement"
|
echo "CPU lacks required feature: $requirement"
|
||||||
echo "Skipping testing"
|
echo "Skipping running tests"
|
||||||
exit 0
|
export BOOST_UUID_SKIP_RUNNING_TESTS=1
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
./b2 -j2 libs/$LIBRARY/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} ${ADDRMD:+address-model=$ADDRMD} ${INSTRUCTION_SET:+instruction-set=$INSTRUCTION_SET} variant=debug,release
|
# Forward the optional per-job list of preprocessor macros to b2: every entry
# of the job's matrix "defines" list becomes one define=<macro> argument
# appended to the b2_args array that was initialised earlier in this step.
if [ -n "${{matrix.defines}}" ]
|
||||||
|
then
|
||||||
|
# The list is joined with spaces by the workflow expression and then
# word-split by the shell, so each macro name is visited once.
for define in ${{join(matrix.defines, ' ')}}
|
||||||
|
do
|
||||||
|
b2_args+=("define=$define")
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
# Run the build/test with the accumulated arguments; the quoted array
# expansion passes each element as a separate argument.
./b2 "${b2_args[@]}"
|
||||||
|
|
||||||
windows:
|
windows:
|
||||||
strategy:
|
strategy:
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
#include <boost/uuid/detail/config.hpp>
|
#include <boost/uuid/detail/config.hpp>
|
||||||
#include <boost/uuid/detail/is_constant_evaluated.hpp>
|
#include <boost/uuid/detail/is_constant_evaluated.hpp>
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_SSE41)
|
#if defined(BOOST_UUID_USE_SSE2)
|
||||||
# include <boost/uuid/detail/from_chars_x86.hpp>
|
# include <boost/uuid/detail/from_chars_x86.hpp>
|
||||||
|
|
||||||
#elif defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
#elif defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||||
@@ -27,7 +27,7 @@ template<class Ch>
|
|||||||
BOOST_UUID_CXX14_CONSTEXPR_RT inline
|
BOOST_UUID_CXX14_CONSTEXPR_RT inline
|
||||||
from_chars_result<Ch> from_chars( Ch const* first, Ch const* last, uuid& u ) noexcept
|
from_chars_result<Ch> from_chars( Ch const* first, Ch const* last, uuid& u ) noexcept
|
||||||
{
|
{
|
||||||
#if defined(BOOST_UUID_USE_SSE41)
|
#if defined(BOOST_UUID_USE_SSE2)
|
||||||
if( detail::is_constant_evaluated_rt() )
|
if( detail::is_constant_evaluated_rt() )
|
||||||
{
|
{
|
||||||
return detail::from_chars_generic( first, last, u );
|
return detail::from_chars_generic( first, last, u );
|
||||||
|
|||||||
@@ -7,8 +7,9 @@
|
|||||||
|
|
||||||
#include <boost/uuid/detail/config.hpp>
|
#include <boost/uuid/detail/config.hpp>
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_SSE41)
|
#if defined(BOOST_UUID_USE_SSE2)
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <boost/uuid/uuid.hpp>
|
#include <boost/uuid/uuid.hpp>
|
||||||
@@ -28,22 +29,48 @@ BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, AVX512v1" )
|
|||||||
#elif defined(BOOST_UUID_USE_AVX)
|
#elif defined(BOOST_UUID_USE_AVX)
|
||||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, AVX" )
|
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, AVX" )
|
||||||
|
|
||||||
#else
|
#elif defined(BOOST_UUID_USE_SSE41)
|
||||||
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, SSE4.1" )
|
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, SSE4.1" )
|
||||||
|
|
||||||
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, SSSE3" )
|
||||||
|
|
||||||
|
#else
|
||||||
|
BOOST_PRAGMA_MESSAGE( "Using from_chars_x86.hpp, SSE2" )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_USE_AVX)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#else
|
#elif defined(BOOST_UUID_USE_SSE41)
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
#include <tmmintrin.h>
|
||||||
|
#else
|
||||||
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#pragma intrinsic(_BitScanForward)
|
#pragma intrinsic(_BitScanForward)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Unlike the legacy SSE4.1 pblendvb instruction, the VEX-coded vpblendvb is slow on Intel Lion Cove, Skymont and older.
|
||||||
|
// Newer microarchitectures are unknown at the time of this writing. Also, on Intel Haswell/Broadwell, even the SSE4.1
|
||||||
|
// pblendvb is slow.
|
||||||
|
#if !defined(BOOST_UUID_FROM_CHARS_X86_SLOW_PBLENDVB) && \
|
||||||
|
(defined(__tune_haswell__) || defined(__tune_broadwell__) || defined(BOOST_UUID_USE_AVX))
|
||||||
|
#define BOOST_UUID_FROM_CHARS_X86_SLOW_PBLENDVB
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Opt in to pblendvb automatically when SSE4.1 is enabled and the instruction has not been
// flagged as slow via BOOST_UUID_FROM_CHARS_X86_SLOW_PBLENDVB. The !defined() check only
// prevents redefining the macro if it was already defined before this point.
#if !defined(BOOST_UUID_FROM_CHARS_X86_USE_PBLENDVB) && defined(BOOST_UUID_USE_SSE41) && !defined(BOOST_UUID_FROM_CHARS_X86_SLOW_PBLENDVB)
|
||||||
|
#define BOOST_UUID_FROM_CHARS_X86_USE_PBLENDVB
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Internal switch selecting the blend-based code path. It is enabled for AVX-512, for SSE4.1
// builds that are allowed to use pblendvb, and for builds without SSSE3. In this file it
// guards the mm_char_code{0,1,2}_sub constants and the matching from_chars_simd_core
// parameters; when it is NOT defined, the mm_2 constant is provided instead.
#if defined(BOOST_UUID_USE_AVX512_V1) || (defined(BOOST_UUID_USE_SSE41) && defined(BOOST_UUID_FROM_CHARS_X86_USE_PBLENDVB)) || !defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
#define BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace boost {
|
namespace boost {
|
||||||
namespace uuids {
|
namespace uuids {
|
||||||
namespace detail {
|
namespace detail {
|
||||||
@@ -109,11 +136,11 @@ struct from_chars_simd_char_constants
|
|||||||
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
||||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
};
|
};
|
||||||
|
|
||||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||||
@@ -146,7 +173,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCha
|
|||||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
||||||
}};
|
}};
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
|
||||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code2_sub =
|
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_code2_sub =
|
||||||
@@ -169,7 +196,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_char_constants< Char, IsCha
|
|||||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
||||||
}};
|
}};
|
||||||
|
|
||||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
|
||||||
template< bool IsWCharASCIICompatible >
|
template< bool IsWCharASCIICompatible >
|
||||||
struct from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >
|
struct from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >
|
||||||
@@ -224,11 +251,11 @@ struct from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >
|
|||||||
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
||||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
};
|
};
|
||||||
|
|
||||||
template< bool IsWCharASCIICompatible >
|
template< bool IsWCharASCIICompatible >
|
||||||
@@ -264,7 +291,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false
|
|||||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
||||||
}};
|
}};
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
|
||||||
template< bool IsWCharASCIICompatible >
|
template< bool IsWCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code2_sub =
|
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_code2_sub =
|
||||||
@@ -287,7 +314,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_char_constants< char, false
|
|||||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
||||||
}};
|
}};
|
||||||
|
|
||||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
|
||||||
template< bool IsCharASCIICompatible >
|
template< bool IsCharASCIICompatible >
|
||||||
struct from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >
|
struct from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >
|
||||||
@@ -347,11 +374,11 @@ struct from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >
|
|||||||
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
static const simd_vector128< std::uint8_t > mm_char_code2_cmp;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
static const simd_vector128< std::uint8_t > mm_char_code1_cmp;
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code2_sub;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code1_sub;
|
||||||
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
static const simd_vector128< std::uint8_t > mm_char_code0_sub;
|
||||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
};
|
};
|
||||||
|
|
||||||
template< bool IsCharASCIICompatible >
|
template< bool IsCharASCIICompatible >
|
||||||
@@ -387,7 +414,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, Is
|
|||||||
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
static_cast< std::uint8_t >(char_code1 - 1u), static_cast< std::uint8_t >(char_code1 - 1u)
|
||||||
}};
|
}};
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
|
||||||
template< bool IsCharASCIICompatible >
|
template< bool IsCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code2_sub =
|
const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_code2_sub =
|
||||||
@@ -410,7 +437,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_char_constants< wchar_t, Is
|
|||||||
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub, char_code0_sub
|
||||||
}};
|
}};
|
||||||
|
|
||||||
#endif // defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
|
||||||
|
|
||||||
template< typename >
|
template< typename >
|
||||||
@@ -425,12 +452,18 @@ struct from_chars_simd_constants
|
|||||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern1;
|
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern1;
|
||||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern2;
|
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern2;
|
||||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern3;
|
static const simd_vector128< std::uint8_t > mm_split_half_bytes_pattern3;
|
||||||
static const simd_vector128< std::uint8_t > mm_split_half_bytes_blend_mask;
|
static const simd_vector128< std::uint8_t > mm_split_half_bytes_blend_mask1;
|
||||||
|
#if !defined(BOOST_UUID_USE_SSE41)
|
||||||
|
static const simd_vector128< std::uint8_t > mm_split_half_bytes_blend_mask2;
|
||||||
|
#endif
|
||||||
|
#if !defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
static const simd_vector128< std::uint8_t > mm_split_half_byte_chars_mask;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const simd_vector128< std::uint8_t > mm_F0;
|
static const simd_vector128< std::uint8_t > mm_F0;
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_AVX) && !defined(BOOST_UUID_USE_AVX512_V1)
|
#if !defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
static const simd_vector128< std::uint8_t > mm_2;
|
static const simd_vector128< std::uint8_t > mm_2;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
@@ -456,13 +489,23 @@ template< typename T >
|
|||||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern3 =
|
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_pattern3 =
|
||||||
{{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x03, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x02 }};
|
{{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x03, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x02 }};
|
||||||
template< typename T >
|
template< typename T >
|
||||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_blend_mask =
|
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_blend_mask1 =
|
||||||
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF }};
|
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF }};
|
||||||
|
// Second blend mask, only compiled when SSE4.1 is not available.
// Bytes 0-5 and 8-13 are 0xFF; bytes 6-7 and 14-15 are 0x00.
// NOTE(review): the code consuming this mask is outside this view - presumably it replaces a
// blend/shuffle step of the SSE4.1 path with and/or masking; confirm against from_chars_simd_core.
#if !defined(BOOST_UUID_USE_SSE41)
|
||||||
|
template< typename T >
|
||||||
|
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_bytes_blend_mask2 =
|
||||||
|
{{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00 }};
|
||||||
|
#endif
|
||||||
|
// Mask selecting every even-indexed byte (0xFF at bytes 0, 2, 4, ..., 14), only compiled
// when SSSE3 is not available.
// NOTE(review): presumably used by the SSE2-only path to separate alternating characters
// without pshufb; the use site is outside this view - confirm.
#if !defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
template< typename T >
|
||||||
|
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_split_half_byte_chars_mask =
|
||||||
|
{{ 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00 }};
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
template< typename T >
|
template< typename T >
|
||||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_F0 =
|
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_F0 =
|
||||||
{{ 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0 }};
|
{{ 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0 }};
|
||||||
#if defined(BOOST_UUID_USE_AVX) && !defined(BOOST_UUID_USE_AVX512_V1)
|
#if !defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
template< typename T >
|
template< typename T >
|
||||||
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_2 =
|
const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_2 =
|
||||||
{{ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 }};
|
{{ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 }};
|
||||||
@@ -478,7 +521,7 @@ const simd_vector128< std::uint8_t > from_chars_simd_constants< T >::mm_2 =
|
|||||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template< typename Char, unsigned int Size = sizeof(Char) >
|
template< typename Char, std::size_t Size = sizeof(Char) >
|
||||||
struct from_chars_simd_load_traits;
|
struct from_chars_simd_load_traits;
|
||||||
|
|
||||||
template< typename Char >
|
template< typename Char >
|
||||||
@@ -593,6 +636,38 @@ struct from_chars_simd_load_traits< Char, 2u >
|
|||||||
template< typename Char >
|
template< typename Char >
|
||||||
struct from_chars_simd_load_traits< Char, 4u >
|
struct from_chars_simd_load_traits< Char, 4u >
|
||||||
{
|
{
|
||||||
|
#if !defined(BOOST_UUID_USE_SSE41) && defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
static const simd_vector128< std::uint8_t > mm_deinterleave_epi16_pattern;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Packs the eight 32-bit elements of mm1 (result elements 0-3) and mm2 (result elements 4-7)
// into eight 16-bit elements. With SSE4.1 this is the native _mm_packus_epi32 instruction;
// otherwise the pack is emulated: the low and high 16-bit halves of every 32-bit element are
// gathered into separate vectors, and any element whose high half is non-zero is forced to 0xFFFF.
// NOTE(review): the emulation treats inputs as unsigned, so inputs with the top bit set yield
// 0xFFFF here, whereas the native instruction saturates negative inputs to 0 - confirm callers
// do not depend on that difference.
static BOOST_FORCEINLINE __m128i mm_packus_epi32(__m128i mm1, __m128i mm2) noexcept
|
||||||
|
{
|
||||||
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
|
return _mm_packus_epi32(mm1, mm2);
|
||||||
|
#else // defined(BOOST_UUID_USE_SSE41)
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
// Within each input, move the low 16-bit halves of the four 32-bit elements into the
// lower 8 bytes and the high halves into the upper 8 bytes.
mm1 = _mm_shuffle_epi8(mm1, mm_deinterleave_epi16_pattern);
|
||||||
|
mm2 = _mm_shuffle_epi8(mm2, mm_deinterleave_epi16_pattern);
|
||||||
|
|
||||||
|
// Combine the lower 8 bytes of both inputs (all low halves) and the upper 8 bytes
// of both inputs (all high halves).
__m128i mm_lo = _mm_unpacklo_epi64(mm1, mm2);
|
||||||
|
__m128i mm_hi = _mm_unpackhi_epi64(mm1, mm2);
|
||||||
|
#else // defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
// SSE2-only variant: swap the two middle 16-bit words in each 64-bit half, so that every
// even 32-bit lane holds two low halves and every odd 32-bit lane holds two high halves.
mm1 = _mm_shufflelo_epi16(mm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
mm2 = _mm_shufflelo_epi16(mm2, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
mm1 = _mm_shufflehi_epi16(mm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
mm2 = _mm_shufflehi_epi16(mm2, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
|
||||||
|
// Pick the even 32-bit lanes of mm1 and mm2 (low halves) and the odd lanes (high halves).
__m128i mm_lo = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(mm1), _mm_castsi128_ps(mm2), _MM_SHUFFLE(2, 0, 2, 0)));
|
||||||
|
__m128i mm_hi = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(mm1), _mm_castsi128_ps(mm2), _MM_SHUFFLE(3, 1, 3, 1)));
|
||||||
|
#endif // defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
const __m128i mm_0 = _mm_setzero_si128();
|
||||||
|
// All-ones vector, produced by comparing a register with itself.
const __m128i mm_FF = _mm_cmpeq_epi32(mm_0, mm_0);
|
||||||
|
|
||||||
|
// mm_sat is 0xFFFF in every 16-bit element whose source high half was non-zero
// (inverted "equals zero" comparison), and 0 elsewhere.
__m128i mm_sat = _mm_xor_si128(_mm_cmpeq_epi16(mm_hi, mm_0), mm_FF);
|
||||||
|
// OR-ing forces out-of-range elements to 0xFFFF and leaves in-range low halves unchanged.
return _mm_or_si128(mm_lo, mm_sat);
|
||||||
|
#endif // defined(BOOST_UUID_USE_SSE41)
|
||||||
|
}
|
||||||
|
|
||||||
static BOOST_FORCEINLINE __m128i load_packed_16(const Char* p) noexcept
|
static BOOST_FORCEINLINE __m128i load_packed_16(const Char* p) noexcept
|
||||||
{
|
{
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||||
@@ -605,8 +680,8 @@ struct from_chars_simd_load_traits< Char, 4u >
|
|||||||
return _mm_unpacklo_epi64(mm1, mm2);
|
return _mm_unpacklo_epi64(mm1, mm2);
|
||||||
#endif // defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
#endif // defined(BOOST_UUID_TO_FROM_CHARS_X86_USE_ZMM)
|
||||||
#else
|
#else
|
||||||
__m128i mm1 = _mm_packus_epi32(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 4)));
|
__m128i mm1 = mm_packus_epi32(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 4)));
|
||||||
__m128i mm2 = _mm_packus_epi32(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 8)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 12)));
|
__m128i mm2 = mm_packus_epi32(_mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 8)), _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 12)));
|
||||||
return _mm_packus_epi16(mm1, mm2);
|
return _mm_packus_epi16(mm1, mm2);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -618,7 +693,7 @@ struct from_chars_simd_load_traits< Char, 4u >
|
|||||||
#else
|
#else
|
||||||
__m128i mm1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
__m128i mm1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||||
__m128i mm2 = _mm_setzero_si128();
|
__m128i mm2 = _mm_setzero_si128();
|
||||||
return _mm_packus_epi16(_mm_packus_epi32(mm1, mm2), mm2);
|
return _mm_packus_epi16(mm_packus_epi32(mm1, mm2), mm2);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -667,13 +742,19 @@ struct from_chars_simd_load_traits< Char, 4u >
|
|||||||
mm_chars1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
mm_chars1 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
|
||||||
mm_chars2 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 4));
|
mm_chars2 = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p + 4));
|
||||||
}
|
}
|
||||||
mm_chars1 = _mm_packus_epi32(mm_chars1, mm_chars2);
|
mm_chars1 = mm_packus_epi32(mm_chars1, mm_chars2);
|
||||||
mm_chars3 = _mm_packus_epi32(mm_chars3, mm_chars4);
|
mm_chars3 = mm_packus_epi32(mm_chars3, mm_chars4);
|
||||||
return _mm_packus_epi16(mm_chars1, mm_chars3);
|
return _mm_packus_epi16(mm_chars1, mm_chars3);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Byte shuffle control used by mm_packus_epi32 on SSSE3-without-SSE4.1 builds.
// Output bytes 0-7 take source bytes {0,1}, {4,5}, {8,9}, {12,13}, i.e. the low 16-bit half of
// each 32-bit element; output bytes 8-15 take source bytes {2,3}, {6,7}, {10,11}, {14,15},
// i.e. the high 16-bit half of each 32-bit element.
#if !defined(BOOST_UUID_USE_SSE41) && defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
template< typename Char >
|
||||||
|
const simd_vector128< std::uint8_t > from_chars_simd_load_traits< Char, 4u >::mm_deinterleave_epi16_pattern =
|
||||||
|
{{ 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F }};
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(BOOST_GCC) && (BOOST_GCC >= 40600)
|
#if defined(BOOST_GCC) && (BOOST_GCC >= 40600)
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
@@ -689,7 +770,7 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
__m128i const& mm_expected_dashes,
|
__m128i const& mm_expected_dashes,
|
||||||
__m128i const& mm_char_code1_cmp,
|
__m128i const& mm_char_code1_cmp,
|
||||||
__m128i const& mm_char_code2_cmp,
|
__m128i const& mm_char_code2_cmp,
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
__m128i const& mm_char_code0_sub,
|
__m128i const& mm_char_code0_sub,
|
||||||
__m128i const& mm_char_code1_sub,
|
__m128i const& mm_char_code1_sub,
|
||||||
__m128i const& mm_char_code2_sub,
|
__m128i const& mm_char_code2_sub,
|
||||||
@@ -705,15 +786,24 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
// |01234567-89ab-cd|ef-0123-456789ab|cdefXXXXXXXXXXXX|
|
// |01234567-89ab-cd|ef-0123-456789ab|cdefXXXXXXXXXXXX|
|
||||||
//
|
//
|
||||||
// Check if dashes are in the expected positions
|
// Check if dashes are in the expected positions
|
||||||
|
//
|
||||||
|
// mm_middle
|
||||||
|
// |-89ab-cdef-0123-|
|
||||||
|
const __m128i mm_middle = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(mm_chars1), _mm_castsi128_pd(mm_chars2), _MM_SHUFFLE2(0, 1)));
|
||||||
{
|
{
|
||||||
// mm_dashes
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
// |-89ab-cdef-0123-|
|
if (BOOST_UNLIKELY(!_mm_test_all_zeros(_mm_xor_si128(mm_middle, mm_expected_dashes), constants::mm_dashes_mask)))
|
||||||
__m128i mm_dashes = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(mm_chars1), _mm_castsi128_pd(mm_chars2), _MM_SHUFFLE2(0, 1)));
|
#else
|
||||||
if (BOOST_UNLIKELY(!_mm_test_all_zeros(_mm_xor_si128(mm_dashes, mm_expected_dashes), constants::mm_dashes_mask)))
|
__m128i mm_dashes = _mm_and_si128(mm_middle, constants::mm_dashes_mask);
|
||||||
|
std::uint32_t dash_mask = static_cast< std::uint32_t >(_mm_movemask_epi8(_mm_cmpeq_epi8(mm_dashes, mm_expected_dashes)));
|
||||||
|
if (BOOST_UNLIKELY(dash_mask != 0xFFFF))
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
// Some of the dashes are missing
|
// Some of the dashes are missing
|
||||||
mm_dashes = _mm_and_si128(mm_dashes, constants::mm_dashes_mask);
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
|
__m128i mm_dashes = _mm_and_si128(mm_middle, constants::mm_dashes_mask);
|
||||||
std::uint32_t dash_mask = static_cast< std::uint32_t >(_mm_movemask_epi8(_mm_cmpeq_epi8(mm_dashes, mm_expected_dashes)));
|
std::uint32_t dash_mask = static_cast< std::uint32_t >(_mm_movemask_epi8(_mm_cmpeq_epi8(mm_dashes, mm_expected_dashes)));
|
||||||
|
#endif
|
||||||
unsigned int pos = detail::countr_zero_nz(~dash_mask) + 8u;
|
unsigned int pos = detail::countr_zero_nz(~dash_mask) + 8u;
|
||||||
if (pos < end_pos)
|
if (pos < end_pos)
|
||||||
{
|
{
|
||||||
@@ -732,7 +822,11 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
// mm_chars2: |02468ace13579bdf|
|
// mm_chars2: |02468ace13579bdf|
|
||||||
mm_chars1 = _mm_permutex2var_epi8(mm_chars1, constants::mm_split_half_bytes_pattern1, mm_chars2);
|
mm_chars1 = _mm_permutex2var_epi8(mm_chars1, constants::mm_split_half_bytes_pattern1, mm_chars2);
|
||||||
mm_chars2 = _mm_permutex2var_epi8(mm_chars2, constants::mm_split_half_bytes_pattern2, mm_chars3);
|
mm_chars2 = _mm_permutex2var_epi8(mm_chars2, constants::mm_split_half_bytes_pattern2, mm_chars3);
|
||||||
#else
|
|
||||||
|
// Group half-byte characters
|
||||||
|
__m128i mm_lo = _mm_unpacklo_epi64(mm_chars1, mm_chars2);
|
||||||
|
__m128i mm_hi = _mm_unpackhi_epi64(mm_chars1, mm_chars2);
|
||||||
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
// mm_chars1: |02468acZ13579bdZ|
|
// mm_chars1: |02468acZ13579bdZ|
|
||||||
// mm_chars2: |02468aZe13579bZf|
|
// mm_chars2: |02468aZe13579bZf|
|
||||||
// mm_chars3: |ZZZZZZceZZZZZZdf|
|
// mm_chars3: |ZZZZZZceZZZZZZdf|
|
||||||
@@ -742,20 +836,47 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
|
|
||||||
// mm_chars1: |02468ace13579bdf|
|
// mm_chars1: |02468ace13579bdf|
|
||||||
// mm_chars2: |02468ace13579bdf|
|
// mm_chars2: |02468ace13579bdf|
|
||||||
// Avoid using vpblendvb, which is slow on Intel
|
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||||
mm_chars1 = _mm_ternarylogic_epi64(mm_chars1, mm_chars2, constants::mm_split_half_bytes_blend_mask, 0xD8); // (_MM_TERNLOG_A & ~_MM_TERNLOG_C) | (_MM_TERNLOG_B & _MM_TERNLOG_C)
|
// Avoid using vpblendvb, which is slow on Intel
|
||||||
#elif defined(BOOST_UUID_USE_AVX)
|
mm_chars1 = _mm_ternarylogic_epi64(mm_chars1, mm_chars2, constants::mm_split_half_bytes_blend_mask1, 0xD8); // (_MM_TERNLOG_A & ~_MM_TERNLOG_C) | (_MM_TERNLOG_B & _MM_TERNLOG_C)
|
||||||
mm_chars1 = _mm_or_si128(mm_chars1, _mm_and_si128(mm_chars2, constants::mm_split_half_bytes_blend_mask));
|
#elif defined(BOOST_UUID_USE_SSE41) && defined(BOOST_UUID_FROM_CHARS_X86_USE_PBLENDVB)
|
||||||
|
mm_chars1 = _mm_blendv_epi8(mm_chars1, mm_chars2, constants::mm_split_half_bytes_blend_mask1);
|
||||||
#else
|
#else
|
||||||
mm_chars1 = _mm_blendv_epi8(mm_chars1, mm_chars2, constants::mm_split_half_bytes_blend_mask);
|
mm_chars1 = _mm_or_si128(mm_chars1, _mm_and_si128(mm_chars2, constants::mm_split_half_bytes_blend_mask1));
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
mm_chars2 = _mm_blend_epi16(mm_chars2, mm_chars3, 0x88);
|
mm_chars2 = _mm_blend_epi16(mm_chars2, mm_chars3, 0x88);
|
||||||
|
#else
|
||||||
|
mm_chars2 = _mm_or_si128(_mm_and_si128(mm_chars2, constants::mm_split_half_bytes_blend_mask2), mm_chars3);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Group half-byte digits
|
// Group half-byte characters
|
||||||
__m128i mm_lo = _mm_unpacklo_epi64(mm_chars1, mm_chars2);
|
__m128i mm_lo = _mm_unpacklo_epi64(mm_chars1, mm_chars2);
|
||||||
__m128i mm_hi = _mm_unpackhi_epi64(mm_chars1, mm_chars2);
|
__m128i mm_hi = _mm_unpackhi_epi64(mm_chars1, mm_chars2);
|
||||||
|
#else
|
||||||
|
__m128i mm_lo, mm_hi;
|
||||||
|
{
|
||||||
|
// Remove dashes
|
||||||
|
__m128i mm_group1 = _mm_srli_epi64(mm_middle, 8);
|
||||||
|
__m128i mm_group2 = _mm_srli_si128(mm_middle, 6);
|
||||||
|
__m128i mm_group3 = _mm_srli_si128(mm_middle, 11);
|
||||||
|
|
||||||
|
mm_chars1 = _mm_unpacklo_epi64(mm_chars1, _mm_unpacklo_epi32(mm_group1, mm_group2));
|
||||||
|
|
||||||
|
mm_chars2 = _mm_castpd_si128(_mm_move_sd(_mm_castsi128_pd(mm_chars2), _mm_castsi128_pd(_mm_unpacklo_epi32(mm_group3, mm_chars3))));
|
||||||
|
mm_chars2 = _mm_shuffle_epi32(mm_chars2, _MM_SHUFFLE(1, 3, 2, 0));
|
||||||
|
|
||||||
|
// Deinterleave half-byte characters
|
||||||
|
__m128i mm_lo1 = _mm_srli_epi16(mm_chars1, 8);
|
||||||
|
__m128i mm_lo2 = _mm_srli_epi16(mm_chars2, 8);
|
||||||
|
|
||||||
|
__m128i mm_hi1 = _mm_and_si128(mm_chars1, constants::mm_split_half_byte_chars_mask);
|
||||||
|
__m128i mm_hi2 = _mm_and_si128(mm_chars2, constants::mm_split_half_byte_chars_mask);
|
||||||
|
|
||||||
|
mm_lo = _mm_packus_epi16(mm_lo1, mm_lo2);
|
||||||
|
mm_hi = _mm_packus_epi16(mm_hi1, mm_hi2);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Convert characters to 8-bit integers. The algorithm is basically as follows:
|
// Convert characters to 8-bit integers. The algorithm is basically as follows:
|
||||||
//
|
//
|
||||||
@@ -778,6 +899,7 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
// Note that there is one caveat due to the fact that there are only signed byte comparisons until AVX-512. This is a problem if the character encoding has
|
// Note that there is one caveat due to the fact that there are only signed byte comparisons until AVX-512. This is a problem if the character encoding has
|
||||||
// hexadecimal character codes with the highest bit set to 1. This is handled in from_chars_simd_char_constants by constructing mm_char_code1 and
|
// hexadecimal character codes with the highest bit set to 1. This is handled in from_chars_simd_char_constants by constructing mm_char_code1 and
|
||||||
// mm_char_code2 in such a way that signed comparisons work as described. We also use signed comparisons in AVX-512 to reuse the same constants.
|
// mm_char_code2 in such a way that signed comparisons work as described. We also use signed comparisons in AVX-512 to reuse the same constants.
|
||||||
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1)
|
#if defined(BOOST_UUID_USE_AVX512_V1)
|
||||||
__mmask16 k_char_code2_mask_lo = _mm_cmpgt_epi8_mask(mm_lo, mm_char_code2_cmp);
|
__mmask16 k_char_code2_mask_lo = _mm_cmpgt_epi8_mask(mm_lo, mm_char_code2_cmp);
|
||||||
__mmask16 k_char_code2_mask_hi = _mm_cmpgt_epi8_mask(mm_hi, mm_char_code2_cmp);
|
__mmask16 k_char_code2_mask_hi = _mm_cmpgt_epi8_mask(mm_hi, mm_char_code2_cmp);
|
||||||
@@ -790,8 +912,29 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
|
|
||||||
mm_char_code_sub_lo = _mm_mask_blend_epi8(k_char_code1_mask_lo, mm_char_code0_sub, mm_char_code_sub_lo);
|
mm_char_code_sub_lo = _mm_mask_blend_epi8(k_char_code1_mask_lo, mm_char_code0_sub, mm_char_code_sub_lo);
|
||||||
mm_char_code_sub_hi = _mm_mask_blend_epi8(k_char_code1_mask_hi, mm_char_code0_sub, mm_char_code_sub_hi);
|
mm_char_code_sub_hi = _mm_mask_blend_epi8(k_char_code1_mask_hi, mm_char_code0_sub, mm_char_code_sub_hi);
|
||||||
#elif defined(BOOST_UUID_USE_AVX)
|
#else
|
||||||
// Unlike the legacy SSE4.1 pblendvb instruction, the VEX-coded vpblendvb is slow on Intel. Use a different approach:
|
__m128i mm_char_code2_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code2_cmp);
|
||||||
|
__m128i mm_char_code2_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code2_cmp);
|
||||||
|
|
||||||
|
__m128i mm_char_code1_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code1_cmp);
|
||||||
|
__m128i mm_char_code1_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code1_cmp);
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSE41) && defined(BOOST_UUID_FROM_CHARS_X86_USE_PBLENDVB)
|
||||||
|
__m128i mm_char_code_sub_lo = _mm_blendv_epi8(mm_char_code1_sub, mm_char_code2_sub, mm_char_code2_mask_lo);
|
||||||
|
__m128i mm_char_code_sub_hi = _mm_blendv_epi8(mm_char_code1_sub, mm_char_code2_sub, mm_char_code2_mask_hi);
|
||||||
|
|
||||||
|
mm_char_code_sub_lo = _mm_blendv_epi8(mm_char_code0_sub, mm_char_code_sub_lo, mm_char_code1_mask_lo);
|
||||||
|
mm_char_code_sub_hi = _mm_blendv_epi8(mm_char_code0_sub, mm_char_code_sub_hi, mm_char_code1_mask_hi);
|
||||||
|
#else
|
||||||
|
__m128i mm_char_code_sub_lo = _mm_or_si128(_mm_andnot_si128(mm_char_code2_mask_lo, mm_char_code1_sub), _mm_and_si128(mm_char_code2_mask_lo, mm_char_code2_sub));
|
||||||
|
__m128i mm_char_code_sub_hi = _mm_or_si128(_mm_andnot_si128(mm_char_code2_mask_hi, mm_char_code1_sub), _mm_and_si128(mm_char_code2_mask_hi, mm_char_code2_sub));
|
||||||
|
|
||||||
|
mm_char_code_sub_lo = _mm_or_si128(_mm_andnot_si128(mm_char_code1_mask_lo, mm_char_code0_sub), _mm_and_si128(mm_char_code1_mask_lo, mm_char_code_sub_lo));
|
||||||
|
mm_char_code_sub_hi = _mm_or_si128(_mm_andnot_si128(mm_char_code1_mask_hi, mm_char_code0_sub), _mm_and_si128(mm_char_code1_mask_hi, mm_char_code_sub_hi));
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#else // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
|
// Use a different approach:
|
||||||
// - Each vpcmpgtb produces a mask, where 0 indicates false and -1 - true.
|
// - Each vpcmpgtb produces a mask, where 0 indicates false and -1 - true.
|
||||||
// - mm_char_code1_mask_* always overlaps with the corresponding mm_char_code2_mask_*, which means adding them
|
// - mm_char_code1_mask_* always overlaps with the corresponding mm_char_code2_mask_*, which means adding them
|
||||||
// produces a vector where 0 means none of the vpcmpgtb matched the value, -1 - where mm_char_code1_mask_* matched
|
// produces a vector where 0 means none of the vpcmpgtb matched the value, -1 - where mm_char_code1_mask_* matched
|
||||||
@@ -814,26 +957,19 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
const __m128i mm_char_code_sub = _mm_cvtsi32_si128(static_cast< int >(char_code_sub));
|
const __m128i mm_char_code_sub = _mm_cvtsi32_si128(static_cast< int >(char_code_sub));
|
||||||
__m128i mm_char_code_sub_lo = _mm_shuffle_epi8(mm_char_code_sub, mm_char_code_pattern_lo);
|
__m128i mm_char_code_sub_lo = _mm_shuffle_epi8(mm_char_code_sub, mm_char_code_pattern_lo);
|
||||||
__m128i mm_char_code_sub_hi = _mm_shuffle_epi8(mm_char_code_sub, mm_char_code_pattern_hi);
|
__m128i mm_char_code_sub_hi = _mm_shuffle_epi8(mm_char_code_sub, mm_char_code_pattern_hi);
|
||||||
#else
|
#endif // defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
__m128i mm_char_code2_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code2_cmp);
|
|
||||||
__m128i mm_char_code2_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code2_cmp);
|
|
||||||
|
|
||||||
__m128i mm_char_code1_mask_lo = _mm_cmpgt_epi8(mm_lo, mm_char_code1_cmp);
|
|
||||||
__m128i mm_char_code1_mask_hi = _mm_cmpgt_epi8(mm_hi, mm_char_code1_cmp);
|
|
||||||
|
|
||||||
__m128i mm_char_code_sub_lo = _mm_blendv_epi8(mm_char_code1_sub, mm_char_code2_sub, mm_char_code2_mask_lo);
|
|
||||||
__m128i mm_char_code_sub_hi = _mm_blendv_epi8(mm_char_code1_sub, mm_char_code2_sub, mm_char_code2_mask_hi);
|
|
||||||
|
|
||||||
mm_char_code_sub_lo = _mm_blendv_epi8(mm_char_code0_sub, mm_char_code_sub_lo, mm_char_code1_mask_lo);
|
|
||||||
mm_char_code_sub_hi = _mm_blendv_epi8(mm_char_code0_sub, mm_char_code_sub_hi, mm_char_code1_mask_hi);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
mm_lo = _mm_sub_epi8(mm_lo, mm_char_code_sub_lo);
|
mm_lo = _mm_sub_epi8(mm_lo, mm_char_code_sub_lo);
|
||||||
mm_hi = _mm_sub_epi8(mm_hi, mm_char_code_sub_hi);
|
mm_hi = _mm_sub_epi8(mm_hi, mm_char_code_sub_hi);
|
||||||
|
|
||||||
// Check hexadecimal character validity. Proper hexadecimal characters always convert to values of 0-15 and any other characters convert
|
// Check hexadecimal character validity. Proper hexadecimal characters always convert to values of 0-15 and any other characters convert
|
||||||
// to values outside that range. Which means if the upper 4 bits of a resulting integer are non-zero then the corresponding character was invalid.
|
// to values outside that range. Which means if the upper 4 bits of a resulting integer are non-zero then the corresponding character was invalid.
|
||||||
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
if (BOOST_LIKELY(_mm_test_all_zeros(_mm_or_si128(mm_lo, mm_hi), constants::mm_F0)))
|
if (BOOST_LIKELY(_mm_test_all_zeros(_mm_or_si128(mm_lo, mm_hi), constants::mm_F0)))
|
||||||
|
#else
|
||||||
|
const __m128i mm_0 = _mm_setzero_si128();
|
||||||
|
if (BOOST_LIKELY(_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_and_si128(_mm_or_si128(mm_lo, mm_hi), constants::mm_F0), mm_0)) == 0xFFFF))
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
if (BOOST_LIKELY(ec == from_chars_error::none))
|
if (BOOST_LIKELY(ec == from_chars_error::none))
|
||||||
{
|
{
|
||||||
@@ -844,12 +980,13 @@ BOOST_FORCEINLINE void from_chars_simd_core
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Some of the hex digits are invalid
|
// Some of the hex digits are invalid
|
||||||
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
const __m128i mm_0 = _mm_setzero_si128();
|
const __m128i mm_0 = _mm_setzero_si128();
|
||||||
|
#endif
|
||||||
__m128i mm_hi_bits_lo = _mm_and_si128(mm_lo, constants::mm_F0);
|
__m128i mm_hi_bits_lo = _mm_and_si128(mm_lo, constants::mm_F0);
|
||||||
__m128i mm_hi_bits_hi = _mm_and_si128(mm_hi, constants::mm_F0);
|
__m128i mm_hi_bits_hi = _mm_and_si128(mm_hi, constants::mm_F0);
|
||||||
mm_hi_bits_lo = _mm_cmpeq_epi8(mm_hi_bits_lo, mm_0);
|
mm_hi_bits_lo = _mm_cmpeq_epi8(mm_hi_bits_lo, mm_0);
|
||||||
mm_hi_bits_hi = _mm_cmpeq_epi8(mm_hi_bits_hi, mm_0);
|
mm_hi_bits_hi = _mm_cmpeq_epi8(mm_hi_bits_hi, mm_0);
|
||||||
|
|
||||||
std::uint32_t digits_mask_lo = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_hi_bits_lo));
|
std::uint32_t digits_mask_lo = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_hi_bits_lo));
|
||||||
std::uint32_t digits_mask_hi = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_hi_bits_hi));
|
std::uint32_t digits_mask_hi = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_hi_bits_hi));
|
||||||
|
|
||||||
@@ -930,7 +1067,7 @@ BOOST_FORCEINLINE from_chars_result< Char > from_chars_simd(const Char* begin, c
|
|||||||
char_constants::mm_expected_dashes,
|
char_constants::mm_expected_dashes,
|
||||||
char_constants::mm_char_code1_cmp,
|
char_constants::mm_char_code1_cmp,
|
||||||
char_constants::mm_char_code2_cmp,
|
char_constants::mm_char_code2_cmp,
|
||||||
#if defined(BOOST_UUID_USE_AVX512_V1) || !defined(BOOST_UUID_USE_AVX)
|
#if defined(BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS)
|
||||||
char_constants::mm_char_code0_sub,
|
char_constants::mm_char_code0_sub,
|
||||||
char_constants::mm_char_code1_sub,
|
char_constants::mm_char_code1_sub,
|
||||||
char_constants::mm_char_code2_sub,
|
char_constants::mm_char_code2_sub,
|
||||||
@@ -947,6 +1084,8 @@ BOOST_FORCEINLINE from_chars_result< Char > from_chars_simd(const Char* begin, c
|
|||||||
} // namespace uuids
|
} // namespace uuids
|
||||||
} // namespace boost
|
} // namespace boost
|
||||||
|
|
||||||
#endif // defined(BOOST_UUID_USE_SSE41)
|
#undef BOOST_UUID_DETAIL_FROM_CHARS_X86_USE_BLENDS
|
||||||
|
|
||||||
|
#endif // defined(BOOST_UUID_USE_SSE2)
|
||||||
|
|
||||||
#endif // BOOST_UUID_DETAIL_FROM_CHARS_X86_HPP_INCLUDED
|
#endif // BOOST_UUID_DETAIL_FROM_CHARS_X86_HPP_INCLUDED
|
||||||
|
|||||||
@@ -27,12 +27,22 @@ union simd_vector
|
|||||||
>
|
>
|
||||||
BOOST_FORCEINLINE operator Vector () const noexcept { return get< Vector >(); }
|
BOOST_FORCEINLINE operator Vector () const noexcept { return get< Vector >(); }
|
||||||
|
|
||||||
|
#if defined(BOOST_GCC) && (BOOST_GCC >= 40600)
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
// dereferencing type-punned pointer will break strict-aliasing rules
|
||||||
|
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||||
|
#endif
|
||||||
|
|
||||||
template< typename Vector >
|
template< typename Vector >
|
||||||
BOOST_FORCEINLINE typename std::enable_if< sizeof(Vector) <= ByteSize, Vector >::type get() const noexcept
|
BOOST_FORCEINLINE typename std::enable_if< sizeof(Vector) <= ByteSize, Vector >::type get() const noexcept
|
||||||
{
|
{
|
||||||
using vector_type = typename std::remove_cv< typename std::remove_reference< Vector >::type >::type;
|
using vector_type = typename std::remove_cv< typename std::remove_reference< Vector >::type >::type;
|
||||||
return *reinterpret_cast< const vector_type* >(bytes);
|
return *reinterpret_cast< const vector_type* >(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(BOOST_GCC) && (BOOST_GCC >= 40800)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
template< typename T >
|
template< typename T >
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
#include <boost/uuid/detail/is_constant_evaluated.hpp>
|
#include <boost/uuid/detail/is_constant_evaluated.hpp>
|
||||||
#include <boost/uuid/detail/to_chars_generic.hpp>
|
#include <boost/uuid/detail/to_chars_generic.hpp>
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_SSSE3)
|
#if defined(BOOST_UUID_USE_SSE2)
|
||||||
# include <boost/uuid/detail/to_chars_x86.hpp>
|
# include <boost/uuid/detail/to_chars_x86.hpp>
|
||||||
|
|
||||||
#elif defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
#elif defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||||
@@ -26,7 +26,7 @@ namespace detail {
|
|||||||
|
|
||||||
template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
|
template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const& u, Ch* out ) noexcept
|
||||||
{
|
{
|
||||||
#if defined(BOOST_UUID_USE_SSSE3)
|
#if defined(BOOST_UUID_USE_SSE2)
|
||||||
if( detail::is_constant_evaluated_rt() )
|
if( detail::is_constant_evaluated_rt() )
|
||||||
{
|
{
|
||||||
return detail::to_chars_generic( u, out );
|
return detail::to_chars_generic( u, out );
|
||||||
@@ -40,7 +40,6 @@ template<class Ch> BOOST_UUID_CXX14_CONSTEXPR_RT inline Ch* to_chars( uuid const
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace detail
|
}}} // namespace boost::uuids::detail
|
||||||
}} //namespace boost::uuids
|
|
||||||
|
|
||||||
#endif // BOOST_UUID_DETAIL_TO_CHARS_HPP_INCLUDED
|
#endif // BOOST_UUID_DETAIL_TO_CHARS_HPP_INCLUDED
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include <boost/uuid/detail/config.hpp>
|
#include <boost/uuid/detail/config.hpp>
|
||||||
|
|
||||||
#if defined(BOOST_UUID_USE_SSSE3)
|
#if defined(BOOST_UUID_USE_SSE2)
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <boost/uuid/uuid.hpp>
|
#include <boost/uuid/uuid.hpp>
|
||||||
@@ -26,9 +26,12 @@ BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, AVX2" )
|
|||||||
#elif defined(BOOST_UUID_USE_SSE41)
|
#elif defined(BOOST_UUID_USE_SSE41)
|
||||||
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE4.1" )
|
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE4.1" )
|
||||||
|
|
||||||
#else
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
|
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
|
||||||
|
|
||||||
|
#else
|
||||||
|
BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSE2" )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
#endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
|
||||||
|
|
||||||
@@ -36,8 +39,10 @@ BOOST_PRAGMA_MESSAGE( "Using to_chars_x86.hpp, SSSE3" )
|
|||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#elif defined(BOOST_UUID_USE_SSE41)
|
#elif defined(BOOST_UUID_USE_SSE41)
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#else
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
#include <tmmintrin.h>
|
#include <tmmintrin.h>
|
||||||
|
#else
|
||||||
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace boost {
|
namespace boost {
|
||||||
@@ -51,13 +56,31 @@ template<
|
|||||||
>
|
>
|
||||||
struct to_chars_simd_char_constants
|
struct to_chars_simd_char_constants
|
||||||
{
|
{
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
static const simd_vector128< std::uint8_t > mm_char_table;
|
static const simd_vector128< std::uint8_t > mm_char_table;
|
||||||
|
#else
|
||||||
|
static constexpr std::uint8_t char_a_add = static_cast< std::uint8_t >((0x61 - 10) - 0x30); // ('a' - 10) - '0' in ASCII
|
||||||
|
static const simd_vector128< std::uint8_t > mm_char_0_add;
|
||||||
|
static const simd_vector128< std::uint8_t > mm_char_a_add;
|
||||||
|
#endif
|
||||||
static const simd_vector128< std::uint8_t > mm_char_dash;
|
static const simd_vector128< std::uint8_t > mm_char_dash;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_table =
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_table =
|
||||||
{{ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 }}; // 0123456789abcdef in ASCII
|
{{ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 }}; // 0123456789abcdef in ASCII
|
||||||
|
#else
|
||||||
|
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_0_add =
|
||||||
|
{{ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 }}; // 0x30 is '0' in ASCII
|
||||||
|
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_a_add =
|
||||||
|
{{
|
||||||
|
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add,
|
||||||
|
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add
|
||||||
|
}};
|
||||||
|
#endif
|
||||||
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
template< typename Char, bool IsCharASCIICompatible, bool IsWCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_dash =
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< Char, IsCharASCIICompatible, IsWCharASCIICompatible >::mm_char_dash =
|
||||||
{{ 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D }}; // ---------------- in ASCII
|
{{ 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D }}; // ---------------- in ASCII
|
||||||
@@ -69,10 +92,17 @@ struct to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >
|
|||||||
static_assert(static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('0') && static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('a'),
|
static_assert(static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('0') && static_cast< std::uint8_t >('-') < static_cast< std::uint8_t >('a'),
|
||||||
"Boost.UUID: Unsupported char encoding, '-' character code is expected to be less than any hexadecimal characters");
|
"Boost.UUID: Unsupported char encoding, '-' character code is expected to be less than any hexadecimal characters");
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
static const simd_vector128< std::uint8_t > mm_char_table;
|
static const simd_vector128< std::uint8_t > mm_char_table;
|
||||||
|
#else
|
||||||
|
static constexpr std::uint8_t char_a_add = static_cast< std::uint8_t >(('a' - 10) - '0');
|
||||||
|
static const simd_vector128< std::uint8_t > mm_char_0_add;
|
||||||
|
static const simd_vector128< std::uint8_t > mm_char_a_add;
|
||||||
|
#endif
|
||||||
static const simd_vector128< std::uint8_t > mm_char_dash;
|
static const simd_vector128< std::uint8_t > mm_char_dash;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
template< bool IsWCharASCIICompatible >
|
template< bool IsWCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_table =
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_table =
|
||||||
{{
|
{{
|
||||||
@@ -81,6 +111,22 @@ const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false,
|
|||||||
static_cast< std::uint8_t >('8'), static_cast< std::uint8_t >('9'), static_cast< std::uint8_t >('a'), static_cast< std::uint8_t >('b'),
|
static_cast< std::uint8_t >('8'), static_cast< std::uint8_t >('9'), static_cast< std::uint8_t >('a'), static_cast< std::uint8_t >('b'),
|
||||||
static_cast< std::uint8_t >('c'), static_cast< std::uint8_t >('d'), static_cast< std::uint8_t >('e'), static_cast< std::uint8_t >('f')
|
static_cast< std::uint8_t >('c'), static_cast< std::uint8_t >('d'), static_cast< std::uint8_t >('e'), static_cast< std::uint8_t >('f')
|
||||||
}};
|
}};
|
||||||
|
#else
|
||||||
|
template< bool IsWCharASCIICompatible >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_0_add =
|
||||||
|
{{
|
||||||
|
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'),
|
||||||
|
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'),
|
||||||
|
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'),
|
||||||
|
static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0'), static_cast< std::uint8_t >('0')
|
||||||
|
}};
|
||||||
|
template< bool IsWCharASCIICompatible >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_a_add =
|
||||||
|
{{
|
||||||
|
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add,
|
||||||
|
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add
|
||||||
|
}};
|
||||||
|
#endif
|
||||||
template< bool IsWCharASCIICompatible >
|
template< bool IsWCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_dash =
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< char, false, IsWCharASCIICompatible >::mm_char_dash =
|
||||||
{{
|
{{
|
||||||
@@ -102,10 +148,17 @@ struct to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >
|
|||||||
static_assert(static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'0') && static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'a'),
|
static_assert(static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'0') && static_cast< std::uint8_t >(L'-') < static_cast< std::uint8_t >(L'a'),
|
||||||
"Boost.UUID: Unsupported wchar_t encoding, L'-' character code is expected to be less than any hexadecimal characters");
|
"Boost.UUID: Unsupported wchar_t encoding, L'-' character code is expected to be less than any hexadecimal characters");
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
static const simd_vector128< std::uint8_t > mm_char_table;
|
static const simd_vector128< std::uint8_t > mm_char_table;
|
||||||
|
#else
|
||||||
|
static constexpr std::uint8_t char_a_add = static_cast< std::uint8_t >((L'a' - 10) - L'0');
|
||||||
|
static const simd_vector128< std::uint8_t > mm_char_0_add;
|
||||||
|
static const simd_vector128< std::uint8_t > mm_char_a_add;
|
||||||
|
#endif
|
||||||
static const simd_vector128< std::uint8_t > mm_char_dash;
|
static const simd_vector128< std::uint8_t > mm_char_dash;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
template< bool IsCharASCIICompatible >
|
template< bool IsCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_table =
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_table =
|
||||||
{{
|
{{
|
||||||
@@ -114,6 +167,22 @@ const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCh
|
|||||||
static_cast< std::uint8_t >(L'8'), static_cast< std::uint8_t >(L'9'), static_cast< std::uint8_t >(L'a'), static_cast< std::uint8_t >(L'b'),
|
static_cast< std::uint8_t >(L'8'), static_cast< std::uint8_t >(L'9'), static_cast< std::uint8_t >(L'a'), static_cast< std::uint8_t >(L'b'),
|
||||||
static_cast< std::uint8_t >(L'c'), static_cast< std::uint8_t >(L'd'), static_cast< std::uint8_t >(L'e'), static_cast< std::uint8_t >(L'f')
|
static_cast< std::uint8_t >(L'c'), static_cast< std::uint8_t >(L'd'), static_cast< std::uint8_t >(L'e'), static_cast< std::uint8_t >(L'f')
|
||||||
}};
|
}};
|
||||||
|
#else
|
||||||
|
template< bool IsCharASCIICompatible >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_0_add =
|
||||||
|
{{
|
||||||
|
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'),
|
||||||
|
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'),
|
||||||
|
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'),
|
||||||
|
static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0'), static_cast< std::uint8_t >(L'0')
|
||||||
|
}};
|
||||||
|
template< bool IsCharASCIICompatible >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_a_add =
|
||||||
|
{{
|
||||||
|
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add,
|
||||||
|
char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add, char_a_add
|
||||||
|
}};
|
||||||
|
#endif
|
||||||
template< bool IsCharASCIICompatible >
|
template< bool IsCharASCIICompatible >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_dash =
|
const simd_vector128< std::uint8_t > to_chars_simd_char_constants< wchar_t, IsCharASCIICompatible, false >::mm_char_dash =
|
||||||
{{
|
{{
|
||||||
@@ -127,25 +196,55 @@ template< typename >
|
|||||||
struct to_chars_simd_constants
|
struct to_chars_simd_constants
|
||||||
{
|
{
|
||||||
static const simd_vector128< std::uint8_t > mm_0F;
|
static const simd_vector128< std::uint8_t > mm_0F;
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
static const simd_vector128< std::uint8_t > mm_shuffle_pattern1;
|
static const simd_vector128< std::uint8_t > mm_shuffle_pattern1;
|
||||||
static const simd_vector128< std::uint8_t > mm_shuffle_pattern2;
|
static const simd_vector128< std::uint8_t > mm_shuffle_pattern2;
|
||||||
|
#else
|
||||||
|
static const simd_vector128< std::uint8_t > mm_9;
|
||||||
|
static const simd_vector128< std::uint8_t > mm_group1_mask;
|
||||||
|
static const simd_vector128< std::uint8_t > mm_group2_mask;
|
||||||
|
static const simd_vector128< std::uint8_t > mm_group3_mask;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
template< typename T >
|
template< typename T >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_0F =
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_0F =
|
||||||
{{ 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F }};
|
{{ 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F }};
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
template< typename T >
|
template< typename T >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_shuffle_pattern1 =
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_shuffle_pattern1 =
|
||||||
{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x08, 0x09, 0x0A, 0x0B, 0x80, 0x0C, 0x0D }};
|
{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x08, 0x09, 0x0A, 0x0B, 0x80, 0x0C, 0x0D }};
|
||||||
template< typename T >
|
template< typename T >
|
||||||
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_shuffle_pattern2 =
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_shuffle_pattern2 =
|
||||||
{{ 0x00, 0x01, 0x80, 0x02, 0x03, 0x04, 0x05, 0x80, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D }};
|
{{ 0x00, 0x01, 0x80, 0x02, 0x03, 0x04, 0x05, 0x80, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D }};
|
||||||
|
#else
|
||||||
|
template< typename T >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_9 =
|
||||||
|
{{ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 }};
|
||||||
|
template< typename T >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_group1_mask =
|
||||||
|
{{ 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }};
|
||||||
|
template< typename T >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_group2_mask =
|
||||||
|
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }};
|
||||||
|
template< typename T >
|
||||||
|
const simd_vector128< std::uint8_t > to_chars_simd_constants< T >::mm_group3_mask =
|
||||||
|
{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00 }};
|
||||||
|
#endif
|
||||||
|
|
||||||
//! Converts UUID to a string of 36 characters, where first 32 craracters are returned in mm_chars1 and mm_chars2 and the last 4 in the highest 32 bits of mm_chars3
|
/*!
|
||||||
|
* Converts UUID to a string of 36 characters, where the first 32 characters are returned in mm_chars1 and mm_chars2.
|
||||||
|
* When SSSE3 is supported, the last 4 characters are returned in the highest 32 bits of mm_chars3; otherwise they are returned in the lowest 32 bits.
|
||||||
|
*/
|
||||||
BOOST_FORCEINLINE void to_chars_simd_core
|
BOOST_FORCEINLINE void to_chars_simd_core
|
||||||
(
|
(
|
||||||
const std::uint8_t* data,
|
const std::uint8_t* data,
|
||||||
__m128i const& mm_char_table, __m128i const& mm_char_dash,
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
__m128i const& mm_char_table,
|
||||||
|
#else
|
||||||
|
__m128i const& mm_char_0_add, __m128i const& mm_char_a_add,
|
||||||
|
#endif
|
||||||
|
__m128i const& mm_char_dash,
|
||||||
__m128i& mm_chars1, __m128i& mm_chars2, __m128i& mm_chars3
|
__m128i& mm_chars1, __m128i& mm_chars2, __m128i& mm_chars3
|
||||||
) noexcept
|
) noexcept
|
||||||
{
|
{
|
||||||
@@ -154,18 +253,31 @@ BOOST_FORCEINLINE void to_chars_simd_core
|
|||||||
__m128i mm_input = _mm_loadu_si128(reinterpret_cast< const __m128i* >(data));
|
__m128i mm_input = _mm_loadu_si128(reinterpret_cast< const __m128i* >(data));
|
||||||
|
|
||||||
// Split half-bytes
|
// Split half-bytes
|
||||||
__m128i const& mm_0F = constants::mm_0F;
|
__m128i mm_input_hi = _mm_and_si128(_mm_srli_epi32(mm_input, 4), constants::mm_0F);
|
||||||
__m128i mm_input_hi = _mm_and_si128(_mm_srli_epi32(mm_input, 4), mm_0F);
|
__m128i mm_input_lo = _mm_and_si128(mm_input, constants::mm_0F);
|
||||||
__m128i mm_input_lo = _mm_and_si128(mm_input, mm_0F);
|
|
||||||
|
|
||||||
// Stringize each of the halves
|
// Stringize each of the halves
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
mm_input_hi = _mm_shuffle_epi8(mm_char_table, mm_input_hi);
|
mm_input_hi = _mm_shuffle_epi8(mm_char_table, mm_input_hi);
|
||||||
mm_input_lo = _mm_shuffle_epi8(mm_char_table, mm_input_lo);
|
mm_input_lo = _mm_shuffle_epi8(mm_char_table, mm_input_lo);
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
__m128i mm_add_mask_hi = _mm_cmpgt_epi8(mm_input_hi, constants::mm_9);
|
||||||
|
__m128i mm_add_mask_lo = _mm_cmpgt_epi8(mm_input_lo, constants::mm_9);
|
||||||
|
|
||||||
|
__m128i mm_add_hi = _mm_add_epi8(mm_char_0_add, _mm_and_si128(mm_add_mask_hi, mm_char_a_add));
|
||||||
|
__m128i mm_add_lo = _mm_add_epi8(mm_char_0_add, _mm_and_si128(mm_add_mask_lo, mm_char_a_add));
|
||||||
|
|
||||||
|
mm_input_hi = _mm_add_epi8(mm_input_hi, mm_add_hi);
|
||||||
|
mm_input_lo = _mm_add_epi8(mm_input_lo, mm_add_lo);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Join them back together
|
// Join them back together
|
||||||
__m128i mm_1 = _mm_unpacklo_epi8(mm_input_hi, mm_input_lo);
|
__m128i mm_1 = _mm_unpacklo_epi8(mm_input_hi, mm_input_lo);
|
||||||
__m128i mm_2 = _mm_unpackhi_epi8(mm_input_hi, mm_input_lo);
|
__m128i mm_2 = _mm_unpackhi_epi8(mm_input_hi, mm_input_lo);
|
||||||
|
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
// Insert dashes at positions 8, 13, 18 and 23
|
// Insert dashes at positions 8, 13, 18 and 23
|
||||||
// mm_1 mm_2
|
// mm_1 mm_2
|
||||||
// |0123456789abcdef|0123456789abcdef|
|
// |0123456789abcdef|0123456789abcdef|
|
||||||
@@ -178,6 +290,32 @@ BOOST_FORCEINLINE void to_chars_simd_core
|
|||||||
mm_chars1 = _mm_max_epu8(mm_chars1, mm_char_dash);
|
mm_chars1 = _mm_max_epu8(mm_chars1, mm_char_dash);
|
||||||
mm_chars2 = _mm_max_epu8(mm_chars2, mm_char_dash);
|
mm_chars2 = _mm_max_epu8(mm_chars2, mm_char_dash);
|
||||||
mm_chars3 = mm_2;
|
mm_chars3 = mm_2;
|
||||||
|
#else
|
||||||
|
// Split groups of characters between dashes and shift them into their places
|
||||||
|
// mm_middle: |89abcdef01234567|
|
||||||
|
// mm_group1: |Z89abZZZZZZZZZZZ|
|
||||||
|
// mm_group2: |ZZZZZZcdefZZZZZZ|
|
||||||
|
// mm_group3: |ZZZZZZZZZZZ0123Z|
|
||||||
|
__m128i mm_middle = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(mm_1), _mm_castsi128_pd(mm_2), _MM_SHUFFLE2(0, 1)));
|
||||||
|
__m128i mm_group1 = _mm_slli_epi64(mm_middle, 8);
|
||||||
|
__m128i mm_group2 = _mm_slli_si128(mm_middle, 2);
|
||||||
|
__m128i mm_group3 = _mm_slli_epi64(mm_middle, 24);
|
||||||
|
mm_group1 = _mm_and_si128(mm_group1, constants::mm_group1_mask);
|
||||||
|
mm_group2 = _mm_and_si128(mm_group2, constants::mm_group2_mask);
|
||||||
|
mm_group3 = _mm_and_si128(mm_group3, constants::mm_group3_mask);
|
||||||
|
|
||||||
|
// Merge them back and insert dashes
|
||||||
|
// mm_middle: |-89ab-cdef-0123-|
|
||||||
|
mm_middle = _mm_or_si128(_mm_or_si128(mm_group1, mm_group2), mm_group3);
|
||||||
|
mm_middle = _mm_max_epu8(mm_middle, mm_char_dash);
|
||||||
|
|
||||||
|
// mm_2: |cdef0123456789ab|
|
||||||
|
mm_2 = _mm_shuffle_epi32(mm_2, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
|
|
||||||
|
mm_chars1 = _mm_unpacklo_epi64(mm_1, mm_middle);
|
||||||
|
mm_chars2 = _mm_unpackhi_epi64(mm_middle, mm_2);
|
||||||
|
mm_chars3 = mm_2;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(BOOST_MSVC)
|
#if defined(BOOST_MSVC)
|
||||||
@@ -195,7 +333,12 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
|||||||
uuids::detail::to_chars_simd_core
|
uuids::detail::to_chars_simd_core
|
||||||
(
|
(
|
||||||
u.data(),
|
u.data(),
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
char_constants::mm_char_table,
|
char_constants::mm_char_table,
|
||||||
|
#else
|
||||||
|
char_constants::mm_char_0_add,
|
||||||
|
char_constants::mm_char_a_add,
|
||||||
|
#endif
|
||||||
char_constants::mm_char_dash,
|
char_constants::mm_char_dash,
|
||||||
mm_chars1, mm_chars2, mm_chars3
|
mm_chars1, mm_chars2, mm_chars3
|
||||||
);
|
);
|
||||||
@@ -205,11 +348,17 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out), mm_chars1);
|
_mm_storeu_si128(reinterpret_cast< __m128i* >(out), mm_chars1);
|
||||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), mm_chars2);
|
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 16), mm_chars2);
|
||||||
|
detail::store_native_u32
|
||||||
|
(
|
||||||
|
out + 32,
|
||||||
#if defined(BOOST_UUID_USE_SSE41)
|
#if defined(BOOST_UUID_USE_SSE41)
|
||||||
detail::store_native_u32(out + 32, static_cast< std::uint32_t >(_mm_extract_epi32(mm_chars3, 3)));
|
static_cast< std::uint32_t >(_mm_extract_epi32(mm_chars3, 3))
|
||||||
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
static_cast< std::uint32_t >(_mm_cvtsi128_si32(_mm_srli_si128(mm_chars3, 12)))
|
||||||
#else
|
#else
|
||||||
detail::store_native_u32(out + 32, static_cast< std::uint32_t >(_mm_cvtsi128_si32(_mm_srli_si128(mm_chars3, 12))));
|
static_cast< std::uint32_t >(_mm_cvtsi128_si32(mm_chars3))
|
||||||
#endif
|
#endif
|
||||||
|
);
|
||||||
}
|
}
|
||||||
else BOOST_IF_CONSTEXPR (sizeof(Char) == 2u)
|
else BOOST_IF_CONSTEXPR (sizeof(Char) == 2u)
|
||||||
{
|
{
|
||||||
@@ -225,8 +374,10 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
|||||||
#endif
|
#endif
|
||||||
#if defined(BOOST_UUID_USE_SSE41) && (defined(__x86_64__) || defined(_M_X64))
|
#if defined(BOOST_UUID_USE_SSE41) && (defined(__x86_64__) || defined(_M_X64))
|
||||||
detail::store_native_u64(out + 32, static_cast< std::uint64_t >(_mm_extract_epi64(_mm_unpackhi_epi8(mm_chars3, mm_0), 1)));
|
detail::store_native_u64(out + 32, static_cast< std::uint64_t >(_mm_extract_epi64(_mm_unpackhi_epi8(mm_chars3, mm_0), 1)));
|
||||||
#else
|
#elif defined(BOOST_UUID_USE_SSSE3)
|
||||||
_mm_storeh_pd(reinterpret_cast< BOOST_MAY_ALIAS double* >(out + 32), _mm_castsi128_pd(_mm_unpackhi_epi8(mm_chars3, mm_0)));
|
_mm_storeh_pd(reinterpret_cast< BOOST_MAY_ALIAS double* >(out + 32), _mm_castsi128_pd(_mm_unpackhi_epi8(mm_chars3, mm_0)));
|
||||||
|
#else
|
||||||
|
_mm_storel_epi64(reinterpret_cast< __m128i* >(out + 32), _mm_unpacklo_epi8(mm_chars3, mm_0));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -255,7 +406,15 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
|||||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpacklo_epi16(mm, mm_0));
|
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 24), _mm_unpacklo_epi16(mm, mm_0));
|
||||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 28), _mm_unpackhi_epi16(mm, mm_0));
|
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 28), _mm_unpackhi_epi16(mm, mm_0));
|
||||||
#endif
|
#endif
|
||||||
_mm_storeu_si128(reinterpret_cast< __m128i* >(out + 32), _mm_unpackhi_epi16(_mm_unpackhi_epi8(mm_chars3, mm_0), mm_0));
|
_mm_storeu_si128
|
||||||
|
(
|
||||||
|
reinterpret_cast< __m128i* >(out + 32),
|
||||||
|
#if defined(BOOST_UUID_USE_SSSE3)
|
||||||
|
_mm_unpackhi_epi16(_mm_unpackhi_epi8(mm_chars3, mm_0), mm_0)
|
||||||
|
#else
|
||||||
|
_mm_unpacklo_epi16(_mm_unpacklo_epi8(mm_chars3, mm_0), mm_0)
|
||||||
|
#endif
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return out + 36;
|
return out + 36;
|
||||||
@@ -269,6 +428,6 @@ BOOST_FORCEINLINE Char* to_chars_simd(uuid const& u, Char* out) noexcept
|
|||||||
} // namespace uuids
|
} // namespace uuids
|
||||||
} // namespace boost
|
} // namespace boost
|
||||||
|
|
||||||
#endif // defined(BOOST_UUID_USE_SSSE3)
|
#endif // defined(BOOST_UUID_USE_SSE2)
|
||||||
|
|
||||||
#endif // BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
|
#endif // BOOST_UUID_DETAIL_TO_CHARS_X86_HPP_INCLUDED
|
||||||
|
|||||||
@@ -34,6 +34,20 @@ import path ;
|
|||||||
import regex ;
|
import regex ;
|
||||||
import testing ;
|
import testing ;
|
||||||
|
|
||||||
|
# This rule allows suppressing the running of tests so that they are only compiled instead.
|
||||||
|
# This is useful e.g. if the tests are compiled for a target ISA that is not supported by the CPU.
|
||||||
|
local rule run ( sources + : args * : input-files * : requirements * : target-name ? : default-build * )
|
||||||
|
{
|
||||||
|
if [ os.environ BOOST_UUID_SKIP_RUNNING_TESTS ]
|
||||||
|
{
|
||||||
|
return [ testing.compile $(sources) : $(requirements) : $(target-name) ] ;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return [ testing.run $(sources) : $(args) : $(input-files) : $(requirements) : $(target-name) : $(default-build) ] ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# this rule enumerates through all the headers and ensures
|
# this rule enumerates through all the headers and ensures
|
||||||
# that inclusion of the header by itself is sufficient to
|
# that inclusion of the header by itself is sufficient to
|
||||||
# compile successfully, proving the header does not depend
|
# compile successfully, proving the header does not depend
|
||||||
|
|||||||
Reference in New Issue
Block a user