From 1a4e7ed9a8eb83c1f1bd575098c06cae5abb6e4f Mon Sep 17 00:00:00 2001 From: Andrey Semashev Date: Sat, 15 Apr 2023 21:24:52 +0300 Subject: [PATCH] Improve generated x86 code for AVX targets (#138) * Re-format the test code for MSVC bug 981648. * Improved generated x86 code for SSE4.1 and later targets. Prefer movdqu to lddqu on CPUs supporting SSE4.1 and later. lddqu has one extra cycle latency on Skylake and later Intel CPUs, and with AVX vlddqu is not merged to the following instructions as a memory operand, which makes the code slightly larger. Legacy SSE3 lddqu is still preferred when SSE4.1 is not enabled because it is faster on Prescott and the same as movdqu on AMD CPUs. It also doesn't affect code size because movdqu cannot be converted to a memory operand as memory operands are required to be aligned in SSE. Closes https://github.com/boostorg/uuid/issues/137. * Use movdqu universally for loading UUIDs. This effectively drops the optimization for NetBurst CPUs and instead prefers code that is slightly better on Skylake and later Intel CPUs, even when the code is compiled for SSE3 and not SSE4.1. --- include/boost/uuid/detail/config.hpp | 19 ++++++- include/boost/uuid/detail/uuid_x86.ipp | 8 ++- test/test_msvc_simd_bug981648.hpp | 51 ++++++++++--------- test/test_msvc_simd_bug981648_foo.cpp | 55 +++++++++++--------- test/test_msvc_simd_bug981648_main.cpp | 70 ++++++++++++++------------ 5 files changed, 117 insertions(+), 86 deletions(-) diff --git a/include/boost/uuid/detail/config.hpp b/include/boost/uuid/detail/config.hpp index 8cd7b7d..f78b97a 100644 --- a/include/boost/uuid/detail/config.hpp +++ b/include/boost/uuid/detail/config.hpp @@ -1,5 +1,5 @@ /* - * Copyright Andrey Semashev 2013. + * Copyright Andrey Semashev 2013, 2022. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * https://www.boost.org/LICENSE_1_0.txt) @@ -36,6 +36,10 @@ #define BOOST_UUID_USE_SSE41 #endif +#if defined(__AVX__) && !defined(BOOST_UUID_USE_AVX) +#define BOOST_UUID_USE_AVX +#endif + #elif defined(_MSC_VER) #if (defined(_M_X64) || (defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP >= 2)) && !defined(BOOST_UUID_USE_SSE2) @@ -43,6 +47,9 @@ #endif #if defined(__AVX__) +#if !defined(BOOST_UUID_USE_AVX) +#define BOOST_UUID_USE_AVX +#endif #if !defined(BOOST_UUID_USE_SSE41) #define BOOST_UUID_USE_SSE41 #endif @@ -57,6 +64,10 @@ #endif // More advanced ISA extensions imply less advanced are also available +#if !defined(BOOST_UUID_USE_SSE41) && defined(BOOST_UUID_USE_AVX) +#define BOOST_UUID_USE_SSE41 +#endif + #if !defined(BOOST_UUID_USE_SSE3) && defined(BOOST_UUID_USE_SSE41) #define BOOST_UUID_USE_SSE3 #endif @@ -65,7 +76,11 @@ #define BOOST_UUID_USE_SSE2 #endif -#if !defined(BOOST_UUID_NO_SIMD) && !defined(BOOST_UUID_USE_SSE41) && !defined(BOOST_UUID_USE_SSE3) && !defined(BOOST_UUID_USE_SSE2) +#if !defined(BOOST_UUID_NO_SIMD) && \ + !defined(BOOST_UUID_USE_AVX) && \ + !defined(BOOST_UUID_USE_SSE41) && \ + !defined(BOOST_UUID_USE_SSE3) && \ + !defined(BOOST_UUID_USE_SSE2) #define BOOST_UUID_NO_SIMD #endif diff --git a/include/boost/uuid/detail/uuid_x86.ipp b/include/boost/uuid/detail/uuid_x86.ipp index ecbf9a5..73680af 100644 --- a/include/boost/uuid/detail/uuid_x86.ipp +++ b/include/boost/uuid/detail/uuid_x86.ipp @@ -1,5 +1,5 @@ /* - * Copyright Andrey Semashev 2013. + * Copyright Andrey Semashev 2013, 2022. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * https://www.boost.org/LICENSE_1_0.txt) @@ -22,7 +22,7 @@ #include #endif -#if defined(BOOST_MSVC) && defined(_M_X64) && !defined(BOOST_UUID_USE_SSE3) && (BOOST_MSVC < 1900 /* Fixed in Visual Studio 2015 */ ) +#if defined(BOOST_MSVC) && defined(_M_X64) && (BOOST_MSVC < 1900 /* Fixed in Visual Studio 2015 */ ) // At least MSVC 9 (VS2008) and 12 (VS2013) have an optimizer bug that sometimes results in incorrect SIMD code // generated in Release x64 mode. In particular, it affects operator==, where the compiler sometimes generates // pcmpeqd with a memory opereand instead of movdqu followed by pcmpeqd. The problem is that uuid can be @@ -45,9 +45,7 @@ namespace detail { BOOST_FORCEINLINE __m128i load_unaligned_si128(const uint8_t* p) BOOST_NOEXCEPT { -#if defined(BOOST_UUID_USE_SSE3) - return _mm_lddqu_si128(reinterpret_cast< const __m128i* >(p)); -#elif !defined(BOOST_UUID_DETAIL_MSVC_BUG981648) +#if !defined(BOOST_UUID_DETAIL_MSVC_BUG981648) || defined(BOOST_UUID_USE_AVX) return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p)); #elif defined(BOOST_MSVC) && BOOST_MSVC >= 1600 __m128i mm = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p)); diff --git a/test/test_msvc_simd_bug981648.hpp b/test/test_msvc_simd_bug981648.hpp index a0ad645..fe7990e 100644 --- a/test/test_msvc_simd_bug981648.hpp +++ b/test/test_msvc_simd_bug981648.hpp @@ -1,34 +1,39 @@ /* -* Copyright 2014 Andrey Semashev -* -* Distributed under the Boost Software License, Version 1.0. -* See accompanying file LICENSE_1_0.txt or copy at -* https://www.boost.org/LICENSE_1_0.txt -*/ + * Copyright 2014 Andrey Semashev + * + * Distributed under the Boost Software License, Version 1.0. + * See accompanying file LICENSE_1_0.txt or copy at + * https://www.boost.org/LICENSE_1_0.txt + */ /* -* This is a part of the test for a workaround for MSVC 12 (VS2013) optimizer bug -* which causes incorrect SIMD code generation for operator==. See: -* -* https://svn.boost.org/trac/boost/ticket/8509#comment:3 -* https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs -* -* The header contains common definitions for the two source files. -*/ + * This is a part of the test for a workaround for MSVC 12 (VS2013) optimizer bug + * which causes incorrect SIMD code generation for operator==. See: + * + * https://svn.boost.org/trac/boost/ticket/8509#comment:3 + * https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs + * + * The header contains common definitions for the two source files. + */ + #include + using boost::uuids::uuid; + class headerProperty { public: -virtual ~headerProperty() {} + virtual ~headerProperty() {} }; -class my_obj: -public headerProperty + +class my_obj : + public headerProperty { public: -// This char tmp[8] forces the following uuid to be misaligned. -char tmp[8]; -// This m_uuid is misaligned (not 16-byte aligned) and causes the != operator to crash. -uuid m_uuid; -const uuid &get_marker_id() const { return m_uuid; } -uuid get_id() const { return m_uuid; } + // This char tmp[8] forces the following uuid to be misaligned. + char tmp[8]; + // This m_uuid is misaligned (not 16-byte aligned) and causes the != operator to crash. + uuid m_uuid; + + const uuid &get_marker_id() const { return m_uuid; } + uuid get_id() const { return m_uuid; } }; diff --git a/test/test_msvc_simd_bug981648_foo.cpp b/test/test_msvc_simd_bug981648_foo.cpp index fdd1e85..efa2b87 100644 --- a/test/test_msvc_simd_bug981648_foo.cpp +++ b/test/test_msvc_simd_bug981648_foo.cpp @@ -1,32 +1,37 @@ /* -* Copyright 2014 Andrey Semashev -* -* Distributed under the Boost Software License, Version 1.0. -* See accompanying file LICENSE_1_0.txt or copy at -* https://www.boost.org/LICENSE_1_0.txt -*/ + * Copyright 2014 Andrey Semashev + * + * Distributed under the Boost Software License, Version 1.0. + * See accompanying file LICENSE_1_0.txt or copy at + * https://www.boost.org/LICENSE_1_0.txt + */ /* -* This is a part of the test for a workaround for MSVC 12 (VS2013) optimizer bug -* which causes incorrect SIMD code generation for operator==. See: -* -* https://svn.boost.org/trac/boost/ticket/8509#comment:3 -* https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs -* -* The file contains the function that actually causes the crash. Reproduces only -* in Release x64 builds. -*/ + * This is a part of the test for a workaround for MSVC 12 (VS2013) optimizer bug + * which causes incorrect SIMD code generation for operator==. See: + * + * https://svn.boost.org/trac/boost/ticket/8509#comment:3 + * https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs + * + * The file contains the function that actually causes the crash. Reproduces only + * in Release x64 builds. + */ + #include +#include #include "test_msvc_simd_bug981648.hpp" + void mp_grid_update_marker_parameters(headerProperty* header_prop, my_obj ¤t_marker) { -headerProperty *old_header_prop = NULL; -my_obj *p = dynamic_cast(header_prop); -/* -* This != statement crashes with a GP. -* */ -if (p != NULL && (current_marker.get_id() != p->get_marker_id())) { -std::printf("works okay, if it reaches this printf: %p\n", reinterpret_cast(p)); -old_header_prop = header_prop; -if (old_header_prop == 0) { fprintf(stderr, "ouch"); } -} + headerProperty *old_header_prop = NULL; + my_obj *p = dynamic_cast(header_prop); + /* + * This != statement crashes with a GP. + */ + if (p != NULL && (current_marker.get_id() != p->get_marker_id())) { + std::printf("works okay, if it reaches this printf: %p\n", reinterpret_cast(p)); + old_header_prop = header_prop; + if (old_header_prop == 0) { + std::fprintf(stderr, "ouch"); + } + } } diff --git a/test/test_msvc_simd_bug981648_main.cpp b/test/test_msvc_simd_bug981648_main.cpp index 88a6581..06a154e 100644 --- a/test/test_msvc_simd_bug981648_main.cpp +++ b/test/test_msvc_simd_bug981648_main.cpp @@ -1,40 +1,48 @@ /* -* Copyright 2014 Andrey Semashev -* -* Distributed under the Boost Software License, Version 1.0. -* See accompanying file LICENSE_1_0.txt or copy at -* https://www.boost.org/LICENSE_1_0.txt -*/ + * Copyright 2014 Andrey Semashev + * + * Distributed under the Boost Software License, Version 1.0. + * See accompanying file LICENSE_1_0.txt or copy at + * https://www.boost.org/LICENSE_1_0.txt + */ /* -* This is a part of the test for a workaround for MSVC 12 (VS2013) optimizer bug -* which causes incorrect SIMD code generation for operator==. See: -* -* https://svn.boost.org/trac/boost/ticket/8509#comment:3 -* https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs -* -* This file contains the main entry point. -*/ + * This is a part of the test for a workaround for MSVC 12 (VS2013) optimizer bug + * which causes incorrect SIMD code generation for operator==. See: + * + * https://svn.boost.org/trac/boost/ticket/8509#comment:3 + * https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs + * + * This file contains the main entry point. + */ + #include #include "test_msvc_simd_bug981648.hpp" + extern void mp_grid_update_marker_parameters(headerProperty* header_prop, my_obj ¤t_marker); + static my_obj g_my_obj; + int main(void) { -my_obj *p = &g_my_obj; -p->m_uuid = uuid(); -uuid one, two; -one.data[0] = 0; two.data[0] = 1; -//***************************************** -// This != statement generates two movdqu statements or pcmpeqd with a memory operand which crashes -if (one != two) { -std::printf("The first != operator works okay if it reaches this printf.\n"); -} -my_obj a; -a.m_uuid.data[0] = 1; -std::printf("There should be a another printf coming next.\n"); -//***************************************** -// The != statement in this function generates a movups and a movdqu statement. -// It also crashes because the optimizer also creates a pcmpeqd for a non-aligned memory location. -mp_grid_update_marker_parameters(p, a); -return 0; + my_obj *p = &g_my_obj; + p->m_uuid = uuid(); + uuid one, two; + one.data[0] = 0; two.data[0] = 1; + + //***************************************** + // This != statement generates two movdqu statements or pcmpeqd with a memory operand which crashes + if (one != two) { + std::printf("The first != operator works okay if it reaches this printf.\n"); + } + + my_obj a; + a.m_uuid.data[0] = 1; + std::printf("There should be a another printf coming next.\n"); + + //***************************************** + // The != statement in this function generates a movups and a movdqu statement. + // It also crashes because the optimizer also creates a pcmpeqd for a non-aligned memory location. + mp_grid_update_marker_parameters(p, a); + + return 0; }