mirror of
https://github.com/boostorg/fiber.git
synced 2026-02-20 14:42:21 +00:00
some adjustment of ttas spinlocks
This commit is contained in:
committed by
Oliver Kowalke
parent
fc054f3f97
commit
418f6c60b2
@@ -30,7 +30,7 @@
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(push)
|
||||
# pragma warning(disable:4251)
|
||||
//# pragma warning(disable:4251)
|
||||
#endif
|
||||
|
||||
namespace boost {
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <cstddef>
|
||||
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/predef.h>
|
||||
#include <boost/detail/workaround.hpp>
|
||||
|
||||
#ifdef BOOST_FIBERS_DECL
|
||||
@@ -37,12 +38,21 @@
|
||||
# include <boost/config/auto_link.hpp>
|
||||
#endif
|
||||
|
||||
#if !defined(BOOST_FIBERS_SPIN_MAX_CPURELAX_ITER)
|
||||
# define BOOST_FIBERS_SPIN_MAX_CPURELAX_ITER 0x4000
|
||||
#if BOOST_OS_LINUX || BOOST_OS_WINDOWS
|
||||
# define BOOST_FIBERS_HAS_FUTEX
|
||||
#endif
|
||||
|
||||
#if !defined(BOOST_FIBERS_SPIN_MAX_SLEEPFOR_ITER)
|
||||
# define BOOST_FIBERS_SPIN_MAX_SLEEPFOR_ITER 0x4016
|
||||
#if (!defined(BOOST_FIBERS_HAS_FUTEX) && \
|
||||
(defined(BOOST_FIBERS_SPINLOCK_TTAS_FUTEX) || defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX)))
|
||||
# error "futex not supported on this platform"
|
||||
#endif
|
||||
|
||||
#if !defined(BOOST_FIBERS_SPIN_MAX_COLLISIONS)
|
||||
# define BOOST_FIBERS_SPIN_MAX_COLLISIONS 16
|
||||
#endif
|
||||
|
||||
#if !defined(BOOST_FIBERS_SPIN_MAX_TESTS)
|
||||
# define BOOST_FIBERS_SPIN_MAX_TESTS 100
|
||||
#endif
|
||||
|
||||
// modern architectures have cachelines with 64byte length
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
|
||||
|
||||
// Copyright Oliver Kowalke 2013.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
@@ -39,56 +39,56 @@ private:
|
||||
class array {
|
||||
private:
|
||||
typedef std::atomic< context * > atomic_type;
|
||||
typedef typename std::aligned_storage<
|
||||
typedef std::aligned_storage<
|
||||
sizeof( atomic_type), cache_alignment
|
||||
>::type storage_type;
|
||||
|
||||
std::int64_t size_;
|
||||
std::size_t size_;
|
||||
storage_type * storage_;
|
||||
|
||||
public:
|
||||
array( std::int64_t size) :
|
||||
array( std::size_t size) :
|
||||
size_{ size },
|
||||
storage_{ new storage_type[size_] } {
|
||||
for ( std::int64_t i = 0; i < size_; ++i) {
|
||||
for ( std::size_t i = 0; i < size_; ++i) {
|
||||
::new ( static_cast< void * >( std::addressof( storage_[i]) ) ) atomic_type{ nullptr };
|
||||
}
|
||||
}
|
||||
|
||||
~array() {
|
||||
for ( std::int64_t i = 0; i < size_; ++i) {
|
||||
for ( std::size_t i = 0; i < size_; ++i) {
|
||||
reinterpret_cast< atomic_type * >( std::addressof( storage_[i]) )->~atomic_type();
|
||||
}
|
||||
delete [] storage_;
|
||||
}
|
||||
|
||||
std::int64_t size() const noexcept {
|
||||
std::size_t size() const noexcept {
|
||||
return size_;
|
||||
}
|
||||
|
||||
void push( std::int64_t bottom, context * ctx) noexcept {
|
||||
void push( std::size_t bottom, context * ctx) noexcept {
|
||||
reinterpret_cast< atomic_type * >(
|
||||
std::addressof( storage_[bottom % size_]) )
|
||||
->store( ctx, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
context * pop( std::int64_t top) noexcept {
|
||||
context * pop( std::size_t top) noexcept {
|
||||
return reinterpret_cast< atomic_type * >(
|
||||
std::addressof( storage_[top % size_]) )
|
||||
->load( std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
array * resize( std::int64_t bottom, std::int64_t top) {
|
||||
array * resize( std::size_t bottom, std::size_t top) {
|
||||
std::unique_ptr< array > tmp{ new array{ 2 * size_ } };
|
||||
for ( std::int64_t i = top; i != bottom; ++i) {
|
||||
for ( std::size_t i = top; i != bottom; ++i) {
|
||||
tmp->push( i, pop( i) );
|
||||
}
|
||||
return tmp.release();
|
||||
}
|
||||
};
|
||||
|
||||
alignas(cache_alignment) std::atomic< std::int64_t > top_{ 0 };
|
||||
alignas(cache_alignment) std::atomic< std::int64_t > bottom_{ 0 };
|
||||
alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 };
|
||||
alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 };
|
||||
alignas(cache_alignment) std::atomic< array * > array_;
|
||||
std::vector< array * > old_arrays_{};
|
||||
char padding_[cacheline_length];
|
||||
@@ -110,14 +110,14 @@ public:
|
||||
context_spmc_queue & operator=( context_spmc_queue const&) = delete;
|
||||
|
||||
bool empty() const noexcept {
|
||||
std::int64_t bottom{ bottom_.load( std::memory_order_relaxed) };
|
||||
std::int64_t top{ top_.load( std::memory_order_relaxed) };
|
||||
std::size_t bottom{ bottom_.load( std::memory_order_relaxed) };
|
||||
std::size_t top{ top_.load( std::memory_order_relaxed) };
|
||||
return bottom <= top;
|
||||
}
|
||||
|
||||
void push( context * ctx) {
|
||||
std::int64_t bottom{ bottom_.load( std::memory_order_relaxed) };
|
||||
std::int64_t top{ top_.load( std::memory_order_acquire) };
|
||||
std::size_t bottom{ bottom_.load( std::memory_order_relaxed) };
|
||||
std::size_t top{ top_.load( std::memory_order_acquire) };
|
||||
array * a{ array_.load( std::memory_order_relaxed) };
|
||||
if ( (a->size() - 1) < (bottom - top) ) {
|
||||
// queue is full
|
||||
@@ -133,9 +133,9 @@ public:
|
||||
}
|
||||
|
||||
context * pop() {
|
||||
std::int64_t top{ top_.load( std::memory_order_acquire) };
|
||||
std::size_t top{ top_.load( std::memory_order_acquire) };
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst);
|
||||
std::int64_t bottom{ bottom_.load( std::memory_order_acquire) };
|
||||
std::size_t bottom{ bottom_.load( std::memory_order_acquire) };
|
||||
context * ctx{ nullptr };
|
||||
if ( top < bottom) {
|
||||
// queue is not empty
|
||||
|
||||
61
include/boost/fiber/detail/futex.hpp
Normal file
61
include/boost/fiber/detail/futex.hpp
Normal file
@@ -0,0 +1,61 @@
|
||||
|
||||
// Copyright Oliver Kowalke 2016.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_FIBERS_DETAIL_FUTEX_H
|
||||
#define BOOST_FIBERS_DETAIL_FUTEX_H
|
||||
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/predef.h>
|
||||
|
||||
#include <boost/fiber/detail/config.hpp>
|
||||
|
||||
#if BOOST_OS_LINUX
|
||||
extern "C" {
|
||||
#include <linux/futex.h>
|
||||
#include <sys/syscall.h>
|
||||
}
|
||||
#elif BOOST_OS_WINDOWS
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
namespace boost {
|
||||
namespace fibers {
|
||||
namespace detail {
|
||||
|
||||
#if BOOST_OS_LINUX
|
||||
inline
|
||||
int sys_futex( void * addr, std::int32_t op, std::int32_t x) {
|
||||
return ::syscall( SYS_futex, addr, op, x, nullptr, nullptr, 0);
|
||||
}
|
||||
|
||||
inline
|
||||
int futex_wake( std::atomic< std::int32_t > * addr) {
|
||||
return 0 <= sys_futex( static_cast< void * >( addr), FUTEX_WAKE_PRIVATE, 1) ? 0 : -1;
|
||||
}
|
||||
|
||||
inline
|
||||
int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
|
||||
return 0 <= sys_futex( static_cast< void * >( addr), FUTEX_WAIT_PRIVATE, x) ? 0 : -1;
|
||||
}
|
||||
#elif BOOST_OS_WINDOWS
|
||||
inline
|
||||
int futex_wake( std::atomic< std::int32_t > * addr) {
|
||||
::WakeByAddressSingle( static_cast< void * >( addr) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline
|
||||
int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
|
||||
::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), -1);
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
// '#warn' is not a preprocessing directive: compilers reject it with a hard
// "invalid preprocessing directive" error. '#warning' emits the intended
// diagnostic and lets the translation unit continue.
# warning "no futex support on this platform"
|
||||
#endif
|
||||
|
||||
}}}
|
||||
|
||||
#endif // BOOST_FIBERS_DETAIL_FUTEX_H
|
||||
@@ -15,6 +15,10 @@
|
||||
# include <mutex>
|
||||
# include <boost/fiber/detail/spinlock_ttas.hpp>
|
||||
# include <boost/fiber/detail/spinlock_ttas_adaptive.hpp>
|
||||
# if defined(BOOST_FIBERS_HAS_FUTEX)
|
||||
# include <boost/fiber/detail/spinlock_ttas_futex.hpp>
|
||||
# include <boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef BOOST_HAS_ABI_HEADERS
|
||||
@@ -40,6 +44,10 @@ struct spinlock_lock {
|
||||
#else
|
||||
# if defined(BOOST_FIBERS_SPINLOCK_STD_MUTEX)
|
||||
using spinlock = std::mutex;
|
||||
# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_FUTEX)
|
||||
using spinlock = spinlock_ttas_futex;
|
||||
# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX)
|
||||
using spinlock = spinlock_ttas_adaptive_futex;
|
||||
# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE)
|
||||
using spinlock = spinlock_ttas_adaptive;
|
||||
# else
|
||||
|
||||
@@ -25,9 +25,6 @@ namespace detail {
|
||||
|
||||
class spinlock_ttas {
|
||||
private:
|
||||
static_assert(BOOST_FIBERS_SPIN_MAX_CPURELAX_ITER < BOOST_FIBERS_SPIN_MAX_SLEEPFOR_ITER,
|
||||
"BOOST_FIBERS_SPIN_MAX_CPURELAX_ITER must be smaller than BOOST_FIBERS_SPIN_MAX_SLEEPFOR_ITER");
|
||||
|
||||
enum class spinlock_status {
|
||||
locked = 0,
|
||||
unlocked
|
||||
@@ -60,14 +57,14 @@ public:
|
||||
// cached 'state_' is invalidated -> cache miss
|
||||
while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
|
||||
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
|
||||
if ( BOOST_FIBERS_SPIN_MAX_CPURELAX_ITER > tests) {
|
||||
if ( BOOST_FIBERS_SPIN_MAX_TESTS > tests) {
|
||||
++tests;
|
||||
// give CPU a hint that this thread is in a "spin-wait" loop
|
||||
// delays the next instruction's execution for a finite period of time (depends on processor family)
|
||||
// the CPU is not under demand, parts of the pipeline are no longer being used
|
||||
// -> reduces the power consumed by the CPU
|
||||
cpu_relax();
|
||||
} else if ( BOOST_FIBERS_SPIN_MAX_SLEEPFOR_ITER > tests) {
|
||||
} else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) {
|
||||
++tests;
|
||||
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
|
||||
// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
|
||||
|
||||
@@ -33,8 +33,7 @@ private:
|
||||
|
||||
// align shared variable 'state_' at cache line to prevent false sharing
|
||||
alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
|
||||
// align shared variable 'tests_' at cache line to prevent false sharing
|
||||
alignas(cache_alignment) std::atomic< std::size_t > tests_{ 0 };
|
||||
std::atomic< std::size_t > tests_{ 0 };
|
||||
// padding to avoid other data on the cacheline of shared variable 'state_'
|
||||
char pad[cacheline_length];
|
||||
|
||||
@@ -49,7 +48,7 @@ public:
|
||||
for (;;) {
|
||||
std::size_t tests = 0;
|
||||
const std::size_t prev_tests = tests_.load( std::memory_order_relaxed);
|
||||
const std::size_t max_tests = (std::min)( static_cast< std::size_t >( BOOST_FIBERS_SPIN_MAX_CPURELAX_ITER), 2 * prev_tests + 10);
|
||||
const std::size_t max_tests = (std::min)( static_cast< std::size_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
|
||||
// avoid using multiple pause instructions for a delay of a specific cycle count
|
||||
// the delay of cpu_relax() (pause on Intel) depends on the processor family
|
||||
// the cycle count can not guaranteed from one system to the next
|
||||
@@ -69,7 +68,7 @@ public:
|
||||
// the CPU is not under demand, parts of the pipeline are no longer being used
|
||||
// -> reduces the power consumed by the CPU
|
||||
cpu_relax();
|
||||
} else if ( BOOST_FIBERS_SPIN_MAX_SLEEPFOR_ITER > tests) {
|
||||
} else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) {
|
||||
++tests;
|
||||
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
|
||||
// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
|
||||
|
||||
111
include/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
Normal file
111
include/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
Normal file
@@ -0,0 +1,111 @@
|
||||
|
||||
// Copyright Oliver Kowalke 2016.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX_H
|
||||
#define BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX_H
|
||||
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include <boost/fiber/detail/config.hpp>
|
||||
#include <boost/fiber/detail/cpu_relax.hpp>
|
||||
#include <boost/fiber/detail/futex.hpp>
|
||||
|
||||
// based on information from:
|
||||
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
|
||||
// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
|
||||
|
||||
namespace boost {
|
||||
namespace fibers {
|
||||
namespace detail {
|
||||
|
||||
class spinlock_ttas_adaptive_futex {
|
||||
private:
|
||||
// align shared variable 'value_' at cache line to prevent false sharing
|
||||
alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 };
|
||||
std::atomic< std::int32_t > tests_{ 0 };
|
||||
// padding to avoid other data one the cacheline of shared variable 'value_'
|
||||
char pad_[cacheline_length];
|
||||
|
||||
public:
|
||||
spinlock_ttas_adaptive_futex() noexcept = default;
|
||||
|
||||
spinlock_ttas_adaptive_futex( spinlock_ttas_adaptive_futex const&) = delete;
|
||||
spinlock_ttas_adaptive_futex & operator=( spinlock_ttas_adaptive_futex const&) = delete;
|
||||
|
||||
void lock() noexcept {
|
||||
std::int32_t collisions = 0, tests = 0, expected = 0;
|
||||
const std::int32_t prev_tests = tests_.load( std::memory_order_relaxed);
|
||||
const std::int32_t max_tests = (std::min)( static_cast< std::int32_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
|
||||
// after max. spins or collisions suspend via futex
|
||||
while ( max_tests > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
|
||||
// avoid using multiple pause instructions for a delay of a specific cycle count
|
||||
// the delay of cpu_relax() (pause on Intel) depends on the processor family
|
||||
// the cycle count can not guaranteed from one system to the next
|
||||
// -> check the shared variable 'value_' in between each cpu_relax() to prevent
|
||||
// unnecessarily long delays on some systems
|
||||
// test shared variable 'status_'
|
||||
// first access to 'value_' -> chache miss
|
||||
// sucessive acccess to 'value_' -> cache hit
|
||||
// if 'value_' was released by other fiber
|
||||
// cached 'value_' is invalidated -> cache miss
|
||||
if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
|
||||
++tests;
|
||||
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
|
||||
// give CPU a hint that this thread is in a "spin-wait" loop
|
||||
// delays the next instruction's execution for a finite period of time (depends on processor family)
|
||||
// the CPU is not under demand, parts of the pipeline are no longer being used
|
||||
// -> reduces the power consumed by the CPU
|
||||
cpu_relax();
|
||||
#else
|
||||
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
|
||||
// but only to another thread on the same processor
|
||||
// instead of constant checking, a thread only checks if no other useful work is pending
|
||||
std::this_thread::yield();
|
||||
#endif
|
||||
} else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_release) ) {
|
||||
// spinlock now contended
|
||||
// utilize 'Binary Exponential Backoff' algorithm
|
||||
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
|
||||
static thread_local std::minstd_rand generator;
|
||||
const std::int32_t z = std::uniform_int_distribution< std::int32_t >{
|
||||
0, static_cast< std::int32_t >( 1) << collisions }( generator);
|
||||
++collisions;
|
||||
for ( std::int32_t i = 0; i < z; ++i) {
|
||||
cpu_relax();
|
||||
}
|
||||
} else {
|
||||
// success, lock acquired
|
||||
tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// failure, lock not acquired
|
||||
// pause via futex
|
||||
if ( 2 != expected) {
|
||||
expected = value_.exchange( 2, std::memory_order_acquire);
|
||||
}
|
||||
while ( 0 != expected) {
|
||||
futex_wait( & value_, 2);
|
||||
expected = value_.exchange( 2, std::memory_order_acquire);
|
||||
}
|
||||
// success, lock acquired
|
||||
tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void unlock() noexcept {
|
||||
if ( 1 != value_.fetch_sub( 1, std::memory_order_acquire) ) {
|
||||
value_.store( 0, std::memory_order_release);
|
||||
futex_wake( & value_);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
|
||||
#endif // BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX_H
|
||||
104
include/boost/fiber/detail/spinlock_ttas_futex.hpp
Normal file
104
include/boost/fiber/detail/spinlock_ttas_futex.hpp
Normal file
@@ -0,0 +1,104 @@
|
||||
|
||||
// Copyright Oliver Kowalke 2016.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
|
||||
#define BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
|
||||
|
||||
#include <atomic>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include <boost/fiber/detail/config.hpp>
|
||||
#include <boost/fiber/detail/cpu_relax.hpp>
|
||||
#include <boost/fiber/detail/futex.hpp>
|
||||
|
||||
// based on information from:
|
||||
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
|
||||
// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
|
||||
|
||||
namespace boost {
|
||||
namespace fibers {
|
||||
namespace detail {
|
||||
|
||||
class spinlock_ttas_futex {
|
||||
private:
|
||||
// align shared variable 'value_' at cache line to prevent false sharing
|
||||
alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 };
|
||||
// padding to avoid other data one the cacheline of shared variable 'value_'
|
||||
char pad_[cacheline_length];
|
||||
|
||||
public:
|
||||
spinlock_ttas_futex() noexcept = default;
|
||||
|
||||
spinlock_ttas_futex( spinlock_ttas_futex const&) = delete;
|
||||
spinlock_ttas_futex & operator=( spinlock_ttas_futex const&) = delete;
|
||||
|
||||
void lock() noexcept {
|
||||
std::int32_t collisions = 0, tests = 0, expected = 0;
|
||||
// after max. spins or collisions suspend via futex
|
||||
while ( BOOST_FIBERS_SPIN_MAX_TESTS > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
|
||||
// avoid using multiple pause instructions for a delay of a specific cycle count
|
||||
// the delay of cpu_relax() (pause on Intel) depends on the processor family
|
||||
// the cycle count can not guaranteed from one system to the next
|
||||
// -> check the shared variable 'value_' in between each cpu_relax() to prevent
|
||||
// unnecessarily long delays on some systems
|
||||
// test shared variable 'status_'
|
||||
// first access to 'value_' -> chache miss
|
||||
// sucessive acccess to 'value_' -> cache hit
|
||||
// if 'value_' was released by other fiber
|
||||
// cached 'value_' is invalidated -> cache miss
|
||||
if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
|
||||
++tests;
|
||||
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
|
||||
// give CPU a hint that this thread is in a "spin-wait" loop
|
||||
// delays the next instruction's execution for a finite period of time (depends on processor family)
|
||||
// the CPU is not under demand, parts of the pipeline are no longer being used
|
||||
// -> reduces the power consumed by the CPU
|
||||
cpu_relax();
|
||||
#else
|
||||
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
|
||||
// but only to another thread on the same processor
|
||||
// instead of constant checking, a thread only checks if no other useful work is pending
|
||||
std::this_thread::yield();
|
||||
#endif
|
||||
} else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_release) ) {
|
||||
// spinlock now contended
|
||||
// utilize 'Binary Exponential Backoff' algorithm
|
||||
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
|
||||
static thread_local std::minstd_rand generator;
|
||||
const std::int32_t z = std::uniform_int_distribution< std::int32_t >{
|
||||
0, static_cast< std::int32_t >( 1) << collisions }( generator);
|
||||
++collisions;
|
||||
for ( std::int32_t i = 0; i < z; ++i) {
|
||||
cpu_relax();
|
||||
}
|
||||
} else {
|
||||
// success, lock acquired
|
||||
return;
|
||||
}
|
||||
}
|
||||
// failure, lock not acquired
|
||||
// pause via futex
|
||||
if ( 2 != expected) {
|
||||
expected = value_.exchange( 2, std::memory_order_acquire);
|
||||
}
|
||||
while ( 0 != expected) {
|
||||
futex_wait( & value_, 2);
|
||||
expected = value_.exchange( 2, std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
|
||||
void unlock() noexcept {
|
||||
if ( 1 != value_.fetch_sub( 1, std::memory_order_acquire) ) {
|
||||
value_.store( 0, std::memory_order_release);
|
||||
futex_wake( & value_);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
|
||||
#endif // BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
|
||||
Reference in New Issue
Block a user