mirror of https://github.com/boostorg/fiber.git

boost.atomic new version
@@ -164,7 +164,7 @@ public:
 
     bool test_and_set(memory_order order=memory_order_seq_cst)
     {
-        return super::exchange(1, order);
+        return super::exchange(1, order) != 0;
     }
     void clear(memory_order order=memory_order_seq_cst)
     {
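The change above makes the int-to-bool conversion explicit: super::exchange()
returns the underlying integral type, and returning that value straight from a
bool function relies on an implicit narrowing conversion that many compilers
warn about. A standalone sketch of the idea (hypothetical types, not part of
the patch):

    struct flag_base {
        int value;
        // exchange() returns the previous value as an int, not a bool
        int exchange(int v) { int old = value; value = v; return old; }
    };

    struct flag : flag_base {
        bool test_and_set()
        {
            // comparing against 0 yields a bool directly instead of
            // relying on the implicit int-to-bool conversion
            return exchange(1) != 0;
        }
    };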
@@ -25,7 +25,7 @@ public:
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T*>(&i));
         memcpy((void*)&i, &t, sizeof(T));
     }
-    T load(memory_order order=memory_order_seq_cst) volatile const
+    T load(memory_order /*order*/=memory_order_seq_cst) volatile const
     {
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T*>(&i));
         T tmp;
@@ -35,8 +35,8 @@ public:
     bool compare_exchange_strong(
         T &expected,
         T desired,
-        memory_order success_order,
-        memory_order failure_order) volatile
+        memory_order /*success_order*/,
+        memory_order /*failure_order*/) volatile
     {
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T*>(&i));
         if (memcmp((void*)&i, &expected, sizeof(T))==0) {
@@ -55,7 +55,7 @@ public:
     {
         return compare_exchange_strong(expected, desired, success_order, failure_order);
     }
-    T exchange(T replacement, memory_order order=memory_order_seq_cst) volatile
+    T exchange(T replacement, memory_order /*order*/=memory_order_seq_cst) volatile
     {
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T*>(&i));
         T tmp;
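These fallback hunks only comment out the parameter names: the memory_order
argument is ignored because the spinlock itself already provides full
ordering, and naming an unused parameter triggers warnings. The spinlock_pool
approach hashes the object's address into a fixed table of locks, roughly
like this (a simplified sketch, not Boost's actual implementation; since
C++20 a default-constructed std::atomic_flag is guaranteed clear):

    #include <atomic>
    #include <cstdint>

    template<int Tag>
    class spinlock_pool {
        static std::atomic_flag locks_[41];  // pool size is arbitrary here
    public:
        class scoped_lock {
            std::atomic_flag& lock_;
        public:
            explicit scoped_lock(const volatile void* addr)
                // hash the address to pick one lock from the pool
                : lock_(locks_[reinterpret_cast<std::uintptr_t>(addr) % 41])
            {
                while (lock_.test_and_set(std::memory_order_acquire))
                    ;  // spin until the flag is acquired
            }
            ~scoped_lock() { lock_.clear(std::memory_order_release); }
        };
    };

    template<int Tag>
    std::atomic_flag spinlock_pool<Tag>::locks_[41];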
@@ -68,7 +68,7 @@ static inline void fence_after(memory_order order)
 }
 
 template<>
-static inline void platform_atomic_thread_fence(memory_order order)
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_acquire:
boost/atomic/detail/gcc-armv6+.hpp (new file, 299 lines)
@@ -0,0 +1,299 @@
#ifndef BOOST_DETAIL_ATOMIC_GCC_ARMV6P_HPP
#define BOOST_DETAIL_ATOMIC_GCC_ARMV6P_HPP

// Distributed under the Boost Software License, Version 1.0.
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// Copyright (c) 2009 Helge Bahmann
// Copyright (c) 2009 Phil Endecott
// ARM Code by Phil Endecott, based on other architectures.

#include <boost/memory_order.hpp>
#include <boost/atomic/detail/base.hpp>
#include <boost/atomic/detail/builder.hpp>

// From the ARM Architecture Reference Manual for architecture v6:
//
// LDREX{<cond>} <Rd>, [<Rn>]
// <Rd> Specifies the destination register for the memory word addressed by <Rn>.
// <Rn> Specifies the register containing the address.
//
// STREX{<cond>} <Rd>, <Rm>, [<Rn>]
// <Rd> Specifies the destination register for the returned status value.
//      0 if the operation updates memory
//      1 if the operation fails to update memory
// <Rm> Specifies the register containing the word to be stored to memory.
// <Rn> Specifies the register containing the address.
// Rd must not be the same register as Rm or Rn.
//
// ARM v7 is like ARM v6 plus:
// There are half-word and byte versions of the LDREX and STREX instructions,
// LDREXH, LDREXB, STREXH and STREXB.
// There are also double-word versions, LDREXD and STREXD.
// (Actually it looks like these are available from version 6k onwards.)
// FIXME these are not yet used; should be mostly a matter of copy-and-paste.
// I think you can supply an immediate offset to the address.
//
// A memory barrier is effected using a "co-processor 15" instruction,
// though a separate assembler mnemonic is available for it in v7.
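LDREX/STREX is the load-linked/store-conditional idiom: every read-modify-write
in this file is a loop that reloads and retries whenever the conditional store
reports failure. Modeled in portable C++ terms (a semantic sketch only; the
real implementation is the inline assembly below):

    #include <atomic>

    int fetch_add_model(std::atomic<int>& word, int c)
    {
        int original = word.load(std::memory_order_relaxed);   // LDREX
        // compare_exchange_weak may fail spuriously, just as STREX may;
        // on failure 'original' is reloaded and the loop retries
        while (!word.compare_exchange_weak(original, original + c,
                                           std::memory_order_relaxed))
            ;
        return original;
    }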
namespace boost {
namespace detail {
namespace atomic {

// "Thumb 1" is a subset of the ARM instruction set that uses a 16-bit encoding. It
// doesn't include all instructions and in particular it doesn't include the co-processor
// instruction used for the memory barrier or the load-locked/store-conditional
// instructions. So, if we're compiling in "Thumb 1" mode, we need to wrap all of our
// asm blocks with code to temporarily change to ARM mode.
//
// You can only change between ARM and Thumb modes when branching using the bx instruction.
// bx takes an address specified in a register. The least significant bit of the address
// indicates the mode, so 1 is added to indicate that the destination code is Thumb.
// A temporary register is needed for the address and is passed as an argument to these
// macros. It must be one of the "low" registers accessible to Thumb code, specified
// using the "l" attribute in the asm statement.
//
// Architecture v7 introduces "Thumb 2", which does include (almost?) all of the ARM
// instruction set. So in v7 we don't need to change to ARM mode; we can write "universal
// assembler" which will assemble to Thumb 2 or ARM code as appropriate. The only thing
// we need to do to make this "universal" assembler mode work is to insert "IT" instructions
// to annotate the conditional instructions. These are ignored in other modes (e.g. v6),
// so they can always be present.
#if defined(__thumb__) && !defined(__ARM_ARCH_7A__)
// FIXME also other v7 variants.
#define BOOST_ATOMIC_ARM_ASM_START(TMPREG) "adr " #TMPREG ", 1f\n" "bx " #TMPREG "\n" ".arm\n" ".align 4\n" "1: "
#define BOOST_ATOMIC_ARM_ASM_END(TMPREG)   "adr " #TMPREG ", 1f + 1\n" "bx " #TMPREG "\n" ".thumb\n" ".align 2\n" "1: "

#else
// The tmpreg is wasted in this case, which is non-optimal.
#define BOOST_ATOMIC_ARM_ASM_START(TMPREG)
#define BOOST_ATOMIC_ARM_ASM_END(TMPREG)
#endif

#if defined(__ARM_ARCH_7A__)
// FIXME ditto.
#define BOOST_ATOMIC_ARM_DMB "dmb\n"
#else
#define BOOST_ATOMIC_ARM_DMB "mcr\tp15, 0, r0, c7, c10, 5\n"
#endif

// There is also a "Data Synchronisation Barrier" DSB; this exists in v6 as another co-processor
// instruction like the above.
static inline void fence_before(memory_order order)
{
    // FIXME I don't understand enough about barriers to know what this should do.
    switch(order) {
        case memory_order_release:
        case memory_order_acq_rel:
        case memory_order_seq_cst:
            int brtmp;
            __asm__ __volatile__ (
                BOOST_ATOMIC_ARM_ASM_START(%0)
                BOOST_ATOMIC_ARM_DMB
                BOOST_ATOMIC_ARM_ASM_END(%0)
                : "=&l" (brtmp) :: "memory"
            );
        default:;
    }
}

static inline void fence_after(memory_order order)
{
    // FIXME I don't understand enough about barriers to know what this should do.
    switch(order) {
        case memory_order_acquire:
        case memory_order_acq_rel:
        case memory_order_seq_cst:
            int brtmp;
            __asm__ __volatile__ (
                BOOST_ATOMIC_ARM_ASM_START(%0)
                BOOST_ATOMIC_ARM_DMB
                BOOST_ATOMIC_ARM_ASM_END(%0)
                : "=&l" (brtmp) :: "memory"
            );
        case memory_order_consume:
            __asm__ __volatile__ ("" ::: "memory");
        default:;
    }
}

#undef BOOST_ATOMIC_ARM_DMB
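In C++11 terms, fence_before and fence_after turn a plain access into a
release store or an acquire load. Roughly (a sketch using standard fences as
a stand-in for the DMB helpers above; note this is not a data-race-free
program by the C++11 rules, it only illustrates where the barriers sit):

    #include <atomic>

    volatile int g_word;

    void store_release_model(int v)
    {
        std::atomic_thread_fence(std::memory_order_release);  // fence_before
        g_word = v;                                           // plain store
    }

    int load_acquire_model()
    {
        int v = g_word;                                       // plain load
        std::atomic_thread_fence(std::memory_order_acquire);  // fence_after
        return v;
    }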
template<typename T>
class atomic_arm_4 {
public:
    typedef T integral_type;
    explicit atomic_arm_4(T v) : i(v) {}
    atomic_arm_4() {}
    T load(memory_order order=memory_order_seq_cst) const volatile
    {
        T v=const_cast<volatile const T &>(i);
        fence_after(order);
        return v;
    }
    void store(T v, memory_order order=memory_order_seq_cst) volatile
    {
        fence_before(order);
        const_cast<volatile T &>(i)=v;
    }
    bool compare_exchange_weak(
        T &expected,
        T desired,
        memory_order success_order,
        memory_order failure_order) volatile
    {
        fence_before(success_order);
        int success;
        int tmp;
        __asm__ __volatile__(
            BOOST_ATOMIC_ARM_ASM_START(%2)
            "mov     %1, #0\n"        // success = 0
            "ldrex   %0, [%3]\n"      // expected' = *(&i)
            "teq     %0, %4\n"        // flags = expected'==expected
            "ittt    eq\n"
            "strexeq %2, %5, [%3]\n"  // if (flags.equal) *(&i) = desired, tmp = !OK
            "teqeq   %2, #0\n"        // if (flags.equal) flags = tmp==0
            "moveq   %1, #1\n"        // if (flags.equal) success = 1
            BOOST_ATOMIC_ARM_ASM_END(%2)
            : "=&r" (expected),  // %0
              "=&r" (success),   // %1
              "=&l" (tmp)        // %2
            : "r" (&i),          // %3
              "r" (expected),    // %4
              "r" ((int)desired) // %5
            : "cc"
        );
        if (success) fence_after(success_order);
        else fence_after(failure_order);
        return success;
    }

    bool is_lock_free(void) const volatile {return true;}
protected:
    inline T fetch_add_var(T c, memory_order order) volatile
    {
        fence_before(order);
        T original, tmp;
        int tmp2;
        __asm__ __volatile__(
            BOOST_ATOMIC_ARM_ASM_START(%2)
            "1: ldrex %0, [%3]\n"   // original = *(&i)
            "add   %1, %0, %4\n"    // tmp = original + c
            "strex %2, %1, [%3]\n"  // *(&i) = tmp; tmp2 = !OK
            "teq   %2, #0\n"        // flags = tmp2==0
            "it    ne\n"
            "bne   1b\n"            // if (!flags.equal) goto 1
            BOOST_ATOMIC_ARM_ASM_END(%2)
            : "=&r" (original), // %0
              "=&r" (tmp),      // %1
              "=&l" (tmp2)      // %2
            : "r" (&i),         // %3
              "r" (c)           // %4
            : "cc"
        );
        fence_after(order);
        return original;
    }
    inline T fetch_inc(memory_order order) volatile
    {
        fence_before(order);
        T original, tmp;
        int tmp2;
        __asm__ __volatile__(
            BOOST_ATOMIC_ARM_ASM_START(%2)
            "1: ldrex %0, [%3]\n"   // original = *(&i)
            "add   %1, %0, #1\n"    // tmp = original + 1
            "strex %2, %1, [%3]\n"  // *(&i) = tmp; tmp2 = !OK
            "teq   %2, #0\n"        // flags = tmp2==0
            "it    ne\n"
            "bne   1b\n"            // if (!flags.equal) goto 1
            BOOST_ATOMIC_ARM_ASM_END(%2)
            : "=&r" (original), // %0
              "=&r" (tmp),      // %1
              "=&l" (tmp2)      // %2
            : "r" (&i)          // %3
            : "cc"
        );
        fence_after(order);
        return original;
    }
    inline T fetch_dec(memory_order order) volatile
    {
        fence_before(order);
        T original, tmp;
        int tmp2;
        __asm__ __volatile__(
            BOOST_ATOMIC_ARM_ASM_START(%2)
            "1: ldrex %0, [%3]\n"   // original = *(&i)
            "sub   %1, %0, #1\n"    // tmp = original - 1
            "strex %2, %1, [%3]\n"  // *(&i) = tmp; tmp2 = !OK
            "teq   %2, #0\n"        // flags = tmp2==0
            "it    ne\n"
            "bne   1b\n"            // if (!flags.equal) goto 1
            BOOST_ATOMIC_ARM_ASM_END(%2)
            : "=&r" (original), // %0
              "=&r" (tmp),      // %1
              "=&l" (tmp2)      // %2
            : "r" (&i)          // %3
            : "cc"
        );
        fence_after(order);
        return original;
    }
private:
    T i;
};
// #ifdef _ARM_ARCH_7
// FIXME TODO can add native byte and halfword version here

template<typename T>
class platform_atomic_integral<T, 4> : public build_atomic_from_typical<build_exchange<atomic_arm_4<T> > > {
public:
    typedef build_atomic_from_typical<build_exchange<atomic_arm_4<T> > > super;
    explicit platform_atomic_integral(T v) : super(v) {}
    platform_atomic_integral(void) {}
};

template<typename T>
class platform_atomic_integral<T, 1>: public build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> {
public:
    typedef build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> super;

    explicit platform_atomic_integral(T v) : super(v) {}
    platform_atomic_integral(void) {}
};

template<typename T>
class platform_atomic_integral<T, 2>: public build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> {
public:
    typedef build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> super;

    explicit platform_atomic_integral(T v) : super(v) {}
    platform_atomic_integral(void) {}
};

typedef build_exchange<atomic_arm_4<void *> > platform_atomic_address;

}
}
}

#undef BOOST_ATOMIC_ARM_ASM_START
#undef BOOST_ATOMIC_ARM_ASM_END

#endif
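The 1- and 2-byte specializations reuse the 32-bit CAS: build_atomic_from_larger_type
embeds the small value in its containing aligned 32-bit word and loops a CAS over the
whole word. The idea, sketched independently of Boost's helper (names and the
little-endian byte layout are illustrative assumptions):

    #include <atomic>
    #include <cstdint>

    // Emulate an 8-bit fetch_add with a 32-bit CAS by splicing the byte
    // into its containing word.
    std::uint8_t fetch_add_u8(std::atomic<std::uint32_t>& word,
                              unsigned byte_index, std::uint8_t c)
    {
        std::uint32_t shift = byte_index * 8;
        std::uint32_t mask  = 0xffu << shift;
        std::uint32_t old_word = word.load(std::memory_order_relaxed);
        std::uint32_t new_word;
        std::uint8_t  old_byte;
        do {
            old_byte = static_cast<std::uint8_t>((old_word & mask) >> shift);
            std::uint32_t new_byte = static_cast<std::uint8_t>(old_byte + c);
            new_word = (old_word & ~mask) | (new_byte << shift);
            // on failure old_word is refreshed and we re-splice
        } while (!word.compare_exchange_weak(old_word, new_word,
                                             std::memory_order_seq_cst));
        return old_byte;
    }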
@@ -73,7 +73,7 @@ static inline void fence_after(memory_order order)
 }
 
 template<>
-static inline void platform_atomic_thread_fence(memory_order order)
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_acquire:
@@ -43,7 +43,7 @@ static inline void full_fence(void)
     __asm__ __volatile__("mfence" ::: "memory");
 #else
     /* could use mfence iff i686, but it does not appear to matter much */
-    __asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory");
+    __asm__ __volatile__("lock; addl $0, (%%esp)" ::: "memory");
 #endif
 }
@@ -59,7 +59,8 @@ static inline void fence_after_load(memory_order order)
     }
 }
 
-static inline void platform_atomic_thread_fence(memory_order order)
+template<>
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_seq_cst:
@@ -101,7 +102,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgb %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgb %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -118,12 +119,12 @@ public:
     }
     T exchange(T r, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("xchgb %0, %1\n" : "=r" (r) : "m"(i), "0" (r) : "memory");
+        __asm__ __volatile__("xchgb %0, %1\n" : "=q" (r) : "m"(i), "0" (r) : "memory");
         return r;
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddb %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddb %0, %1" : "+q" (c), "+m" (i) :: "memory");
         return c;
     }
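The repeated "lock" to "lock;" edits in these x86 hunks only change how the
assembler tokenizes the prefix; the generated instruction is identical. The
switch from "+r" to "+q" matters for the byte variants because in 32-bit mode
only al/bl/cl/dl can hold 8-bit operands. For comparison, the same fetch_add
can be left entirely to the compiler via the GCC/Clang __atomic builtins (an
aside, not part of the patch):

    #include <cstdint>

    std::uint8_t fetch_add_byte(std::uint8_t* p, std::uint8_t c)
    {
        // compiles to 'lock xaddb' on x86, with the compiler choosing
        // a byte-addressable register automatically
        return __atomic_fetch_add(p, c, __ATOMIC_SEQ_CST);
    }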
@@ -170,7 +171,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgw %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgw %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -192,7 +193,7 @@ public:
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddw %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddw %0, %1" : "+r" (c), "+m" (i) :: "memory");
         return c;
     }
@@ -239,7 +240,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgl %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgl %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -261,7 +262,7 @@ public:
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddl %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (c), "+m" (i) :: "memory");
         return c;
     }
@@ -309,7 +310,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgq %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgq %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -331,7 +332,7 @@ public:
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddq %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddq %0, %1" : "+r" (c), "+m" (i) :: "memory");
         return c;
     }
@@ -358,10 +359,31 @@ public:
         memory_order success_order,
         memory_order failure_order) volatile
     {
+        long scratch;
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchg8b %3\n" :
-            "=A" (prev) : "b" ((long)desired), "c" ((long)(desired>>32)), "m" (i), "0" (prev) : "memory");
+        /* Make sure ebx is saved and restored properly in case
+        this object is compiled as "position independent". Since
+        programmers on x86 tend to forget specifying -DPIC or
+        similar, always assume PIC.
+
+        To make this work uniformly even in the non-PIC case,
+        setup register constraints such that ebx can not be
+        used by accident e.g. as base address for the variable
+        to be modified. Accessing "scratch" should always be okay,
+        as it can only be placed on the stack (and therefore
+        accessed through ebp or esp only).
+
+        In theory, could push/pop ebx onto/off the stack, but movs
+        to a prepared stack slot turn out to be faster. */
+        __asm__ __volatile__(
+            "movl %%ebx, %1\n"
+            "movl %2, %%ebx\n"
+            "lock; cmpxchg8b 0(%4)\n"
+            "movl %1, %%ebx\n"
+            : "=A" (prev), "=m" (scratch)
+            : "D" ((long)desired), "c" ((long)(desired>>32)), "S" (&i), "0" (prev)
+            : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -23,7 +23,7 @@ static inline void full_fence(void)
 }
 
 template<>
-static inline void platform_atomic_thread_fence(memory_order order)
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_seq_cst:
boost/atomic/detail/linux-arm.hpp (new file, 169 lines)
@@ -0,0 +1,169 @@
#ifndef BOOST_DETAIL_ATOMIC_LINUX_ARM_HPP
#define BOOST_DETAIL_ATOMIC_LINUX_ARM_HPP

// Distributed under the Boost Software License, Version 1.0.
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// Copyright (c) 2009 Helge Bahmann
// Copyright (c) 2009 Phil Endecott
// ARM Code by Phil Endecott, based on other architectures.

#include <boost/memory_order.hpp>
#include <boost/atomic/detail/base.hpp>
#include <boost/atomic/detail/builder.hpp>

namespace boost {
namespace detail {
namespace atomic {

// Different ARM processors have different atomic instructions. In particular,
// architecture versions before v6 (which are still in widespread use, e.g. the
// Intel/Marvell XScale chips like the one in the NSLU2) have only atomic swap.
// On Linux the kernel provides some support that lets us abstract away from
// these differences: it provides emulated CAS and barrier functions at special
// addresses that are guaranteed not to be interrupted by the kernel. Using
// this facility is slightly slower than inline assembler would be, but much
// faster than a system call.
//
// For documentation, see arch/arm/kernel/entry-armv.S in the kernel source
// (search for "User Helpers").
typedef void (kernel_dmb_t)(void);
#define BOOST_ATOMIC_KERNEL_DMB (*(kernel_dmb_t *)0xffff0fa0)

static inline void fence_before(memory_order order)
{
    switch(order) {
        // FIXME I really don't know which of these cases should call
        // kernel_dmb() and which shouldn't...
        case memory_order_consume:
        case memory_order_release:
        case memory_order_acq_rel:
        case memory_order_seq_cst:
            BOOST_ATOMIC_KERNEL_DMB();
        default:;
    }
}

static inline void fence_after(memory_order order)
{
    switch(order) {
        // FIXME I really don't know which of these cases should call
        // kernel_dmb() and which shouldn't...
        case memory_order_acquire:
        case memory_order_acq_rel:
        case memory_order_seq_cst:
            BOOST_ATOMIC_KERNEL_DMB();
        default:;
    }
}

#undef BOOST_ATOMIC_KERNEL_DMB
template<typename T>
class atomic_linux_arm_4 {

    typedef int (kernel_cmpxchg_t)(T oldval, T newval, T *ptr);
#   define BOOST_ATOMIC_KERNEL_CMPXCHG (*(kernel_cmpxchg_t *)0xffff0fc0)
    // Returns 0 if *ptr was changed.

public:
    explicit atomic_linux_arm_4(T v) : i(v) {}
    atomic_linux_arm_4() {}
    T load(memory_order order=memory_order_seq_cst) const volatile
    {
        T v=const_cast<volatile const T &>(i);
        fence_after(order);
        return v;
    }
    void store(T v, memory_order order=memory_order_seq_cst) volatile
    {
        fence_before(order);
        const_cast<volatile T &>(i)=v;
    }
    bool compare_exchange_strong(
        T &expected,
        T desired,
        memory_order success_order,
        memory_order failure_order) volatile
    {
        // Apparently we can consider kernel_cmpxchg to be strong if it is retried
        // by the kernel after being interrupted, which I think it is.
        // Also it seems that when an ll/sc implementation is used the kernel
        // loops until the store succeeds.
        bool success = BOOST_ATOMIC_KERNEL_CMPXCHG(expected,desired,&i)==0;
        if (!success) expected = load(memory_order_relaxed);
        return success;
    }
    bool compare_exchange_weak(
        T &expected,
        T desired,
        memory_order success_order,
        memory_order failure_order) volatile
    {
        return compare_exchange_strong(expected, desired, success_order, failure_order);
    }
    T exchange(T replacement, memory_order order=memory_order_seq_cst) volatile
    {
        // Copied from build_exchange.
        T o=load(memory_order_relaxed);
        do {} while(!compare_exchange_weak(o, replacement, order, memory_order_relaxed));
        return o;
        // Note that ARM has an atomic swap instruction that we could use here:
        //    T oldval;
        //    asm volatile ("swp\t%0, %1, [%2]" : "=&r"(oldval) : "r" (replacement), "r" (&i) : "memory");
        //    return oldval;
        // This instruction is deprecated in architecture >= 6. I'm unsure how inefficient
        // its implementation is on those newer architectures. I don't think this would gain
        // much since exchange() is not used often.
    }

    bool is_lock_free(void) const volatile {return true;}
protected:
    typedef T integral_type;
private:
    T i;

#   undef BOOST_ATOMIC_KERNEL_CMPXCHG

};
template<typename T>
class platform_atomic_integral<T, 4> : public build_atomic_from_exchange<atomic_linux_arm_4<T> > {
public:
    typedef build_atomic_from_exchange<atomic_linux_arm_4<T> > super;
    explicit platform_atomic_integral(T v) : super(v) {}
    platform_atomic_integral(void) {}
};

template<typename T>
class platform_atomic_integral<T, 1> : public build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T > {
public:
    typedef build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T> super;
    explicit platform_atomic_integral(T v) : super(v) {}
    platform_atomic_integral(void) {}
};

template<typename T>
class platform_atomic_integral<T, 2> : public build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T > {
public:
    typedef build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T> super;
    explicit platform_atomic_integral(T v) : super(v) {}
    platform_atomic_integral(void) {}
};

typedef atomic_linux_arm_4<void *> platform_atomic_address;

}
}
}

#endif
@@ -18,12 +18,25 @@
 
 #include <boost/atomic/detail/gcc-ppc.hpp>
 
+// This list of ARM architecture versions comes from Apple's arm/arch.h header.
+// I don't know how complete it is.
+#elif defined(__GNUC__) && (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+    || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+    || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_7A__))
+
+#include <boost/atomic/detail/gcc-armv6+.hpp>
+
+#elif defined(__linux__) && defined(__arm__)
+
+#include <boost/atomic/detail/linux-arm.hpp>
+
 #elif defined(BOOST_USE_WINDOWS_H) || defined(_WIN32_CE) || defined(BOOST_MSVC) || defined(BOOST_INTEL_WIN) || defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
 
 #include <boost/atomic/detail/interlocked.hpp>
 
 #else
 
 #warning "Using slow fallback atomic implementation"
 #include <boost/atomic/detail/generic-cas.hpp>
 
 #endif
@@ -45,7 +45,7 @@ soon as the reference counter reaches zero.
 
 Increasing the reference counter can always be done with
 [^memory_order_relaxed]: New references to an object can only
-be formed from an existing reference, an passing an existing
+be formed from an existing reference, and passing an existing
 reference from one thread to another must already provide any
 required synchronization.
 
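A minimal sketch of the counting scheme this paragraph describes (an
illustrative shape, not the docs' own listing): relaxed increments, a release
decrement, and an acquire fence before destruction so the deleting thread
observes all writes made through other references.

    #include <atomic>

    template<typename T>   // T is expected to derive from counted<T>
    struct counted {
        std::atomic<int> refs{1};

        void add_ref()
        {
            // forming a new reference from an existing one needs no ordering
            refs.fetch_add(1, std::memory_order_relaxed);
        }
        void release()
        {
            // the release decrement publishes this thread's writes...
            if (refs.fetch_sub(1, std::memory_order_release) == 1) {
                // ...and the acquire fence pulls in every other thread's
                // writes before the object is destroyed
                std::atomic_thread_fence(std::memory_order_acquire);
                delete static_cast<T*>(this);
            }
        }
    };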
@@ -75,7 +75,7 @@ soon as the reference counter reaches zero. There may be
 the object from being freed. "True" references may be formed from
 "weak" references unless the object has been deleted already.
 
-FIXME: The point to make here is that for upgrading "weak" to "full"
+FIXME: The point to make here is that upgrading "weak" to "full"
 references requires memory_order_acquire.
 
 [endsect]
@@ -159,7 +159,7 @@ and degrade the performance of other system components.
 
 [section:singleton Double-checked singleton pattern]
 
-The purpose of the ['double-cheked singleton pattern] is to ensure
+The purpose of the ['double-checked singleton pattern] is to ensure
 that at most one instance of a particular object is created.
 If one instance has been created already, access to the existing
 object should be as light-weight as possible.
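Sketched in C++11 terms (an assumed shape; the section's own listing may
differ), the pattern pairs an acquire load on the fast path with a mutex and
a release store on the slow path:

    #include <atomic>
    #include <mutex>

    class singleton {
    public:
        static singleton* instance()
        {
            singleton* p = instance_.load(std::memory_order_acquire);
            if (!p) {                              // first check, no lock
                std::lock_guard<std::mutex> guard(lock_);
                p = instance_.load(std::memory_order_relaxed);
                if (!p) {                          // second check, locked
                    p = new singleton;
                    instance_.store(p, std::memory_order_release);
                }
            }
            return p;
        }
    private:
        static std::atomic<singleton*> instance_;
        static std::mutex lock_;
    };

    std::atomic<singleton*> singleton::instance_{nullptr};
    std::mutex singleton::lock_;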
@@ -227,7 +227,7 @@ the pointer need to be ordered.
 A ['wait-free ring buffer] provides a mechanism for relaying objects
 from one single "producer" thread to one single "consumer" thread without
 any locks. The operations on this data structure are "wait-free" which
-means that each operation finites within a constant number of steps.
+means that each operation finishes within a constant number of steps.
 This makes this data structure suitable for use in hard real-time systems
 or for communication with interrupt/signal handlers.
 
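The ordering the next hunk documents (release on the index a thread writes,
acquire on the index it reads) yields a compact single-producer/single-consumer
ring; a sketch in C++11 terms, with names assumed rather than taken verbatim
from the docs:

    #include <atomic>
    #include <cstddef>

    template<typename T, std::size_t Size>
    class ringbuffer {
    public:
        bool push(const T& value)       // producer thread only
        {
            std::size_t head = head_.load(std::memory_order_relaxed);
            std::size_t next = (head + 1) % Size;
            if (next == tail_.load(std::memory_order_acquire))
                return false;           // full
            ring_[head] = value;
            // release: the element write happens-before the index update
            head_.store(next, std::memory_order_release);
            return true;
        }
        bool pop(T& value)              // consumer thread only
        {
            std::size_t tail = tail_.load(std::memory_order_relaxed);
            if (tail == head_.load(std::memory_order_acquire))
                return false;           // empty
            value = ring_[tail];
            // release: the element read happens-before the slot is reused
            tail_.store((tail + 1) % Size, std::memory_order_release);
            return true;
        }
    private:
        T ring_[Size];
        std::atomic<std::size_t> head_{0}, tail_{0};
    };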
@@ -292,7 +292,7 @@ are either lost or read twice.
 
 Furthermore it must guarantee that read-access to a
 particular object in [^pop] "happens after" it has been
-written in [^push]. This is achieved by writing [^head_]
+written in [^push]. This is achieved by writing [^head_ ]
 with "release" and reading it with "acquire". Conversely
 the implementation also ensures that read access to
 a particular ring element "happens before" before
@@ -313,7 +313,6 @@ retrieved and processed in FIFO order by a single consumer.
 
 [c++]
 
-    // assume that every "T" object has a pointer "T * T::next"
     template<typename T>
     class waitfree_queue {
     public: