diff --git a/include/boost/interprocess/detail/atomic.hpp b/include/boost/interprocess/detail/atomic.hpp index f0e234f..e41248e 100644 --- a/include/boost/interprocess/detail/atomic.hpp +++ b/include/boost/interprocess/detail/atomic.hpp @@ -1,8 +1,11 @@ ////////////////////////////////////////////////////////////////////////////// // -// (C) Copyright Ion Gaztanaga 2006-2007. Distributed under the Boost -// Software License, Version 1.0. (See accompanying file -// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// (C) Copyright Ion Gaztanaga 2006-2007 +// (C) Copyright Markus Schoepflin 2007 +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) // // See http://www.boost.org/libs/interprocess for documentation. // @@ -372,17 +375,19 @@ namespace boost{ namespace interprocess{ namespace detail{ -//! Atomically increment an apr_uint32_t by 1 +//! Atomically decrement a uint32_t by 1 +//! "mem": pointer to the atomic value +//! Returns the old value pointed to by mem +//! Acquire, memory barrier after decrement. +inline boost::uint32_t atomic_dec32(volatile boost::uint32_t *mem) +{ boost::uint32_t old_val = __ATOMIC_DECREMENT_LONG(mem); __MB(); return old_val; } + +//! Atomically increment a uint32_t by 1 //! "mem": pointer to the object //! Returns the old value pointed to by mem +//! Release, memory barrier before increment. inline boost::uint32_t atomic_inc32(volatile boost::uint32_t *mem) -{ return __ATOMIC_INCREMENT_LONG(mem); } - -//! Atomically decrement an boost::uint32_t by 1 -//! "mem": pointer to the atomic value -//! Returns false if the value becomes zero on decrement, otherwise true -inline boost::uint32_t atomic_dec32(volatile boost::uint32_t *mem) -{ return __ATOMIC_DECREMENT_LONG(mem); } +{ __MB(); return __ATOMIC_INCREMENT_LONG(mem); } // Rational for the implementation of the atomic read and write functions. // @@ -396,14 +401,16 @@ inline boost::uint32_t atomic_dec32(volatile boost::uint32_t *mem) // aligned. //! Atomically read an boost::uint32_t from memory +//! Acquire, memory barrier after load. inline boost::uint32_t atomic_read32(volatile boost::uint32_t *mem) -{ return *mem; } +{ boost::uint32_t old_val = *mem; __MB(); return old_val; } //! Atomically set an boost::uint32_t in memory //! "mem": pointer to the object //! "param": val value that the object will assume +//! Release, memory barrier before store. inline void atomic_write32(volatile boost::uint32_t *mem, boost::uint32_t val) -{ *mem = val; } +{ __MB(); *mem = val; } //! Compare an boost::uint32_t's value with "cmp". //! If they are the same swap the value with "with" @@ -411,28 +418,43 @@ inline void atomic_write32(volatile boost::uint32_t *mem, boost::uint32_t val) //! "with" what to swap it with //! "cmp": the value to compare it to //! Returns the old value of *mem -inline boost::uint32_t atomic_cas32 - (volatile boost::uint32_t *mem, boost::uint32_t with, boost::uint32_t cmp) +//! Memory barrier between load and store. +inline boost::uint32_t atomic_cas32( + volatile boost::uint32_t *mem, boost::uint32_t with, boost::uint32_t cmp) { - // Notes: + // Note: // - // 1. Branch prediction prefers branches, as we assume that the lock - // is not stolen usually, we branch forward conditionally on success - // of the store, and not conditionally backwards on failure. + // Branch prediction prefers backward branches, and the Alpha Architecture + // Handbook explicitely states that the loop should not be implemented like + // it is below. (See chapter 4.2.5.) Therefore the code should probably look + // like this: // - // 2. The memory lock is invalidated when a branch is taken between - // load and store. Therefore we can only branch if we don't need a - // store. + // return asm( + // "10: ldl_l %v0,(%a0) ;" + // " cmpeq %v0,%a2,%t0 ;" + // " beq %t0,20f ;" + // " mb ;" + // " mov %a1,%t0 ;" + // " stl_c %t0,(%a0) ;" + // " beq %t0,30f ;" + // "20: ret ;" + // "30: br 10b;", + // mem, with, cmp); + // + // But as the compiler always transforms this into the form where a backward + // branch is taken on failure, we can as well implement it in the straight + // forward form, as this is what it will end up in anyway. - return asm("10: ldl_l %v0,(%a0) ;" // load prev value from mem and lock mem - " cmpeq %v0,%a2,%t0 ;" // compare with given value - " beq %t0,20f ;" // if not equal, we're done - " mov %a1,%t0 ;" // load new value into scratch register - " stl_c %t0,(%a0) ;" // store new value to locked mem (overwriting scratch) - " bne %t0,20f ;" // store succeeded, we're done - " br 10b ;" // lock has been stolen, retry - "20: ", - mem, with, cmp); + return asm( + "10: ldl_l %v0,(%a0) ;" // load prev value from mem and lock mem + " cmpeq %v0,%a2,%t0 ;" // compare with given value + " beq %t0,20f ;" // if not equal, we're done + " mb ;" // memory barrier + " mov %a1,%t0 ;" // load new value into scratch register + " stl_c %t0,(%a0) ;" // store new value to locked mem (overwriting scratch) + " beq %t0,10b ;" // store failed because lock has been stolen, retry + "20: ", + mem, with, cmp); } } //namespace detail{