monte carlo integration: document and/or make explicit the semantics of all atomic operations.

2026-01-19 04:22:09 +00:00 · 2018-02-21 18:11:28 +00:00
parent e627adfef2
commit 3fa8aadde2
1 changed files with 41 additions and 43 deletions
--- a/include/boost/math/quadrature/naive_monte_carlo.hpp
+++ b/include/boost/math/quadrature/naive_monte_carlo.hpp
@@ -173,25 +173,25 @@ public:
                x[j] = (gen()-(gen.min)())*inv_denom;
            }
            Real y = m_integrand(x);
-            m_thread_averages[i] = y;
+            m_thread_averages[i] = y; // relaxed store
            m_thread_calls[i] = 1;
            m_thread_Ss[i] = 0;
            avg += y;
        }
        avg /= m_num_threads;
-        m_avg = avg;
+        m_avg = avg; // relaxed store

-        m_error_goal = error_goal;
+        m_error_goal = error_goal; // relaxed store
        m_start = std::chrono::system_clock::now();
-        m_done = false;
-        m_total_calls = m_num_threads;
+        m_done = false; // relaxed store
+        m_total_calls = m_num_threads;  // relaxed store
        m_variance = (numeric_limits<Real>::max)();
    }

    std::future<Real> integrate()
    {
        // Set done to false in case we wish to restart:
-        m_done = false;
+        m_done.store(false); // relaxed store, no worker threads yet
        return std::async(std::launch::async,
                          &naive_monte_carlo::m_integrate, this);
    }
@@ -201,7 +201,7 @@ public:
        // If seed = 0 (meaning have the routine pick the seed), this leaves the seed the same.
        // If seed != 0, then the seed is changed, so a restart doesn't do the exact same thing.
        m_seed = m_seed*m_seed;
-        m_done = true;
+        m_done = true; // relaxed store, worker threads will get the message eventually
    }

    Real variance() const
@@ -212,6 +212,10 @@ public:
    Real current_error_estimate() const
    {
        using std::sqrt;
+        //
+        // There is a bug here: m_variance and m_total_calls get updated asynchronously
+        // and may be out of synch when we compute the error estimate, not sure if it matters though...
+        //
        return sqrt(m_variance.load()/m_total_calls.load());
    }

@@ -219,7 +223,7 @@ public:
    {
        auto now = std::chrono::system_clock::now();
        std::chrono::duration<Real> elapsed_seconds = now - m_start;
-        Real r = this->current_error_estimate()/m_error_goal.load();
+        Real r = this->current_error_estimate()/m_error_goal.load(); // relaxed load
        if (r*r <= 1) {
            return 0*elapsed_seconds;
        }
@@ -228,12 +232,12 @@ public:

    void update_target_error(Real new_target_error)
    {
-        m_error_goal = new_target_error;
+        m_error_goal = new_target_error;  // relaxed store
    }

    Real progress() const
    {
-        Real r = m_error_goal.load()/this->current_error_estimate();
+        Real r = m_error_goal.load()/this->current_error_estimate();  // relaxed load
        if (r*r >= 1)
        {
            return 1;
@@ -248,7 +252,7 @@ public:

    size_t calls() const
    {
-        return m_total_calls.load();
+        return m_total_calls.load();  // relaxed load
    }

 private:
@@ -276,30 +280,27 @@ private:
        do {
            std::this_thread::sleep_for(std::chrono::milliseconds(100));
            size_t total_calls = 0;
-            for (size_t i = 0; i < m_num_threads; ++i)
-            {
-                total_calls += m_thread_calls[i];
-            }
            Real variance = 0;
            Real avg = 0;
            for (size_t i = 0; i < m_num_threads; ++i)
            {
-                size_t t_calls = m_thread_calls[i];
+               size_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
+               total_calls += t_calls;
                // Will this overflow? Not hard to remove . . .
-                avg += m_thread_averages[i]*( (Real) t_calls/ (Real) total_calls);
-                variance += m_thread_Ss[i];
+                avg += m_thread_averages[i].load(boost::memory_order::relaxed)*( (Real) t_calls/ (Real) total_calls);
+                variance += m_thread_Ss[i].load(boost::memory_order::relaxed);
            }
-            m_avg = avg;
-            m_variance = variance/(total_calls - 1);
-            m_total_calls = total_calls;
+            m_avg.store(avg, boost::memory_order::release);
+            m_variance.store(variance/(total_calls - 1), boost::memory_order::release);
+            m_total_calls = total_calls; // relaxed store, it's just for user feedback
            // Allow cancellation:
-            if (m_done)
+            if (m_done) // relaxed load
            {
                break;
            }
-        } while (this->current_error_estimate() > m_error_goal);
+        } while (this->current_error_estimate() > m_error_goal.load(boost::memory_order::consume));
        // Error bound met; signal the threads:
-        m_done = true;
+        m_done = true; // relaxed store, threads will get the message in the end
        std::for_each(threads.begin(), threads.end(),
                      std::mem_fn(&std::thread::join));
        if (m_exception)
@@ -308,24 +309,21 @@ private:
        }
        // Incorporate their work into the final estimate:
        size_t total_calls = 0;
-        for (size_t i = 0; i < m_num_threads; ++i)
-        {
-            total_calls += m_thread_calls[i];
-        }
        Real variance = 0;
        Real avg = 0;
        for (size_t i = 0; i < m_num_threads; ++i)
        {
-            size_t t_calls = m_thread_calls[i];
+           size_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
+           total_calls += t_calls;
            // Averages weighted by the number of calls the thread made:
-            avg += m_thread_averages[i]*( (Real) t_calls/ (Real) total_calls);
-            variance += m_thread_Ss[i];
+            avg += m_thread_averages[i].load(boost::memory_order::relaxed)*( (Real) t_calls/ (Real) total_calls);
+            variance += m_thread_Ss[i].load(boost::memory_order::relaxed);
        }
-        m_avg = avg;
-        m_variance = variance/(total_calls - 1);
-        m_total_calls = total_calls;
+        m_avg.store(avg, boost::memory_order::release);
+        m_variance.store(variance/(total_calls - 1), boost::memory_order::release);
+        m_total_calls = total_calls; // relaxed store, this is just user feedback

-        return m_avg.load();
+        return m_avg.load(boost::memory_order::consume);
    }

    void m_thread_monte(size_t thread_index, size_t seed)
@@ -336,15 +334,15 @@ private:
            std::vector<Real> x(m_lbs.size());
            RandomNumberGenerator gen(seed);
            Real inv_denom = (Real) 1/(Real)( (gen.max)() - (gen.min)()  );
-            Real M1 = m_thread_averages[thread_index];
-            Real S = m_thread_Ss[thread_index];
+            Real M1 = m_thread_averages[thread_index].load(boost::memory_order::consume);
+            Real S = m_thread_Ss[thread_index].load(boost::memory_order::consume);
            // Kahan summation is required or the value of the integrand will go on a random walk during long computations.
            // See the implementation discussion.
            // The idea is that the unstabilized additions have error sigma(f)/sqrt(N) + epsilon*N, which diverges faster than it converges!
            // Kahan summation turns this to sigma(f)/sqrt(N) + epsilon^2*N, and the random walk occurs on a timescale of 10^14 years (on current hardware)
            Real compensator = 0;
-            size_t k = m_thread_calls[thread_index];
-            while (!m_done)
+            size_t k = m_thread_calls[thread_index].load(boost::memory_order::consume);
+            while (!m_done) // relaxed load
            {
                int j = 0;
                // If we don't have a certain number of calls before an update, we can easily terminate prematurely
@@ -381,15 +379,15 @@ private:
                    S += (f - M1)*(f - M2);
                    M1 = M2;
                }
-                m_thread_averages[thread_index] = M1;
-                m_thread_Ss[thread_index] = S;
-                m_thread_calls[thread_index] = k;
+                m_thread_averages[thread_index].store(M1, boost::memory_order::release);
+                m_thread_Ss[thread_index].store(S, boost::memory_order::release);
+                m_thread_calls[thread_index].store(k, boost::memory_order::release);
            }
        }
        catch (...)
        {
            // Signal the other threads that the computation is ruined:
-            m_done = true;
+            m_done = true; // relaxed store
            m_exception = std::current_exception();
        }
    }