diff --git a/include/boost/histogram/accumulators/mean.hpp b/include/boost/histogram/accumulators/mean.hpp index 7af64df3..fedea7f4 100644 --- a/include/boost/histogram/accumulators/mean.hpp +++ b/include/boost/histogram/accumulators/mean.hpp @@ -31,16 +31,16 @@ public: mean() = default; - /// Allow implicit conversion from mean + /// Allow implicit conversion from mean. template mean(const mean& o) noexcept : sum_{o.sum_}, mean_{o.mean_}, sum_of_deltas_squared_{o.sum_of_deltas_squared_} {} - /// Initialize to external count, mean, and variance + /// Initialize to external count, mean, and variance. mean(const_reference n, const_reference mean, const_reference variance) noexcept : sum_(n), mean_(mean), sum_of_deltas_squared_(variance * (n - 1)) {} - /// Insert sample x + /// Insert sample x. void operator()(const_reference x) noexcept { sum_ += static_cast(1); const auto delta = x - mean_; @@ -48,7 +48,7 @@ public: sum_of_deltas_squared_ += delta * (x - mean_); } - /// Insert sample x with weight w + /// Insert sample x with weight w. void operator()(const weight_type& w, const_reference x) noexcept { sum_ += w.value; const auto delta = x - mean_; @@ -56,18 +56,43 @@ public: sum_of_deltas_squared_ += w.value * delta * (x - mean_); } - /// Add another mean accumulator + /// Add another mean accumulator. mean& operator+=(const mean& rhs) noexcept { - if (sum_ != 0 || rhs.sum_ != 0) { - const auto tmp = mean_ * sum_ + rhs.mean_ * rhs.sum_; - sum_ += rhs.sum_; - mean_ = tmp / sum_; - } + if (rhs.sum_ == 0) return *this; + + /* + sum_of_deltas_squared + = sum_i (x_i - mu)^2 + = sum_i (x_i - mu)^2 + sum_k (x_k - mu)^2 + = sum_i (x_i - mu1 + (mu1 - mu))^2 + sum_k (x_k - mu2 + (mu2 - mu))^2 + + first part: + sum_i (x_i - mu1 + (mu1 - mu))^2 + = sum_i (x_i - mu1)^2 + n1 (mu1 - mu))^2 + 2 (mu1 - mu) sum_i (x_i - mu1) + = sum_i (x_i - mu1)^2 + n1 (mu1 - mu))^2 + since sum_i (x_i - mu1) = n1 mu1 - n1 mu1 = 0 + + Putting it together: + sum_of_deltas_squared + = sum_of_deltas_squared_1 + n1 (mu1 - mu))^2 + + sum_of_deltas_squared_2 + n2 (mu2 - mu))^2 + */ + + const auto mu1 = mean_; + const auto mu2 = rhs.mean_; + const auto n1 = sum_; + const auto n2 = rhs.sum_; + + sum_ += rhs.sum_; + mean_ = (n1 * mu1 + n2 * mu2) / sum_; sum_of_deltas_squared_ += rhs.sum_of_deltas_squared_; + sum_of_deltas_squared_ += + n1 * (mean_ - mu1) * (mean_ - mu1) + n2 * (mean_ - mu2) * (mean_ - mu2); + return *this; } - /** Scale by value + /** Scale by value. This acts as if all samples were scaled by the value. */ @@ -84,7 +109,7 @@ public: bool operator!=(const mean& rhs) const noexcept { return !operator==(rhs); } - /// Return how many samples were accumulated + /// Return how many samples were accumulated. const_reference count() const noexcept { return sum_; } /** Return mean value of accumulated samples. @@ -93,7 +118,7 @@ public: */ const_reference value() const noexcept { return mean_; } - /** Return variance of accumulated samples + /** Return variance of accumulated samples. The result is undefined, if `count() < 2`. */ diff --git a/include/boost/histogram/algorithm/sum.hpp b/include/boost/histogram/algorithm/sum.hpp index c6573be9..cf41e0f3 100644 --- a/include/boost/histogram/algorithm/sum.hpp +++ b/include/boost/histogram/algorithm/sum.hpp @@ -49,6 +49,7 @@ namespace algorithm { template auto sum(const histogram& hist, const coverage cov = coverage::all) { using T = typename histogram::value_type; + // T is arithmetic, compute sum accurately with high dynamic range using sum_type = mp11::mp_if, accumulators::sum, T>; sum_type sum; if (cov == coverage::all) diff --git a/test/accumulators_mean_test.cpp b/test/accumulators_mean_test.cpp index d9a3c5d1..f7fe938a 100644 --- a/test/accumulators_mean_test.cpp +++ b/test/accumulators_mean_test.cpp @@ -18,52 +18,97 @@ using namespace std::literals; int main() { using m_t = accumulators::mean; - m_t a; - BOOST_TEST_EQ(a.count(), 0); - BOOST_TEST_EQ(a, m_t{}); - a(4); - a(7); - a(13); - a(16); + // basic interface, string conversion + { + m_t a; + BOOST_TEST_EQ(a.count(), 0); + BOOST_TEST_EQ(a, m_t{}); - BOOST_TEST_EQ(a.count(), 4); - BOOST_TEST_EQ(a.value(), 10); - BOOST_TEST_EQ(a.variance(), 30); + a(4); + a(7); + a(13); + a(16); - BOOST_TEST_EQ(str(a), "mean(4, 10, 30)"s); - BOOST_TEST_EQ(str(a, 20, false), " mean(4, 10, 30)"s); - BOOST_TEST_EQ(str(a, 20, true), "mean(4, 10, 30) "s); + BOOST_TEST_EQ(a.count(), 4); + BOOST_TEST_EQ(a.value(), 10); + BOOST_TEST_EQ(a.variance(), 30); - m_t b; - b(1e8 + 4); - b(1e8 + 7); - b(1e8 + 13); - b(1e8 + 16); + BOOST_TEST_EQ(str(a), "mean(4, 10, 30)"s); + BOOST_TEST_EQ(str(a, 20, false), " mean(4, 10, 30)"s); + BOOST_TEST_EQ(str(a, 20, true), "mean(4, 10, 30) "s); + } - BOOST_TEST_EQ(b.count(), 4); - BOOST_TEST_EQ(b.value(), 1e8 + 10); - BOOST_TEST_EQ(b.variance(), 30); + // small variation on large number + { + m_t a; + a(1e8 + 4); + a(1e8 + 7); + a(1e8 + 13); + a(1e8 + 16); - auto c = a; - c += a; // same as feeding all samples twice + BOOST_TEST_EQ(a.count(), 4); + BOOST_TEST_EQ(a.value(), 1e8 + 10); + BOOST_TEST_EQ(a.variance(), 30); + } - BOOST_TEST_EQ(c.count(), 8); - BOOST_TEST_EQ(c.value(), 10); - BOOST_TEST_IS_CLOSE(c.variance(), 25.714, 1e-3); + // addition of zero element + { + BOOST_TEST_EQ(m_t() += m_t(), m_t()); + BOOST_TEST_EQ(m_t(1, 2, 3) += m_t(), m_t(1, 2, 3)); + BOOST_TEST_EQ(m_t() += m_t(1, 2, 3), m_t(1, 2, 3)); + } - // also same as feeding all samples twice - m_t d; - d(weight(2), 4); - d(weight(2), 7); - d(weight(2), 13); - d(weight(2), 16); + // addition + { + m_t a, b, c; - BOOST_TEST_EQ(d, c); + a(1); + a(2); + a(3); + BOOST_TEST_EQ(a.count(), 3); + BOOST_TEST_EQ(a.value(), 2); + BOOST_TEST_EQ(a.variance(), 1); - BOOST_TEST_EQ(m_t() += m_t(), m_t()); - BOOST_TEST_EQ(m_t(1, 2, 3) += m_t(), m_t(1, 2, 3)); - BOOST_TEST_EQ(m_t() += m_t(1, 2, 3), m_t(1, 2, 3)); + b(4); + b(6); + BOOST_TEST_EQ(b.count(), 2); + BOOST_TEST_EQ(b.value(), 5); + BOOST_TEST_EQ(b.variance(), 2); + + c(1); + c(2); + c(3); + c(4); + c(6); + + auto d = a; + d += b; + BOOST_TEST_EQ(c.count(), d.count()); + BOOST_TEST_EQ(c.value(), d.value()); + BOOST_TEST_IS_CLOSE(c.variance(), d.variance(), 1e-3); + } + + // using weight=2 must be same as adding all samples twice + { + m_t a, b; + + for (int i = 0; i < 2; ++i) { + a(4); + a(7); + a(13); + a(16); + } + + b(weight(2), 4); + b(weight(2), 7); + b(weight(2), 13); + b(weight(2), 16); + + BOOST_TEST_EQ(a.count(), b.count()); + BOOST_TEST_EQ(a.value(), b.value()); + BOOST_TEST_IS_CLOSE(a.variance(), b.variance(), 1e-3); + } return boost::report_errors(); }