mirror of
https://github.com/boostorg/histogram.git
synced 2026-01-19 04:12:12 +00:00
Fix variance calculation in accumulators::mean::operator+= (#308)
This commit is contained in:
@@ -31,16 +31,16 @@ public:
|
||||
|
||||
mean() = default;
|
||||
|
||||
/// Allow implicit conversion from mean<T>
|
||||
/// Allow implicit conversion from mean<T>.
|
||||
template <class T>
|
||||
mean(const mean<T>& o) noexcept
|
||||
: sum_{o.sum_}, mean_{o.mean_}, sum_of_deltas_squared_{o.sum_of_deltas_squared_} {}
|
||||
|
||||
/// Initialize to external count, mean, and variance
|
||||
/// Initialize to external count, mean, and variance.
|
||||
mean(const_reference n, const_reference mean, const_reference variance) noexcept
|
||||
: sum_(n), mean_(mean), sum_of_deltas_squared_(variance * (n - 1)) {}
|
||||
|
||||
/// Insert sample x
|
||||
/// Insert sample x.
|
||||
void operator()(const_reference x) noexcept {
|
||||
sum_ += static_cast<value_type>(1);
|
||||
const auto delta = x - mean_;
|
||||
@@ -48,7 +48,7 @@ public:
|
||||
sum_of_deltas_squared_ += delta * (x - mean_);
|
||||
}
|
||||
|
||||
/// Insert sample x with weight w
|
||||
/// Insert sample x with weight w.
|
||||
void operator()(const weight_type<value_type>& w, const_reference x) noexcept {
|
||||
sum_ += w.value;
|
||||
const auto delta = x - mean_;
|
||||
@@ -56,18 +56,43 @@ public:
|
||||
sum_of_deltas_squared_ += w.value * delta * (x - mean_);
|
||||
}
|
||||
|
||||
/// Add another mean accumulator
|
||||
/// Add another mean accumulator.
|
||||
mean& operator+=(const mean& rhs) noexcept {
|
||||
if (sum_ != 0 || rhs.sum_ != 0) {
|
||||
const auto tmp = mean_ * sum_ + rhs.mean_ * rhs.sum_;
|
||||
sum_ += rhs.sum_;
|
||||
mean_ = tmp / sum_;
|
||||
}
|
||||
if (rhs.sum_ == 0) return *this;
|
||||
|
||||
/*
|
||||
sum_of_deltas_squared
|
||||
= sum_i (x_i - mu)^2
|
||||
= sum_i (x_i - mu)^2 + sum_k (x_k - mu)^2
|
||||
= sum_i (x_i - mu1 + (mu1 - mu))^2 + sum_k (x_k - mu2 + (mu2 - mu))^2
|
||||
|
||||
first part:
|
||||
sum_i (x_i - mu1 + (mu1 - mu))^2
|
||||
= sum_i (x_i - mu1)^2 + n1 (mu1 - mu))^2 + 2 (mu1 - mu) sum_i (x_i - mu1)
|
||||
= sum_i (x_i - mu1)^2 + n1 (mu1 - mu))^2
|
||||
since sum_i (x_i - mu1) = n1 mu1 - n1 mu1 = 0
|
||||
|
||||
Putting it together:
|
||||
sum_of_deltas_squared
|
||||
= sum_of_deltas_squared_1 + n1 (mu1 - mu))^2
|
||||
+ sum_of_deltas_squared_2 + n2 (mu2 - mu))^2
|
||||
*/
|
||||
|
||||
const auto mu1 = mean_;
|
||||
const auto mu2 = rhs.mean_;
|
||||
const auto n1 = sum_;
|
||||
const auto n2 = rhs.sum_;
|
||||
|
||||
sum_ += rhs.sum_;
|
||||
mean_ = (n1 * mu1 + n2 * mu2) / sum_;
|
||||
sum_of_deltas_squared_ += rhs.sum_of_deltas_squared_;
|
||||
sum_of_deltas_squared_ +=
|
||||
n1 * (mean_ - mu1) * (mean_ - mu1) + n2 * (mean_ - mu2) * (mean_ - mu2);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Scale by value
|
||||
/** Scale by value.
|
||||
|
||||
This acts as if all samples were scaled by the value.
|
||||
*/
|
||||
@@ -84,7 +109,7 @@ public:
|
||||
|
||||
bool operator!=(const mean& rhs) const noexcept { return !operator==(rhs); }
|
||||
|
||||
/// Return how many samples were accumulated
|
||||
/// Return how many samples were accumulated.
|
||||
const_reference count() const noexcept { return sum_; }
|
||||
|
||||
/** Return mean value of accumulated samples.
|
||||
@@ -93,7 +118,7 @@ public:
|
||||
*/
|
||||
const_reference value() const noexcept { return mean_; }
|
||||
|
||||
/** Return variance of accumulated samples
|
||||
/** Return variance of accumulated samples.
|
||||
|
||||
The result is undefined, if `count() < 2`.
|
||||
*/
|
||||
|
||||
@@ -49,6 +49,7 @@ namespace algorithm {
|
||||
template <class A, class S>
|
||||
auto sum(const histogram<A, S>& hist, const coverage cov = coverage::all) {
|
||||
using T = typename histogram<A, S>::value_type;
|
||||
// T is arithmetic, compute sum accurately with high dynamic range
|
||||
using sum_type = mp11::mp_if<std::is_arithmetic<T>, accumulators::sum<double>, T>;
|
||||
sum_type sum;
|
||||
if (cov == coverage::all)
|
||||
|
||||
@@ -18,52 +18,97 @@ using namespace std::literals;
|
||||
|
||||
int main() {
|
||||
using m_t = accumulators::mean<double>;
|
||||
m_t a;
|
||||
BOOST_TEST_EQ(a.count(), 0);
|
||||
BOOST_TEST_EQ(a, m_t{});
|
||||
|
||||
a(4);
|
||||
a(7);
|
||||
a(13);
|
||||
a(16);
|
||||
// basic interface, string conversion
|
||||
{
|
||||
m_t a;
|
||||
BOOST_TEST_EQ(a.count(), 0);
|
||||
BOOST_TEST_EQ(a, m_t{});
|
||||
|
||||
BOOST_TEST_EQ(a.count(), 4);
|
||||
BOOST_TEST_EQ(a.value(), 10);
|
||||
BOOST_TEST_EQ(a.variance(), 30);
|
||||
a(4);
|
||||
a(7);
|
||||
a(13);
|
||||
a(16);
|
||||
|
||||
BOOST_TEST_EQ(str(a), "mean(4, 10, 30)"s);
|
||||
BOOST_TEST_EQ(str(a, 20, false), " mean(4, 10, 30)"s);
|
||||
BOOST_TEST_EQ(str(a, 20, true), "mean(4, 10, 30) "s);
|
||||
BOOST_TEST_EQ(a.count(), 4);
|
||||
BOOST_TEST_EQ(a.value(), 10);
|
||||
BOOST_TEST_EQ(a.variance(), 30);
|
||||
|
||||
m_t b;
|
||||
b(1e8 + 4);
|
||||
b(1e8 + 7);
|
||||
b(1e8 + 13);
|
||||
b(1e8 + 16);
|
||||
BOOST_TEST_EQ(str(a), "mean(4, 10, 30)"s);
|
||||
BOOST_TEST_EQ(str(a, 20, false), " mean(4, 10, 30)"s);
|
||||
BOOST_TEST_EQ(str(a, 20, true), "mean(4, 10, 30) "s);
|
||||
}
|
||||
|
||||
BOOST_TEST_EQ(b.count(), 4);
|
||||
BOOST_TEST_EQ(b.value(), 1e8 + 10);
|
||||
BOOST_TEST_EQ(b.variance(), 30);
|
||||
// small variation on large number
|
||||
{
|
||||
m_t a;
|
||||
a(1e8 + 4);
|
||||
a(1e8 + 7);
|
||||
a(1e8 + 13);
|
||||
a(1e8 + 16);
|
||||
|
||||
auto c = a;
|
||||
c += a; // same as feeding all samples twice
|
||||
BOOST_TEST_EQ(a.count(), 4);
|
||||
BOOST_TEST_EQ(a.value(), 1e8 + 10);
|
||||
BOOST_TEST_EQ(a.variance(), 30);
|
||||
}
|
||||
|
||||
BOOST_TEST_EQ(c.count(), 8);
|
||||
BOOST_TEST_EQ(c.value(), 10);
|
||||
BOOST_TEST_IS_CLOSE(c.variance(), 25.714, 1e-3);
|
||||
// addition of zero element
|
||||
{
|
||||
BOOST_TEST_EQ(m_t() += m_t(), m_t());
|
||||
BOOST_TEST_EQ(m_t(1, 2, 3) += m_t(), m_t(1, 2, 3));
|
||||
BOOST_TEST_EQ(m_t() += m_t(1, 2, 3), m_t(1, 2, 3));
|
||||
}
|
||||
|
||||
// also same as feeding all samples twice
|
||||
m_t d;
|
||||
d(weight(2), 4);
|
||||
d(weight(2), 7);
|
||||
d(weight(2), 13);
|
||||
d(weight(2), 16);
|
||||
// addition
|
||||
{
|
||||
m_t a, b, c;
|
||||
|
||||
BOOST_TEST_EQ(d, c);
|
||||
a(1);
|
||||
a(2);
|
||||
a(3);
|
||||
BOOST_TEST_EQ(a.count(), 3);
|
||||
BOOST_TEST_EQ(a.value(), 2);
|
||||
BOOST_TEST_EQ(a.variance(), 1);
|
||||
|
||||
BOOST_TEST_EQ(m_t() += m_t(), m_t());
|
||||
BOOST_TEST_EQ(m_t(1, 2, 3) += m_t(), m_t(1, 2, 3));
|
||||
BOOST_TEST_EQ(m_t() += m_t(1, 2, 3), m_t(1, 2, 3));
|
||||
b(4);
|
||||
b(6);
|
||||
BOOST_TEST_EQ(b.count(), 2);
|
||||
BOOST_TEST_EQ(b.value(), 5);
|
||||
BOOST_TEST_EQ(b.variance(), 2);
|
||||
|
||||
c(1);
|
||||
c(2);
|
||||
c(3);
|
||||
c(4);
|
||||
c(6);
|
||||
|
||||
auto d = a;
|
||||
d += b;
|
||||
BOOST_TEST_EQ(c.count(), d.count());
|
||||
BOOST_TEST_EQ(c.value(), d.value());
|
||||
BOOST_TEST_IS_CLOSE(c.variance(), d.variance(), 1e-3);
|
||||
}
|
||||
|
||||
// using weight=2 must be same as adding all samples twice
|
||||
{
|
||||
m_t a, b;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
a(4);
|
||||
a(7);
|
||||
a(13);
|
||||
a(16);
|
||||
}
|
||||
|
||||
b(weight(2), 4);
|
||||
b(weight(2), 7);
|
||||
b(weight(2), 13);
|
||||
b(weight(2), 16);
|
||||
|
||||
BOOST_TEST_EQ(a.count(), b.count());
|
||||
BOOST_TEST_EQ(a.value(), b.value());
|
||||
BOOST_TEST_IS_CLOSE(a.variance(), b.variance(), 1e-3);
|
||||
}
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user