diff --git a/doc/graphs/paired_sample_t_statistic.svg b/doc/graphs/paired_sample_t_statistic.svg
new file mode 100644
index 000000000..50995a063
--- /dev/null
+++ b/doc/graphs/paired_sample_t_statistic.svg
@@ -0,0 +1,46 @@
+
\ No newline at end of file
diff --git a/doc/graphs/two_sample_pooled_variance.svg b/doc/graphs/two_sample_pooled_variance.svg
new file mode 100644
index 000000000..74f06026d
--- /dev/null
+++ b/doc/graphs/two_sample_pooled_variance.svg
@@ -0,0 +1,79 @@
+
\ No newline at end of file
diff --git a/doc/graphs/two_sample_t_statistic.svg b/doc/graphs/two_sample_t_statistic.svg
new file mode 100644
index 000000000..e24d64253
--- /dev/null
+++ b/doc/graphs/two_sample_t_statistic.svg
@@ -0,0 +1,68 @@
+
\ No newline at end of file
diff --git a/doc/graphs/welchs_t_dof.svg b/doc/graphs/welchs_t_dof.svg
new file mode 100644
index 000000000..5e925223f
--- /dev/null
+++ b/doc/graphs/welchs_t_dof.svg
@@ -0,0 +1,122 @@
+
\ No newline at end of file
diff --git a/doc/graphs/welchs_t_statistic.svg b/doc/graphs/welchs_t_statistic.svg
new file mode 100644
index 000000000..8b5134c88
--- /dev/null
+++ b/doc/graphs/welchs_t_statistic.svg
@@ -0,0 +1,41 @@
+
\ No newline at end of file
diff --git a/doc/graphs/welchs_t_variance.svg b/doc/graphs/welchs_t_variance.svg
new file mode 100644
index 000000000..284c7eaaf
--- /dev/null
+++ b/doc/graphs/welchs_t_variance.svg
@@ -0,0 +1,57 @@
+
\ No newline at end of file
diff --git a/doc/statistics/t_test.qbk b/doc/statistics/t_test.qbk
index da2b06a42..76adb69a2 100644
--- a/doc/statistics/t_test.qbk
+++ b/doc/statistics/t_test.qbk
@@ -1,5 +1,6 @@
[/
Copyright (c) 2019 Nick Thompson
+Copyright (c) 2021 Matt Borland
Use, modification and distribution are subject to the
Boost Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,20 +15,38 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
namespace boost::math::statistics {
-template
-std::pair one_sample_t_test(Real sample_mean, Real sample_variance, Real num_samples, Real assumed_mean);
+template
+std::pair one_sample_t_test(Real sample_mean, Real sample_variance, Size num_samples, Real assumed_mean);
-template
-auto one_sample_t_test(ForwardIterator begin, ForwardIterator end, typename std::iterator_traits::value_type assumed_mean);
+template::value_type>
+std::pair one_sample_t_test(ForwardIterator begin, ForwardIterator end, Real assumed_mean);
-template
-auto one_sample_t_test(Container const & v, typename Container::value_type assumed_mean);
+template
+std::pair one_sample_t_test(Container const & v, typename Container::value_type assumed_mean);
-}}}
+template::value_type>
+std::pair two_sample_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator begin_2);
+
+template
+std::pair two_sample_t_test(Container const & u, Container const & v);
+
+template::value_type>
+std::pair paired_samples_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator begin_2);
+
+template
+std::pair paired_samples_t_test(Container const & u, Container const & v);
+
+}
```
[heading Background]
+A set of C++11 compatible functions for various [@https://en.wikipedia.org/wiki/Student's_t-test Student's t-tests].
+The input can be any real number or set of real numbers.
+In the event that the input is an integer or a set of integers typename Real will be deduced as a double precision type.
+
+[heading One-sample /t/-test]
+
A one-sample /t/-test attempts to answer the question "given a sample mean, is it likely that the population mean of my data is a certain value?"
The test statistic is
@@ -61,6 +80,76 @@ auto [t, p] = boost::math::statistics::one_sample_t_test(v, 0.0);
The test statistic is the first element of the pair, and the /p/-value is the second element.
+[heading Independent two-sample /t/-test]
+
+A two-sample /t/-test determines if the means of two sets of data have a statistically significant difference from each other.
+There are two underlying implementations based off the variances of the sets of data.
+If /s/[sub 1] and /s/[sub 2] are within a factor of 2 of each other the following formulas will be used:
+
+[$../graphs/two_sample_t_statistic.svg]
+
+where
+
+[$../graphs/two_sample_pooled_variance.svg]
+
+If /s/[sub 1] and /s/[sub 2] are not within a factor of 2 of each other Welch's t-test will be used:
+
+[$../graphs/welchs_t_statistic.svg]
+
+where
+
+[$../graphs/welchs_t_variance.svg]
+
+and
+
+[$../graphs/welchs_t_dof.svg]
+
+An example of usage is as follows:
+
+```
+#include
+#include
+#include
+
+std::random_device rd;
+std::mt19937 gen{rd()};
+std::normal_distribution dis{0,1};
+std::vector u(1024);
+std::vector v(1024);
+for(std::size_t i = 0; i < u.size(); ++i)
+{
+ u[i] = dis(gen);
+ v[i] = dis(gen);
+}
+
+auto [t, p] = boost::math::statistics::two_sample_t_test(u, v);
+```
+
+/Nota bene:/ The sample sizes for the two sets of data do not need to be equal.
+
+/Nota bene:/ std::distance is used in the implementation leading to lower performance with ForwardIterator than RandomAccessIterator.
+
+[heading Dependent /t/-test for paired samples]
+
+This test is used when the samples are dependent such as when one sample has been tested twice, or when two samples have been paired.
+Much like the independent t-test it is used to determine if the means of two sets of data have a statistically significant difference from each other.
+
+[$../graphs/paired_sample_t_statistic.svg]
+
+where /X/[sub D] and /s/[sub D] are the mean and standard deviation of the differences between all pairs.
+
+An example of usage is as follows:
+
+```
+#include
+#include
+#include
+
+std::vector first_test {35, 50, 90, 78};
+std::vector second_test {67, 46, 86, 91};
+
+auto [t, p] = boost::math::statistics::paired_samples_t_test(first_test, second_test);
+```
[heading Performance]
diff --git a/include/boost/math/statistics/t_test.hpp b/include/boost/math/statistics/t_test.hpp
index a3aa13025..ec06bda8c 100644
--- a/include/boost/math/statistics/t_test.hpp
+++ b/include/boost/math/statistics/t_test.hpp
@@ -8,9 +8,12 @@
#define BOOST_MATH_STATISTICS_T_TEST_HPP
#include
+#include
#include
#include
#include
+#include
+#include
#include
#include
@@ -47,6 +50,133 @@ ReturnType one_sample_t_test_impl(ForwardIterator begin, ForwardIterator end, ty
Real s_sq = std::get<1>(temp);
return one_sample_t_test_impl(mu, s_sq, Real(std::distance(begin, end)), Real(assumed_mean));
}
+
+// https://en.wikipedia.org/wiki/Student%27s_t-test#Equal_or_unequal_sample_sizes,_unequal_variances_(sX1_%3E_2sX2_or_sX2_%3E_2sX1)
+template
+ReturnType welchs_t_test_impl(T mean_1, T variance_1, T size_1, T mean_2, T variance_2, T size_2)
+{
+ using Real = typename std::tuple_element<0, ReturnType>::type;
+ using no_promote_policy = boost::math::policies::policy, boost::math::policies::promote_double>;
+ using std::sqrt;
+
+ Real dof_num = (variance_1/size_1 + variance_2/size_2) * (variance_1/size_1 + variance_2/size_2);
+ Real dof_denom = ((variance_1/size_1) * (variance_1/size_1))/(size_1 - 1) +
+ ((variance_2/size_2) * (variance_2/size_2))/(size_2 - 1);
+ Real dof = dof_num / dof_denom;
+
+ Real s_estimator = sqrt((variance_1/size_1) + (variance_2/size_2));
+
+ Real test_statistic = (static_cast(mean_1) - static_cast(mean_2))/s_estimator;
+ auto student = boost::math::students_t_distribution(dof);
+ Real pvalue;
+ if (test_statistic > 0)
+ {
+ pvalue = 2*boost::math::cdf(student, -test_statistic);;
+ }
+ else
+ {
+ pvalue = 2*boost::math::cdf(student, test_statistic);
+ }
+
+ return std::make_pair(test_statistic, pvalue);
+}
+
+// https://en.wikipedia.org/wiki/Student%27s_t-test#Equal_or_unequal_sample_sizes,_similar_variances_(1/2_%3C_sX1/sX2_%3C_2)
+template
+ReturnType two_sample_t_test_impl(T mean_1, T variance_1, T size_1, T mean_2, T variance_2, T size_2)
+{
+ using Real = typename std::tuple_element<0, ReturnType>::type;
+ using no_promote_policy = boost::math::policies::policy, boost::math::policies::promote_double>;
+ using std::sqrt;
+
+ Real dof = size_1 + size_2 - 2;
+ Real pooled_std_dev = sqrt(((size_1-1)*variance_1 + (size_2-1)*variance_2) / dof);
+ Real test_statistic = (mean_1-mean_2) / (pooled_std_dev*sqrt(1.0/static_cast(size_1) + 1.0/static_cast(size_2)));
+
+ auto student = boost::math::students_t_distribution(dof);
+ Real pvalue;
+ if (test_statistic > 0)
+ {
+ pvalue = 2*boost::math::cdf(student, -test_statistic);;
+ }
+ else
+ {
+ pvalue = 2*boost::math::cdf(student, test_statistic);
+ }
+
+ return std::make_pair(test_statistic, pvalue);
+}
+
+template
+ReturnType two_sample_t_test_impl(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2)
+{
+ using Real = typename std::tuple_element<0, ReturnType>::type;
+ using std::sqrt;
+ auto n1 = std::distance(begin_1, end_1);
+ auto n2 = std::distance(begin_2, end_2);
+
+ ReturnType temp_1 = mean_and_sample_variance(begin_1, end_1);
+ Real mean_1 = std::get<0>(temp_1);
+ Real variance_1 = std::get<1>(temp_1);
+ Real std_dev_1 = sqrt(variance_1);
+
+ ReturnType temp_2 = mean_and_sample_variance(begin_2, end_2);
+ Real mean_2 = std::get<0>(temp_2);
+ Real variance_2 = std::get<1>(temp_2);
+ Real std_dev_2 = sqrt(variance_2);
+
+ if(std_dev_1 > 2 * std_dev_2 || std_dev_2 > 2 * std_dev_1)
+ {
+ return welchs_t_test_impl(mean_1, variance_1, Real(n1), mean_2, variance_2, Real(n2));
+ }
+ else
+ {
+ return two_sample_t_test_impl(mean_1, variance_1, Real(n1), mean_2, variance_2, Real(n2));
+ }
+}
+
+// https://en.wikipedia.org/wiki/Student%27s_t-test#Dependent_t-test_for_paired_samples
+template
+ReturnType paired_samples_t_test_impl(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2)
+{
+ using Real = typename std::tuple_element<0, ReturnType>::type;
+ using no_promote_policy = boost::math::policies::policy, boost::math::policies::promote_double>;
+ using std::sqrt;
+
+ std::vector delta;
+ ForwardIterator it_1 = begin_1;
+ ForwardIterator it_2 = begin_2;
+ std::size_t n = 0;
+ while(it_1 != end_1 && it_2 != end_2)
+ {
+ delta.emplace_back(static_cast(*it_1++) - static_cast(*it_2++));
+ ++n;
+ }
+
+ if(it_1 != end_1 || it_2 != end_2)
+ {
+ throw std::domain_error("Both sets must have the same number of values.");
+ }
+
+ std::pair temp = mean_and_sample_variance(delta.begin(), delta.end());
+ Real delta_mean = std::get<0>(temp);
+ Real delta_std_dev = sqrt(std::get<1>(temp));
+
+ Real test_statistic = delta_mean/(delta_std_dev/sqrt(n));
+
+ auto student = boost::math::students_t_distribution(n - 1);
+ Real pvalue;
+ if (test_statistic > 0)
+ {
+ pvalue = 2*boost::math::cdf(student, -test_statistic);;
+ }
+ else
+ {
+ pvalue = 2*boost::math::cdf(student, test_statistic);
+ }
+
+ return std::make_pair(test_statistic, pvalue);
+}
} // namespace detail
template::value, bool>::type = true>
@@ -89,5 +219,57 @@ inline auto one_sample_t_test(Container const & v, Real assumed_mean) -> std::pa
return detail::one_sample_t_test_impl>(std::begin(v), std::end(v), assumed_mean);
}
+template::value_type,
+ typename std::enable_if::value, bool>::type = true>
+inline auto two_sample_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair
+{
+ return detail::two_sample_t_test_impl>(begin_1, end_1, begin_2, end_2);
+}
+
+template::value_type,
+ typename std::enable_if::value, bool>::type = true>
+inline auto two_sample_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair
+{
+ return detail::two_sample_t_test_impl>(begin_1, end_1, begin_2, end_2);
+}
+
+template::value, bool>::type = true>
+inline auto two_sample_t_test(Container const & u, Container const & v) -> std::pair
+{
+ return detail::two_sample_t_test_impl>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+}
+
+template::value, bool>::type = true>
+inline auto two_sample_t_test(Container const & u, Container const & v) -> std::pair
+{
+ return detail::two_sample_t_test_impl>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+}
+
+template::value_type,
+ typename std::enable_if::value, bool>::type = true>
+inline auto paired_samples_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair
+{
+ return detail::paired_samples_t_test_impl>(begin_1, end_1, begin_2, end_2);
+}
+
+template::value_type,
+ typename std::enable_if::value, bool>::type = true>
+inline auto paired_samples_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair
+{
+ return detail::paired_samples_t_test_impl>(begin_1, end_1, begin_2, end_2);
+}
+
+template::value, bool>::type = true>
+inline auto paired_samples_t_test(Container const & u, Container const & v) -> std::pair
+{
+ return detail::paired_samples_t_test_impl>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+}
+
+template::value, bool>::type = true>
+inline auto paired_samples_t_test(Container const & u, Container const & v) -> std::pair
+{
+ return detail::paired_samples_t_test_impl>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+}
+
}}} // namespace boost::math::statistics
#endif
diff --git a/test/test_t_test.cpp b/test/test_t_test.cpp
index 8e3e36415..4fefcc5b6 100644
--- a/test/test_t_test.cpp
+++ b/test/test_t_test.cpp
@@ -11,6 +11,9 @@
#include
#include
#include
+#include
+
+using quad = boost::multiprecision::cpp_bin_float_quad;
template
void test_exact_mean()
@@ -34,6 +37,24 @@ void test_exact_mean()
CHECK_ULP_CLOSE(Real(1), computed_pvalue, 9);
}
+template
+void test_multiprecision_exact_mean()
+{
+ std::mt19937 gen{5125122};
+ std::normal_distribution dis{0,3};
+ std::vector v(1024);
+ for (auto & x : v) {
+ x = dis(gen);
+ }
+
+ Real mu = boost::math::statistics::mean(v);
+
+ auto [computed_statistic, computed_pvalue] = boost::math::statistics::one_sample_t_test(v, mu);
+
+ CHECK_MOLLIFIED_CLOSE(Real(0), computed_statistic, 10*std::numeric_limits::epsilon());
+ CHECK_ULP_CLOSE(Real(1), computed_pvalue, 9);
+}
+
template
void test_integer()
{
@@ -95,15 +116,117 @@ void test_agreement_with_mathematica()
}
}
+template
+void test_two_sample_t()
+{
+ auto [computed_statistic, computed_pvalue] =
+ boost::math::statistics::detail::two_sample_t_test_impl>(Real(10.0), Real(1.0), Real(20), Real(5.0), Real(0.25), Real(20));
+
+ CHECK_ULP_CLOSE(Real(20), computed_statistic, 5);
+ CHECK_MOLLIFIED_CLOSE(Real(0), computed_pvalue, 1e-21);
+
+ std::vector set_1 {301, 298, 295, 297, 304, 305, 309, 298, 291, 299, 293, 304};
+
+ auto [computed_statistic_2, computed_pvalue_2] = boost::math::statistics::two_sample_t_test(set_1, set_1);
+ CHECK_ULP_CLOSE(Real(0), computed_statistic_2, 5);
+ CHECK_ULP_CLOSE(Real(1), computed_pvalue_2, 5);
+}
+
+template
+void test_integer_two_sample_t()
+{
+ auto [computed_statistic, computed_pvalue] =
+ boost::math::statistics::detail::two_sample_t_test_impl>(Z(10), Z(4), Z(20), Z(5), Z(1), Z(20));
+
+ CHECK_ULP_CLOSE(10.0, computed_statistic, 5);
+
+ std::vector set_1 {301, 298, 295, 297, 304, 305, 309, 298, 291, 299, 293, 304};
+
+ auto [computed_statistic_2, computed_pvalue_2] = boost::math::statistics::two_sample_t_test(set_1, set_1);
+ CHECK_ULP_CLOSE(0.0, computed_statistic_2, 5);
+ CHECK_ULP_CLOSE(1.0, computed_pvalue_2, 5);
+}
+
+template
+void test_welch()
+{
+ using std::sqrt;
+
+ auto [computed_statistic, computed_pvalue] =
+ boost::math::statistics::detail::welchs_t_test_impl>(Real(10.0), Real(1.0), Real(20), Real(5.0), Real(0.25), Real(20));
+
+ CHECK_ULP_CLOSE(Real(20), computed_statistic, 5);
+ CHECK_MOLLIFIED_CLOSE(Real(0), computed_pvalue, 5e-18);
+
+ auto [computed_statistic_2, computed_pvalue_2] =
+ boost::math::statistics::detail::welchs_t_test_impl>(Real(10.0), Real(0.5), Real(20), Real(10.0), Real(0.5), Real(20));
+
+ CHECK_ULP_CLOSE(Real(0), computed_statistic_2, 5);
+ CHECK_ULP_CLOSE(Real(1), computed_pvalue_2, 5);
+}
+
+template
+void test_integer_welch()
+{
+ auto [computed_statistic, computed_pvalue] =
+ boost::math::statistics::detail::welchs_t_test_impl>(10.0, 4.0, 20.0, 5.0, 1.0, 20.0);
+
+ CHECK_ULP_CLOSE(Z(10), computed_statistic, 5);
+
+ auto [computed_statistic_2, computed_pvalue_2] =
+ boost::math::statistics::detail::welchs_t_test_impl>(10.0, 0.5, 20.0, 10.0, 0.5, 20.0);
+ CHECK_ULP_CLOSE(Z(0), computed_statistic_2, 5);
+ CHECK_ULP_CLOSE(Z(1), computed_pvalue_2, 5);
+}
+
+template
+void test_paired_samples()
+{
+ std::vector set_1 {2,4};
+ std::vector set_2 {1,2};
+
+ auto [computed_statistic, computed_pvalue] = boost::math::statistics::paired_samples_t_test(set_1, set_2);
+
+ CHECK_ULP_CLOSE(Real(3), computed_statistic, 5);
+}
int main()
{
test_agreement_with_mathematica();
+
test_exact_mean();
test_exact_mean();
+ test_multiprecision_exact_mean();
+
test_integer();
test_integer();
test_integer();
test_integer();
+
+ test_two_sample_t();
+ test_two_sample_t();
+ test_two_sample_t();
+
+ test_integer_two_sample_t();
+ test_integer_two_sample_t();
+ test_integer_two_sample_t();
+ test_integer_two_sample_t();
+
+ test_welch();
+ test_welch();
+ test_welch();
+
+ test_integer_welch();
+ test_integer_welch();
+ test_integer_welch();
+ test_integer_welch();
+
+ test_paired_samples();
+ test_paired_samples();
+ test_paired_samples();
+ test_paired_samples();
+ test_paired_samples();
+ test_paired_samples();
+
return boost::math::test::report_errors();
}