diff --git a/doc/statistics/bivariate_statistics.qbk b/doc/statistics/bivariate_statistics.qbk
index 8bb3aece0..8b5e6577d 100644
--- a/doc/statistics/bivariate_statistics.qbk
+++ b/doc/statistics/bivariate_statistics.qbk
@@ -1,5 +1,6 @@
 [/
   Copyright 2018 Nick Thompson
+  Copyright 2021 Matt Borland
 
   Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
@@ -15,13 +16,22 @@
 
 namespace boost{ namespace math{ namespace statistics {
 
-    template<class Container>
+    template<typename ExecutionPolicy, typename Container>
+    auto covariance(ExecutionPolicy&& exec, Container const & u, Container const & v);
+
+    template<typename Container>
     auto covariance(Container const & u, Container const & v);
 
-    template<class Container>
+    template<typename ExecutionPolicy, typename Container>
+    auto means_and_covariance(ExecutionPolicy&& exec, Container const & u, Container const & v);
+
+    template<typename Container>
     auto means_and_covariance(Container const & u, Container const & v);
 
-    template<class Container>
+    template<typename ExecutionPolicy, typename Container>
+    auto correlation_coefficient(ExecutionPolicy&& exec, Container const & u, Container const & v);
+
+    template<typename Container>
     auto correlation_coefficient(Container const & u, Container const & v);
 
 }}}
@@ -30,6 +40,8 @@ namespace boost{ namespace math{ namespace statistics {
 [heading Description]
 
 This file provides functions for computing bivariate statistics.
+The functions are C++11 compatible, but require C++17 to use execution policies.
+If an execution policy is not passed to the function the default is std::execution::seq.
 
 [heading Covariance]
 
@@ -40,9 +52,12 @@ Computes the population covariance of two datasets:
     double cov_uv = boost::math::statistics::covariance(u, v);
 
 The implementation follows [@https://doi.org/10.1109/CLUSTR.2009.5289161 Bennet et al].
-The data is not modified. Requires a random-access container.
+The parallel implementation follows [@https://dl.acm.org/doi/10.1145/3221269.3223036 Schubert et al].
+The data is not modified.
 Works with real-valued inputs and does not work with complex-valued inputs.
 
+/Nota bene:/ If the input is an integer type the output will be a double precision type.
+
 The algorithm used herein simultaneously generates the mean values of the input data /u/ and /v/.
 For certain applications, it might be useful to get them in a single pass through the data.
 As such, we provide `means_and_covariance`:
@@ -60,7 +75,9 @@ Computes the [@https://en.wikipedia.org/wiki/Pearson_correlation_coefficient Pea
     double rho_uv = boost::math::statistics::correlation_coefficient(u, v);
     // rho_uv = 1.
 
-The data must be random access and cannot be complex.
+Works with real-valued inputs and does not work with complex-valued inputs.
+
+/Nota bene:/ If the input is an integer type the output will be a double precision type.
 
 If one or both of the datasets is constant, the correlation coefficient is an indeterminant form (0/0) and definitions must be introduced to assign it a value.
 We use the following: If both datasets are constant, then the correlation coefficient is 1.
@@ -70,6 +87,7 @@ If one dataset is constant, and the other is not, then the correlation coefficie
 [heading References]
 
 * Bennett, Janine, et al. ['Numerically stable, single-pass, parallel statistics algorithms.] Cluster Computing and Workshops, 2009. CLUSTER'09. IEEE International Conference on. IEEE, 2009.
+* Schubert, Erich; Gertz, Michael ['Numerically stable parallel computation of (co-)variance'] Proceedings of the 30th International Conference on Scientific and Statistical Database Management, 2018.
 
 [endsect]
 [/section:bivariate_statistics Bivariate Statistics]
diff --git a/include/boost/math/statistics/bivariate_statistics.hpp b/include/boost/math/statistics/bivariate_statistics.hpp
index 6841f833e..c8b29021b 100644
--- a/include/boost/math/statistics/bivariate_statistics.hpp
+++ b/include/boost/math/statistics/bivariate_statistics.hpp
@@ -10,72 +10,183 @@
 #include <iterator>
 #include <tuple>
 #include <type_traits>
+#include <stdexcept>
+#include <future>
+#include <thread>
+#include <vector>
+#include <algorithm>
 #include <cmath>
 #include <cstddef>
 #include <boost/assert.hpp>
 
+// Support compilers with P0024R2 implemented without linking TBB
+// https://en.cppreference.com/w/cpp/compiler_support
+#if (__cplusplus > 201700 || _MSVC_LANG > 201700) && (__GNUC__ > 9 || (__clang_major__ > 9 && defined __GLIBCXX__)  || _MSC_VER > 1927)
+#include <execution>
+#define EXEC_COMPATIBLE
+#endif
 
 namespace boost{ namespace math{ namespace statistics { namespace detail {
 
-template<class ReturnType, class Container>
-ReturnType means_and_covariance_impl(Container const & u, Container const & v)
+// See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
+template<typename ReturnType, typename ForwardIterator>
+ReturnType means_and_covariance_seq_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end)
 {
     using Real = typename std::tuple_element<0, ReturnType>::type;
 
-    BOOST_ASSERT_MSG(u.size() == v.size(), "The size of each vector must be the same to compute covariance.");
-    BOOST_ASSERT_MSG(u.size() > 0, "Computing covariance requires at least one sample.");
-
-    // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
     Real cov = 0;
-    Real mu_u = u[0];
-    Real mu_v = v[0];
+    ForwardIterator u_it = u_begin;
+    ForwardIterator v_it = v_begin;
+    Real mu_u = *u_it++;
+    Real mu_v = *v_it++;
+    std::size_t i = 1;
 
-    for(std::size_t i = 1; i < u.size(); ++i)
+    while(u_it != u_end && v_it != v_end)
     {
-        Real u_tmp = (u[i] - mu_u)/(i+1);
-        Real v_tmp = v[i] - mu_v;
-        cov += i*u_tmp*v_tmp;
-        mu_u = mu_u + u_tmp;
-        mu_v = mu_v + v_tmp/(i+1);
+        Real u_temp = (*u_it++ - mu_u)/(i+1);
+        Real v_temp = *v_it++ - mu_v;
+        cov += i*u_temp*v_temp;
+        mu_u = mu_u + u_temp;
+        mu_v = mu_v + v_temp/(i+1);
+        i = i + 1;
     }
 
-    return std::make_tuple(mu_u, mu_v, cov/u.size());
+    if(u_it != u_end || v_it != v_end)
+    {
+        throw std::domain_error("The size of each sample set must be the same to compute covariance");
+    }
+
+    return std::make_tuple(mu_u, mu_v, cov/i, i);
 }
 
-template<class ReturnType, class Container>
-ReturnType correlation_coefficient_impl(Container const & u, Container const & v)
+// Numerically stable parallel computation of (co-)variance
+// https://dl.acm.org/doi/10.1145/3221269.3223036
+template<typename ReturnType, typename ForwardIterator>
+ReturnType means_and_covariance_parallel_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end)
 {
-    using Real = ReturnType;
+    using Real = typename std::tuple_element<0, ReturnType>::type;
+
+    const auto u_elements = std::distance(u_begin, u_end);
+    const auto v_elements = std::distance(v_begin, v_end);
+
+    if(u_elements != v_elements)
+    {
+        throw std::domain_error("The size of each sample set must be the same to compute covariance");
+    }
+
+    const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency();
+    unsigned num_threads = 2u;
+    
+    // 5.16 comes from benchmarking. See boost/math/reporting/performance/bivariate_statistics_performance.cpp
+    // Threading is faster for: 10 + 5.16e-3 N/j <= 5.16e-3N => N >= 10^4j/5.16(j-1).
+    const auto parallel_lower_bound = 10e4*max_concurrency/(5.16*(max_concurrency-1));
+    const auto parallel_upper_bound = 10e4*2/5.16; // j = 2
+
+    // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/
+    if(u_elements < parallel_lower_bound)
+    {
+        return means_and_covariance_seq_impl<ReturnType>(u_begin, u_end, v_begin, v_end);
+    }
+    else if(u_elements >= parallel_upper_bound)
+    {
+        num_threads = max_concurrency;
+    }
+    else
+    {
+        for(unsigned i = 3; i < max_concurrency; ++i)
+        {
+            if(parallel_lower_bound < 10e4*i/(5.16*(i-1)))
+            {
+                num_threads = i;
+                break;
+            }
+        }
+    }
+
+    std::vector<std::future<ReturnType>> future_manager;
+    const auto elements_per_thread = std::ceil(static_cast<double>(u_elements)/num_threads);
+
+    ForwardIterator u_it = u_begin;
+    ForwardIterator v_it = v_begin;
+
+    for(std::size_t i = 0; i < num_threads - 1; ++i)
+    {
+        future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, v_it, elements_per_thread]() -> ReturnType
+        {
+            return means_and_covariance_seq_impl<ReturnType>(u_it, std::next(u_it, elements_per_thread), v_it, std::next(v_it, elements_per_thread));
+        }));
+        u_it = std::next(u_it, elements_per_thread);
+        v_it = std::next(v_it, elements_per_thread);
+    }
+
+    future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, u_end, v_it, v_end]() -> ReturnType
+    {
+        return means_and_covariance_seq_impl<ReturnType>(u_it, u_end, v_it, v_end);
+    }));
+
+    ReturnType temp = future_manager[0].get();
+    Real mu_u_a = std::get<0>(temp);
+    Real mu_v_a = std::get<1>(temp);
+    Real cov_a = std::get<2>(temp);
+    Real n_a = std::get<3>(temp);
+
+    for(std::size_t i = 1; i < future_manager.size(); ++i)
+    {
+        temp = future_manager[i].get();
+        Real mu_u_b = std::get<0>(temp);
+        Real mu_v_b = std::get<1>(temp);
+        Real cov_b = std::get<2>(temp);
+        Real n_b = std::get<3>(temp);
+
+        const Real n_ab = n_a + n_b;
+        const Real delta_u = mu_u_b - mu_u_a;
+        const Real delta_v = mu_v_b - mu_v_a;
+
+        cov_a = cov_a + cov_b + (-delta_u)*(-delta_v)*((n_a*n_b)/n_ab);
+        mu_u_a = mu_u_a + delta_u*(n_b/n_ab);
+        mu_v_a = mu_v_a + delta_v*(n_b/n_ab);
+        n_a = n_ab;
+    }
+
+    return std::make_tuple(mu_u_a, mu_v_a, cov_a, n_a);
+}
+
+template<typename ReturnType, typename ForwardIterator>
+ReturnType correlation_coefficient_seq_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end)
+{
+    using Real = typename std::tuple_element<0, ReturnType>::type;
     using std::sqrt;
-    BOOST_ASSERT_MSG(u.size() == v.size(), "The size of each vector must be the same to compute covariance.");
-    BOOST_ASSERT_MSG(u.size() > 0, "Computing covariance requires at least two samples.");
 
     Real cov = 0;
-    Real mu_u = u[0];
-    Real mu_v = v[0];
+    ForwardIterator u_it = u_begin;
+    ForwardIterator v_it = v_begin;
+    Real mu_u = *u_it++;
+    Real mu_v = *v_it++;
     Real Qu = 0;
     Real Qv = 0;
+    std::size_t i = 1;
 
-    for(std::size_t i = 1; i < u.size(); ++i)
+    while(u_it != u_end && v_it != v_end)
     {
-        Real u_tmp = u[i] - mu_u;
-        Real v_tmp = v[i] - mu_v;
+        Real u_tmp = *u_it++ - mu_u;
+        Real v_tmp = *v_it++ - mu_v;
         Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
         Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
         cov += i*u_tmp*v_tmp/(i+1);
         mu_u = mu_u + u_tmp/(i+1);
         mu_v = mu_v + v_tmp/(i+1);
+        ++i;
     }
 
     // If both datasets are constant, then they are perfectly correlated.
     if (Qu == 0 && Qv == 0)
     {
-        return Real(1);
+        return std::make_tuple(mu_u, Qu, mu_v, Qv, cov, Real(1), i);
     }
     // If one dataset is constant and the other isn't, then they have no correlation:
     if (Qu == 0 || Qv == 0)
     {
-        return Real(0);
+        return std::make_tuple(mu_u, Qu, mu_v, Qv, cov, Real(0), i);
     }
 
     // Make sure rho in [-1, 1], even in the presence of numerical noise.
@@ -86,47 +197,273 @@ ReturnType correlation_coefficient_impl(Container const & u, Container const & v
     if (rho < -1) {
         rho = -1;
     }
-    return rho;
+
+    return std::make_tuple(mu_u, Qu, mu_v, Qv, cov, rho, i);
 }
+
+// Numerically stable parallel computation of (co-)variance:
+// https://dl.acm.org/doi/10.1145/3221269.3223036
+//
+// Parallel computation of variance:
+// http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
+template<typename ReturnType, typename ForwardIterator>
+ReturnType correlation_coefficient_parallel_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end)
+{
+    using Real = typename std::tuple_element<0, ReturnType>::type;
+
+    const auto u_elements = std::distance(u_begin, u_end);
+    const auto v_elements = std::distance(v_begin, v_end);
+
+    if(u_elements != v_elements)
+    {
+        throw std::domain_error("The size of each sample set must be the same to compute covariance");
+    }
+
+    const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency();
+    unsigned num_threads = 2u;
+    
+    // 3.25 comes from benchmarking. See boost/math/reporting/performance/bivariate_statistics_performance.cpp
+    // Threading is faster for: 10 + 3.25e-3 N/j <= 3.25e-3N => N >= 10^4j/3.25(j-1).
+    const auto parallel_lower_bound = 10e4*max_concurrency/(3.25*(max_concurrency-1));
+    const auto parallel_upper_bound = 10e4*2/3.25; // j = 2
+
+    // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/
+    if(u_elements < parallel_lower_bound)
+    {
+        return correlation_coefficient_seq_impl<ReturnType>(u_begin, u_end, v_begin, v_end);
+    }
+    else if(u_elements >= parallel_upper_bound)
+    {
+        num_threads = max_concurrency;
+    }
+    else
+    {
+        for(unsigned i = 3; i < max_concurrency; ++i)
+        {
+            if(parallel_lower_bound < 10e4*i/(3.25*(i-1)))
+            {
+                num_threads = i;
+                break;
+            }
+        }
+    }
+
+    std::vector<std::future<ReturnType>> future_manager;
+    const auto elements_per_thread = std::ceil(static_cast<double>(u_elements)/num_threads);
+
+    ForwardIterator u_it = u_begin;
+    ForwardIterator v_it = v_begin;
+
+    for(std::size_t i = 0; i < num_threads - 1; ++i)
+    {
+        future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, v_it, elements_per_thread]() -> ReturnType
+        {
+            return correlation_coefficient_seq_impl<ReturnType>(u_it, std::next(u_it, elements_per_thread), v_it, std::next(v_it, elements_per_thread));
+        }));
+        u_it = std::next(u_it, elements_per_thread);
+        v_it = std::next(v_it, elements_per_thread);
+    }
+
+    future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, u_end, v_it, v_end]() -> ReturnType
+    {
+        return correlation_coefficient_seq_impl<ReturnType>(u_it, u_end, v_it, v_end);
+    }));
+
+    ReturnType temp = future_manager[0].get();
+    Real mu_u_a = std::get<0>(temp);
+    Real Qu_a = std::get<1>(temp);
+    Real mu_v_a = std::get<2>(temp);
+    Real Qv_a = std::get<3>(temp);
+    Real cov_a = std::get<4>(temp);
+    Real n_a = std::get<6>(temp);
+
+    for(std::size_t i = 1; i < future_manager.size(); ++i)
+    {
+        temp = future_manager[i].get();
+        Real mu_u_b = std::get<0>(temp);
+        Real Qu_b = std::get<1>(temp);
+        Real mu_v_b = std::get<2>(temp);
+        Real Qv_b = std::get<3>(temp);
+        Real cov_b = std::get<4>(temp);
+        Real n_b = std::get<6>(temp);
+
+        const Real n_ab = n_a + n_b;
+        const Real delta_u = mu_u_b - mu_u_a;
+        const Real delta_v = mu_v_b - mu_v_a;
+
+        cov_a = cov_a + cov_b + (-delta_u)*(-delta_v)*((n_a*n_b)/n_ab);
+        mu_u_a = mu_u_a + delta_u*(n_b/n_ab);
+        mu_v_a = mu_v_a + delta_v*(n_b/n_ab);
+        Qu_a = Qu_a + Qu_b + delta_u*delta_u*((n_a*n_b)/n_ab);
+        Qv_b = Qv_a + Qv_b + delta_v*delta_v*((n_a*n_b)/n_ab);
+        n_a = n_ab;
+    }
+
+    // If both datasets are constant, then they are perfectly correlated.
+    if (Qu_a == 0 && Qv_a == 0)
+    {
+        return std::make_tuple(mu_u_a, Qu_a, mu_v_a, Qv_a, cov_a, Real(1), n_a);
+    }
+    // If one dataset is constant and the other isn't, then they have no correlation:
+    if (Qu_a == 0 || Qv_a == 0)
+    {
+        return std::make_tuple(mu_u_a, Qu_a, mu_v_a, Qv_a, cov_a, Real(0), n_a);
+    }
+
+    // Make sure rho in [-1, 1], even in the presence of numerical noise.
+    Real rho = cov_a/sqrt(Qu_a*Qv_a);
+    if (rho > 1) {
+        rho = 1;
+    }
+    if (rho < -1) {
+        rho = -1;
+    }
+
+    return std::make_tuple(mu_u_a, Qu_a, mu_v_a, Qv_a, cov_a, rho, n_a);
+}
+
 } // namespace detail
 
+#ifdef EXEC_COMPATIBLE
+
+template<typename ExecutionPolicy, typename Container, typename Real = typename Container::value_type>
+inline auto means_and_covariance(ExecutionPolicy&& exec, Container const & u, Container const & v)
+{
+    if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>)
+    {
+        if constexpr (std::is_integral_v<Real>)
+        {
+            using ReturnType = std::tuple<double, double, double, double>;
+            ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+            return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp));
+        }
+        else
+        {
+            using ReturnType = std::tuple<Real, Real, Real, Real>;
+            ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+            return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp));
+        }
+    }
+    else
+    {
+        if constexpr (std::is_integral_v<Real>)
+        {
+            using ReturnType = std::tuple<double, double, double, double>;
+            ReturnType temp = detail::means_and_covariance_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+            return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp));
+        }
+        else
+        {
+            using ReturnType = std::tuple<Real, Real, Real, Real>;
+            ReturnType temp = detail::means_and_covariance_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+            return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp));
+        }
+    }
+}
+
+template<typename Container>
+inline auto means_and_covariance(Container const & u, Container const & v)
+{
+    return means_and_covariance(std::execution::seq, u, v);
+}
+
+template<typename ExecutionPolicy, typename Container>
+inline auto covariance(ExecutionPolicy&& exec, Container const & u, Container const & v)
+{
+    return std::get<2>(means_and_covariance(exec, u, v));
+}
+
+template<typename Container>
+inline auto covariance(Container const & u, Container const & v)
+{
+    return covariance(std::execution::seq, u, v);
+}
+
+template<typename ExecutionPolicy, typename Container, typename Real = typename Container::value_type>
+inline auto correlation_coefficient(ExecutionPolicy&& exec, Container const & u, Container const & v)
+{
+    if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>)
+    {
+        if constexpr (std::is_integral_v<Real>)
+        {
+            using ReturnType = std::tuple<double, double, double, double, double, double, double>;
+            return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
+        }
+        else
+        {
+            using ReturnType = std::tuple<Real, Real, Real, Real, Real, Real, Real>;
+            return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
+        }
+    }
+    else
+    {
+        if constexpr (std::is_integral_v<Real>)
+        {
+            using ReturnType = std::tuple<double, double, double, double, double, double, double>;
+            return std::get<5>(detail::correlation_coefficient_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
+        }
+        else
+        {
+            using ReturnType = std::tuple<Real, Real, Real, Real, Real, Real, Real>;
+            return std::get<5>(detail::correlation_coefficient_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
+        }
+    }
+}
+
+template<typename Container, typename Real = typename Container::value_type>
+inline auto correlation_coefficient(Container const & u, Container const & v)
+{
+    return correlation_coefficient(std::execution::seq, u, v);
+}
+
+#else // C++11 bindings
+
 template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
 inline auto means_and_covariance(Container const & u, Container const & v) -> std::tuple<double, double, double>
 {
-    return detail::means_and_covariance_impl<std::tuple<double, double, double>>(u, v);
+    using ReturnType = std::tuple<double, double, double, double>;
+    ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+    return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp));
 }
 
 template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
 inline auto means_and_covariance(Container const & u, Container const & v) -> std::tuple<Real, Real, Real>
 {
-    return detail::means_and_covariance_impl<std::tuple<Real, Real, Real>>(u, v);
+    using ReturnType = std::tuple<Real, Real, Real, Real>;
+    ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v));
+    return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp));
 }
 
 template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
 inline double covariance(Container const & u, Container const & v)
 {
-    std::tuple<double, double, double> temp = detail::means_and_covariance_impl<std::tuple<double, double, double>>(u, v);
-    return std::get<2>(temp);
+    using ReturnType = std::tuple<double, double, double, double>;
+    return std::get<2>(detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
 }
 
 template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
 inline Real covariance(Container const & u, Container const & v)
 {
-    std::tuple<Real, Real, Real> temp = detail::means_and_covariance_impl<std::tuple<Real, Real, Real>>(u, v);
-    return std::get<2>(temp);
+    using ReturnType = std::tuple<Real, Real, Real, Real>;
+    return std::get<2>(detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
 }
 
 template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
 inline double correlation_coefficient(Container const & u, Container const & v)
 {
-    return detail::correlation_coefficient_impl<double>(u, v);
+    using ReturnType = std::tuple<double, double, double, double, double, double, double>;
+    return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
 }
 
 template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
 inline Real correlation_coefficient(Container const & u, Container const & v)
 {
-    return detail::correlation_coefficient_impl<Real>(u, v);
+    using ReturnType = std::tuple<Real, Real, Real, Real, Real, Real, Real>;
+    return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)));
 }
 
-}}} // namespace boost::math::statistics
+#endif
+
+}}} // namespace boost::math::statistics
+
 #endif
diff --git a/include/boost/math/tools/random_vector.hpp b/include/boost/math/tools/random_vector.hpp
new file mode 100644
index 000000000..f528bdd85
--- /dev/null
+++ b/include/boost/math/tools/random_vector.hpp
@@ -0,0 +1,124 @@
+//  (C) Copyright Nick Thompson 2018.
+//  (C) Copyright Matt Borland 2021.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <vector>
+#include <random>
+#include <type_traits>
+#include <cstddef>
+#include <boost/multiprecision/cpp_bin_float.hpp>
+#include <boost/multiprecision/cpp_complex.hpp>
+
+using boost::multiprecision::cpp_bin_float_50;
+using boost::multiprecision::cpp_complex_50;
+
+namespace boost { namespace math {
+
+// To stress test, set global_seed = 0, global_size = huge.
+static const std::size_t global_seed = 0;
+static const std::size_t global_size = 128;
+
+template<typename T, typename std::enable_if<std::is_floating_point<T>::value, bool>::type = true>
+std::vector<T> generate_random_vector(std::size_t size, std::size_t seed)
+{
+    if (seed == 0)
+    {
+        std::random_device rd;
+        seed = rd();
+    }
+    std::vector<T> v(size);
+
+    std::mt19937 gen(seed);
+
+    std::normal_distribution<T> dis(0, 1);
+    for(std::size_t i = 0; i < v.size(); ++i)
+    {
+        v[i] = dis(gen);
+    }
+    return v;
+}
+
+template<typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
+std::vector<T> generate_random_vector(std::size_t size, std::size_t seed)
+{
+    if (seed == 0)
+    {
+        std::random_device rd;
+        seed = rd();
+    }
+    std::vector<T> v(size);
+
+    std::mt19937 gen(seed);
+
+    // Rescaling by larger than 2 is UB!
+    std::uniform_int_distribution<T> dis(std::numeric_limits<T>::lowest()/2, (std::numeric_limits<T>::max)()/2);
+    for (std::size_t i = 0; i < v.size(); ++i)
+    {
+        v[i] = dis(gen);
+    }
+    return v;
+}
+
+template<typename T, typename std::enable_if<boost::is_complex<T>::value, bool>::type = true>
+std::vector<T> generate_random_vector(std::size_t size, std::size_t seed)
+{
+    if (seed == 0)
+    {
+        std::random_device rd;
+        seed = rd();
+    }
+    std::vector<T> v(size);
+
+    std::mt19937 gen(seed);
+    
+    std::normal_distribution<typename T::value_type> dis(0, 1);
+    for (std::size_t i = 0; i < v.size(); ++i)
+    {
+        v[i] = {dis(gen), dis(gen)};
+    }
+    return v;  
+}
+
+template<typename T, typename std::enable_if<std::is_same<T, cpp_complex_50>::value , bool>::type = true>
+std::vector<T> generate_random_vector(std::size_t size, std::size_t seed)
+{
+    if (seed == 0)
+    {
+        std::random_device rd;
+        seed = rd();
+    }
+    std::vector<T> v(size);
+
+    std::mt19937 gen(seed);
+    
+    std::normal_distribution<long double> dis(0, 1);
+    for (std::size_t i = 0; i < v.size(); ++i)
+    {
+        v[i] = {dis(gen), dis(gen)};
+    }
+    return v;
+}
+
+template<typename T, typename std::enable_if<std::is_same<T, cpp_bin_float_50>::value , bool>::type = true>
+std::vector<T> generate_random_vector(std::size_t size, std::size_t seed)
+{
+    if (seed == 0)
+    {
+        std::random_device rd;
+        seed = rd();
+    }
+    std::vector<T> v(size);
+
+    std::mt19937 gen(seed);
+
+    std::normal_distribution<long double> dis(0, 1);
+    for (std::size_t i = 0; i < v.size(); ++i)
+    {
+        v[i] = dis(gen);
+    }
+    return v;
+}
+
+}} // Namesapces
diff --git a/reporting/performance/bivariate_statistics_performance.cpp b/reporting/performance/bivariate_statistics_performance.cpp
new file mode 100644
index 000000000..a0e73b099
--- /dev/null
+++ b/reporting/performance/bivariate_statistics_performance.cpp
@@ -0,0 +1,79 @@
+//  (C) Copyright Matt Borland 2021.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <vector>
+#include <boost/math/tools/random_vector.hpp>
+#include <boost/math/statistics/bivariate_statistics.hpp>
+#include <benchmark/benchmark.h>
+
+using boost::math::generate_random_vector;
+
+template<typename T>
+void seq_covariance(benchmark::State& state)
+{
+    constexpr std::size_t seed {};
+    const std::size_t size = state.range(0);
+    std::vector<T> u = generate_random_vector<T>(size, seed);
+    std::vector<T> v = generate_random_vector<T>(size, seed);
+
+    for(auto _ : state)
+    {
+        benchmark::DoNotOptimize(boost::math::statistics::covariance(std::execution::seq, u, v));
+    }
+    state.SetComplexityN(state.range(0));
+}
+
+template<typename T>
+void par_covariance(benchmark::State& state)
+{
+    constexpr std::size_t seed {};
+    const std::size_t size = state.range(0);
+    std::vector<T> u = generate_random_vector<T>(size, seed);
+    std::vector<T> v = generate_random_vector<T>(size, seed);
+
+    for(auto _ : state)
+    {
+        benchmark::DoNotOptimize(boost::math::statistics::covariance(std::execution::par, u, v));
+    }
+    state.SetComplexityN(state.range(0));
+}
+
+template<typename T>
+void seq_correlation(benchmark::State& state)
+{
+    constexpr std::size_t seed {};
+    const std::size_t size = state.range(0);
+    std::vector<T> u = generate_random_vector<T>(size, seed);
+    std::vector<T> v = generate_random_vector<T>(size, seed);
+
+    for(auto _ : state)
+    {
+        benchmark::DoNotOptimize(boost::math::statistics::correlation_coefficient(std::execution::seq, u, v));
+    }
+    state.SetComplexityN(state.range(0));
+}
+
+template<typename T>
+void par_correlation(benchmark::State& state)
+{
+    constexpr std::size_t seed {};
+    const std::size_t size = state.range(0);
+    std::vector<T> u = generate_random_vector<T>(size, seed);
+    std::vector<T> v = generate_random_vector<T>(size, seed);
+
+    for(auto _ : state)
+    {
+        benchmark::DoNotOptimize(boost::math::statistics::correlation_coefficient(std::execution::par, u, v));
+    }
+    state.SetComplexityN(state.range(0));
+}
+
+BENCHMARK_TEMPLATE(seq_covariance, double)->RangeMultiplier(2)->Range(1 << 6, 1 << 20)->Complexity(benchmark::oN)->UseRealTime();
+BENCHMARK_TEMPLATE(par_covariance, double)->RangeMultiplier(2)->Range(1 << 6, 1 << 20)->Complexity(benchmark::oN)->UseRealTime();
+
+BENCHMARK_TEMPLATE(seq_correlation, double)->RangeMultiplier(2)->Range(1 << 6, 1 << 20)->Complexity(benchmark::oN)->UseRealTime();
+BENCHMARK_TEMPLATE(par_correlation, double)->RangeMultiplier(2)->Range(1 << 6, 1 << 20)->Complexity(benchmark::oN)->UseRealTime();
+
+BENCHMARK_MAIN();
diff --git a/test/bivariate_statistics_test.cpp b/test/bivariate_statistics_test.cpp
index 19acb937e..ff064401e 100644
--- a/test/bivariate_statistics_test.cpp
+++ b/test/bivariate_statistics_test.cpp
@@ -38,7 +38,314 @@ using boost::multiprecision::cpp_complex_50;
 using  boost::math::statistics::means_and_covariance;
 using  boost::math::statistics::covariance;
 
-template<class Real>
+#if (__cplusplus > 201700 || _MSVC_LANG > 201700) && (__GNUC__ > 9 || (__clang_major__ > 9 && defined __GLIBCXX__)  || _MSC_VER > 1927)
+#include <execution>
+
+template<typename Real, typename ExecutionPolicy>
+void test_covariance(ExecutionPolicy&& exec)
+{
+    std::cout << std::setprecision(std::numeric_limits<Real>::digits10+1);
+    Real tol = std::numeric_limits<Real>::epsilon();
+    using std::abs;
+
+    // Covariance of a single thing is zero:
+    std::array<Real, 1> u1{8};
+    std::array<Real, 1> v1{17};
+    std::tuple<Real, Real, Real> temp = means_and_covariance(exec, u1, v1);
+    Real mu_u1 = std::get<0>(temp);
+    Real mu_v1 = std::get<1>(temp);
+    Real cov1 = std::get<2>(temp);
+
+    BOOST_TEST(abs(cov1) < tol);
+    BOOST_TEST(abs(mu_u1 - 8) < tol);
+    BOOST_TEST(abs(mu_v1 - 17) < tol);
+
+
+    std::array<Real, 2> u2{8, 4};
+    std::array<Real, 2> v2{3, 7};
+    temp = means_and_covariance(exec, u2, v2);
+    Real mu_u2 = std::get<0>(temp);
+    Real mu_v2 = std::get<1>(temp);
+    Real cov2 = std::get<2>(temp);
+
+    BOOST_TEST(abs(cov2+4) < tol);
+    BOOST_TEST(abs(mu_u2 - 6) < tol);
+    BOOST_TEST(abs(mu_v2 - 5) < tol);
+
+    std::vector<Real> u3{1,2,3};
+    std::vector<Real> v3{1,1,1};
+
+    temp = means_and_covariance(exec, u3, v3);
+    Real mu_u3 = std::get<0>(temp);
+    Real mu_v3 = std::get<1>(temp);
+    Real cov3 = std::get<2>(temp);
+
+    // Since v is constant, covariance(u,v) = 0 against everything any u:
+    BOOST_TEST(abs(cov3) < tol);
+    BOOST_TEST(abs(mu_u3 - 2) < tol);
+    BOOST_TEST(abs(mu_v3 - 1) < tol);
+    // Make sure we pull the correct symbol out of means_and_covariance:
+    cov3 = covariance(exec, u3, v3);
+    BOOST_TEST(abs(cov3) < tol);
+
+    cov3 = covariance(exec, v3, u3);
+    // Covariance is symmetric: cov(u,v) = cov(v,u)
+    BOOST_TEST(abs(cov3) < tol);
+
+    // cov(u,u) = sigma(u)^2:
+    cov3 = covariance(exec, u3, u3);
+    Real expected = Real(2)/Real(3);
+
+    BOOST_TEST(abs(cov3 - expected) < tol);
+
+    std::mt19937 gen(15);
+    // Can't template standard library on multiprecision, so use double and cast back:
+    std::uniform_real_distribution<double> dis(-1.0, 1.0);
+    std::vector<Real> u(500);
+    std::vector<Real> v(500);
+    for(size_t i = 0; i < u.size(); ++i)
+    {
+        u[i] = (Real) dis(gen);
+        v[i] = (Real) dis(gen);
+    }
+
+    Real mu_u = boost::math::statistics::mean(u);
+    Real mu_v = boost::math::statistics::mean(v);
+    Real sigma_u_sq = boost::math::statistics::variance(u);
+    Real sigma_v_sq = boost::math::statistics::variance(v);
+
+    temp = means_and_covariance(exec, u, v);
+    Real mu_u_ = std::get<0>(temp);
+    Real mu_v_ = std::get<1>(temp);
+    Real cov_uv = std::get<2>(temp);
+
+    BOOST_TEST(abs(mu_u - mu_u_) < tol);
+    BOOST_TEST(abs(mu_v - mu_v_) < tol);
+
+    // Cauchy-Schwartz inequality:
+    BOOST_TEST(cov_uv*cov_uv <= sigma_u_sq*sigma_v_sq);
+    // cov(X, X) = sigma(X)^2:
+    Real cov_uu = covariance(exec, u, u);
+    BOOST_TEST(abs(cov_uu - sigma_u_sq) < tol);
+    Real cov_vv = covariance(exec, v, v);
+    BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol);
+}
+
+template<typename Z, typename ExecutionPolicy>
+void test_integer_covariance(ExecutionPolicy&& exec)
+{
+    std::cout << std::setprecision(std::numeric_limits<double>::digits10+1);
+    double tol = std::numeric_limits<double>::epsilon();
+    using std::abs;
+
+    // Covariance of a single thing is zero:
+    std::array<Z, 1> u1{8};
+    std::array<Z, 1> v1{17};
+    std::tuple<double, double, double> temp = means_and_covariance(exec, u1, v1);
+    double mu_u1 = std::get<0>(temp);
+    double mu_v1 = std::get<1>(temp);
+    double cov1 = std::get<2>(temp);
+
+    BOOST_TEST(abs(cov1) < tol);
+    BOOST_TEST(abs(mu_u1 - 8) < tol);
+    BOOST_TEST(abs(mu_v1 - 17) < tol);
+
+
+    std::array<Z, 2> u2{8, 4};
+    std::array<Z, 2> v2{3, 7};
+    temp = means_and_covariance(exec, u2, v2);
+    double mu_u2 = std::get<0>(temp);
+    double mu_v2 = std::get<1>(temp);
+    double cov2 = std::get<2>(temp);
+
+    BOOST_TEST(abs(cov2+4) < tol);
+    BOOST_TEST(abs(mu_u2 - 6) < tol);
+    BOOST_TEST(abs(mu_v2 - 5) < tol);
+
+    std::vector<Z> u3{1,2,3};
+    std::vector<Z> v3{1,1,1};
+
+    temp = means_and_covariance(exec, u3, v3);
+    double mu_u3 = std::get<0>(temp);
+    double mu_v3 = std::get<1>(temp);
+    double cov3 = std::get<2>(temp);
+
+    // Since v is constant, covariance(u,v) = 0 against everything any u:
+    BOOST_TEST(abs(cov3) < tol);
+    BOOST_TEST(abs(mu_u3 - 2) < tol);
+    BOOST_TEST(abs(mu_v3 - 1) < tol);
+    // Make sure we pull the correct symbol out of means_and_covariance:
+    cov3 = covariance(exec, u3, v3);
+    BOOST_TEST(abs(cov3) < tol);
+
+    cov3 = covariance(exec, v3, u3);
+    // Covariance is symmetric: cov(u,v) = cov(v,u)
+    BOOST_TEST(abs(cov3) < tol);
+
+    // cov(u,u) = sigma(u)^2:
+    cov3 = covariance(exec, u3, u3);
+    double expected = double(2)/double(3);
+
+    BOOST_TEST(abs(cov3 - expected) < tol);
+
+    std::mt19937 gen(15);
+    // Can't template standard library on multiprecision, so use double and cast back:
+    std::uniform_real_distribution<double> dis(-1.0, 1.0);
+    std::vector<Z> u(500);
+    std::vector<Z> v(500);
+    for(size_t i = 0; i < u.size(); ++i)
+    {
+        u[i] = (Z) dis(gen);
+        v[i] = (Z) dis(gen);
+    }
+
+    double mu_u = boost::math::statistics::mean(u);
+    double mu_v = boost::math::statistics::mean(v);
+    double sigma_u_sq = boost::math::statistics::variance(u);
+    double sigma_v_sq = boost::math::statistics::variance(v);
+
+    temp = means_and_covariance(exec, u, v);
+    double mu_u_ = std::get<0>(temp);
+    double mu_v_ = std::get<1>(temp);
+    double cov_uv = std::get<2>(temp);
+
+    BOOST_TEST(abs(mu_u - mu_u_) < tol);
+    BOOST_TEST(abs(mu_v - mu_v_) < tol);
+
+    // Cauchy-Schwartz inequality:
+    BOOST_TEST(cov_uv*cov_uv <= sigma_u_sq*sigma_v_sq);
+    // cov(X, X) = sigma(X)^2:
+    double cov_uu = covariance(exec, u, u);
+    BOOST_TEST(abs(cov_uu - sigma_u_sq) < tol);
+    double cov_vv = covariance(exec, v, v);
+    BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol);
+}
+
+template<typename Real, typename ExecutionPolicy>
+void test_correlation_coefficient(ExecutionPolicy&& exec)
+{
+    using boost::math::statistics::correlation_coefficient;
+    using std::abs;
+    using std::sqrt;
+
+    Real tol = std::numeric_limits<Real>::epsilon();
+    std::vector<Real> u{1};
+    std::vector<Real> v{1};
+    Real rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv - 1) < tol);
+
+    u = {1,1};
+    v = {1,1};
+    rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv - 1) < tol);
+
+    u = {1, 2, 3};
+    v = {1, 2, 3};
+    rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv - 1) < tol);
+
+    u = {1, 2, 3};
+    v = {-1, -2, -3};
+    rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv + 1) < tol);
+
+    rho_uv = correlation_coefficient(exec, v, u);
+    BOOST_TEST(abs(rho_uv + 1) < tol);
+
+    u = {1, 2, 3};
+    v = {0, 0, 0};
+    rho_uv = correlation_coefficient(exec, v, u);
+    BOOST_TEST(abs(rho_uv) < tol);
+
+    u = {1, 2, 3};
+    v = {0, 0, 3};
+    rho_uv = correlation_coefficient(exec, v, u);
+    // mu_u = 2, sigma_u^2 = 2/3, mu_v = 1, sigma_v^2 = 2, cov(u,v) = 1.
+    BOOST_TEST(abs(rho_uv - sqrt(Real(3))/Real(2)) < tol);
+}
+
+template<typename Z, typename ExecutionPolicy>
+void test_integer_correlation_coefficient(ExecutionPolicy&& exec)
+{
+    using boost::math::statistics::correlation_coefficient;
+    using std::abs;
+    using std::sqrt;
+
+    double tol = std::numeric_limits<double>::epsilon();
+    std::vector<Z> u{1};
+    std::vector<Z> v{1};
+    double rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv - 1.0) < tol);
+
+    u = {1,1};
+    v = {1,1};
+    rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv - 1.0) < tol);
+
+    u = {1, 2, 3};
+    v = {1, 2, 3};
+    rho_uv = correlation_coefficient(exec, u, v);
+    BOOST_TEST(abs(rho_uv - 1.0) < tol);
+
+    rho_uv = correlation_coefficient(exec, v, u);
+    BOOST_TEST(abs(rho_uv - 1.0) < tol);
+
+    u = {1, 2, 3};
+    v = {0, 0, 0};
+    rho_uv = correlation_coefficient(exec, v, u);
+    BOOST_TEST(abs(rho_uv) < tol);
+
+    u = {1, 2, 3};
+    v = {0, 0, 3};
+    rho_uv = correlation_coefficient(exec, v, u);
+    // mu_u = 2, sigma_u^2 = 2/3, mu_v = 1, sigma_v^2 = 2, cov(u,v) = 1.
+    BOOST_TEST(abs(rho_uv - sqrt(double(3))/double(2)) < tol);
+}
+
+int main()
+{
+    test_covariance<float>(std::execution::seq);
+    test_covariance<float>(std::execution::par);
+    test_covariance<double>(std::execution::seq);
+    test_covariance<double>(std::execution::par);
+    test_covariance<long double>(std::execution::seq);
+    test_covariance<long double>(std::execution::par);
+    test_covariance<cpp_bin_float_50>(std::execution::seq);
+    test_covariance<cpp_bin_float_50>(std::execution::par);
+    
+    test_integer_covariance<int>(std::execution::seq);
+    test_integer_covariance<int>(std::execution::par);
+    test_integer_covariance<int32_t>(std::execution::seq);
+    test_integer_covariance<int32_t>(std::execution::par);
+    test_integer_covariance<int64_t>(std::execution::seq);
+    test_integer_covariance<int64_t>(std::execution::par);
+    test_integer_covariance<uint32_t>(std::execution::seq);
+    test_integer_covariance<uint32_t>(std::execution::par);
+
+    test_correlation_coefficient<float>(std::execution::seq);
+    test_correlation_coefficient<float>(std::execution::par);
+    test_correlation_coefficient<double>(std::execution::seq);
+    test_correlation_coefficient<double>(std::execution::par);
+    test_correlation_coefficient<long double>(std::execution::seq);
+    test_correlation_coefficient<long double>(std::execution::par);
+    test_correlation_coefficient<cpp_bin_float_50>(std::execution::seq);
+    test_correlation_coefficient<cpp_bin_float_50>(std::execution::par);
+    
+    test_integer_correlation_coefficient<int>(std::execution::seq);
+    test_integer_correlation_coefficient<int>(std::execution::par);
+    test_integer_correlation_coefficient<int32_t>(std::execution::seq);
+    test_integer_correlation_coefficient<int32_t>(std::execution::par);
+    test_integer_correlation_coefficient<int64_t>(std::execution::seq);
+    test_integer_correlation_coefficient<int64_t>(std::execution::par);
+    test_integer_correlation_coefficient<uint32_t>(std::execution::seq);
+    test_integer_correlation_coefficient<uint32_t>(std::execution::par);
+    
+    return boost::report_errors();
+}
+
+#else
+
+template<typename Real>
 void test_covariance()
 {
     std::cout << std::setprecision(std::numeric_limits<Real>::digits10+1);
@@ -128,7 +435,7 @@ void test_covariance()
     BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol);
 }
 
-template<class Z>
+template<typename Z>
 void test_integer_covariance()
 {
     std::cout << std::setprecision(std::numeric_limits<double>::digits10+1);
@@ -218,7 +525,7 @@ void test_integer_covariance()
     BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol);
 }
 
-template<class Real>
+template<typename Real>
 void test_correlation_coefficient()
 {
     using boost::math::statistics::correlation_coefficient;
@@ -261,7 +568,7 @@ void test_correlation_coefficient()
     BOOST_TEST(abs(rho_uv - sqrt(Real(3))/Real(2)) < tol);
 }
 
-template<class Z>
+template<typename Z>
 void test_integer_correlation_coefficient()
 {
     using boost::math::statistics::correlation_coefficient;
@@ -323,3 +630,5 @@ int main()
 
     return boost::report_errors();
 }
+
+#endif