Mirror of https://github.com/boostorg/math.git (synced 2026-01-19 04:22:09 +00:00)
diff --git a/include/boost/math/distributions/detail/inv_discrete_quantile.hpp b/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
@@ -6,7 +6,11 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_DETAIL_INV_DISCRETE_QUANTILE
 #define BOOST_MATH_DISTRIBUTIONS_DETAIL_INV_DISCRETE_QUANTILE
 
 #include <algorithm>
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/precision.hpp>
 #include <boost/math/tools/toms748_solve.hpp>
+#include <boost/math/tools/tuple.hpp>
 
 namespace boost{ namespace math{ namespace detail{
@@ -19,10 +23,10 @@ struct distribution_quantile_finder
    typedef typename Dist::value_type value_type;
    typedef typename Dist::policy_type policy_type;
 
-   distribution_quantile_finder(const Dist d, value_type p, bool c)
+   BOOST_MATH_GPU_ENABLED distribution_quantile_finder(const Dist d, value_type p, bool c)
       : dist(d), target(p), comp(c) {}
 
-   value_type operator()(value_type const& x)
+   BOOST_MATH_GPU_ENABLED value_type operator()(value_type const& x)
    {
       return comp ? value_type(target - cdf(complement(dist, x))) : value_type(cdf(dist, x) - target);
    }
@@ -42,24 +46,24 @@ private:
 // in the root no longer being bracketed.
 //
 template <class Real, class Tol>
-void adjust_bounds(Real& /* a */, Real& /* b */, Tol const& /* tol */){}
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& /* a */, Real& /* b */, Tol const& /* tol */){}
 
 template <class Real>
-void adjust_bounds(Real& /* a */, Real& b, tools::equal_floor const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& /* a */, Real& b, tools::equal_floor const& /* tol */)
 {
    BOOST_MATH_STD_USING
    b -= tools::epsilon<Real>() * b;
 }
 
 template <class Real>
-void adjust_bounds(Real& a, Real& /* b */, tools::equal_ceil const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& a, Real& /* b */, tools::equal_ceil const& /* tol */)
 {
    BOOST_MATH_STD_USING
    a += tools::epsilon<Real>() * a;
 }
 
 template <class Real>
-void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol */)
 {
    BOOST_MATH_STD_USING
    a += tools::epsilon<Real>() * a;
@@ -69,7 +73,7 @@ void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol
 // This is where all the work is done:
 //
 template <class Dist, class Tolerance>
-typename Dist::value_type
+BOOST_MATH_GPU_ENABLED typename Dist::value_type
 do_inverse_discrete_quantile(
    const Dist& dist,
    const typename Dist::value_type& p,
@@ -78,7 +82,7 @@ typename Dist::value_type
    const typename Dist::value_type& multiplier,
    typename Dist::value_type adder,
    const Tolerance& tol,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    typedef typename Dist::policy_type policy_type;
@@ -100,7 +104,7 @@ typename Dist::value_type
       guess = min_bound;
 
    value_type fa = f(guess);
-   std::uintmax_t count = max_iter - 1;
+   boost::math::uintmax_t count = max_iter - 1;
    value_type fb(fa), a(guess), b =0; // Compiler warning C4701: potentially uninitialized local variable 'b' used
 
    if(fa == 0)
@@ -130,7 +134,7 @@ typename Dist::value_type
    else
    {
       b = a;
-      a = (std::max)(value_type(b - 1), value_type(0));
+      a = BOOST_MATH_GPU_SAFE_MAX(value_type(b - 1), value_type(0));
      if(a < min_bound)
         a = min_bound;
      fa = f(a);
@@ -153,7 +157,7 @@ typename Dist::value_type
    // If we're looking for a large result, then bump "adder" up
    // by a bit to increase our chances of bracketing the root:
    //
-   //adder = (std::max)(adder, 0.001f * guess);
+   //adder = BOOST_MATH_GPU_SAFE_MAX(adder, 0.001f * guess);
    if(fa < 0)
    {
       b = a + adder;
@@ -162,7 +166,7 @@ typename Dist::value_type
    }
    else
    {
-      b = (std::max)(value_type(a - adder), value_type(0));
+      b = BOOST_MATH_GPU_SAFE_MAX(value_type(a - adder), value_type(0));
      if(b < min_bound)
         b = min_bound;
    }
@@ -186,7 +190,7 @@ typename Dist::value_type
    }
    else
    {
-      b = (std::max)(value_type(a - adder), value_type(0));
+      b = BOOST_MATH_GPU_SAFE_MAX(value_type(a - adder), value_type(0));
      if(b < min_bound)
         b = min_bound;
    }
@@ -195,9 +199,8 @@ typename Dist::value_type
    }
    if(a > b)
    {
-      using std::swap;
-      swap(a, b);
-      swap(fa, fb);
+      BOOST_MATH_GPU_SAFE_SWAP(a, b);
+      BOOST_MATH_GPU_SAFE_SWAP(fa, fb);
    }
 }
 //
@@ -274,7 +277,7 @@ typename Dist::value_type
    //
    // Go ahead and find the root:
    //
-   std::pair<value_type, value_type> r = toms748_solve(f, a, b, fa, fb, tol, count, policy_type());
+   boost::math::pair<value_type, value_type> r = toms748_solve(f, a, b, fa, fb, tol, count, policy_type());
    max_iter += count;
    if (max_iter >= policies::get_max_root_iterations<policy_type>())
    {
@@ -293,7 +296,7 @@ typename Dist::value_type
 // is very close 1.
 //
 template <class Dist>
-inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type cc = ceil(result);
@@ -325,7 +328,7 @@ inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::va
 #endif
 
 template <class Dist>
-inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type cc = floor(result);
@@ -339,7 +342,11 @@ inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::val
    //
    while(true)
    {
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+      cc = ceil(nextafter(result, tools::max_value<typename Dist::value_type>()));
+#else
       cc = ceil(float_next(result));
+#endif
      if(cc > support(d).second)
         break;
      pp = c ? cdf(complement(d, cc)) : cdf(d, cc);
@@ -362,7 +369,7 @@ inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::val
 // to an int where required.
 //
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
 inverse_discrete_quantile(
    const Dist& dist,
    typename Dist::value_type p,
@@ -371,7 +378,7 @@ inline typename Dist::value_type
    const typename Dist::value_type& multiplier,
    const typename Dist::value_type& adder,
    const policies::discrete_quantile<policies::real>&,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    if(p > 0.5)
    {
@@ -393,7 +400,7 @@ inline typename Dist::value_type
 }
 
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
 inverse_discrete_quantile(
    const Dist& dist,
    const typename Dist::value_type& p,
@@ -402,7 +409,7 @@ inline typename Dist::value_type
    const typename Dist::value_type& multiplier,
    const typename Dist::value_type& adder,
    const policies::discrete_quantile<policies::integer_round_outwards>&,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -436,7 +443,7 @@ inline typename Dist::value_type
 }
 
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
 inverse_discrete_quantile(
    const Dist& dist,
    const typename Dist::value_type& p,
@@ -445,7 +452,7 @@ inline typename Dist::value_type
    const typename Dist::value_type& multiplier,
    const typename Dist::value_type& adder,
    const policies::discrete_quantile<policies::integer_round_inwards>&,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -479,7 +486,7 @@ inline typename Dist::value_type
 }
 
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
 inverse_discrete_quantile(
    const Dist& dist,
    const typename Dist::value_type& p,
@@ -488,7 +495,7 @@ inline typename Dist::value_type
    const typename Dist::value_type& multiplier,
    const typename Dist::value_type& adder,
    const policies::discrete_quantile<policies::integer_round_down>&,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -507,7 +514,7 @@ inline typename Dist::value_type
 }
 
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
 inverse_discrete_quantile(
    const Dist& dist,
    const typename Dist::value_type& p,
@@ -516,7 +523,7 @@ inline typename Dist::value_type
    const typename Dist::value_type& multiplier,
    const typename Dist::value_type& adder,
    const policies::discrete_quantile<policies::integer_round_up>&,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type pp = c ? 1 - p : p;
@@ -534,7 +541,7 @@ inline typename Dist::value_type
 }
 
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
 inverse_discrete_quantile(
    const Dist& dist,
    const typename Dist::value_type& p,
@@ -543,7 +550,7 @@ inline typename Dist::value_type
    const typename Dist::value_type& multiplier,
    const typename Dist::value_type& adder,
    const policies::discrete_quantile<policies::integer_round_nearest>&,
-   std::uintmax_t& max_iter)
+   boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
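Note on the GPU portability macros above: BOOST_MATH_GPU_SAFE_MAX and BOOST_MATH_GPU_SAFE_SWAP replace (std::max) and std::swap, which are not callable from device code. Their definitions are not part of this hunk; the following is only an illustrative sketch of the dispatch pattern (the helper names gpu_safe_max and gpu_safe_swap are hypothetical, and the real definitions in Boost.Math's config headers may differ):

    // Hypothetical sketch only, not the actual Boost.Math definitions.
    #ifdef BOOST_MATH_HAS_GPU_SUPPORT
    #  define BOOST_MATH_GPU_SAFE_MAX(a, b)  gpu_safe_max(a, b)  // device-callable max
    #  define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b) // device-callable swap
    #else
    #  define BOOST_MATH_GPU_SAFE_MAX(a, b)  (std::max)(a, b)
    #  define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b)
    #endif

The same reasoning explains boost::math::uintmax_t and boost::math::pair, which stand in for std::uintmax_t and std::pair so the code also compiles on targets without the host standard library.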
diff --git a/include/boost/math/distributions/fisher_f.hpp b/include/boost/math/distributions/fisher_f.hpp
@@ -1,5 +1,5 @@
 // Copyright John Maddock 2006.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -8,14 +8,15 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_FISHER_F_HPP
 #define BOOST_MATH_DISTRIBUTIONS_FISHER_F_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/tuple.hpp>
+#include <boost/math/tools/promotion.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/beta.hpp> // for incomplete beta.
 #include <boost/math/distributions/complement.hpp> // complements
 #include <boost/math/distributions/detail/common_error_handling.hpp> // error checks
 #include <boost/math/special_functions/fpclassify.hpp>
 
-#include <utility>
-
 namespace boost{ namespace math{
 
 template <class RealType = double, class Policy = policies::policy<> >
@@ -25,9 +26,9 @@ public:
    typedef RealType value_type;
    typedef Policy policy_type;
 
-   fisher_f_distribution(const RealType& i, const RealType& j) : m_df1(i), m_df2(j)
+   BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& i, const RealType& j) : m_df1(i), m_df2(j)
    {
-      static const char* function = "fisher_f_distribution<%1%>::fisher_f_distribution";
+      constexpr auto function = "fisher_f_distribution<%1%>::fisher_f_distribution";
      RealType result;
      detail::check_df(
         function, m_df1, &result, Policy());
@@ -35,11 +36,11 @@ public:
         function, m_df2, &result, Policy());
    } // fisher_f_distribution
 
-   RealType degrees_of_freedom1()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const
    {
       return m_df1;
    }
-   RealType degrees_of_freedom2()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const
    {
       return m_df2;
    }
@@ -60,29 +61,29 @@ fisher_f_distribution(RealType,RealType)->fisher_f_distribution<typename boost::
 #endif
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const fisher_f_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const fisher_f_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
   using boost::math::tools::max_value;
-  return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+  return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const fisher_f_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const fisher_f_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
   using boost::math::tools::max_value;
-  return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+  return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }
 
 template <class RealType, class Policy>
-RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
    RealType error_result = 0;
-   static const char* function = "boost::math::pdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::pdf(fisher_f_distribution<%1%> const&, %1%)";
    if(false == (detail::check_df(
          function, df1, &error_result, Policy())
          && detail::check_df(
@@ -132,9 +133,9 @@ RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
 {
-   static const char* function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -167,9 +168,9 @@ inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const R
 } // cdf
 
 template <class RealType, class Policy>
-inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, const RealType& p)
 {
-   static const char* function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -192,9 +193,9 @@ inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, co
 } // quantile
 
 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
 {
-   static const char* function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = c.dist.degrees_of_freedom1();
    RealType df2 = c.dist.degrees_of_freedom2();
    RealType x = c.param;
@@ -228,9 +229,9 @@ inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Pol
 }
 
 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
 {
-   static const char* function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = c.dist.degrees_of_freedom1();
    RealType df2 = c.dist.degrees_of_freedom2();
    RealType p = c.param;
@@ -252,9 +253,9 @@ inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType
 }
 
 template <class RealType, class Policy>
-inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
 { // Mean of F distribution = v.
-   static const char* function = "boost::math::mean(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::mean(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -273,9 +274,9 @@ inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
 } // mean
 
 template <class RealType, class Policy>
-inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
 { // Variance of F distribution.
-   static const char* function = "boost::math::variance(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::variance(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -294,9 +295,9 @@ inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
 } // variance
 
 template <class RealType, class Policy>
-inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::mode(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::mode(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -317,15 +318,15 @@ inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
 //template <class RealType, class Policy>
 //inline RealType median(const fisher_f_distribution<RealType, Policy>& dist)
 //{ // Median of Fisher F distribution is not defined.
-//  return tools::domain_error<RealType>(BOOST_CURRENT_FUNCTION, "Median is not implemented, result is %1%!", std::numeric_limits<RealType>::quiet_NaN());
+//  return tools::domain_error<RealType>(BOOST_CURRENT_FUNCTION, "Median is not implemented, result is %1%!", boost::math::numeric_limits<RealType>::quiet_NaN());
 //  } // median
 
 // Now implemented via quantile(half) in derived accessors.
 
 template <class RealType, class Policy>
-inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::skewness(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::skewness(fisher_f_distribution<%1%> const&)";
    BOOST_MATH_STD_USING // ADL of std names
    // See http://mathworld.wolfram.com/F-Distribution.html
    RealType df1 = dist.degrees_of_freedom1();
@@ -346,18 +347,18 @@ inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist);
+BOOST_MATH_GPU_ENABLED RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist);
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const fisher_f_distribution<RealType, Policy>& dist)
 {
    return 3 + kurtosis_excess(dist);
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::kurtosis_excess(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::kurtosis_excess(fisher_f_distribution<%1%> const&)";
    // See http://mathworld.wolfram.com/F-Distribution.html
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
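The fisher_f.hpp changes annotate the existing API rather than altering it, so host-side usage is unchanged. A minimal, self-contained example exercising the functions touched by this diff:

    #include <boost/math/distributions/fisher_f.hpp>
    #include <iostream>

    int main()
    {
        // F distribution with 5 and 10 degrees of freedom.
        boost::math::fisher_f_distribution<double> f(5.0, 10.0);

        double p = cdf(f, 2.0);       // P(X <= 2), found via ADL
        double q = quantile(f, 0.95); // 95% critical value
        std::cout << p << ' ' << q << '\n';
    }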
diff --git a/include/boost/math/distributions/gamma.hpp b/include/boost/math/distributions/gamma.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,22 +11,22 @@
 // http://mathworld.wolfram.com/GammaDistribution.html
 // http://en.wikipedia.org/wiki/Gamma_distribution
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/tuple.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/gamma.hpp>
 #include <boost/math/special_functions/digamma.hpp>
 #include <boost/math/distributions/detail/common_error_handling.hpp>
 #include <boost/math/distributions/complement.hpp>
 
-#include <utility>
-#include <type_traits>
-
 namespace boost{ namespace math
 {
 namespace detail
 {
 
 template <class RealType, class Policy>
-inline bool check_gamma_shape(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma_shape(
       const char* function,
       RealType shape,
       RealType* result, const Policy& pol)
@@ -41,7 +42,7 @@ inline bool check_gamma_shape(
 }
 
 template <class RealType, class Policy>
-inline bool check_gamma_x(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma_x(
       const char* function,
       RealType const& x,
       RealType* result, const Policy& pol)
@@ -57,7 +58,7 @@ inline bool check_gamma_x(
 }
 
 template <class RealType, class Policy>
-inline bool check_gamma(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma(
       const char* function,
       RealType scale,
       RealType shape,
@@ -75,19 +76,19 @@ public:
    using value_type = RealType;
    using policy_type = Policy;
 
-   explicit gamma_distribution(RealType l_shape, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED explicit gamma_distribution(RealType l_shape, RealType l_scale = 1)
       : m_shape(l_shape), m_scale(l_scale)
    {
      RealType result;
      detail::check_gamma("boost::math::gamma_distribution<%1%>::gamma_distribution", l_scale, l_shape, &result, Policy());
    }
 
-   RealType shape()const
+   BOOST_MATH_GPU_ENABLED RealType shape()const
    {
       return m_shape;
    }
 
-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
    {
       return m_scale;
    }
@@ -109,27 +110,27 @@ gamma_distribution(RealType,RealType)->gamma_distribution<typename boost::math::
 #endif
 
 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const gamma_distribution<RealType, Policy>& /* dist */)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const gamma_distribution<RealType, Policy>& /* dist */)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }
 
 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const gamma_distribution<RealType, Policy>& /* dist */)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const gamma_distribution<RealType, Policy>& /* dist */)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
    using boost::math::tools::min_value;
-   return std::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
 }
 
 template <class RealType, class Policy>
-inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -149,17 +150,17 @@ inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const Real
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
    using boost::math::lgamma;
 
-   static const char* function = "boost::math::logpdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const gamma_distribution<%1%>&, %1%)";
 
    RealType k = dist.shape();
    RealType theta = dist.scale();
 
-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(false == detail::check_gamma(function, theta, k, &result, Policy()))
       return result;
    if(false == detail::check_gamma_x(function, x, &result, Policy()))
@@ -167,7 +168,7 @@ inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const R
 
    if(x == 0)
    {
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }
 
    result = -k*log(theta) + (k-1)*log(x) - lgamma(k) - (x/theta);
@@ -176,11 +177,11 @@ inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const R
 } // logpdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -196,11 +197,11 @@ inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const Real
 } // cdf
 
 template <class RealType, class Policy>
-inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -220,11 +221,11 @@ inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const
 }
 
 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
 
    RealType shape = c.dist.shape();
    RealType scale = c.dist.scale();
@@ -241,11 +242,11 @@ inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy
 }
 
 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
 
    RealType shape = c.dist.shape();
    RealType scale = c.dist.scale();
@@ -266,11 +267,11 @@ inline RealType quantile(const complemented2_type<gamma_distribution<RealType, P
 }
 
 template <class RealType, class Policy>
-inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::mean(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::mean(const gamma_distribution<%1%>&)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -284,11 +285,11 @@ inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::variance(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::variance(const gamma_distribution<%1%>&)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -302,11 +303,11 @@ inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::mode(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::mode(const gamma_distribution<%1%>&)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -331,11 +332,11 @@ inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
 //}
 
 template <class RealType, class Policy>
-inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::skewness(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::skewness(const gamma_distribution<%1%>&)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -349,11 +350,11 @@ inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
 
-   static const char* function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)";
 
    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -367,18 +368,19 @@ inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const gamma_distribution<RealType, Policy>& dist)
 {
    return kurtosis_excess(dist) + 3;
 }
 
 template <class RealType, class Policy>
-inline RealType entropy(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING
 
    RealType k = dist.shape();
    RealType theta = dist.scale();
    using std::log;
-   return k + log(theta) + lgamma(k) + (1-k)*digamma(k);
+   return k + log(theta) + boost::math::lgamma(k) + (1-k)*digamma(k);
 }
 
 } // namespace math
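The last gamma.hpp hunk touches entropy(), whose return statement implements the standard closed form for the differential entropy of a Gamma(k, theta) variate, with psi the digamma function:

    h(X) = k + \ln\theta + \ln\Gamma(k) + (1 - k)\,\psi(k)

That is exactly k + log(theta) + boost::math::lgamma(k) + (1-k)*digamma(k) in the code; only the lgamma call is re-qualified so the boost::math overload is selected on device builds.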
diff --git a/include/boost/math/distributions/geometric.hpp b/include/boost/math/distributions/geometric.hpp
@@ -36,6 +36,9 @@
 #ifndef BOOST_MATH_SPECIAL_GEOMETRIC_HPP
 #define BOOST_MATH_SPECIAL_GEOMETRIC_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/tuple.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x) == Ix(a, b).
 #include <boost/math/distributions/complement.hpp> // complement.
@@ -45,10 +48,6 @@
 #include <boost/math/distributions/detail/inv_discrete_quantile.hpp>
 #include <boost/math/special_functions/log1p.hpp>
 
-#include <limits> // using std::numeric_limits;
-#include <utility>
-#include <cmath>
-
 #if defined (BOOST_MSVC)
 #  pragma warning(push)
 // This believed not now necessary, so commented out.
@@ -64,7 +63,7 @@ namespace boost
   {
     // Common error checking routines for geometric distribution function:
     template <class RealType, class Policy>
-    inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
+    BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
     {
       if( !(boost::math::isfinite)(p) || (p < 0) || (p > 1) )
       {
@@ -77,13 +76,13 @@ namespace boost
     }
 
     template <class RealType, class Policy>
-    inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol)
+    BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol)
     {
       return check_success_fraction(function, p, result, pol);
     }
 
     template <class RealType, class Policy>
-    inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
+    BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
     {
       if(check_dist(function, p, result, pol) == false)
       {
@@ -100,7 +99,7 @@ namespace boost
     } // Check_dist_and_k
 
     template <class RealType, class Policy>
-    inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol)
+    BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol)
     {
       if((check_dist(function, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false)
       {
@@ -117,7 +116,7 @@ namespace boost
       typedef RealType value_type;
       typedef Policy policy_type;
 
-      geometric_distribution(RealType p) : m_p(p)
+      BOOST_MATH_GPU_ENABLED geometric_distribution(RealType p) : m_p(p)
       { // Constructor stores success_fraction p.
         RealType result;
         geometric_detail::check_dist(
@@ -127,22 +126,22 @@ namespace boost
       } // geometric_distribution constructor.
 
       // Private data getter class member functions.
-      RealType success_fraction() const
+      BOOST_MATH_GPU_ENABLED RealType success_fraction() const
       { // Probability of success as fraction in range 0 to 1.
         return m_p;
       }
-      RealType successes() const
+      BOOST_MATH_GPU_ENABLED RealType successes() const
      { // Total number of successes r = 1 (for compatibility with negative binomial?).
        return 1;
      }
 
      // Parameter estimation.
      // (These are copies of negative_binomial distribution with successes = 1).
-      static RealType find_lower_bound_on_p(
+      BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p(
        RealType trials,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_lower_bound_on_p";
+        constexpr auto function = "boost::math::geometric<%1%>::find_lower_bound_on_p";
        RealType result = 0; // of error checks.
        RealType successes = 1;
        RealType failures = trials - successes;
@@ -163,11 +162,11 @@ namespace boost
        return ibeta_inv(successes, failures + 1, alpha, static_cast<RealType*>(nullptr), Policy());
      } // find_lower_bound_on_p
 
-      static RealType find_upper_bound_on_p(
+      BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p(
        RealType trials,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_upper_bound_on_p";
+        constexpr auto function = "boost::math::geometric<%1%>::find_upper_bound_on_p";
        RealType result = 0; // of error checks.
        RealType successes = 1;
        RealType failures = trials - successes;
@@ -195,12 +194,12 @@ namespace boost
      // Estimate number of trials :
      // "How many trials do I need to be P% sure of seeing k or fewer failures?"
 
-      static RealType find_minimum_number_of_trials(
+      BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials(
        RealType k,     // number of failures (k >= 0).
        RealType p,     // success fraction 0 <= p <= 1.
        RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_minimum_number_of_trials";
+        constexpr auto function = "boost::math::geometric<%1%>::find_minimum_number_of_trials";
        // Error checks:
        RealType result = 0;
        if(false == geometric_detail::check_dist_and_k(
@@ -213,12 +212,12 @@ namespace boost
        return result + k;
      } // RealType find_number_of_failures
 
-      static RealType find_maximum_number_of_trials(
+      BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
        RealType k,     // number of failures (k >= 0).
        RealType p,     // success fraction 0 <= p <= 1.
        RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_maximum_number_of_trials";
+        constexpr auto function = "boost::math::geometric<%1%>::find_maximum_number_of_trials";
        // Error checks:
        RealType result = 0;
        if(false == geometric_detail::check_dist_and_k(
@@ -244,22 +243,22 @@ namespace boost
 #endif
 
   template <class RealType, class Policy>
-  inline const std::pair<RealType, RealType> range(const geometric_distribution<RealType, Policy>& /* dist */)
+  BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const geometric_distribution<RealType, Policy>& /* dist */)
  { // Range of permissible values for random variable k.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
  }
 
  template <class RealType, class Policy>
-  inline const std::pair<RealType, RealType> support(const geometric_distribution<RealType, Policy>& /* dist */)
+  BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const geometric_distribution<RealType, Policy>& /* dist */)
  { // Range of supported values for random variable k.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
  }
 
  template <class RealType, class Policy>
-  inline RealType mean(const geometric_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType mean(const geometric_distribution<RealType, Policy>& dist)
  { // Mean of geometric distribution = (1-p)/p.
    return (1 - dist.success_fraction() ) / dist.success_fraction();
  } // mean
@@ -267,21 +266,21 @@ namespace boost
  // median implemented via quantile(half) in derived accessors.
 
  template <class RealType, class Policy>
-  inline RealType mode(const geometric_distribution<RealType, Policy>&)
+  BOOST_MATH_GPU_ENABLED inline RealType mode(const geometric_distribution<RealType, Policy>&)
  { // Mode of geometric distribution = zero.
    BOOST_MATH_STD_USING // ADL of std functions.
    return 0;
  } // mode
 
 template <class RealType, class Policy>
-  inline RealType variance(const geometric_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType variance(const geometric_distribution<RealType, Policy>& dist)
  { // Variance of Binomial distribution = (1-p) / p^2.
    return (1 - dist.success_fraction())
      / (dist.success_fraction() * dist.success_fraction());
  } // variance
 
 template <class RealType, class Policy>
-  inline RealType skewness(const geometric_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType skewness(const geometric_distribution<RealType, Policy>& dist)
  { // skewness of geometric distribution = 2-p / (sqrt(r(1-p))
    BOOST_MATH_STD_USING // ADL of std functions.
    RealType p = dist.success_fraction();
@@ -289,7 +288,7 @@ namespace boost
  } // skewness
 
 template <class RealType, class Policy>
-  inline RealType kurtosis(const geometric_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const geometric_distribution<RealType, Policy>& dist)
  { // kurtosis of geometric distribution
    // http://en.wikipedia.org/wiki/geometric is kurtosis_excess so add 3
    RealType p = dist.success_fraction();
@@ -297,7 +296,7 @@ namespace boost
  } // kurtosis
 
 template <class RealType, class Policy>
-  inline RealType kurtosis_excess(const geometric_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const geometric_distribution<RealType, Policy>& dist)
  { // kurtosis excess of geometric distribution
    // http://mathworld.wolfram.com/Kurtosis.html table of kurtosis_excess
    RealType p = dist.success_fraction();
@@ -312,11 +311,11 @@ namespace boost
  // chf of geometric distribution provided by derived accessors.
 
  template <class RealType, class Policy>
-  inline RealType pdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED inline RealType pdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
  { // Probability Density/Mass Function.
    BOOST_FPU_EXCEPTION_GUARD
    BOOST_MATH_STD_USING // For ADL of math functions.
-    static const char* function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)";
 
    RealType p = dist.success_fraction();
    RealType result = 0;
@@ -350,9 +349,9 @@ namespace boost
  } // geometric_pdf
 
 template <class RealType, class Policy>
-  inline RealType cdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED inline RealType cdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
  { // Cumulative Distribution Function of geometric.
-    static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
 
    // k argument may be integral, signed, or unsigned, or floating point.
    // If necessary, it has already been promoted from an integral type.
@@ -381,12 +380,10 @@ namespace boost
  } // cdf Cumulative Distribution Function geometric.
 
 template <class RealType, class Policy>
-  inline RealType logcdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED inline RealType logcdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
  { // Cumulative Distribution Function of geometric.
-    using std::pow;
-    using std::log;
-    using std::exp;
-    static const char* function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
+    BOOST_MATH_STD_USING
+    constexpr auto function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
 
    // k argument may be integral, signed, or unsigned, or floating point.
    // If necessary, it has already been promoted from an integral type.
@@ -399,7 +396,7 @@ namespace boost
        k,
        &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    if(k == 0)
    {
@@ -413,10 +410,10 @@ namespace boost
  } // logcdf Cumulative Distribution Function geometric.
 
 template <class RealType, class Policy>
-  inline RealType cdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
  { // Complemented Cumulative Distribution Function geometric.
    BOOST_MATH_STD_USING
-    static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
    // k argument may be integral, signed, or unsigned, or floating point.
    // If necessary, it has already been promoted from an integral type.
    RealType const& k = c.param;
@@ -438,10 +435,10 @@ namespace boost
  } // cdf Complemented Cumulative Distribution Function geometric.
 
 template <class RealType, class Policy>
-  inline RealType logcdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
  { // Complemented Cumulative Distribution Function geometric.
    BOOST_MATH_STD_USING
-    static const char* function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
    // k argument may be integral, signed, or unsigned, or floating point.
    // If necessary, it has already been promoted from an integral type.
    RealType const& k = c.param;
@@ -455,21 +452,21 @@ namespace boost
        k,
        &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
 
    return boost::math::log1p(-p, Policy()) * (k+1);
  } // logcdf Complemented Cumulative Distribution Function geometric.
 
 template <class RealType, class Policy>
-  inline RealType quantile(const geometric_distribution<RealType, Policy>& dist, const RealType& x)
+  BOOST_MATH_GPU_ENABLED inline RealType quantile(const geometric_distribution<RealType, Policy>& dist, const RealType& x)
  { // Quantile, percentile/100 or Percent Point geometric function.
    // Return the number of expected failures k for a given probability p.
 
    // Inverse cumulative Distribution Function or Quantile (percentile / 100) of geometric Probability.
    // k argument may be integral, signed, or unsigned, or floating point.
 
-    static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
    BOOST_MATH_STD_USING // ADL of std functions.
 
    RealType success_fraction = dist.success_fraction();
@@ -513,11 +510,11 @@ namespace boost
  } // RealType quantile(const geometric_distribution dist, p)
 
 template <class RealType, class Policy>
-  inline RealType quantile(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
  { // Quantile or Percent Point Binomial function.
    // Return the number of expected failures k for a given
    // complement of the probability Q = 1 - P.
-    static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
    BOOST_MATH_STD_USING
    // Error checks:
    RealType x = c.param;
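The std::uintmax_t to boost::math::uintmax_t signature changes run through every discrete_quantile policy overload of inverse_discrete_quantile above, but callers still select those overloads through the policy as before. A minimal host-side sketch using the geometric distribution from this diff:

    #include <boost/math/distributions/geometric.hpp>

    int main()
    {
        using namespace boost::math;
        using namespace boost::math::policies;

        // Default policy: discrete quantiles are rounded outwards to an integer.
        geometric_distribution<double> g(0.25);
        double k_rounded = quantile(g, 0.9);

        // discrete_quantile<real> requests the un-rounded quantile instead.
        typedef policy<discrete_quantile<real> > RealQuantile;
        geometric_distribution<double, RealQuantile> g2(0.25);
        double k_real = quantile(g2, 0.9);

        return k_real <= k_rounded ? 0 : 1; // trivially consume the results
    }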
diff --git a/include/boost/math/distributions/inverse_chi_squared.hpp b/include/boost/math/distributions/inverse_chi_squared.hpp
@@ -1,6 +1,6 @@
 // Copyright John Maddock 2010.
 // Copyright Paul A. Bristow 2010.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -9,6 +9,8 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_INVERSE_CHI_SQUARED_HPP
 #define BOOST_MATH_DISTRIBUTIONS_INVERSE_CHI_SQUARED_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/gamma.hpp> // for incomplete beta.
 #include <boost/math/distributions/complement.hpp> // for complements.
@@ -24,14 +26,12 @@
 // Weisstein, Eric W. "Inverse Chi-Squared Distribution." From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/InverseChi-SquaredDistribution.html
 
-#include <utility>
-
 namespace boost{ namespace math{
 
 namespace detail
 {
   template <class RealType, class Policy>
-  inline bool check_inverse_chi_squared( // Check both distribution parameters.
+  BOOST_MATH_GPU_ENABLED inline bool check_inverse_chi_squared( // Check both distribution parameters.
     const char* function,
     RealType degrees_of_freedom, // degrees_of_freedom (aka nu).
     RealType scale, // scale (aka sigma^2)
@@ -51,7 +51,7 @@ public:
   typedef RealType value_type;
   typedef Policy policy_type;
 
-  inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale)
+  BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale)
  {
    RealType result;
    detail::check_df(
@@ -62,7 +62,7 @@ public:
      m_scale, &result, Policy());
  } // inverse_chi_squared_distribution constructor
 
-  inverse_chi_squared_distribution(RealType df = 1) : m_df(df)
+  BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1) : m_df(df)
  {
    RealType result;
    m_scale = 1 / m_df ; // Default scale = 1 / degrees of freedom (Wikipedia definition 1).
@@ -71,11 +71,11 @@ public:
      m_df, &result, Policy());
  } // inverse_chi_squared_distribution
 
-  RealType degrees_of_freedom()const
+  BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const
  {
    return m_df; // aka nu
  }
-  RealType scale()const
+  BOOST_MATH_GPU_ENABLED RealType scale()const
  {
    return m_scale; // aka xi
  }
@@ -105,28 +105,28 @@ inverse_chi_squared_distribution(RealType,RealType)->inverse_chi_squared_distrib
 #endif
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const inverse_chi_squared_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const inverse_chi_squared_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
   using boost::math::tools::max_value;
-  return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + infinity.
+  return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + infinity.
 }
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const inverse_chi_squared_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const inverse_chi_squared_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  return std::pair<RealType, RealType>(static_cast<RealType>(0), tools::max_value<RealType>()); // 0 to + infinity.
+  return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), tools::max_value<RealType>()); // 0 to + infinity.
 }
 
 template <class RealType, class Policy>
-RealType pdf(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED RealType pdf(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
 {
   BOOST_MATH_STD_USING // for ADL of std functions.
   RealType df = dist.degrees_of_freedom();
   RealType scale = dist.scale();
   RealType error_result;
 
-  static const char* function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)";
+  constexpr auto function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)";
 
   if(false == detail::check_inverse_chi_squared
     (function, df, scale, &error_result, Policy())
@@ -159,9 +159,9 @@ RealType pdf(const inverse_chi_squared_distribution<RealType, Policy>& dist, con
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
 {
-  static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)";
+  constexpr auto function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)";
   RealType df = dist.degrees_of_freedom();
   RealType scale = dist.scale();
   RealType error_result;
@@ -188,13 +188,13 @@ inline RealType cdf(const inverse_chi_squared_distribution<RealType, Policy>& di
 } // cdf
 
 template <class RealType, class Policy>
 inline RealType quantile(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
inline RealType quantile(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
|
||||
{
|
||||
using boost::math::gamma_q_inv;
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
RealType scale = dist.scale();
|
||||
|
||||
static const char* function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)";
|
||||
constexpr auto function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)";
|
||||
// Error check:
|
||||
RealType error_result;
|
||||
if(false == detail::check_df(
|
||||
@@ -220,13 +220,13 @@ inline RealType quantile(const inverse_chi_squared_distribution<RealType, Policy
|
||||
} // quantile
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType cdf(const complemented2_type<inverse_chi_squared_distribution<RealType, Policy>, RealType>& c)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<inverse_chi_squared_distribution<RealType, Policy>, RealType>& c)
|
||||
{
|
||||
using boost::math::gamma_q_inv;
|
||||
RealType const& df = c.dist.degrees_of_freedom();
|
||||
RealType const& scale = c.dist.scale();
|
||||
RealType const& x = c.param;
|
||||
static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)";
|
||||
constexpr auto function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)";
|
||||
// Error check:
|
||||
RealType error_result;
|
||||
if(false == detail::check_df(
|
||||
@@ -251,14 +251,14 @@ inline RealType cdf(const complemented2_type<inverse_chi_squared_distribution<Re
|
||||
} // cdf(complemented
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType quantile(const complemented2_type<inverse_chi_squared_distribution<RealType, Policy>, RealType>& c)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<inverse_chi_squared_distribution<RealType, Policy>, RealType>& c)
|
||||
{
|
||||
using boost::math::gamma_q_inv;
|
||||
|
||||
RealType const& df = c.dist.degrees_of_freedom();
|
||||
RealType const& scale = c.dist.scale();
|
||||
RealType const& q = c.param;
|
||||
static const char* function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)";
|
||||
constexpr auto function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)";
|
||||
// Error check:
|
||||
RealType error_result;
|
||||
if(false == detail::check_df(function, df, &error_result, Policy()))
|
||||
@@ -280,12 +280,12 @@ inline RealType quantile(const complemented2_type<inverse_chi_squared_distributi
|
||||
} // quantile(const complement
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType mean(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
{ // Mean of inverse Chi-Squared distribution.
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
RealType scale = dist.scale();
|
||||
|
||||
static const char* function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)";
|
||||
constexpr auto function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)";
|
||||
if(df <= 2)
|
||||
return policies::raise_domain_error<RealType>(
|
||||
function,
|
||||
@@ -295,11 +295,11 @@ inline RealType mean(const inverse_chi_squared_distribution<RealType, Policy>& d
|
||||
} // mean
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType variance(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType variance(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
{ // Variance of inverse Chi-Squared distribution.
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
RealType scale = dist.scale();
|
||||
static const char* function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)";
|
||||
constexpr auto function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)";
|
||||
if(df <= 4)
|
||||
{
|
||||
return policies::raise_domain_error<RealType>(
|
||||
@@ -311,14 +311,14 @@ inline RealType variance(const inverse_chi_squared_distribution<RealType, Policy
|
||||
} // variance
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType mode(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
{ // mode is not defined in Mathematica.
|
||||
// See Discussion section http://en.wikipedia.org/wiki/Talk:Scaled-inverse-chi-square_distribution
|
||||
// for origin of the formula used below.
|
||||
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
RealType scale = dist.scale();
|
||||
static const char* function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)";
|
||||
constexpr auto function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)";
|
||||
if(df < 0)
|
||||
return policies::raise_domain_error<RealType>(
|
||||
function,
|
||||
@@ -341,11 +341,11 @@ inline RealType mode(const inverse_chi_squared_distribution<RealType, Policy>& d
|
||||
// Now implemented via quantile(half) in derived accessors.
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
BOOST_MATH_STD_USING // For ADL
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
static const char* function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)";
|
||||
constexpr auto function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)";
|
||||
if(df <= 6)
|
||||
return policies::raise_domain_error<RealType>(
|
||||
function,
|
||||
@@ -356,10 +356,10 @@ inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy
|
||||
}
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
|
||||
constexpr auto function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
|
||||
if(df <= 8)
|
||||
return policies::raise_domain_error<RealType>(
|
||||
function,
|
||||
@@ -370,10 +370,10 @@ inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy
|
||||
}
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType kurtosis_excess(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_chi_squared_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType df = dist.degrees_of_freedom();
|
||||
static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
|
||||
constexpr auto function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
|
||||
if(df <= 8)
|
||||
return policies::raise_domain_error<RealType>(
|
||||
function,
|
||||
|
||||
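As a sanity check on the accessors annotated above, a small host-only sketch (the parameter values are arbitrary):

    #include <boost/math/distributions/inverse_chi_squared.hpp>
    #include <iostream>

    int main()
    {
        boost::math::inverse_chi_squared_distribution<double> d(8.0, 0.5); // df = 8, scale = 0.5
        double p = cdf(d, 0.4);
        double x = quantile(d, p); // round trip, recovers ~0.4
        std::cout << d.degrees_of_freedom() << ' ' << d.scale() << ' ' << x << '\n';
    }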
@@ -2,6 +2,7 @@

// Copyright Paul A. Bristow 2010.
// Copyright John Maddock 2010.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -22,21 +23,21 @@
// http://mathworld.wolfram.com/GammaDistribution.html
// http://en.wikipedia.org/wiki/Gamma_distribution

#include <boost/math/tools/config.hpp>
#include <boost/math/tools/tuple.hpp>
#include <boost/math/tools/numeric_limits.hpp>
#include <boost/math/distributions/fwd.hpp>
#include <boost/math/special_functions/gamma.hpp>
#include <boost/math/distributions/detail/common_error_handling.hpp>
#include <boost/math/distributions/complement.hpp>

#include <utility>
#include <cfenv>

namespace boost{ namespace math
{
namespace detail
{

template <class RealType, class Policy>
inline bool check_inverse_gamma_shape(
BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma_shape(
const char* function, // inverse_gamma
RealType shape, // shape aka alpha
RealType* result, // to update, perhaps with NaN
@@ -57,7 +58,7 @@ inline bool check_inverse_gamma_shape(
} //bool check_inverse_gamma_shape

template <class RealType, class Policy>
inline bool check_inverse_gamma_x(
BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma_x(
const char* function,
RealType const& x,
RealType* result, const Policy& pol)
@@ -73,7 +74,7 @@ inline bool check_inverse_gamma_x(
}

template <class RealType, class Policy>
inline bool check_inverse_gamma(
BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma(
const char* function, // TODO swap these over, so shape is first.
RealType scale, // scale aka beta
RealType shape, // shape aka alpha
@@ -92,7 +93,7 @@ public:
using value_type = RealType;
using policy_type = Policy;

explicit inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1)
BOOST_MATH_GPU_ENABLED explicit inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1)
: m_shape(l_shape), m_scale(l_scale)
{
RealType result;
@@ -101,12 +102,12 @@ public:
l_scale, l_shape, &result, Policy());
}

RealType shape()const
BOOST_MATH_GPU_ENABLED RealType shape()const
{
return m_shape;
}

RealType scale()const
BOOST_MATH_GPU_ENABLED RealType scale()const
{
return m_scale;
}
@@ -132,27 +133,27 @@ inverse_gamma_distribution(RealType,RealType)->inverse_gamma_distribution<typena
// Allow random variable x to be zero, treated as a special case (unlike some definitions).

template <class RealType, class Policy>
inline std::pair<RealType, RealType> range(const inverse_gamma_distribution<RealType, Policy>& /* dist */)
BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const inverse_gamma_distribution<RealType, Policy>& /* dist */)
{ // Range of permissible values for random variable x.
using boost::math::tools::max_value;
return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
}

template <class RealType, class Policy>
inline std::pair<RealType, RealType> support(const inverse_gamma_distribution<RealType, Policy>& /* dist */)
BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const inverse_gamma_distribution<RealType, Policy>& /* dist */)
{ // Range of supported values for random variable x.
// This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
using boost::math::tools::max_value;
using boost::math::tools::min_value;
return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
}

template <class RealType, class Policy>
inline RealType pdf(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& x)
BOOST_MATH_GPU_ENABLED inline RealType pdf(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& x)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)";
constexpr auto function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -195,17 +196,17 @@ inline RealType pdf(const inverse_gamma_distribution<RealType, Policy>& dist, co
} // pdf

template <class RealType, class Policy>
inline RealType logpdf(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& x)
BOOST_MATH_GPU_ENABLED inline RealType logpdf(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& x)
{
BOOST_MATH_STD_USING // for ADL of std functions
using boost::math::lgamma;

static const char* function = "boost::math::logpdf(const inverse_gamma_distribution<%1%>&, %1%)";
constexpr auto function = "boost::math::logpdf(const inverse_gamma_distribution<%1%>&, %1%)";

RealType shape = dist.shape();
RealType scale = dist.scale();

RealType result = -std::numeric_limits<RealType>::infinity();
RealType result = -boost::math::numeric_limits<RealType>::infinity();
if(false == detail::check_inverse_gamma(function, scale, shape, &result, Policy()))
{ // distribution parameters bad.
return result;
@@ -232,11 +233,11 @@ inline RealType logpdf(const inverse_gamma_distribution<RealType, Policy>& dist,
} // pdf

template <class RealType, class Policy>
inline RealType cdf(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& x)
BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& x)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)";
constexpr auto function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -260,12 +261,12 @@ inline RealType cdf(const inverse_gamma_distribution<RealType, Policy>& dist, co
} // cdf

template <class RealType, class Policy>
inline RealType quantile(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& p)
BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_gamma_distribution<RealType, Policy>& dist, const RealType& p)
{
BOOST_MATH_STD_USING // for ADL of std functions
using boost::math::gamma_q_inv;

static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)";
constexpr auto function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -287,11 +288,11 @@ inline RealType quantile(const inverse_gamma_distribution<RealType, Policy>& dis
}

template <class RealType, class Policy>
inline RealType cdf(const complemented2_type<inverse_gamma_distribution<RealType, Policy>, RealType>& c)
BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<inverse_gamma_distribution<RealType, Policy>, RealType>& c)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

RealType shape = c.dist.shape();
RealType scale = c.dist.scale();
@@ -310,11 +311,11 @@ inline RealType cdf(const complemented2_type<inverse_gamma_distribution<RealType
}

template <class RealType, class Policy>
inline RealType quantile(const complemented2_type<inverse_gamma_distribution<RealType, Policy>, RealType>& c)
BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<inverse_gamma_distribution<RealType, Policy>, RealType>& c)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)";
constexpr auto function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)";

RealType shape = c.dist.shape();
RealType scale = c.dist.scale();
@@ -338,11 +339,11 @@ inline RealType quantile(const complemented2_type<inverse_gamma_distribution<Rea
}

template <class RealType, class Policy>
inline RealType mean(const inverse_gamma_distribution<RealType, Policy>& dist)
BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_gamma_distribution<RealType, Policy>& dist)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)";
constexpr auto function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -365,11 +366,11 @@ inline RealType mean(const inverse_gamma_distribution<RealType, Policy>& dist)
} // mean

template <class RealType, class Policy>
inline RealType variance(const inverse_gamma_distribution<RealType, Policy>& dist)
BOOST_MATH_GPU_ENABLED inline RealType variance(const inverse_gamma_distribution<RealType, Policy>& dist)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)";
constexpr auto function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -391,11 +392,11 @@ inline RealType variance(const inverse_gamma_distribution<RealType, Policy>& dis
}

template <class RealType, class Policy>
inline RealType mode(const inverse_gamma_distribution<RealType, Policy>& dist)
BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_gamma_distribution<RealType, Policy>& dist)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)";
constexpr auto function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -418,11 +419,11 @@ inline RealType mode(const inverse_gamma_distribution<RealType, Policy>& dist)
//}

template <class RealType, class Policy>
inline RealType skewness(const inverse_gamma_distribution<RealType, Policy>& dist)
BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_gamma_distribution<RealType, Policy>& dist)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)";
constexpr auto function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -444,11 +445,11 @@ inline RealType skewness(const inverse_gamma_distribution<RealType, Policy>& dis
}

template <class RealType, class Policy>
inline RealType kurtosis_excess(const inverse_gamma_distribution<RealType, Policy>& dist)
BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_gamma_distribution<RealType, Policy>& dist)
{
BOOST_MATH_STD_USING // for ADL of std functions

static const char* function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)";
constexpr auto function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)";

RealType shape = dist.shape();
RealType scale = dist.scale();
@@ -470,9 +471,9 @@ inline RealType kurtosis_excess(const inverse_gamma_distribution<RealType, Polic
}

template <class RealType, class Policy>
inline RealType kurtosis(const inverse_gamma_distribution<RealType, Policy>& dist)
BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_gamma_distribution<RealType, Policy>& dist)
{
static const char* function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)";
constexpr auto function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)";
RealType shape = dist.shape();
RealType scale = dist.scale();
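The std::pair to boost::math::pair switch should be source-compatible for host callers, since on a CPU-only build the alias is expected to resolve back to std::pair. A quick host-side sketch:

    #include <boost/math/distributions/inverse_gamma.hpp>
    #include <iostream>

    int main()
    {
        boost::math::inverse_gamma_distribution<double> d(3.0, 2.0); // shape, scale
        auto r = boost::math::range(d);   // boost::math::pair<double, double>
        auto s = boost::math::support(d);
        std::cout << r.first << ' ' << s.first << '\n'; // both 0, upper bounds are max double
    }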
@@ -23,6 +23,7 @@ using cuda::std::tuple;

using cuda::std::make_pair;

using cuda::std::tie;
using cuda::std::get;

using cuda::std::tuple_size;
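The hunk above extends the device-side alias block in tools/tuple.hpp. A hedged sketch of the shape such a block presumably takes (the guard macro name here is a placeholder, not necessarily the one the library uses):

    #ifdef MATH_HAS_CUDA_SUPPORT // placeholder guard, assumed for illustration
    #include <cuda/std/utility>
    namespace boost { namespace math {
    using cuda::std::pair;      // device-safe stand-ins for the std:: types
    using cuda::std::make_pair;
    }} // namespaces
    #else
    #include <utility>
    namespace boost { namespace math {
    using std::pair;            // host builds keep the standard types
    using std::make_pair;
    }} // namespaces
    #endif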
@@ -64,11 +64,46 @@ run test_extreme_value_pdf_float.cu ;
run test_extreme_value_quan_double.cu ;
run test_extreme_value_quan_float.cu ;

run test_fisher_f_cdf_double.cu ;
run test_fisher_f_cdf_float.cu ;
run test_fisher_f_pdf_double.cu ;
run test_fisher_f_pdf_float.cu ;
run test_fisher_f_quan_double.cu ;
run test_fisher_f_quan_float.cu ;

run test_gamma_dist_cdf_double.cu ;
run test_gamma_dist_cdf_float.cu ;
run test_gamma_dist_pdf_double.cu ;
run test_gamma_dist_pdf_float.cu ;
run test_gamma_dist_quan_double.cu ;
run test_gamma_dist_quan_float.cu ;

run test_geometric_dist_cdf_double.cu ;
run test_geometric_dist_cdf_float.cu ;
run test_geometric_dist_pdf_double.cu ;
run test_geometric_dist_pdf_float.cu ;
run test_geometric_dist_quan_double.cu ;
run test_geometric_dist_quan_float.cu ;

run test_holtsmark_cdf_double.cu ;
run test_holtsmark_cdf_float.cu ;
run test_holtsmark_pdf_double.cu ;
run test_holtsmark_pdf_float.cu ;

run test_inverse_chi_squared_cdf_double.cu ;
run test_inverse_chi_squared_cdf_float.cu ;
run test_inverse_chi_squared_pdf_double.cu ;
run test_inverse_chi_squared_pdf_float.cu ;
run test_inverse_chi_squared_quan_double.cu ;
run test_inverse_chi_squared_quan_float.cu ;

run test_inverse_gamma_cdf_double.cu ;
run test_inverse_gamma_cdf_float.cu ;
run test_inverse_gamma_pdf_double.cu ;
run test_inverse_gamma_pdf_float.cu ;
run test_inverse_gamma_quan_double.cu ;
run test_inverse_gamma_quan_float.cu ;

run test_landau_cdf_double.cu ;
run test_landau_cdf_float.cu ;
run test_landau_pdf_double.cu ;
@@ -59,6 +59,27 @@ run test_extreme_value_pdf_nvrtc_float.cpp ;
run test_extreme_value_quan_nvrtc_double.cpp ;
run test_extreme_value_quan_nvrtc_float.cpp ;

run test_fisher_f_cdf_nvrtc_double.cpp ;
run test_fisher_f_cdf_nvrtc_float.cpp ;
run test_fisher_f_pdf_nvrtc_double.cpp ;
run test_fisher_f_pdf_nvrtc_float.cpp ;
run test_fisher_f_quan_nvrtc_double.cpp ;
run test_fisher_f_quan_nvrtc_float.cpp ;

run test_gamma_dist_cdf_nvrtc_double.cpp ;
run test_gamma_dist_cdf_nvrtc_float.cpp ;
run test_gamma_dist_pdf_nvrtc_double.cpp ;
run test_gamma_dist_pdf_nvrtc_float.cpp ;
run test_gamma_dist_quan_nvrtc_double.cpp ;
run test_gamma_dist_quan_nvrtc_float.cpp ;

run test_geometric_dist_cdf_nvrtc_double.cpp ;
run test_geometric_dist_cdf_nvrtc_float.cpp ;
run test_geometric_dist_pdf_nvrtc_double.cpp ;
run test_geometric_dist_pdf_nvrtc_float.cpp ;
run test_geometric_dist_quan_nvrtc_double.cpp ;
run test_geometric_dist_quan_nvrtc_float.cpp ;

run test_holtsmark_cdf_nvrtc_double.cpp ;
run test_holtsmark_cdf_nvrtc_float.cpp ;
run test_holtsmark_pdf_nvrtc_double.cpp ;
@@ -66,6 +87,20 @@ run test_holtsmark_pdf_nvrtc_float.cpp ;
run test_holtsmark_quan_nvrtc_double.cpp ;
run test_holtsmark_quan_nvrtc_float.cpp ;

run test_inverse_chi_squared_cdf_nvrtc_double.cpp ;
run test_inverse_chi_squared_cdf_nvrtc_float.cpp ;
run test_inverse_chi_squared_pdf_nvrtc_double.cpp ;
run test_inverse_chi_squared_pdf_nvrtc_float.cpp ;
run test_inverse_chi_squared_quan_nvrtc_double.cpp ;
run test_inverse_chi_squared_quan_nvrtc_float.cpp ;

run test_inverse_gamma_cdf_nvrtc_double.cpp ;
run test_inverse_gamma_cdf_nvrtc_float.cpp ;
run test_inverse_gamma_pdf_nvrtc_double.cpp ;
run test_inverse_gamma_pdf_nvrtc_float.cpp ;
run test_inverse_gamma_quan_nvrtc_double.cpp ;
run test_inverse_gamma_quan_nvrtc_float.cpp ;

run test_landau_cdf_nvrtc_double.cpp ;
run test_landau_cdf_nvrtc_float.cpp ;
run test_landau_pdf_nvrtc_double.cpp ;
@@ -17,7 +17,12 @@ run test_cauchy.cpp ;
run test_chi_squared.cpp ;
run test_exponential_dist.cpp ;
run test_extreme_value.cpp ;
run test_fisher_f.cpp ;
run test_gamma_dist.cpp ;
run test_geometric.cpp ;
run test_holtsmark.cpp ;
run test_inverse_chi_squared_distribution.cpp ;
run test_inverse_gamma_distribution.cpp ;
run test_landau.cpp ;
run test_laplace.cpp ;
run test_logistic_dist.cpp ;
@@ -64,7 +64,7 @@ int main(void)
}

// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int threadsPerBlock = 256;
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
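The launch-configuration change above halves the block size from 512 to 256 threads; the grid size compensates automatically because it is computed by integer ceiling division. Shown in isolation (a standalone sketch, not part of the patch):

    #include <cstdio>

    int main()
    {
        const int numElements     = 50000;
        const int threadsPerBlock = 256; // was 512
        // ceiling division: the smallest grid that covers every element
        const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::printf("%d blocks of %d threads\n", blocksPerGrid, threadsPerBlock); // 196 x 256
    }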
@@ -65,7 +65,7 @@ int main(void)
}

// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int threadsPerBlock = 256;
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
@@ -8,9 +8,13 @@
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <boost/math/tools/test.hpp>
#include <boost/math/tools/config.hpp>
#include "../include_private/boost/math/tools/test.hpp"

#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
#include <boost/math/concepts/real_concept.hpp> // for real_concept
using ::boost::math::concepts::real_concept;
#endif

#include <boost/math/distributions/fisher_f.hpp> // for fisher_f_distribution
using boost::math::fisher_f_distribution;
test/test_fisher_f_cdf_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = cdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try{

    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> output_vector(numElements);

    boost::random::mt19937 gen;
    boost::random::uniform_real_distribution<float_type> dist;
    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        input_vector1[i] = dist(gen);
    }

    // Launch the CUDA kernel over the input vector
    int threadsPerBlock = 256;
    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(cdf(boost::math::fisher_f_distribution<float_type>(1, 1), input_vector1[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
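The pass/fail criterion above compares device results against host results in units of machine epsilon rather than as an absolute delta. A standalone illustration of the metric (the values here are illustrative):

    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>
    #include <limits>

    int main()
    {
        double a = 1.0;
        double b = 1.0 + 50 * std::numeric_limits<double>::epsilon();
        // reports the relative difference scaled into units of epsilon (~50 here),
        // comfortably under the 100-eps cutoff used by the test
        std::cout << boost::math::epsilon_difference(a, b) << '\n';
    }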
109
test/test_fisher_f_cdf_float.cu
Normal file
109
test/test_fisher_f_cdf_float.cu
Normal file
@@ -0,0 +1,109 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = cdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA test kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for (int i = 0; i < numElements; ++i)
            results.push_back(cdf(boost::math::fisher_f_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // Check the results
        for (int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
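A note on the launch geometry used by all of the .cu tests in this commit: blocksPerGrid is computed by integer ceiling division, so it is the smallest number of threadsPerBlock-sized blocks that covers numElements, and the `if (i < numElements)` guard in each kernel discards the overhang threads. A minimal host-only sketch of that arithmetic (the helper name is ours, purely illustrative):

    #include <cassert>

    // Smallest number of blocks of size blockSize that covers n elements.
    int blocks_for(int n, int blockSize)
    {
        return (n + blockSize - 1) / blockSize;
    }

    int main()
    {
        assert(blocks_for(50000, 256) == 196); // 196 * 256 = 50176 >= 50000
        assert(blocks_for(256,   256) == 1);   // exact fit, no overhang
        assert(blocks_for(257,   256) == 2);   // one element spills into a second block
        return 0;
    }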
test/test_fisher_f_cdf_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/fisher_f.hpp>
extern "C" __global__
void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = cdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize the CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create a CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load the PTX into a CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize the input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0, 1.0);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify the results against a serial reference
        for (int i = 0; i < numElements; ++i)
        {
            auto serial = cdf(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(serial))
            {
                if (boost::math::epsilon_difference(serial, h_out[i]) > 300)
                {
                    std::cout << "error at element: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << serial
                              << "\n    Dist: " << boost::math::epsilon_difference(serial, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_fisher_f_cdf_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/fisher_f.hpp>
extern "C" __global__
void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = cdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize the CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create a CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load the PTX into a CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize the input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify the results against a serial reference
        for (int i = 0; i < numElements; ++i)
        {
            auto serial = cdf(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(serial))
            {
                if (boost::math::epsilon_difference(serial, h_out[i]) > 300)
                {
                    std::cout << "error at element: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << serial
                              << "\n    Dist: " << boost::math::epsilon_difference(serial, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
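Since every one of these tests pins the distribution at fisher_f_distribution(1, 1), there is a convenient closed form to sanity-check the CDF against: if T is standard Cauchy then T^2 is F(1,1)-distributed, so cdf(x) = (2/pi) * atan(sqrt(x)). A small host-side cross-check under that identity (our own illustration, not part of the diff):

    #include <boost/math/distributions/fisher_f.hpp>
    #include <boost/math/constants/constants.hpp>
    #include <cmath>
    #include <cassert>

    int main()
    {
        boost::math::fisher_f_distribution<double> f11(1, 1);
        for (double x = 0.1; x < 10; x += 0.1)
        {
            // Closed form: P(F(1,1) <= x) = (2/pi) * atan(sqrt(x))
            double reference = 2 * std::atan(std::sqrt(x)) / boost::math::constants::pi<double>();
            assert(std::abs(cdf(f11, x) - reference) < 1e-12);
        }
        return 0;
    }

The same identity inverts to quantile(p) = tan^2(pi*p/2), which also explains the overflow as p approaches 1 in the quantile tests further down.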
test/test_fisher_f_pdf_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = pdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA test kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for (int i = 0; i < numElements; ++i)
            results.push_back(pdf(boost::math::fisher_f_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // Check the results
        for (int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_fisher_f_pdf_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = pdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA test kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for (int i = 0; i < numElements; ++i)
            results.push_back(pdf(boost::math::fisher_f_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // Check the results
        for (int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_fisher_f_pdf_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/fisher_f.hpp>
extern "C" __global__
void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = pdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize the CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create a CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load the PTX into a CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize the input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0, 1.0);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify the results against a serial reference
        for (int i = 0; i < numElements; ++i)
        {
            auto serial = pdf(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(serial))
            {
                if (boost::math::epsilon_difference(serial, h_out[i]) > 300)
                {
                    std::cout << "error at element: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << serial
                              << "\n    Dist: " << boost::math::epsilon_difference(serial, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_fisher_f_pdf_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/fisher_f.hpp>
extern "C" __global__
void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = pdf(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize the CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create a CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load the PTX into a CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize the input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify the results against a serial reference
        for (int i = 0; i < numElements; ++i)
        {
            auto serial = pdf(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(serial))
            {
                if (boost::math::epsilon_difference(serial, h_out[i]) > 300)
                {
                    std::cout << "error at element: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << serial
                              << "\n    Dist: " << boost::math::epsilon_difference(serial, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
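The pass/fail thresholds in these tests are expressed in units of machine epsilon: boost::math::epsilon_difference reports the relative difference between two values scaled by the epsilon of their type, so the .cu tests allow 100 eps and the NVRTC tests 300 eps of divergence between device and host results. A minimal sketch of what such a budget means:

    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>
    #include <limits>

    int main()
    {
        double a = 1.0;
        double b = a + 50 * std::numeric_limits<double>::epsilon();
        // relative_difference(a, b) is about 50 * eps, so epsilon_difference
        // reports roughly 50 -- comfortably inside a 100-eps budget.
        std::cout << boost::math::epsilon_difference(a, b) << std::endl;
        return 0;
    }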
test/test_fisher_f_quan_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA test kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for (int i = 0; i < numElements; ++i)
            results.push_back(quantile(boost::math::fisher_f_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // Check the results
        for (int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_fisher_f_quan_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA test kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for (int i = 0; i < numElements; ++i)
            results.push_back(quantile(boost::math::fisher_f_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // Check the results
        for (int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_fisher_f_quan_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/fisher_f.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/fisher_f.hpp>
extern "C" __global__
void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize the CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create a CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load the PTX into a CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize the input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0, 1.0);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify the results against a serial reference
        for (int i = 0; i < numElements; ++i)
        {
            auto serial = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(serial))
            {
                if (boost::math::epsilon_difference(serial, h_out[i]) > 300)
                {
                    std::cout << "error at element: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << serial
                              << "\n    Dist: " << boost::math::epsilon_difference(serial, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_fisher_f_quan_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
// Must be included first
|
||||
#include <nvrtc.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <exception>
|
||||
#include <boost/math/distributions/fisher_f.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
const char* cuda_kernel = R"(
|
||||
typedef float float_type;
|
||||
#include <cuda/std/type_traits>
|
||||
#include <boost/math/distributions/fisher_f.hpp>
|
||||
extern "C" __global__
|
||||
void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
|
||||
}
|
||||
}
|
||||
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
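        // The driver API launch below takes an array of pointers to each kernel
        // argument, so args holds the addresses of the device pointers and of
        // numElements rather than their values.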
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
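        // Each device result is checked against the serial host value; differences
        // of up to 300 machine epsilons are tolerated before a mismatch is reported.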
        for (int i = 0; i < numElements; ++i)
        {
            auto res = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n Serial: " << res
                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
@@ -15,16 +15,23 @@
// From MathWorld--A Wolfram Web Resource.
// http://mathworld.wolfram.com/GammaDistribution.html

#ifndef SYCL_LANGUAGE_VERSION
#include <pch.hpp> // include directory libs/math/src/tr1/ is needed.
#endif

#include <boost/math/tools/config.hpp>

#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
#include <boost/math/concepts/real_concept.hpp> // for real_concept
#endif

#define BOOST_TEST_MAIN
#include <boost/test/unit_test.hpp> // Boost.Test
#include <boost/test/tools/floating_point_comparison.hpp>

#include <boost/math/distributions/gamma.hpp>
using boost::math::gamma_distribution;
#include <boost/math/tools/test.hpp>
#include "../include_private/boost/math/tools/test.hpp"
#include "test_out_of_range.hpp"

#include <iostream>
test/test_gamma_dist_cdf_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
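        // Grid size is the ceiling of numElements / threadsPerBlock, so the final
        // partial block is covered; the bounds check in the kernel discards the
        // excess threads.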
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(cdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_gamma_dist_cdf_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(cdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // check the results
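        // boost::math::epsilon_difference reports the distance between the two
        // values in units of machine epsilon, so the pass threshold here is 100 eps.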
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_gamma_dist_cdf_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/gamma.hpp>
extern "C" __global__
void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_gamma_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);
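        // The PTX produced by NVRTC is a portable intermediate representation;
        // the driver JIT-compiles it for the present device when the module is
        // loaded below.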

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = cdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n Serial: " << res
                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_gamma_dist_cdf_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/gamma.hpp>
extern "C" __global__
void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_gamma_kernel");
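        // The kernel is declared extern "C" in the source string, so its name is
        // not mangled; registering the name expression is therefore not strictly
        // required, but it is harmless.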

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = cdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n Serial: " << res
                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_gamma_dist_pdf_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
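// With shape and scale both 1 the gamma distribution reduces to the unit
// exponential, whose pdf is exp(-x), giving an easy cross-check of the values.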

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(pdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_gamma_dist_pdf_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(pdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_gamma_dist_pdf_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/gamma.hpp>
extern "C" __global__
void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_gamma_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif
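        // The include paths above are machine-specific (CI runner vs. a local
        // checkout); adjust them to point at the Boost.Math headers in use.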

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = pdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n Serial: " << res
                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_gamma_dist_pdf_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/gamma.hpp>
extern "C" __global__
void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_gamma_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
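        // This device-to-host copy runs on the default stream, so it implicitly
        // waits for the kernel to finish before the results are read back.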

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = pdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n Serial: " << res
                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
test/test_gamma_dist_quan_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
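// quantile is the inverse CDF; for the unit exponential used here it equals
// -log(1 - p), so inputs drawn from [0, 1) map to finite values.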

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(quantile(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_gamma_dist_quan_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {
        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
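        // boost::random::uniform_real_distribution defaults to the range
        // [0, 1), which matches the domain expected by quantile below.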
        // Initialize the input vector
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the CUDA kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(quantile(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
test/test_gamma_dist_quan_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
// Must be included first
|
||||
#include <nvrtc.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <exception>
|
||||
#include <boost/math/distributions/gamma.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
const char* cuda_kernel = R"(
|
||||
typedef double float_type;
|
||||
#include <cuda/std/type_traits>
|
||||
#include <boost/math/distributions/gamma.hpp>
|
||||
extern "C" __global__
|
||||
void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
|
||||
}
|
||||
}
|
||||
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_gamma_kernel");

        #ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
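        // If compilation fails, the NVRTC program log fetched below contains the
        // compiler diagnostics, the same text an offline nvcc build would print.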
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
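        // Driver-API launches take kernel arguments as an array of addresses:
        // each args[] entry points at an argument, so device pointers are passed
        // as &d_in1 etc. rather than as the pointer values themselves.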

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = quantile(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}

test/test_gamma_dist_quan_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/gamma.hpp>
extern "C" __global__
void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_gamma_kernel");

        #ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = quantile(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}

@@ -26,9 +26,14 @@
# define TEST_REAL_CONCEPT
#endif

#include <boost/math/tools/test.hpp>
#include <boost/math/tools/config.hpp>

#include "../include_private/boost/math/tools/test.hpp"

#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
#include <boost/math/concepts/real_concept.hpp> // for real_concept
using ::boost::math::concepts::real_concept;
#endif

#include <boost/math/distributions/geometric.hpp> // for geometric_distribution
using boost::math::geometric_distribution;
@@ -64,7 +69,11 @@ void test_spot( // Test a single spot value against 'known good' values.
      RealType tol, // Test tolerance
      RealType logtol) // Logcdf Test tolerance.
{
   BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, real_concept>::value)
   BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value
#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
                       || std::is_same<RealType, real_concept>::value
#endif
                      )
   {
      logtol *= 100;
   }
@@ -376,7 +385,9 @@ if(std::numeric_limits<RealType>::is_specialized)
      static_cast<RealType>(9.9000000000003448e-201L), //
      100 * tolerance); // Note difference

      // p nearer unity.
      // p nearer unity.
      // On GPU this gets flushed to 0 which has an eps difference of 3.4e+38
#ifndef BOOST_MATH_HAS_GPU_SUPPORT
      BOOST_CHECK_CLOSE_FRACTION( //
         pdf(geometric_distribution<RealType>(static_cast<RealType>(0.9999)),
         static_cast<RealType>(10) ), // Number of failures, k
@@ -384,6 +395,7 @@ if(std::numeric_limits<RealType>::is_specialized)
      // static_cast<float>(1.00156406e-040)
      static_cast<RealType>(9.999e-41), // exact from 100 digit calculator.
      2e3 * tolerance); // Note bigger tolerance needed.
#endif

      // Moshier Cephes 100 digits calculator says 9.999e-41
      //0.9999*pow(1-0.9999,10)


test/test_geometric_dist_cdf_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
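
// For a geometric(0.5) distribution the CDF at k failures is 1 - 0.5^(k + 1);
// Boost.Math accepts non-integer k here, which suits the uniform real inputs below.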

/**
 * Host main routine
 */
int main(void)
{
  try{

    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<float_type> output_vector(numElements);
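    // cuda_managed_ptr (a helper from cuda_managed_ptr.hpp) presumably wraps
    // cudaMallocManaged, so these buffers are addressable from both host and
    // device without explicit cudaMemcpy calls.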

    boost::random::mt19937 gen;
    boost::random::uniform_real_distribution<float_type> dist;
    // Initialize the input vector
    for (int i = 0; i < numElements; ++i)
    {
       input_vector1[i] = dist(gen);
    }

    // Launch the CUDA kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
       std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
       return EXIT_FAILURE;
    }

    // Compute a serial reference on the host for verification
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
       results.push_back(cdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
    double t = w.elapsed();
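    // t is the single-threaded host time for the same evaluations, so the two
    // stopwatch printouts give a rough device-versus-host throughput comparison.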
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
       if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
       {
          std::cerr << "Result verification failed at element " << i << "!" << std::endl;
          std::cerr << "Error was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
          return EXIT_FAILURE;
       }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";
  }
  catch(const std::exception& e)
  {
    std::cerr << "Stopped with exception: " << e.what() << std::endl;
  }
  return 0;
}

test/test_geometric_dist_cdf_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
  try{

    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<float_type> output_vector(numElements);

    boost::random::mt19937 gen;
    boost::random::uniform_real_distribution<float_type> dist;
    // Initialize the input vector
    for (int i = 0; i < numElements; ++i)
    {
       input_vector1[i] = dist(gen);
    }

    // Launch the CUDA kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
       std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
       return EXIT_FAILURE;
    }

    // Compute a serial reference on the host for verification
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
       results.push_back(cdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
       if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
       {
          std::cerr << "Result verification failed at element " << i << "!" << std::endl;
          std::cerr << "Error was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
          return EXIT_FAILURE;
       }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";
  }
  catch(const std::exception& e)
  {
    std::cerr << "Stopped with exception: " << e.what() << std::endl;
  }
  return 0;
}

test/test_geometric_dist_cdf_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/geometric.hpp>
extern "C" __global__
void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_geometric_kernel");

        #ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = cdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}

test/test_geometric_dist_cdf_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/geometric.hpp>
extern "C" __global__
void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_geometric_kernel");

        #ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = cdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}

test/test_geometric_dist_pdf_double.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
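
// For a geometric(0.5) distribution the PDF at k failures is 0.5^(k + 1);
// as with the CDF tests, k is allowed to be a non-integer real here.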

/**
 * Host main routine
 */
int main(void)
{
  try{

    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<float_type> output_vector(numElements);

    boost::random::mt19937 gen;
    boost::random::uniform_real_distribution<float_type> dist;
    // Initialize the input vector
    for (int i = 0; i < numElements; ++i)
    {
       input_vector1[i] = dist(gen);
    }

    // Launch the CUDA kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
       std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
       return EXIT_FAILURE;
    }

    // Compute a serial reference on the host for verification
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
       results.push_back(pdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
       if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
       {
          std::cerr << "Result verification failed at element " << i << "!" << std::endl;
          std::cerr << "Error was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
          return EXIT_FAILURE;
       }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";
  }
  catch(const std::exception& e)
  {
    std::cerr << "Stopped with exception: " << e.what() << std::endl;
  }
  return 0;
}

test/test_geometric_dist_pdf_float.cu (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
  try{

    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<float_type> output_vector(numElements);

    boost::random::mt19937 gen;
    boost::random::uniform_real_distribution<float_type> dist;
    // Initialize the input vector
    for (int i = 0; i < numElements; ++i)
    {
       input_vector1[i] = dist(gen);
    }

    // Launch the CUDA kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
       std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
       return EXIT_FAILURE;
    }

    // Compute a serial reference on the host for verification
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
       results.push_back(pdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
       if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
       {
          std::cerr << "Result verification failed at element " << i << "!" << std::endl;
          std::cerr << "Error was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
          return EXIT_FAILURE;
       }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";
  }
  catch(const std::exception& e)
  {
    std::cerr << "Stopped with exception: " << e.what() << std::endl;
  }
  return 0;
}

test/test_geometric_dist_pdf_nvrtc_double.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/geometric.hpp>
extern "C" __global__
void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_geometric_kernel");

        #ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
        #endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = pdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}

test/test_geometric_dist_pdf_nvrtc_float.cpp (new file, 191 lines)
@@ -0,0 +1,191 @@
|
||||
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/geometric.hpp>
extern "C" __global__
void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_geometric_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = pdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
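[Editor's aside, not part of the diff: Boost.Math's geometric_distribution counts the number of failures k before the first success, so pdf(k) = p * (1 - p)^k, defined for real k >= 0 — which is why the test can feed it random reals from [0, 1). A minimal host-only sanity check of the values the kernel above produces, under that closed form, might look like this sketch (the 0.5 success fraction matches the test; the tolerance is illustrative):

#include <boost/math/distributions/geometric.hpp>
#include <cassert>
#include <cmath>

int main()
{
    const double p = 0.5;  // success fraction used throughout these tests
    boost::math::geometric_distribution<double> g(p);
    for (double k : {0.0, 0.25, 0.5, 0.99})  // real-valued inputs, as in the test
    {
        const double closed_form = p * std::pow(1.0 - p, k);  // p * (1 - p)^k
        assert(std::abs(pdf(g, k) - closed_form) < 1e-12);
    }
    return 0;
}
]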
109  test/test_geometric_dist_quan_double.cu  Normal file
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {

        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector A
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector C
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vectors
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the Vector Add CUDA Kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(quantile(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
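[Editor's aside, not part of the diff: the geometric CDF is cdf(k) = 1 - (1 - p)^(k + 1), so the real-valued inverse that underlies the quantile tested above is k = log1p(-q) / log1p(-p) - 1. Boost.Math additionally applies its discrete-quantile rounding policy on top of that continuous inverse, so the sketch below only prints the two values side by side rather than asserting equality; all of the values are illustrative.

#include <boost/math/distributions/geometric.hpp>
#include <cmath>
#include <iostream>

int main()
{
    const double p = 0.5, q = 0.9;
    // Continuous inverse of cdf(k) = 1 - (1 - p)^(k + 1):
    const double k_real = std::log1p(-q) / std::log1p(-p) - 1.0;
    // The library's quantile also rounds according to the policy in
    // effect (integer rounding by default), so compare by eye here.
    boost::math::geometric_distribution<double> g(p);
    std::cout << "continuous inverse: " << k_real
              << ", library quantile: " << quantile(g, q) << '\n';
    return 0;
}
]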
109  test/test_geometric_dist_quan_float.cu  Normal file
@@ -0,0 +1,109 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    try {

        // Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

        // Print the vector length to be used, and compute its size
        int numElements = 50000;
        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

        // Allocate the managed input vector A
        cuda_managed_ptr<float_type> input_vector1(numElements);

        // Allocate the managed output vector C
        cuda_managed_ptr<float_type> output_vector(numElements);

        boost::random::mt19937 gen;
        boost::random::uniform_real_distribution<float_type> dist;
        // Initialize the input vectors
        for (int i = 0; i < numElements; ++i)
        {
            input_vector1[i] = dist(gen);
        }

        // Launch the Vector Add CUDA Kernel
        int threadsPerBlock = 256;
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

        watch w;
        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
        cudaDeviceSynchronize();
        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
            return EXIT_FAILURE;
        }

        // Verify that the result vector is correct
        std::vector<float_type> results;
        results.reserve(numElements);
        w.reset();
        for(int i = 0; i < numElements; ++i)
            results.push_back(quantile(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
        double t = w.elapsed();
        // check the results
        for(int i = 0; i < numElements; ++i)
        {
            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000.0)
            {
                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
                return EXIT_FAILURE;
            }
        }

        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
        std::cout << "Done\n";
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
    }
    return 0;
}
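[Editor's aside, not part of the diff: the two .cu tests above depend on the test-support header cuda_managed_ptr.hpp, which is not shown in this changeset. Assuming it is a thin RAII wrapper over cudaMallocManaged — an assumption; the real helper may differ — a minimal equivalent would look like:

#include <cuda_runtime.h>
#include <cstddef>

// Hypothetical stand-in for the cuda_managed_ptr<T> used by the tests:
// a unified-memory allocation, visible to host and device, freed on scope exit.
template <class T>
class managed_array
{
    T* p_ = nullptr;
public:
    explicit managed_array(std::size_t n) { cudaMallocManaged(&p_, n * sizeof(T)); }
    ~managed_array() { cudaFree(p_); }
    managed_array(const managed_array&) = delete;
    managed_array& operator=(const managed_array&) = delete;
    T* get() const { return p_; }
    T& operator[](std::size_t i) const { return p_[i]; }
};

Unified memory is what lets these tests index output_vector[i] directly on the host after cudaDeviceSynchronize(), with no explicit cudaMemcpy.]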
191  test/test_geometric_dist_quan_nvrtc_double.cpp  Normal file
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef double float_type;

const char* cuda_kernel = R"(
typedef double float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/geometric.hpp>
extern "C" __global__
void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_geometric_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = quantile(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
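[Editor's aside, not part of the diff: one detail of the NVRTC harness repeated in these files — because the kernel is declared extern "C", its symbol is unmangled, so cuModuleGetFunction can look it up by its literal name and the nvrtcAddNameExpression call is not strictly needed. For a C++-linkage (mangled) kernel one would pair it with nvrtcGetLoweredName, roughly as follows:

// Sketch for a C++-linkage kernel; "prog" is the nvrtcProgram from above.
const char* lowered = nullptr;
nvrtcAddNameExpression(prog, "test_geometric_kernel");        // before nvrtcCompileProgram
// ... nvrtcCompileProgram(prog, ...) ...
nvrtcGetLoweredName(prog, "test_geometric_kernel", &lowered); // after compilation
// cuModuleGetFunction(&kernel, module, lowered);
]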
191  test/test_geometric_dist_quan_nvrtc_float.cpp  Normal file
@@ -0,0 +1,191 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false

// Must be included first
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/distributions/geometric.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

typedef float float_type;

const char* cuda_kernel = R"(
typedef float float_type;
#include <cuda/std/type_traits>
#include <boost/math/distributions/geometric.hpp>
extern "C" __global__
void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements)
    {
        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
    }
}
)";

void checkCUDAError(cudaError_t result, const char* msg)
{
    if (result != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkCUError(CUresult result, const char* msg)
{
    if (result != CUDA_SUCCESS)
    {
        const char* errorStr;
        cuGetErrorString(result, &errorStr);
        std::cerr << msg << ": " << errorStr << std::endl;
        exit(EXIT_FAILURE);
    }
}

void checkNVRTCError(nvrtcResult result, const char* msg)
{
    if (result != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
        exit(EXIT_FAILURE);
    }
}

int main()
{
    try
    {
        // Initialize CUDA driver API
        checkCUError(cuInit(0), "Failed to initialize CUDA");

        // Create CUDA context
        CUcontext context;
        CUdevice device;
        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");

        nvrtcProgram prog;
        nvrtcResult res;

        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
        checkNVRTCError(res, "Failed to create NVRTC program");

        nvrtcAddNameExpression(prog, "test_geometric_kernel");

#ifdef BOOST_MATH_NVRTC_CI_RUN
        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif

        // Compile the program
        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
        if (res != NVRTC_SUCCESS)
        {
            size_t log_size;
            nvrtcGetProgramLogSize(prog, &log_size);
            char* log = new char[log_size];
            nvrtcGetProgramLog(prog, log);
            std::cerr << "Compilation failed:\n" << log << std::endl;
            delete[] log;
            exit(EXIT_FAILURE);
        }

        // Get PTX from the program
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        char* ptx = new char[ptx_size];
        nvrtcGetPTX(prog, ptx);

        // Load PTX into CUDA module
        CUmodule module;
        CUfunction kernel;
        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");

        int numElements = 5000;
        float_type *h_in1, *h_in2, *h_out;
        float_type *d_in1, *d_in2, *d_out;

        // Allocate memory on the host
        h_in1 = new float_type[numElements];
        h_in2 = new float_type[numElements];
        h_out = new float_type[numElements];

        // Initialize input arrays
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
        for (int i = 0; i < numElements; ++i)
        {
            h_in1[i] = static_cast<float_type>(dist(rng));
            h_in2[i] = static_cast<float_type>(dist(rng));
        }

        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");

        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");

        int blockSize = 256;
        int numBlocks = (numElements + blockSize - 1) / blockSize;
        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");

        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");

        // Verify Result
        for (int i = 0; i < numElements; ++i)
        {
            auto res = quantile(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);

            if (boost::math::isfinite(res))
            {
                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
                {
                    std::cout << "error at line: " << i
                              << "\nParallel: " << h_out[i]
                              << "\n  Serial: " << res
                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
                }
            }
        }

        cudaFree(d_in1);
        cudaFree(d_in2);
        cudaFree(d_out);
        delete[] h_in1;
        delete[] h_in2;
        delete[] h_out;

        nvrtcDestroyProgram(&prog);
        delete[] ptx;

        cuCtxDestroy(context);

        std::cout << "Kernel executed successfully." << std::endl;
        return 0;
    }
    catch(const std::exception& e)
    {
        std::cerr << "Stopped with exception: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
}
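[Editor's aside, not part of the diff: the pass/fail criterion used throughout these tests, boost::math::epsilon_difference(a, b) from relative_difference.hpp, returns the relative difference between a and b expressed in units of machine epsilon for the type, so thresholds like 300 or 1000 mean "within a few hundred or thousand epsilons". A tiny illustration:

#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/math/special_functions/next.hpp>
#include <iostream>

int main()
{
    float a = 1.0f;
    float b = boost::math::float_advance(a, 3);  // 3 representation steps above a
    // Relative difference is roughly 3 * epsilon, so this prints ~3:
    std::cout << boost::math::epsilon_difference(a, b) << '\n';
    return 0;
}
]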
@@ -65,7 +65,7 @@ int main(void)
     }
 
     // Launch the Vector Add CUDA Kernel
-    int threadsPerBlock = 512;
+    int threadsPerBlock = 256;
     int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
     std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

@@ -65,7 +65,7 @@ int main(void)
     }
 
     // Launch the Vector Add CUDA Kernel
-    int threadsPerBlock = 512;
+    int threadsPerBlock = 256;
     int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
     std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

@@ -65,7 +65,7 @@ int main(void)
     }
 
     // Launch the Vector Add CUDA Kernel
-    int threadsPerBlock = 512;
+    int threadsPerBlock = 256;
     int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
     std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
Some files were not shown because too many files have changed in this diff.