mirror of
https://github.com/boostorg/math.git
synced 2026-02-01 20:42:19 +00:00
Merge pull request #1349 from JacobHass8/log-lower-incomplete-gamma
Implementation of the log of the lower incomplete gamma function
This commit is contained in:
@@ -26,6 +26,12 @@
|
||||
template <class T1, class T2, class ``__Policy``>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_q(T1 a, T2 z, const ``__Policy``&);
|
||||
|
||||
template <class T1, class T2>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z);
|
||||
|
||||
template <class T1, class T2, class ``__Policy``>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z, const ``__Policy``&);
|
||||
|
||||
template <class T1, class T2>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z);
|
||||
|
||||
@@ -72,6 +78,15 @@ This function changes rapidly from 0 to 1 around the point z == a:
|
||||
|
||||
[graph gamma_p]
|
||||
|
||||
template <class T1, class T2>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z);
|
||||
|
||||
template <class T1, class T2, class ``__Policy``>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z, const ``__Policy``&);
|
||||
|
||||
Returns the natural log of the normalized lower incomplete gamma function
|
||||
of a and z.
|
||||
|
||||
template <class T1, class T2>
|
||||
BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z);
|
||||
|
||||
|
||||
@@ -1290,7 +1290,6 @@ BOOST_MATH_GPU_ENABLED T incomplete_tgamma_large_x(const T& a, const T& x, const
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Main incomplete gamma entry point, handles all four incomplete gamma's:
|
||||
//
|
||||
@@ -1813,6 +1812,46 @@ BOOST_MATH_GPU_ENABLED T lgamma_incomplete_imp(T a, T x, const Policy& pol)
|
||||
return log(gamma_q(a, x, pol));
|
||||
}
|
||||
|
||||
// Calculate log of incomplete gamma function
|
||||
template <class T, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED T lgamma_incomplete_lower_imp(T a, T x, const Policy& pol)
|
||||
{
|
||||
using namespace boost::math; // temporary until we're in the right namespace
|
||||
|
||||
BOOST_MATH_STD_USING_CORE
|
||||
|
||||
// Check for invalid inputs (a < 0 or x < 0)
|
||||
constexpr auto function = "boost::math::lgamma_p<%1%>(%1%, %1%)";
|
||||
if(a <= 0)
|
||||
return policies::raise_domain_error<T>(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol);
|
||||
if(x < 0)
|
||||
return policies::raise_domain_error<T>(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol);
|
||||
|
||||
// Taken from conditions on Line 1709. There are also
|
||||
// conditions on Line 1368, but didn't implement that one here.
|
||||
// The second condition ensures floats do not return -inf for small
|
||||
// values of x.
|
||||
if (((a > 4 * x) && (a >= max_factorial<T>::value)) || ((T(-0.4) / log(x) < a) && (x < T(1.0)))){
|
||||
return log(detail::lower_gamma_series(a, x, pol)) - log(a) + a * log(x) - x - lgamma(a, pol);
|
||||
}
|
||||
|
||||
//
|
||||
// Can't do better than taking the log of P, but...
|
||||
//
|
||||
// Figure out whether we need P or Q, since if we calculate P and it's too close to unity
|
||||
// we will lose precision in the result, selection logic here is extracted from gamma_incomplete_imp_final:
|
||||
//
|
||||
bool need_p = false;
|
||||
if ((x < 1.1) && (x >= 0.5) && (x * 0.75f < a))
|
||||
need_p = true;
|
||||
else if ((x < a) && (x >= 1.1))
|
||||
need_p = true;
|
||||
|
||||
if (need_p)
|
||||
return log(gamma_p(a, x, pol));
|
||||
return log1p(-gamma_q(a, x, pol), pol);
|
||||
}
|
||||
|
||||
//
|
||||
// Ratios of two gamma functions:
|
||||
//
|
||||
@@ -2454,6 +2493,29 @@ BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2> lgamma_q(T1 a, T2 z)
|
||||
{
|
||||
return lgamma_q(a, z, policies::policy<>());
|
||||
}
|
||||
|
||||
template <class T1, class T2, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2> lgamma_p(T1 a, T2 z, const Policy& /* pol */)
|
||||
{
|
||||
typedef tools::promote_args_t<T1, T2> result_type;
|
||||
typedef typename policies::evaluation<result_type, Policy>::type value_type;
|
||||
typedef typename policies::normalise<
|
||||
Policy,
|
||||
policies::promote_float<false>,
|
||||
policies::promote_double<false>,
|
||||
policies::discrete_quantile<>,
|
||||
policies::assert_undefined<> >::type forwarding_policy;
|
||||
|
||||
return policies::checked_narrowing_cast<result_type, forwarding_policy>(
|
||||
detail::lgamma_incomplete_lower_imp(static_cast<value_type>(a),
|
||||
static_cast<value_type>(z), forwarding_policy()), "lgamma_p<%1%>(%1%, %1%)");
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2> lgamma_p(T1 a, T2 z)
|
||||
{
|
||||
return lgamma_p(a, z, policies::policy<>());
|
||||
}
|
||||
//
|
||||
// Regularised lower incomplete gamma:
|
||||
//
|
||||
|
||||
@@ -567,6 +567,12 @@ namespace boost
|
||||
template <class RT1, class RT2, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> lgamma_q(RT1 a, RT2 z, const Policy&);
|
||||
|
||||
template <class RT1, class RT2>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> lgamma_p(RT1 a, RT2 z);
|
||||
|
||||
template <class RT1, class RT2, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> lgamma_p(RT1 a, RT2 z, const Policy&);
|
||||
|
||||
template <class RT1, class RT2>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z);
|
||||
|
||||
@@ -1525,6 +1531,9 @@ namespace boost
|
||||
\
|
||||
template <class RT1, class RT2>\
|
||||
BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> lgamma_q(RT1 a, RT2 z){ return boost::math::lgamma_q(a, z, Policy()); }\
|
||||
\
|
||||
template <class RT1, class RT2>\
|
||||
BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> lgamma_p(RT1 a, RT2 z){ return boost::math::lgamma_p(a, z, Policy()); }\
|
||||
\
|
||||
template <class RT1, class RT2>\
|
||||
BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\
|
||||
|
||||
@@ -264,6 +264,7 @@ void instantiate(RealType)
|
||||
boost::math::gamma_p(v1, v2);
|
||||
boost::math::gamma_q(v1, v2);
|
||||
boost::math::lgamma_q(v1, v2);
|
||||
boost::math::lgamma_p(v1, v2);
|
||||
boost::math::gamma_p_inv(v1, v2);
|
||||
boost::math::gamma_q_inv(v1, v2);
|
||||
boost::math::gamma_p_inva(v1, v2);
|
||||
@@ -544,6 +545,7 @@ void instantiate(RealType)
|
||||
boost::math::gamma_p(v1 * 1, v2 + 0);
|
||||
boost::math::gamma_q(v1 * 1, v2 + 0);
|
||||
boost::math::lgamma_q(v1 * 1, v2 + 0);
|
||||
boost::math::lgamma_p(v1 * 1, v2 + 0);
|
||||
boost::math::gamma_p_inv(v1 * 1, v2 + 0);
|
||||
boost::math::gamma_q_inv(v1 * 1, v2 + 0);
|
||||
boost::math::gamma_p_inva(v1 * 1, v2 + 0);
|
||||
@@ -796,6 +798,7 @@ void instantiate(RealType)
|
||||
boost::math::gamma_p(v1, v2, pol);
|
||||
boost::math::gamma_q(v1, v2, pol);
|
||||
boost::math::lgamma_q(v1, v2, pol);
|
||||
boost::math::lgamma_p(v1, v2, pol);
|
||||
boost::math::gamma_p_inv(v1, v2, pol);
|
||||
boost::math::gamma_q_inv(v1, v2, pol);
|
||||
boost::math::gamma_p_inva(v1, v2, pol);
|
||||
@@ -1074,6 +1077,7 @@ void instantiate(RealType)
|
||||
test::gamma_p(v1, v2);
|
||||
test::gamma_q(v1, v2);
|
||||
test::lgamma_q(v1, v2);
|
||||
test::lgamma_p(v1, v2);
|
||||
test::gamma_p_inv(v1, v2);
|
||||
test::gamma_q_inv(v1, v2);
|
||||
test::gamma_p_inva(v1, v2);
|
||||
@@ -1356,6 +1360,7 @@ void instantiate_mixed(RealType)
|
||||
boost::math::gamma_p(fr, lr);
|
||||
boost::math::gamma_q(i, s);
|
||||
boost::math::lgamma_q(i, s);
|
||||
boost::math::lgamma_p(i, s);
|
||||
boost::math::gamma_q(fr, lr);
|
||||
boost::math::gamma_p_inv(i, fr);
|
||||
boost::math::gamma_q_inv(s, fr);
|
||||
@@ -1572,6 +1577,7 @@ void instantiate_mixed(RealType)
|
||||
boost::math::gamma_p(fr, lr, pol);
|
||||
boost::math::gamma_q(i, s, pol);
|
||||
boost::math::lgamma_q(i, s, pol);
|
||||
boost::math::lgamma_p(i, s, pol);
|
||||
boost::math::gamma_q(fr, lr, pol);
|
||||
boost::math::gamma_p_inv(i, fr, pol);
|
||||
boost::math::gamma_q_inv(s, fr, pol);
|
||||
@@ -1784,8 +1790,10 @@ void instantiate_mixed(RealType)
|
||||
test::gamma_p(fr, lr);
|
||||
test::gamma_q(i, s);
|
||||
test::lgamma_q(i, s);
|
||||
test::lgamma_p(i, s);
|
||||
test::gamma_q(fr, lr);
|
||||
test::lgamma_q(fr, lr);
|
||||
test::lgamma_p(fr, lr);
|
||||
test::gamma_p_inv(i, fr);
|
||||
test::gamma_q_inv(s, fr);
|
||||
test::gamma_p_inva(i, lr);
|
||||
|
||||
@@ -45,6 +45,12 @@ void compile_and_link_test()
|
||||
check_result<long double>(boost::math::lgamma_q<long double>(l, l));
|
||||
#endif
|
||||
|
||||
check_result<float>(boost::math::lgamma_p<float>(f, f));
|
||||
check_result<double>(boost::math::lgamma_p<double>(d, d));
|
||||
#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
|
||||
check_result<long double>(boost::math::lgamma_p<long double>(l, l));
|
||||
#endif
|
||||
|
||||
check_result<float>(boost::math::gamma_p_inv<float>(f, f));
|
||||
check_result<double>(boost::math::gamma_p_inv<double>(d, d));
|
||||
#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
|
||||
|
||||
@@ -371,6 +371,8 @@ run test_gamma_p_inv_double.cu ;
|
||||
run test_gamma_p_inv_float.cu ;
|
||||
run test_lgamma_q_double.cu ;
|
||||
run test_lgamma_q_float.cu ;
|
||||
run test_lgamma_p_double.cu ;
|
||||
run test_lgamma_p_float.cu ;
|
||||
|
||||
run test_log1p_double.cu ;
|
||||
run test_log1p_float.cu ;
|
||||
|
||||
@@ -263,7 +263,19 @@ void test_spots(T, const char* name = nullptr)
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast<T>(501.25), static_cast<T>(2000)), static_cast<T>(-810.2453406781655559126505101822969531699112391075198076300675402L), tolerance);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast<T>(20), static_cast<T>(0.25)), static_cast<T>(-2.946458104491857816330873290969917497748067639461638294404e-31L), tolerance);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast<T>(40), static_cast<T>(0.75)), static_cast<T>(-5.930604927955460343652485525435087275997461623988991819824e-54L), tolerance);
|
||||
#if defined(__CYGWIN__) || defined(__MINGW32__)
|
||||
|
||||
//
|
||||
// Check that lgamma_q returns correct values with spot values calculated via wolframalpha log(P[a, x])
|
||||
// This is calculated using: N[Log[GammaRegularized[a,0, z]],64]
|
||||
//
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(500), static_cast<T>(10)), static_cast<T>(-1470.017750815998931281954666549641187420649099004671023115157832L), tolerance);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(100), static_cast<T>(0.25)), static_cast<T>(-502.6163334118978895536207514636026023439623265152862757105793000L), tolerance);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(20), static_cast<T>(10.25)), static_cast<T>(-5.404004887981642339930593767572610169901594898478031307722239712L), tolerance);
|
||||
// Small "a" produce larger errors
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(0.25), static_cast<T>(100)), static_cast<T>(-3.220751038854414755009496530271388459559061551701603447517040280e-46L), tolerance);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(0.25), static_cast<T>(10)), static_cast<T>(-2.083032578160285760530530498275075010777428544413918699832758176e-6L), tolerance);
|
||||
|
||||
#if defined(__CYGWIN__) || defined(__MINGW32__)
|
||||
T gcc_win_mul = 2;
|
||||
#else
|
||||
T gcc_win_mul = 1;
|
||||
@@ -287,6 +299,24 @@ void test_spots(T, const char* name = nullptr)
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast<T>(1200), static_cast<T>(1250.25)), static_cast<T>(-2.591934862117586205519309712218581885256650074210410262843591453L), tolerance * ((std::numeric_limits<T>::max_digits10 >= 36 || std::is_same<T, boost::math::concepts::real_concept>::value) ? 750 : (std::is_same<T, float>::value ? 1 : 50))); // Test fails on ARM64 and s390x long doubles and real_concept types unless tolerance is adjusted
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast<T>(2200), static_cast<T>(2249.75)), static_cast<T>(-1.933779894897391651410597618307863427927461116308937004149240320L), tolerance * (std::is_floating_point<T>::value ? 1 : 10));
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast<T>(2200), static_cast<T>(2250.25)), static_cast<T>(-1.950346484067948344620463026377077515919992808640737320057812268L), tolerance * (std::is_same<T, float>::value ? 1 : (std::is_floating_point<T>::value ? 100 : 200)));
|
||||
|
||||
// Long double and real_concept types need increased precision
|
||||
T real_concept_tol = 1;
|
||||
if (std::is_same<T, boost::math::concepts::real_concept>::value || std::is_same<T, long double>::value){
|
||||
real_concept_tol = 3;
|
||||
}
|
||||
// Pair of tests that bisect the crossover condition in our code at double and then quad precision
|
||||
// Oddly, the crossover condition is smaller for quad precision. This is because max_factorial is 100
|
||||
// for boost::multiprecision::cpp_bin_float_quad and 170 for doubles.
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(169.75), static_cast<T>(0.75)), static_cast<T>(-754.8681912874632573100058312311927462406154378562940316233389406L), tolerance * real_concept_tol);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(170.25), static_cast<T>(0.75)), static_cast<T>(-757.5814133895304434271729579978676692688834086380018151200693572L), tolerance * real_concept_tol);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(99.75), static_cast<T>(0.75)), static_cast<T>(-392.0259615581237826290999388631292473247947826682978959914359465L), tolerance * real_concept_tol);
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(100.25), static_cast<T>(0.75)), static_cast<T>(-394.4749200332583219473980963811639065003421270272773619742710832L), tolerance * real_concept_tol);
|
||||
|
||||
// Check large a, x values. Precision just isn't great here.
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(1450.25), static_cast<T>(1500.75)), static_cast<T>(-0.09812447528127799786140178403478691390753413399549580160096975713L), tolerance * (std::is_same<T, boost::math::concepts::real_concept>::value ? 16 : 1));
|
||||
BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast<T>(2000), static_cast<T>(1900)), static_cast<T>(-4.448523733381445722945397105917814000790587922314824687065050805L), tolerance * gcc_win_mul * (std::is_same<T, boost::math::concepts::real_concept>::value ? 8 : 1));
|
||||
|
||||
//
|
||||
// Coverage:
|
||||
//
|
||||
@@ -302,6 +332,10 @@ void test_spots(T, const char* name = nullptr)
|
||||
BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast<T>(1), static_cast<T>(-2)), std::domain_error);
|
||||
BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast<T>(0), static_cast<T>(2)), std::domain_error);
|
||||
|
||||
BOOST_CHECK_THROW(boost::math::lgamma_p(static_cast<T>(-1), static_cast<T>(2)), std::domain_error);
|
||||
BOOST_CHECK_THROW(boost::math::lgamma_p(static_cast<T>(1), static_cast<T>(-2)), std::domain_error);
|
||||
BOOST_CHECK_THROW(boost::math::lgamma_p(static_cast<T>(0), static_cast<T>(2)), std::domain_error);
|
||||
|
||||
BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast<T>(-1), static_cast<T>(2)), std::domain_error);
|
||||
BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast<T>(1), static_cast<T>(-2)), std::domain_error);
|
||||
BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast<T>(0), static_cast<T>(2)), std::domain_error);
|
||||
@@ -317,6 +351,10 @@ void test_spots(T, const char* name = nullptr)
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast<T>(1), static_cast<T>(-2))));
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast<T>(0), static_cast<T>(2))));
|
||||
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_p(static_cast<T>(-1), static_cast<T>(2))));
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_p(static_cast<T>(1), static_cast<T>(-2))));
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_p(static_cast<T>(0), static_cast<T>(2))));
|
||||
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast<T>(-1), static_cast<T>(2))));
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast<T>(1), static_cast<T>(-2))));
|
||||
BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast<T>(0), static_cast<T>(2))));
|
||||
|
||||
102
test/test_lgamma_p_double.cu
Normal file
102
test/test_lgamma_p_double.cu
Normal file
@@ -0,0 +1,102 @@
|
||||
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <boost/math/special_functions.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::lgamma_p(in[i], in[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector[i] = rand()/(float_type)RAND_MAX;
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 1024;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(boost::math::lgamma_p(input_vector[i], input_vector[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
102
test/test_lgamma_p_float.cu
Normal file
102
test/test_lgamma_p_float.cu
Normal file
@@ -0,0 +1,102 @@
|
||||
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <boost/math/special_functions.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::lgamma_p(in[i], in[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector[i] = rand()/(float_type)RAND_MAX;
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 1024;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(boost::math::lgamma_p(input_vector[i], input_vector[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user