mirror of
https://github.com/boostorg/math.git
synced 2026-01-27 19:12:08 +00:00
Add CUDA support for pareto and poisson.
This commit is contained in:
@@ -7,6 +7,8 @@
|
||||
#ifndef BOOST_STATS_COMPLEMENT_HPP
|
||||
#define BOOST_STATS_COMPLEMENT_HPP
|
||||
|
||||
#include <boost/config.hpp>
|
||||
|
||||
//
|
||||
// This code really defines our own tuple type.
|
||||
// It would be nice to reuse boost::math::tuple
|
||||
|
||||
@@ -30,7 +30,7 @@ namespace boost
|
||||
namespace detail
|
||||
{ // Parameter checking.
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_pareto_scale(
|
||||
inline BOOST_GPU_ENABLED bool check_pareto_scale(
|
||||
const char* function,
|
||||
RealType scale,
|
||||
RealType* result, const Policy& pol)
|
||||
@@ -59,7 +59,7 @@ namespace boost
|
||||
} // bool check_pareto_scale
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_pareto_shape(
|
||||
inline BOOST_GPU_ENABLED bool check_pareto_shape(
|
||||
const char* function,
|
||||
RealType shape,
|
||||
RealType* result, const Policy& pol)
|
||||
@@ -88,7 +88,7 @@ namespace boost
|
||||
} // bool check_pareto_shape(
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_pareto_x(
|
||||
inline BOOST_GPU_ENABLED bool check_pareto_x(
|
||||
const char* function,
|
||||
RealType const& x,
|
||||
RealType* result, const Policy& pol)
|
||||
@@ -117,7 +117,7 @@ namespace boost
|
||||
} // bool check_pareto_x
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_pareto( // distribution parameters.
|
||||
inline BOOST_GPU_ENABLED bool check_pareto( // distribution parameters.
|
||||
const char* function,
|
||||
RealType scale,
|
||||
RealType shape,
|
||||
@@ -136,19 +136,19 @@ namespace boost
|
||||
typedef RealType value_type;
|
||||
typedef Policy policy_type;
|
||||
|
||||
pareto_distribution(RealType l_scale = 1, RealType l_shape = 1)
|
||||
BOOST_GPU_ENABLED pareto_distribution(RealType l_scale = 1, RealType l_shape = 1)
|
||||
: m_scale(l_scale), m_shape(l_shape)
|
||||
{ // Constructor.
|
||||
RealType result = 0;
|
||||
detail::check_pareto("boost::math::pareto_distribution<%1%>::pareto_distribution", l_scale, l_shape, &result, Policy());
|
||||
}
|
||||
|
||||
RealType scale()const
|
||||
BOOST_GPU_ENABLED RealType scale()const
|
||||
{ // AKA Xm and Wolfram b and beta
|
||||
return m_scale;
|
||||
}
|
||||
|
||||
RealType shape()const
|
||||
BOOST_GPU_ENABLED RealType shape()const
|
||||
{ // AKA k and Wolfram a and alpha
|
||||
return m_shape;
|
||||
}
|
||||
@@ -176,10 +176,10 @@ namespace boost
|
||||
} // support
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType pdf(const pareto_distribution<RealType, Policy>& dist, const RealType& x)
|
||||
inline BOOST_GPU_ENABLED RealType pdf(const pareto_distribution<RealType, Policy>& dist, const RealType& x)
|
||||
{
|
||||
BOOST_MATH_STD_USING // for ADL of std function pow.
|
||||
static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
RealType scale = dist.scale();
|
||||
RealType shape = dist.shape();
|
||||
RealType result = 0;
|
||||
@@ -195,10 +195,10 @@ namespace boost
|
||||
} // pdf
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType cdf(const pareto_distribution<RealType, Policy>& dist, const RealType& x)
|
||||
inline BOOST_GPU_ENABLED RealType cdf(const pareto_distribution<RealType, Policy>& dist, const RealType& x)
|
||||
{
|
||||
BOOST_MATH_STD_USING // for ADL of std function pow.
|
||||
static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)";
|
||||
RealType scale = dist.scale();
|
||||
RealType shape = dist.shape();
|
||||
RealType result = 0;
|
||||
@@ -218,10 +218,10 @@ namespace boost
|
||||
} // cdf
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType quantile(const pareto_distribution<RealType, Policy>& dist, const RealType& p)
|
||||
inline BOOST_GPU_ENABLED RealType quantile(const pareto_distribution<RealType, Policy>& dist, const RealType& p)
|
||||
{
|
||||
BOOST_MATH_STD_USING // for ADL of std function pow.
|
||||
static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)";
|
||||
RealType result = 0;
|
||||
RealType scale = dist.scale();
|
||||
RealType shape = dist.shape();
|
||||
@@ -245,10 +245,10 @@ namespace boost
|
||||
} // quantile
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType cdf(const complemented2_type<pareto_distribution<RealType, Policy>, RealType>& c)
|
||||
inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type<pareto_distribution<RealType, Policy>, RealType>& c)
|
||||
{
|
||||
BOOST_MATH_STD_USING // for ADL of std function pow.
|
||||
static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)";
|
||||
RealType result = 0;
|
||||
RealType x = c.param;
|
||||
RealType scale = c.dist.scale();
|
||||
@@ -267,10 +267,10 @@ namespace boost
|
||||
} // cdf complement
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType quantile(const complemented2_type<pareto_distribution<RealType, Policy>, RealType>& c)
|
||||
inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type<pareto_distribution<RealType, Policy>, RealType>& c)
|
||||
{
|
||||
BOOST_MATH_STD_USING // for ADL of std function pow.
|
||||
static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)";
|
||||
RealType result = 0;
|
||||
RealType q = c.param;
|
||||
RealType scale = c.dist.scale();
|
||||
@@ -294,10 +294,10 @@ namespace boost
|
||||
} // quantile complement
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType mean(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType mean(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType result = 0;
|
||||
static const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)";
|
||||
if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy()))
|
||||
{
|
||||
return result;
|
||||
@@ -314,16 +314,16 @@ namespace boost
|
||||
} // mean
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType mode(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType mode(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
return dist.scale();
|
||||
} // mode
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType median(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType median(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType result = 0;
|
||||
static const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)";
|
||||
if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy()))
|
||||
{
|
||||
return result;
|
||||
@@ -333,12 +333,12 @@ namespace boost
|
||||
} // median
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType variance(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType variance(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType result = 0;
|
||||
RealType scale = dist.scale();
|
||||
RealType shape = dist.shape();
|
||||
static const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)";
|
||||
if(false == detail::check_pareto(function, scale, shape, &result, Policy()))
|
||||
{
|
||||
return result;
|
||||
@@ -358,12 +358,12 @@ namespace boost
|
||||
} // variance
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType skewness(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType skewness(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
RealType result = 0;
|
||||
RealType shape = dist.shape();
|
||||
static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy()))
|
||||
{
|
||||
return result;
|
||||
@@ -384,11 +384,11 @@ namespace boost
|
||||
} // skewness
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType kurtosis(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType kurtosis(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType result = 0;
|
||||
RealType shape = dist.shape();
|
||||
static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy()))
|
||||
{
|
||||
return result;
|
||||
@@ -408,11 +408,11 @@ namespace boost
|
||||
} // kurtosis
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType kurtosis_excess(const pareto_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType kurtosis_excess(const pareto_distribution<RealType, Policy>& dist)
|
||||
{
|
||||
RealType result = 0;
|
||||
RealType shape = dist.shape();
|
||||
static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)";
|
||||
if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy()))
|
||||
{
|
||||
return result;
|
||||
|
||||
@@ -59,7 +59,7 @@ namespace boost
|
||||
// checks are always performed, even if exceptions are not enabled.
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol)
|
||||
{
|
||||
if(!(boost::math::isfinite)(mean) || (mean < 0))
|
||||
{
|
||||
@@ -72,7 +72,7 @@ namespace boost
|
||||
} // bool check_mean
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol)
|
||||
{ // mean == 0 is considered an error.
|
||||
if( !(boost::math::isfinite)(mean) || (mean <= 0))
|
||||
{
|
||||
@@ -85,13 +85,13 @@ namespace boost
|
||||
} // bool check_mean_NZ
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol)
|
||||
{ // Only one check, so this is redundant really but should be optimized away.
|
||||
return check_mean_NZ(function, mean, result, pol);
|
||||
} // bool check_dist
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol)
|
||||
{
|
||||
if((k < 0) || !(boost::math::isfinite)(k))
|
||||
{
|
||||
@@ -104,7 +104,7 @@ namespace boost
|
||||
} // bool check_k
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol)
|
||||
{
|
||||
if((check_dist(function, mean, result, pol) == false) ||
|
||||
(check_k(function, k, result, pol) == false))
|
||||
@@ -115,7 +115,7 @@ namespace boost
|
||||
} // bool check_dist_and_k
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol)
|
||||
{ // Check 0 <= p <= 1
|
||||
if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1))
|
||||
{
|
||||
@@ -128,7 +128,7 @@ namespace boost
|
||||
} // bool check_prob
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol)
|
||||
inline BOOST_GPU_ENABLED bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol)
|
||||
{
|
||||
if((check_dist(function, mean, result, pol) == false) ||
|
||||
(check_prob(function, p, result, pol) == false))
|
||||
@@ -147,7 +147,7 @@ namespace boost
|
||||
typedef RealType value_type;
|
||||
typedef Policy policy_type;
|
||||
|
||||
poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda).
|
||||
BOOST_GPU_ENABLED poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda).
|
||||
{ // Expected mean number of events that occur during the given interval.
|
||||
RealType r;
|
||||
poisson_detail::check_dist(
|
||||
@@ -156,7 +156,7 @@ namespace boost
|
||||
&r, Policy());
|
||||
} // poisson_distribution constructor.
|
||||
|
||||
RealType mean() const
|
||||
BOOST_GPU_ENABLED RealType mean() const
|
||||
{ // Private data getter function.
|
||||
return m_l;
|
||||
}
|
||||
@@ -185,13 +185,13 @@ namespace boost
|
||||
}
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType mean(const poisson_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType mean(const poisson_distribution<RealType, Policy>& dist)
|
||||
{ // Mean of poisson distribution = lambda.
|
||||
return dist.mean();
|
||||
} // mean
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType mode(const poisson_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType mode(const poisson_distribution<RealType, Policy>& dist)
|
||||
{ // mode.
|
||||
BOOST_MATH_STD_USING // ADL of std functions.
|
||||
return floor(dist.mean());
|
||||
@@ -208,7 +208,7 @@ namespace boost
|
||||
// Now implemented via quantile(half) in derived accessors.
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType variance(const poisson_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType variance(const poisson_distribution<RealType, Policy>& dist)
|
||||
{ // variance.
|
||||
return dist.mean();
|
||||
}
|
||||
@@ -217,14 +217,14 @@ namespace boost
|
||||
// standard_deviation provided by derived accessors.
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType skewness(const poisson_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType skewness(const poisson_distribution<RealType, Policy>& dist)
|
||||
{ // skewness = sqrt(l).
|
||||
BOOST_MATH_STD_USING // ADL of std functions.
|
||||
return 1 / sqrt(dist.mean());
|
||||
}
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType kurtosis_excess(const poisson_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType kurtosis_excess(const poisson_distribution<RealType, Policy>& dist)
|
||||
{ // skewness = sqrt(l).
|
||||
return 1 / dist.mean(); // kurtosis_excess 1/mean from Wiki & MathWorld eq 31.
|
||||
// http://mathworld.wolfram.com/Kurtosis.html explains that the kurtosis excess
|
||||
@@ -233,7 +233,7 @@ namespace boost
|
||||
} // RealType kurtosis_excess
|
||||
|
||||
template <class RealType, class Policy>
|
||||
inline RealType kurtosis(const poisson_distribution<RealType, Policy>& dist)
|
||||
inline BOOST_GPU_ENABLED RealType kurtosis(const poisson_distribution<RealType, Policy>& dist)
|
||||
{ // kurtosis is 4th moment about the mean = u4 / sd ^ 4
|
||||
// http://en.wikipedia.org/wiki/Curtosis
|
||||
// kurtosis can range from -2 (flat top) to +infinity (sharp peak & heavy tails).
|
||||
@@ -245,7 +245,7 @@ namespace boost
|
||||
} // RealType kurtosis
|
||||
|
||||
template <class RealType, class Policy>
|
||||
RealType pdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
|
||||
BOOST_GPU_ENABLED RealType pdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
|
||||
{ // Probability Density/Mass Function.
|
||||
// Probability that there are EXACTLY k occurrences (or arrivals).
|
||||
BOOST_FPU_EXCEPTION_GUARD
|
||||
@@ -277,7 +277,7 @@ namespace boost
|
||||
} // pdf
|
||||
|
||||
template <class RealType, class Policy>
|
||||
RealType cdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
|
||||
BOOST_GPU_ENABLED RealType cdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
|
||||
{ // Cumulative Distribution Function Poisson.
|
||||
// The random variate k is the number of occurrences(or arrivals)
|
||||
// k argument may be integral, signed, or unsigned, or floating point.
|
||||
@@ -328,7 +328,7 @@ namespace boost
|
||||
} // binomial cdf
|
||||
|
||||
template <class RealType, class Policy>
|
||||
RealType cdf(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
|
||||
BOOST_GPU_ENABLED RealType cdf(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
|
||||
{ // Complemented Cumulative Distribution Function Poisson
|
||||
// The random variate k is the number of events, occurrences or arrivals.
|
||||
// k argument may be integral, signed, or unsigned, or floating point.
|
||||
|
||||
108
test/cuda/pareto_cdf_double.cu
Normal file
108
test/cuda/pareto_cdf_double.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/pareto.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = cdf(boost::math::pareto_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(cdf(boost::math::pareto_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/pareto_cdf_float.cu
Normal file
108
test/cuda/pareto_cdf_float.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/pareto.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = cdf(boost::math::pareto_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(cdf(boost::math::pareto_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/pareto_pdf_double.cu
Normal file
108
test/cuda/pareto_pdf_double.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/pareto.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = pdf(boost::math::pareto_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(pdf(boost::math::pareto_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/pareto_pdf_float.cu
Normal file
108
test/cuda/pareto_pdf_float.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/pareto.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = pdf(boost::math::pareto_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(pdf(boost::math::pareto_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/pareto_quan_double.cu
Normal file
108
test/cuda/pareto_quan_double.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/pareto.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = quantile(boost::math::pareto_distribution<float_type>(0.25), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist;
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 32;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(quantile(boost::math::pareto_distribution<float_type>(0.25), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/pareto_quan_float.cu
Normal file
108
test/cuda/pareto_quan_float.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/pareto.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = quantile(boost::math::pareto_distribution<float_type>(0.25), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist;
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 32;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(quantile(boost::math::pareto_distribution<float_type>(0.25), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/poisson_cdf_double.cu
Normal file
108
test/cuda/poisson_cdf_double.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/poisson.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = cdf(boost::math::poisson_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(cdf(boost::math::poisson_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/poisson_cdf_float.cu
Normal file
108
test/cuda/poisson_cdf_float.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/poisson.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = cdf(boost::math::poisson_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(cdf(boost::math::poisson_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/poisson_pdf_double.cu
Normal file
108
test/cuda/poisson_pdf_double.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/poisson.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = pdf(boost::math::poisson_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(pdf(boost::math::poisson_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
108
test/cuda/poisson_pdf_float.cu
Normal file
108
test/cuda/poisson_pdf_float.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <boost/math/distributions/poisson.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
|
||||
{
|
||||
using std::cos;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = pdf(boost::math::poisson_distribution<float_type>(23), in1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
try{
|
||||
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector1(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
boost::random::mt19937 gen;
|
||||
boost::random::uniform_real_distribution<float_type> dist(0.00001, 10000);
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector1[i] = dist(gen);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
|
||||
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
results.push_back(pdf(boost::math::poisson_distribution<float_type>(23), input_vector1[i]));
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0)
|
||||
{
|
||||
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
|
||||
std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user