From 0da48a002e33999f59d380f7d48b341bc4313b83 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Wed, 3 Feb 2016 18:52:12 +0000 Subject: [PATCH] Add CUDA support for pareto and poisson. --- .../boost/math/distributions/complement.hpp | 2 + include/boost/math/distributions/pareto.hpp | 60 +++++----- include/boost/math/distributions/poisson.hpp | 36 +++--- test/cuda/pareto_cdf_double.cu | 108 ++++++++++++++++++ test/cuda/pareto_cdf_float.cu | 108 ++++++++++++++++++ test/cuda/pareto_pdf_double.cu | 108 ++++++++++++++++++ test/cuda/pareto_pdf_float.cu | 108 ++++++++++++++++++ test/cuda/pareto_quan_double.cu | 108 ++++++++++++++++++ test/cuda/pareto_quan_float.cu | 108 ++++++++++++++++++ test/cuda/poisson_cdf_double.cu | 108 ++++++++++++++++++ test/cuda/poisson_cdf_float.cu | 108 ++++++++++++++++++ test/cuda/poisson_pdf_double.cu | 108 ++++++++++++++++++ test/cuda/poisson_pdf_float.cu | 108 ++++++++++++++++++ 13 files changed, 1130 insertions(+), 48 deletions(-) create mode 100644 test/cuda/pareto_cdf_double.cu create mode 100644 test/cuda/pareto_cdf_float.cu create mode 100644 test/cuda/pareto_pdf_double.cu create mode 100644 test/cuda/pareto_pdf_float.cu create mode 100644 test/cuda/pareto_quan_double.cu create mode 100644 test/cuda/pareto_quan_float.cu create mode 100644 test/cuda/poisson_cdf_double.cu create mode 100644 test/cuda/poisson_cdf_float.cu create mode 100644 test/cuda/poisson_pdf_double.cu create mode 100644 test/cuda/poisson_pdf_float.cu diff --git a/include/boost/math/distributions/complement.hpp b/include/boost/math/distributions/complement.hpp index d45ebef5d..371a44f29 100644 --- a/include/boost/math/distributions/complement.hpp +++ b/include/boost/math/distributions/complement.hpp @@ -7,6 +7,8 @@ #ifndef BOOST_STATS_COMPLEMENT_HPP #define BOOST_STATS_COMPLEMENT_HPP +#include + // // This code really defines our own tuple type. // It would be nice to reuse boost::math::tuple diff --git a/include/boost/math/distributions/pareto.hpp b/include/boost/math/distributions/pareto.hpp index 1c6cf350f..0e2b1fe50 100644 --- a/include/boost/math/distributions/pareto.hpp +++ b/include/boost/math/distributions/pareto.hpp @@ -30,7 +30,7 @@ namespace boost namespace detail { // Parameter checking. template - inline bool check_pareto_scale( + inline BOOST_GPU_ENABLED bool check_pareto_scale( const char* function, RealType scale, RealType* result, const Policy& pol) @@ -59,7 +59,7 @@ namespace boost } // bool check_pareto_scale template - inline bool check_pareto_shape( + inline BOOST_GPU_ENABLED bool check_pareto_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -88,7 +88,7 @@ namespace boost } // bool check_pareto_shape( template - inline bool check_pareto_x( + inline BOOST_GPU_ENABLED bool check_pareto_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -117,7 +117,7 @@ namespace boost } // bool check_pareto_x template - inline bool check_pareto( // distribution parameters. + inline BOOST_GPU_ENABLED bool check_pareto( // distribution parameters. const char* function, RealType scale, RealType shape, @@ -136,19 +136,19 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) + BOOST_GPU_ENABLED pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) : m_scale(l_scale), m_shape(l_shape) { // Constructor. RealType result = 0; detail::check_pareto("boost::math::pareto_distribution<%1%>::pareto_distribution", l_scale, l_shape, &result, Policy()); } - RealType scale()const + BOOST_GPU_ENABLED RealType scale()const { // AKA Xm and Wolfram b and beta return m_scale; } - RealType shape()const + BOOST_GPU_ENABLED RealType shape()const { // AKA k and Wolfram a and alpha return m_shape; } @@ -176,10 +176,10 @@ namespace boost } // support template - inline RealType pdf(const pareto_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType pdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -195,10 +195,10 @@ namespace boost } // pdf template - inline RealType cdf(const pareto_distribution& dist, const RealType& x) + inline BOOST_GPU_ENABLED RealType cdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -218,10 +218,10 @@ namespace boost } // cdf template - inline RealType quantile(const pareto_distribution& dist, const RealType& p) + inline BOOST_GPU_ENABLED RealType quantile(const pareto_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); @@ -245,10 +245,10 @@ namespace boost } // quantile template - inline RealType cdf(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType x = c.param; RealType scale = c.dist.scale(); @@ -267,10 +267,10 @@ namespace boost } // cdf complement template - inline RealType quantile(const complemented2_type, RealType>& c) + inline BOOST_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType q = c.param; RealType scale = c.dist.scale(); @@ -294,10 +294,10 @@ namespace boost } // quantile complement template - inline RealType mean(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -314,16 +314,16 @@ namespace boost } // mean template - inline RealType mode(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const pareto_distribution& dist) { return dist.scale(); } // mode template - inline RealType median(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType median(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -333,12 +333,12 @@ namespace boost } // median template - inline RealType variance(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const pareto_distribution& dist) { RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); - static const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, scale, shape, &result, Policy())) { return result; @@ -358,12 +358,12 @@ namespace boost } // variance template - inline RealType skewness(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const pareto_distribution& dist) { BOOST_MATH_STD_USING RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -384,11 +384,11 @@ namespace boost } // skewness template - inline RealType kurtosis(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -408,11 +408,11 @@ namespace boost } // kurtosis template - inline RealType kurtosis_excess(const pareto_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + BOOST_MATH_GPU_STATIC const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; diff --git a/include/boost/math/distributions/poisson.hpp b/include/boost/math/distributions/poisson.hpp index e4665bff6..827d45563 100644 --- a/include/boost/math/distributions/poisson.hpp +++ b/include/boost/math/distributions/poisson.hpp @@ -59,7 +59,7 @@ namespace boost // checks are always performed, even if exceptions are not enabled. template - inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(mean) || (mean < 0)) { @@ -72,7 +72,7 @@ namespace boost } // bool check_mean template - inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // mean == 0 is considered an error. if( !(boost::math::isfinite)(mean) || (mean <= 0)) { @@ -85,13 +85,13 @@ namespace boost } // bool check_mean_NZ template - inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // Only one check, so this is redundant really but should be optimized away. return check_mean_NZ(function, mean, result, pol); } // bool check_dist template - inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol) { if((k < 0) || !(boost::math::isfinite)(k)) { @@ -104,7 +104,7 @@ namespace boost } // bool check_k template - inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol) { if((check_dist(function, mean, result, pol) == false) || (check_k(function, k, result, pol) == false)) @@ -115,7 +115,7 @@ namespace boost } // bool check_dist_and_k template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { // Check 0 <= p <= 1 if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1)) { @@ -128,7 +128,7 @@ namespace boost } // bool check_prob template - inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol) + inline BOOST_GPU_ENABLED bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol) { if((check_dist(function, mean, result, pol) == false) || (check_prob(function, p, result, pol) == false)) @@ -147,7 +147,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda). + BOOST_GPU_ENABLED poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda). { // Expected mean number of events that occur during the given interval. RealType r; poisson_detail::check_dist( @@ -156,7 +156,7 @@ namespace boost &r, Policy()); } // poisson_distribution constructor. - RealType mean() const + BOOST_GPU_ENABLED RealType mean() const { // Private data getter function. return m_l; } @@ -185,13 +185,13 @@ namespace boost } template - inline RealType mean(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType mean(const poisson_distribution& dist) { // Mean of poisson distribution = lambda. return dist.mean(); } // mean template - inline RealType mode(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType mode(const poisson_distribution& dist) { // mode. BOOST_MATH_STD_USING // ADL of std functions. return floor(dist.mean()); @@ -208,7 +208,7 @@ namespace boost // Now implemented via quantile(half) in derived accessors. template - inline RealType variance(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType variance(const poisson_distribution& dist) { // variance. return dist.mean(); } @@ -217,14 +217,14 @@ namespace boost // standard_deviation provided by derived accessors. template - inline RealType skewness(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType skewness(const poisson_distribution& dist) { // skewness = sqrt(l). BOOST_MATH_STD_USING // ADL of std functions. return 1 / sqrt(dist.mean()); } template - inline RealType kurtosis_excess(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis_excess(const poisson_distribution& dist) { // skewness = sqrt(l). return 1 / dist.mean(); // kurtosis_excess 1/mean from Wiki & MathWorld eq 31. // http://mathworld.wolfram.com/Kurtosis.html explains that the kurtosis excess @@ -233,7 +233,7 @@ namespace boost } // RealType kurtosis_excess template - inline RealType kurtosis(const poisson_distribution& dist) + inline BOOST_GPU_ENABLED RealType kurtosis(const poisson_distribution& dist) { // kurtosis is 4th moment about the mean = u4 / sd ^ 4 // http://en.wikipedia.org/wiki/Curtosis // kurtosis can range from -2 (flat top) to +infinity (sharp peak & heavy tails). @@ -245,7 +245,7 @@ namespace boost } // RealType kurtosis template - RealType pdf(const poisson_distribution& dist, const RealType& k) + BOOST_GPU_ENABLED RealType pdf(const poisson_distribution& dist, const RealType& k) { // Probability Density/Mass Function. // Probability that there are EXACTLY k occurrences (or arrivals). BOOST_FPU_EXCEPTION_GUARD @@ -277,7 +277,7 @@ namespace boost } // pdf template - RealType cdf(const poisson_distribution& dist, const RealType& k) + BOOST_GPU_ENABLED RealType cdf(const poisson_distribution& dist, const RealType& k) { // Cumulative Distribution Function Poisson. // The random variate k is the number of occurrences(or arrivals) // k argument may be integral, signed, or unsigned, or floating point. @@ -328,7 +328,7 @@ namespace boost } // binomial cdf template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function Poisson // The random variate k is the number of events, occurrences or arrivals. // k argument may be integral, signed, or unsigned, or floating point. diff --git a/test/cuda/pareto_cdf_double.cu b/test/cuda/pareto_cdf_double.cu new file mode 100644 index 000000000..6ee1cb0e6 --- /dev/null +++ b/test/cuda/pareto_cdf_double.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/pareto_cdf_float.cu b/test/cuda/pareto_cdf_float.cu new file mode 100644 index 000000000..c77f1d6a6 --- /dev/null +++ b/test/cuda/pareto_cdf_float.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/pareto_pdf_double.cu b/test/cuda/pareto_pdf_double.cu new file mode 100644 index 000000000..f572abd64 --- /dev/null +++ b/test/cuda/pareto_pdf_double.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/pareto_pdf_float.cu b/test/cuda/pareto_pdf_float.cu new file mode 100644 index 000000000..4edcd152a --- /dev/null +++ b/test/cuda/pareto_pdf_float.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::pareto_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::pareto_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/pareto_quan_double.cu b/test/cuda/pareto_quan_double.cu new file mode 100644 index 000000000..883e3795b --- /dev/null +++ b/test/cuda/pareto_quan_double.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::pareto_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::pareto_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/pareto_quan_float.cu b/test/cuda/pareto_quan_float.cu new file mode 100644 index 000000000..0b6aca4a3 --- /dev/null +++ b/test/cuda/pareto_quan_float.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::pareto_distribution(0.25), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 32; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::pareto_distribution(0.25), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 6000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/poisson_cdf_double.cu b/test/cuda/poisson_cdf_double.cu new file mode 100644 index 000000000..8657adde4 --- /dev/null +++ b/test/cuda/poisson_cdf_double.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/poisson_cdf_float.cu b/test/cuda/poisson_cdf_float.cu new file mode 100644 index 000000000..c4252fa41 --- /dev/null +++ b/test/cuda/poisson_cdf_float.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/poisson_pdf_double.cu b/test/cuda/poisson_pdf_double.cu new file mode 100644 index 000000000..2bde08298 --- /dev/null +++ b/test/cuda/poisson_pdf_double.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/cuda/poisson_pdf_float.cu b/test/cuda/poisson_pdf_float.cu new file mode 100644 index 000000000..3c44482a8 --- /dev/null +++ b/test/cuda/poisson_pdf_float.cu @@ -0,0 +1,108 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(23), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0.00001, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::poisson_distribution(23), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 500.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} +