diff --git a/test/cuda_jamfile b/test/cuda_jamfile
new file mode 100644
index 000000000..f4740a210
--- /dev/null
+++ b/test/cuda_jamfile
@@ -0,0 +1,31 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt
+
+import testing ;
+import ../../config/checks/config : requires ;
+
+project : requirements
+    [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
+    ;
+
+# Autodiff Tests
+run test_autodiff_1.cpp ;
+
+# Tools Tests
+run test_polynomial.cpp ;
+
+# Distributions
+run test_arcsine.cpp ;
+run test_arcsine_cdf_double.cu ;
+run test_arcsine_cdf_float.cu ;
+run test_arcsine_pdf_double.cu ;
+run test_arcsine_pdf_float.cu ;
+run test_arcsine_quan_double.cu ;
+run test_arcsine_quan_float.cu ;
+run test_binomial.cpp ;
+
+# Special Functions
+# run test_beta_simple.cpp ;
+run test_beta_double.cu ;
+run test_beta_float.cu ;
\ No newline at end of file
diff --git a/test/test_arcsine_cdf_double.cu b/test/test_arcsine_cdf_double.cu
new file mode 100644
index 000000000..d6f6f7b35
--- /dev/null
+++ b/test/test_arcsine_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/arcsine.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::arcsine_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::arcsine_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_arcsine_cdf_float.cu b/test/test_arcsine_cdf_float.cu
new file mode 100644
index 000000000..148b1dffb
--- /dev/null
+++ b/test/test_arcsine_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/arcsine.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::arcsine_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::arcsine_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_arcsine_pdf_double.cu b/test/test_arcsine_pdf_double.cu
new file mode 100644
index 000000000..7a73bb34e
--- /dev/null
+++ b/test/test_arcsine_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/arcsine.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::arcsine_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::arcsine_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_arcsine_pdf_float.cu b/test/test_arcsine_pdf_float.cu
new file mode 100644
index 000000000..54a11253c
--- /dev/null
+++ b/test/test_arcsine_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/arcsine.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::arcsine_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::arcsine_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_arcsine_quan_double.cu b/test/test_arcsine_quan_double.cu
new file mode 100644
index 000000000..31f6eac8a
--- /dev/null
+++ b/test/test_arcsine_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/arcsine.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::arcsine_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::arcsine_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_arcsine_quan_float.cu b/test/test_arcsine_quan_float.cu
new file mode 100644
index 000000000..6decb347b
--- /dev/null
+++ b/test/test_arcsine_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/arcsine.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::arcsine_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::arcsine_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_beta_double.cu b/test/test_beta_double.cu
new file mode 100644
index 000000000..cd5860158
--- /dev/null
+++ b/test/test_beta_double.cu
@@ -0,0 +1,132 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::beta(in1[i], in2[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "beta_med_data.ipp"
+#include "beta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2;
+
+    for(unsigned i = 0; i < beta_med_data.size(); ++i)
+    {
+        v1.push_back(beta_med_data[i][0]);
+        v2.push_back(beta_med_data[i][1]);
+    }
+    for(unsigned i = 0; i < beta_small_data.size(); ++i)
+    {
+        v1.push_back(beta_small_data[i][0]);
+        v2.push_back(beta_small_data[i][1]);
+    }
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(),
+                                                  numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::beta(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
+
diff --git a/test/test_beta_float.cu b/test/test_beta_float.cu
new file mode 100644
index 000000000..c4c078f37
--- /dev/null
+++ b/test/test_beta_float.cu
@@ -0,0 +1,130 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::beta(in1[i], in2[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "beta_med_data.ipp"
+#include "beta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2;
+
+    for(unsigned i = 0; i < beta_med_data.size(); ++i)
+    {
+        v1.push_back(beta_med_data[i][0]);
+        v2.push_back(beta_med_data[i][1]);
+    }
+    for(unsigned i = 0; i < beta_small_data.size(); ++i)
+    {
+        v1.push_back(beta_small_data[i][0]);
+        v2.push_back(beta_small_data[i][1]);
+    }
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::beta(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
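
Note: every test above depends on two helper headers, "cuda_managed_ptr.hpp" and "stopwatch.hpp", which are not part of this diff. The sketch below is a hypothetical reconstruction inferred purely from how the tests use them (a count constructor, get() and operator[] on the pointer wrapper; construction, elapsed(), and reset() on the watch); it is not the actual Boost.Math header, only a minimal stand-in showing what the tests assume.

// Hypothetical stand-ins for "cuda_managed_ptr.hpp" and "stopwatch.hpp"
// (not part of this diff); names and members are inferred from usage.
#include <chrono>
#include <cstddef>
#include <stdexcept>
#include <cuda_runtime.h>

// RAII owner of CUDA unified (managed) memory: the same pointer is
// dereferenceable on the host (input_vector1[i] = ...) and on the
// device (the kernel reads in1[i]), so no explicit cudaMemcpy is needed.
template <class T>
class cuda_managed_ptr
{
    T* data_ = nullptr;
public:
    explicit cuda_managed_ptr(std::size_t n)
    {
        // cudaMallocManaged allocates memory visible to host and device.
        if (cudaMallocManaged(&data_, n * sizeof(T)) != cudaSuccess)
            throw std::runtime_error("cudaMallocManaged failed");
    }
    ~cuda_managed_ptr() { cudaFree(data_); }
    cuda_managed_ptr(const cuda_managed_ptr&) = delete;
    cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
    T* get() const { return data_; }
    T& operator[](std::size_t i) { return data_[i]; }
    const T& operator[](std::size_t i) const { return data_[i]; }
};

// Simple wall-clock stopwatch matching the usage above: construction
// starts timing, elapsed() reports seconds, reset() restarts the clock.
class watch
{
    std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
public:
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }
    void reset() { start_ = std::chrono::steady_clock::now(); }
};

Unified memory is the design choice that keeps these tests short: the host fills the buffers, the kernel writes results into the same allocation, and after cudaDeviceSynchronize() the host-side verification loop reads them back directly. A production-quality wrapper would also add move semantics and check the cudaFree result, which this sketch omits.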