diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp
index ff9d7a97a..e28933b74 100644
--- a/include/boost/math/policies/error_handling.hpp
+++ b/include/boost/math/policies/error_handling.hpp
@@ -551,6 +551,16 @@ inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR long raise_rounding_error(
 {
    return val > 0 ? LONG_MAX : LONG_MIN;
 }
+template <class T>
+inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR long long raise_rounding_error(
+           const char*,
+           const char*,
+           const T& val,
+           const long long&,
+           const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T)
+{
+   return val > 0 ? LLONG_MAX : LLONG_MIN;
+}
 #endif
 
 template
diff --git a/include/boost/math/special_functions/detail/round_fwd.hpp b/include/boost/math/special_functions/detail/round_fwd.hpp
index dd97dc520..cf58793f9 100644
--- a/include/boost/math/special_functions/detail/round_fwd.hpp
+++ b/include/boost/math/special_functions/detail/round_fwd.hpp
@@ -39,40 +39,40 @@ namespace boost
    BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v);
 #endif
    template <class T, class Policy>
-   typename tools::promote_args<T>::type round(const T& v, const Policy& pol);
+   BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol);
    template <class T>
-   typename tools::promote_args<T>::type round(const T& v);
+   BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v);
    template <class T, class Policy>
-   int iround(const T& v, const Policy& pol);
+   BOOST_GPU_ENABLED int iround(const T& v, const Policy& pol);
    template <class T>
-   int iround(const T& v);
+   BOOST_GPU_ENABLED int iround(const T& v);
    template <class T, class Policy>
-   long lround(const T& v, const Policy& pol);
+   BOOST_GPU_ENABLED long lround(const T& v, const Policy& pol);
    template <class T>
-   long lround(const T& v);
+   BOOST_GPU_ENABLED long lround(const T& v);
 #ifdef BOOST_HAS_LONG_LONG
    template <class T, class Policy>
-   boost::long_long_type llround(const T& v, const Policy& pol);
+   BOOST_GPU_ENABLED boost::long_long_type llround(const T& v, const Policy& pol);
    template <class T>
-   boost::long_long_type llround(const T& v);
+   BOOST_GPU_ENABLED boost::long_long_type llround(const T& v);
 #endif
    template <class T, class Policy>
-   T modf(const T& v, T* ipart, const Policy& pol);
+   BOOST_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, T* ipart);
+   BOOST_GPU_ENABLED T modf(const T& v, T* ipart);
    template <class T, class Policy>
-   T modf(const T& v, int* ipart, const Policy& pol);
+   BOOST_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, int* ipart);
+   BOOST_GPU_ENABLED T modf(const T& v, int* ipart);
    template <class T, class Policy>
-   T modf(const T& v, long* ipart, const Policy& pol);
+   BOOST_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, long* ipart);
+   BOOST_GPU_ENABLED T modf(const T& v, long* ipart);
 #ifdef BOOST_HAS_LONG_LONG
    template <class T, class Policy>
-   T modf(const T& v, boost::long_long_type* ipart, const Policy& pol);
+   BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, boost::long_long_type* ipart);
+   BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart);
 #endif
 
 }
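The new overload completes the set: with the ignore_error rounding policy, an out-of-range result now saturates for long long exactly as it already did for int and long. A minimal host-side sketch of the behaviour this enables (the input value is illustrative; the policy machinery is Boost.Math's documented API):

    #include <boost/math/special_functions/round.hpp>
    #include <iostream>

    int main()
    {
        using namespace boost::math::policies;
        // Ask for rounding errors to be ignored rather than thrown:
        typedef policy<rounding_error<ignore_error> > quiet_policy;

        // 1e30 does not fit in a long long, so llround saturates to
        // LLONG_MAX via the overload added above instead of raising.
        std::cout << boost::math::llround(1e30, quiet_policy()) << std::endl;
    }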
diff --git a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp
index 3ce74e7aa..20321664d 100644
--- a/include/boost/math/special_functions/modf.hpp
+++ b/include/boost/math/special_functions/modf.hpp
@@ -17,50 +17,50 @@ namespace boost{ namespace math{
 
 template <class T, class Policy>
-inline T modf(const T& v, T* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol)
 {
    *ipart = trunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, T* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, T* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline T modf(const T& v, int* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol)
 {
    *ipart = itrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, int* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, int* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline T modf(const T& v, long* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol)
 {
    *ipart = ltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
 
 #ifdef BOOST_HAS_LONG_LONG
 template <class T, class Policy>
-inline T modf(const T& v, boost::long_long_type* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart, const Policy& pol)
 {
    *ipart = lltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, boost::long_long_type* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp
index 9602bc769..1de352ba5 100644
--- a/include/boost/math/special_functions/next.hpp
+++ b/include/boost/math/special_functions/next.hpp
@@ -30,14 +30,14 @@ namespace boost{ namespace math{
 namespace detail{
 
 template <class T>
-inline T get_smallest_value(mpl::true_ const&)
+inline BOOST_GPU_ENABLED T get_smallest_value(mpl::true_ const&)
 {
    //
    // numeric_limits lies about denorms being present - particularly
    // when this can be turned on or off at runtime, as is the case
    // when using the SSE2 registers in DAZ or FTZ mode.
    //
-   static const T m = std::numeric_limits<T>::denorm_min();
+   BOOST_MATH_GPU_STATIC const T m = std::numeric_limits<T>::denorm_min();
 #ifdef BOOST_MATH_CHECK_SSE2
    return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value<T>() : m;;
 #else
@@ -46,15 +46,17 @@ inline T get_smallest_value(mpl::true_ const&)
 }
 
 template <class T>
-inline T get_smallest_value(mpl::false_ const&)
+inline BOOST_GPU_ENABLED T get_smallest_value(mpl::false_ const&)
 {
    return tools::min_value<T>();
 }
 
 template <class T>
-inline T get_smallest_value()
+inline BOOST_GPU_ENABLED T get_smallest_value()
 {
-#if defined(BOOST_MSVC) && (BOOST_MSVC <= 1310)
+#ifdef __CUDA_ARCH__
+   return get_smallest_value<T>(mpl::bool_());
+#elif defined(BOOST_MSVC) && (BOOST_MSVC <= 1310)
    return get_smallest_value<T>(mpl::bool_<std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::has_denorm == 1)>());
 #else
    return get_smallest_value<T>(mpl::bool_<std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::has_denorm == std::denorm_present)>());
@@ -66,25 +68,25 @@ inline T get_smallest_value()
 // we calculate the value of the least-significant-bit:
 //
 template <class T>
-T get_min_shift_value();
+BOOST_GPU_ENABLED T get_min_shift_value();
 
 template <class T>
 struct min_shift_initializer
 {
    struct init
    {
-      init()
+      BOOST_GPU_ENABLED init()
      {
         do_init();
      }
-      static void do_init()
+      static BOOST_GPU_ENABLED void do_init()
      {
         get_min_shift_value<T>();
      }
-      void force_instantiate()const{}
+      BOOST_GPU_ENABLED void force_instantiate()const{}
   };
   static const init initializer;
-   static void force_instantiate()
+   static BOOST_GPU_ENABLED void force_instantiate()
   {
      initializer.force_instantiate();
   }
@@ -95,21 +97,21 @@ const typename min_shift_initializer<T>::init min_shift_initializer<T>::initializer;
 
 template <class T>
-inline T get_min_shift_value()
+inline BOOST_GPU_ENABLED T get_min_shift_value()
 {
    BOOST_MATH_STD_USING
-   static const T val = ldexp(tools::min_value<T>(), tools::digits<T>() + 1);
+   BOOST_MATH_GPU_STATIC const T val = ldexp(tools::min_value<T>(), tools::digits<T>() + 1);
    min_shift_initializer<T>::force_instantiate();
    return val;
 }
 
 template <class T, class Policy>
-T float_next_imp(const T& val, const Policy& pol)
+BOOST_GPU_ENABLED T float_next_imp(const T& val, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    int expon;
-   static const char* function = "float_next<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_next<%1%>(%1%)";
 
    int fpclass = (boost::math::fpclassify)(val);
@@ -149,7 +151,7 @@ T float_next_imp(const T& val, const Policy& pol)
 }
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::float_next_imp(static_cast<result_type>(val), pol);
@@ -164,7 +166,7 @@ inline typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
 template <class Policy>
 inline double float_next(const double& val, const Policy& pol)
 {
-   static const char* function = "float_next<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_next<%1%>(%1%)";
 
    if(!(boost::math::isfinite)(val) && (val > 0))
       return policies::raise_domain_error<double>(
@@ -179,7 +181,7 @@ inline double float_next(const double& val, const Policy& pol)
 #endif
 
 template <class T>
-inline typename tools::promote_args<T>::type float_next(const T& val)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_next(const T& val)
 {
    return float_next(val, policies::policy<>());
 }
@@ -187,11 +189,11 @@ inline typename tools::promote_args<T>::type float_next(const T& val)
 
 namespace detail{
 
 template <class T, class Policy>
-T float_prior_imp(const T& val, const Policy& pol)
+BOOST_GPU_ENABLED T float_prior_imp(const T& val, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    int expon;
-   static const char* function = "float_prior<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_prior<%1%>(%1%)";
 
    int fpclass = (boost::math::fpclassify)(val);
@@ -232,7 +234,7 @@ T float_prior_imp(const T& val, const Policy& pol)
 }
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::float_prior_imp(static_cast<result_type>(val), pol);
@@ -247,7 +249,7 @@ inline typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
 template <class Policy>
 inline double float_prior(const double& val, const Policy& pol)
 {
-   static const char* function = "float_prior<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_prior<%1%>(%1%)";
 
    if(!(boost::math::isfinite)(val) && (val < 0))
       return policies::raise_domain_error<double>(
@@ -262,20 +264,20 @@ inline double float_prior(const double& val, const Policy& pol)
 #endif
 
 template <class T>
-inline typename tools::promote_args<T>::type float_prior(const T& val)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_prior(const T& val)
 {
    return float_prior(val, policies::policy<>());
 }
 
 template <class T, class U, class Policy>
-inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction, const Policy& pol)
 {
    typedef typename tools::promote_args<T, U>::type result_type;
    return val < direction ? boost::math::float_next<result_type>(val, pol) : val == direction ? val : boost::math::float_prior<result_type>(val, pol);
 }
 
 template <class T, class U>
-inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
 {
    return nextafter(val, direction, policies::policy<>());
 }
@@ -283,13 +285,13 @@ inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
 namespace detail{
 
 template <class T, class Policy>
-T float_distance_imp(const T& a, const T& b, const Policy& pol)
+BOOST_GPU_ENABLED T float_distance_imp(const T& a, const T& b, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
    // Error handling:
    //
-   static const char* function = "float_distance<%1%>(%1%, %1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_distance<%1%>(%1%, %1%)";
    if(!(boost::math::isfinite)(a))
       return policies::raise_domain_error<T>(
          function,
@@ -384,14 +386,14 @@ T float_distance_imp(const T& a, const T& b, const Policy& pol)
 }
 
 template <class T, class U, class Policy>
-inline typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b, const Policy& pol)
 {
    typedef typename tools::promote_args<T, U>::type result_type;
    return detail::float_distance_imp(static_cast<result_type>(a), static_cast<result_type>(b), pol);
 }
 
 template <class T, class U>
-typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
 {
    return boost::math::float_distance(a, b, policies::policy<>());
 }
@@ -399,13 +401,13 @@ typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
 namespace detail{
 
 template <class T, class Policy>
-T float_advance_imp(T val, int distance, const Policy& pol)
+BOOST_GPU_ENABLED T float_advance_imp(T val, int distance, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
    // Error handling:
    //
-   static const char* function = "float_advance<%1%>(%1%, int)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_advance<%1%>(%1%, int)";
 
    int fpclass = (boost::math::fpclassify)(val);
@@ -482,14 +484,14 @@ T float_advance_imp(T val, int distance, const Policy& pol)
 }
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type float_advance(T val, int distance, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_advance(T val, int distance, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::float_advance_imp(static_cast<result_type>(val), distance, pol);
 }
 
 template <class T>
-inline typename tools::promote_args<T>::type float_advance(const T& val, int distance)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_advance(const T& val, int distance)
 {
    return boost::math::float_advance(val, distance, policies::policy<>());
 }
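Semantically these functions are unchanged; only the decoration differs so that they can run on the device. As a reminder of the invariants they promise, a short host-side sanity check (illustrative values only):

    #include <boost/math/special_functions/next.hpp>
    #include <cassert>

    int main()
    {
        double x = 1.0;
        // float_next moves exactly one ulp up; float_prior undoes it,
        // and float_distance counts the ulps in between.
        double up = boost::math::float_next(x);
        assert(boost::math::float_prior(up) == x);
        assert(boost::math::float_distance(x, up) == 1);
    }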
diff --git a/include/boost/math/special_functions/round.hpp b/include/boost/math/special_functions/round.hpp
index e21f7185d..cb5990a67 100644
--- a/include/boost/math/special_functions/round.hpp
+++ b/include/boost/math/special_functions/round.hpp
@@ -20,7 +20,7 @@ namespace boost{ namespace math{
 namespace detail{
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type round(const T& v, const Policy& pol, const mpl::false_)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol, const mpl::false_)
 {
    BOOST_MATH_STD_USING
    typedef typename tools::promote_args<T>::type result_type;
@@ -52,7 +52,7 @@ inline typename tools::promote_args<T>::type round(const T& v, const Policy& pol, const mpl::false_)
    }
 }
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type round(const T& v, const Policy&, const mpl::true_)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy&, const mpl::true_)
 {
    return v;
 }
@@ -60,12 +60,12 @@ inline typename tools::promote_args<T>::type round(const T& v, const Policy&, const mpl::true_)
 } // namespace detail
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type round(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol)
 {
    return detail::round(v, pol, mpl::bool_<detail::is_integer_for_rounding<T>::value>());
 }
 template <class T>
-inline typename tools::promote_args<T>::type round(const T& v)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v)
 {
    return round(v, policies::policy<>());
 }
@@ -79,31 +79,39 @@ inline typename tools::promote_args<T>::type round(const T& v)
 // dependent lookup. See our concept archetypes for examples.
 //
 template <class T, class Policy>
-inline int iround(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED int iround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    T r = boost::math::round(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > INT_MAX) || (r < INT_MIN))
+#else
    if((r > (std::numeric_limits<int>::max)()) || (r < (std::numeric_limits<int>::min)()))
+#endif
      return static_cast<int>(policies::raise_rounding_error("boost::math::iround<%1%>(%1%)", 0, v, 0, pol));
    return static_cast<int>(r);
 }
 template <class T>
-inline int iround(const T& v)
+inline BOOST_GPU_ENABLED int iround(const T& v)
 {
    return iround(v, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline long lround(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED long lround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    T r = boost::math::round(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > LONG_MAX) || (r < LONG_MIN))
+#else
    if((r > (std::numeric_limits<long>::max)()) || (r < (std::numeric_limits<long>::min)()))
+#endif
      return static_cast<long>(policies::raise_rounding_error("boost::math::lround<%1%>(%1%)", 0, v, 0L, pol));
    return static_cast<long>(r);
 }
 template <class T>
-inline long lround(const T& v)
+inline BOOST_GPU_ENABLED long lround(const T& v)
 {
    return lround(v, policies::policy<>());
 }
@@ -111,16 +119,20 @@ inline long lround(const T& v)
 
 #ifdef BOOST_HAS_LONG_LONG
 template <class T, class Policy>
-inline boost::long_long_type llround(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED boost::long_long_type llround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    T r = boost::math::round(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > LLONG_MAX) || (r < LLONG_MIN))
+#else
    if((r > (std::numeric_limits<boost::long_long_type>::max)()) || (r < (std::numeric_limits<boost::long_long_type>::min)()))
+#endif
      return static_cast<boost::long_long_type>(policies::raise_rounding_error("boost::math::llround<%1%>(%1%)", 0, v, static_cast<boost::long_long_type>(0), pol));
    return static_cast<boost::long_long_type>(r);
 }
 template <class T>
-inline boost::long_long_type llround(const T& v)
+inline BOOST_GPU_ENABLED boost::long_long_type llround(const T& v)
 {
    return llround(v, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/trunc.hpp b/include/boost/math/special_functions/trunc.hpp
index 5913a4e7d..4adefaab3 100644
--- a/include/boost/math/special_functions/trunc.hpp
+++ b/include/boost/math/special_functions/trunc.hpp
@@ -81,7 +81,7 @@ inline BOOST_GPU_ENABLED long ltrunc(const T& v, const Policy& pol)
    typedef typename tools::promote_args<T>::type result_type;
    result_type r = boost::math::trunc(v, pol);
 #ifdef __CUDA_ARCH__
-   if((r > LONG_MAX) || (r < LONG_LIN))
+   if((r > LONG_MAX) || (r < LONG_MIN))
 #else
    if((r > (std::numeric_limits<long>::max)()) || (r < (std::numeric_limits<long>::min)()))
 #endif
@@ -102,7 +102,11 @@ inline BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v, const Policy& pol)
    BOOST_MATH_STD_USING
    typedef typename tools::promote_args<T>::type result_type;
    result_type r = boost::math::trunc(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > LLONG_MAX) || (r < LLONG_MIN))
+#else
    if((r > (std::numeric_limits<boost::long_long_type>::max)()) || (r < (std::numeric_limits<boost::long_long_type>::min)()))
+#endif
       return static_cast<boost::long_long_type>(policies::raise_rounding_error("boost::math::lltrunc<%1%>(%1%)", 0, v, static_cast<boost::long_long_type>(0), pol));
    return static_cast<boost::long_long_type>(r);
 }
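The tests that follow rely on two small helpers that are not part of this diff, cuda_managed_ptr.hpp and stopwatch.hpp. A hypothetical minimal sketch of the interfaces the tests assume follows; the real headers shipped in test/cuda may differ:

    // Hypothetical sketch only - the real cuda_managed_ptr.hpp and
    // stopwatch.hpp used by the tests may differ.
    #include <cuda_runtime.h>
    #include <chrono>
    #include <cstddef>

    template <class T>
    class cuda_managed_ptr
    {
       T* data;
    public:
       explicit cuda_managed_ptr(std::size_t n) : data(0)
       {
          // Unified (managed) memory is addressable from host and device.
          cudaMallocManaged(&data, n * sizeof(T));
       }
       ~cuda_managed_ptr()
       {
          cudaDeviceSynchronize();
          cudaFree(data);
       }
       T* get() const { return data; }
       T& operator[](std::size_t i) { return data[i]; }
       const T& operator[](std::size_t i) const { return data[i]; }
    };

    class watch
    {
       std::chrono::high_resolution_clock::time_point start;
    public:
       watch() { reset(); }
       void reset() { start = std::chrono::high_resolution_clock::now(); }
       double elapsed() const
       {
          return std::chrono::duration<double>(
             std::chrono::high_resolution_clock::now() - start).count();
       }
    };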
diff --git a/test/cuda/changesign_double.cu b/test/cuda/changesign_double.cu
new file mode 100644
index 000000000..afa17176f
--- /dev/null
+++ b/test/cuda/changesign_double.cu
@@ -0,0 +1,111 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        }
+        if(i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/copysign_double.cu b/test/cuda/copysign_double.cu
new file mode 100644
index 000000000..acb8adeb2
--- /dev/null
+++ b/test/cuda/copysign_double.cu
@@ -0,0 +1,111 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::copysign(in[i], float_type(-1.0));
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        }
+        if(i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::copysign(h_A[i], float_type(-1.0)));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/fpclassify_double.cu b/test/cuda/fpclassify_double.cu
new file mode 100644
index 000000000..12c7e2990
--- /dev/null
+++ b/test/cuda/fpclassify_double.cu
@@ -0,0 +1,112 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, int *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::fpclassify(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<int> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<int> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::fpclassify(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/isfinite_double.cu b/test/cuda/isfinite_double.cu
new file mode 100644
index 000000000..f5be9e0d4
--- /dev/null
+++ b/test/cuda/isfinite_double.cu
@@ -0,0 +1,112 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, bool *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::isfinite(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<bool> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<bool> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::isfinite(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/isinf_double.cu b/test/cuda/isinf_double.cu
new file mode 100644
index 000000000..81f45b1ff
--- /dev/null
+++ b/test/cuda/isinf_double.cu
@@ -0,0 +1,112 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, bool *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::isinf(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<bool> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<bool> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::isinf(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/isnan_double.cu b/test/cuda/isnan_double.cu
new file mode 100644
index 000000000..7baa7480d
--- /dev/null
+++ b/test/cuda/isnan_double.cu
@@ -0,0 +1,112 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, bool *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::isnan(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<bool> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<bool> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::isnan(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/isnormal_double.cu b/test/cuda/isnormal_double.cu
new file mode 100644
index 000000000..7903a336d
--- /dev/null
+++ b/test/cuda/isnormal_double.cu
@@ -0,0 +1,112 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, bool *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::isnormal(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<bool> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<bool> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::isnormal(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/modf_double.cu b/test/cuda/modf_double.cu
new file mode 100644
index 000000000..9a8cff2aa
--- /dev/null
+++ b/test/cuda/modf_double.cu
@@ -0,0 +1,102 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/round_double.cu b/test/cuda/round_double.cu
new file mode 100644
index 000000000..1fe0ecea9
--- /dev/null
+++ b/test/cuda/round_double.cu
@@ -0,0 +1,96 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/sign_double.cu b/test/cuda/sign_double.cu
new file mode 100644
index 000000000..64b78db57
--- /dev/null
+++ b/test/cuda/sign_double.cu
@@ -0,0 +1,114 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, int *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sign(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<int> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+        if(i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<int> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/signbit_double.cu b/test/cuda/signbit_double.cu
new file mode 100644
index 000000000..a704575a7
--- /dev/null
+++ b/test/cuda/signbit_double.cu
@@ -0,0 +1,114 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <limits>
+#include <vector>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, int *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::signbit(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<int> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        case 4:
+            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
+            break;
+        }
+        if(i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<int> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::signbit(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/cuda/trunc_double.cu b/test/cuda/trunc_double.cu
new file mode 100644
index 000000000..680175883
--- /dev/null
+++ b/test/cuda/trunc_double.cu
@@ -0,0 +1,96 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/special_functions/trunc.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    // Wait for the kernel to finish before timing it and before the host
+    // touches the managed buffers again:
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::trunc(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
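Everything above hinges on two small configuration macros. BOOST_GPU_ENABLED already comes from Boost.Config, where it expands to __host__ __device__ when compiling under nvcc; BOOST_MATH_GPU_STATIC is new with this patch. A plausible definition follows, shown only as an assumption about how the config header is set up; the authoritative definitions live in Boost.Config and boost/math/tools/config.hpp:

    // Assumed definitions - the authoritative ones live in Boost.Config /
    // boost/math/tools/config.hpp.
    #ifdef __CUDACC__
    // nvcc compiles every BOOST_GPU_ENABLED function for both host and
    // device; device code cannot contain function-local statics, so the
    // "static" on cached constants has to compile away.
    #  define BOOST_GPU_ENABLED __host__ __device__
    #  define BOOST_MATH_GPU_STATIC
    #else
    #  define BOOST_GPU_ENABLED
    #  define BOOST_MATH_GPU_STATIC static
    #endif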