Commit to boostorg/math: Add fp-util support for CUDA.
@@ -551,6 +551,16 @@ inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR long raise_rounding_error(
 {
    return val > 0 ? LONG_MAX : LONG_MIN;
 }
+template <class T>
+inline BOOST_GPU_ENABLED BOOST_MATH_CONSTEXPR long long raise_rounding_error(
+      const char*,
+      const char*,
+      const T& val,
+      const long long&,
+      const ::boost::math::policies::rounding_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T)
+{
+   return val > 0 ? LLONG_MAX : LLONG_MIN;
+}
 #endif

 template <class T, class TargetType>
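What the new overload buys you in practice: with the ignore_error rounding policy, an out-of-range result from llround/lltrunc now saturates to LLONG_MAX or LLONG_MIN rather than going through the throwing error handlers. A minimal host-side sketch of my own (not part of this commit; the quiet_policy name is just illustrative):

    #include <boost/math/special_functions/round.hpp>
    #include <boost/math/policies/policy.hpp>
    #include <iostream>

    int main()
    {
        using namespace boost::math::policies;
        // Ignore rounding errors: overflowing conversions saturate.
        typedef policy<rounding_error<ignore_error> > quiet_policy;
        double huge = 1e300;
        std::cout << boost::math::llround(huge, quiet_policy()) << "\n";  // LLONG_MAX
        std::cout << boost::math::llround(-huge, quiet_policy()) << "\n"; // LLONG_MIN
        return 0;
    }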
@@ -39,40 +39,40 @@ namespace boost
 BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v);
 #endif
 template <class T, class Policy>
-typename tools::promote_args<T>::type round(const T& v, const Policy& pol);
+BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol);
 template <class T>
-typename tools::promote_args<T>::type round(const T& v);
+BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v);
 template <class T, class Policy>
-int iround(const T& v, const Policy& pol);
+BOOST_GPU_ENABLED int iround(const T& v, const Policy& pol);
 template <class T>
-int iround(const T& v);
+BOOST_GPU_ENABLED int iround(const T& v);
 template <class T, class Policy>
-long lround(const T& v, const Policy& pol);
+BOOST_GPU_ENABLED long lround(const T& v, const Policy& pol);
 template <class T>
-long lround(const T& v);
+BOOST_GPU_ENABLED long lround(const T& v);
 #ifdef BOOST_HAS_LONG_LONG
 template <class T, class Policy>
-boost::long_long_type llround(const T& v, const Policy& pol);
+BOOST_GPU_ENABLED boost::long_long_type llround(const T& v, const Policy& pol);
 template <class T>
-boost::long_long_type llround(const T& v);
+BOOST_GPU_ENABLED boost::long_long_type llround(const T& v);
 #endif
 template <class T, class Policy>
-T modf(const T& v, T* ipart, const Policy& pol);
+BOOST_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol);
 template <class T>
-T modf(const T& v, T* ipart);
+BOOST_GPU_ENABLED T modf(const T& v, T* ipart);
 template <class T, class Policy>
-T modf(const T& v, int* ipart, const Policy& pol);
+BOOST_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol);
 template <class T>
-T modf(const T& v, int* ipart);
+BOOST_GPU_ENABLED T modf(const T& v, int* ipart);
 template <class T, class Policy>
-T modf(const T& v, long* ipart, const Policy& pol);
+BOOST_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol);
 template <class T>
-T modf(const T& v, long* ipart);
+BOOST_GPU_ENABLED T modf(const T& v, long* ipart);
 #ifdef BOOST_HAS_LONG_LONG
 template <class T, class Policy>
-T modf(const T& v, boost::long_long_type* ipart, const Policy& pol);
+BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart, const Policy& pol);
 template <class T>
-T modf(const T& v, boost::long_long_type* ipart);
+BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart);
 #endif

 }

@@ -17,50 +17,50 @@
 namespace boost{ namespace math{

 template <class T, class Policy>
-inline T modf(const T& v, T* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol)
 {
    *ipart = trunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, T* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, T* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, int* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol)
 {
    *ipart = itrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, int* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, int* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, long* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol)
 {
    *ipart = ltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 #ifdef BOOST_HAS_LONG_LONG
 template <class T, class Policy>
-inline T modf(const T& v, boost::long_long_type* ipart, const Policy& pol)
+inline BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart, const Policy& pol)
 {
    *ipart = lltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, boost::long_long_type* ipart)
+inline BOOST_GPU_ENABLED T modf(const T& v, boost::long_long_type* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

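The behaviour of these overloads is the classic C modf split, with the integer part delivered in whichever width the pointer argument selects. A quick host-side illustration of mine (not from the commit):

    #include <boost/math/special_functions/modf.hpp>
    #include <iostream>

    int main()
    {
        double ipart = 0;
        long long llpart = 0;
        double f1 = boost::math::modf(3.75, &ipart);  // ipart == 3.0, f1 == 0.75
        double f2 = boost::math::modf(3.75, &llpart); // llpart == 3,  f2 == 0.75
        std::cout << ipart << " " << f1 << " " << llpart << " " << f2 << "\n";
        return 0;
    }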
@@ -30,14 +30,14 @@ namespace boost{ namespace math{
 namespace detail{

 template <class T>
-inline T get_smallest_value(mpl::true_ const&)
+inline BOOST_GPU_ENABLED T get_smallest_value(mpl::true_ const&)
 {
    //
    // numeric_limits lies about denorms being present - particularly
    // when this can be turned on or off at runtime, as is the case
    // when using the SSE2 registers in DAZ or FTZ mode.
    //
-   static const T m = std::numeric_limits<T>::denorm_min();
+   BOOST_MATH_GPU_STATIC const T m = std::numeric_limits<T>::denorm_min();
 #ifdef BOOST_MATH_CHECK_SSE2
    return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value<T>() : m;;
 #else
@@ -46,15 +46,17 @@ inline T get_smallest_value(mpl::true_ const&)
 }

 template <class T>
-inline T get_smallest_value(mpl::false_ const&)
+inline BOOST_GPU_ENABLED T get_smallest_value(mpl::false_ const&)
 {
    return tools::min_value<T>();
 }

 template <class T>
-inline T get_smallest_value()
+inline BOOST_GPU_ENABLED T get_smallest_value()
 {
-#if defined(BOOST_MSVC) && (BOOST_MSVC <= 1310)
+#ifdef __CUDA_ARCH__
+   return get_smallest_value<T>(mpl::bool_<false>());
+#elif defined(BOOST_MSVC) && (BOOST_MSVC <= 1310)
    return get_smallest_value<T>(mpl::bool_<std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::has_denorm == 1)>());
 #else
    return get_smallest_value<T>(mpl::bool_<std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::has_denorm == std::denorm_present)>());
@@ -66,25 +68,25 @@ inline T get_smallest_value()
 // we calculate the value of the least-significant-bit:
 //
 template <class T>
-T get_min_shift_value();
+BOOST_GPU_ENABLED T get_min_shift_value();

 template <class T>
 struct min_shift_initializer
 {
    struct init
    {
-      init()
+      BOOST_GPU_ENABLED init()
       {
          do_init();
       }
-      static void do_init()
+      static BOOST_GPU_ENABLED void do_init()
       {
          get_min_shift_value<T>();
       }
-      void force_instantiate()const{}
+      BOOST_GPU_ENABLED void force_instantiate()const{}
    };
    static const init initializer;
-   static void force_instantiate()
+   static BOOST_GPU_ENABLED void force_instantiate()
    {
       initializer.force_instantiate();
    }
@@ -95,21 +97,21 @@ const typename min_shift_initializer<T>::init min_shift_initializer<T>::initiali


 template <class T>
-inline T get_min_shift_value()
+inline BOOST_GPU_ENABLED T get_min_shift_value()
 {
    BOOST_MATH_STD_USING
-   static const T val = ldexp(tools::min_value<T>(), tools::digits<T>() + 1);
+   BOOST_MATH_GPU_STATIC const T val = ldexp(tools::min_value<T>(), tools::digits<T>() + 1);
    min_shift_initializer<T>::force_instantiate();

    return val;
 }

 template <class T, class Policy>
-T float_next_imp(const T& val, const Policy& pol)
+BOOST_GPU_ENABLED T float_next_imp(const T& val, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    int expon;
-   static const char* function = "float_next<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_next<%1%>(%1%)";

    int fpclass = (boost::math::fpclassify)(val);

@@ -149,7 +151,7 @@ T float_next_imp(const T& val, const Policy& pol)
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::float_next_imp(static_cast<result_type>(val), pol);
@@ -164,7 +166,7 @@ inline typename tools::promote_args<T>::type float_next(const T& val, const Poli
 template <class Policy>
 inline double float_next(const double& val, const Policy& pol)
 {
-   static const char* function = "float_next<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_next<%1%>(%1%)";

    if(!(boost::math::isfinite)(val) && (val > 0))
       return policies::raise_domain_error<double>(
@@ -179,7 +181,7 @@ inline double float_next(const double& val, const Policy& pol)
 #endif

 template <class T>
-inline typename tools::promote_args<T>::type float_next(const T& val)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_next(const T& val)
 {
    return float_next(val, policies::policy<>());
 }
@@ -187,11 +189,11 @@ inline typename tools::promote_args<T>::type float_next(const T& val)
 namespace detail{

 template <class T, class Policy>
-T float_prior_imp(const T& val, const Policy& pol)
+BOOST_GPU_ENABLED T float_prior_imp(const T& val, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    int expon;
-   static const char* function = "float_prior<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_prior<%1%>(%1%)";

    int fpclass = (boost::math::fpclassify)(val);

@@ -232,7 +234,7 @@ T float_prior_imp(const T& val, const Policy& pol)
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::float_prior_imp(static_cast<result_type>(val), pol);
@@ -247,7 +249,7 @@ inline typename tools::promote_args<T>::type float_prior(const T& val, const Pol
 template <class Policy>
 inline double float_prior(const double& val, const Policy& pol)
 {
-   static const char* function = "float_prior<%1%>(%1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_prior<%1%>(%1%)";

    if(!(boost::math::isfinite)(val) && (val < 0))
       return policies::raise_domain_error<double>(
@@ -262,20 +264,20 @@ inline double float_prior(const double& val, const Policy& pol)
 #endif

 template <class T>
-inline typename tools::promote_args<T>::type float_prior(const T& val)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_prior(const T& val)
 {
    return float_prior(val, policies::policy<>());
 }

 template <class T, class U, class Policy>
-inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction, const Policy& pol)
 {
    typedef typename tools::promote_args<T, U>::type result_type;
    return val < direction ? boost::math::float_next<result_type>(val, pol) : val == direction ? val : boost::math::float_prior<result_type>(val, pol);
 }

 template <class T, class U>
-inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
 {
    return nextafter(val, direction, policies::policy<>());
 }
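For reference, the relationship between the functions in this hunk: float_next moves one representable value up, float_prior one down, and nextafter picks between them based on the direction argument. A small host-side check of my own:

    #include <boost/math/special_functions/next.hpp>
    #include <iostream>

    int main()
    {
        double x = 1.0;
        double up  = boost::math::float_next(x);      // smallest double > 1.0
        double via = boost::math::nextafter(x, 2.0);  // same value, directed form
        std::cout << (up == via) << "\n";                         // prints 1
        std::cout << (boost::math::float_prior(up) == x) << "\n"; // prints 1
        return 0;
    }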
@@ -283,13 +285,13 @@ inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U&
 namespace detail{

 template <class T, class Policy>
-T float_distance_imp(const T& a, const T& b, const Policy& pol)
+BOOST_GPU_ENABLED T float_distance_imp(const T& a, const T& b, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
    // Error handling:
    //
-   static const char* function = "float_distance<%1%>(%1%, %1%)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_distance<%1%>(%1%, %1%)";
    if(!(boost::math::isfinite)(a))
       return policies::raise_domain_error<T>(
          function,
@@ -384,14 +386,14 @@ T float_distance_imp(const T& a, const T& b, const Policy& pol)
 }

 template <class T, class U, class Policy>
-inline typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b, const Policy& pol)
 {
    typedef typename tools::promote_args<T, U>::type result_type;
    return detail::float_distance_imp(static_cast<result_type>(a), static_cast<result_type>(b), pol);
 }

 template <class T, class U>
-typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
 {
    return boost::math::float_distance(a, b, policies::policy<>());
 }
@@ -399,13 +401,13 @@ typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
 namespace detail{

 template <class T, class Policy>
-T float_advance_imp(T val, int distance, const Policy& pol)
+BOOST_GPU_ENABLED T float_advance_imp(T val, int distance, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
    // Error handling:
    //
-   static const char* function = "float_advance<%1%>(%1%, int)";
+   BOOST_MATH_GPU_STATIC const char* function = "float_advance<%1%>(%1%, int)";

    int fpclass = (boost::math::fpclassify)(val);

@@ -482,14 +484,14 @@ T float_advance_imp(T val, int distance, const Policy& pol)
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type float_advance(T val, int distance, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_advance(T val, int distance, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::float_advance_imp(static_cast<result_type>(val), distance, pol);
 }

 template <class T>
-inline typename tools::promote_args<T>::type float_advance(const T& val, int distance)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type float_advance(const T& val, int distance)
 {
    return boost::math::float_advance(val, distance, policies::policy<>());
 }

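float_advance and float_distance are inverses of a sort: advancing by n representable values and then measuring the distance gets you n back. A host-side sketch of mine (not part of the commit):

    #include <boost/math/special_functions/next.hpp>
    #include <iostream>

    int main()
    {
        double a = 1.0;
        double b = boost::math::float_advance(a, 4); // 4 representable values above a
        std::cout << boost::math::float_distance(a, b) << "\n"; // prints 4
        return 0;
    }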
@@ -20,7 +20,7 @@ namespace boost{ namespace math{
 namespace detail{

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type round(const T& v, const Policy& pol, const mpl::false_)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol, const mpl::false_)
 {
    BOOST_MATH_STD_USING
    typedef typename tools::promote_args<T>::type result_type;
@@ -52,7 +52,7 @@ inline typename tools::promote_args<T>::type round(const T& v, const Policy& pol
    }
 }
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type round(const T& v, const Policy&, const mpl::true_)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy&, const mpl::true_)
 {
    return v;
 }
@@ -60,12 +60,12 @@ inline typename tools::promote_args<T>::type round(const T& v, const Policy&, co
 } // namespace detail

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type round(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol)
 {
    return detail::round(v, pol, mpl::bool_<detail::is_integer_for_rounding<T>::value>());
 }
 template <class T>
-inline typename tools::promote_args<T>::type round(const T& v)
+inline BOOST_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v)
 {
    return round(v, policies::policy<>());
 }
@@ -79,31 +79,39 @@ inline typename tools::promote_args<T>::type round(const T& v)
 // dependent lookup. See our concept archetypes for examples.
 //
 template <class T, class Policy>
-inline int iround(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED int iround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    T r = boost::math::round(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > INT_MAX) || (r < INT_MIN))
+#else
    if((r > (std::numeric_limits<int>::max)()) || (r < (std::numeric_limits<int>::min)()))
+#endif
       return static_cast<int>(policies::raise_rounding_error("boost::math::iround<%1%>(%1%)", 0, v, 0, pol));
    return static_cast<int>(r);
 }
 template <class T>
-inline int iround(const T& v)
+inline BOOST_GPU_ENABLED int iround(const T& v)
 {
    return iround(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long lround(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED long lround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    T r = boost::math::round(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > LONG_MAX) || (r < LONG_MIN))
+#else
    if((r > (std::numeric_limits<long>::max)()) || (r < (std::numeric_limits<long>::min)()))
+#endif
       return static_cast<long int>(policies::raise_rounding_error("boost::math::lround<%1%>(%1%)", 0, v, 0L, pol));
    return static_cast<long int>(r);
 }
 template <class T>
-inline long lround(const T& v)
+inline BOOST_GPU_ENABLED long lround(const T& v)
 {
    return lround(v, policies::policy<>());
 }
@@ -111,16 +119,20 @@ inline long lround(const T& v)
 #ifdef BOOST_HAS_LONG_LONG

 template <class T, class Policy>
-inline boost::long_long_type llround(const T& v, const Policy& pol)
+inline BOOST_GPU_ENABLED boost::long_long_type llround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    T r = boost::math::round(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > LLONG_MAX) || (r < LLONG_MIN))
+#else
    if((r > (std::numeric_limits<boost::long_long_type>::max)()) || (r < (std::numeric_limits<boost::long_long_type>::min)()))
+#endif
       return static_cast<boost::long_long_type>(policies::raise_rounding_error("boost::math::llround<%1%>(%1%)", 0, v, static_cast<boost::long_long_type>(0), pol));
    return static_cast<boost::long_long_type>(r);
 }
 template <class T>
-inline boost::long_long_type llround(const T& v)
+inline BOOST_GPU_ENABLED boost::long_long_type llround(const T& v)
 {
    return llround(v, policies::policy<>());
 }

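Note the pattern used throughout iround/lround/llround above: on the device (__CUDA_ARCH__ defined) the range check uses the INT_MAX/LONG_MAX/LLONG_MAX macros, presumably because std::numeric_limits was not reliably usable in device code at the time; on the host the std::numeric_limits form is kept. Host-side behaviour is unchanged, as in this sketch of mine:

    #include <boost/math/special_functions/round.hpp>
    #include <iostream>

    int main()
    {
        std::cout << boost::math::round(2.5) << "\n";    // 3 (halves round away from zero)
        std::cout << boost::math::iround(2.5) << "\n";   // 3 as an int
        std::cout << boost::math::llround(1e15) << "\n"; // 1000000000000000
        return 0;
    }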
@@ -81,7 +81,7 @@ inline BOOST_GPU_ENABLED long ltrunc(const T& v, const Policy& pol)
    typedef typename tools::promote_args<T>::type result_type;
    result_type r = boost::math::trunc(v, pol);
 #ifdef __CUDA_ARCH__
-   if((r > LONG_MAX) || (r < LONG_LIN))
+   if((r > LONG_MAX) || (r < LONG_MIN))
 #else
    if((r > (std::numeric_limits<long>::max)()) || (r < (std::numeric_limits<long>::min)()))
 #endif
@@ -102,7 +102,11 @@ inline BOOST_GPU_ENABLED boost::long_long_type lltrunc(const T& v, const Policy&
    BOOST_MATH_STD_USING
    typedef typename tools::promote_args<T>::type result_type;
    result_type r = boost::math::trunc(v, pol);
+#ifdef __CUDA_ARCH__
+   if((r > LLONG_MAX) || (r < LLONG_MIN))
+#else
    if((r > (std::numeric_limits<boost::long_long_type>::max)()) || (r < (std::numeric_limits<boost::long_long_type>::min)()))
+#endif
       return static_cast<boost::long_long_type>(policies::raise_rounding_error("boost::math::lltrunc<%1%>(%1%)", 0, v, static_cast<boost::long_long_type>(0), pol));
    return static_cast<boost::long_long_type>(r);
 }
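The tests below all lean on two small helpers that are referenced but not included in this commit: cuda_managed_ptr.hpp and stopwatch.hpp. Their real contents may differ; a hypothetical minimal sketch of the managed-memory wrapper, just to make the tests readable, could be:

    #include <cuda_runtime.h>
    #include <cstddef>

    // Hypothetical sketch: RAII wrapper over CUDA unified memory, so the same
    // allocation is addressable from both host and device code.
    template <class T>
    class cuda_managed_ptr
    {
        T* p_;
        cuda_managed_ptr(const cuda_managed_ptr&);            // non-copyable
        cuda_managed_ptr& operator=(const cuda_managed_ptr&);
    public:
        explicit cuda_managed_ptr(std::size_t n) : p_(0)
        {
            cudaMallocManaged((void**)&p_, n * sizeof(T));
        }
        ~cuda_managed_ptr() { cudaFree(p_); }
        T* get() const { return p_; }
        T& operator[](std::size_t i) const { return p_[i]; }
    };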
test/cuda/changesign_double.cu (new file, 111 lines)
@@ -0,0 +1,111 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/sign.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::changesign(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        }
        if(i % 2)  // flip the sign of every other input
            h_A[i] = -h_A[i];
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::changesign(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
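The watch timer from stopwatch.hpp is likewise not shown in the diff. A hypothetical stand-in with the same interface (reset()/elapsed() in seconds), assuming C++11 <chrono>:

    #include <chrono>

    // Hypothetical sketch of the 'watch' class the tests construct.
    class watch
    {
        std::chrono::high_resolution_clock::time_point start_;
    public:
        watch() : start_(std::chrono::high_resolution_clock::now()) {}
        void reset() { start_ = std::chrono::high_resolution_clock::now(); }
        double elapsed() const
        {
            return std::chrono::duration<double>(
                std::chrono::high_resolution_clock::now() - start_).count();
        }
    };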
test/cuda/copysign_double.cu (new file, 111 lines)
@@ -0,0 +1,111 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/sign.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::copysign(in[i], float_type(-1.0));
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        }
        if(i % 2)  // flip the sign of every other input
            h_A[i] = -h_A[i];
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::copysign(h_A[i], float_type(-1.0)));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/fpclassify_double.cu (new file, 112 lines)
@@ -0,0 +1,112 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, int *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::fpclassify(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<int> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<int> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::fpclassify(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/isfinite_double.cu (new file, 112 lines)
@@ -0,0 +1,112 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, bool *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::isfinite(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<bool> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<bool> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::isfinite(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/isinf_double.cu (new file, 112 lines)
@@ -0,0 +1,112 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, bool *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::isinf(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<bool> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<bool> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::isinf(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/isnan_double.cu (new file, 112 lines)
@@ -0,0 +1,112 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, bool *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::isnan(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<bool> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<bool> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::isnan(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/isnormal_double.cu (new file, 112 lines)
@@ -0,0 +1,112 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, bool *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::isnormal(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<bool> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<bool> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::isnormal(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/modf_double.cu (new file, 102 lines)
@@ -0,0 +1,102 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/modf.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    float_type fract;
    int i_part;
    long l_part;
    long long ll_part;

    if (i < numElements)
    {
        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    float_type fract;
    for(int i = 0; i < numElements; ++i)
        results.push_back(4 * boost::math::modf(h_A[i], &fract));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
test/cuda/round_double.cu (new file, 96 lines)
@@ -0,0 +1,96 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions/round.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 *
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> h_C(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(4 * boost::math::round(h_A[i]));
    double t = w.elapsed();
    // check the results
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
114
test/cuda/sign_double.cu
Normal file
114
test/cuda/sign_double.cu
Normal file
@@ -0,0 +1,114 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <cstdlib>
#include <limits>
#include <boost/math/special_functions/sign.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA kernel device code.
 */
__global__ void cuda_test(const float_type *in, int *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::sign(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Testing sign of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<int> h_C(numElements);

    // Initialize the input vector, mixing in zeros, infinities and NaNs:
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
        if(i % 2)   // negate every other element (i % 1 is always zero, so the original never negated anything)
            h_A[i] = -h_A[i];
    }

    // Launch the test CUDA kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    cudaDeviceSynchronize();  // the launch is asynchronous: wait before timing and before touching h_C
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct: recompute on the host...
    std::vector<int> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::sign(h_A[i]));
    double t = w.elapsed();
    // ...and compare against the device results:
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
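The watch type used for timing comes from "stopwatch.hpp", also not shown in this diff. Since the tests only call elapsed() and reset(), a plain std::chrono sketch would satisfy the interface (an assumption, not the actual header):

#include <chrono>

class watch
{
    std::chrono::high_resolution_clock::time_point start;
public:
    watch() : start(std::chrono::high_resolution_clock::now()) {}
    void reset() { start = std::chrono::high_resolution_clock::now(); }
    double elapsed() const
    {
        // seconds since construction or the last reset()
        return std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
    }
};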
114 test/cuda/signbit_double.cu Normal file
@@ -0,0 +1,114 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <cstdlib>
#include <limits>
#include <boost/math/special_functions/sign.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA kernel device code.
 */
__global__ void cuda_test(const float_type *in, int *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::signbit(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Testing signbit of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<int> h_C(numElements);

    // Initialize the input vector, mixing in zeros, infinities and NaNs:
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
        switch(i % 55)
        {
        case 1:
            h_A[i] = 0;
            break;
        case 2:
            h_A[i] = std::numeric_limits<float_type>::infinity();
            break;
        case 3:
            h_A[i] = -std::numeric_limits<float_type>::infinity();
            break;
        case 4:
            h_A[i] = std::numeric_limits<float_type>::quiet_NaN();
            break;
        }
        if(i % 2)   // negate every other element (i % 1 is always zero, so the original never negated anything)
            h_A[i] = -h_A[i];
    }

    // Launch the test CUDA kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    cudaDeviceSynchronize();  // the launch is asynchronous: wait before timing and before touching h_C
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct: recompute on the host...
    std::vector<int> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::signbit(h_A[i]));
    double t = w.elapsed();
    // ...and compare against the device results:
    for(int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
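The sign and signbit tests are deliberately near-identical, because the two functions differ only on special values: sign returns -1, 0 or +1 by value comparison, while signbit reports the raw sign bit, so negative zero (and negatively signed NaNs) count as negative. A small host-only illustration:

#include <boost/math/special_functions/sign.hpp>
#include <cassert>

int main()
{
    assert(boost::math::sign(-2.5) == -1);
    assert(boost::math::sign(-0.0) == 0);     // -0.0 compares equal to zero
    assert(boost::math::signbit(-0.0) != 0);  // but its sign bit is set
    assert(boost::math::signbit(2.5) == 0);
    return 0;
}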
96 test/cuda/trunc_double.cu Normal file
@@ -0,0 +1,96 @@
// Copyright John Maddock 2016.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <cstdlib>
#include <boost/math/special_functions/trunc.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA kernel device code.
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Testing trunc of " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> h_A(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> h_C(numElements);

    // Initialize the input vector
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the test CUDA kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
    cudaDeviceSynchronize();  // the launch is asynchronous: wait before timing and before touching h_C
    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct: recompute on the host.
    // The kernel summed trunc, itrunc, ltrunc and lltrunc, which all agree
    // on these inputs, hence the factor 4 below.
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(4 * boost::math::trunc(h_A[i]));
    double t = w.elapsed();
    // compare against the device results:
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
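How these .cu files are compiled is not recorded in this diff; as a rough guide, each test should build standalone with nvcc, given Boost and the helper headers on the include path (the flags below are illustrative, not taken from the commit):

nvcc -std=c++14 -I /path/to/boost -I test/cuda -o trunc_double test/cuda/trunc_double.cu
./trunc_double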