From 814cb6e49f909cae34c666b605e79a15e584764f Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 30 Aug 2024 15:46:17 -0400 Subject: [PATCH] Add overview --- doc/math.qbk | 1 + doc/overview/gpu.qbk | 59 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 doc/overview/gpu.qbk diff --git a/doc/math.qbk b/doc/math.qbk index d6b90efb0..385c93a5e 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -557,6 +557,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include overview/standalone.qbk] [include overview/result_type_calc.qbk] [include overview/error_handling.qbk] +[include overview/gpu.qbk] [section:compilers_overview Compilers] [compilers_overview] diff --git a/doc/overview/gpu.qbk b/doc/overview/gpu.qbk new file mode 100644 index 000000000..70f0164e0 --- /dev/null +++ b/doc/overview/gpu.qbk @@ -0,0 +1,59 @@ +[section:gpu Support for GPU programming in Boost.Math] + +[h4 GPU Support] + +Selected functions, distributions, tools, etc. support running on both the host and the device. +These functions will have the annotation `BOOST_MATH_GPU_ENABLED` next to their individual documentation. +We test using CUDA (both NVCC and NVRTC) as well as SYCL to provide a wide range of support. + +[h4 How to build with device support] + +When compiling with CUDA or SYCL, you must ensure that your code runs inside a kernel function. +It is not enough to simply compile existing code with the NVCC compiler to run the code on the device. 
+A simple CUDA kernel to run the Beta Distribution CDF on NVCC would be: + + __global__ void cuda_beta_dist(const double* in, double* out, int num_elements) + { + const int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < num_elements) + { + out[i] = cdf(boost::math::beta_distribution<double>(), in[i]); + } + } + +And with CUDA on NVRTC: + + const char* cuda_kernel = R"( + #include <boost/math/distributions/beta.hpp> + extern "C" __global__ + void test_beta_dist_kernel(const double* in, double* out, int num_elements) + { + const int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < num_elements) + { + out[i] = boost::math::cdf(boost::math::beta_distribution<double>(), in[i]); + } + } + )"; + +And lastly on SYCL: + + void sycl_beta_dist(const double* in, double* out, int num_elements, sycl::queue& q) + { + q.submit([&](sycl::handler& h) { + h.parallel_for(sycl::range<1>(num_elements), [=](sycl::id<1> i) { + out[i] = boost::math::cdf(boost::math::beta_distribution<double>(), in[i]); + }); + }); + } + +Once your kernel function has been written, use the framework's mechanism for launching the kernel. + +[/ + Copyright 2024. Matt Borland + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). +] +