// Copyright John Maddock 2016. // Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" // For the CUDA runtime routines (prefixed with "cuda_") #include typedef float float_type; __host__ __device__ float_type func(float_type x) { BOOST_MATH_STD_USING return 1/(1+x*x); } /** * CUDA Kernel Device code * */ __global__ void cuda_test(float_type *out, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; float_type tol = boost::math::tools::root_epsilon(); float_type error; float_type L1; boost::math::size_t levels; if (i < numElements) { out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); } } /** * Host main routine */ int main(void) { // Error code to check return values for CUDA calls cudaError_t err = cudaSuccess; // Print the vector length to be used, and compute its size int numElements = 50000; std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; // Allocate the managed input vector A cuda_managed_ptr input_vector(numElements); // Allocate the managed output vector C cuda_managed_ptr output_vector(numElements); // Initialize the input vectors for (int i = 0; i < numElements; ++i) { input_vector[i] = M_PI * (static_cast(i) / numElements); } // Launch the Vector Add CUDA Kernel int threadsPerBlock = 512; int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; watch w; cuda_test<<>>(output_vector.get(), numElements); cudaDeviceSynchronize(); std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; err = cudaGetLastError(); if (err != cudaSuccess) { std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; return EXIT_FAILURE; } // Verify that the result vector is correct std::vector results; results.reserve(numElements); w.reset(); float_type tol = boost::math::tools::root_epsilon(); float_type error; float_type L1; boost::math::quadrature::exp_sinh integrator; for(int i = 0; i < numElements; ++i) { results.push_back(integrator.integrate(func, tol, &error, &L1)); } double t = w.elapsed(); // check the results int failed_count = 0; for(int i = 0; i < numElements; ++i) { const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); if (eps > 10) { std::cerr << std::setprecision(std::numeric_limits::digits10) << "Result verification failed at element " << i << "!\n" << "Device: " << output_vector[i] << "\n Host: " << results[i] << "\n Eps: " << eps << "\n"; failed_count++; } if (failed_count > 100) { break; } } if (failed_count != 0) { std::cout << "Test FAILED" << std::endl; return EXIT_FAILURE; } std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; std::cout << "Done\n"; return 0; }