2
0
mirror of https://github.com/boostorg/math.git synced 2026-01-19 04:22:09 +00:00

Merge pull request #1196 from boostorg/GPU_Batch_11

GPU Batch 11
This commit is contained in:
Matt Borland
2024-09-16 08:41:54 -04:00
committed by GitHub
15 changed files with 3831 additions and 7 deletions

View File

@@ -1,5 +1,6 @@
[/
Copyright (c) 2017 Nick Thompson
Copyright (c) 2024 Matt Borland
Use, modification and distribution are subject to the
Boost Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -538,6 +539,30 @@ This form integrates just fine over (-log([pi]/2), +[infin]) using either the `t
[endsect] [/section:de_caveats Caveats]
[section:gpu_usage GPU Usage]
``
#include <boost/math/quadrature/exp_sinh.hpp>
namespace boost{ namespace math{ namespace quadrature {
template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
}}}
``
Quadrature is additionally able to run on CUDA (NVCC and NVRTC) platforms.
The major difference is outlined in the above function signatures.
When used on device these are free standing functions instead of using OOP like on the host.
The tables of abscissas and weights are stored in shared read only memory on the device instead of being initialized when the class is constructed.
An example use case would be in the finite elements method computing a stiffness matrix since it would consist of many different functions.
[endsect] [/section:gpu_usage Usage]
[section:de_refes References]
* Hidetosi Takahasi and Masatake Mori, ['Double Exponential Formulas for Numerical Integration] Publ. Res. Inst. Math. Sci., 9 (1974), pp. 721-741.

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
// Copyright Nick Thompson, 2017
// Copyright Matt Borland, 2024
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
@@ -7,6 +8,10 @@
#ifndef BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
#define BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
#include <boost/math/tools/config.hpp>
#ifndef BOOST_MATH_HAS_NVRTC
#include <cmath>
#include <string>
#include <vector>
@@ -15,7 +20,6 @@
#include <boost/math/tools/atomic.hpp>
#include <boost/math/policies/error_handling.hpp>
#include <boost/math/special_functions/trunc.hpp>
#include <boost/math/tools/config.hpp>
#ifdef BOOST_MATH_HAS_THREADS
#include <mutex>
@@ -485,4 +489,865 @@ void sinh_sinh_detail<Real, Policy>::init(const std::integral_constant<int, 4>&)
#endif
}}}}
#endif
#endif // BOOST_MATH_HAS_NVRTC
#ifdef BOOST_MATH_ENABLE_CUDA
#include <boost/math/tools/cstdint.hpp>
#include <boost/math/tools/precision.hpp>
#include <boost/math/tools/type_traits.hpp>
#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/math/policies/error_handling.hpp>
namespace boost {
namespace math {
namespace quadrature {
namespace detail {
__constant__ float m_abscissas_float_1[4] =
{ 3.08828742e+00f, 1.48993185e+02f, 3.41228925e+06f, 2.06932577e+18f, };
__constant__ float m_abscissas_float_2[4] =
{ 9.13048763e-01f, 1.41578929e+01f, 6.70421552e+03f, 9.64172533e+10f, };
__constant__ float m_abscissas_float_3[8] =
{ 4.07297690e-01f, 1.68206671e+00f, 6.15089799e+00f, 4.00396235e+01f, 7.92920025e+02f, 1.02984971e+05f,
3.03862311e+08f, 1.56544547e+14f, };
__constant__ float m_abscissas_float_4[16] =
{ 1.98135272e-01f, 6.40155674e-01f, 1.24892870e+00f, 2.26608084e+00f, 4.29646270e+00f, 9.13029039e+00f,
2.31110765e+01f, 7.42770603e+01f, 3.26720921e+02f, 2.15948569e+03f, 2.41501526e+04f, 5.31819400e+05f,
2.80058686e+07f, 4.52406508e+09f, 3.08561257e+12f, 1.33882673e+16f, };
__constant__ float m_abscissas_float_5[32] =
{ 9.83967894e-02f, 3.00605618e-01f, 5.19857979e-01f, 7.70362083e-01f, 1.07131137e+00f, 1.45056976e+00f,
1.95077855e+00f, 2.64003177e+00f, 3.63137237e+00f, 5.11991533e+00f, 7.45666098e+00f, 1.13022613e+01f,
1.79641069e+01f, 3.01781070e+01f, 5.40387580e+01f, 1.04107731e+02f, 2.18029520e+02f, 5.02155699e+02f,
1.28862131e+03f, 3.73921687e+03f, 1.24750730e+04f, 4.87639975e+04f, 2.28145658e+05f, 1.30877796e+06f,
9.46084663e+06f, 8.88883120e+07f, 1.12416883e+09f, 1.99127673e+10f, 5.16743469e+11f, 2.06721881e+13f,
1.35061503e+15f, 1.53854066e+17f, };
__constant__ float m_abscissas_float_6[65] =
{ 4.91151004e-02f, 1.48013150e-01f, 2.48938814e-01f, 3.53325424e-01f, 4.62733557e-01f, 5.78912068e-01f,
7.03870253e-01f, 8.39965859e-01f, 9.90015066e-01f, 1.15743257e+00f, 1.34641276e+00f, 1.56216711e+00f,
1.81123885e+00f, 2.10192442e+00f, 2.44484389e+00f, 2.85372075e+00f, 3.34645891e+00f, 3.94664582e+00f,
4.68567310e+00f, 5.60576223e+00f, 6.76433234e+00f, 8.24038318e+00f, 1.01439436e+01f, 1.26302471e+01f,
1.59213040e+01f, 2.03392186e+01f, 2.63584645e+01f, 3.46892633e+01f, 4.64129147e+01f, 6.32055079e+01f,
8.77149726e+01f, 1.24209693e+02f, 1.79718635e+02f, 2.66081728e+02f, 4.03727303e+02f, 6.28811307e+02f,
1.00707984e+03f, 1.66156823e+03f, 2.82965144e+03f, 4.98438627e+03f, 9.10154693e+03f, 1.72689266e+04f,
3.41309958e+04f, 7.04566898e+04f, 1.52340422e+05f, 3.46047978e+05f, 8.28472421e+05f, 2.09759615e+06f,
5.63695080e+06f, 1.61407141e+07f, 4.94473068e+07f, 1.62781052e+08f, 5.78533297e+08f, 2.23083854e+09f,
9.38239131e+09f, 4.32814954e+10f, 2.20307274e+11f, 1.24524507e+12f, 7.86900053e+12f, 5.59953143e+13f,
4.52148695e+14f, 4.17688952e+15f, 4.45286776e+16f, 5.52914285e+17f, 8.07573252e+18f, };
__constant__ float m_abscissas_float_7[129] =
{ 2.45471558e-02f, 7.37246687e-02f, 1.23152531e-01f, 1.73000138e-01f, 2.23440665e-01f, 2.74652655e-01f,
3.26821679e-01f, 3.80142101e-01f, 4.34818964e-01f, 4.91070037e-01f, 5.49128046e-01f, 6.09243132e-01f,
6.71685571e-01f, 7.36748805e-01f, 8.04752842e-01f, 8.76048080e-01f, 9.51019635e-01f, 1.03009224e+00f,
1.11373586e+00f, 1.20247203e+00f, 1.29688123e+00f, 1.39761124e+00f, 1.50538689e+00f, 1.62102121e+00f,
1.74542840e+00f, 1.87963895e+00f, 2.02481711e+00f, 2.18228138e+00f, 2.35352849e+00f, 2.54026147e+00f,
2.74442267e+00f, 2.96823279e+00f, 3.21423687e+00f, 3.48535896e+00f, 3.78496698e+00f, 4.11695014e+00f,
4.48581137e+00f, 4.89677825e+00f, 5.35593629e+00f, 5.87038976e+00f, 6.44845619e+00f, 7.09990245e+00f,
7.83623225e+00f, 8.67103729e+00f, 9.62042778e+00f, 1.07035620e+01f, 1.19433001e+01f, 1.33670142e+01f,
1.50075962e+01f, 1.69047155e+01f, 1.91063967e+01f, 2.16710044e+01f, 2.46697527e+01f, 2.81898903e+01f,
3.23387613e+01f, 3.72490076e+01f, 4.30852608e+01f, 5.00527965e+01f, 5.84087761e+01f, 6.84769282e+01f,
8.06668178e+01f, 9.54992727e+01f, 1.13640120e+02f, 1.35945194e+02f, 1.63520745e+02f, 1.97804969e+02f,
2.40678754e+02f, 2.94617029e+02f, 3.62896953e+02f, 4.49886178e+02f, 5.61444735e+02f, 7.05489247e+02f,
8.92790773e+02f, 1.13811142e+03f, 1.46183599e+03f, 1.89233262e+03f, 2.46939604e+03f, 3.24931157e+03f,
4.31236711e+03f, 5.77409475e+03f, 7.80224724e+03f, 1.06426753e+04f, 1.46591538e+04f, 2.03952854e+04f,
2.86717062e+04f, 4.07403376e+04f, 5.85318231e+04f, 8.50568927e+04f, 1.25064927e+05f, 1.86137394e+05f,
2.80525578e+05f, 4.28278249e+05f, 6.62634051e+05f, 1.03944324e+06f, 1.65385743e+06f, 2.67031565e+06f,
4.37721203e+06f, 7.28807171e+06f, 1.23317299e+07f, 2.12155729e+07f, 3.71308625e+07f, 6.61457938e+07f,
1.20005529e+08f, 2.21862941e+08f, 4.18228294e+08f, 8.04370413e+08f, 1.57939299e+09f, 3.16812242e+09f,
6.49660681e+09f, 1.36285199e+10f, 2.92686390e+10f, 6.43979867e+10f, 1.45275523e+11f, 3.36285446e+11f,
7.99420279e+11f, 1.95326423e+12f, 4.90958187e+12f, 1.27062273e+13f, 3.38907099e+13f, 9.32508403e+13f,
2.64948942e+14f, 7.78129518e+14f, 2.36471505e+15f, 7.44413803e+15f, 2.43021724e+16f, 8.23706864e+16f,
2.90211705e+17f, 1.06415768e+18f, 4.06627711e+18f, };
__constant__ float m_abscissas_float_8[259] =
{ 1.22722792e-02f, 3.68272289e-02f, 6.14133763e-02f, 8.60515971e-02f, 1.10762884e-01f, 1.35568393e-01f,
1.60489494e-01f, 1.85547813e-01f, 2.10765290e-01f, 2.36164222e-01f, 2.61767321e-01f, 2.87597761e-01f,
3.13679240e-01f, 3.40036029e-01f, 3.66693040e-01f, 3.93675878e-01f, 4.21010910e-01f, 4.48725333e-01f,
4.76847237e-01f, 5.05405685e-01f, 5.34430786e-01f, 5.63953775e-01f, 5.94007101e-01f, 6.24624511e-01f,
6.55841151e-01f, 6.87693662e-01f, 7.20220285e-01f, 7.53460977e-01f, 7.87457528e-01f, 8.22253686e-01f,
8.57895297e-01f, 8.94430441e-01f, 9.31909591e-01f, 9.70385775e-01f, 1.00991475e+00f, 1.05055518e+00f,
1.09236885e+00f, 1.13542087e+00f, 1.17977990e+00f, 1.22551840e+00f, 1.27271289e+00f, 1.32144424e+00f,
1.37179794e+00f, 1.42386447e+00f, 1.47773961e+00f, 1.53352485e+00f, 1.59132774e+00f, 1.65126241e+00f,
1.71344993e+00f, 1.77801893e+00f, 1.84510605e+00f, 1.91485658e+00f, 1.98742510e+00f, 2.06297613e+00f,
2.14168493e+00f, 2.22373826e+00f, 2.30933526e+00f, 2.39868843e+00f, 2.49202464e+00f, 2.58958621e+00f,
2.69163219e+00f, 2.79843963e+00f, 2.91030501e+00f, 3.02754584e+00f, 3.15050230e+00f, 3.27953915e+00f,
3.41504770e+00f, 3.55744805e+00f, 3.70719145e+00f, 3.86476298e+00f, 4.03068439e+00f, 4.20551725e+00f,
4.38986641e+00f, 4.58438376e+00f, 4.78977239e+00f, 5.00679110e+00f, 5.23625945e+00f, 5.47906320e+00f,
5.73616037e+00f, 6.00858792e+00f, 6.29746901e+00f, 6.60402117e+00f, 6.92956515e+00f, 7.27553483e+00f,
7.64348809e+00f, 8.03511888e+00f, 8.45227058e+00f, 8.89695079e+00f, 9.37134780e+00f, 9.87784877e+00f,
1.04190601e+01f, 1.09978298e+01f, 1.16172728e+01f, 1.22807990e+01f, 1.29921443e+01f, 1.37554055e+01f,
1.45750793e+01f, 1.54561061e+01f, 1.64039187e+01f, 1.74244972e+01f, 1.85244301e+01f, 1.97109839e+01f,
2.09921804e+01f, 2.23768845e+01f, 2.38749023e+01f, 2.54970927e+01f, 2.72554930e+01f, 2.91634608e+01f,
3.12358351e+01f, 3.34891185e+01f, 3.59416839e+01f, 3.86140099e+01f, 4.15289481e+01f, 4.47120276e+01f,
4.81918020e+01f, 5.20002465e+01f, 5.61732106e+01f, 6.07509371e+01f, 6.57786566e+01f, 7.13072704e+01f,
7.73941341e+01f, 8.41039609e+01f, 9.15098607e+01f, 9.96945411e+01f, 1.08751694e+02f, 1.18787600e+02f,
1.29922990e+02f, 1.42295202e+02f, 1.56060691e+02f, 1.71397955e+02f, 1.88510933e+02f, 2.07632988e+02f,
2.29031559e+02f, 2.53013612e+02f, 2.79932028e+02f, 3.10193130e+02f, 3.44265522e+02f, 3.82690530e+02f,
4.26094527e+02f, 4.75203518e+02f, 5.30860437e+02f, 5.94045681e+02f, 6.65901543e+02f, 7.47761337e+02f,
8.41184173e+02f, 9.47996570e+02f, 1.07034233e+03f, 1.21074246e+03f, 1.37216724e+03f, 1.55812321e+03f,
1.77275819e+03f, 2.02098849e+03f, 2.30865326e+03f, 2.64270219e+03f, 3.03142418e+03f, 3.48472668e+03f,
4.01447750e+03f, 4.63492426e+03f, 5.36320995e+03f, 6.22000841e+03f, 7.23030933e+03f, 8.42439022e+03f,
9.83902287e+03f, 1.15189746e+04f, 1.35188810e+04f, 1.59055875e+04f, 1.87610857e+04f, 2.21862046e+04f,
2.63052621e+04f, 3.12719440e+04f, 3.72767546e+04f, 4.45564828e+04f, 5.34062659e+04f, 6.41950058e+04f,
7.73851264e+04f, 9.35579699e+04f, 1.13446538e+05f, 1.37977827e+05f, 1.68327749e+05f, 2.05992575e+05f,
2.52882202e+05f, 3.11442272e+05f, 3.84814591e+05f, 4.77048586e+05f, 5.93380932e+05f, 7.40606619e+05f,
9.27573047e+05f, 1.16584026e+06f, 1.47056632e+06f, 1.86169890e+06f, 2.36558487e+06f, 3.01715270e+06f,
3.86288257e+06f, 4.96486431e+06f, 6.40636283e+06f, 8.29948185e+06f, 1.07957589e+07f, 1.41008733e+07f,
1.84951472e+07f, 2.43622442e+07f, 3.22295113e+07f, 4.28249388e+07f, 5.71579339e+07f, 7.66343793e+07f,
1.03221273e+08f, 1.39683399e+08f, 1.89925150e+08f, 2.59486540e+08f, 3.56266474e+08f, 4.91582541e+08f,
6.81731647e+08f, 9.50299811e+08f, 1.33159830e+09f, 1.87580198e+09f, 2.65667391e+09f, 3.78324022e+09f,
5.41753185e+09f, 7.80169537e+09f, 1.12996537e+10f, 1.64614916e+10f, 2.41235400e+10f, 3.55648690e+10f,
5.27534501e+10f, 7.87357211e+10f, 1.18256902e+11f, 1.78754944e+11f, 2.71963306e+11f, 4.16512215e+11f,
6.42178186e+11f, 9.96872550e+11f, 1.55821233e+12f, 2.45280998e+12f, 3.88865623e+12f, 6.20986899e+12f,
9.98992422e+12f, 1.61915800e+13f, 2.64432452e+13f, 4.35201885e+13f, 7.21888469e+13f, 1.20699764e+14f,
2.03448372e+14f, 3.45755310e+14f, 5.92524851e+14f, 1.02405779e+15f, 1.78517405e+15f, 3.13930699e+15f,
5.56985627e+15f, 9.97176335e+15f, 1.80168749e+16f, 3.28570986e+16f, 6.04901854e+16f, 1.12437528e+17f,
2.11044513e+17f, 4.00073701e+17f, 7.66084936e+17f, 1.48201877e+18f, 2.89694543e+18f, 5.72279017e+18f,
1.14268996e+19f, };
__constant__ float* m_abscissas_float[8] = {
m_abscissas_float_1,
m_abscissas_float_2,
m_abscissas_float_3,
m_abscissas_float_4,
m_abscissas_float_5,
m_abscissas_float_6,
m_abscissas_float_7,
m_abscissas_float_8,
};
__constant__ float m_weights_float_1[4] =
{ 7.86824160e+00f, 8.80516388e+02f, 5.39627832e+07f, 8.87651190e+19f, };
__constant__ float m_weights_float_2[4] =
{ 2.39852428e+00f, 5.24459642e+01f, 6.45788782e+04f, 2.50998524e+12f, };
__constant__ float m_weights_float_3[8] =
{ 1.74936958e+00f, 3.97965898e+00f, 1.84851460e+01f, 1.86488072e+02f, 5.97420570e+03f, 1.27041264e+06f,
6.16419301e+09f, 5.23085003e+15f, };
__constant__ float m_weights_float_4[16] =
{ 1.61385906e+00f, 1.99776729e+00f, 3.02023198e+00f, 5.47764184e+00f, 1.17966092e+01f, 3.03550485e+01f,
9.58442179e+01f, 3.89387024e+02f, 2.17919325e+03f, 1.83920812e+04f, 2.63212061e+05f, 7.42729651e+06f,
5.01587565e+08f, 1.03961087e+11f, 9.10032891e+13f, 5.06865116e+17f, };
__constant__ float m_weights_float_5[32] =
{ 1.58146596e+00f, 1.66914991e+00f, 1.85752319e+00f, 2.17566262e+00f, 2.67590138e+00f, 3.44773868e+00f,
4.64394654e+00f, 6.53020450e+00f, 9.58228502e+00f, 1.46836141e+01f, 2.35444955e+01f, 3.96352727e+01f,
7.03763521e+01f, 1.32588012e+02f, 2.66962565e+02f, 5.79374920e+02f, 1.36869193e+03f, 3.55943572e+03f,
1.03218668e+04f, 3.38662130e+04f, 1.27816626e+05f, 5.65408251e+05f, 2.99446204e+06f, 1.94497502e+07f,
1.59219301e+08f, 1.69428882e+09f, 2.42715618e+10f, 4.87031785e+11f, 1.43181966e+13f, 6.48947152e+14f,
4.80375775e+16f, 6.20009636e+18f, };
__constant__ float m_weights_float_6[65] =
{ 1.57345777e+00f, 1.59489276e+00f, 1.63853652e+00f, 1.70598041e+00f, 1.79972439e+00f, 1.92332285e+00f,
2.08159737e+00f, 2.28093488e+00f, 2.52969785e+00f, 2.83878478e+00f, 3.22239575e+00f, 3.69908136e+00f,
4.29318827e+00f, 5.03686536e+00f, 5.97287114e+00f, 7.15853842e+00f, 8.67142780e+00f, 1.06174736e+01f,
1.31428500e+01f, 1.64514563e+01f, 2.08309945e+01f, 2.66923599e+01f, 3.46299351e+01f, 4.55151836e+01f,
6.06440809e+01f, 8.19729692e+01f, 1.12502047e+02f, 1.56909655e+02f, 2.22620435e+02f, 3.21638549e+02f,
4.73757451e+02f, 7.12299455e+02f, 1.09460965e+03f, 1.72169779e+03f, 2.77592491e+03f, 4.59523007e+03f,
7.82342759e+03f, 1.37235744e+04f, 2.48518896e+04f, 4.65553875e+04f, 9.04176678e+04f, 1.82484396e+05f,
3.83680026e+05f, 8.42627197e+05f, 1.93843257e+06f, 4.68511285e+06f, 1.19352867e+07f, 3.21564375e+07f,
9.19600893e+07f, 2.80222318e+08f, 9.13611083e+08f, 3.20091090e+09f, 1.21076526e+10f, 4.96902475e+10f,
2.22431575e+11f, 1.09212534e+12f, 5.91688298e+12f, 3.55974344e+13f, 2.39435365e+14f, 1.81355107e+15f,
1.55873671e+16f, 1.53271488e+17f, 1.73927478e+18f, 2.29884122e+19f, 3.57403070e+20f, };
__constant__ float m_weights_float_7[129] =
{ 1.57146132e+00f, 1.57679017e+00f, 1.58749564e+00f, 1.60367396e+00f, 1.62547113e+00f, 1.65308501e+00f,
1.68676814e+00f, 1.72683132e+00f, 1.77364814e+00f, 1.82766042e+00f, 1.88938482e+00f, 1.95942057e+00f,
2.03845873e+00f, 2.12729290e+00f, 2.22683194e+00f, 2.33811466e+00f, 2.46232715e+00f, 2.60082286e+00f,
2.75514621e+00f, 2.92706011e+00f, 3.11857817e+00f, 3.33200254e+00f, 3.56996830e+00f, 3.83549565e+00f,
4.13205150e+00f, 4.46362211e+00f, 4.83479919e+00f, 5.25088196e+00f, 5.71799849e+00f, 6.24325042e+00f,
6.83488580e+00f, 7.50250620e+00f, 8.25731548e+00f, 9.11241941e+00f, 1.00831875e+01f, 1.11876913e+01f,
1.24472371e+01f, 1.38870139e+01f, 1.55368872e+01f, 1.74323700e+01f, 1.96158189e+01f, 2.21379089e+01f,
2.50594593e+01f, 2.84537038e+01f, 3.24091185e+01f, 3.70329629e+01f, 4.24557264e+01f, 4.88367348e+01f,
5.63712464e+01f, 6.52994709e+01f, 7.59180776e+01f, 8.85949425e+01f, 1.03788130e+02f, 1.22070426e+02f,
1.44161210e+02f, 1.70968019e+02f, 2.03641059e+02f, 2.43645006e+02f, 2.92854081e+02f, 3.53678602e+02f,
4.29234308e+02f, 5.23570184e+02f, 6.41976690e+02f, 7.91405208e+02f, 9.81042209e+02f, 1.22309999e+03f,
1.53391256e+03f, 1.93546401e+03f, 2.45753455e+03f, 3.14073373e+03f, 4.04081819e+03f, 5.23488160e+03f,
6.83029446e+03f, 8.97771323e+03f, 1.18901592e+04f, 1.58712239e+04f, 2.13571111e+04f, 2.89798371e+04f,
3.96630673e+04f, 5.47687519e+04f, 7.63235654e+04f, 1.07371915e+05f, 1.52531667e+05f, 2.18877843e+05f,
3.17362450e+05f, 4.65120153e+05f, 6.89253766e+05f, 1.03311989e+06f, 1.56688798e+06f, 2.40549203e+06f,
3.73952896e+06f, 5.88912115e+06f, 9.39904635e+06f, 1.52090328e+07f, 2.49628719e+07f, 4.15775926e+07f,
7.03070537e+07f, 1.20759856e+08f, 2.10788251e+08f, 3.74104720e+08f, 6.75449459e+08f, 1.24131674e+09f,
2.32331003e+09f, 4.43117602e+09f, 8.61744649e+09f, 1.70983691e+10f, 3.46357452e+10f, 7.16760712e+10f,
1.51634762e+11f, 3.28172932e+11f, 7.27110260e+11f, 1.65049955e+12f, 3.84133815e+12f, 9.17374427e+12f,
2.24990195e+13f, 5.67153509e+13f, 1.47074225e+14f, 3.92701252e+14f, 1.08063998e+15f, 3.06767147e+15f,
8.99238679e+15f, 2.72472254e+16f, 8.54294612e+16f, 2.77461372e+17f, 9.34529948e+17f, 3.26799612e+18f,
1.18791443e+19f, 4.49405341e+19f, 1.77170665e+20f, };
__constant__ float m_weights_float_8[259] =
{ 1.57096255e+00f, 1.57229290e+00f, 1.57495658e+00f, 1.57895955e+00f, 1.58431079e+00f, 1.59102230e+00f,
1.59910918e+00f, 1.60858966e+00f, 1.61948515e+00f, 1.63182037e+00f, 1.64562338e+00f, 1.66092569e+00f,
1.67776241e+00f, 1.69617233e+00f, 1.71619809e+00f, 1.73788633e+00f, 1.76128784e+00f, 1.78645779e+00f,
1.81345587e+00f, 1.84234658e+00f, 1.87319943e+00f, 1.90608922e+00f, 1.94109632e+00f, 1.97830698e+00f,
2.01781368e+00f, 2.05971547e+00f, 2.10411838e+00f, 2.15113585e+00f, 2.20088916e+00f, 2.25350798e+00f,
2.30913084e+00f, 2.36790578e+00f, 2.42999091e+00f, 2.49555516e+00f, 2.56477893e+00f, 2.63785496e+00f,
2.71498915e+00f, 2.79640147e+00f, 2.88232702e+00f, 2.97301705e+00f, 3.06874019e+00f, 3.16978367e+00f,
3.27645477e+00f, 3.38908227e+00f, 3.50801806e+00f, 3.63363896e+00f, 3.76634859e+00f, 3.90657947e+00f,
4.05479525e+00f, 4.21149322e+00f, 4.37720695e+00f, 4.55250922e+00f, 4.73801517e+00f, 4.93438579e+00f,
5.14233166e+00f, 5.36261713e+00f, 5.59606472e+00f, 5.84356014e+00f, 6.10605759e+00f, 6.38458564e+00f,
6.68025373e+00f, 6.99425915e+00f, 7.32789480e+00f, 7.68255767e+00f, 8.05975815e+00f, 8.46113023e+00f,
8.88844279e+00f, 9.34361190e+00f, 9.82871448e+00f, 1.03460033e+01f, 1.08979234e+01f, 1.14871305e+01f,
1.21165112e+01f, 1.27892047e+01f, 1.35086281e+01f, 1.42785033e+01f, 1.51028871e+01f, 1.59862046e+01f,
1.69332867e+01f, 1.79494108e+01f, 1.90403465e+01f, 2.02124072e+01f, 2.14725057e+01f, 2.28282181e+01f,
2.42878539e+01f, 2.58605342e+01f, 2.75562800e+01f, 2.93861096e+01f, 3.13621485e+01f, 3.34977526e+01f,
3.58076454e+01f, 3.83080730e+01f, 4.10169773e+01f, 4.39541917e+01f, 4.71416602e+01f, 5.06036855e+01f,
5.43672075e+01f, 5.84621188e+01f, 6.29216205e+01f, 6.77826252e+01f, 7.30862125e+01f, 7.88781469e+01f,
8.52094636e+01f, 9.21371360e+01f, 9.97248336e+01f, 1.08043785e+02f, 1.17173764e+02f, 1.27204209e+02f,
1.38235512e+02f, 1.50380485e+02f, 1.63766039e+02f, 1.78535118e+02f, 1.94848913e+02f, 2.12889407e+02f,
2.32862309e+02f, 2.55000432e+02f, 2.79567594e+02f, 3.06863126e+02f, 3.37227087e+02f, 3.71046310e+02f,
4.08761417e+02f, 4.50874968e+02f, 4.97960949e+02f, 5.50675821e+02f, 6.09771424e+02f, 6.76110054e+02f,
7.50682104e+02f, 8.34626760e+02f, 9.29256285e+02f, 1.03608458e+03f, 1.15686082e+03f, 1.29360914e+03f,
1.44867552e+03f, 1.62478326e+03f, 1.82509876e+03f, 2.05330964e+03f, 2.31371761e+03f, 2.61134924e+03f,
2.95208799e+03f, 3.34283233e+03f, 3.79168493e+03f, 4.30817984e+03f, 4.90355562e+03f, 5.59108434e+03f,
6.38646863e+03f, 7.30832183e+03f, 8.37874981e+03f, 9.62405722e+03f, 1.10756067e+04f, 1.27708661e+04f,
1.47546879e+04f, 1.70808754e+04f, 1.98141031e+04f, 2.30322789e+04f, 2.68294532e+04f, 3.13194118e+04f,
3.66401221e+04f, 4.29592484e+04f, 5.04810088e+04f, 5.94547213e+04f, 7.01854788e+04f, 8.30475173e+04f,
9.85009981e+04f, 1.17113127e+05f, 1.39584798e+05f, 1.66784302e+05f, 1.99790063e+05f, 2.39944995e+05f,
2.88925794e+05f, 3.48831531e+05f, 4.22297220e+05f, 5.12639825e+05f, 6.24046488e+05f, 7.61817907e+05f,
9.32683930e+05f, 1.14521401e+06f, 1.41035265e+06f, 1.74212004e+06f, 2.15853172e+06f, 2.68280941e+06f,
3.34498056e+06f, 4.18399797e+06f, 5.25055801e+06f, 6.61086017e+06f, 8.35163942e+06f, 1.05869253e+07f,
1.34671524e+07f, 1.71914827e+07f, 2.20245345e+07f, 2.83191730e+07f, 3.65476782e+07f, 4.73445266e+07f,
6.15653406e+07f, 8.03684303e+07f, 1.05328028e+08f, 1.38592169e+08f, 1.83103699e+08f, 2.42910946e+08f,
3.23606239e+08f, 4.32947522e+08f, 5.81743297e+08f, 7.85117979e+08f, 1.06432920e+09f, 1.44938958e+09f,
1.98286647e+09f, 2.72541431e+09f, 3.76386796e+09f, 5.22313881e+09f, 7.28378581e+09f, 1.02080964e+10f,
1.43789932e+10f, 2.03583681e+10f, 2.89749983e+10f, 4.14577375e+10f, 5.96383768e+10f, 8.62622848e+10f,
1.25466705e+11f, 1.83521298e+11f, 2.69981221e+11f, 3.99492845e+11f, 5.94638056e+11f, 8.90440997e+11f,
1.34155194e+12f, 2.03376855e+12f, 3.10262796e+12f, 4.76359832e+12f, 7.36142036e+12f, 1.14512696e+13f,
1.79331419e+13f, 2.82758550e+13f, 4.48929705e+13f, 7.17780287e+13f, 1.15585510e+14f, 1.87483389e+14f,
3.06351036e+14f, 5.04340065e+14f, 8.36616340e+14f, 1.39855635e+15f, 2.35633575e+15f, 4.00176517e+15f,
6.85137513e+15f, 1.18269011e+16f, 2.05867353e+16f, 3.61396878e+16f, 6.39911218e+16f, 1.14301619e+17f,
2.05988138e+17f, 3.74584679e+17f, 6.87444303e+17f, 1.27340764e+18f, 2.38124192e+18f, 4.49583562e+18f,
8.57144202e+18f, 1.65044358e+19f, 3.21010035e+19f, 6.30778012e+19f, 1.25240403e+20f, 2.51300530e+20f,
5.09677626e+20f, };
__constant__ float* m_weights_float[8] = {
m_weights_float_1,
m_weights_float_2,
m_weights_float_3,
m_weights_float_4,
m_weights_float_5,
m_weights_float_6,
m_weights_float_7,
m_weights_float_8
};
__constant__ double m_abscissas_double_1[6] =
{ 3.088287417976322866e+00, 1.489931846492091580e+02, 3.412289247883437102e+06, 2.069325766042617791e+18,
2.087002407609475560e+50, 2.019766160717908151e+137, };
__constant__ double m_abscissas_double_2[6] =
{ 9.130487626376696748e-01, 1.415789294662811592e+01, 6.704215516223276482e+03, 9.641725327150499415e+10,
2.508950760085778485e+30, 1.447263535710337145e+83, };
__constant__ double m_abscissas_double_3[12] =
{ 4.072976900657586902e-01, 1.682066707021148743e+00, 6.150897986386729515e+00, 4.003962351929400222e+01,
7.929200247931026321e+02, 1.029849713330979583e+05, 3.038623109252438574e+08, 1.565445474362494869e+14,
4.042465098430219104e+23, 1.321706827429658179e+39, 4.991231782099557998e+64, 7.352943850359875966e+106, };
__constant__ double m_abscissas_double_4[24] =
{ 1.981352722514781726e-01, 6.401556735005260177e-01, 1.248928698253977663e+00, 2.266080840944321232e+00,
4.296462696702327381e+00, 9.130290387099955696e+00, 2.311107653864279933e+01, 7.427706034324012430e+01,
3.267209207115258917e+02, 2.159485694311818716e+03, 2.415015262896413060e+04, 5.318194002756929158e+05,
2.800586857217043323e+07, 4.524065079794338780e+09, 3.085612573980677122e+12, 1.338826733015807478e+16,
6.254617176562341381e+20, 6.182098535814164754e+26, 3.077293649788458067e+34, 2.348957289370104303e+44,
1.148543197899469758e+57, 2.255300070010069868e+73, 1.877919500569195394e+94, 1.367473887938624280e+121, };
__constant__ double m_abscissas_double_5[49] =
{ 9.839678940067320339e-02, 3.006056176599550351e-01, 5.198579789949384900e-01, 7.703620832988877009e-01,
1.071311369641311830e+00, 1.450569758088998445e+00, 1.950778549520360334e+00, 2.640031773695551468e+00,
3.631372373667412273e+00, 5.119915330903350570e+00, 7.456660981404883289e+00, 1.130226126889972624e+01,
1.796410692472772550e+01, 3.017810704601898222e+01, 5.403875800312370567e+01, 1.041077314477469548e+02,
2.180295201202628077e+02, 5.021556986259101646e+02, 1.288621310998222420e+03, 3.739216870800548324e+03,
1.247507297020191232e+04, 4.876399753226692124e+04, 2.281456582219130122e+05, 1.308777960064843017e+06,
9.460846634209664077e+06, 8.888831203637279622e+07, 1.124168828974344134e+09, 1.991276729532144470e+10,
5.167434691060984650e+11, 2.067218814203990888e+13, 1.350615033184100406e+15, 1.538540662836508188e+17,
3.290747290540350661e+19, 1.437291381884498816e+22, 1.409832445530347286e+25, 3.459135480277971441e+28,
2.398720582340954092e+32, 5.398806604617292960e+36, 4.613340002580628610e+41, 1.787685909667902457e+47,
3.841984370124338536e+53, 5.752797955708583700e+60, 7.771812038427286551e+68, 1.269673044204081626e+78,
3.495676773765731568e+88, 2.362519474971692445e+100, 6.002143893273651123e+113, 9.290716303464155539e+128,
1.514442238033847090e+146, };
__constant__ double m_abscissas_double_6[98] =
{ 4.911510035029024930e-02, 1.480131496743607333e-01, 2.489388137406836857e-01, 3.533254236926684378e-01,
4.627335566122353259e-01, 5.789120681640963067e-01, 7.038702533860627799e-01, 8.399658591446505688e-01,
9.900150664244376147e-01, 1.157432570143699131e+00, 1.346412759185361763e+00, 1.562167113901335551e+00,
1.811238852782323380e+00, 2.101924419006550301e+00, 2.444843885584197934e+00, 2.853720746632915024e+00,
3.346458910955350787e+00, 3.946645821057838387e+00, 4.685673101596678529e+00, 5.605762230908151175e+00,
6.764332336830574204e+00, 8.240383175379985221e+00, 1.014394356129857730e+01, 1.263024714338892472e+01,
1.592130395780345258e+01, 2.033921861921857185e+01, 2.635846445760633752e+01, 3.468926333224152409e+01,
4.641291467019728963e+01, 6.320550793890424203e+01, 8.771497261808906374e+01, 1.242096926240411498e+02,
1.797186347845127557e+02, 2.660817283327900190e+02, 4.037273029575712841e+02, 6.288113066545908703e+02,
1.007079837507490594e+03, 1.661568229185114288e+03, 2.829651440786582598e+03, 4.984386266585669139e+03,
9.101546927647810893e+03, 1.726892655475049727e+04, 3.413099578778601190e+04, 7.045668977053092802e+04,
1.523404217761279128e+05, 3.460479782897947414e+05, 8.284724209233183002e+05, 2.097596146601193946e+06,
5.636950798861273236e+06, 1.614071410855607245e+07, 4.944730678915060360e+07, 1.627810516820991356e+08,
5.785332971632280838e+08, 2.230838540681955690e+09, 9.382391306064739643e+09, 4.328149544776551692e+10,
2.203072744049242904e+11, 1.245245067109136413e+12, 7.869000534957822375e+12, 5.599531432979422461e+13,
4.521486949902090877e+14, 4.176889516548293265e+15, 4.452867759650496656e+16, 5.529142853140498068e+17,
8.075732516562854275e+18, 1.402046916260468698e+20, 2.925791412832239850e+21, 7.426433029335410886e+22,
2.321996331245735364e+24, 9.064194250638442432e+25, 4.481279048819445609e+27, 2.849046304726990645e+29,
2.367381159183355975e+31, 2.615825578455121227e+33, 3.914764948263290808e+35, 8.092042448555929219e+37,
2.358921320940630332e+40, 9.915218648535332591e+42, 6.152851059342658764e+45, 5.780276340144515388e+48,
8.443751734186488626e+51, 1.973343350899766708e+55, 7.605247378556219980e+58, 4.992057104939510418e+62,
5.775863423903912316e+66, 1.221808201945355603e+71, 4.912917230387133816e+75, 3.913971813732202372e+80,
6.456388069905286787e+85, 2.311225068528010358e+91, 1.887458157719431339e+97, 3.708483165438453094e+103,
1.855198812283538635e+110, 2.509787873171705318e+117, 9.790423755591216617e+124, 1.179088807944050747e+133,
4.714631846722476620e+141, 6.762657785959713240e+150, };
__constant__ double m_abscissas_double_7[196] =
{ 2.454715583629863651e-02, 7.372466873903346224e-02, 1.231525309416766543e-01, 1.730001377719248556e-01,
2.234406649596860001e-01, 2.746526549718518258e-01, 3.268216792980646669e-01, 3.801421009804789245e-01,
4.348189637215614948e-01, 4.910700365099428407e-01, 5.491280459480215441e-01, 6.092431324382654397e-01,
6.716855712021148069e-01, 7.367488049067938643e-01, 8.047528416336950644e-01, 8.760480802482050705e-01,
9.510196351823332253e-01, 1.030092244532470067e+00, 1.113735859588680765e+00, 1.202472030918058876e+00,
1.296881226496863751e+00, 1.397611241828373026e+00, 1.505386891360545205e+00, 1.621021205894798030e+00,
1.745428403369044572e+00, 1.879638952031029331e+00, 2.024817107609328524e+00, 2.182281382147884181e+00,
2.353528494823881355e+00, 2.540261468229626457e+00, 2.744422672171478111e+00, 2.968232787190606619e+00,
3.214236869520657666e+00, 3.485358957907730467e+00, 3.784966983117372821e+00, 4.116950138940295100e+00,
4.485811369388231710e+00, 4.896778246562001812e+00, 5.355936290826725948e+00, 5.870389762600956907e+00,
6.448456189131117605e+00, 7.099902452679558236e+00, 7.836232253282841261e+00, 8.671037293575230635e+00,
9.620427777985990363e+00, 1.070356198876799531e+01, 1.194330008139441022e+01, 1.336701421038499647e+01,
1.500759615914396343e+01, 1.690471548203528376e+01, 1.910639668731689597e+01, 2.167100443216577994e+01,
2.466975274695099197e+01, 2.818989025157845355e+01, 3.233876132429401745e+01, 3.724900758097245740e+01,
4.308526084907741997e+01, 5.005279647654703975e+01, 5.840877607253876528e+01, 6.847692821534239862e+01,
8.066681777060714848e+01, 9.549927270200249260e+01, 1.136401195769487885e+02, 1.359451944976603209e+02,
1.635207451879744447e+02, 1.978049687912586950e+02, 2.406787535889776661e+02, 2.946170292930555023e+02,
3.628969532147125333e+02, 4.498861782715596902e+02, 5.614447353133496106e+02, 7.054892470899271429e+02,
8.927907732799964116e+02, 1.138111424979478376e+03, 1.461835991563605367e+03, 1.892332623444716186e+03,
2.469396036186133479e+03, 3.249311569298824731e+03, 4.312367113170283012e+03, 5.774094754500139661e+03,
7.802247237500851845e+03, 1.064267530975806972e+04, 1.465915383535674990e+04, 2.039528541239754835e+04,
2.867170622421556265e+04, 4.074033762183453297e+04, 5.853182310596923393e+04, 8.505689265265206640e+04,
1.250649269847856615e+05, 1.861373943166749766e+05, 2.805255777452010927e+05, 4.282782486084761748e+05,
6.626340506127657304e+05, 1.039443239650339565e+06, 1.653857426112961316e+06, 2.670315650125279161e+06,
4.377212026624358795e+06, 7.288071713698413821e+06, 1.233172993400331694e+07, 2.121557285769933699e+07,
3.713086254861535383e+07, 6.614579377352135534e+07, 1.200055291694917110e+08, 2.218629410296880690e+08,
4.182282939928687703e+08, 8.043704132493714804e+08, 1.579392989425668114e+09, 3.168122415524104635e+09,
6.496606811549861323e+09, 1.362851988356444486e+10, 2.926863897008707708e+10, 6.439798665209493735e+10,
1.452755233772903022e+11, 3.362854459389246576e+11, 7.994202785433479271e+11, 1.953264233362291960e+12,
4.909581868242554569e+12, 1.270622730765015610e+13, 3.389070986742985764e+13, 9.325084030208844833e+13,
2.649489423834534140e+14, 7.781295184094957195e+14, 2.364715052527355639e+15, 7.444138031465958255e+15,
2.430217240684749635e+16, 8.237068641534357762e+16, 2.902117050664548840e+17, 1.064157679404037013e+18,
4.066277106061960017e+18, 1.621274233630359097e+19, 6.754156830915450013e+19, 2.944056841733781919e+20,
1.344640139549107817e+21, 6.444586158944723300e+21, 3.246218667554608934e+22, 1.721234579556653533e+23,
9.622533890240474391e+23, 5.681407260417956671e+24, 3.548890779995928184e+25, 2.349506425672269562e+26,
1.651618130605205643e+27, 1.235147426493113059e+28, 9.845947239792057550e+28, 8.383130781984610418e+29,
7.639649461399172445e+30, 7.467862732233885201e+31, 7.847691482004993660e+32, 8.886032557626454704e+33,
1.086734890678302436e+35, 1.438967777036538458e+36, 2.068168865475603521e+37, 3.234885320223912385e+38,
5.521233641542628514e+39, 1.031148231194663855e+41, 2.113272035816365982e+42, 4.766724345485077520e+43,
1.186961550990218287e+45, 3.273172169205847573e+46, 1.002821226769167753e+48, 3.424933903935156479e+49,
1.308436017026428736e+51, 5.611378330048420503e+52, 2.711424806327139291e+54, 1.481771793644066442e+56,
9.194282071042778804e+57, 6.503661455875355562e+59, 5.266329986868627303e+61, 4.902662807969347359e+63,
5.270511057289557050e+65, 6.572856511670583316e+67, 9.553956030013225387e+69, 1.626491911159411616e+72,
3.259410915500951223e+74, 7.728460318113614280e+76, 2.179881996905918059e+79, 7.354484388371505915e+81,
2.984831270803957746e+84, 1.465828267813438962e+87, 8.763355972629864261e+89, 6.417909665847831130e+92,
5.794958649229893510e+95, 6.494224472311908365e+98, 9.095000156016433698e+101, 1.603058498455299102e+105,
3.582099119119320529e+108, 1.022441227139854687e+112, 3.756872185015086057e+115, 1.791363463832849159e+119,
1.117641882039472124e+123, 9.202159565546528285e+126, 1.008716474827888568e+131, 1.485546487089301805e+135,
2.966961534830566097e+139, 8.114207284664369360e+143, 3.069178087507669739e+148, 1.622223681147791473e+153, };
__constant__ double m_abscissas_double_8[391] =
{ 1.227227917054637830e-02, 3.682722894492590471e-02, 6.141337626871079991e-02, 8.605159708778207907e-02,
1.107628840017845446e-01, 1.355683934957785482e-01, 1.604894937454335489e-01, 1.855478131645089496e-01,
2.107652898670700524e-01, 2.361642222214626268e-01, 2.617673206785495261e-01, 2.875977610631342900e-01,
3.136792395249035647e-01, 3.400360293536632770e-01, 3.666930398731810193e-01, 3.936758776386451797e-01,
4.210109101746846268e-01, 4.487253325041450341e-01, 4.768472367324829462e-01, 5.054056849688209375e-01,
5.344307858825229079e-01, 5.639537752137267134e-01, 5.940071005777549000e-01, 6.246245109268716053e-01,
6.558411510586397969e-01, 6.876936615883514922e-01, 7.202202848338683401e-01, 7.534609770949572224e-01,
7.874575278460963461e-01, 8.222536864020499377e-01, 8.578952966595825808e-01, 8.944304405668593009e-01,
9.319095910247435485e-01, 9.703857749817920659e-01, 1.009914747547728584e+00, 1.050555178019083150e+00,
1.092368848786092579e+00, 1.135420868172514300e+00, 1.179779898350424466e+00, 1.225518399571142610e+00,
1.272712892062026473e+00, 1.321444237057985065e+00, 1.371797938567245953e+00, 1.423864467614384096e+00,
1.477739610861208115e+00, 1.533524845679288858e+00, 1.591327743938355098e+00, 1.651262406984310076e+00,
1.713449934511288211e+00, 1.778018930286256858e+00, 1.845106047964720870e+00, 1.914856580544951899e+00,
1.987425097349017093e+00, 2.062976132795275283e+00, 2.141684931642916785e+00, 2.223738255848994521e+00,
2.309335258687213796e+00, 2.398688432341103821e+00, 2.492024635808356095e+00, 2.589586210645122756e+00,
2.691632192846832444e+00, 2.798439630014497291e+00, 2.910305013902562652e+00, 3.027545839497364963e+00,
3.150502302946919722e+00, 3.279539151967394330e+00, 3.415047703805410611e+00, 3.557448047456550733e+00,
3.707191448649779817e+00, 3.864762978128342125e+00, 4.030684386016531344e+00, 4.205517247588613835e+00,
4.389866408585172458e+00, 4.584383761391930748e+00, 4.789772386950687695e+00, 5.006791101261363264e+00,
5.236259449815274050e+00, 5.479063198337523150e+00, 5.736160373884817415e+00, 6.008587916728619858e+00,
6.297469010648863048e+00, 6.604021167380929133e+00, 6.929565150124677837e+00, 7.275534831383860972e+00,
7.643488092123492064e+00, 8.035118882502459288e+00, 8.452270579478188130e+00, 8.896950793641785313e+00,
9.371347797016395173e+00, 9.877848765573446033e+00, 1.041906005527762037e+01, 1.099782975900831706e+01,
1.161727282423952258e+01, 1.228079904848924611e+01, 1.299214431196691048e+01, 1.375540545535625881e+01,
1.457507926620621316e+01, 1.545610610104852468e+01, 1.640391874338302925e+01, 1.742449718154208970e+01,
1.852443008688437526e+01, 1.971098388378266494e+01, 2.099218043080961648e+01, 2.237688448013982946e+01,
2.387490225270073820e+01, 2.549709266380430464e+01, 2.725549296232531555e+01, 2.916346081119624987e+01,
3.123583514423284962e+01, 3.348911849136805118e+01, 3.594168387985465099e+01, 3.861400990307230737e+01,
4.152894811329303023e+01, 4.471202755441533396e+01, 4.819180202224910174e+01, 5.200024654361558757e+01,
5.617321062537384494e+01, 6.075093706918782079e+01, 6.577865661168003966e+01, 7.130727037357721343e+01,
7.739413413465805794e+01, 8.410396085269633392e+01, 9.150986068496734448e+01, 9.969454113547704016e+01,
1.087516939426018897e+02, 1.187876000643037532e+02, 1.299229897614516371e+02, 1.422952015056372537e+02,
1.560606914665002671e+02, 1.713979549326432406e+02, 1.885109325154830073e+02, 2.076329877740125935e+02,
2.290315594654587370e+02, 2.530136115655676467e+02, 2.799320282398896912e+02, 3.101931299766730890e+02,
3.442655222107529892e+02, 3.826905303289378387e+02, 4.260945266207607701e+02, 4.752035175892902045e+02,
5.308604366239058864e+02, 5.940456805372995009e+02, 6.659015428338778262e+02, 7.477613367309153870e+02,
8.411841730471343023e+02, 9.479965698013741524e+02, 1.070342331375881840e+03, 1.210742457518582660e+03,
1.372167241552205820e+03, 1.558123212187692722e+03, 1.772758188662716282e+03, 2.020988485411862984e+03,
2.308653259329163157e+03, 2.642702189813684273e+03, 3.031424182869210212e+03, 3.484726676985756018e+03,
4.014477504733973505e+03, 4.634924264049394751e+03, 5.363209949773439749e+03, 6.220008412114342803e+03,
7.230309332853029956e+03, 8.424390216735217783e+03, 9.839022871538541787e+03, 1.151897463083113988e+04,
1.351888098874374202e+04, 1.590558745460066947e+04, 1.876108572764816176e+04, 2.218620462393366275e+04,
2.630526205054915357e+04, 3.127194401941711057e+04, 3.727675461256652923e+04, 4.455648280312273249e+04,
5.340626592018903930e+04, 6.419500580388918123e+04, 7.738512642386820060e+04, 9.355796993981725963e+04,
1.134465375820669470e+05, 1.379778272209741713e+05, 1.683277485807887053e+05, 2.059925746120735305e+05,
2.528822024503158254e+05, 3.114422718347725915e+05, 3.848145913435570736e+05, 4.770485864966822643e+05,
5.933809324724740854e+05, 7.406066190351666115e+05, 9.275730471470643372e+05, 1.165840260940180415e+06,
1.470566322118246135e+06, 1.861698899014921971e+06, 2.365584870298354495e+06, 3.017152695505764877e+06,
3.862882573599929249e+06, 4.964864305589750358e+06, 6.406362829959736606e+06, 8.299481847261302115e+06,
1.079575892642401854e+07, 1.410087327474604091e+07, 1.849514724418250100e+07, 2.436224419670805500e+07,
3.222951131863941234e+07, 4.282493882385925337e+07, 5.715793394339267637e+07, 7.663437932745451635e+07,
1.032212725498489699e+08, 1.396833991976194842e+08, 1.899251497664892740e+08, 2.594865396467505851e+08,
3.562664742464501497e+08, 4.915825413172413471e+08, 6.817316470116958142e+08, 9.502998105202541438e+08,
1.331598295343277538e+09, 1.875801976010459831e+09, 2.656673907709731487e+09, 3.783240215616365909e+09,
5.417531848500136979e+09, 7.801695369892847510e+09, 1.129965368955098833e+10, 1.646149161390821924e+10,
2.412353995736687694e+10, 3.556486895431927094e+10, 5.275345014093760519e+10, 7.873572108325378177e+10,
1.182569020317863604e+11, 1.787549442508363461e+11, 2.719633064979986142e+11, 4.165122153119897946e+11,
6.421781858205134197e+11, 9.968725497576275918e+11, 1.558212327122960399e+12, 2.452809984907093786e+12,
3.888656232828140210e+12, 6.209868990509424909e+12, 9.989924216297983665e+12, 1.619158001378611351e+13,
2.644324518669926559e+13, 4.352018847904374786e+13, 7.218884688202741709e+13, 1.206997640727349538e+14,
2.034483722445207402e+14, 3.457553102874402920e+14, 5.925248511957505706e+14, 1.024057793713038672e+15,
1.785174045941642162e+15, 3.139306988668494696e+15, 5.569856270174890128e+15, 9.971763353834460328e+15,
1.801687491114883092e+16, 3.285709858322565542e+16, 6.049018540910759710e+16, 1.124375283211369572e+17,
2.110445125952435305e+17, 4.000737007891229992e+17, 7.660849361564329309e+17, 1.482018770996176700e+18,
2.896945433910857945e+18, 5.722790165693470493e+18, 1.142689960439921462e+19, 2.306616559984106723e+19,
4.707857184616093863e+19, 9.717346347495342813e+19, 2.028735605622585444e+20, 4.284840254171000581e+20,
9.157027329021623836e+20, 1.980457834766411777e+21, 4.335604886702252004e+21, 9.609258559714223995e+21,
2.156604630608586997e+22, 4.902045909695270289e+22, 1.128749227121328467e+23, 2.633414623049930879e+23,
6.226335684490998543e+23, 1.492205279014148921e+24, 3.625768249717590109e+24, 8.933899764961444882e+24,
2.232786981682262383e+25, 5.661295336293986732e+25, 1.456616710298133142e+26, 3.803959852868488245e+26,
1.008531585603036490e+27, 2.715247425129423358e+27, 7.425071766766651967e+27, 2.062860712173225003e+28,
5.824055458799413312e+28, 1.671388836696436644e+29, 4.876830632023956392e+29, 1.447170071146107156e+30,
4.368562208925583783e+30, 1.341873806249251338e+31, 4.195251632754338682e+31, 1.335360134828214136e+32,
4.328681350715136340e+32, 1.429401866150319186e+33, 4.809736146227180696e+33, 1.649624114567602575e+34,
5.768677492419801469e+34, 2.057442854162761350e+35, 7.486423509917811063e+35, 2.780052791791155051e+36,
1.053908347660081874e+37, 4.080046334235754223e+37, 1.613553311592805373e+38, 6.520836332997615098e+38,
2.693848186257510992e+39, 1.138002408430710800e+40, 4.917748008813924613e+40, 2.174691073191358676e+41,
9.844523745430526502e+41, 4.563707467590116732e+42, 2.167352073708379137e+43, 1.054860193887170754e+44,
5.263588225566847365e+44, 2.693772458797916623e+45, 1.414506760560163074e+46, 7.624126763512016620e+46,
4.219828148762794411e+47, 2.399387665831793264e+48, 1.402139947254117434e+49, 8.424706325525422943e+49,
5.206918479942619318e+50, 3.311787866477716151e+51, 2.168683295509859155e+52, 1.462786368779206713e+53,
1.016761784575838363e+54, 7.286460995145043184e+54, 5.386194237448865407e+55, 4.108917480528740640e+56,
3.236445625945552728e+57, 2.633440652417619669e+58, 2.214702339357939268e+59, 1.926058995948268392e+60,
1.733067740414174932e+61, 1.614307160124426969e+62, 1.557464328486352138e+63, 1.557226155197192031e+64,
1.614473962707995344e+65, 1.736617406327386105e+66, 1.939201243451190521e+67, 2.249277732936622876e+68,
2.711593798719765599e+69, 3.399628732048687119e+70, 4.435389696730206291e+71, 6.025566076164003981e+72,
8.529161425383779849e+73, 1.258746322992988688e+75, 1.938112175186560210e+76, 3.115432363572610661e+77,
5.231797674434390018e+78, 9.184930207860680757e+79, 1.686929404780378772e+81, 3.243565624474232635e+82,
6.533812498930220075e+83, 1.379898823144620314e+85, 3.057650444842839916e+86, 7.114050545839171245e+87,
1.739275024442258674e+89, 4.471782915853177804e+90, 1.210036789494028144e+92, 3.448828044590862359e+93,
1.036226783750561565e+95, 3.284801914751206038e+96, 1.099514933602224638e+98, 3.889581731378242597e+99,
1.455434287901069991e+101, 5.765729934387419019e+102, 2.420349568745475582e+104, 1.077606625929777536e+106,
5.093346988695851845e+107, 2.558090824110323997e+109, 1.366512508719047964e+111, 7.771735800763526406e+112,
4.710398638793014918e+114, 3.045563885587013954e+116, 2.102762552861442993e+118, 1.551937536212596136e+120,
1.225676354426075970e+122, 1.036950946169703711e+124, 9.407885268970827717e+125, 9.163369107785093171e+127,
9.592531095671168926e+129, 1.080486293361823875e+132, 1.311034829557782450e+134, 1.715642975932639188e+136,
2.424231742707881878e+138, 3.703231223333127919e+140, 6.123225027409988902e+142, 1.097271040771196765e+145,
2.133693643241295977e+147, 4.508099184895777328e+149, 1.036252806686291189e+152, };
__constant__ double* m_abscissas_double[8] = {
m_abscissas_double_1,
m_abscissas_double_2,
m_abscissas_double_3,
m_abscissas_double_4,
m_abscissas_double_5,
m_abscissas_double_6,
m_abscissas_double_7,
m_abscissas_double_8,
};
__constant__ double m_weights_double_1[6] =
{ 7.868241604839621507e+00, 8.805163880733011116e+02, 5.396278323520705668e+07, 8.876511896968161317e+19,
2.432791879269225553e+52, 6.399713512080202911e+139, };
__constant__ double m_weights_double_2[6] =
{ 2.398524276302635218e+00, 5.244596423726681022e+01, 6.457887819598201760e+04, 2.509985242511374506e+12,
1.774029269327138701e+32, 2.781406115983097314e+85, };
__constant__ double m_weights_double_3[12] =
{ 1.749369583108386852e+00, 3.979658981934607813e+00, 1.848514598574449570e+01, 1.864880718932067988e+02,
5.974205695263265855e+03, 1.270412635144623341e+06, 6.164193014295984071e+09, 5.230850031811222530e+15,
2.226260929943369774e+25, 1.199931102042181592e+41, 7.470602144275146214e+66, 1.814465860528410676e+109, };
__constant__ double m_weights_double_4[24] =
{ 1.613859062188366173e+00, 1.997767291869673262e+00, 3.020231979908834220e+00, 5.477641843859057761e+00,
1.179660916492671672e+01, 3.035504848518598294e+01, 9.584421793794920860e+01, 3.893870238229992076e+02,
2.179193250357911344e+03, 1.839208123964132852e+04, 2.632120612599856167e+05, 7.427296507169468210e+06,
5.015875648341232356e+08, 1.039610867241544113e+11, 9.100328911818091977e+13, 5.068651163890231571e+17,
3.039966520714902616e+22, 3.857740194672007962e+28, 2.465542763666581087e+36, 2.416439449167799461e+46,
1.517091553926604149e+59, 3.825043412021411380e+75, 4.089582396821598640e+96, 3.823775894295564050e+123, };
__constant__ double m_weights_double_5[49] =
{ 1.581465959536694744e+00, 1.669149910438534746e+00, 1.857523188595005770e+00, 2.175662623626994120e+00,
2.675901375211020564e+00, 3.447738682498791744e+00, 4.643946540355464126e+00, 6.530204496574248616e+00,
9.582285015566804961e+00, 1.468361407515440960e+01, 2.354449548740987533e+01, 3.963527273305166705e+01,
7.037635206267538547e+01, 1.325880124784838868e+02, 2.669625649541569172e+02, 5.793749198508472676e+02,
1.368691928321303605e+03, 3.559435721533130554e+03, 1.032186677270763318e+04, 3.386621302858741487e+04,
1.278166259840246830e+05, 5.654082513926693098e+05, 2.994462044781721833e+06, 1.944975023421914947e+07,
1.592193007690560588e+08, 1.694288818617459913e+09, 2.427156182311303271e+10, 4.870317848199455490e+11,
1.431819656229181793e+13, 6.489471523099301256e+14, 4.803757752508989106e+16, 6.200096361305331541e+18,
1.502568562439914899e+21, 7.436061367189688251e+23, 8.264761218677928603e+26, 2.297735027897804345e+30,
1.805449779569534997e+34, 4.604472360199061931e+38, 4.458371212030626854e+43, 1.957638261114809309e+49,
4.767368137162500764e+55, 8.088820139476721285e+62, 1.238260897349286357e+71, 2.292272505278842062e+80,
7.151392373749193549e+90, 5.476714850156044431e+102, 1.576655618370700681e+116, 2.765448595957851958e+131,
5.108051255283132673e+148, };
__constant__ double m_weights_double_6[98] =
{ 1.573457773573108386e+00, 1.594892755038663787e+00, 1.638536515530234742e+00, 1.705980408212213620e+00,
1.799724394608737275e+00, 1.923322854425656307e+00, 2.081597373313268178e+00, 2.280934883790070511e+00,
2.529697852387704655e+00, 2.838784782552951185e+00, 3.222395745020980612e+00, 3.699081358854235112e+00,
4.293188274330526800e+00, 5.036865356322330076e+00, 5.972871140910932199e+00, 7.158538424311077564e+00,
8.671427800892076385e+00, 1.061747360297922326e+01, 1.314285002260235600e+01, 1.645145625668428040e+01,
2.083099449998189069e+01, 2.669235989791640190e+01, 3.462993514791378189e+01, 4.551518362653662579e+01,
6.064408087764392116e+01, 8.197296917485846798e+01, 1.125020468081652564e+02, 1.569096552844714123e+02,
2.226204347868638276e+02, 3.216385489504077755e+02, 4.737574505945461739e+02, 7.122994548146997637e+02,
1.094609652686376553e+03, 1.721697789176049576e+03, 2.775924909253835146e+03, 4.595230066268149347e+03,
7.823427586641573672e+03, 1.372357435269105405e+04, 2.485188961645119553e+04, 4.655538745425972783e+04,
9.041766782135686884e+04, 1.824843964862728392e+05, 3.836800264094614027e+05, 8.426271970245168026e+05,
1.938432574158782634e+06, 4.685112849356485528e+06, 1.193528667218607927e+07, 3.215643752247989316e+07,
9.196008928386600386e+07, 2.802223178457559964e+08, 9.136110825267458886e+08, 3.200910900783148591e+09,
1.210765264234723689e+10, 4.969024745093101808e+10, 2.224315751863855216e+11, 1.092125344449313660e+12,
5.916882980019919359e+12, 3.559743438494577249e+13, 2.394353652945465191e+14, 1.813551073517501917e+15,
1.558736706166165738e+16, 1.532714875555114333e+17, 1.739274776190789212e+18, 2.298841216802216313e+19,
3.574030698837762664e+20, 6.604899705451419080e+21, 1.467155879591820659e+23, 3.964094964398509381e+24,
1.319342840595348793e+26, 5.482251971340400742e+27, 2.885137894723827518e+29, 1.952539840765392110e+31,
1.727051489032222797e+33, 2.031343507095439396e+35, 3.236074146972599980e+37, 7.120487412983497200e+39,
2.209552707411017265e+42, 9.886282647791384648e+44, 6.530514048788273529e+47, 6.530706672481546528e+50,
1.015518807431281951e+54, 2.526366773162394510e+57, 1.036450519906790297e+61, 7.241966032627135861e+64,
8.919402520769714938e+68, 2.008463619152992905e+73, 8.596914764830260020e+77, 7.290599546829495220e+82,
1.280199563216419112e+88, 4.878349285603201150e+93, 4.240828248064127940e+99, 8.869771764721598720e+105,
4.723342575741417669e+112, 6.802035963326188581e+119, 2.824531180990009549e+127, 3.621049216745982252e+135,
1.541270150334942520e+144, 2.353376995174362785e+153, };
__constant__ double m_weights_double_7[196] =
{ 1.571461316550783294e+00, 1.576790166316938345e+00, 1.587495640370383316e+00, 1.603673956341370210e+00,
1.625471125457493943e+00, 1.653085011915939302e+00, 1.686768142525911236e+00, 1.726831323537516202e+00,
1.773648138667236602e+00, 1.827660421478661448e+00, 1.889384817044018196e+00, 1.959420572855037091e+00,
2.038458728047908923e+00, 2.127292904083847225e+00, 2.226831940199076941e+00, 2.338114664555130296e+00,
2.462327148722991304e+00, 2.600822860927085164e+00, 2.755146214814554359e+00, 2.927060108424483555e+00,
3.118578166240921951e+00, 3.332002540339506630e+00, 3.569968300410740276e+00, 3.835495653996447262e+00,
4.132051496512934885e+00, 4.463622106699067881e+00, 4.834799191008006557e+00, 5.250881957765679608e+00,
5.717998490875333124e+00, 6.243250421598568105e+00, 6.834885801226541839e+00, 7.502506202789340802e+00,
8.257315484493544201e+00, 9.112419405864642634e+00, 1.008318749543997758e+01, 1.118769134993865202e+01,
1.244723705914106881e+01, 1.388701390605507587e+01, 1.553688715915900190e+01, 1.743237000680942831e+01,
1.961581894823993424e+01, 2.213790886354273806e+01, 2.505945934677137610e+01, 2.845370377742137561e+01,
3.240911845969524834e+01, 3.703296289480230161e+01, 4.245572644746267911e+01, 4.883673480337985582e+01,
5.637124640586975420e+01, 6.529947092752610340e+01, 7.591807755694122837e+01, 8.859494252391663822e+01,
1.037881295005788124e+02, 1.220704263969226746e+02, 1.441612098131200535e+02, 1.709680191245773511e+02,
2.036410593843575570e+02, 2.436450058708723643e+02, 2.928540812182076105e+02, 3.536786019152253392e+02,
4.292343083967296939e+02, 5.235701840488733027e+02, 6.419766898003024575e+02, 7.914052083668759283e+02,
9.810422089081931637e+02, 1.223099994999740393e+03, 1.533912555427112127e+03, 1.935464013605830339e+03,
2.457534549912886852e+03, 3.140733731623635519e+03, 4.040818188564651898e+03, 5.234881599712225681e+03,
6.830294457607329226e+03, 8.977713228649887143e+03, 1.189015920967326839e+04, 1.587122387044346962e+04,
2.135711106445789331e+04, 2.897983705189681437e+04, 3.966306726795547950e+04, 5.476875193750000787e+04,
7.632356539388055680e+04, 1.073719149754976951e+05, 1.525316674555574152e+05, 2.188778434744216586e+05,
3.173624496019295608e+05, 4.651201525869328462e+05, 6.892537656280580572e+05, 1.033119885120019982e+06,
1.566887981043252499e+06, 2.405492027026531795e+06, 3.739528964815910340e+06, 5.889121154895580032e+06,
9.399046351922342030e+06, 1.520903276129653518e+07, 2.496287187293576168e+07, 4.157759259963074840e+07,
7.030705366950267312e+07, 1.207598558452493366e+08, 2.107882509464846833e+08, 3.741047199023457864e+08,
6.754494594987415572e+08, 1.241316740415880537e+09, 2.323310032649552862e+09, 4.431176019026625759e+09,
8.617446487400900130e+09, 1.709836906604031513e+10, 3.463574521880171339e+10, 7.167607123799270726e+10,
1.516347620910054079e+11, 3.281729323238950526e+11, 7.271102600298280790e+11, 1.650499552378780378e+12,
3.841338149508803917e+12, 9.173744267785176575e+12, 2.249901946357519979e+13, 5.671535089900611731e+13,
1.470742250307697019e+14, 3.927012518464311775e+14, 1.080639977391212820e+15, 3.067671466720475189e+15,
8.992386789198328428e+15, 2.724722536524592111e+16, 8.542946122263389258e+16, 2.774613718725574755e+17,
9.345299479382029121e+17, 3.267996122987731882e+18, 1.187914433455468315e+19, 4.494053408418564214e+19,
1.771706652195486743e+20, 7.288102552885931527e+20, 3.132512430816625349e+21, 1.408743767951073110e+22,
6.638294268236060414e+22, 3.282543608403565013e+23, 1.705920098038394064e+24, 9.332259385148524285e+24,
5.382727175874888312e+25, 3.278954235122093249e+26, 2.113191697957458099e+27, 1.443411041499643040e+28,
1.046864394654982423e+29, 8.077319226958905700e+29, 6.643146963432616277e+30, 5.835670121359986260e+31,
5.486890296790230798e+32, 5.533726968508261614e+33, 5.999734996418352834e+34, 7.009176119466122569e+35,
8.844061966424597499e+36, 1.208226860869605961e+38, 1.791648514311063338e+39, 2.891313916713205762e+40,
5.091457860211527298e+41, 9.810630588402496553e+42, 2.074441239147378860e+44, 4.827650116937700540e+45,
1.240287939111549029e+47, 3.528782858644784616e+48, 1.115449490471696659e+50, 3.930510643328196314e+51,
1.549243712957852337e+53, 6.854998238041301002e+54, 3.417479961583207704e+56, 1.926905498641079990e+58,
1.233580963004919450e+60, 9.002819902898076915e+61, 7.521415141253441645e+63, 7.224277554900578993e+65,
8.012832830535078610e+67, 1.030999620286380369e+70, 1.546174957076748679e+72, 2.715803772613248694e+74,
5.615089920571746438e+76, 1.373667859345343337e+79, 3.997541020769625126e+81, 1.391500589339800087e+84,
5.826693844912022892e+86, 2.952274820929549096e+89, 1.821023061478466282e+92, 1.375973022137941526e+95,
1.281852367543412945e+98, 1.482130127201990503e+101, 2.141574273792435314e+104, 3.894495540947112380e+107,
8.978646362580102961e+110, 2.644131589807244050e+114, 1.002403539841913834e+118, 4.931412804903905259e+121,
3.174401112435865044e+125, 2.696624001761892390e+129, 3.049799322320447166e+133, 4.634041526818687785e+137,
9.548983134803106512e+141, 2.694404866192089829e+146, 1.051502720036395325e+151, 5.734170640626244955e+155, };
__constant__ double m_weights_double_8[391] =
{ 1.570962550997832611e+00, 1.572292902367211961e+00, 1.574956581912666755e+00, 1.578959553636163985e+00,
1.584310789563614305e+00, 1.591022301117035107e+00, 1.599109181186160337e+00, 1.608589657109067468e+00,
1.619485154826419743e+00, 1.631820374530739318e+00, 1.645623378191125679e+00, 1.660925689395424109e+00,
1.677762406016463717e+00, 1.696172326277082973e+00, 1.716198088860732467e+00, 1.737886327791014562e+00,
1.761287842885152410e+00, 1.786457786673686420e+00, 1.813455868772335587e+00, 1.842346578792652542e+00,
1.873199428986627521e+00, 1.906089217937612619e+00, 1.941096316736779451e+00, 1.978306979221816566e+00,
2.017813678003844337e+00, 2.059715468170813895e+00, 2.104118380732327493e+00, 2.151135848063375554e+00,
2.200889163814591418e+00, 2.253507979986114202e+00, 2.309130844113053375e+00, 2.367905779785113334e+00,
2.429990914023652954e+00, 2.495555155369085590e+00, 2.564778926893134514e+00, 2.637854958747451684e+00,
2.714989145296268067e+00, 2.796401472360280536e+00, 2.882327020626578700e+00, 2.973017051860293803e+00,
3.068740185193628238e+00, 3.169783671473487386e+00, 3.276454774427328601e+00, 3.389082268266156098e+00,
3.508018062292869136e+00, 3.633638964133530274e+00, 3.766348594369884204e+00, 3.906579466636309289e+00,
4.054795248667541120e+00, 4.211493221360917802e+00, 4.377206954666462219e+00, 4.552509221059946388e+00,
4.738015169510782826e+00, 4.934385785253587887e+00, 5.142331663338191074e+00, 5.362617126899976224e+00,
5.596064724397100194e+00, 5.843560143744373307e+00, 6.106057585381734693e+00, 6.384585640900671436e+00,
6.680253728973824449e+00, 6.994259146058412709e+00, 7.327894795748901060e+00, 7.682557667824588764e+00,
8.059758146071137270e+00, 8.461130232962342889e+00, 8.888442789395671080e+00, 9.343611899025485155e+00,
9.828714479494622022e+00, 1.034600327721380625e+01, 1.089792339849122916e+01, 1.148713054801325790e+01,
1.211651116619788555e+01, 1.278920468010096321e+01, 1.350862810871281096e+01, 1.427850329305334421e+01,
1.510288705493181327e+01, 1.598620462612703196e+01, 1.693328673269081128e+01, 1.794941076780000506e+01,
1.904034654190823159e+01, 2.021240716182964334e+01, 2.147250566192247370e+01, 2.282821809199713505e+01,
2.428785385941680425e+01, 2.586053422878117785e+01, 2.755628000354674426e+01, 2.938610955221109564e+01,
3.136214849990951329e+01, 3.349775258749912582e+01, 3.580764540799625468e+01, 3.830807296872530167e+01,
4.101697730155473447e+01, 4.395419165876113623e+01, 4.714166019494196927e+01, 5.060368545366659226e+01,
5.436720746019445252e+01, 5.846211877912138439e+01, 6.292162054058128784e+01, 6.778262518512416663e+01,
7.308621254265223015e+01, 7.887814686488147292e+01, 8.520946359734658334e+01, 9.213713603387774717e+01,
9.972483357670754649e+01, 1.080437851679046426e+02, 1.171737636088621692e+02, 1.272042089988687372e+02,
1.382355124664102373e+02, 1.503804848151483311e+02, 1.637660387526102742e+02, 1.785351181233383403e+02,
1.948489131607280604e+02, 2.128894073598352670e+02, 2.328623093447990790e+02, 2.550004322843281994e+02,
2.795675942672445782e+02, 3.068631259124280934e+02, 3.372270867451200874e+02, 3.710463099965576255e+02,
4.087614170466174911e+02, 4.508749684194593670e+02, 4.979609488959773491e+02, 5.506758209385785877e+02,
6.097714244663179092e+02, 6.761100535726473685e+02, 7.506821038741422446e+02, 8.346267600518081192e+02,
9.292562845315541998e+02, 1.036084578498234728e+03, 1.156860819661897657e+03, 1.293609142453808600e+03,
1.448675521854205144e+03, 1.624783259532197615e+03, 1.825098759915318560e+03, 2.053309635972617554e+03,
2.313717614494777200e+03, 2.611349236640186999e+03, 2.952087994093624299e+03, 3.342832332560548180e+03,
3.791684927756595099e+03, 4.308179838716318955e+03, 4.903555624570201673e+03, 5.591084343634811452e+03,
6.386468625571246341e+03, 7.308321829412979440e+03, 8.378749812799703561e+03, 9.624057218749638059e+03,
1.107560666191146008e+04, 1.277086605445904388e+04, 1.475468792019489452e+04, 1.708087537417066343e+04,
1.981410309695485051e+04, 2.303227888204754908e+04, 2.682945317928632535e+04, 3.131941178398428200e+04,
3.664012209706997997e+04, 4.295924836668690170e+04, 5.048100882639843572e+04, 5.945472133180055290e+04,
7.018547875172689579e+04, 8.304751726175694003e+04, 9.850099805053575446e+04, 1.171131266261766060e+05,
1.395847982160589845e+05, 1.667843016393077556e+05, 1.997900626520524686e+05, 2.399449946032992187e+05,
2.889257939838013232e+05, 3.488315309194304548e+05, 4.222972201496778447e+05, 5.126398246369253619e+05,
6.240464876221989792e+05, 7.618179073233615941e+05, 9.326839300224119257e+05, 1.145214007774297539e+06,
1.410352646274233119e+06, 1.742120041875863385e+06, 2.158531716934287014e+06, 2.682809410126426731e+06,
3.344980563595418861e+06, 4.183997972337706048e+06, 5.250558008165501752e+06, 6.610860174141680988e+06,
8.351639423967558693e+06, 1.058692532393929900e+07, 1.346715235106239409e+07, 1.719148271024263021e+07,
2.202453449027701694e+07, 2.831917301724337797e+07, 3.654767820268344932e+07, 4.734452657230626106e+07,
6.156534063509513873e+07, 8.036843026897869248e+07, 1.053280284359690289e+08, 1.385921689084126286e+08,
1.831036985925683524e+08, 2.429109457458640820e+08, 3.236062393759667463e+08, 4.329475218599986663e+08,
5.817432967962929479e+08, 7.851179789388191786e+08, 1.064329197627075307e+09, 1.449389582912945485e+09,
1.982866469377991849e+09, 2.725414314698094324e+09, 3.763867964111621444e+09, 5.223138814950990937e+09,
7.283785810644397704e+09, 1.020809642381158743e+10, 1.437899318470510521e+10, 2.035836812543633578e+10,
2.897499827080027444e+10, 4.145773751645494878e+10, 5.963837683872426287e+10, 8.626228483915530800e+10,
1.254667045389825180e+11, 1.835212982264913186e+11, 2.699812207400151604e+11, 3.994928452151922954e+11,
5.946380558701434550e+11, 8.904409967424091107e+11, 1.341551941677775838e+12, 2.033768550332151892e+12,
3.102627959875753214e+12, 4.763598321705862063e+12, 7.361420360560813584e+12, 1.145126961456557423e+13,
1.793314186996273926e+13, 2.827585501285792232e+13, 4.489297053678444669e+13, 7.177802872658499571e+13,
1.155855098545820625e+14, 1.874833886367883093e+14, 3.063510356402174454e+14, 5.043400653005970242e+14,
8.366163396892429890e+14, 1.398556351640947289e+15, 2.356335749516164682e+15, 4.001765167382637456e+15,
6.851375128404941445e+15, 1.182690111761543990e+16, 2.058673527013806443e+16, 3.613968784314904633e+16,
6.399112184394213551e+16, 1.143016185628376923e+17, 2.059881383915666443e+17, 3.745846788353680914e+17,
6.874443034683149068e+17, 1.273407643613485314e+18, 2.381241916829895366e+18, 4.495835617307108399e+18,
8.571442024901952701e+18, 1.650443584181656965e+19, 3.210100352421317851e+19, 6.307780124442703091e+19,
1.252404031157661279e+20, 2.513005295649985394e+20, 5.096776255690838436e+20, 1.045019200016673046e+21,
2.166476479260878466e+21, 4.542138145678395463e+21, 9.632082324449137128e+21, 2.066386536688254528e+22,
4.485529785554428251e+22, 9.853879573610977508e+22, 2.191158874464374408e+23, 4.932835964390971668e+23,
1.124501529971774363e+24, 2.596269136156756008e+24, 6.072292938313625501e+24, 1.438989066308003836e+25,
3.455841956406570469e+25, 8.412655191713576490e+25, 2.076289061650816510e+26, 5.196515024640220322e+26,
1.319173194089644043e+27, 3.397455895980380794e+27, 8.879057454438503591e+27, 2.355272361492064126e+28,
6.342762007722624824e+28, 1.734531093990859705e+29, 4.817893170606830871e+29, 1.359597346490148232e+30,
3.898969689906500392e+30, 1.136542986529989936e+31, 3.368450043991780017e+31, 1.015304084709817260e+32,
3.113144376221918237e+32, 9.713072739730140403e+32, 3.084517643581725946e+33, 9.972682139820497284e+33,
3.283625052288491586e+34, 1.101378785390827536e+35, 3.764333367592714297e+35, 1.311403465938242926e+36,
4.658135710682813672e+36, 1.687517347470511392e+37, 6.237053685018323490e+37, 2.352571314427744869e+38,
9.058938240219699936e+38, 3.562249097611136071e+39, 1.430959291578558210e+40, 5.873974584984375049e+40,
2.464828549811283787e+41, 1.057649203090855628e+42, 4.642475639281078035e+42, 2.085287118272421779e+43,
9.588439985186632177e+43, 4.514982011246092280e+44, 2.177974048341973204e+45, 1.076720976822900458e+46,
5.457267432929085589e+46, 2.836869270455781134e+47, 1.513103201392011626e+48, 8.283974667225617075e+48,
4.657239491995971344e+49, 2.689796370712836937e+50, 1.596597846911970388e+51, 9.744154538256586629e+51,
6.117238394843313065e+52, 3.952049650585241827e+53, 2.628701592074258213e+54, 1.800990196502679393e+55,
1.271554462563068383e+56, 9.255880104477760711e+56, 6.949737920133919393e+57, 5.385167200769965621e+58,
4.308493668102978774e+59, 3.560951557542178371e+60, 3.041888528384649992e+61, 2.687094441930837189e+62,
2.455920538900000855e+63, 2.323648254168641537e+64, 2.277129741584892331e+65, 2.312633552913224734e+66,
2.435407592981291129e+67, 2.660910388822465246e+68, 3.018105943423533920e+69, 3.555823489510192503e+70,
4.354188877793849013e+71, 5.544975795511813315e+72, 7.348276481909886336e+73, 1.013998025722423261e+75,
1.457911462244607943e+76, 2.185488876819505295e+77, 3.418022153286623008e+78, 5.580843920601835728e+79,
9.519586502799733908e+80, 1.697573578247197786e+82, 3.166906670990180014e+83, 6.185099106418675430e+84,
1.265541134386934377e+86, 2.714828965877756899e+87, 6.110386802964494082e+88, 1.444054086171083239e+90,
3.586083726638388165e+91, 9.365231868063239600e+92, 2.574080116205122449e+94, 7.452134689862302719e+95,
2.274309903836169819e+97, 7.323011134121164749e+98, 2.489816421737932462e+100, 8.946533386359281588e+101,
3.400401372391165979e+103, 1.368288186208928217e+105, 5.834277489829591931e+106, 2.638486937672383424e+108,
1.266728882767139521e+110, 6.462225178314182803e+111, 3.506432320607573604e+113, 2.025608933943268165e+115,
1.247041677084784707e+117, 8.189865188405279038e+118, 5.743610894406099965e+120, 4.305808934084489763e+122,
3.454156966079496755e+124, 2.968316601530352737e+126, 2.735456242372183592e+128, 2.706317176690077847e+130,
2.877679916342060385e+132, 3.292412878268106390e+134, 4.057840961953725969e+136, 5.393783049105737324e+138,
7.741523901672235406e+140, 1.201209962310668456e+143, 2.017456079556807301e+145, 3.672176623483062526e+147,
7.253163798058577630e+149, 1.556591535302570570e+152, 3.634399832790394885e+154, };
__constant__ double* m_weights_double[8] = {
m_weights_double_1,
m_weights_double_2,
m_weights_double_3,
m_weights_double_4,
m_weights_double_5,
m_weights_double_6,
m_weights_double_7,
m_weights_double_8
};
__constant__ boost::math::size_t float_coefficients_size[8] = {4, 4, 8, 16, 32, 65, 129, 259};
__constant__ boost::math::size_t double_coefficients_size[8] = {6, 6, 12, 24, 49, 98, 196, 391};
template<typename T>
struct coefficients_selector;
template<>
struct coefficients_selector<float>
{
__device__ static const auto abscissas() { return m_abscissas_float; }
__device__ static const auto weights() { return m_weights_float; }
__device__ static const auto size() { return float_coefficients_size; }
};
template<>
struct coefficients_selector<double>
{
__device__ static const auto abscissas() { return m_abscissas_double; }
__device__ static const auto weights() { return m_weights_double; }
__device__ static const auto size() { return double_coefficients_size; }
};
template <class F, class Real, class Policy = boost::math::policies::policy<> >
__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
{
BOOST_MATH_STD_USING
using boost::math::constants::half;
using boost::math::constants::half_pi;
using boost::math::size_t;
constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate";
using K = decltype(f(static_cast<Real>(0)));
static_assert(!boost::math::is_integral<K>::value,
"The return type cannot be integral, it must be either a real or complex floating point type.");
K y_max = f(boost::math::tools::max_value<Real>());
if(abs(y_max) > boost::math::tools::epsilon<Real>())
{
return static_cast<K>(policies::raise_domain_error(function,
"The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", y_max, Policy()));
}
K y_min = f(-boost::math::tools::max_value<Real>());
if(abs(y_min) > boost::math::tools::epsilon<Real>())
{
return static_cast<K>(policies::raise_domain_error(function,
"The function you are trying to integrate does not go to zero at -infinity, and instead evaluates to %1%", y_max, Policy()));
}
// Get the party started with two estimates of the integral:
const auto m_abscissas = coefficients_selector<Real>::abscissas();
const auto m_weights = coefficients_selector<Real>::weights();
const auto m_size = coefficients_selector<Real>::size();
K I0 = f(0)*half_pi<Real>();
Real L1_I0 = abs(I0);
for(size_t i = 0; i < m_size[0]; ++i)
{
Real x = m_abscissas[0][i];
K yp = f(x);
K ym = f(-x);
I0 += (yp + ym)*m_weights[0][i];
L1_I0 += (abs(yp)+abs(ym))*m_weights[0][i];
}
K I1 = I0;
Real L1_I1 = L1_I0;
for (size_t i = 0; i < m_size[1]; ++i)
{
Real x= m_abscissas[1][i];
K yp = f(x);
K ym = f(-x);
I1 += (yp + ym)*m_weights[1][i];
L1_I1 += (abs(yp) + abs(ym))*m_weights[1][i];
}
I1 *= half<Real>();
L1_I1 *= half<Real>();
Real err = abs(I0 - I1);
size_t i = 2;
for(; i <= 8U; ++i)
{
I0 = I1;
L1_I0 = L1_I1;
I1 = half<Real>()*I0;
L1_I1 = half<Real>()*L1_I0;
Real h = static_cast<Real>(1) / static_cast<Real>(1 << i);
K sum = 0;
Real absum = 0;
Real abterm1 = 1;
Real eps = boost::math::tools::epsilon<Real>()*L1_I1;
auto abscissa_row = m_abscissas[i];
auto weight_row = m_weights[i];
for(size_t j = 0; j < m_size[i]; ++j)
{
Real x = abscissa_row[j];
K yp = f(x);
K ym = f(-x);
sum += (yp + ym)*weight_row[j];
Real abterm0 = (abs(yp) + abs(ym))*weight_row[j];
absum += abterm0;
// We require two consecutive terms to be < eps in case we hit a zero of f.
if (x > static_cast<Real>(100) && abterm0 < eps && abterm1 < eps)
{
break;
}
abterm1 = abterm0;
}
I1 += sum*h;
L1_I1 += absum*h;
err = abs(I0 - I1);
if (!(boost::math::isfinite)(L1_I1))
{
constexpr auto err_msg = "The sinh_sinh quadrature evaluated your function at a singular point, leading to the value %1%.\n"
"sinh_sinh quadrature cannot handle singularities in the domain.\n"
"If you are sure your function has no singularities, please submit a bug against boost.math\n";
return static_cast<K>(policies::raise_evaluation_error(function, err_msg, I1, Policy()));
}
if (err <= tolerance*L1_I1)
{
break;
}
}
if (error)
{
*error = err;
}
if (L1)
{
*L1 = L1_I1;
}
if (levels)
{
*levels = i;
}
return I1;
}
} // Namespace detail
} // Namespace quadrature
} // Namespace math
} // Namespace boost
#endif // BOOST_MATH_ENABLE_CUDA
#endif // BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP

View File

@@ -15,11 +15,15 @@
#ifndef BOOST_MATH_QUADRATURE_EXP_SINH_HPP
#define BOOST_MATH_QUADRATURE_EXP_SINH_HPP
#include <boost/math/tools/config.hpp>
#include <boost/math/quadrature/detail/exp_sinh_detail.hpp>
#ifndef BOOST_MATH_HAS_NVRTC
#include <cmath>
#include <limits>
#include <memory>
#include <string>
#include <boost/math/quadrature/detail/exp_sinh_detail.hpp>
namespace boost{ namespace math{ namespace quadrature {
@@ -98,4 +102,79 @@ auto exp_sinh<Real, Policy>::integrate(const F& f, Real tolerance, Real* error,
}}}
#endif
#endif // BOOST_MATH_HAS_NVRTC
#ifdef BOOST_MATH_ENABLE_CUDA
#include <boost/math/tools/type_traits.hpp>
#include <boost/math/tools/cstdint.hpp>
#include <boost/math/tools/precision.hpp>
#include <boost/math/policies/error_handling.hpp>
#include <boost/math/constants/constants.hpp>
namespace boost {
namespace math {
namespace quadrature {
template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
{
BOOST_MATH_STD_USING
using K = decltype(f(a));
static_assert(!boost::math::is_integral<K>::value,
"The return type cannot be integral, it must be either a real or complex floating point type.");
constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
// Neither limit may be a NaN:
if((boost::math::isnan)(a) || (boost::math::isnan)(b))
{
return static_cast<K>(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy()));
}
// Right limit is infinite:
if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value<Real>()))
{
// If a = 0, don't use an additional level of indirection:
if (a == static_cast<Real>(0))
{
return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
}
const auto u = [&](Real t)->K { return f(t + a); };
return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
}
if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value<Real>())
{
const auto u = [&](Real t)->K { return f(b-t);};
return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
}
// Infinite limits:
if ((a <= -boost::math::tools::max_value<Real>()) && (b >= boost::math::tools::max_value<Real>()))
{
return static_cast<K>(policies::raise_domain_error(function, "Use sinh_sinh quadrature for integration over the whole real line; exp_sinh is for half infinite integrals.", a, Policy()));
}
// If we get to here then both ends must necessarily be finite:
return static_cast<K>(policies::raise_domain_error(function, "Use tanh_sinh quadrature for integration over finite domains; exp_sinh is for half infinite integrals.", a, Policy()));
}
template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
{
BOOST_MATH_STD_USING
constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
if (abs(tolerance) > 1) {
return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy());
}
return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
}
} // namespace quadrature
} // namespace math
} // namespace boost
#endif // BOOST_MATH_ENABLE_CUDA
#endif // BOOST_MATH_QUADRATURE_EXP_SINH_HPP

View File

@@ -1,4 +1,5 @@
// Copyright Nick Thompson, 2017
// Copyright Matt Borland, 2024
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
@@ -15,10 +16,17 @@
#ifndef BOOST_MATH_QUADRATURE_SINH_SINH_HPP
#define BOOST_MATH_QUADRATURE_SINH_SINH_HPP
#include <boost/math/tools/config.hpp>
#include <boost/math/tools/precision.hpp>
#include <boost/math/tools/cstdint.hpp>
#include <boost/math/quadrature/detail/sinh_sinh_detail.hpp>
#include <boost/math/policies/error_handling.hpp>
#ifndef BOOST_MATH_HAS_NVRTC
#include <cmath>
#include <limits>
#include <memory>
#include <boost/math/quadrature/detail/sinh_sinh_detail.hpp>
namespace boost{ namespace math{ namespace quadrature {
@@ -40,4 +48,25 @@ private:
};
}}}
#endif
#endif // BOOST_MATH_HAS_NVRTC
#ifdef BOOST_MATH_ENABLE_CUDA
namespace boost {
namespace math {
namespace quadrature {
template <class F, class Real, class Policy = boost::math::policies::policy<> >
__device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon<Real>(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr)
{
return detail::sinh_sinh_integrate_impl(f, tol, error, L1, levels);
}
} // namespace quadrature
} // namespace math
} // namespace boost
#endif // BOOST_MATH_ENABLE_CUDA
#endif // BOOST_MATH_QUADRATURE_SINH_SINH_HPP

View File

@@ -9,6 +9,12 @@ project : requirements
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
;
# Quad
run test_exp_sinh_quad_float.cu ;
run test_exp_sinh_quad_double.cu ;
run test_sinh_sinh_quad_float.cu ;
run test_sinh_sinh_quad_double.cu ;
# Distributions
run test_arcsine_cdf_double.cu ;
run test_arcsine_cdf_float.cu ;

View File

@@ -9,6 +9,12 @@ project : requirements
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
;
# Quad
run test_exp_sinh_quad_nvrtc_float.cpp ;
run test_exp_sinh_quad_nvrtc_double.cpp ;
run test_sinh_sinh_quad_nvrtc_float.cpp ;
run test_sinh_sinh_quad_nvrtc_double.cpp ;
# Distributions
run test_arcsine_cdf_nvrtc_double.cpp ;
run test_arcsine_cdf_nvrtc_float.cpp ;

View File

@@ -0,0 +1,133 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
typedef double float_type;
__host__ __device__ float_type func(float_type x)
{
BOOST_MATH_STD_USING
return 1/(1+x*x);
}
/**
* CUDA Kernel Device code
*
*/
__global__ void cuda_test(float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
/**
* Host main routine
*/
int main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;
// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);
// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);
// Initialize the input vectors
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
}
// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
watch w;
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
cudaDeviceSynchronize();
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
err = cudaGetLastError();
if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}
// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::exp_sinh<float_type> integrator;
for(int i = 0; i < numElements; ++i)
{
results.push_back(integrator.integrate(func, tol, &error, &L1));
}
double t = w.elapsed();
// check the results
int failed_count = 0;
for(int i = 0; i < numElements; ++i)
{
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
if (eps > 10)
{
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
<< "Result verification failed at element " << i << "!\n"
<< "Device: " << output_vector[i]
<< "\n Host: " << results[i]
<< "\n Eps: " << eps << "\n";
failed_count++;
}
if (failed_count > 100)
{
break;
}
}
if (failed_count != 0)
{
std::cout << "Test FAILED" << std::endl;
return EXIT_FAILURE;
}
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";
return 0;
}

View File

@@ -0,0 +1,133 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
typedef float float_type;
__host__ __device__ float_type func(float_type x)
{
BOOST_MATH_STD_USING
return 1/(1+x*x);
}
/**
* CUDA Kernel Device code
*
*/
__global__ void cuda_test(float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
/**
* Host main routine
*/
int main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;
// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);
// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);
// Initialize the input vectors
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
}
// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
watch w;
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
cudaDeviceSynchronize();
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
err = cudaGetLastError();
if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}
// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::exp_sinh<float_type> integrator;
for(int i = 0; i < numElements; ++i)
{
results.push_back(integrator.integrate(func, tol, &error, &L1));
}
double t = w.elapsed();
// check the results
int failed_count = 0;
for(int i = 0; i < numElements; ++i)
{
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
if (eps > 10)
{
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
<< "Result verification failed at element " << i << "!\n"
<< "Device: " << output_vector[i]
<< "\n Host: " << results[i]
<< "\n Eps: " << eps << "\n";
failed_count++;
}
if (failed_count > 100)
{
break;
}
}
if (failed_count != 0)
{
std::cout << "Test FAILED" << std::endl;
return EXIT_FAILURE;
}
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";
return 0;
}

View File

@@ -0,0 +1,206 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>
typedef double float_type;
const char* cuda_kernel = R"(
typedef double float_type;
#include <boost/math/quadrature/exp_sinh.hpp>
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
extern "C" __global__
void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
)";
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
void checkCUDAError(cudaError_t result, const char* msg)
{
if (result != cudaSuccess)
{
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
void checkCUError(CUresult result, const char* msg)
{
if (result != CUDA_SUCCESS)
{
const char* errorStr;
cuGetErrorString(result, &errorStr);
std::cerr << msg << ": " << errorStr << std::endl;
exit(EXIT_FAILURE);
}
}
void checkNVRTCError(nvrtcResult result, const char* msg)
{
if (result != NVRTC_SUCCESS)
{
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
int main()
{
try
{
// Initialize CUDA driver API
checkCUError(cuInit(0), "Failed to initialize CUDA");
// Create CUDA context
CUcontext context;
CUdevice device;
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
nvrtcProgram prog;
nvrtcResult res;
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
checkNVRTCError(res, "Failed to create NVRTC program");
nvrtcAddNameExpression(prog, "test_expm1_kernel");
#ifdef BOOST_MATH_NVRTC_CI_RUN
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif
// Compile the program
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
if (res != NVRTC_SUCCESS)
{
size_t log_size;
nvrtcGetProgramLogSize(prog, &log_size);
char* log = new char[log_size];
nvrtcGetProgramLog(prog, log);
std::cerr << "Compilation failed:\n" << log << std::endl;
delete[] log;
exit(EXIT_FAILURE);
}
// Get PTX from the program
size_t ptx_size;
nvrtcGetPTXSize(prog, &ptx_size);
char* ptx = new char[ptx_size];
nvrtcGetPTX(prog, ptx);
// Load PTX into CUDA module
CUmodule module;
CUfunction kernel;
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
int numElements = 50000;
float_type *h_in1, *h_in2, *h_out;
float_type *d_in1, *d_in2, *d_out;
// Allocate memory on the host
h_in1 = new float_type[numElements];
h_in2 = new float_type[numElements];
h_out = new float_type[numElements];
// Initialize input arrays
std::mt19937_64 rng(42);
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
for (int i = 0; i < numElements; ++i)
{
h_in1[i] = static_cast<float_type>(dist(rng));
h_in2[i] = static_cast<float_type>(dist(rng));
}
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
int blockSize = 256;
int numBlocks = (numElements + blockSize - 1) / blockSize;
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
// Verify Result
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::exp_sinh<float_type> integrator;
for (int i = 0; i < numElements; ++i)
{
auto res = integrator.integrate(func, tol, &error, &L1);
if (std::isfinite(res))
{
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
{
std::cout << "error at line: " << i
<< "\nParallel: " << h_out[i]
<< "\n Serial: " << res
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
}
}
}
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);
delete[] h_in1;
delete[] h_in2;
delete[] h_out;
nvrtcDestroyProgram(&prog);
delete[] ptx;
cuCtxDestroy(context);
std::cout << "Kernel executed successfully." << std::endl;
return 0;
}
catch(const std::exception& e)
{
std::cerr << "Stopped with exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
}

View File

@@ -0,0 +1,206 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>
typedef float float_type;
const char* cuda_kernel = R"(
typedef float float_type;
#include <boost/math/quadrature/exp_sinh.hpp>
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
extern "C" __global__
void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
)";
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
void checkCUDAError(cudaError_t result, const char* msg)
{
if (result != cudaSuccess)
{
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
void checkCUError(CUresult result, const char* msg)
{
if (result != CUDA_SUCCESS)
{
const char* errorStr;
cuGetErrorString(result, &errorStr);
std::cerr << msg << ": " << errorStr << std::endl;
exit(EXIT_FAILURE);
}
}
void checkNVRTCError(nvrtcResult result, const char* msg)
{
if (result != NVRTC_SUCCESS)
{
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
int main()
{
try
{
// Initialize CUDA driver API
checkCUError(cuInit(0), "Failed to initialize CUDA");
// Create CUDA context
CUcontext context;
CUdevice device;
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
nvrtcProgram prog;
nvrtcResult res;
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
checkNVRTCError(res, "Failed to create NVRTC program");
nvrtcAddNameExpression(prog, "test_expm1_kernel");
#ifdef BOOST_MATH_NVRTC_CI_RUN
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif
// Compile the program
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
if (res != NVRTC_SUCCESS)
{
size_t log_size;
nvrtcGetProgramLogSize(prog, &log_size);
char* log = new char[log_size];
nvrtcGetProgramLog(prog, log);
std::cerr << "Compilation failed:\n" << log << std::endl;
delete[] log;
exit(EXIT_FAILURE);
}
// Get PTX from the program
size_t ptx_size;
nvrtcGetPTXSize(prog, &ptx_size);
char* ptx = new char[ptx_size];
nvrtcGetPTX(prog, ptx);
// Load PTX into CUDA module
CUmodule module;
CUfunction kernel;
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
int numElements = 50000;
float_type *h_in1, *h_in2, *h_out;
float_type *d_in1, *d_in2, *d_out;
// Allocate memory on the host
h_in1 = new float_type[numElements];
h_in2 = new float_type[numElements];
h_out = new float_type[numElements];
// Initialize input arrays
std::mt19937_64 rng(42);
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
for (int i = 0; i < numElements; ++i)
{
h_in1[i] = static_cast<float_type>(dist(rng));
h_in2[i] = static_cast<float_type>(dist(rng));
}
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
int blockSize = 256;
int numBlocks = (numElements + blockSize - 1) / blockSize;
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
// Verify Result
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::exp_sinh<float_type> integrator;
for (int i = 0; i < numElements; ++i)
{
auto res = integrator.integrate(func, tol, &error, &L1);
if (std::isfinite(res))
{
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
{
std::cout << "error at line: " << i
<< "\nParallel: " << h_out[i]
<< "\n Serial: " << res
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
}
}
}
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);
delete[] h_in1;
delete[] h_in2;
delete[] h_out;
nvrtcDestroyProgram(&prog);
delete[] ptx;
cuCtxDestroy(context);
std::cout << "Kernel executed successfully." << std::endl;
return 0;
}
catch(const std::exception& e)
{
std::cerr << "Stopped with exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
}

View File

@@ -0,0 +1,133 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
typedef double float_type;
__host__ __device__ float_type func(float_type x)
{
BOOST_MATH_STD_USING
return 1/(1+x*x);
}
/**
* CUDA Kernel Device code
*
*/
__global__ void cuda_test(float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
/**
* Host main routine
*/
int main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;
// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);
// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);
// Initialize the input vectors
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
}
// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
watch w;
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
cudaDeviceSynchronize();
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
err = cudaGetLastError();
if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}
// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::sinh_sinh<float_type> integrator;
for(int i = 0; i < numElements; ++i)
{
results.push_back(integrator.integrate(func, tol, &error, &L1));
}
double t = w.elapsed();
// check the results
int failed_count = 0;
for(int i = 0; i < numElements; ++i)
{
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
if (eps > 10)
{
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
<< "Result verification failed at element " << i << "!\n"
<< "Device: " << output_vector[i]
<< "\n Host: " << results[i]
<< "\n Eps: " << eps << "\n";
failed_count++;
}
if (failed_count > 100)
{
break;
}
}
if (failed_count != 0)
{
std::cout << "Test FAILED" << std::endl;
return EXIT_FAILURE;
}
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";
return 0;
}

View File

@@ -0,0 +1,133 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
typedef float float_type;
__host__ __device__ float_type func(float_type x)
{
BOOST_MATH_STD_USING
return 1/(1+x*x);
}
/**
* CUDA Kernel Device code
*
*/
__global__ void cuda_test(float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
/**
* Host main routine
*/
int main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;
// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);
// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);
// Initialize the input vectors
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
}
// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
watch w;
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
cudaDeviceSynchronize();
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
err = cudaGetLastError();
if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}
// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::sinh_sinh<float_type> integrator;
for(int i = 0; i < numElements; ++i)
{
results.push_back(integrator.integrate(func, tol, &error, &L1));
}
double t = w.elapsed();
// check the results
int failed_count = 0;
for(int i = 0; i < numElements; ++i)
{
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
if (eps > 10)
{
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
<< "Result verification failed at element " << i << "!\n"
<< "Device: " << output_vector[i]
<< "\n Host: " << results[i]
<< "\n Eps: " << eps << "\n";
failed_count++;
}
if (failed_count > 100)
{
break;
}
}
if (failed_count != 0)
{
std::cout << "Test FAILED" << std::endl;
return EXIT_FAILURE;
}
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";
return 0;
}

View File

@@ -0,0 +1,206 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>
typedef double float_type;
const char* cuda_kernel = R"(
typedef double float_type;
#include <boost/math/quadrature/sinh_sinh.hpp>
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
extern "C" __global__
void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
)";
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
void checkCUDAError(cudaError_t result, const char* msg)
{
if (result != cudaSuccess)
{
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
void checkCUError(CUresult result, const char* msg)
{
if (result != CUDA_SUCCESS)
{
const char* errorStr;
cuGetErrorString(result, &errorStr);
std::cerr << msg << ": " << errorStr << std::endl;
exit(EXIT_FAILURE);
}
}
void checkNVRTCError(nvrtcResult result, const char* msg)
{
if (result != NVRTC_SUCCESS)
{
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
int main()
{
try
{
// Initialize CUDA driver API
checkCUError(cuInit(0), "Failed to initialize CUDA");
// Create CUDA context
CUcontext context;
CUdevice device;
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
nvrtcProgram prog;
nvrtcResult res;
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
checkNVRTCError(res, "Failed to create NVRTC program");
nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
#ifdef BOOST_MATH_NVRTC_CI_RUN
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif
// Compile the program
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
if (res != NVRTC_SUCCESS)
{
size_t log_size;
nvrtcGetProgramLogSize(prog, &log_size);
char* log = new char[log_size];
nvrtcGetProgramLog(prog, log);
std::cerr << "Compilation failed:\n" << log << std::endl;
delete[] log;
exit(EXIT_FAILURE);
}
// Get PTX from the program
size_t ptx_size;
nvrtcGetPTXSize(prog, &ptx_size);
char* ptx = new char[ptx_size];
nvrtcGetPTX(prog, ptx);
// Load PTX into CUDA module
CUmodule module;
CUfunction kernel;
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function");
int numElements = 50000;
float_type *h_in1, *h_in2, *h_out;
float_type *d_in1, *d_in2, *d_out;
// Allocate memory on the host
h_in1 = new float_type[numElements];
h_in2 = new float_type[numElements];
h_out = new float_type[numElements];
// Initialize input arrays
std::mt19937_64 rng(42);
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
for (int i = 0; i < numElements; ++i)
{
h_in1[i] = static_cast<float_type>(dist(rng));
h_in2[i] = static_cast<float_type>(dist(rng));
}
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
int blockSize = 256;
int numBlocks = (numElements + blockSize - 1) / blockSize;
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
// Verify Result
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::sinh_sinh<float_type> integrator;
for (int i = 0; i < numElements; ++i)
{
auto res = integrator.integrate(func, tol, &error, &L1);
if (std::isfinite(res))
{
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
{
std::cout << "error at line: " << i
<< "\nParallel: " << h_out[i]
<< "\n Serial: " << res
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
}
}
}
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);
delete[] h_in1;
delete[] h_in2;
delete[] h_out;
nvrtcDestroyProgram(&prog);
delete[] ptx;
cuCtxDestroy(context);
std::cout << "Kernel executed successfully." << std::endl;
return 0;
}
catch(const std::exception& e)
{
std::cerr << "Stopped with exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
}

View File

@@ -0,0 +1,206 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <exception>
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>
typedef float float_type;
const char* cuda_kernel = R"(
typedef float float_type;
#include <boost/math/quadrature/sinh_sinh.hpp>
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
extern "C" __global__
void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;
if (i < numElements)
{
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
}
}
)";
__host__ __device__ float_type func(float_type x)
{
return 1/(1+x*x);
}
void checkCUDAError(cudaError_t result, const char* msg)
{
if (result != cudaSuccess)
{
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
void checkCUError(CUresult result, const char* msg)
{
if (result != CUDA_SUCCESS)
{
const char* errorStr;
cuGetErrorString(result, &errorStr);
std::cerr << msg << ": " << errorStr << std::endl;
exit(EXIT_FAILURE);
}
}
void checkNVRTCError(nvrtcResult result, const char* msg)
{
if (result != NVRTC_SUCCESS)
{
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
exit(EXIT_FAILURE);
}
}
int main()
{
try
{
// Initialize CUDA driver API
checkCUError(cuInit(0), "Failed to initialize CUDA");
// Create CUDA context
CUcontext context;
CUdevice device;
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
nvrtcProgram prog;
nvrtcResult res;
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
checkNVRTCError(res, "Failed to create NVRTC program");
nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
#ifdef BOOST_MATH_NVRTC_CI_RUN
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#else
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
#endif
// Compile the program
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
if (res != NVRTC_SUCCESS)
{
size_t log_size;
nvrtcGetProgramLogSize(prog, &log_size);
char* log = new char[log_size];
nvrtcGetProgramLog(prog, log);
std::cerr << "Compilation failed:\n" << log << std::endl;
delete[] log;
exit(EXIT_FAILURE);
}
// Get PTX from the program
size_t ptx_size;
nvrtcGetPTXSize(prog, &ptx_size);
char* ptx = new char[ptx_size];
nvrtcGetPTX(prog, ptx);
// Load PTX into CUDA module
CUmodule module;
CUfunction kernel;
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function");
int numElements = 50000;
float_type *h_in1, *h_in2, *h_out;
float_type *d_in1, *d_in2, *d_out;
// Allocate memory on the host
h_in1 = new float_type[numElements];
h_in2 = new float_type[numElements];
h_out = new float_type[numElements];
// Initialize input arrays
std::mt19937_64 rng(42);
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
for (int i = 0; i < numElements; ++i)
{
h_in1[i] = static_cast<float_type>(dist(rng));
h_in2[i] = static_cast<float_type>(dist(rng));
}
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
int blockSize = 256;
int numBlocks = (numElements + blockSize - 1) / blockSize;
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
// Verify Result
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::sinh_sinh<float_type> integrator;
for (int i = 0; i < numElements; ++i)
{
auto res = integrator.integrate(func, tol, &error, &L1);
if (std::isfinite(res))
{
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
{
std::cout << "error at line: " << i
<< "\nParallel: " << h_out[i]
<< "\n Serial: " << res
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
}
}
}
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);
delete[] h_in1;
delete[] h_in2;
delete[] h_out;
nvrtcDestroyProgram(&prog);
delete[] ptx;
cuCtxDestroy(context);
std::cout << "Kernel executed successfully." << std::endl;
return 0;
}
catch(const std::exception& e)
{
std::cerr << "Stopped with exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
}