mirror of
https://github.com/boostorg/math.git
synced 2026-01-19 04:22:09 +00:00
@@ -1,5 +1,6 @@
|
||||
[/
|
||||
Copyright (c) 2017 Nick Thompson
|
||||
Copyright (c) 2024 Matt Borland
|
||||
Use, modification and distribution are subject to the
|
||||
Boost Software License, Version 1.0. (See accompanying file
|
||||
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
@@ -538,6 +539,30 @@ This form integrates just fine over (-log([pi]/2), +[infin]) using either the `t
|
||||
|
||||
[endsect] [/section:de_caveats Caveats]
|
||||
|
||||
[section:gpu_usage GPU Usage]
|
||||
|
||||
``
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
|
||||
namespace boost{ namespace math{ namespace quadrature {
|
||||
|
||||
template <class F, class Real, class Policy = policies::policy<> >
|
||||
__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
|
||||
|
||||
template <class F, class Real, class Policy = policies::policy<> >
|
||||
__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
|
||||
|
||||
}}}
|
||||
``
|
||||
|
||||
Quadrature is additionally able to run on CUDA (NVCC and NVRTC) platforms.
|
||||
The major difference is outlined in the above function signatures.
|
||||
When used on device these are free standing functions instead of using OOP like on the host.
|
||||
The tables of abscissas and weights are stored in shared read only memory on the device instead of being initialized when the class is constructed.
|
||||
An example use case would be in the finite elements method computing a stiffness matrix since it would consist of many different functions.
|
||||
|
||||
[endsect] [/section:gpu_usage Usage]
|
||||
|
||||
[section:de_refes References]
|
||||
|
||||
* Hidetosi Takahasi and Masatake Mori, ['Double Exponential Formulas for Numerical Integration] Publ. Res. Inst. Math. Sci., 9 (1974), pp. 721-741.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
// Copyright Nick Thompson, 2017
|
||||
// Copyright Matt Borland, 2024
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt
|
||||
@@ -7,6 +8,10 @@
|
||||
#ifndef BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
|
||||
#define BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
|
||||
|
||||
#include <boost/math/tools/config.hpp>
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@@ -15,7 +20,6 @@
|
||||
#include <boost/math/tools/atomic.hpp>
|
||||
#include <boost/math/policies/error_handling.hpp>
|
||||
#include <boost/math/special_functions/trunc.hpp>
|
||||
#include <boost/math/tools/config.hpp>
|
||||
|
||||
#ifdef BOOST_MATH_HAS_THREADS
|
||||
#include <mutex>
|
||||
@@ -485,4 +489,865 @@ void sinh_sinh_detail<Real, Policy>::init(const std::integral_constant<int, 4>&)
|
||||
#endif
|
||||
|
||||
}}}}
|
||||
#endif
|
||||
|
||||
#endif // BOOST_MATH_HAS_NVRTC
|
||||
|
||||
#ifdef BOOST_MATH_ENABLE_CUDA
|
||||
|
||||
#include <boost/math/tools/cstdint.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include <boost/math/tools/type_traits.hpp>
|
||||
#include <boost/math/constants/constants.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
#include <boost/math/policies/error_handling.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace quadrature {
|
||||
namespace detail {
|
||||
|
||||
__constant__ float m_abscissas_float_1[4] =
|
||||
{ 3.08828742e+00f, 1.48993185e+02f, 3.41228925e+06f, 2.06932577e+18f, };
|
||||
|
||||
__constant__ float m_abscissas_float_2[4] =
|
||||
{ 9.13048763e-01f, 1.41578929e+01f, 6.70421552e+03f, 9.64172533e+10f, };
|
||||
|
||||
__constant__ float m_abscissas_float_3[8] =
|
||||
{ 4.07297690e-01f, 1.68206671e+00f, 6.15089799e+00f, 4.00396235e+01f, 7.92920025e+02f, 1.02984971e+05f,
|
||||
3.03862311e+08f, 1.56544547e+14f, };
|
||||
|
||||
__constant__ float m_abscissas_float_4[16] =
|
||||
{ 1.98135272e-01f, 6.40155674e-01f, 1.24892870e+00f, 2.26608084e+00f, 4.29646270e+00f, 9.13029039e+00f,
|
||||
2.31110765e+01f, 7.42770603e+01f, 3.26720921e+02f, 2.15948569e+03f, 2.41501526e+04f, 5.31819400e+05f,
|
||||
2.80058686e+07f, 4.52406508e+09f, 3.08561257e+12f, 1.33882673e+16f, };
|
||||
|
||||
__constant__ float m_abscissas_float_5[32] =
|
||||
{ 9.83967894e-02f, 3.00605618e-01f, 5.19857979e-01f, 7.70362083e-01f, 1.07131137e+00f, 1.45056976e+00f,
|
||||
1.95077855e+00f, 2.64003177e+00f, 3.63137237e+00f, 5.11991533e+00f, 7.45666098e+00f, 1.13022613e+01f,
|
||||
1.79641069e+01f, 3.01781070e+01f, 5.40387580e+01f, 1.04107731e+02f, 2.18029520e+02f, 5.02155699e+02f,
|
||||
1.28862131e+03f, 3.73921687e+03f, 1.24750730e+04f, 4.87639975e+04f, 2.28145658e+05f, 1.30877796e+06f,
|
||||
9.46084663e+06f, 8.88883120e+07f, 1.12416883e+09f, 1.99127673e+10f, 5.16743469e+11f, 2.06721881e+13f,
|
||||
1.35061503e+15f, 1.53854066e+17f, };
|
||||
|
||||
__constant__ float m_abscissas_float_6[65] =
|
||||
{ 4.91151004e-02f, 1.48013150e-01f, 2.48938814e-01f, 3.53325424e-01f, 4.62733557e-01f, 5.78912068e-01f,
|
||||
7.03870253e-01f, 8.39965859e-01f, 9.90015066e-01f, 1.15743257e+00f, 1.34641276e+00f, 1.56216711e+00f,
|
||||
1.81123885e+00f, 2.10192442e+00f, 2.44484389e+00f, 2.85372075e+00f, 3.34645891e+00f, 3.94664582e+00f,
|
||||
4.68567310e+00f, 5.60576223e+00f, 6.76433234e+00f, 8.24038318e+00f, 1.01439436e+01f, 1.26302471e+01f,
|
||||
1.59213040e+01f, 2.03392186e+01f, 2.63584645e+01f, 3.46892633e+01f, 4.64129147e+01f, 6.32055079e+01f,
|
||||
8.77149726e+01f, 1.24209693e+02f, 1.79718635e+02f, 2.66081728e+02f, 4.03727303e+02f, 6.28811307e+02f,
|
||||
1.00707984e+03f, 1.66156823e+03f, 2.82965144e+03f, 4.98438627e+03f, 9.10154693e+03f, 1.72689266e+04f,
|
||||
3.41309958e+04f, 7.04566898e+04f, 1.52340422e+05f, 3.46047978e+05f, 8.28472421e+05f, 2.09759615e+06f,
|
||||
5.63695080e+06f, 1.61407141e+07f, 4.94473068e+07f, 1.62781052e+08f, 5.78533297e+08f, 2.23083854e+09f,
|
||||
9.38239131e+09f, 4.32814954e+10f, 2.20307274e+11f, 1.24524507e+12f, 7.86900053e+12f, 5.59953143e+13f,
|
||||
4.52148695e+14f, 4.17688952e+15f, 4.45286776e+16f, 5.52914285e+17f, 8.07573252e+18f, };
|
||||
|
||||
__constant__ float m_abscissas_float_7[129] =
|
||||
{ 2.45471558e-02f, 7.37246687e-02f, 1.23152531e-01f, 1.73000138e-01f, 2.23440665e-01f, 2.74652655e-01f,
|
||||
3.26821679e-01f, 3.80142101e-01f, 4.34818964e-01f, 4.91070037e-01f, 5.49128046e-01f, 6.09243132e-01f,
|
||||
6.71685571e-01f, 7.36748805e-01f, 8.04752842e-01f, 8.76048080e-01f, 9.51019635e-01f, 1.03009224e+00f,
|
||||
1.11373586e+00f, 1.20247203e+00f, 1.29688123e+00f, 1.39761124e+00f, 1.50538689e+00f, 1.62102121e+00f,
|
||||
1.74542840e+00f, 1.87963895e+00f, 2.02481711e+00f, 2.18228138e+00f, 2.35352849e+00f, 2.54026147e+00f,
|
||||
2.74442267e+00f, 2.96823279e+00f, 3.21423687e+00f, 3.48535896e+00f, 3.78496698e+00f, 4.11695014e+00f,
|
||||
4.48581137e+00f, 4.89677825e+00f, 5.35593629e+00f, 5.87038976e+00f, 6.44845619e+00f, 7.09990245e+00f,
|
||||
7.83623225e+00f, 8.67103729e+00f, 9.62042778e+00f, 1.07035620e+01f, 1.19433001e+01f, 1.33670142e+01f,
|
||||
1.50075962e+01f, 1.69047155e+01f, 1.91063967e+01f, 2.16710044e+01f, 2.46697527e+01f, 2.81898903e+01f,
|
||||
3.23387613e+01f, 3.72490076e+01f, 4.30852608e+01f, 5.00527965e+01f, 5.84087761e+01f, 6.84769282e+01f,
|
||||
8.06668178e+01f, 9.54992727e+01f, 1.13640120e+02f, 1.35945194e+02f, 1.63520745e+02f, 1.97804969e+02f,
|
||||
2.40678754e+02f, 2.94617029e+02f, 3.62896953e+02f, 4.49886178e+02f, 5.61444735e+02f, 7.05489247e+02f,
|
||||
8.92790773e+02f, 1.13811142e+03f, 1.46183599e+03f, 1.89233262e+03f, 2.46939604e+03f, 3.24931157e+03f,
|
||||
4.31236711e+03f, 5.77409475e+03f, 7.80224724e+03f, 1.06426753e+04f, 1.46591538e+04f, 2.03952854e+04f,
|
||||
2.86717062e+04f, 4.07403376e+04f, 5.85318231e+04f, 8.50568927e+04f, 1.25064927e+05f, 1.86137394e+05f,
|
||||
2.80525578e+05f, 4.28278249e+05f, 6.62634051e+05f, 1.03944324e+06f, 1.65385743e+06f, 2.67031565e+06f,
|
||||
4.37721203e+06f, 7.28807171e+06f, 1.23317299e+07f, 2.12155729e+07f, 3.71308625e+07f, 6.61457938e+07f,
|
||||
1.20005529e+08f, 2.21862941e+08f, 4.18228294e+08f, 8.04370413e+08f, 1.57939299e+09f, 3.16812242e+09f,
|
||||
6.49660681e+09f, 1.36285199e+10f, 2.92686390e+10f, 6.43979867e+10f, 1.45275523e+11f, 3.36285446e+11f,
|
||||
7.99420279e+11f, 1.95326423e+12f, 4.90958187e+12f, 1.27062273e+13f, 3.38907099e+13f, 9.32508403e+13f,
|
||||
2.64948942e+14f, 7.78129518e+14f, 2.36471505e+15f, 7.44413803e+15f, 2.43021724e+16f, 8.23706864e+16f,
|
||||
2.90211705e+17f, 1.06415768e+18f, 4.06627711e+18f, };
|
||||
|
||||
__constant__ float m_abscissas_float_8[259] =
|
||||
{ 1.22722792e-02f, 3.68272289e-02f, 6.14133763e-02f, 8.60515971e-02f, 1.10762884e-01f, 1.35568393e-01f,
|
||||
1.60489494e-01f, 1.85547813e-01f, 2.10765290e-01f, 2.36164222e-01f, 2.61767321e-01f, 2.87597761e-01f,
|
||||
3.13679240e-01f, 3.40036029e-01f, 3.66693040e-01f, 3.93675878e-01f, 4.21010910e-01f, 4.48725333e-01f,
|
||||
4.76847237e-01f, 5.05405685e-01f, 5.34430786e-01f, 5.63953775e-01f, 5.94007101e-01f, 6.24624511e-01f,
|
||||
6.55841151e-01f, 6.87693662e-01f, 7.20220285e-01f, 7.53460977e-01f, 7.87457528e-01f, 8.22253686e-01f,
|
||||
8.57895297e-01f, 8.94430441e-01f, 9.31909591e-01f, 9.70385775e-01f, 1.00991475e+00f, 1.05055518e+00f,
|
||||
1.09236885e+00f, 1.13542087e+00f, 1.17977990e+00f, 1.22551840e+00f, 1.27271289e+00f, 1.32144424e+00f,
|
||||
1.37179794e+00f, 1.42386447e+00f, 1.47773961e+00f, 1.53352485e+00f, 1.59132774e+00f, 1.65126241e+00f,
|
||||
1.71344993e+00f, 1.77801893e+00f, 1.84510605e+00f, 1.91485658e+00f, 1.98742510e+00f, 2.06297613e+00f,
|
||||
2.14168493e+00f, 2.22373826e+00f, 2.30933526e+00f, 2.39868843e+00f, 2.49202464e+00f, 2.58958621e+00f,
|
||||
2.69163219e+00f, 2.79843963e+00f, 2.91030501e+00f, 3.02754584e+00f, 3.15050230e+00f, 3.27953915e+00f,
|
||||
3.41504770e+00f, 3.55744805e+00f, 3.70719145e+00f, 3.86476298e+00f, 4.03068439e+00f, 4.20551725e+00f,
|
||||
4.38986641e+00f, 4.58438376e+00f, 4.78977239e+00f, 5.00679110e+00f, 5.23625945e+00f, 5.47906320e+00f,
|
||||
5.73616037e+00f, 6.00858792e+00f, 6.29746901e+00f, 6.60402117e+00f, 6.92956515e+00f, 7.27553483e+00f,
|
||||
7.64348809e+00f, 8.03511888e+00f, 8.45227058e+00f, 8.89695079e+00f, 9.37134780e+00f, 9.87784877e+00f,
|
||||
1.04190601e+01f, 1.09978298e+01f, 1.16172728e+01f, 1.22807990e+01f, 1.29921443e+01f, 1.37554055e+01f,
|
||||
1.45750793e+01f, 1.54561061e+01f, 1.64039187e+01f, 1.74244972e+01f, 1.85244301e+01f, 1.97109839e+01f,
|
||||
2.09921804e+01f, 2.23768845e+01f, 2.38749023e+01f, 2.54970927e+01f, 2.72554930e+01f, 2.91634608e+01f,
|
||||
3.12358351e+01f, 3.34891185e+01f, 3.59416839e+01f, 3.86140099e+01f, 4.15289481e+01f, 4.47120276e+01f,
|
||||
4.81918020e+01f, 5.20002465e+01f, 5.61732106e+01f, 6.07509371e+01f, 6.57786566e+01f, 7.13072704e+01f,
|
||||
7.73941341e+01f, 8.41039609e+01f, 9.15098607e+01f, 9.96945411e+01f, 1.08751694e+02f, 1.18787600e+02f,
|
||||
1.29922990e+02f, 1.42295202e+02f, 1.56060691e+02f, 1.71397955e+02f, 1.88510933e+02f, 2.07632988e+02f,
|
||||
2.29031559e+02f, 2.53013612e+02f, 2.79932028e+02f, 3.10193130e+02f, 3.44265522e+02f, 3.82690530e+02f,
|
||||
4.26094527e+02f, 4.75203518e+02f, 5.30860437e+02f, 5.94045681e+02f, 6.65901543e+02f, 7.47761337e+02f,
|
||||
8.41184173e+02f, 9.47996570e+02f, 1.07034233e+03f, 1.21074246e+03f, 1.37216724e+03f, 1.55812321e+03f,
|
||||
1.77275819e+03f, 2.02098849e+03f, 2.30865326e+03f, 2.64270219e+03f, 3.03142418e+03f, 3.48472668e+03f,
|
||||
4.01447750e+03f, 4.63492426e+03f, 5.36320995e+03f, 6.22000841e+03f, 7.23030933e+03f, 8.42439022e+03f,
|
||||
9.83902287e+03f, 1.15189746e+04f, 1.35188810e+04f, 1.59055875e+04f, 1.87610857e+04f, 2.21862046e+04f,
|
||||
2.63052621e+04f, 3.12719440e+04f, 3.72767546e+04f, 4.45564828e+04f, 5.34062659e+04f, 6.41950058e+04f,
|
||||
7.73851264e+04f, 9.35579699e+04f, 1.13446538e+05f, 1.37977827e+05f, 1.68327749e+05f, 2.05992575e+05f,
|
||||
2.52882202e+05f, 3.11442272e+05f, 3.84814591e+05f, 4.77048586e+05f, 5.93380932e+05f, 7.40606619e+05f,
|
||||
9.27573047e+05f, 1.16584026e+06f, 1.47056632e+06f, 1.86169890e+06f, 2.36558487e+06f, 3.01715270e+06f,
|
||||
3.86288257e+06f, 4.96486431e+06f, 6.40636283e+06f, 8.29948185e+06f, 1.07957589e+07f, 1.41008733e+07f,
|
||||
1.84951472e+07f, 2.43622442e+07f, 3.22295113e+07f, 4.28249388e+07f, 5.71579339e+07f, 7.66343793e+07f,
|
||||
1.03221273e+08f, 1.39683399e+08f, 1.89925150e+08f, 2.59486540e+08f, 3.56266474e+08f, 4.91582541e+08f,
|
||||
6.81731647e+08f, 9.50299811e+08f, 1.33159830e+09f, 1.87580198e+09f, 2.65667391e+09f, 3.78324022e+09f,
|
||||
5.41753185e+09f, 7.80169537e+09f, 1.12996537e+10f, 1.64614916e+10f, 2.41235400e+10f, 3.55648690e+10f,
|
||||
5.27534501e+10f, 7.87357211e+10f, 1.18256902e+11f, 1.78754944e+11f, 2.71963306e+11f, 4.16512215e+11f,
|
||||
6.42178186e+11f, 9.96872550e+11f, 1.55821233e+12f, 2.45280998e+12f, 3.88865623e+12f, 6.20986899e+12f,
|
||||
9.98992422e+12f, 1.61915800e+13f, 2.64432452e+13f, 4.35201885e+13f, 7.21888469e+13f, 1.20699764e+14f,
|
||||
2.03448372e+14f, 3.45755310e+14f, 5.92524851e+14f, 1.02405779e+15f, 1.78517405e+15f, 3.13930699e+15f,
|
||||
5.56985627e+15f, 9.97176335e+15f, 1.80168749e+16f, 3.28570986e+16f, 6.04901854e+16f, 1.12437528e+17f,
|
||||
2.11044513e+17f, 4.00073701e+17f, 7.66084936e+17f, 1.48201877e+18f, 2.89694543e+18f, 5.72279017e+18f,
|
||||
1.14268996e+19f, };
|
||||
|
||||
__constant__ float* m_abscissas_float[8] = {
|
||||
m_abscissas_float_1,
|
||||
m_abscissas_float_2,
|
||||
m_abscissas_float_3,
|
||||
m_abscissas_float_4,
|
||||
m_abscissas_float_5,
|
||||
m_abscissas_float_6,
|
||||
m_abscissas_float_7,
|
||||
m_abscissas_float_8,
|
||||
};
|
||||
|
||||
__constant__ float m_weights_float_1[4] =
|
||||
{ 7.86824160e+00f, 8.80516388e+02f, 5.39627832e+07f, 8.87651190e+19f, };
|
||||
|
||||
__constant__ float m_weights_float_2[4] =
|
||||
{ 2.39852428e+00f, 5.24459642e+01f, 6.45788782e+04f, 2.50998524e+12f, };
|
||||
|
||||
__constant__ float m_weights_float_3[8] =
|
||||
{ 1.74936958e+00f, 3.97965898e+00f, 1.84851460e+01f, 1.86488072e+02f, 5.97420570e+03f, 1.27041264e+06f,
|
||||
6.16419301e+09f, 5.23085003e+15f, };
|
||||
|
||||
__constant__ float m_weights_float_4[16] =
|
||||
{ 1.61385906e+00f, 1.99776729e+00f, 3.02023198e+00f, 5.47764184e+00f, 1.17966092e+01f, 3.03550485e+01f,
|
||||
9.58442179e+01f, 3.89387024e+02f, 2.17919325e+03f, 1.83920812e+04f, 2.63212061e+05f, 7.42729651e+06f,
|
||||
5.01587565e+08f, 1.03961087e+11f, 9.10032891e+13f, 5.06865116e+17f, };
|
||||
|
||||
__constant__ float m_weights_float_5[32] =
|
||||
{ 1.58146596e+00f, 1.66914991e+00f, 1.85752319e+00f, 2.17566262e+00f, 2.67590138e+00f, 3.44773868e+00f,
|
||||
4.64394654e+00f, 6.53020450e+00f, 9.58228502e+00f, 1.46836141e+01f, 2.35444955e+01f, 3.96352727e+01f,
|
||||
7.03763521e+01f, 1.32588012e+02f, 2.66962565e+02f, 5.79374920e+02f, 1.36869193e+03f, 3.55943572e+03f,
|
||||
1.03218668e+04f, 3.38662130e+04f, 1.27816626e+05f, 5.65408251e+05f, 2.99446204e+06f, 1.94497502e+07f,
|
||||
1.59219301e+08f, 1.69428882e+09f, 2.42715618e+10f, 4.87031785e+11f, 1.43181966e+13f, 6.48947152e+14f,
|
||||
4.80375775e+16f, 6.20009636e+18f, };
|
||||
|
||||
__constant__ float m_weights_float_6[65] =
|
||||
{ 1.57345777e+00f, 1.59489276e+00f, 1.63853652e+00f, 1.70598041e+00f, 1.79972439e+00f, 1.92332285e+00f,
|
||||
2.08159737e+00f, 2.28093488e+00f, 2.52969785e+00f, 2.83878478e+00f, 3.22239575e+00f, 3.69908136e+00f,
|
||||
4.29318827e+00f, 5.03686536e+00f, 5.97287114e+00f, 7.15853842e+00f, 8.67142780e+00f, 1.06174736e+01f,
|
||||
1.31428500e+01f, 1.64514563e+01f, 2.08309945e+01f, 2.66923599e+01f, 3.46299351e+01f, 4.55151836e+01f,
|
||||
6.06440809e+01f, 8.19729692e+01f, 1.12502047e+02f, 1.56909655e+02f, 2.22620435e+02f, 3.21638549e+02f,
|
||||
4.73757451e+02f, 7.12299455e+02f, 1.09460965e+03f, 1.72169779e+03f, 2.77592491e+03f, 4.59523007e+03f,
|
||||
7.82342759e+03f, 1.37235744e+04f, 2.48518896e+04f, 4.65553875e+04f, 9.04176678e+04f, 1.82484396e+05f,
|
||||
3.83680026e+05f, 8.42627197e+05f, 1.93843257e+06f, 4.68511285e+06f, 1.19352867e+07f, 3.21564375e+07f,
|
||||
9.19600893e+07f, 2.80222318e+08f, 9.13611083e+08f, 3.20091090e+09f, 1.21076526e+10f, 4.96902475e+10f,
|
||||
2.22431575e+11f, 1.09212534e+12f, 5.91688298e+12f, 3.55974344e+13f, 2.39435365e+14f, 1.81355107e+15f,
|
||||
1.55873671e+16f, 1.53271488e+17f, 1.73927478e+18f, 2.29884122e+19f, 3.57403070e+20f, };
|
||||
|
||||
__constant__ float m_weights_float_7[129] =
|
||||
{ 1.57146132e+00f, 1.57679017e+00f, 1.58749564e+00f, 1.60367396e+00f, 1.62547113e+00f, 1.65308501e+00f,
|
||||
1.68676814e+00f, 1.72683132e+00f, 1.77364814e+00f, 1.82766042e+00f, 1.88938482e+00f, 1.95942057e+00f,
|
||||
2.03845873e+00f, 2.12729290e+00f, 2.22683194e+00f, 2.33811466e+00f, 2.46232715e+00f, 2.60082286e+00f,
|
||||
2.75514621e+00f, 2.92706011e+00f, 3.11857817e+00f, 3.33200254e+00f, 3.56996830e+00f, 3.83549565e+00f,
|
||||
4.13205150e+00f, 4.46362211e+00f, 4.83479919e+00f, 5.25088196e+00f, 5.71799849e+00f, 6.24325042e+00f,
|
||||
6.83488580e+00f, 7.50250620e+00f, 8.25731548e+00f, 9.11241941e+00f, 1.00831875e+01f, 1.11876913e+01f,
|
||||
1.24472371e+01f, 1.38870139e+01f, 1.55368872e+01f, 1.74323700e+01f, 1.96158189e+01f, 2.21379089e+01f,
|
||||
2.50594593e+01f, 2.84537038e+01f, 3.24091185e+01f, 3.70329629e+01f, 4.24557264e+01f, 4.88367348e+01f,
|
||||
5.63712464e+01f, 6.52994709e+01f, 7.59180776e+01f, 8.85949425e+01f, 1.03788130e+02f, 1.22070426e+02f,
|
||||
1.44161210e+02f, 1.70968019e+02f, 2.03641059e+02f, 2.43645006e+02f, 2.92854081e+02f, 3.53678602e+02f,
|
||||
4.29234308e+02f, 5.23570184e+02f, 6.41976690e+02f, 7.91405208e+02f, 9.81042209e+02f, 1.22309999e+03f,
|
||||
1.53391256e+03f, 1.93546401e+03f, 2.45753455e+03f, 3.14073373e+03f, 4.04081819e+03f, 5.23488160e+03f,
|
||||
6.83029446e+03f, 8.97771323e+03f, 1.18901592e+04f, 1.58712239e+04f, 2.13571111e+04f, 2.89798371e+04f,
|
||||
3.96630673e+04f, 5.47687519e+04f, 7.63235654e+04f, 1.07371915e+05f, 1.52531667e+05f, 2.18877843e+05f,
|
||||
3.17362450e+05f, 4.65120153e+05f, 6.89253766e+05f, 1.03311989e+06f, 1.56688798e+06f, 2.40549203e+06f,
|
||||
3.73952896e+06f, 5.88912115e+06f, 9.39904635e+06f, 1.52090328e+07f, 2.49628719e+07f, 4.15775926e+07f,
|
||||
7.03070537e+07f, 1.20759856e+08f, 2.10788251e+08f, 3.74104720e+08f, 6.75449459e+08f, 1.24131674e+09f,
|
||||
2.32331003e+09f, 4.43117602e+09f, 8.61744649e+09f, 1.70983691e+10f, 3.46357452e+10f, 7.16760712e+10f,
|
||||
1.51634762e+11f, 3.28172932e+11f, 7.27110260e+11f, 1.65049955e+12f, 3.84133815e+12f, 9.17374427e+12f,
|
||||
2.24990195e+13f, 5.67153509e+13f, 1.47074225e+14f, 3.92701252e+14f, 1.08063998e+15f, 3.06767147e+15f,
|
||||
8.99238679e+15f, 2.72472254e+16f, 8.54294612e+16f, 2.77461372e+17f, 9.34529948e+17f, 3.26799612e+18f,
|
||||
1.18791443e+19f, 4.49405341e+19f, 1.77170665e+20f, };
|
||||
|
||||
__constant__ float m_weights_float_8[259] =
|
||||
{ 1.57096255e+00f, 1.57229290e+00f, 1.57495658e+00f, 1.57895955e+00f, 1.58431079e+00f, 1.59102230e+00f,
|
||||
1.59910918e+00f, 1.60858966e+00f, 1.61948515e+00f, 1.63182037e+00f, 1.64562338e+00f, 1.66092569e+00f,
|
||||
1.67776241e+00f, 1.69617233e+00f, 1.71619809e+00f, 1.73788633e+00f, 1.76128784e+00f, 1.78645779e+00f,
|
||||
1.81345587e+00f, 1.84234658e+00f, 1.87319943e+00f, 1.90608922e+00f, 1.94109632e+00f, 1.97830698e+00f,
|
||||
2.01781368e+00f, 2.05971547e+00f, 2.10411838e+00f, 2.15113585e+00f, 2.20088916e+00f, 2.25350798e+00f,
|
||||
2.30913084e+00f, 2.36790578e+00f, 2.42999091e+00f, 2.49555516e+00f, 2.56477893e+00f, 2.63785496e+00f,
|
||||
2.71498915e+00f, 2.79640147e+00f, 2.88232702e+00f, 2.97301705e+00f, 3.06874019e+00f, 3.16978367e+00f,
|
||||
3.27645477e+00f, 3.38908227e+00f, 3.50801806e+00f, 3.63363896e+00f, 3.76634859e+00f, 3.90657947e+00f,
|
||||
4.05479525e+00f, 4.21149322e+00f, 4.37720695e+00f, 4.55250922e+00f, 4.73801517e+00f, 4.93438579e+00f,
|
||||
5.14233166e+00f, 5.36261713e+00f, 5.59606472e+00f, 5.84356014e+00f, 6.10605759e+00f, 6.38458564e+00f,
|
||||
6.68025373e+00f, 6.99425915e+00f, 7.32789480e+00f, 7.68255767e+00f, 8.05975815e+00f, 8.46113023e+00f,
|
||||
8.88844279e+00f, 9.34361190e+00f, 9.82871448e+00f, 1.03460033e+01f, 1.08979234e+01f, 1.14871305e+01f,
|
||||
1.21165112e+01f, 1.27892047e+01f, 1.35086281e+01f, 1.42785033e+01f, 1.51028871e+01f, 1.59862046e+01f,
|
||||
1.69332867e+01f, 1.79494108e+01f, 1.90403465e+01f, 2.02124072e+01f, 2.14725057e+01f, 2.28282181e+01f,
|
||||
2.42878539e+01f, 2.58605342e+01f, 2.75562800e+01f, 2.93861096e+01f, 3.13621485e+01f, 3.34977526e+01f,
|
||||
3.58076454e+01f, 3.83080730e+01f, 4.10169773e+01f, 4.39541917e+01f, 4.71416602e+01f, 5.06036855e+01f,
|
||||
5.43672075e+01f, 5.84621188e+01f, 6.29216205e+01f, 6.77826252e+01f, 7.30862125e+01f, 7.88781469e+01f,
|
||||
8.52094636e+01f, 9.21371360e+01f, 9.97248336e+01f, 1.08043785e+02f, 1.17173764e+02f, 1.27204209e+02f,
|
||||
1.38235512e+02f, 1.50380485e+02f, 1.63766039e+02f, 1.78535118e+02f, 1.94848913e+02f, 2.12889407e+02f,
|
||||
2.32862309e+02f, 2.55000432e+02f, 2.79567594e+02f, 3.06863126e+02f, 3.37227087e+02f, 3.71046310e+02f,
|
||||
4.08761417e+02f, 4.50874968e+02f, 4.97960949e+02f, 5.50675821e+02f, 6.09771424e+02f, 6.76110054e+02f,
|
||||
7.50682104e+02f, 8.34626760e+02f, 9.29256285e+02f, 1.03608458e+03f, 1.15686082e+03f, 1.29360914e+03f,
|
||||
1.44867552e+03f, 1.62478326e+03f, 1.82509876e+03f, 2.05330964e+03f, 2.31371761e+03f, 2.61134924e+03f,
|
||||
2.95208799e+03f, 3.34283233e+03f, 3.79168493e+03f, 4.30817984e+03f, 4.90355562e+03f, 5.59108434e+03f,
|
||||
6.38646863e+03f, 7.30832183e+03f, 8.37874981e+03f, 9.62405722e+03f, 1.10756067e+04f, 1.27708661e+04f,
|
||||
1.47546879e+04f, 1.70808754e+04f, 1.98141031e+04f, 2.30322789e+04f, 2.68294532e+04f, 3.13194118e+04f,
|
||||
3.66401221e+04f, 4.29592484e+04f, 5.04810088e+04f, 5.94547213e+04f, 7.01854788e+04f, 8.30475173e+04f,
|
||||
9.85009981e+04f, 1.17113127e+05f, 1.39584798e+05f, 1.66784302e+05f, 1.99790063e+05f, 2.39944995e+05f,
|
||||
2.88925794e+05f, 3.48831531e+05f, 4.22297220e+05f, 5.12639825e+05f, 6.24046488e+05f, 7.61817907e+05f,
|
||||
9.32683930e+05f, 1.14521401e+06f, 1.41035265e+06f, 1.74212004e+06f, 2.15853172e+06f, 2.68280941e+06f,
|
||||
3.34498056e+06f, 4.18399797e+06f, 5.25055801e+06f, 6.61086017e+06f, 8.35163942e+06f, 1.05869253e+07f,
|
||||
1.34671524e+07f, 1.71914827e+07f, 2.20245345e+07f, 2.83191730e+07f, 3.65476782e+07f, 4.73445266e+07f,
|
||||
6.15653406e+07f, 8.03684303e+07f, 1.05328028e+08f, 1.38592169e+08f, 1.83103699e+08f, 2.42910946e+08f,
|
||||
3.23606239e+08f, 4.32947522e+08f, 5.81743297e+08f, 7.85117979e+08f, 1.06432920e+09f, 1.44938958e+09f,
|
||||
1.98286647e+09f, 2.72541431e+09f, 3.76386796e+09f, 5.22313881e+09f, 7.28378581e+09f, 1.02080964e+10f,
|
||||
1.43789932e+10f, 2.03583681e+10f, 2.89749983e+10f, 4.14577375e+10f, 5.96383768e+10f, 8.62622848e+10f,
|
||||
1.25466705e+11f, 1.83521298e+11f, 2.69981221e+11f, 3.99492845e+11f, 5.94638056e+11f, 8.90440997e+11f,
|
||||
1.34155194e+12f, 2.03376855e+12f, 3.10262796e+12f, 4.76359832e+12f, 7.36142036e+12f, 1.14512696e+13f,
|
||||
1.79331419e+13f, 2.82758550e+13f, 4.48929705e+13f, 7.17780287e+13f, 1.15585510e+14f, 1.87483389e+14f,
|
||||
3.06351036e+14f, 5.04340065e+14f, 8.36616340e+14f, 1.39855635e+15f, 2.35633575e+15f, 4.00176517e+15f,
|
||||
6.85137513e+15f, 1.18269011e+16f, 2.05867353e+16f, 3.61396878e+16f, 6.39911218e+16f, 1.14301619e+17f,
|
||||
2.05988138e+17f, 3.74584679e+17f, 6.87444303e+17f, 1.27340764e+18f, 2.38124192e+18f, 4.49583562e+18f,
|
||||
8.57144202e+18f, 1.65044358e+19f, 3.21010035e+19f, 6.30778012e+19f, 1.25240403e+20f, 2.51300530e+20f,
|
||||
5.09677626e+20f, };
|
||||
|
||||
__constant__ float* m_weights_float[8] = {
|
||||
m_weights_float_1,
|
||||
m_weights_float_2,
|
||||
m_weights_float_3,
|
||||
m_weights_float_4,
|
||||
m_weights_float_5,
|
||||
m_weights_float_6,
|
||||
m_weights_float_7,
|
||||
m_weights_float_8
|
||||
};
|
||||
|
||||
__constant__ double m_abscissas_double_1[6] =
|
||||
{ 3.088287417976322866e+00, 1.489931846492091580e+02, 3.412289247883437102e+06, 2.069325766042617791e+18,
|
||||
2.087002407609475560e+50, 2.019766160717908151e+137, };
|
||||
|
||||
__constant__ double m_abscissas_double_2[6] =
|
||||
{ 9.130487626376696748e-01, 1.415789294662811592e+01, 6.704215516223276482e+03, 9.641725327150499415e+10,
|
||||
2.508950760085778485e+30, 1.447263535710337145e+83, };
|
||||
|
||||
__constant__ double m_abscissas_double_3[12] =
|
||||
{ 4.072976900657586902e-01, 1.682066707021148743e+00, 6.150897986386729515e+00, 4.003962351929400222e+01,
|
||||
7.929200247931026321e+02, 1.029849713330979583e+05, 3.038623109252438574e+08, 1.565445474362494869e+14,
|
||||
4.042465098430219104e+23, 1.321706827429658179e+39, 4.991231782099557998e+64, 7.352943850359875966e+106, };
|
||||
|
||||
__constant__ double m_abscissas_double_4[24] =
|
||||
{ 1.981352722514781726e-01, 6.401556735005260177e-01, 1.248928698253977663e+00, 2.266080840944321232e+00,
|
||||
4.296462696702327381e+00, 9.130290387099955696e+00, 2.311107653864279933e+01, 7.427706034324012430e+01,
|
||||
3.267209207115258917e+02, 2.159485694311818716e+03, 2.415015262896413060e+04, 5.318194002756929158e+05,
|
||||
2.800586857217043323e+07, 4.524065079794338780e+09, 3.085612573980677122e+12, 1.338826733015807478e+16,
|
||||
6.254617176562341381e+20, 6.182098535814164754e+26, 3.077293649788458067e+34, 2.348957289370104303e+44,
|
||||
1.148543197899469758e+57, 2.255300070010069868e+73, 1.877919500569195394e+94, 1.367473887938624280e+121, };
|
||||
|
||||
__constant__ double m_abscissas_double_5[49] =
|
||||
{ 9.839678940067320339e-02, 3.006056176599550351e-01, 5.198579789949384900e-01, 7.703620832988877009e-01,
|
||||
1.071311369641311830e+00, 1.450569758088998445e+00, 1.950778549520360334e+00, 2.640031773695551468e+00,
|
||||
3.631372373667412273e+00, 5.119915330903350570e+00, 7.456660981404883289e+00, 1.130226126889972624e+01,
|
||||
1.796410692472772550e+01, 3.017810704601898222e+01, 5.403875800312370567e+01, 1.041077314477469548e+02,
|
||||
2.180295201202628077e+02, 5.021556986259101646e+02, 1.288621310998222420e+03, 3.739216870800548324e+03,
|
||||
1.247507297020191232e+04, 4.876399753226692124e+04, 2.281456582219130122e+05, 1.308777960064843017e+06,
|
||||
9.460846634209664077e+06, 8.888831203637279622e+07, 1.124168828974344134e+09, 1.991276729532144470e+10,
|
||||
5.167434691060984650e+11, 2.067218814203990888e+13, 1.350615033184100406e+15, 1.538540662836508188e+17,
|
||||
3.290747290540350661e+19, 1.437291381884498816e+22, 1.409832445530347286e+25, 3.459135480277971441e+28,
|
||||
2.398720582340954092e+32, 5.398806604617292960e+36, 4.613340002580628610e+41, 1.787685909667902457e+47,
|
||||
3.841984370124338536e+53, 5.752797955708583700e+60, 7.771812038427286551e+68, 1.269673044204081626e+78,
|
||||
3.495676773765731568e+88, 2.362519474971692445e+100, 6.002143893273651123e+113, 9.290716303464155539e+128,
|
||||
1.514442238033847090e+146, };
|
||||
|
||||
__constant__ double m_abscissas_double_6[98] =
|
||||
{ 4.911510035029024930e-02, 1.480131496743607333e-01, 2.489388137406836857e-01, 3.533254236926684378e-01,
|
||||
4.627335566122353259e-01, 5.789120681640963067e-01, 7.038702533860627799e-01, 8.399658591446505688e-01,
|
||||
9.900150664244376147e-01, 1.157432570143699131e+00, 1.346412759185361763e+00, 1.562167113901335551e+00,
|
||||
1.811238852782323380e+00, 2.101924419006550301e+00, 2.444843885584197934e+00, 2.853720746632915024e+00,
|
||||
3.346458910955350787e+00, 3.946645821057838387e+00, 4.685673101596678529e+00, 5.605762230908151175e+00,
|
||||
6.764332336830574204e+00, 8.240383175379985221e+00, 1.014394356129857730e+01, 1.263024714338892472e+01,
|
||||
1.592130395780345258e+01, 2.033921861921857185e+01, 2.635846445760633752e+01, 3.468926333224152409e+01,
|
||||
4.641291467019728963e+01, 6.320550793890424203e+01, 8.771497261808906374e+01, 1.242096926240411498e+02,
|
||||
1.797186347845127557e+02, 2.660817283327900190e+02, 4.037273029575712841e+02, 6.288113066545908703e+02,
|
||||
1.007079837507490594e+03, 1.661568229185114288e+03, 2.829651440786582598e+03, 4.984386266585669139e+03,
|
||||
9.101546927647810893e+03, 1.726892655475049727e+04, 3.413099578778601190e+04, 7.045668977053092802e+04,
|
||||
1.523404217761279128e+05, 3.460479782897947414e+05, 8.284724209233183002e+05, 2.097596146601193946e+06,
|
||||
5.636950798861273236e+06, 1.614071410855607245e+07, 4.944730678915060360e+07, 1.627810516820991356e+08,
|
||||
5.785332971632280838e+08, 2.230838540681955690e+09, 9.382391306064739643e+09, 4.328149544776551692e+10,
|
||||
2.203072744049242904e+11, 1.245245067109136413e+12, 7.869000534957822375e+12, 5.599531432979422461e+13,
|
||||
4.521486949902090877e+14, 4.176889516548293265e+15, 4.452867759650496656e+16, 5.529142853140498068e+17,
|
||||
8.075732516562854275e+18, 1.402046916260468698e+20, 2.925791412832239850e+21, 7.426433029335410886e+22,
|
||||
2.321996331245735364e+24, 9.064194250638442432e+25, 4.481279048819445609e+27, 2.849046304726990645e+29,
|
||||
2.367381159183355975e+31, 2.615825578455121227e+33, 3.914764948263290808e+35, 8.092042448555929219e+37,
|
||||
2.358921320940630332e+40, 9.915218648535332591e+42, 6.152851059342658764e+45, 5.780276340144515388e+48,
|
||||
8.443751734186488626e+51, 1.973343350899766708e+55, 7.605247378556219980e+58, 4.992057104939510418e+62,
|
||||
5.775863423903912316e+66, 1.221808201945355603e+71, 4.912917230387133816e+75, 3.913971813732202372e+80,
|
||||
6.456388069905286787e+85, 2.311225068528010358e+91, 1.887458157719431339e+97, 3.708483165438453094e+103,
|
||||
1.855198812283538635e+110, 2.509787873171705318e+117, 9.790423755591216617e+124, 1.179088807944050747e+133,
|
||||
4.714631846722476620e+141, 6.762657785959713240e+150, };
|
||||
|
||||
__constant__ double m_abscissas_double_7[196] =
|
||||
{ 2.454715583629863651e-02, 7.372466873903346224e-02, 1.231525309416766543e-01, 1.730001377719248556e-01,
|
||||
2.234406649596860001e-01, 2.746526549718518258e-01, 3.268216792980646669e-01, 3.801421009804789245e-01,
|
||||
4.348189637215614948e-01, 4.910700365099428407e-01, 5.491280459480215441e-01, 6.092431324382654397e-01,
|
||||
6.716855712021148069e-01, 7.367488049067938643e-01, 8.047528416336950644e-01, 8.760480802482050705e-01,
|
||||
9.510196351823332253e-01, 1.030092244532470067e+00, 1.113735859588680765e+00, 1.202472030918058876e+00,
|
||||
1.296881226496863751e+00, 1.397611241828373026e+00, 1.505386891360545205e+00, 1.621021205894798030e+00,
|
||||
1.745428403369044572e+00, 1.879638952031029331e+00, 2.024817107609328524e+00, 2.182281382147884181e+00,
|
||||
2.353528494823881355e+00, 2.540261468229626457e+00, 2.744422672171478111e+00, 2.968232787190606619e+00,
|
||||
3.214236869520657666e+00, 3.485358957907730467e+00, 3.784966983117372821e+00, 4.116950138940295100e+00,
|
||||
4.485811369388231710e+00, 4.896778246562001812e+00, 5.355936290826725948e+00, 5.870389762600956907e+00,
|
||||
6.448456189131117605e+00, 7.099902452679558236e+00, 7.836232253282841261e+00, 8.671037293575230635e+00,
|
||||
9.620427777985990363e+00, 1.070356198876799531e+01, 1.194330008139441022e+01, 1.336701421038499647e+01,
|
||||
1.500759615914396343e+01, 1.690471548203528376e+01, 1.910639668731689597e+01, 2.167100443216577994e+01,
|
||||
2.466975274695099197e+01, 2.818989025157845355e+01, 3.233876132429401745e+01, 3.724900758097245740e+01,
|
||||
4.308526084907741997e+01, 5.005279647654703975e+01, 5.840877607253876528e+01, 6.847692821534239862e+01,
|
||||
8.066681777060714848e+01, 9.549927270200249260e+01, 1.136401195769487885e+02, 1.359451944976603209e+02,
|
||||
1.635207451879744447e+02, 1.978049687912586950e+02, 2.406787535889776661e+02, 2.946170292930555023e+02,
|
||||
3.628969532147125333e+02, 4.498861782715596902e+02, 5.614447353133496106e+02, 7.054892470899271429e+02,
|
||||
8.927907732799964116e+02, 1.138111424979478376e+03, 1.461835991563605367e+03, 1.892332623444716186e+03,
|
||||
2.469396036186133479e+03, 3.249311569298824731e+03, 4.312367113170283012e+03, 5.774094754500139661e+03,
|
||||
7.802247237500851845e+03, 1.064267530975806972e+04, 1.465915383535674990e+04, 2.039528541239754835e+04,
|
||||
2.867170622421556265e+04, 4.074033762183453297e+04, 5.853182310596923393e+04, 8.505689265265206640e+04,
|
||||
1.250649269847856615e+05, 1.861373943166749766e+05, 2.805255777452010927e+05, 4.282782486084761748e+05,
|
||||
6.626340506127657304e+05, 1.039443239650339565e+06, 1.653857426112961316e+06, 2.670315650125279161e+06,
|
||||
4.377212026624358795e+06, 7.288071713698413821e+06, 1.233172993400331694e+07, 2.121557285769933699e+07,
|
||||
3.713086254861535383e+07, 6.614579377352135534e+07, 1.200055291694917110e+08, 2.218629410296880690e+08,
|
||||
4.182282939928687703e+08, 8.043704132493714804e+08, 1.579392989425668114e+09, 3.168122415524104635e+09,
|
||||
6.496606811549861323e+09, 1.362851988356444486e+10, 2.926863897008707708e+10, 6.439798665209493735e+10,
|
||||
1.452755233772903022e+11, 3.362854459389246576e+11, 7.994202785433479271e+11, 1.953264233362291960e+12,
|
||||
4.909581868242554569e+12, 1.270622730765015610e+13, 3.389070986742985764e+13, 9.325084030208844833e+13,
|
||||
2.649489423834534140e+14, 7.781295184094957195e+14, 2.364715052527355639e+15, 7.444138031465958255e+15,
|
||||
2.430217240684749635e+16, 8.237068641534357762e+16, 2.902117050664548840e+17, 1.064157679404037013e+18,
|
||||
4.066277106061960017e+18, 1.621274233630359097e+19, 6.754156830915450013e+19, 2.944056841733781919e+20,
|
||||
1.344640139549107817e+21, 6.444586158944723300e+21, 3.246218667554608934e+22, 1.721234579556653533e+23,
|
||||
9.622533890240474391e+23, 5.681407260417956671e+24, 3.548890779995928184e+25, 2.349506425672269562e+26,
|
||||
1.651618130605205643e+27, 1.235147426493113059e+28, 9.845947239792057550e+28, 8.383130781984610418e+29,
|
||||
7.639649461399172445e+30, 7.467862732233885201e+31, 7.847691482004993660e+32, 8.886032557626454704e+33,
|
||||
1.086734890678302436e+35, 1.438967777036538458e+36, 2.068168865475603521e+37, 3.234885320223912385e+38,
|
||||
5.521233641542628514e+39, 1.031148231194663855e+41, 2.113272035816365982e+42, 4.766724345485077520e+43,
|
||||
1.186961550990218287e+45, 3.273172169205847573e+46, 1.002821226769167753e+48, 3.424933903935156479e+49,
|
||||
1.308436017026428736e+51, 5.611378330048420503e+52, 2.711424806327139291e+54, 1.481771793644066442e+56,
|
||||
9.194282071042778804e+57, 6.503661455875355562e+59, 5.266329986868627303e+61, 4.902662807969347359e+63,
|
||||
5.270511057289557050e+65, 6.572856511670583316e+67, 9.553956030013225387e+69, 1.626491911159411616e+72,
|
||||
3.259410915500951223e+74, 7.728460318113614280e+76, 2.179881996905918059e+79, 7.354484388371505915e+81,
|
||||
2.984831270803957746e+84, 1.465828267813438962e+87, 8.763355972629864261e+89, 6.417909665847831130e+92,
|
||||
5.794958649229893510e+95, 6.494224472311908365e+98, 9.095000156016433698e+101, 1.603058498455299102e+105,
|
||||
3.582099119119320529e+108, 1.022441227139854687e+112, 3.756872185015086057e+115, 1.791363463832849159e+119,
|
||||
1.117641882039472124e+123, 9.202159565546528285e+126, 1.008716474827888568e+131, 1.485546487089301805e+135,
|
||||
2.966961534830566097e+139, 8.114207284664369360e+143, 3.069178087507669739e+148, 1.622223681147791473e+153, };
|
||||
|
||||
__constant__ double m_abscissas_double_8[391] =
|
||||
{ 1.227227917054637830e-02, 3.682722894492590471e-02, 6.141337626871079991e-02, 8.605159708778207907e-02,
|
||||
1.107628840017845446e-01, 1.355683934957785482e-01, 1.604894937454335489e-01, 1.855478131645089496e-01,
|
||||
2.107652898670700524e-01, 2.361642222214626268e-01, 2.617673206785495261e-01, 2.875977610631342900e-01,
|
||||
3.136792395249035647e-01, 3.400360293536632770e-01, 3.666930398731810193e-01, 3.936758776386451797e-01,
|
||||
4.210109101746846268e-01, 4.487253325041450341e-01, 4.768472367324829462e-01, 5.054056849688209375e-01,
|
||||
5.344307858825229079e-01, 5.639537752137267134e-01, 5.940071005777549000e-01, 6.246245109268716053e-01,
|
||||
6.558411510586397969e-01, 6.876936615883514922e-01, 7.202202848338683401e-01, 7.534609770949572224e-01,
|
||||
7.874575278460963461e-01, 8.222536864020499377e-01, 8.578952966595825808e-01, 8.944304405668593009e-01,
|
||||
9.319095910247435485e-01, 9.703857749817920659e-01, 1.009914747547728584e+00, 1.050555178019083150e+00,
|
||||
1.092368848786092579e+00, 1.135420868172514300e+00, 1.179779898350424466e+00, 1.225518399571142610e+00,
|
||||
1.272712892062026473e+00, 1.321444237057985065e+00, 1.371797938567245953e+00, 1.423864467614384096e+00,
|
||||
1.477739610861208115e+00, 1.533524845679288858e+00, 1.591327743938355098e+00, 1.651262406984310076e+00,
|
||||
1.713449934511288211e+00, 1.778018930286256858e+00, 1.845106047964720870e+00, 1.914856580544951899e+00,
|
||||
1.987425097349017093e+00, 2.062976132795275283e+00, 2.141684931642916785e+00, 2.223738255848994521e+00,
|
||||
2.309335258687213796e+00, 2.398688432341103821e+00, 2.492024635808356095e+00, 2.589586210645122756e+00,
|
||||
2.691632192846832444e+00, 2.798439630014497291e+00, 2.910305013902562652e+00, 3.027545839497364963e+00,
|
||||
3.150502302946919722e+00, 3.279539151967394330e+00, 3.415047703805410611e+00, 3.557448047456550733e+00,
|
||||
3.707191448649779817e+00, 3.864762978128342125e+00, 4.030684386016531344e+00, 4.205517247588613835e+00,
|
||||
4.389866408585172458e+00, 4.584383761391930748e+00, 4.789772386950687695e+00, 5.006791101261363264e+00,
|
||||
5.236259449815274050e+00, 5.479063198337523150e+00, 5.736160373884817415e+00, 6.008587916728619858e+00,
|
||||
6.297469010648863048e+00, 6.604021167380929133e+00, 6.929565150124677837e+00, 7.275534831383860972e+00,
|
||||
7.643488092123492064e+00, 8.035118882502459288e+00, 8.452270579478188130e+00, 8.896950793641785313e+00,
|
||||
9.371347797016395173e+00, 9.877848765573446033e+00, 1.041906005527762037e+01, 1.099782975900831706e+01,
|
||||
1.161727282423952258e+01, 1.228079904848924611e+01, 1.299214431196691048e+01, 1.375540545535625881e+01,
|
||||
1.457507926620621316e+01, 1.545610610104852468e+01, 1.640391874338302925e+01, 1.742449718154208970e+01,
|
||||
1.852443008688437526e+01, 1.971098388378266494e+01, 2.099218043080961648e+01, 2.237688448013982946e+01,
|
||||
2.387490225270073820e+01, 2.549709266380430464e+01, 2.725549296232531555e+01, 2.916346081119624987e+01,
|
||||
3.123583514423284962e+01, 3.348911849136805118e+01, 3.594168387985465099e+01, 3.861400990307230737e+01,
|
||||
4.152894811329303023e+01, 4.471202755441533396e+01, 4.819180202224910174e+01, 5.200024654361558757e+01,
|
||||
5.617321062537384494e+01, 6.075093706918782079e+01, 6.577865661168003966e+01, 7.130727037357721343e+01,
|
||||
7.739413413465805794e+01, 8.410396085269633392e+01, 9.150986068496734448e+01, 9.969454113547704016e+01,
|
||||
1.087516939426018897e+02, 1.187876000643037532e+02, 1.299229897614516371e+02, 1.422952015056372537e+02,
|
||||
1.560606914665002671e+02, 1.713979549326432406e+02, 1.885109325154830073e+02, 2.076329877740125935e+02,
|
||||
2.290315594654587370e+02, 2.530136115655676467e+02, 2.799320282398896912e+02, 3.101931299766730890e+02,
|
||||
3.442655222107529892e+02, 3.826905303289378387e+02, 4.260945266207607701e+02, 4.752035175892902045e+02,
|
||||
5.308604366239058864e+02, 5.940456805372995009e+02, 6.659015428338778262e+02, 7.477613367309153870e+02,
|
||||
8.411841730471343023e+02, 9.479965698013741524e+02, 1.070342331375881840e+03, 1.210742457518582660e+03,
|
||||
1.372167241552205820e+03, 1.558123212187692722e+03, 1.772758188662716282e+03, 2.020988485411862984e+03,
|
||||
2.308653259329163157e+03, 2.642702189813684273e+03, 3.031424182869210212e+03, 3.484726676985756018e+03,
|
||||
4.014477504733973505e+03, 4.634924264049394751e+03, 5.363209949773439749e+03, 6.220008412114342803e+03,
|
||||
7.230309332853029956e+03, 8.424390216735217783e+03, 9.839022871538541787e+03, 1.151897463083113988e+04,
|
||||
1.351888098874374202e+04, 1.590558745460066947e+04, 1.876108572764816176e+04, 2.218620462393366275e+04,
|
||||
2.630526205054915357e+04, 3.127194401941711057e+04, 3.727675461256652923e+04, 4.455648280312273249e+04,
|
||||
5.340626592018903930e+04, 6.419500580388918123e+04, 7.738512642386820060e+04, 9.355796993981725963e+04,
|
||||
1.134465375820669470e+05, 1.379778272209741713e+05, 1.683277485807887053e+05, 2.059925746120735305e+05,
|
||||
2.528822024503158254e+05, 3.114422718347725915e+05, 3.848145913435570736e+05, 4.770485864966822643e+05,
|
||||
5.933809324724740854e+05, 7.406066190351666115e+05, 9.275730471470643372e+05, 1.165840260940180415e+06,
|
||||
1.470566322118246135e+06, 1.861698899014921971e+06, 2.365584870298354495e+06, 3.017152695505764877e+06,
|
||||
3.862882573599929249e+06, 4.964864305589750358e+06, 6.406362829959736606e+06, 8.299481847261302115e+06,
|
||||
1.079575892642401854e+07, 1.410087327474604091e+07, 1.849514724418250100e+07, 2.436224419670805500e+07,
|
||||
3.222951131863941234e+07, 4.282493882385925337e+07, 5.715793394339267637e+07, 7.663437932745451635e+07,
|
||||
1.032212725498489699e+08, 1.396833991976194842e+08, 1.899251497664892740e+08, 2.594865396467505851e+08,
|
||||
3.562664742464501497e+08, 4.915825413172413471e+08, 6.817316470116958142e+08, 9.502998105202541438e+08,
|
||||
1.331598295343277538e+09, 1.875801976010459831e+09, 2.656673907709731487e+09, 3.783240215616365909e+09,
|
||||
5.417531848500136979e+09, 7.801695369892847510e+09, 1.129965368955098833e+10, 1.646149161390821924e+10,
|
||||
2.412353995736687694e+10, 3.556486895431927094e+10, 5.275345014093760519e+10, 7.873572108325378177e+10,
|
||||
1.182569020317863604e+11, 1.787549442508363461e+11, 2.719633064979986142e+11, 4.165122153119897946e+11,
|
||||
6.421781858205134197e+11, 9.968725497576275918e+11, 1.558212327122960399e+12, 2.452809984907093786e+12,
|
||||
3.888656232828140210e+12, 6.209868990509424909e+12, 9.989924216297983665e+12, 1.619158001378611351e+13,
|
||||
2.644324518669926559e+13, 4.352018847904374786e+13, 7.218884688202741709e+13, 1.206997640727349538e+14,
|
||||
2.034483722445207402e+14, 3.457553102874402920e+14, 5.925248511957505706e+14, 1.024057793713038672e+15,
|
||||
1.785174045941642162e+15, 3.139306988668494696e+15, 5.569856270174890128e+15, 9.971763353834460328e+15,
|
||||
1.801687491114883092e+16, 3.285709858322565542e+16, 6.049018540910759710e+16, 1.124375283211369572e+17,
|
||||
2.110445125952435305e+17, 4.000737007891229992e+17, 7.660849361564329309e+17, 1.482018770996176700e+18,
|
||||
2.896945433910857945e+18, 5.722790165693470493e+18, 1.142689960439921462e+19, 2.306616559984106723e+19,
|
||||
4.707857184616093863e+19, 9.717346347495342813e+19, 2.028735605622585444e+20, 4.284840254171000581e+20,
|
||||
9.157027329021623836e+20, 1.980457834766411777e+21, 4.335604886702252004e+21, 9.609258559714223995e+21,
|
||||
2.156604630608586997e+22, 4.902045909695270289e+22, 1.128749227121328467e+23, 2.633414623049930879e+23,
|
||||
6.226335684490998543e+23, 1.492205279014148921e+24, 3.625768249717590109e+24, 8.933899764961444882e+24,
|
||||
2.232786981682262383e+25, 5.661295336293986732e+25, 1.456616710298133142e+26, 3.803959852868488245e+26,
|
||||
1.008531585603036490e+27, 2.715247425129423358e+27, 7.425071766766651967e+27, 2.062860712173225003e+28,
|
||||
5.824055458799413312e+28, 1.671388836696436644e+29, 4.876830632023956392e+29, 1.447170071146107156e+30,
|
||||
4.368562208925583783e+30, 1.341873806249251338e+31, 4.195251632754338682e+31, 1.335360134828214136e+32,
|
||||
4.328681350715136340e+32, 1.429401866150319186e+33, 4.809736146227180696e+33, 1.649624114567602575e+34,
|
||||
5.768677492419801469e+34, 2.057442854162761350e+35, 7.486423509917811063e+35, 2.780052791791155051e+36,
|
||||
1.053908347660081874e+37, 4.080046334235754223e+37, 1.613553311592805373e+38, 6.520836332997615098e+38,
|
||||
2.693848186257510992e+39, 1.138002408430710800e+40, 4.917748008813924613e+40, 2.174691073191358676e+41,
|
||||
9.844523745430526502e+41, 4.563707467590116732e+42, 2.167352073708379137e+43, 1.054860193887170754e+44,
|
||||
5.263588225566847365e+44, 2.693772458797916623e+45, 1.414506760560163074e+46, 7.624126763512016620e+46,
|
||||
4.219828148762794411e+47, 2.399387665831793264e+48, 1.402139947254117434e+49, 8.424706325525422943e+49,
|
||||
5.206918479942619318e+50, 3.311787866477716151e+51, 2.168683295509859155e+52, 1.462786368779206713e+53,
|
||||
1.016761784575838363e+54, 7.286460995145043184e+54, 5.386194237448865407e+55, 4.108917480528740640e+56,
|
||||
3.236445625945552728e+57, 2.633440652417619669e+58, 2.214702339357939268e+59, 1.926058995948268392e+60,
|
||||
1.733067740414174932e+61, 1.614307160124426969e+62, 1.557464328486352138e+63, 1.557226155197192031e+64,
|
||||
1.614473962707995344e+65, 1.736617406327386105e+66, 1.939201243451190521e+67, 2.249277732936622876e+68,
|
||||
2.711593798719765599e+69, 3.399628732048687119e+70, 4.435389696730206291e+71, 6.025566076164003981e+72,
|
||||
8.529161425383779849e+73, 1.258746322992988688e+75, 1.938112175186560210e+76, 3.115432363572610661e+77,
|
||||
5.231797674434390018e+78, 9.184930207860680757e+79, 1.686929404780378772e+81, 3.243565624474232635e+82,
|
||||
6.533812498930220075e+83, 1.379898823144620314e+85, 3.057650444842839916e+86, 7.114050545839171245e+87,
|
||||
1.739275024442258674e+89, 4.471782915853177804e+90, 1.210036789494028144e+92, 3.448828044590862359e+93,
|
||||
1.036226783750561565e+95, 3.284801914751206038e+96, 1.099514933602224638e+98, 3.889581731378242597e+99,
|
||||
1.455434287901069991e+101, 5.765729934387419019e+102, 2.420349568745475582e+104, 1.077606625929777536e+106,
|
||||
5.093346988695851845e+107, 2.558090824110323997e+109, 1.366512508719047964e+111, 7.771735800763526406e+112,
|
||||
4.710398638793014918e+114, 3.045563885587013954e+116, 2.102762552861442993e+118, 1.551937536212596136e+120,
|
||||
1.225676354426075970e+122, 1.036950946169703711e+124, 9.407885268970827717e+125, 9.163369107785093171e+127,
|
||||
9.592531095671168926e+129, 1.080486293361823875e+132, 1.311034829557782450e+134, 1.715642975932639188e+136,
|
||||
2.424231742707881878e+138, 3.703231223333127919e+140, 6.123225027409988902e+142, 1.097271040771196765e+145,
|
||||
2.133693643241295977e+147, 4.508099184895777328e+149, 1.036252806686291189e+152, };
|
||||
|
||||
__constant__ double* m_abscissas_double[8] = {
|
||||
m_abscissas_double_1,
|
||||
m_abscissas_double_2,
|
||||
m_abscissas_double_3,
|
||||
m_abscissas_double_4,
|
||||
m_abscissas_double_5,
|
||||
m_abscissas_double_6,
|
||||
m_abscissas_double_7,
|
||||
m_abscissas_double_8,
|
||||
};
|
||||
|
||||
__constant__ double m_weights_double_1[6] =
|
||||
{ 7.868241604839621507e+00, 8.805163880733011116e+02, 5.396278323520705668e+07, 8.876511896968161317e+19,
|
||||
2.432791879269225553e+52, 6.399713512080202911e+139, };
|
||||
|
||||
__constant__ double m_weights_double_2[6] =
|
||||
{ 2.398524276302635218e+00, 5.244596423726681022e+01, 6.457887819598201760e+04, 2.509985242511374506e+12,
|
||||
1.774029269327138701e+32, 2.781406115983097314e+85, };
|
||||
|
||||
__constant__ double m_weights_double_3[12] =
|
||||
{ 1.749369583108386852e+00, 3.979658981934607813e+00, 1.848514598574449570e+01, 1.864880718932067988e+02,
|
||||
5.974205695263265855e+03, 1.270412635144623341e+06, 6.164193014295984071e+09, 5.230850031811222530e+15,
|
||||
2.226260929943369774e+25, 1.199931102042181592e+41, 7.470602144275146214e+66, 1.814465860528410676e+109, };
|
||||
|
||||
__constant__ double m_weights_double_4[24] =
|
||||
{ 1.613859062188366173e+00, 1.997767291869673262e+00, 3.020231979908834220e+00, 5.477641843859057761e+00,
|
||||
1.179660916492671672e+01, 3.035504848518598294e+01, 9.584421793794920860e+01, 3.893870238229992076e+02,
|
||||
2.179193250357911344e+03, 1.839208123964132852e+04, 2.632120612599856167e+05, 7.427296507169468210e+06,
|
||||
5.015875648341232356e+08, 1.039610867241544113e+11, 9.100328911818091977e+13, 5.068651163890231571e+17,
|
||||
3.039966520714902616e+22, 3.857740194672007962e+28, 2.465542763666581087e+36, 2.416439449167799461e+46,
|
||||
1.517091553926604149e+59, 3.825043412021411380e+75, 4.089582396821598640e+96, 3.823775894295564050e+123, };
|
||||
|
||||
__constant__ double m_weights_double_5[49] =
|
||||
{ 1.581465959536694744e+00, 1.669149910438534746e+00, 1.857523188595005770e+00, 2.175662623626994120e+00,
|
||||
2.675901375211020564e+00, 3.447738682498791744e+00, 4.643946540355464126e+00, 6.530204496574248616e+00,
|
||||
9.582285015566804961e+00, 1.468361407515440960e+01, 2.354449548740987533e+01, 3.963527273305166705e+01,
|
||||
7.037635206267538547e+01, 1.325880124784838868e+02, 2.669625649541569172e+02, 5.793749198508472676e+02,
|
||||
1.368691928321303605e+03, 3.559435721533130554e+03, 1.032186677270763318e+04, 3.386621302858741487e+04,
|
||||
1.278166259840246830e+05, 5.654082513926693098e+05, 2.994462044781721833e+06, 1.944975023421914947e+07,
|
||||
1.592193007690560588e+08, 1.694288818617459913e+09, 2.427156182311303271e+10, 4.870317848199455490e+11,
|
||||
1.431819656229181793e+13, 6.489471523099301256e+14, 4.803757752508989106e+16, 6.200096361305331541e+18,
|
||||
1.502568562439914899e+21, 7.436061367189688251e+23, 8.264761218677928603e+26, 2.297735027897804345e+30,
|
||||
1.805449779569534997e+34, 4.604472360199061931e+38, 4.458371212030626854e+43, 1.957638261114809309e+49,
|
||||
4.767368137162500764e+55, 8.088820139476721285e+62, 1.238260897349286357e+71, 2.292272505278842062e+80,
|
||||
7.151392373749193549e+90, 5.476714850156044431e+102, 1.576655618370700681e+116, 2.765448595957851958e+131,
|
||||
5.108051255283132673e+148, };
|
||||
|
||||
__constant__ double m_weights_double_6[98] =
|
||||
{ 1.573457773573108386e+00, 1.594892755038663787e+00, 1.638536515530234742e+00, 1.705980408212213620e+00,
|
||||
1.799724394608737275e+00, 1.923322854425656307e+00, 2.081597373313268178e+00, 2.280934883790070511e+00,
|
||||
2.529697852387704655e+00, 2.838784782552951185e+00, 3.222395745020980612e+00, 3.699081358854235112e+00,
|
||||
4.293188274330526800e+00, 5.036865356322330076e+00, 5.972871140910932199e+00, 7.158538424311077564e+00,
|
||||
8.671427800892076385e+00, 1.061747360297922326e+01, 1.314285002260235600e+01, 1.645145625668428040e+01,
|
||||
2.083099449998189069e+01, 2.669235989791640190e+01, 3.462993514791378189e+01, 4.551518362653662579e+01,
|
||||
6.064408087764392116e+01, 8.197296917485846798e+01, 1.125020468081652564e+02, 1.569096552844714123e+02,
|
||||
2.226204347868638276e+02, 3.216385489504077755e+02, 4.737574505945461739e+02, 7.122994548146997637e+02,
|
||||
1.094609652686376553e+03, 1.721697789176049576e+03, 2.775924909253835146e+03, 4.595230066268149347e+03,
|
||||
7.823427586641573672e+03, 1.372357435269105405e+04, 2.485188961645119553e+04, 4.655538745425972783e+04,
|
||||
9.041766782135686884e+04, 1.824843964862728392e+05, 3.836800264094614027e+05, 8.426271970245168026e+05,
|
||||
1.938432574158782634e+06, 4.685112849356485528e+06, 1.193528667218607927e+07, 3.215643752247989316e+07,
|
||||
9.196008928386600386e+07, 2.802223178457559964e+08, 9.136110825267458886e+08, 3.200910900783148591e+09,
|
||||
1.210765264234723689e+10, 4.969024745093101808e+10, 2.224315751863855216e+11, 1.092125344449313660e+12,
|
||||
5.916882980019919359e+12, 3.559743438494577249e+13, 2.394353652945465191e+14, 1.813551073517501917e+15,
|
||||
1.558736706166165738e+16, 1.532714875555114333e+17, 1.739274776190789212e+18, 2.298841216802216313e+19,
|
||||
3.574030698837762664e+20, 6.604899705451419080e+21, 1.467155879591820659e+23, 3.964094964398509381e+24,
|
||||
1.319342840595348793e+26, 5.482251971340400742e+27, 2.885137894723827518e+29, 1.952539840765392110e+31,
|
||||
1.727051489032222797e+33, 2.031343507095439396e+35, 3.236074146972599980e+37, 7.120487412983497200e+39,
|
||||
2.209552707411017265e+42, 9.886282647791384648e+44, 6.530514048788273529e+47, 6.530706672481546528e+50,
|
||||
1.015518807431281951e+54, 2.526366773162394510e+57, 1.036450519906790297e+61, 7.241966032627135861e+64,
|
||||
8.919402520769714938e+68, 2.008463619152992905e+73, 8.596914764830260020e+77, 7.290599546829495220e+82,
|
||||
1.280199563216419112e+88, 4.878349285603201150e+93, 4.240828248064127940e+99, 8.869771764721598720e+105,
|
||||
4.723342575741417669e+112, 6.802035963326188581e+119, 2.824531180990009549e+127, 3.621049216745982252e+135,
|
||||
1.541270150334942520e+144, 2.353376995174362785e+153, };
|
||||
|
||||
__constant__ double m_weights_double_7[196] =
|
||||
{ 1.571461316550783294e+00, 1.576790166316938345e+00, 1.587495640370383316e+00, 1.603673956341370210e+00,
|
||||
1.625471125457493943e+00, 1.653085011915939302e+00, 1.686768142525911236e+00, 1.726831323537516202e+00,
|
||||
1.773648138667236602e+00, 1.827660421478661448e+00, 1.889384817044018196e+00, 1.959420572855037091e+00,
|
||||
2.038458728047908923e+00, 2.127292904083847225e+00, 2.226831940199076941e+00, 2.338114664555130296e+00,
|
||||
2.462327148722991304e+00, 2.600822860927085164e+00, 2.755146214814554359e+00, 2.927060108424483555e+00,
|
||||
3.118578166240921951e+00, 3.332002540339506630e+00, 3.569968300410740276e+00, 3.835495653996447262e+00,
|
||||
4.132051496512934885e+00, 4.463622106699067881e+00, 4.834799191008006557e+00, 5.250881957765679608e+00,
|
||||
5.717998490875333124e+00, 6.243250421598568105e+00, 6.834885801226541839e+00, 7.502506202789340802e+00,
|
||||
8.257315484493544201e+00, 9.112419405864642634e+00, 1.008318749543997758e+01, 1.118769134993865202e+01,
|
||||
1.244723705914106881e+01, 1.388701390605507587e+01, 1.553688715915900190e+01, 1.743237000680942831e+01,
|
||||
1.961581894823993424e+01, 2.213790886354273806e+01, 2.505945934677137610e+01, 2.845370377742137561e+01,
|
||||
3.240911845969524834e+01, 3.703296289480230161e+01, 4.245572644746267911e+01, 4.883673480337985582e+01,
|
||||
5.637124640586975420e+01, 6.529947092752610340e+01, 7.591807755694122837e+01, 8.859494252391663822e+01,
|
||||
1.037881295005788124e+02, 1.220704263969226746e+02, 1.441612098131200535e+02, 1.709680191245773511e+02,
|
||||
2.036410593843575570e+02, 2.436450058708723643e+02, 2.928540812182076105e+02, 3.536786019152253392e+02,
|
||||
4.292343083967296939e+02, 5.235701840488733027e+02, 6.419766898003024575e+02, 7.914052083668759283e+02,
|
||||
9.810422089081931637e+02, 1.223099994999740393e+03, 1.533912555427112127e+03, 1.935464013605830339e+03,
|
||||
2.457534549912886852e+03, 3.140733731623635519e+03, 4.040818188564651898e+03, 5.234881599712225681e+03,
|
||||
6.830294457607329226e+03, 8.977713228649887143e+03, 1.189015920967326839e+04, 1.587122387044346962e+04,
|
||||
2.135711106445789331e+04, 2.897983705189681437e+04, 3.966306726795547950e+04, 5.476875193750000787e+04,
|
||||
7.632356539388055680e+04, 1.073719149754976951e+05, 1.525316674555574152e+05, 2.188778434744216586e+05,
|
||||
3.173624496019295608e+05, 4.651201525869328462e+05, 6.892537656280580572e+05, 1.033119885120019982e+06,
|
||||
1.566887981043252499e+06, 2.405492027026531795e+06, 3.739528964815910340e+06, 5.889121154895580032e+06,
|
||||
9.399046351922342030e+06, 1.520903276129653518e+07, 2.496287187293576168e+07, 4.157759259963074840e+07,
|
||||
7.030705366950267312e+07, 1.207598558452493366e+08, 2.107882509464846833e+08, 3.741047199023457864e+08,
|
||||
6.754494594987415572e+08, 1.241316740415880537e+09, 2.323310032649552862e+09, 4.431176019026625759e+09,
|
||||
8.617446487400900130e+09, 1.709836906604031513e+10, 3.463574521880171339e+10, 7.167607123799270726e+10,
|
||||
1.516347620910054079e+11, 3.281729323238950526e+11, 7.271102600298280790e+11, 1.650499552378780378e+12,
|
||||
3.841338149508803917e+12, 9.173744267785176575e+12, 2.249901946357519979e+13, 5.671535089900611731e+13,
|
||||
1.470742250307697019e+14, 3.927012518464311775e+14, 1.080639977391212820e+15, 3.067671466720475189e+15,
|
||||
8.992386789198328428e+15, 2.724722536524592111e+16, 8.542946122263389258e+16, 2.774613718725574755e+17,
|
||||
9.345299479382029121e+17, 3.267996122987731882e+18, 1.187914433455468315e+19, 4.494053408418564214e+19,
|
||||
1.771706652195486743e+20, 7.288102552885931527e+20, 3.132512430816625349e+21, 1.408743767951073110e+22,
|
||||
6.638294268236060414e+22, 3.282543608403565013e+23, 1.705920098038394064e+24, 9.332259385148524285e+24,
|
||||
5.382727175874888312e+25, 3.278954235122093249e+26, 2.113191697957458099e+27, 1.443411041499643040e+28,
|
||||
1.046864394654982423e+29, 8.077319226958905700e+29, 6.643146963432616277e+30, 5.835670121359986260e+31,
|
||||
5.486890296790230798e+32, 5.533726968508261614e+33, 5.999734996418352834e+34, 7.009176119466122569e+35,
|
||||
8.844061966424597499e+36, 1.208226860869605961e+38, 1.791648514311063338e+39, 2.891313916713205762e+40,
|
||||
5.091457860211527298e+41, 9.810630588402496553e+42, 2.074441239147378860e+44, 4.827650116937700540e+45,
|
||||
1.240287939111549029e+47, 3.528782858644784616e+48, 1.115449490471696659e+50, 3.930510643328196314e+51,
|
||||
1.549243712957852337e+53, 6.854998238041301002e+54, 3.417479961583207704e+56, 1.926905498641079990e+58,
|
||||
1.233580963004919450e+60, 9.002819902898076915e+61, 7.521415141253441645e+63, 7.224277554900578993e+65,
|
||||
8.012832830535078610e+67, 1.030999620286380369e+70, 1.546174957076748679e+72, 2.715803772613248694e+74,
|
||||
5.615089920571746438e+76, 1.373667859345343337e+79, 3.997541020769625126e+81, 1.391500589339800087e+84,
|
||||
5.826693844912022892e+86, 2.952274820929549096e+89, 1.821023061478466282e+92, 1.375973022137941526e+95,
|
||||
1.281852367543412945e+98, 1.482130127201990503e+101, 2.141574273792435314e+104, 3.894495540947112380e+107,
|
||||
8.978646362580102961e+110, 2.644131589807244050e+114, 1.002403539841913834e+118, 4.931412804903905259e+121,
|
||||
3.174401112435865044e+125, 2.696624001761892390e+129, 3.049799322320447166e+133, 4.634041526818687785e+137,
|
||||
9.548983134803106512e+141, 2.694404866192089829e+146, 1.051502720036395325e+151, 5.734170640626244955e+155, };
|
||||
|
||||
__constant__ double m_weights_double_8[391] =
|
||||
{ 1.570962550997832611e+00, 1.572292902367211961e+00, 1.574956581912666755e+00, 1.578959553636163985e+00,
|
||||
1.584310789563614305e+00, 1.591022301117035107e+00, 1.599109181186160337e+00, 1.608589657109067468e+00,
|
||||
1.619485154826419743e+00, 1.631820374530739318e+00, 1.645623378191125679e+00, 1.660925689395424109e+00,
|
||||
1.677762406016463717e+00, 1.696172326277082973e+00, 1.716198088860732467e+00, 1.737886327791014562e+00,
|
||||
1.761287842885152410e+00, 1.786457786673686420e+00, 1.813455868772335587e+00, 1.842346578792652542e+00,
|
||||
1.873199428986627521e+00, 1.906089217937612619e+00, 1.941096316736779451e+00, 1.978306979221816566e+00,
|
||||
2.017813678003844337e+00, 2.059715468170813895e+00, 2.104118380732327493e+00, 2.151135848063375554e+00,
|
||||
2.200889163814591418e+00, 2.253507979986114202e+00, 2.309130844113053375e+00, 2.367905779785113334e+00,
|
||||
2.429990914023652954e+00, 2.495555155369085590e+00, 2.564778926893134514e+00, 2.637854958747451684e+00,
|
||||
2.714989145296268067e+00, 2.796401472360280536e+00, 2.882327020626578700e+00, 2.973017051860293803e+00,
|
||||
3.068740185193628238e+00, 3.169783671473487386e+00, 3.276454774427328601e+00, 3.389082268266156098e+00,
|
||||
3.508018062292869136e+00, 3.633638964133530274e+00, 3.766348594369884204e+00, 3.906579466636309289e+00,
|
||||
4.054795248667541120e+00, 4.211493221360917802e+00, 4.377206954666462219e+00, 4.552509221059946388e+00,
|
||||
4.738015169510782826e+00, 4.934385785253587887e+00, 5.142331663338191074e+00, 5.362617126899976224e+00,
|
||||
5.596064724397100194e+00, 5.843560143744373307e+00, 6.106057585381734693e+00, 6.384585640900671436e+00,
|
||||
6.680253728973824449e+00, 6.994259146058412709e+00, 7.327894795748901060e+00, 7.682557667824588764e+00,
|
||||
8.059758146071137270e+00, 8.461130232962342889e+00, 8.888442789395671080e+00, 9.343611899025485155e+00,
|
||||
9.828714479494622022e+00, 1.034600327721380625e+01, 1.089792339849122916e+01, 1.148713054801325790e+01,
|
||||
1.211651116619788555e+01, 1.278920468010096321e+01, 1.350862810871281096e+01, 1.427850329305334421e+01,
|
||||
1.510288705493181327e+01, 1.598620462612703196e+01, 1.693328673269081128e+01, 1.794941076780000506e+01,
|
||||
1.904034654190823159e+01, 2.021240716182964334e+01, 2.147250566192247370e+01, 2.282821809199713505e+01,
|
||||
2.428785385941680425e+01, 2.586053422878117785e+01, 2.755628000354674426e+01, 2.938610955221109564e+01,
|
||||
3.136214849990951329e+01, 3.349775258749912582e+01, 3.580764540799625468e+01, 3.830807296872530167e+01,
|
||||
4.101697730155473447e+01, 4.395419165876113623e+01, 4.714166019494196927e+01, 5.060368545366659226e+01,
|
||||
5.436720746019445252e+01, 5.846211877912138439e+01, 6.292162054058128784e+01, 6.778262518512416663e+01,
|
||||
7.308621254265223015e+01, 7.887814686488147292e+01, 8.520946359734658334e+01, 9.213713603387774717e+01,
|
||||
9.972483357670754649e+01, 1.080437851679046426e+02, 1.171737636088621692e+02, 1.272042089988687372e+02,
|
||||
1.382355124664102373e+02, 1.503804848151483311e+02, 1.637660387526102742e+02, 1.785351181233383403e+02,
|
||||
1.948489131607280604e+02, 2.128894073598352670e+02, 2.328623093447990790e+02, 2.550004322843281994e+02,
|
||||
2.795675942672445782e+02, 3.068631259124280934e+02, 3.372270867451200874e+02, 3.710463099965576255e+02,
|
||||
4.087614170466174911e+02, 4.508749684194593670e+02, 4.979609488959773491e+02, 5.506758209385785877e+02,
|
||||
6.097714244663179092e+02, 6.761100535726473685e+02, 7.506821038741422446e+02, 8.346267600518081192e+02,
|
||||
9.292562845315541998e+02, 1.036084578498234728e+03, 1.156860819661897657e+03, 1.293609142453808600e+03,
|
||||
1.448675521854205144e+03, 1.624783259532197615e+03, 1.825098759915318560e+03, 2.053309635972617554e+03,
|
||||
2.313717614494777200e+03, 2.611349236640186999e+03, 2.952087994093624299e+03, 3.342832332560548180e+03,
|
||||
3.791684927756595099e+03, 4.308179838716318955e+03, 4.903555624570201673e+03, 5.591084343634811452e+03,
|
||||
6.386468625571246341e+03, 7.308321829412979440e+03, 8.378749812799703561e+03, 9.624057218749638059e+03,
|
||||
1.107560666191146008e+04, 1.277086605445904388e+04, 1.475468792019489452e+04, 1.708087537417066343e+04,
|
||||
1.981410309695485051e+04, 2.303227888204754908e+04, 2.682945317928632535e+04, 3.131941178398428200e+04,
|
||||
3.664012209706997997e+04, 4.295924836668690170e+04, 5.048100882639843572e+04, 5.945472133180055290e+04,
|
||||
7.018547875172689579e+04, 8.304751726175694003e+04, 9.850099805053575446e+04, 1.171131266261766060e+05,
|
||||
1.395847982160589845e+05, 1.667843016393077556e+05, 1.997900626520524686e+05, 2.399449946032992187e+05,
|
||||
2.889257939838013232e+05, 3.488315309194304548e+05, 4.222972201496778447e+05, 5.126398246369253619e+05,
|
||||
6.240464876221989792e+05, 7.618179073233615941e+05, 9.326839300224119257e+05, 1.145214007774297539e+06,
|
||||
1.410352646274233119e+06, 1.742120041875863385e+06, 2.158531716934287014e+06, 2.682809410126426731e+06,
|
||||
3.344980563595418861e+06, 4.183997972337706048e+06, 5.250558008165501752e+06, 6.610860174141680988e+06,
|
||||
8.351639423967558693e+06, 1.058692532393929900e+07, 1.346715235106239409e+07, 1.719148271024263021e+07,
|
||||
2.202453449027701694e+07, 2.831917301724337797e+07, 3.654767820268344932e+07, 4.734452657230626106e+07,
|
||||
6.156534063509513873e+07, 8.036843026897869248e+07, 1.053280284359690289e+08, 1.385921689084126286e+08,
|
||||
1.831036985925683524e+08, 2.429109457458640820e+08, 3.236062393759667463e+08, 4.329475218599986663e+08,
|
||||
5.817432967962929479e+08, 7.851179789388191786e+08, 1.064329197627075307e+09, 1.449389582912945485e+09,
|
||||
1.982866469377991849e+09, 2.725414314698094324e+09, 3.763867964111621444e+09, 5.223138814950990937e+09,
|
||||
7.283785810644397704e+09, 1.020809642381158743e+10, 1.437899318470510521e+10, 2.035836812543633578e+10,
|
||||
2.897499827080027444e+10, 4.145773751645494878e+10, 5.963837683872426287e+10, 8.626228483915530800e+10,
|
||||
1.254667045389825180e+11, 1.835212982264913186e+11, 2.699812207400151604e+11, 3.994928452151922954e+11,
|
||||
5.946380558701434550e+11, 8.904409967424091107e+11, 1.341551941677775838e+12, 2.033768550332151892e+12,
|
||||
3.102627959875753214e+12, 4.763598321705862063e+12, 7.361420360560813584e+12, 1.145126961456557423e+13,
|
||||
1.793314186996273926e+13, 2.827585501285792232e+13, 4.489297053678444669e+13, 7.177802872658499571e+13,
|
||||
1.155855098545820625e+14, 1.874833886367883093e+14, 3.063510356402174454e+14, 5.043400653005970242e+14,
|
||||
8.366163396892429890e+14, 1.398556351640947289e+15, 2.356335749516164682e+15, 4.001765167382637456e+15,
|
||||
6.851375128404941445e+15, 1.182690111761543990e+16, 2.058673527013806443e+16, 3.613968784314904633e+16,
|
||||
6.399112184394213551e+16, 1.143016185628376923e+17, 2.059881383915666443e+17, 3.745846788353680914e+17,
|
||||
6.874443034683149068e+17, 1.273407643613485314e+18, 2.381241916829895366e+18, 4.495835617307108399e+18,
|
||||
8.571442024901952701e+18, 1.650443584181656965e+19, 3.210100352421317851e+19, 6.307780124442703091e+19,
|
||||
1.252404031157661279e+20, 2.513005295649985394e+20, 5.096776255690838436e+20, 1.045019200016673046e+21,
|
||||
2.166476479260878466e+21, 4.542138145678395463e+21, 9.632082324449137128e+21, 2.066386536688254528e+22,
|
||||
4.485529785554428251e+22, 9.853879573610977508e+22, 2.191158874464374408e+23, 4.932835964390971668e+23,
|
||||
1.124501529971774363e+24, 2.596269136156756008e+24, 6.072292938313625501e+24, 1.438989066308003836e+25,
|
||||
3.455841956406570469e+25, 8.412655191713576490e+25, 2.076289061650816510e+26, 5.196515024640220322e+26,
|
||||
1.319173194089644043e+27, 3.397455895980380794e+27, 8.879057454438503591e+27, 2.355272361492064126e+28,
|
||||
6.342762007722624824e+28, 1.734531093990859705e+29, 4.817893170606830871e+29, 1.359597346490148232e+30,
|
||||
3.898969689906500392e+30, 1.136542986529989936e+31, 3.368450043991780017e+31, 1.015304084709817260e+32,
|
||||
3.113144376221918237e+32, 9.713072739730140403e+32, 3.084517643581725946e+33, 9.972682139820497284e+33,
|
||||
3.283625052288491586e+34, 1.101378785390827536e+35, 3.764333367592714297e+35, 1.311403465938242926e+36,
|
||||
4.658135710682813672e+36, 1.687517347470511392e+37, 6.237053685018323490e+37, 2.352571314427744869e+38,
|
||||
9.058938240219699936e+38, 3.562249097611136071e+39, 1.430959291578558210e+40, 5.873974584984375049e+40,
|
||||
2.464828549811283787e+41, 1.057649203090855628e+42, 4.642475639281078035e+42, 2.085287118272421779e+43,
|
||||
9.588439985186632177e+43, 4.514982011246092280e+44, 2.177974048341973204e+45, 1.076720976822900458e+46,
|
||||
5.457267432929085589e+46, 2.836869270455781134e+47, 1.513103201392011626e+48, 8.283974667225617075e+48,
|
||||
4.657239491995971344e+49, 2.689796370712836937e+50, 1.596597846911970388e+51, 9.744154538256586629e+51,
|
||||
6.117238394843313065e+52, 3.952049650585241827e+53, 2.628701592074258213e+54, 1.800990196502679393e+55,
|
||||
1.271554462563068383e+56, 9.255880104477760711e+56, 6.949737920133919393e+57, 5.385167200769965621e+58,
|
||||
4.308493668102978774e+59, 3.560951557542178371e+60, 3.041888528384649992e+61, 2.687094441930837189e+62,
|
||||
2.455920538900000855e+63, 2.323648254168641537e+64, 2.277129741584892331e+65, 2.312633552913224734e+66,
|
||||
2.435407592981291129e+67, 2.660910388822465246e+68, 3.018105943423533920e+69, 3.555823489510192503e+70,
|
||||
4.354188877793849013e+71, 5.544975795511813315e+72, 7.348276481909886336e+73, 1.013998025722423261e+75,
|
||||
1.457911462244607943e+76, 2.185488876819505295e+77, 3.418022153286623008e+78, 5.580843920601835728e+79,
|
||||
9.519586502799733908e+80, 1.697573578247197786e+82, 3.166906670990180014e+83, 6.185099106418675430e+84,
|
||||
1.265541134386934377e+86, 2.714828965877756899e+87, 6.110386802964494082e+88, 1.444054086171083239e+90,
|
||||
3.586083726638388165e+91, 9.365231868063239600e+92, 2.574080116205122449e+94, 7.452134689862302719e+95,
|
||||
2.274309903836169819e+97, 7.323011134121164749e+98, 2.489816421737932462e+100, 8.946533386359281588e+101,
|
||||
3.400401372391165979e+103, 1.368288186208928217e+105, 5.834277489829591931e+106, 2.638486937672383424e+108,
|
||||
1.266728882767139521e+110, 6.462225178314182803e+111, 3.506432320607573604e+113, 2.025608933943268165e+115,
|
||||
1.247041677084784707e+117, 8.189865188405279038e+118, 5.743610894406099965e+120, 4.305808934084489763e+122,
|
||||
3.454156966079496755e+124, 2.968316601530352737e+126, 2.735456242372183592e+128, 2.706317176690077847e+130,
|
||||
2.877679916342060385e+132, 3.292412878268106390e+134, 4.057840961953725969e+136, 5.393783049105737324e+138,
|
||||
7.741523901672235406e+140, 1.201209962310668456e+143, 2.017456079556807301e+145, 3.672176623483062526e+147,
|
||||
7.253163798058577630e+149, 1.556591535302570570e+152, 3.634399832790394885e+154, };
|
||||
|
||||
__constant__ double* m_weights_double[8] = {
|
||||
m_weights_double_1,
|
||||
m_weights_double_2,
|
||||
m_weights_double_3,
|
||||
m_weights_double_4,
|
||||
m_weights_double_5,
|
||||
m_weights_double_6,
|
||||
m_weights_double_7,
|
||||
m_weights_double_8
|
||||
};
|
||||
__constant__ boost::math::size_t float_coefficients_size[8] = {4, 4, 8, 16, 32, 65, 129, 259};
|
||||
|
||||
__constant__ boost::math::size_t double_coefficients_size[8] = {6, 6, 12, 24, 49, 98, 196, 391};
|
||||
|
||||
template<typename T>
|
||||
struct coefficients_selector;
|
||||
|
||||
template<>
|
||||
struct coefficients_selector<float>
|
||||
{
|
||||
__device__ static const auto abscissas() { return m_abscissas_float; }
|
||||
__device__ static const auto weights() { return m_weights_float; }
|
||||
__device__ static const auto size() { return float_coefficients_size; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct coefficients_selector<double>
|
||||
{
|
||||
__device__ static const auto abscissas() { return m_abscissas_double; }
|
||||
__device__ static const auto weights() { return m_weights_double; }
|
||||
__device__ static const auto size() { return double_coefficients_size; }
|
||||
};
|
||||
|
||||
template <class F, class Real, class Policy = boost::math::policies::policy<> >
|
||||
__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
using boost::math::constants::half;
|
||||
using boost::math::constants::half_pi;
|
||||
using boost::math::size_t;
|
||||
|
||||
constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate";
|
||||
|
||||
using K = decltype(f(static_cast<Real>(0)));
|
||||
static_assert(!boost::math::is_integral<K>::value,
|
||||
"The return type cannot be integral, it must be either a real or complex floating point type.");
|
||||
|
||||
K y_max = f(boost::math::tools::max_value<Real>());
|
||||
|
||||
if(abs(y_max) > boost::math::tools::epsilon<Real>())
|
||||
{
|
||||
return static_cast<K>(policies::raise_domain_error(function,
|
||||
"The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", y_max, Policy()));
|
||||
}
|
||||
|
||||
K y_min = f(-boost::math::tools::max_value<Real>());
|
||||
|
||||
if(abs(y_min) > boost::math::tools::epsilon<Real>())
|
||||
{
|
||||
return static_cast<K>(policies::raise_domain_error(function,
|
||||
"The function you are trying to integrate does not go to zero at -infinity, and instead evaluates to %1%", y_max, Policy()));
|
||||
}
|
||||
|
||||
// Get the party started with two estimates of the integral:
|
||||
const auto m_abscissas = coefficients_selector<Real>::abscissas();
|
||||
const auto m_weights = coefficients_selector<Real>::weights();
|
||||
const auto m_size = coefficients_selector<Real>::size();
|
||||
|
||||
K I0 = f(0)*half_pi<Real>();
|
||||
Real L1_I0 = abs(I0);
|
||||
for(size_t i = 0; i < m_size[0]; ++i)
|
||||
{
|
||||
Real x = m_abscissas[0][i];
|
||||
K yp = f(x);
|
||||
K ym = f(-x);
|
||||
I0 += (yp + ym)*m_weights[0][i];
|
||||
L1_I0 += (abs(yp)+abs(ym))*m_weights[0][i];
|
||||
}
|
||||
|
||||
K I1 = I0;
|
||||
Real L1_I1 = L1_I0;
|
||||
for (size_t i = 0; i < m_size[1]; ++i)
|
||||
{
|
||||
Real x= m_abscissas[1][i];
|
||||
K yp = f(x);
|
||||
K ym = f(-x);
|
||||
I1 += (yp + ym)*m_weights[1][i];
|
||||
L1_I1 += (abs(yp) + abs(ym))*m_weights[1][i];
|
||||
}
|
||||
|
||||
I1 *= half<Real>();
|
||||
L1_I1 *= half<Real>();
|
||||
Real err = abs(I0 - I1);
|
||||
|
||||
size_t i = 2;
|
||||
for(; i <= 8U; ++i)
|
||||
{
|
||||
I0 = I1;
|
||||
L1_I0 = L1_I1;
|
||||
|
||||
I1 = half<Real>()*I0;
|
||||
L1_I1 = half<Real>()*L1_I0;
|
||||
Real h = static_cast<Real>(1) / static_cast<Real>(1 << i);
|
||||
K sum = 0;
|
||||
Real absum = 0;
|
||||
|
||||
Real abterm1 = 1;
|
||||
Real eps = boost::math::tools::epsilon<Real>()*L1_I1;
|
||||
|
||||
auto abscissa_row = m_abscissas[i];
|
||||
auto weight_row = m_weights[i];
|
||||
|
||||
for(size_t j = 0; j < m_size[i]; ++j)
|
||||
{
|
||||
Real x = abscissa_row[j];
|
||||
K yp = f(x);
|
||||
K ym = f(-x);
|
||||
sum += (yp + ym)*weight_row[j];
|
||||
Real abterm0 = (abs(yp) + abs(ym))*weight_row[j];
|
||||
absum += abterm0;
|
||||
|
||||
// We require two consecutive terms to be < eps in case we hit a zero of f.
|
||||
if (x > static_cast<Real>(100) && abterm0 < eps && abterm1 < eps)
|
||||
{
|
||||
break;
|
||||
}
|
||||
abterm1 = abterm0;
|
||||
}
|
||||
|
||||
I1 += sum*h;
|
||||
L1_I1 += absum*h;
|
||||
err = abs(I0 - I1);
|
||||
|
||||
if (!(boost::math::isfinite)(L1_I1))
|
||||
{
|
||||
constexpr auto err_msg = "The sinh_sinh quadrature evaluated your function at a singular point, leading to the value %1%.\n"
|
||||
"sinh_sinh quadrature cannot handle singularities in the domain.\n"
|
||||
"If you are sure your function has no singularities, please submit a bug against boost.math\n";
|
||||
return static_cast<K>(policies::raise_evaluation_error(function, err_msg, I1, Policy()));
|
||||
}
|
||||
if (err <= tolerance*L1_I1)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (error)
|
||||
{
|
||||
*error = err;
|
||||
}
|
||||
|
||||
if (L1)
|
||||
{
|
||||
*L1 = L1_I1;
|
||||
}
|
||||
|
||||
if (levels)
|
||||
{
|
||||
*levels = i;
|
||||
}
|
||||
|
||||
return I1;
|
||||
}
|
||||
|
||||
} // Namespace detail
|
||||
} // Namespace quadrature
|
||||
} // Namespace math
|
||||
} // Namespace boost
|
||||
|
||||
#endif // BOOST_MATH_ENABLE_CUDA
|
||||
|
||||
#endif // BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
|
||||
|
||||
@@ -15,11 +15,15 @@
|
||||
#ifndef BOOST_MATH_QUADRATURE_EXP_SINH_HPP
|
||||
#define BOOST_MATH_QUADRATURE_EXP_SINH_HPP
|
||||
|
||||
#include <boost/math/tools/config.hpp>
|
||||
#include <boost/math/quadrature/detail/exp_sinh_detail.hpp>
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <boost/math/quadrature/detail/exp_sinh_detail.hpp>
|
||||
|
||||
namespace boost{ namespace math{ namespace quadrature {
|
||||
|
||||
@@ -98,4 +102,79 @@ auto exp_sinh<Real, Policy>::integrate(const F& f, Real tolerance, Real* error,
|
||||
|
||||
|
||||
}}}
|
||||
#endif
|
||||
|
||||
#endif // BOOST_MATH_HAS_NVRTC
|
||||
|
||||
#ifdef BOOST_MATH_ENABLE_CUDA
|
||||
|
||||
#include <boost/math/tools/type_traits.hpp>
|
||||
#include <boost/math/tools/cstdint.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include <boost/math/policies/error_handling.hpp>
|
||||
#include <boost/math/constants/constants.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace quadrature {
|
||||
|
||||
template <class F, class Real, class Policy = policies::policy<> >
|
||||
__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
|
||||
using K = decltype(f(a));
|
||||
static_assert(!boost::math::is_integral<K>::value,
|
||||
"The return type cannot be integral, it must be either a real or complex floating point type.");
|
||||
|
||||
constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
|
||||
|
||||
// Neither limit may be a NaN:
|
||||
if((boost::math::isnan)(a) || (boost::math::isnan)(b))
|
||||
{
|
||||
return static_cast<K>(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy()));
|
||||
}
|
||||
// Right limit is infinite:
|
||||
if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value<Real>()))
|
||||
{
|
||||
// If a = 0, don't use an additional level of indirection:
|
||||
if (a == static_cast<Real>(0))
|
||||
{
|
||||
return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
|
||||
}
|
||||
const auto u = [&](Real t)->K { return f(t + a); };
|
||||
return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
|
||||
}
|
||||
|
||||
if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value<Real>())
|
||||
{
|
||||
const auto u = [&](Real t)->K { return f(b-t);};
|
||||
return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
|
||||
}
|
||||
|
||||
// Infinite limits:
|
||||
if ((a <= -boost::math::tools::max_value<Real>()) && (b >= boost::math::tools::max_value<Real>()))
|
||||
{
|
||||
return static_cast<K>(policies::raise_domain_error(function, "Use sinh_sinh quadrature for integration over the whole real line; exp_sinh is for half infinite integrals.", a, Policy()));
|
||||
}
|
||||
// If we get to here then both ends must necessarily be finite:
|
||||
return static_cast<K>(policies::raise_domain_error(function, "Use tanh_sinh quadrature for integration over finite domains; exp_sinh is for half infinite integrals.", a, Policy()));
|
||||
}
|
||||
|
||||
template <class F, class Real, class Policy = policies::policy<> >
|
||||
__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
|
||||
if (abs(tolerance) > 1) {
|
||||
return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy());
|
||||
}
|
||||
return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
|
||||
}
|
||||
|
||||
} // namespace quadrature
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
|
||||
#endif // BOOST_MATH_ENABLE_CUDA
|
||||
|
||||
#endif // BOOST_MATH_QUADRATURE_EXP_SINH_HPP
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright Nick Thompson, 2017
|
||||
// Copyright Matt Borland, 2024
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt
|
||||
@@ -15,10 +16,17 @@
|
||||
#ifndef BOOST_MATH_QUADRATURE_SINH_SINH_HPP
|
||||
#define BOOST_MATH_QUADRATURE_SINH_SINH_HPP
|
||||
|
||||
#include <boost/math/tools/config.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include <boost/math/tools/cstdint.hpp>
|
||||
#include <boost/math/quadrature/detail/sinh_sinh_detail.hpp>
|
||||
#include <boost/math/policies/error_handling.hpp>
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <boost/math/quadrature/detail/sinh_sinh_detail.hpp>
|
||||
|
||||
namespace boost{ namespace math{ namespace quadrature {
|
||||
|
||||
@@ -40,4 +48,25 @@ private:
|
||||
};
|
||||
|
||||
}}}
|
||||
#endif
|
||||
|
||||
#endif // BOOST_MATH_HAS_NVRTC
|
||||
|
||||
#ifdef BOOST_MATH_ENABLE_CUDA
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace quadrature {
|
||||
|
||||
template <class F, class Real, class Policy = boost::math::policies::policy<> >
|
||||
__device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon<Real>(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr)
|
||||
{
|
||||
return detail::sinh_sinh_integrate_impl(f, tol, error, L1, levels);
|
||||
}
|
||||
|
||||
} // namespace quadrature
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
|
||||
#endif // BOOST_MATH_ENABLE_CUDA
|
||||
|
||||
#endif // BOOST_MATH_QUADRATURE_SINH_SINH_HPP
|
||||
|
||||
@@ -9,6 +9,12 @@ project : requirements
|
||||
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
|
||||
;
|
||||
|
||||
# Quad
|
||||
run test_exp_sinh_quad_float.cu ;
|
||||
run test_exp_sinh_quad_double.cu ;
|
||||
run test_sinh_sinh_quad_float.cu ;
|
||||
run test_sinh_sinh_quad_double.cu ;
|
||||
|
||||
# Distributions
|
||||
run test_arcsine_cdf_double.cu ;
|
||||
run test_arcsine_cdf_float.cu ;
|
||||
|
||||
@@ -9,6 +9,12 @@ project : requirements
|
||||
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
|
||||
;
|
||||
|
||||
# Quad
|
||||
run test_exp_sinh_quad_nvrtc_float.cpp ;
|
||||
run test_exp_sinh_quad_nvrtc_double.cpp ;
|
||||
run test_sinh_sinh_quad_nvrtc_float.cpp ;
|
||||
run test_sinh_sinh_quad_nvrtc_double.cpp ;
|
||||
|
||||
# Distributions
|
||||
run test_arcsine_cdf_nvrtc_double.cpp ;
|
||||
run test_arcsine_cdf_nvrtc_float.cpp ;
|
||||
|
||||
133
test/test_exp_sinh_quad_double.cu
Normal file
133
test/test_exp_sinh_quad_double.cu
Normal file
@@ -0,0 +1,133 @@
|
||||
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
#include <boost/math/special_functions.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::exp_sinh<float_type> integrator;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
results.push_back(integrator.integrate(func, tol, &error, &L1));
|
||||
}
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
int failed_count = 0;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
|
||||
if (eps > 10)
|
||||
{
|
||||
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
|
||||
<< "Result verification failed at element " << i << "!\n"
|
||||
<< "Device: " << output_vector[i]
|
||||
<< "\n Host: " << results[i]
|
||||
<< "\n Eps: " << eps << "\n";
|
||||
failed_count++;
|
||||
}
|
||||
if (failed_count > 100)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed_count != 0)
|
||||
{
|
||||
std::cout << "Test FAILED" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
133
test/test_exp_sinh_quad_float.cu
Normal file
133
test/test_exp_sinh_quad_float.cu
Normal file
@@ -0,0 +1,133 @@
|
||||
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
#include <boost/math/special_functions.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::exp_sinh<float_type> integrator;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
results.push_back(integrator.integrate(func, tol, &error, &L1));
|
||||
}
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
int failed_count = 0;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
|
||||
if (eps > 10)
|
||||
{
|
||||
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
|
||||
<< "Result verification failed at element " << i << "!\n"
|
||||
<< "Device: " << output_vector[i]
|
||||
<< "\n Host: " << results[i]
|
||||
<< "\n Eps: " << eps << "\n";
|
||||
failed_count++;
|
||||
}
|
||||
if (failed_count > 100)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed_count != 0)
|
||||
{
|
||||
std::cout << "Test FAILED" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
206
test/test_exp_sinh_quad_nvrtc_double.cpp
Normal file
206
test/test_exp_sinh_quad_nvrtc_double.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <exception>
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <nvrtc.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
const char* cuda_kernel = R"(
|
||||
typedef double float_type;
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
extern "C" __global__
|
||||
void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
void checkCUDAError(cudaError_t result, const char* msg)
|
||||
{
|
||||
if (result != cudaSuccess)
|
||||
{
|
||||
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkCUError(CUresult result, const char* msg)
|
||||
{
|
||||
if (result != CUDA_SUCCESS)
|
||||
{
|
||||
const char* errorStr;
|
||||
cuGetErrorString(result, &errorStr);
|
||||
std::cerr << msg << ": " << errorStr << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkNVRTCError(nvrtcResult result, const char* msg)
|
||||
{
|
||||
if (result != NVRTC_SUCCESS)
|
||||
{
|
||||
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try
|
||||
{
|
||||
// Initialize CUDA driver API
|
||||
checkCUError(cuInit(0), "Failed to initialize CUDA");
|
||||
|
||||
// Create CUDA context
|
||||
CUcontext context;
|
||||
CUdevice device;
|
||||
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
|
||||
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
|
||||
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult res;
|
||||
|
||||
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
|
||||
checkNVRTCError(res, "Failed to create NVRTC program");
|
||||
|
||||
nvrtcAddNameExpression(prog, "test_expm1_kernel");
|
||||
|
||||
#ifdef BOOST_MATH_NVRTC_CI_RUN
|
||||
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#else
|
||||
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#endif
|
||||
|
||||
// Compile the program
|
||||
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
|
||||
if (res != NVRTC_SUCCESS)
|
||||
{
|
||||
size_t log_size;
|
||||
nvrtcGetProgramLogSize(prog, &log_size);
|
||||
char* log = new char[log_size];
|
||||
nvrtcGetProgramLog(prog, log);
|
||||
std::cerr << "Compilation failed:\n" << log << std::endl;
|
||||
delete[] log;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Get PTX from the program
|
||||
size_t ptx_size;
|
||||
nvrtcGetPTXSize(prog, &ptx_size);
|
||||
char* ptx = new char[ptx_size];
|
||||
nvrtcGetPTX(prog, ptx);
|
||||
|
||||
// Load PTX into CUDA module
|
||||
CUmodule module;
|
||||
CUfunction kernel;
|
||||
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
|
||||
checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
|
||||
|
||||
int numElements = 50000;
|
||||
float_type *h_in1, *h_in2, *h_out;
|
||||
float_type *d_in1, *d_in2, *d_out;
|
||||
|
||||
// Allocate memory on the host
|
||||
h_in1 = new float_type[numElements];
|
||||
h_in2 = new float_type[numElements];
|
||||
h_out = new float_type[numElements];
|
||||
|
||||
// Initialize input arrays
|
||||
std::mt19937_64 rng(42);
|
||||
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
h_in1[i] = static_cast<float_type>(dist(rng));
|
||||
h_in2[i] = static_cast<float_type>(dist(rng));
|
||||
}
|
||||
|
||||
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
|
||||
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
|
||||
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
|
||||
|
||||
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
|
||||
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
|
||||
|
||||
int blockSize = 256;
|
||||
int numBlocks = (numElements + blockSize - 1) / blockSize;
|
||||
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
|
||||
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
|
||||
|
||||
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
|
||||
|
||||
// Verify Result
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::exp_sinh<float_type> integrator;
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
auto res = integrator.integrate(func, tol, &error, &L1);
|
||||
if (std::isfinite(res))
|
||||
{
|
||||
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
|
||||
{
|
||||
std::cout << "error at line: " << i
|
||||
<< "\nParallel: " << h_out[i]
|
||||
<< "\n Serial: " << res
|
||||
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_in1);
|
||||
cudaFree(d_in2);
|
||||
cudaFree(d_out);
|
||||
delete[] h_in1;
|
||||
delete[] h_in2;
|
||||
delete[] h_out;
|
||||
|
||||
nvrtcDestroyProgram(&prog);
|
||||
delete[] ptx;
|
||||
|
||||
cuCtxDestroy(context);
|
||||
|
||||
std::cout << "Kernel executed successfully." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
206
test/test_exp_sinh_quad_nvrtc_float.cpp
Normal file
206
test/test_exp_sinh_quad_nvrtc_float.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <exception>
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <nvrtc.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
const char* cuda_kernel = R"(
|
||||
typedef float float_type;
|
||||
#include <boost/math/quadrature/exp_sinh.hpp>
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
extern "C" __global__
|
||||
void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
void checkCUDAError(cudaError_t result, const char* msg)
|
||||
{
|
||||
if (result != cudaSuccess)
|
||||
{
|
||||
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkCUError(CUresult result, const char* msg)
|
||||
{
|
||||
if (result != CUDA_SUCCESS)
|
||||
{
|
||||
const char* errorStr;
|
||||
cuGetErrorString(result, &errorStr);
|
||||
std::cerr << msg << ": " << errorStr << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkNVRTCError(nvrtcResult result, const char* msg)
|
||||
{
|
||||
if (result != NVRTC_SUCCESS)
|
||||
{
|
||||
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try
|
||||
{
|
||||
// Initialize CUDA driver API
|
||||
checkCUError(cuInit(0), "Failed to initialize CUDA");
|
||||
|
||||
// Create CUDA context
|
||||
CUcontext context;
|
||||
CUdevice device;
|
||||
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
|
||||
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
|
||||
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult res;
|
||||
|
||||
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
|
||||
checkNVRTCError(res, "Failed to create NVRTC program");
|
||||
|
||||
nvrtcAddNameExpression(prog, "test_expm1_kernel");
|
||||
|
||||
#ifdef BOOST_MATH_NVRTC_CI_RUN
|
||||
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#else
|
||||
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#endif
|
||||
|
||||
// Compile the program
|
||||
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
|
||||
if (res != NVRTC_SUCCESS)
|
||||
{
|
||||
size_t log_size;
|
||||
nvrtcGetProgramLogSize(prog, &log_size);
|
||||
char* log = new char[log_size];
|
||||
nvrtcGetProgramLog(prog, log);
|
||||
std::cerr << "Compilation failed:\n" << log << std::endl;
|
||||
delete[] log;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Get PTX from the program
|
||||
size_t ptx_size;
|
||||
nvrtcGetPTXSize(prog, &ptx_size);
|
||||
char* ptx = new char[ptx_size];
|
||||
nvrtcGetPTX(prog, ptx);
|
||||
|
||||
// Load PTX into CUDA module
|
||||
CUmodule module;
|
||||
CUfunction kernel;
|
||||
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
|
||||
checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
|
||||
|
||||
int numElements = 50000;
|
||||
float_type *h_in1, *h_in2, *h_out;
|
||||
float_type *d_in1, *d_in2, *d_out;
|
||||
|
||||
// Allocate memory on the host
|
||||
h_in1 = new float_type[numElements];
|
||||
h_in2 = new float_type[numElements];
|
||||
h_out = new float_type[numElements];
|
||||
|
||||
// Initialize input arrays
|
||||
std::mt19937_64 rng(42);
|
||||
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
h_in1[i] = static_cast<float_type>(dist(rng));
|
||||
h_in2[i] = static_cast<float_type>(dist(rng));
|
||||
}
|
||||
|
||||
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
|
||||
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
|
||||
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
|
||||
|
||||
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
|
||||
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
|
||||
|
||||
int blockSize = 256;
|
||||
int numBlocks = (numElements + blockSize - 1) / blockSize;
|
||||
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
|
||||
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
|
||||
|
||||
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
|
||||
|
||||
// Verify Result
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::exp_sinh<float_type> integrator;
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
auto res = integrator.integrate(func, tol, &error, &L1);
|
||||
if (std::isfinite(res))
|
||||
{
|
||||
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
|
||||
{
|
||||
std::cout << "error at line: " << i
|
||||
<< "\nParallel: " << h_out[i]
|
||||
<< "\n Serial: " << res
|
||||
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_in1);
|
||||
cudaFree(d_in2);
|
||||
cudaFree(d_out);
|
||||
delete[] h_in1;
|
||||
delete[] h_in2;
|
||||
delete[] h_out;
|
||||
|
||||
nvrtcDestroyProgram(&prog);
|
||||
delete[] ptx;
|
||||
|
||||
cuCtxDestroy(context);
|
||||
|
||||
std::cout << "Kernel executed successfully." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
133
test/test_sinh_sinh_quad_double.cu
Normal file
133
test/test_sinh_sinh_quad_double.cu
Normal file
@@ -0,0 +1,133 @@
|
||||
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <boost/math/quadrature/sinh_sinh.hpp>
|
||||
#include <boost/math/special_functions.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::sinh_sinh<float_type> integrator;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
results.push_back(integrator.integrate(func, tol, &error, &L1));
|
||||
}
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
int failed_count = 0;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
|
||||
if (eps > 10)
|
||||
{
|
||||
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
|
||||
<< "Result verification failed at element " << i << "!\n"
|
||||
<< "Device: " << output_vector[i]
|
||||
<< "\n Host: " << results[i]
|
||||
<< "\n Eps: " << eps << "\n";
|
||||
failed_count++;
|
||||
}
|
||||
if (failed_count > 100)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed_count != 0)
|
||||
{
|
||||
std::cout << "Test FAILED" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
133
test/test_sinh_sinh_quad_float.cu
Normal file
133
test/test_sinh_sinh_quad_float.cu
Normal file
@@ -0,0 +1,133 @@
|
||||
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <boost/math/quadrature/sinh_sinh.hpp>
|
||||
#include <boost/math/special_functions.hpp>
|
||||
#include <boost/math/tools/precision.hpp>
|
||||
#include "cuda_managed_ptr.hpp"
|
||||
#include "stopwatch.hpp"
|
||||
|
||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
BOOST_MATH_STD_USING
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
*/
|
||||
__global__ void cuda_test(float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Print the vector length to be used, and compute its size
|
||||
int numElements = 50000;
|
||||
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
|
||||
|
||||
// Allocate the managed input vector A
|
||||
cuda_managed_ptr<float_type> input_vector(numElements);
|
||||
|
||||
// Allocate the managed output vector C
|
||||
cuda_managed_ptr<float_type> output_vector(numElements);
|
||||
|
||||
// Initialize the input vectors
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
|
||||
}
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 512;
|
||||
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
|
||||
|
||||
watch w;
|
||||
|
||||
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Verify that the result vector is correct
|
||||
std::vector<float_type> results;
|
||||
results.reserve(numElements);
|
||||
w.reset();
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::sinh_sinh<float_type> integrator;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
results.push_back(integrator.integrate(func, tol, &error, &L1));
|
||||
}
|
||||
double t = w.elapsed();
|
||||
// check the results
|
||||
int failed_count = 0;
|
||||
for(int i = 0; i < numElements; ++i)
|
||||
{
|
||||
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
|
||||
if (eps > 10)
|
||||
{
|
||||
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
|
||||
<< "Result verification failed at element " << i << "!\n"
|
||||
<< "Device: " << output_vector[i]
|
||||
<< "\n Host: " << results[i]
|
||||
<< "\n Eps: " << eps << "\n";
|
||||
failed_count++;
|
||||
}
|
||||
if (failed_count > 100)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed_count != 0)
|
||||
{
|
||||
std::cout << "Test FAILED" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
|
||||
std::cout << "Done\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
206
test/test_sinh_sinh_quad_nvrtc_double.cpp
Normal file
206
test/test_sinh_sinh_quad_nvrtc_double.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <exception>
|
||||
#include <boost/math/quadrature/sinh_sinh.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <nvrtc.h>
|
||||
|
||||
typedef double float_type;
|
||||
|
||||
const char* cuda_kernel = R"(
|
||||
typedef double float_type;
|
||||
#include <boost/math/quadrature/sinh_sinh.hpp>
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
extern "C" __global__
|
||||
void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
void checkCUDAError(cudaError_t result, const char* msg)
|
||||
{
|
||||
if (result != cudaSuccess)
|
||||
{
|
||||
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkCUError(CUresult result, const char* msg)
|
||||
{
|
||||
if (result != CUDA_SUCCESS)
|
||||
{
|
||||
const char* errorStr;
|
||||
cuGetErrorString(result, &errorStr);
|
||||
std::cerr << msg << ": " << errorStr << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkNVRTCError(nvrtcResult result, const char* msg)
|
||||
{
|
||||
if (result != NVRTC_SUCCESS)
|
||||
{
|
||||
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try
|
||||
{
|
||||
// Initialize CUDA driver API
|
||||
checkCUError(cuInit(0), "Failed to initialize CUDA");
|
||||
|
||||
// Create CUDA context
|
||||
CUcontext context;
|
||||
CUdevice device;
|
||||
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
|
||||
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
|
||||
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult res;
|
||||
|
||||
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
|
||||
checkNVRTCError(res, "Failed to create NVRTC program");
|
||||
|
||||
nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
|
||||
|
||||
#ifdef BOOST_MATH_NVRTC_CI_RUN
|
||||
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#else
|
||||
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#endif
|
||||
|
||||
// Compile the program
|
||||
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
|
||||
if (res != NVRTC_SUCCESS)
|
||||
{
|
||||
size_t log_size;
|
||||
nvrtcGetProgramLogSize(prog, &log_size);
|
||||
char* log = new char[log_size];
|
||||
nvrtcGetProgramLog(prog, log);
|
||||
std::cerr << "Compilation failed:\n" << log << std::endl;
|
||||
delete[] log;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Get PTX from the program
|
||||
size_t ptx_size;
|
||||
nvrtcGetPTXSize(prog, &ptx_size);
|
||||
char* ptx = new char[ptx_size];
|
||||
nvrtcGetPTX(prog, ptx);
|
||||
|
||||
// Load PTX into CUDA module
|
||||
CUmodule module;
|
||||
CUfunction kernel;
|
||||
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
|
||||
checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function");
|
||||
|
||||
int numElements = 50000;
|
||||
float_type *h_in1, *h_in2, *h_out;
|
||||
float_type *d_in1, *d_in2, *d_out;
|
||||
|
||||
// Allocate memory on the host
|
||||
h_in1 = new float_type[numElements];
|
||||
h_in2 = new float_type[numElements];
|
||||
h_out = new float_type[numElements];
|
||||
|
||||
// Initialize input arrays
|
||||
std::mt19937_64 rng(42);
|
||||
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
h_in1[i] = static_cast<float_type>(dist(rng));
|
||||
h_in2[i] = static_cast<float_type>(dist(rng));
|
||||
}
|
||||
|
||||
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
|
||||
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
|
||||
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
|
||||
|
||||
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
|
||||
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
|
||||
|
||||
int blockSize = 256;
|
||||
int numBlocks = (numElements + blockSize - 1) / blockSize;
|
||||
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
|
||||
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
|
||||
|
||||
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
|
||||
|
||||
// Verify Result
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::sinh_sinh<float_type> integrator;
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
auto res = integrator.integrate(func, tol, &error, &L1);
|
||||
if (std::isfinite(res))
|
||||
{
|
||||
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
|
||||
{
|
||||
std::cout << "error at line: " << i
|
||||
<< "\nParallel: " << h_out[i]
|
||||
<< "\n Serial: " << res
|
||||
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_in1);
|
||||
cudaFree(d_in2);
|
||||
cudaFree(d_out);
|
||||
delete[] h_in1;
|
||||
delete[] h_in2;
|
||||
delete[] h_out;
|
||||
|
||||
nvrtcDestroyProgram(&prog);
|
||||
delete[] ptx;
|
||||
|
||||
cuCtxDestroy(context);
|
||||
|
||||
std::cout << "Kernel executed successfully." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
206
test/test_sinh_sinh_quad_nvrtc_float.cpp
Normal file
206
test/test_sinh_sinh_quad_nvrtc_float.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright John Maddock 2016.
|
||||
// Copyright Matt Borland 2024.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
|
||||
#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <exception>
|
||||
#include <boost/math/quadrature/sinh_sinh.hpp>
|
||||
#include <boost/math/special_functions/relative_difference.hpp>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <nvrtc.h>
|
||||
|
||||
typedef float float_type;
|
||||
|
||||
const char* cuda_kernel = R"(
|
||||
typedef float float_type;
|
||||
#include <boost/math/quadrature/sinh_sinh.hpp>
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
extern "C" __global__
|
||||
void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::size_t levels;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
__host__ __device__ float_type func(float_type x)
|
||||
{
|
||||
return 1/(1+x*x);
|
||||
}
|
||||
|
||||
void checkCUDAError(cudaError_t result, const char* msg)
|
||||
{
|
||||
if (result != cudaSuccess)
|
||||
{
|
||||
std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkCUError(CUresult result, const char* msg)
|
||||
{
|
||||
if (result != CUDA_SUCCESS)
|
||||
{
|
||||
const char* errorStr;
|
||||
cuGetErrorString(result, &errorStr);
|
||||
std::cerr << msg << ": " << errorStr << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkNVRTCError(nvrtcResult result, const char* msg)
|
||||
{
|
||||
if (result != NVRTC_SUCCESS)
|
||||
{
|
||||
std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try
|
||||
{
|
||||
// Initialize CUDA driver API
|
||||
checkCUError(cuInit(0), "Failed to initialize CUDA");
|
||||
|
||||
// Create CUDA context
|
||||
CUcontext context;
|
||||
CUdevice device;
|
||||
checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
|
||||
checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
|
||||
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult res;
|
||||
|
||||
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
|
||||
checkNVRTCError(res, "Failed to create NVRTC program");
|
||||
|
||||
nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
|
||||
|
||||
#ifdef BOOST_MATH_NVRTC_CI_RUN
|
||||
const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#else
|
||||
const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
|
||||
#endif
|
||||
|
||||
// Compile the program
|
||||
res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
|
||||
if (res != NVRTC_SUCCESS)
|
||||
{
|
||||
size_t log_size;
|
||||
nvrtcGetProgramLogSize(prog, &log_size);
|
||||
char* log = new char[log_size];
|
||||
nvrtcGetProgramLog(prog, log);
|
||||
std::cerr << "Compilation failed:\n" << log << std::endl;
|
||||
delete[] log;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Get PTX from the program
|
||||
size_t ptx_size;
|
||||
nvrtcGetPTXSize(prog, &ptx_size);
|
||||
char* ptx = new char[ptx_size];
|
||||
nvrtcGetPTX(prog, ptx);
|
||||
|
||||
// Load PTX into CUDA module
|
||||
CUmodule module;
|
||||
CUfunction kernel;
|
||||
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
|
||||
checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function");
|
||||
|
||||
int numElements = 50000;
|
||||
float_type *h_in1, *h_in2, *h_out;
|
||||
float_type *d_in1, *d_in2, *d_out;
|
||||
|
||||
// Allocate memory on the host
|
||||
h_in1 = new float_type[numElements];
|
||||
h_in2 = new float_type[numElements];
|
||||
h_out = new float_type[numElements];
|
||||
|
||||
// Initialize input arrays
|
||||
std::mt19937_64 rng(42);
|
||||
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
h_in1[i] = static_cast<float_type>(dist(rng));
|
||||
h_in2[i] = static_cast<float_type>(dist(rng));
|
||||
}
|
||||
|
||||
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
|
||||
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
|
||||
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
|
||||
|
||||
checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
|
||||
checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
|
||||
|
||||
int blockSize = 256;
|
||||
int numBlocks = (numElements + blockSize - 1) / blockSize;
|
||||
void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
|
||||
checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
|
||||
|
||||
checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
|
||||
|
||||
// Verify Result
|
||||
float_type tol = boost::math::tools::root_epsilon<float_type>();
|
||||
float_type error;
|
||||
float_type L1;
|
||||
boost::math::quadrature::sinh_sinh<float_type> integrator;
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
{
|
||||
auto res = integrator.integrate(func, tol, &error, &L1);
|
||||
if (std::isfinite(res))
|
||||
{
|
||||
if (boost::math::epsilon_difference(res, h_out[i]) > 300)
|
||||
{
|
||||
std::cout << "error at line: " << i
|
||||
<< "\nParallel: " << h_out[i]
|
||||
<< "\n Serial: " << res
|
||||
<< "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_in1);
|
||||
cudaFree(d_in2);
|
||||
cudaFree(d_out);
|
||||
delete[] h_in1;
|
||||
delete[] h_in2;
|
||||
delete[] h_out;
|
||||
|
||||
nvrtcDestroyProgram(&prog);
|
||||
delete[] ptx;
|
||||
|
||||
cuCtxDestroy(context);
|
||||
|
||||
std::cout << "Kernel executed successfully." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << "Stopped with exception: " << e.what() << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user