diff --git a/include/boost/math/special_functions/chebyshev_transform.hpp b/include/boost/math/special_functions/chebyshev_transform.hpp index 48e23b475..d4ce106d7 100644 --- a/include/boost/math/special_functions/chebyshev_transform.hpp +++ b/include/boost/math/special_functions/chebyshev_transform.hpp @@ -17,6 +17,98 @@ namespace boost { namespace math { +namespace detail{ + +template +struct fftw_cos_transform; + +template<> +struct fftw_cos_transform +{ + fftw_cos_transform(int n, double* data1, double* data2) + { + plan = fftw_plan_r2r_1d(n, data1, data2, FFTW_REDFT10, FFTW_ESTIMATE); + } + ~fftw_cos_transform() + { + fftw_destroy_plan(plan); + } + void execute(double* data1, double* data2) + { + fftw_execute_r2r(plan, data1, data2); + } + static double cos(double x) { return std::cos(x); } + static double fabs(double x) { return std::fabs(x); } +private: + fftw_plan plan; +}; + +template<> +struct fftw_cos_transform +{ + fftw_cos_transform(int n, float* data1, float* data2) + { + plan = fftwf_plan_r2r_1d(n, data1, data2, FFTW_REDFT10, FFTW_ESTIMATE); + } + ~fftw_cos_transform() + { + fftwf_destroy_plan(plan); + } + void execute(float* data1, float* data2) + { + fftwf_execute_r2r(plan, data1, data2); + } + static float cos(float x) { return std::cos(x); } + static float fabs(float x) { return std::fabs(x); } +private: + fftwf_plan plan; +}; + +template<> +struct fftw_cos_transform +{ + fftw_cos_transform(int n, long double* data1, long double* data2) + { + plan = fftwl_plan_r2r_1d(n, data1, data2, FFTW_REDFT10, FFTW_ESTIMATE); + } + ~fftw_cos_transform() + { + fftwl_destroy_plan(plan); + } + void execute(long double* data1, long double* data2) + { + fftwl_execute_r2r(plan, data1, data2); + } + static long double cos(long double x) { return std::cos(x); } + static long double fabs(long double x) { return std::fabs(x); } +private: + fftwl_plan plan; +}; +#ifdef BOOST_HAS_FLOAT128 +template<> +struct fftw_cos_transform<__float128> +{ + fftw_cos_transform(int n, __float128* data1, __float128* data2) + { + plan = fftwq_plan_r2r_1d(n, data1, data2, FFTW_REDFT10, FFTW_ESTIMATE); + } + ~fftw_cos_transform() + { + fftwq_destroy_plan(plan); + } + void execute(__float128* data1, __float128* data2) + { + fftwq_execute_r2r(plan, data1, data2); + } + static __float128 cos(__float128 x) { return cosq(x); } + static __float128 fabs(__float128 x) { return fabsq(x); } +private: + fftwq_plan plan; +}; + +#endif +} + template class chebyshev_transform { @@ -25,20 +117,6 @@ public: chebyshev_transform(const F& f, Real a, Real b, Real tol = 500 * std::numeric_limits::epsilon(), size_t max_refinements = 15) : m_a(a), m_b(b) - { - typedef mpl::int_ < - boost::is_same::value ? 0 : - boost::is_same::value ? 1 : - boost::is_same::value ? 2 : -#ifdef BOOST_HAS_FLOAT128 - boost::is_same<__float128, Real>::value ? 3 : -#endif - -1 - > tag_type; - init(f, a, b, tol, max_refinements, tag_type()); - } - template - void init(const F& f, Real a, Real b, Real tol, size_t max_refinements, const mpl::int_<1>&) { if (a >= b) { @@ -59,24 +137,23 @@ public: vf.resize(n); m_coeffs.resize(n); - fftw_plan plan = fftw_plan_r2r_1d(static_cast(n), vf.data(), m_coeffs.data(), FFTW_REDFT10, FFTW_ESTIMATE); + detail::fftw_cos_transform plan(static_cast(n), vf.data(), m_coeffs.data()); Real inv_n = 1/static_cast(n); for(size_t j = 0; j < n/2; ++j) { // Use symmetry cos((j+1/2)pi/n) = - cos((n-1-j+1/2)pi/n) - Real y = cos(pi()*(j+half())*inv_n); + Real y = detail::fftw_cos_transform::cos(pi()*(j+half())*inv_n); vf[j] = f(y*bma + bpa)*inv_n; vf[n-1-j]= f(bpa-y*bma)*inv_n; } - fftw_execute_r2r(plan, vf.data(), m_coeffs.data()); - fftw_destroy_plan(plan); + plan.execute(vf.data(), m_coeffs.data()); Real max_coeff = 0; for (auto const & coeff : m_coeffs) { - if (abs(coeff) > max_coeff) + if (detail::fftw_cos_transform::fabs(coeff) > max_coeff) { - max_coeff = abs(coeff); + max_coeff = detail::fftw_cos_transform::fabs(coeff); } } size_t j = m_coeffs.size() - 1; @@ -97,186 +174,6 @@ public: } } - template - void init(const F& f, Real a, Real b, Real tol, size_t max_refinements, const mpl::int_<0>&) - { - if (a >= b) - { - throw std::domain_error("a < b is required.\n"); - } - using boost::math::constants::half; - using boost::math::constants::pi; - using std::cos; - using std::abs; - Real bma = (b-a)*half(); - Real bpa = (b+a)*half(); - size_t n = 256; - std::vector vf; - - size_t refinements = 0; - while(refinements < max_refinements) - { - vf.resize(n); - m_coeffs.resize(n); - - fftwf_plan plan = fftwf_plan_r2r_1d(static_cast(n), vf.data(), m_coeffs.data(), FFTW_REDFT10, FFTW_ESTIMATE); - Real inv_n = 1/static_cast(n); - for(size_t j = 0; j < n/2; ++j) - { - // Use symmetry cos((j+1/2)pi/n) = - cos((n-1-j+1/2)pi/n) - Real y = cos(pi()*(j+half())*inv_n); - vf[j] = f(y*bma + bpa)*inv_n; - vf[n-1-j]= f(bpa-y*bma)*inv_n; - } - - fftwf_execute_r2r(plan, vf.data(), m_coeffs.data()); - fftwf_destroy_plan(plan); - Real max_coeff = 0; - for (auto const & coeff : m_coeffs) - { - if (abs(coeff) > max_coeff) - { - max_coeff = abs(coeff); - } - } - size_t j = m_coeffs.size() - 1; - while (abs(m_coeffs[j])/max_coeff < tol) - { - --j; - } - // If ten coefficients are eliminated, the we say we've done all - // we need to do: - if (n - j > 10) - { - m_coeffs.resize(j+1); - return; - } - - n *= 2; - ++refinements; - } - } - - template - void init(const F& f, Real a, Real b, Real tol, size_t max_refinements, const mpl::int_<2>&) - { - if (a >= b) - { - throw std::domain_error("a < b is required.\n"); - } - using boost::math::constants::half; - using boost::math::constants::pi; - using std::cos; - using std::abs; - Real bma = (b-a)*half(); - Real bpa = (b+a)*half(); - size_t n = 256; - std::vector vf; - - size_t refinements = 0; - while(refinements < max_refinements) - { - vf.resize(n); - m_coeffs.resize(n); - - fftwl_plan plan = fftwl_plan_r2r_1d(static_cast(n), vf.data(), m_coeffs.data(), FFTW_REDFT10, FFTW_ESTIMATE); - Real inv_n = 1/static_cast(n); - for(size_t j = 0; j < n/2; ++j) - { - // Use symmetry cos((j+1/2)pi/n) = - cos((n-1-j+1/2)pi/n) - Real y = cos(pi()*(j+half())*inv_n); - vf[j] = f(y*bma + bpa)*inv_n; - vf[n-1-j]= f(bpa-y*bma)*inv_n; - } - - fftwl_execute_r2r(plan, vf.data(), m_coeffs.data()); - fftwl_destroy_plan(plan); - Real max_coeff = 0; - for (auto const & coeff : m_coeffs) - { - if (abs(coeff) > max_coeff) - { - max_coeff = abs(coeff); - } - } - size_t j = m_coeffs.size() - 1; - while (abs(m_coeffs[j])/max_coeff < tol) - { - --j; - } - // If ten coefficients are eliminated, the we say we've done all - // we need to do: - if (n - j > 10) - { - m_coeffs.resize(j+1); - return; - } - - n *= 2; - ++refinements; - } - } - -#ifdef BOOST_HAS_FLOAT128 - template - void init(const F& f, __float128 a, __float128 b, __float128 tol, size_t max_refinements, const mpl::int_<3>&) - { - if (a >= b) - { - throw std::domain_error("a < b is required.\n"); - } - using boost::math::constants::half; - using boost::math::constants::pi; - Real bma = (b-a)*half(); - Real bpa = (b+a)*half(); - size_t n = 256; - std::vector vf; - - size_t refinements = 0; - while(refinements < max_refinements) - { - vf.resize(n); - m_coeffs.resize(n); - - fftwq_plan plan = fftwq_plan_r2r_1d(static_cast(n), vf.data(), m_coeffs.data(), FFTW_REDFT10, FFTW_ESTIMATE); - Real inv_n = 1/static_cast(n); - for(size_t j = 0; j < n/2; ++j) - { - // Use symmetry cos((j+1/2)pi/n) = - cos((n-1-j+1/2)pi/n) - Real y = cosq(pi()*(j+half())*inv_n); - vf[j] = f(y*bma + bpa)*inv_n; - vf[n-1-j]= f(bpa-y*bma)*inv_n; - } - - fftwq_execute_r2r(plan, vf.data(), m_coeffs.data()); - fftwq_destroy_plan(plan); - Real max_coeff = 0; - for (auto const & coeff : m_coeffs) - { - if (fabsq(coeff) > max_coeff) - { - max_coeff = fabsq(coeff); - } - } - size_t j = m_coeffs.size() - 1; - while (abs(m_coeffs[j])/max_coeff < tol) - { - --j; - } - // If ten coefficients are eliminated, the we say we've done all - // we need to do: - if (n - j > 10) - { - m_coeffs.resize(j+1); - return; - } - - n *= 2; - ++refinements; - } - } -#endif - Real operator()(Real x) const { using boost::math::constants::half;