2
0
mirror of https://github.com/boostorg/math.git synced 2026-01-19 04:22:09 +00:00

Save an xorpd instruction by initializing not to zero, but to the first value. Save a division for each element by dividing the filters by the spacing (square of the spacing for the second derivative). [CI SKIP]

This commit is contained in:
Nick Thompson
2019-01-26 21:34:16 -07:00
parent 0ca8faf3cc
commit a1cade5a90
3 changed files with 73 additions and 62 deletions

View File

@@ -27,8 +27,6 @@ namespace boost::math::differentiation {
template<class RandomAccessContainer>
RandomAccessContainer operator()(RandomAccessContainer const & v) const;
void set_spacing(Real spacing);
Real get_spacing() const;
};
@@ -132,7 +130,11 @@ We can see that the smoothing derivative tracks the increase and decrease in the
The computation of the filters is ill-conditioned for large /p/.
In order to mitigate this problem, we have computed the filters in higher precision and cast the results to the desired type.
However, this simply pushes the problem to larger /p/.
In practice, this is not a problem, as large /p/ corresponds to less powerful denoising.
In practice, this is not a problem, as large /p/ corresponds to less powerful denoising, but keep it in mind.
In addition, the `-ffast-math` flag has a very large effect on the speed of these functions.
In our benchmarks, they were 50% faster with the flag enabled, which is much larger than the usual performance increases we see by turning on this flag.
Hence, if the default performance is not sufficient, this flag is available, though it comes with its own problems.
[heading References]

View File

@@ -283,12 +283,16 @@ public:
m_f.resize(interior.size());
for (size_t j = 0; j < interior.size(); ++j)
{
m_f[j] = static_cast<Real>(interior[j]);
m_f[j] = static_cast<Real>(interior[j])/m_dt;
}
}
else
{
m_f = detail::interior_velocity_filter<Real>(n, approximation_order);
for (auto & x : m_f)
{
x /= m_dt;
}
}
m_boundary_filters.resize(n);
@@ -303,13 +307,17 @@ public:
m_boundary_filters[i].resize(bf.size());
for (size_t j = 0; j < bf.size(); ++j)
{
m_boundary_filters[i][j] = static_cast<Real>(bf[j]);
m_boundary_filters[i][j] = static_cast<Real>(bf[j])/m_dt;
}
}
else
{
int64_t s = static_cast<int64_t>(i) - static_cast<int64_t>(n);
m_boundary_filters[i] = detail::boundary_velocity_filter<Real>(n, approximation_order, s);
for (auto & bf : m_boundary_filters[i])
{
bf /= m_dt;
}
}
}
}
@@ -326,7 +334,7 @@ public:
m_f.resize(n+1);
for (size_t i = 0; i < m_f.size(); ++i)
{
m_f[i] = static_cast<Real>(f[i+n]);
m_f[i] = static_cast<Real>(f[i+n])/(m_dt*m_dt);
}
m_boundary_filters.resize(n);
for (size_t i = 0; i < n; ++i)
@@ -336,7 +344,7 @@ public:
m_boundary_filters[i].resize(bf.size());
for (size_t j = 0; j < bf.size(); ++j)
{
m_boundary_filters[i][j] = static_cast<Real>(bf[j]);
m_boundary_filters[i][j] = static_cast<Real>(bf[j])/(m_dt*m_dt);
}
}
}
@@ -348,13 +356,17 @@ public:
m_f.resize(n+1);
for (size_t i = 0; i < m_f.size(); ++i)
{
m_f[i] = f[i+n];
m_f[i] = f[i+n]/(m_dt*m_dt);
}
m_boundary_filters.resize(n);
for (size_t i = 0; i < n; ++i)
{
int64_t s = static_cast<int64_t>(i) - static_cast<int64_t>(n);
m_boundary_filters[i] = detail::acceleration_filter<Real>(n, approximation_order, s);
for (auto & bf : m_boundary_filters[i])
{
bf /= (m_dt*m_dt);
}
}
}
}
@@ -364,12 +376,6 @@ public:
}
}
void set_spacing(Real const & spacing)
{
BOOST_ASSERT_MSG(spacing > 0, "Spacing between samples must be > 0.");
m_dt = spacing;
}
Real get_spacing() const
{
return m_dt;
@@ -388,72 +394,73 @@ public:
{
if (i >= m_f.size() - 1 && i <= size(v) - m_f.size())
{
Real dv = 0;
for (size_t j = 1; j < m_f.size(); ++j)
// The filter has length >= 1:
Real dvdt = m_f[1] * (v[i + 1] - v[i - 1]);
for (size_t j = 2; j < m_f.size(); ++j)
{
dv += m_f[j] * (v[i + j] - v[i - j]);
dvdt += m_f[j] * (v[i + j] - v[i - j]);
}
return dv / m_dt;
return dvdt;
}
// m_f.size() = N+1
if (i < m_f.size() - 1)
{
auto &bf = m_boundary_filters[i];
Real dv = 0;
for (size_t j = 0; j < bf.size(); ++j)
Real dvdt = bf[0]*v[0];
for (size_t j = 1; j < bf.size(); ++j)
{
dv += bf[j] * v[j];
dvdt += bf[j] * v[j];
}
return dv / m_dt;
return dvdt;
}
if (i > size(v) - m_f.size() && i < size(v))
{
int k = size(v) - 1 - i;
auto &bf = m_boundary_filters[k];
Real dv = 0;
for (size_t j = 0; j < bf.size(); ++j)
Real dvdt = bf[0]*v[size(v)-1];
for (size_t j = 1; j < bf.size(); ++j)
{
dv += bf[j] * v[size(v) - 1 - j];
dvdt += bf[j] * v[size(v) - 1 - j];
}
return -dv / m_dt;
return -dvdt;
}
}
else if constexpr (order==2)
{
if (i >= m_f.size() - 1 && i <= size(v) - m_f.size())
{
Real d2v = m_f[0]*v[i];
Real d2vdt2 = m_f[0]*v[i];
for (size_t j = 1; j < m_f.size(); ++j)
{
d2v += m_f[j] * (v[i + j] + v[i - j]);
d2vdt2 += m_f[j] * (v[i + j] + v[i - j]);
}
return d2v / (m_dt*m_dt);
return d2vdt2;
}
// m_f.size() = N+1
if (i < m_f.size() - 1)
{
auto &bf = m_boundary_filters[i];
Real d2v = 0;
for (size_t j = 0; j < bf.size(); ++j)
Real d2vdt2 = bf[0]*v[0];
for (size_t j = 1; j < bf.size(); ++j)
{
d2v += bf[j] * v[j];
d2vdt2 += bf[j] * v[j];
}
return d2v / (m_dt*m_dt);
return d2vdt2;
}
if (i > size(v) - m_f.size() && i < size(v))
{
int k = size(v) - 1 - i;
auto &bf = m_boundary_filters[k];
Real d2v = 0;
for (size_t j = 0; j < bf.size(); ++j)
Real d2vdt2 = bf[0] * v[size(v) - 1];
for (size_t j = 1; j < bf.size(); ++j)
{
d2v += bf[j] * v[size(v) - 1 - j];
d2vdt2 += bf[j] * v[size(v) - 1 - j];
}
return d2v / (m_dt*m_dt);
return d2vdt2;
}
}
@@ -477,22 +484,22 @@ public:
for (size_t i = 0; i < m_f.size() - 1; ++i)
{
auto &bf = m_boundary_filters[i];
Real dv = 0;
for (size_t j = 0; j < bf.size(); ++j)
Real dvdt = bf[0] * v[0];
for (size_t j = 1; j < bf.size(); ++j)
{
dv += bf[j] * v[j];
dvdt += bf[j] * v[j];
}
w[i] = dv / m_dt;
w[i] = dvdt;
}
for(size_t i = m_f.size() - 1; i <= size(v) - m_f.size(); ++i)
{
Real dv = 0;
for (size_t j = 1; j < m_f.size(); ++j)
Real dvdt = m_f[1] * (v[i + 1] - v[i - 1]);
for (size_t j = 2; j < m_f.size(); ++j)
{
dv += m_f[j] * (v[i + j] - v[i - j]);
dvdt += m_f[j] *(v[i + j] - v[i - j]);
}
w[i] = dv / m_dt;
w[i] = dvdt;
}
@@ -500,12 +507,12 @@ public:
{
int k = size(v) - 1 - i;
auto &f = m_boundary_filters[k];
Real dv = 0;
for (size_t j = 0; j < f.size(); ++j)
Real dvdt = f[0] * v[size(v) - 1];;
for (size_t j = 1; j < f.size(); ++j)
{
dv += f[j] * v[size(v) - 1 - j];
dvdt += f[j] * v[size(v) - 1 - j];
}
w[i] = -dv / m_dt;
w[i] = -dvdt;
}
}
else if constexpr (order==2)
@@ -514,35 +521,34 @@ public:
for (size_t i = 0; i < m_f.size() - 1; ++i)
{
auto &bf = m_boundary_filters[i];
Real d2v = 0;
Real d2vdt2 = 0;
for (size_t j = 0; j < bf.size(); ++j)
{
d2v += bf[j] * v[j];
d2vdt2 += bf[j] * v[j];
}
w[i] = d2v / (m_dt*m_dt);
w[i] = d2vdt2;
}
for (size_t i = m_f.size() - 1; i <= size(v) - m_f.size(); ++i)
{
Real d2v = m_f[0]*v[i];
Real d2vdt2 = m_f[0]*v[i];
for (size_t j = 1; j < m_f.size(); ++j)
{
d2v += m_f[j] * (v[i + j] + v[i - j]);
d2vdt2 += m_f[j] * (v[i + j] + v[i - j]);
}
w[i] = d2v / (m_dt*m_dt);
w[i] = d2vdt2;
}
for (size_t i = size(v) - m_f.size() + 1; i < size(v); ++i)
{
int k = size(v) - 1 - i;
auto &bf = m_boundary_filters[k];
Real d2v = 0;
for (size_t j = 0; j < bf.size(); ++j)
Real d2vdt2 = bf[0] * v[size(v) - 1];
for (size_t j = 1; j < bf.size(); ++j)
{
d2v += bf[j] * v[size(v) - 1 - j];
d2vdt2 += bf[j] * v[size(v) - 1 - j];
}
w[i] = d2v / (m_dt*m_dt);
w[i] = d2vdt2;
}
}

View File

@@ -598,10 +598,13 @@ BOOST_AUTO_TEST_CASE(lanczos_smoothing_test)
test_dlp_evaluation<double>();
test_dlp_derivatives<double>();
test_dlp_next<double>();
test_dlp_norms<cpp_bin_float_50>();
// Takes too long!
//test_dlp_norms<cpp_bin_float_50>();
test_boundary_velocity_filters<double>();
test_boundary_velocity_filters<long double>();
test_boundary_velocity_filters<cpp_bin_float_50>();
// Takes too long!
//test_boundary_velocity_filters<cpp_bin_float_50>();
test_boundary_lanczos<double>();
test_boundary_lanczos<long double>();
// Takes too long!