From 923ed19a07f4b409af9870dca9196ccaff8d0eea Mon Sep 17 00:00:00 2001
From: Nick <NAThompson@users.noreply.github.com>
Date: Tue, 26 Oct 2021 20:54:29 -0700
Subject: [PATCH] Cubic roots (#703)

---
 doc/roots/cubic_roots.qbk                     | 111 ++++++++++++++
 doc/roots/roots_overview.qbk                  |   1 +
 include/boost/math/tools/cubic_roots.hpp      | 137 ++++++++++++++++++
 .../performance/cubic_roots_performance.cpp   |  40 +++++
 test/Jamfile.v2                               |   1 +
 test/cubic_roots_test.cpp                     | 132 +++++++++++++++++
 6 files changed, 422 insertions(+)
 create mode 100644 doc/roots/cubic_roots.qbk
 create mode 100644 include/boost/math/tools/cubic_roots.hpp
 create mode 100644 reporting/performance/cubic_roots_performance.cpp
 create mode 100644 test/cubic_roots_test.cpp
diff --git a/doc/roots/cubic_roots.qbk b/doc/roots/cubic_roots.qbk
new file mode 100644
index 000000000..639c1b2ef
--- /dev/null
+++ b/doc/roots/cubic_roots.qbk
@@ -0,0 +1,111 @@
+[/
+Copyright (c) 2021 Nick Thompson
+Use, modification and distribution are subject to the
+Boost Software License, Version 1.0. (See accompanying file
+LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+]
+
+[section:cubic_roots Roots of Cubic Polynomials]
+
+[heading Synopsis]
+
+```
+#include <boost/math/roots/cubic_roots.hpp>
+
+namespace boost::math::tools {
+
+// Solves ax³ + bx² + cx + d = 0.
+std::array<Real,3> cubic_roots(Real a, Real b, Real c, Real d);
+
+// Computes the empirical residual p(r) (first element) and expected residual ε|rṗ(r)| (second element) for a root.
+// Recall that for a numerically computed root r satisfying r = r⁎(1+ε) for the exact root r⁎ of a function p, |p(r)| ≲ ε|rṗ(r)|.
+template<typename Real>
+std::array<Real, 2> cubic_root_residual(Real a, Real b, Real c, Real d, Real root);
+}
+```
+
+[heading Background]
+
+The `cubic_roots` function extracts all real roots of a cubic polynomial ax³ + bx² + cx + d.
+The result is a `std::array<Real, 3>`, which has length three, irrespective of whether there are 3 real roots.
+There is always 1 real root, and hence (barring overflow or other exceptional circumstances), the first element of the
+`std::array` is always populated.
+If there is only one real root of the polynomial, then the second and third elements are set to `nans`.
+The roots are returned in nondecreasing order.
+
+Be careful with double roots.
+First, if you have a real double root, it is numerically indistinguishable from a complex conjugate pair of roots,
+where the complex part is tiny.
+Second, the condition number of rootfinding is infinite at a double root,
+so even changes as subtle as different instruction generation can change the outcome.
+We have some heuristics in place to detect double roots, but these should be regarded with suspicion.
+
+
+[heading Example]
+
+```
+#include <iostream>
+#include <sstream>
+#include <boost/math/tools/cubic_roots.hpp>
+
+using boost::math::tools::cubic_roots;
+using boost::math::tools::cubic_root_residual;
+
+template<typename Real>
+std::string print_roots(std::array<Real, 3> const & roots) {
+    std::ostringstream out;
+    out << "{" << roots[0] << ", " << roots[1] << ", " << roots[2] << "}";
+    return out.str();
+}
+
+int main() {
+    // Solves x³ - 6x² + 11x - 6 = (x-1)(x-2)(x-3).
+    auto roots = cubic_roots(1.0, -6.0, 11.0, -6.0);
+    std::cout << "The roots of x³ - 6x² + 11x - 6 are " << print_roots(roots) << ".\n";
+
+    // Double root; YMMV:
+    // (x+1)²(x-2) = x³ - 3x - 2:
+    roots = cubic_roots(1.0, 0.0, -3.0, -2.0);
+    std::cout << "The roots of x³ - 3x - 2 are " << print_roots(roots) << ".\n";
+
+    // Single root: (x-i)(x+i)(x-3) = x³ - 3x² + x - 3:
+    roots = cubic_roots(1.0, -3.0, 1.0, -3.0);
+    std::cout << "The real roots of x³ - 3x² + x - 3 are " << print_roots(roots) << ".\n";
+
+    // I don't know the roots of x³ + 6.28x² + 2.3x + 3.6;
+    // how can I see if they've been computed correctly?
+    roots = cubic_roots(1.0, 6.28, 2.3, 3.6);
+    std::cout << "The real root of x³ +6.28x² + 2.3x + 3.6 is " << roots[0] << ".\n";
+    auto res = cubic_root_residual(1.0, 6.28, 2.3, 3.6, roots[0]);
+    std::cout << "The residual is " << res[0] << ", and the expected residual is " << res[1] << ". ";
+    if (abs(res[0]) <= res[1]) {
+        std::cout << "This is an expected accuracy.\n";
+    } else {
+        std::cerr << "The residual is unexpectedly large.\n";
+    }
+}
+```
+
+This prints:
+
+```
+The roots of  x³ - 6x² + 11x - 6 are {1, 2, 3}.
+The roots of  x³ - 3x - 2 are {-1, -1, 2}.
+The real roots of x³ - 3x² + x - 3 are {3, nan, nan}.
+The real root of x³ +6.28x² + 2.3x + 3.6 is -5.99656.
+The residual is -1.56586e-14, and the expected residual is 4.64155e-14. This is an expected accuracy.
+```
+
+[heading Performance and Accuracy]
+
+On an Intel laptop chip running at 2700 MHz, we observe 3 roots taking ~90ns to compute.
+If the polynomial only possesses a single real root, it takes ~50ns.
+See `reporting/performance/cubic_roots_performance.cpp`.
+
+The forward error cannot be effectively bounded.
+However, for an approximate root r returned by this routine, the residuals should be constrained by |p(r)| ≲ ε|rṗ(r)|,
+where '≲' should be interpreted as an order of magnitude estimate.
+
+
+[endsect]
+[/section:cubic_roots]
diff --git a/doc/roots/roots_overview.qbk b/doc/roots/roots_overview.qbk
index 5f1490d66..770432af5 100644
--- a/doc/roots/roots_overview.qbk
+++ b/doc/roots/roots_overview.qbk
@@ -17,6 +17,7 @@ There are several fully-worked __root_finding_examples, including:
 
 [include roots_without_derivatives.qbk]
 [include roots.qbk]
+[include cubic_roots.qbk]
 [include root_finding_examples.qbk]
 [include minima.qbk]
 [include root_comparison.qbk]
diff --git a/include/boost/math/tools/cubic_roots.hpp b/include/boost/math/tools/cubic_roots.hpp
new file mode 100644
index 000000000..e6b7a028b
--- /dev/null
+++ b/include/boost/math/tools/cubic_roots.hpp
@@ -0,0 +1,137 @@
+//  (C) Copyright Nick Thompson 2021.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_MATH_TOOLS_CUBIC_ROOTS_HPP
+#define BOOST_MATH_TOOLS_CUBIC_ROOTS_HPP
+#include <array>
+#include <algorithm>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/tools/roots.hpp>
+
+namespace boost::math::tools {
+
+// Solves ax³ + bx² + cx + d = 0.
+// Only returns the real roots, as types get weird for real coefficients and complex roots.
+// Follows Numerical Recipes, Chapter 5, section 6.
+// NB: A better algorithm apparently exists:
+// Algorithm 954: An Accurate and Efficient Cubic and Quartic Equation Solver for Physical Applications
+// However, I don't have access to that paper!
+template<typename Real>
+std::array<Real, 3> cubic_roots(Real a, Real b, Real c, Real d) {
+    using std::sqrt;
+    using std::acos;
+    using std::cos;
+    using std::cbrt;
+    using std::abs;
+    using std::fma;
+    std::array<Real, 3> roots = {std::numeric_limits<Real>::quiet_NaN(),
+                                 std::numeric_limits<Real>::quiet_NaN(),
+                                 std::numeric_limits<Real>::quiet_NaN()};
+    if (a == 0) {
+        // bx² + cx + d = 0:
+        if (b == 0) {
+            // cx + d = 0:
+            if (c == 0) {
+                if (d != 0) {
+                    // No solutions:
+                    return roots;
+                }
+                roots[0] = 0;
+                roots[1] = 0;
+                roots[2] = 0;
+                return roots;
+            }
+            roots[0] = -d/c;
+            return roots;
+        }
+        auto [x0, x1] = quadratic_roots(b, c, d);
+        roots[0] = x0;
+        roots[1] = x1;
+        return roots;
+    }
+    if (d == 0) {
+        auto [x0, x1] = quadratic_roots(a, b, c);
+        roots[0] = x0;
+        roots[1] = x1;
+        roots[2] = 0;
+        std::sort(roots.begin(), roots.end());
+        return roots;
+    }
+    Real p = b/a;
+    Real q = c/a;
+    Real r = d/a;
+    Real Q = (p*p - 3*q)/9;
+    Real R = (2*p*p*p - 9*p*q + 27*r)/54;
+    if (R*R < Q*Q*Q) {
+        //std::cout << "In the R² < Q³ branch.\n";
+        Real rtQ = sqrt(Q);
+        Real theta = acos(R/(Q*rtQ))/3;
+        Real st = sin(theta);
+        Real ct = cos(theta);
+        roots[0] = -2*rtQ*ct - p/3;
+        roots[1] = -rtQ*(-ct + sqrt(Real(3))*st) - p/3;
+        roots[2] = rtQ*(ct + sqrt(Real(3))*st) - p/3;
+    } else {
+        // In Numerical Recipes, Chapter 5, Section 6, it is claimed that we only have one real root
+        // if R² >= Q³. But this isn't true; we can even see this from equation 5.6.18.
+        // The condition for having three real roots is that A = B.
+        // It *is* the case that if we're in this branch, and we have 3 real roots, two are a double root.
+        // Take (x+1)²(x-2) = x³ - 3x -2 as an example. This clearly has a double root at x = -1,
+        // and it gets sent into this branch.
+        Real arg = R*R - Q*Q*Q;
+        Real A = -boost::math::sign(R)*cbrt(abs(R) + sqrt(arg));
+        Real B = 0;
+        if (A != 0) {
+            B = Q/A;
+        }
+        roots[0] = A + B - p/3;
+        // Yes, we're comparing floats for equality:
+        // Any perturbation pushes the roots into the complex plane; out of the bailiwick of this routine.
+        if (A == B || arg == 0) {
+            roots[1] = -A - p/3;
+            roots[2] = -A - p/3;
+        }
+    }
+    // Root polishing:
+    for (auto & r : roots) {
+        // Horner's method.
+        // Here I'll take John Gustaffson's opinion that the fma is a *distinct* operation from a*x +b:
+        // Make sure to compile these fmas into a single instruction and not a function call!
+        // (I'm looking at you Windows.)
+        Real f = fma(a, r, b);
+        f = fma(f,r,c);
+        f = fma(f,r,d);
+        Real df = fma(3*a, r, 2*b);
+        df = fma(df, r, c);
+        if (df != 0) {
+            // No standard library feature for fused-divide add!
+            r -= f/df;
+        }
+    }
+    std::sort(roots.begin(), roots.end());
+    return roots;
+}
+
+// Computes the empirical residual p(r) (first element) and expected residual ε|rṗ(r)| (second element) for a root.
+// Recall that for a numerically computed root r satisfying r = r⁎(1+ε) of a function p, |p(r)| ≲ ε|rṗ(r)|.
+template<typename Real>
+std::array<Real, 2> cubic_root_residual(Real a, Real b, Real c, Real d, Real root) {
+    using std::fma;
+    using std::abs;
+    std::array<Real, 2> out;
+    Real residual = fma(a, root, b);
+    residual = fma(residual,root,c);
+    residual = fma(residual,root,d);
+
+    out[0] = residual;
+
+    Real expected_residual = fma(3*a, root, 2*b);
+    expected_residual = fma(expected_residual, root, c);
+    expected_residual = abs(root*expected_residual)*std::numeric_limits<Real>::epsilon();
+    out[1] = expected_residual;
+    return out;
+}
+
+}
+#endif
diff --git a/reporting/performance/cubic_roots_performance.cpp b/reporting/performance/cubic_roots_performance.cpp
new file mode 100644
index 000000000..3550677dd
--- /dev/null
+++ b/reporting/performance/cubic_roots_performance.cpp
@@ -0,0 +1,40 @@
+//  (C) Copyright Nick Thompson 2021.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <random>
+#include <array>
+#include <vector>
+#include <iostream>
+#include <benchmark/benchmark.h>
+#include <boost/math/tools/cubic_roots.hpp>
+
+using boost::math::tools::cubic_roots;
+
+template<class Real>
+void CubicRoots(benchmark::State& state)
+{
+    std::random_device rd;
+    auto seed = rd();
+    // This seed generates 3 real roots:
+    //uint32_t seed = 416683252;
+    std::mt19937_64 mt(seed);
+    std::uniform_real_distribution<Real> unif(-10, 10);
+
+    Real a = unif(mt);
+    Real b = unif(mt);
+    Real c = unif(mt);
+    Real d = unif(mt);
+    for (auto _ : state)
+    {
+        auto roots = cubic_roots(a,b,c,d);
+        benchmark::DoNotOptimize(roots[0]);
+    }
+}
+
+BENCHMARK_TEMPLATE(CubicRoots, float);
+BENCHMARK_TEMPLATE(CubicRoots, double);
+BENCHMARK_TEMPLATE(CubicRoots, long double);
+
+BENCHMARK_MAIN();
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
index dc6682aba..bc1859fbb 100644
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -990,6 +990,7 @@ test-suite misc :
    [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
    [ run anderson_darling_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
    [ run ljung_box_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+   [ run cubic_roots_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
    [ run test_t_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ]  ]
    [ run test_z_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ]  ]
    [ run bivariate_statistics_test.cpp : : : [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ]  ]
diff --git a/test/cubic_roots_test.cpp b/test/cubic_roots_test.cpp
new file mode 100644
index 000000000..a53fecf4b
--- /dev/null
+++ b/test/cubic_roots_test.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright Nick Thompson, 2021
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "math_unit_test.hpp"
+#include <random>
+#include <boost/math/tools/cubic_roots.hpp>
+#ifdef BOOST_HAS_FLOAT128
+#include <boost/multiprecision/float128.hpp>
+using boost::multiprecision::float128;
+#endif
+
+using boost::math::tools::cubic_roots;
+using boost::math::tools::cubic_root_residual;
+using std::cbrt;
+
+template<class Real>
+void test_zero_coefficients()
+{
+    Real a = 0;
+    Real b = 0;
+    Real c = 0;
+    Real d = 0;
+    auto roots = cubic_roots(a,b,c,d);
+    CHECK_EQUAL(roots[0], Real(0));
+    CHECK_EQUAL(roots[1], Real(0));
+    CHECK_EQUAL(roots[2], Real(0));
+
+    a = 1;
+    roots = cubic_roots(a,b,c,d);
+    CHECK_EQUAL(roots[0], Real(0));
+    CHECK_EQUAL(roots[1], Real(0));
+    CHECK_EQUAL(roots[2], Real(0));
+
+    a = 1;
+    d = 1;
+    // x^3 + 1 = 0:
+    roots = cubic_roots(a,b,c,d);
+    CHECK_EQUAL(roots[0], Real(-1));
+    CHECK_NAN(roots[1]);
+    CHECK_NAN(roots[2]);
+    d = -1;
+    // x^3 - 1 = 0:
+    roots = cubic_roots(a,b,c,d);
+    CHECK_EQUAL(roots[0], Real(1));
+    CHECK_NAN(roots[1]);
+    CHECK_NAN(roots[2]);
+
+    d = -2;
+    // x^3 - 2 = 0
+    roots = cubic_roots(a,b,c,d);
+    // Let's go for equality here!
+    CHECK_EQUAL(roots[0], cbrt(Real(2)));
+    CHECK_NAN(roots[1]);
+    CHECK_NAN(roots[2]);
+
+    d = -8;
+    roots = cubic_roots(a,b,c,d);
+    CHECK_EQUAL(roots[0], Real(2));
+    CHECK_NAN(roots[1]);
+    CHECK_NAN(roots[2]);
+
+
+    // (x-1)(x-2)(x-3) = x^3 - 6x^2 + 11x - 6
+    roots = cubic_roots(Real(1), Real(-6), Real(11), Real(-6));
+    CHECK_ULP_CLOSE(roots[0], Real(1), 2);
+    CHECK_ULP_CLOSE(roots[1], Real(2), 2);
+    CHECK_ULP_CLOSE(roots[2], Real(3), 2);
+
+    // Double root:
+    // (x+1)^2(x-2) = x^3 - 3x - 2:
+    // Note: This test is unstable wrt to perturbations!
+    roots = cubic_roots(Real(1), Real(0), Real(-3), Real(-2));
+    CHECK_ULP_CLOSE(-1, roots[0], 2);
+    CHECK_ULP_CLOSE(-1, roots[1], 2);
+    CHECK_ULP_CLOSE(2, roots[2], 2);
+
+    std::uniform_real_distribution<Real> dis(-2,2);
+    std::mt19937 gen(12345);
+    // Expected roots
+    std::array<Real, 3> r;
+    int trials = 10;
+    for (int i = 0; i < trials; ++i) {
+        // Mathematica:
+        // Expand[(x - r0)*(x - r1)*(x - r2)]
+        // - r0 r1 r2 + (r0 r1 + r0 r2 + r1 r2) x
+        // - (r0 + r1 + r2) x^2 + x^3
+        for (auto & root : r) {
+            root = static_cast<Real>(dis(gen));
+        }
+        std::sort(r.begin(), r.end());
+        Real a = 1;
+        Real b = -(r[0] + r[1] + r[2]);
+        Real c = r[0]*r[1] + r[0]*r[2] + r[1]*r[2];
+        Real d = -r[0]*r[1]*r[2];
+
+        auto roots = cubic_roots(a, b, c, d);
+        // I could check the condition number here, but this is fine right?
+        if(!CHECK_ULP_CLOSE(r[0], roots[0], 25)) {
+            std::cerr << "  Polynomial x^3 + " << b << "x^2 + " << c << "x + " << d << " has roots {";
+            std::cerr << r[0] << ", " << r[1] << ", " << r[2] << "}, but the computed roots are {";
+            std::cerr << roots[0] << ", " << roots[1] << ", " << roots[2] << "}\n";
+        }
+        CHECK_ULP_CLOSE(r[1], roots[1], 25);
+        CHECK_ULP_CLOSE(r[2], roots[2], 25);
+        for (auto root : roots) {
+            auto res = cubic_root_residual(a, b,c,d, root);
+            CHECK_LE(abs(res[0]), 20*res[1]);
+        }
+    }
+}
+
+
+int main()
+{
+    test_zero_coefficients<float>();
+    test_zero_coefficients<double>();
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+    test_zero_coefficients<long double>();
+#endif
+
+#ifdef BOOST_HAS_FLOAT128
+    // For some reason, the quadmath is way less accurate than the float/double/long double:
+    //test_zero_coefficients<float128>();
+#endif
+
+
+    return boost::math::test::report_errors();
+}