mostly working optimizers

2026-02-26 16:52:27 +00:00 · 2025-10-09 16:23:59 +02:00
parent e2ce13e3fb
commit 6e5e7c8229
3823 changed files with 11070 additions and 691 deletions
--- a/test/test_functions_for_optimization.hpp
+++ b/test/test_functions_for_optimization.hpp
@@ -22,70 +22,84 @@ RealType quadratic(std::vector<RealType> &x)
    }
    return res;
 }
+
+template<typename RealType>
+RealType quadratic_high_cond_2D(std::vector<RealType> &x)
+{
+    return 1000 * x[0] * x[0] + x[1] * x[1];
+}
+
 // Taken from: https://en.wikipedia.org/wiki/Test_functions_for_optimization
-template <typename Real> Real ackley(std::array<Real, 2> const &v) {
-  using std::sqrt;
-  using std::cos;
-  using std::exp;
-  using boost::math::constants::two_pi;
-  using boost::math::constants::e;
-  Real x = v[0];
-  Real y = v[1];
-  Real arg1 = -sqrt((x * x + y * y) / 2) / 5;
-  Real arg2 = cos(two_pi<Real>() * x) + cos(two_pi<Real>() * y);
-  return -20 * exp(arg1) - exp(arg2 / 2) + 20 + e<Real>();
+template<typename Real>
+Real ackley(std::array<Real, 2> const &v)
+{
+    using boost::math::constants::e;
+    using boost::math::constants::two_pi;
+    using std::cos;
+    using std::exp;
+    using std::sqrt;
+    Real x    = v[0];
+    Real y    = v[1];
+    Real arg1 = -sqrt((x * x + y * y) / 2) / 5;
+    Real arg2 = cos(two_pi<Real>() * x) + cos(two_pi<Real>() * y);
+    return -20 * exp(arg1) - exp(arg2 / 2) + 20 + e<Real>();
 }

-template <typename Real> auto rosenbrock_saddle(std::array<Real, 2> const &v) -> Real {
-  Real x { v[0] };
-  Real y { v[1] };
-  return static_cast<Real>(100 * (x * x - y) * (x * x - y) + (1 - x) * (1 - x));
+template<typename Real>
+auto rosenbrock_saddle(std::array<Real, 2> const &v) -> Real
+{
+    Real x{v[0]};
+    Real y{v[1]};
+    return static_cast<Real>(100 * (x * x - y) * (x * x - y) + (1 - x) * (1 - x));
 }

-
-template <class Real> Real rastrigin(std::vector<Real> const &v) {
-  using std::cos;
-  using boost::math::constants::two_pi;
-  auto A = static_cast<Real>(10);
-  auto y = static_cast<Real>(10 * v.size());
-  for (auto x : v) {
-    y += x * x - A * cos(two_pi<Real>() * x);
-  }
-  return y;
+template<class Real>
+Real rastrigin(std::vector<Real> const &v)
+{
+    using boost::math::constants::two_pi;
+    using std::cos;
+    auto A = static_cast<Real>(10);
+    auto y = static_cast<Real>(10 * v.size());
+    for (auto x : v) {
+        y += x * x - A * cos(two_pi<Real>() * x);
+    }
+    return y;
 }

 // Useful for testing return-type != scalar argument type,
 // and robustness to NaNs:
-double sphere(std::vector<float> const &v) {
-  double r = 0.0;
-  for (auto x : v) {
-    double x_ = static_cast<double>(x);
-    r += x_ * x_;
-  }
-  if (r >= 1) {
-    return std::numeric_limits<double>::quiet_NaN();
-  }
-  return r;
+double sphere(std::vector<float> const &v)
+{
+    double r = 0.0;
+    for (auto x : v) {
+        double x_  = static_cast<double>(x);
+        r         += x_ * x_;
+    }
+    if (r >= 1) {
+        return std::numeric_limits<double>::quiet_NaN();
+    }
+    return r;
 }

 template<typename Real>
-Real three_hump_camel(std::array<Real, 2> const & v) {
-  Real x = v[0];
-  Real y = v[1];
-  auto xsq = x*x;
-  return 2*xsq - (1 + Real(1)/Real(20))*xsq*xsq  + xsq*xsq*xsq/6 + x*y + y*y;
+Real three_hump_camel(std::array<Real, 2> const &v)
+{
+    Real x   = v[0];
+    Real y   = v[1];
+    auto xsq = x * x;
+    return 2 * xsq - (1 + Real(1) / Real(20)) * xsq * xsq + xsq * xsq * xsq / 6 + x * y + y * y;
 }

 // Minima occurs at (3, 1/2) with value 0:
 template<typename Real>
-Real beale(std::array<Real, 2> const & v) {
-  Real x = v[0];
-  Real y = v[1];
-  Real t1 = Real(3)/Real(2) -x + x*y;
-  Real t2 = Real(9)/Real(4) -x  + x*y*y;
-  Real t3 = Real(21)/Real(8) -x  + x*y*y*y;
-  return t1*t1 + t2*t2 + t3*t3;
+Real beale(std::array<Real, 2> const &v)
+{
+    Real x  = v[0];
+    Real y  = v[1];
+    Real t1 = Real(3) / Real(2) - x + x * y;
+    Real t2 = Real(9) / Real(4) - x + x * y * y;
+    Real t3 = Real(21) / Real(8) - x + x * y * y * y;
+    return t1 * t1 + t2 * t2 + t3 * t3;
 }

-
 #endif
--- a/test/test_gradient_descent_optimizer.cpp
+++ b/test/test_gradient_descent_optimizer.cpp
@@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(random_initializer_test, T, all_float_types)
    T                              lr = T{1e-2};
    std::vector<rdiff::rvar<T, 1>> x(N);

-    auto gdopt = bopt::make_gradient_descent(
+    auto                           gdopt = bopt::make_gradient_descent(
        &quadratic<rdiff::rvar<T, 1>>,
        x,
        lr,
@@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(box_constraint_test, T, all_float_types)

    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);

-    auto res = bopt::minimize(gdopt,
+    auto res   = bopt::minimize(gdopt,
                              bopt::box_constraints<std::vector<rdiff::rvar<T, 1>>, T>(-1.0, 1.0));

    for (auto& xi : x) {
@@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(max_iter_test, T, all_float_types)
    T                              lr = T{1e-6}; // very slow learning
    std::vector<rdiff::rvar<T, 1>> x  = {T{5}, T{5}};

-    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
+    auto   gdopt    = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);

    size_t max_iter = 50;
    auto   res      = bopt::minimize(gdopt,
@@ -121,7 +121,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(history_tracking_test, T, all_float_types)

    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);

-    auto res = bopt::minimize(gdopt,
+    auto res   = bopt::minimize(gdopt,
                              bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
                              bopt::gradient_norm_convergence_policy<T>(T{1e-6}),
                              bopt::max_iter_termination_policy(1000),
@@ -137,7 +137,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(rosenbrock_test, T, all_float_types)

    auto gdopt = bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, lr);

-    auto res = bopt::minimize(gdopt,
+    auto res   = bopt::minimize(gdopt,
                              bopt::unconstrained_policy<std::array<rdiff::rvar<T, 1>, 2>>{},
                              bopt::gradient_norm_convergence_policy<T>(T{1e-4}),
                              bopt::max_iter_termination_policy(50000));
@@ -145,113 +145,113 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(rosenbrock_test, T, all_float_types)
    BOOST_REQUIRE_CLOSE(x[0].item(), T{1.0}, T{1e-1});
    BOOST_REQUIRE_CLOSE(x[1].item(), T{1.0}, T{1e-1});
 }
-BOOST_AUTO_TEST_CASE(objective_tol_convergence_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(objective_tol_convergence_test, T, all_float_types)
 {
-    using policy_t = bopt::objective_tol_convergence_policy<double>;
-    policy_t            pol(1e-3);
-    std::vector<double> dummy_grad;
+    using policy_t = bopt::objective_tol_convergence_policy<T>;
+    policy_t       pol(1e-3);
+    std::vector<T> dummy_grad;

    BOOST_TEST(!pol(dummy_grad, 100.0));
    BOOST_TEST(!pol(dummy_grad, 99.0));
    BOOST_TEST(pol(dummy_grad, 99.0005));
 }

-BOOST_AUTO_TEST_CASE(relative_objective_tol_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(relative_objective_tol_test, T, all_float_types)
 {
-    using policy_t = bopt::relative_objective_tol_policy<double>;
-    policy_t pol(1e-3);
+    using policy_t = bopt::relative_objective_tol_policy<T>;
+    policy_t       pol(1e-3);

-    std::vector<double> dummy_grad;
+    std::vector<T> dummy_grad;
    BOOST_TEST(!pol(dummy_grad, 1000.0));
    BOOST_TEST(!pol(dummy_grad, 1010.0));
    BOOST_TEST(pol(dummy_grad, 1010.5));
 }

-BOOST_AUTO_TEST_CASE(combined_policy_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(combined_policy_test, T, all_float_types)
 {
-    using pol_abs  = bopt::objective_tol_convergence_policy<double>;
-    using pol_rel  = bopt::relative_objective_tol_policy<double>;
+    using pol_abs  = bopt::objective_tol_convergence_policy<T>;
+    using pol_rel  = bopt::relative_objective_tol_policy<T>;
    using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;

-    pol_abs  abs_pol(1e-6);
-    pol_rel  rel_pol(1e-3);
-    pol_comb comb(abs_pol, rel_pol);
+    pol_abs        abs_pol(1e-6);
+    pol_rel        rel_pol(1e-3);
+    pol_comb       comb(abs_pol, rel_pol);

-    std::vector<double> dummy_grad;
+    std::vector<T> dummy_grad;

    BOOST_TEST(!comb(dummy_grad, 100.0));
    BOOST_TEST(!comb(dummy_grad, 110.0));
    BOOST_TEST(comb(dummy_grad, 110.1));
    BOOST_TEST(comb(dummy_grad, 110.1000001));
 }
-BOOST_AUTO_TEST_CASE(nonnegativity_constraint_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(nonnegativity_constraint_test, T, all_float_types)
 {
-    std::vector<double>                                         x = {1.0, -2.0, 3.0, -4.0};
-    bopt::nonnegativity_constraint<std::vector<double>, double> proj;
+    std::vector<T>                                    x = {1.0, -2.0, 3.0, -4.0};
+    bopt::nonnegativity_constraint<std::vector<T>, T> proj;
    proj(x);

    for (auto& xi : x)
        BOOST_TEST(xi >= 0.0);
-    BOOST_TEST(x == std::vector<double>({1.0, 0.0, 3.0, 0.0}));
+    BOOST_TEST(x == std::vector<T>({1.0, 0.0, 3.0, 0.0}));
 }

-BOOST_AUTO_TEST_CASE(l2_ball_constraint_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(l2_ball_constraint_test, T, all_float_types)
 {
-    std::vector<double>                                   x = {3.0, 4.0}; // norm = 5
-    bopt::l2_ball_constraint<std::vector<double>, double> proj(1.0);
+    std::vector<T>                              x = {3.0, 4.0}; // norm = 5
+    bopt::l2_ball_constraint<std::vector<T>, T> proj(1.0);
    proj(x);

-    double norm = std::sqrt(x[0] * x[0] + x[1] * x[1]);
-    BOOST_TEST(std::abs(norm - 1.0) < 1e-12); // projected to unit circle
+    T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
+    BOOST_TEST(abs(norm - 1.0) < 1e-12); // projected to unit circle
 }

-BOOST_AUTO_TEST_CASE(l1_ball_constraint_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(l1_ball_constraint_test, T, all_float_types)
 {
-    std::vector<double>                                   x = {3.0, 4.0}; // L1 norm = 7
-    bopt::l1_ball_constraint<std::vector<double>, double> proj(2.0);
+    std::vector<T>                              x = {3.0, 4.0}; // L1 norm = 7
+    bopt::l1_ball_constraint<std::vector<T>, T> proj(2.0);
    proj(x);

-    double norm1 = std::abs(x[0]) + std::abs(x[1]);
-    BOOST_TEST(std::abs(norm1 - 2.0) < 1e-12);
+    T norm1 = abs(x[0]) + abs(x[1]);
+    BOOST_TEST(abs(norm1 - 2.0) < T{1e-12});
 }

-BOOST_AUTO_TEST_CASE(simplex_constraint_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(simplex_constraint_test, T, all_float_types)
 {
-    std::vector<double> x = {-1.0, 2.0, 3.0}; // has negative and sum != 1
-    bopt::simplex_constraint<std::vector<double>, double> proj;
+    std::vector<T>                              x = {-1.0, 2.0, 3.0}; // has negative and sum != 1
+    bopt::simplex_constraint<std::vector<T>, T> proj;
    proj(x);

-    double sum = 0.0;
+    T sum = 0.0;
    for (auto& xi : x) {
        BOOST_TEST(xi >= 0.0); // all nonnegative
        sum += xi;
    }
-    BOOST_TEST(std::abs(sum - 1.0) < 1e-12); // normalized to sum=1
+    BOOST_TEST(abs(sum - 1.0) < 1e-12); // normalized to sum=1
 }

-BOOST_AUTO_TEST_CASE(unit_sphere_constraint_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(unit_sphere_constraint_test, T, all_float_types)
 {
-    std::vector<double>                                       x = {0.3, 0.4}; // norm = 0.5
-    bopt::unit_sphere_constraint<std::vector<double>, double> proj;
+    std::vector<T>                                  x = {0.3, 0.4}; // norm = 0.5
+    bopt::unit_sphere_constraint<std::vector<T>, T> proj;
    proj(x);

-    double norm = std::sqrt(x[0] * x[0] + x[1] * x[1]);
-    BOOST_TEST(std::abs(norm - 1.0) < 1e-12); // always projected to sphere
+    T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
+    BOOST_TEST(abs(norm - 1.0) < 1e-12); // always projected to sphere
 }

-BOOST_AUTO_TEST_CASE(function_constraint_test)
+BOOST_AUTO_TEST_CASE_TEMPLATE(function_constraint_test, T, all_float_types)
 {
-    auto clip_to_half = [](std::vector<double>& v) {
+    auto clip_to_half = [](std::vector<T>& v) {
        for (auto& xi : v)
            if (xi > 0.5)
                xi = 0.5;
    };

-    bopt::function_constraint<std::vector<double>> proj(clip_to_half);
-    std::vector<double>                            x = {0.2, 0.7, 1.5};
+    bopt::function_constraint<std::vector<T>> proj(clip_to_half);
+    std::vector<T>                            x = {0.2, 0.7, 1.5};
    proj(x);

-    BOOST_TEST(x == std::vector<double>({0.2, 0.5, 0.5}));
+    BOOST_TEST(x == std::vector<T>({0.2, 0.5, 0.5}));
 }

 template<typename RealType>
@@ -273,26 +273,16 @@ struct analytic_objective_eval_pol
 template<typename RealType>
 struct analytic_gradient_eval_pol
 {
-    std::vector<RealType> grad_container;
-    template<typename ArgumentContainer>
-    void bind(ArgumentContainer& x, std::vector<std::reference_wrapper<RealType>>& g)
-    {
-        grad_container.resize(x.size(), RealType{0.0});
-        g.clear();
-        g.reserve(x.size());
-        for (size_t i = 0; i < x.size(); ++i) {
-            g.push_back(std::ref(grad_container[i]));
-        }
-    }
    template<class Objective, class ArgumentContainer, class FunctionEvaluationPolicy>
    void operator()(Objective&&                obj_f,
                    ArgumentContainer&         x,
                    FunctionEvaluationPolicy&& f_eval_pol,
-                    RealType&                  obj_v)
+                    RealType&                  obj_v,
+                    std::vector<RealType>&     grad_container)
    {
-        // compute objective via eval policy that takes care of tape
        RealType v = f_eval_pol(obj_f, x);
        obj_v      = v;
+        grad_container.resize(x.size());
        for (size_t i = 0; i < x.size(); ++i) {
            grad_container[i] = 2 * x[i];
        }
--- a/test/test_lbfgs.cpp
+++ b/test/test_lbfgs.cpp
@@ -0,0 +1,34 @@
+#include "test_autodiff_reverse.hpp" // reuse for same test infra
+#include "test_functions_for_optimization.hpp"
+#include <boost/math/differentiation/autodiff_reverse.hpp>
+#include <boost/math/optimization/lbfgs.hpp>
+#include <boost/math/optimization/minimizer.hpp>
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+namespace bopt = boost::math::optimization;
+
+BOOST_AUTO_TEST_SUITE(basic_lbfgs)
+
+BOOST_AUTO_TEST_CASE(default_lbfgs_test) {
+  using T = double;
+  constexpr size_t NITER = 10;
+  constexpr size_t M = 10;
+  const T eps = T{1e-8};
+
+  RandomSample<T> rng{T(-10), T(10)};
+  std::array<rdiff::rvar<T, 1>, 2> x;
+  x[0] = rng.next();
+  x[1] = rng.next();
+
+  auto opt = bopt::make_lbfgs<decltype(&rosenbrock_saddle<rdiff::rvar<T, 1>>),
+                              std::array<rdiff::rvar<T, 1>, 2>, T>(
+      &rosenbrock_saddle<rdiff::rvar<T, 1>>, x, M);
+
+  auto result = minimize(opt);
+  std::cout << result << std::endl;
+  for (auto &xi : x) {
+    BOOST_REQUIRE_CLOSE(xi, T{1.0}, eps);
+  }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
--- a/test/test_nesterov_optimizer.cpp
+++ b/test/test_nesterov_optimizer.cpp
@@ -0,0 +1,26 @@
+#include "test_autodiff_reverse.hpp" // reuse for some basic options
+#include "test_functions_for_optimization.hpp"
+#include <boost/math/differentiation/autodiff_reverse.hpp>
+#include <boost/math/optimization/minimizer.hpp>
+#include <boost/math/optimization/nesterov.hpp>
+namespace rdiff = boost::math::differentiation::reverse_mode;
+namespace bopt  = boost::math::optimization;
+BOOST_AUTO_TEST_SUITE(basic_gradient_descent)
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(default_nesterov_test, T, all_float_types)
+{
+    size_t                         NITER = 5;
+    T                              lr    = T{1e-3};
+    T                              mu    = T{0.95};
+    RandomSample<T>                rng{T(-10), (10)};
+    std::vector<rdiff::rvar<T, 1>> x;
+    x.push_back(rng.next());
+    x.push_back(rng.next());
+    T    eps = T{1e-8};
+    auto nag = bopt::make_nag(&quadratic_high_cond_2D<rdiff::rvar<T, 1>>, x, lr, mu);
+    auto z   = minimize(nag);
+    for (auto& xi : x) {
+        BOOST_REQUIRE_SMALL(xi.item(), eps);
+    }
+}
+BOOST_AUTO_TEST_SUITE_END()