updates

2026-02-24 04:02:18 +00:00 · 2025-10-17 19:02:59 +02:00
parent 4bef323a75
commit 86a06f4131
12 changed files with 1394 additions and 641 deletions
--- a/include/boost/math/optimization/detail/differentiable_opt_utilties.hpp
+++ b/include/boost/math/optimization/detail/differentiable_opt_utilties.hpp
@@ -1,5 +1,10 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #ifndef DIFFERENTIABLE_OPT_UTILITIES_HPP
 #define DIFFERENTIABLE_OPT_UTILITIES_HPP
+#include <boost/math/differentiation/autodiff_reverse.hpp>
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_real_distribution.hpp>
 #include <cmath>
@@ -10,47 +15,92 @@
 namespace boost {
 namespace math {
 namespace optimization {
-template <typename UpdPol> struct update_policy_real_type;

-template <typename UpdPol> struct update_policy_real_type;
+namespace rdiff = boost::math::differentiation::reverse_mode;

-template <template <typename> class UpdPol, typename RealType>
-struct update_policy_real_type<UpdPol<RealType>> {
+/** @brief> helper to get the underlying realtype from
+ *  update policy
+ *  */
+template<typename UpdPol>
+struct update_policy_real_type;
+
+template<template<typename> class UpdPol, typename RealType>
+struct update_policy_real_type<UpdPol<RealType>>
+{
  using type = RealType;
 };

-template <typename UpdPol>
+template<typename UpdPol>
 using update_policy_real_type_t =
-    typename update_policy_real_type<typename std::decay<UpdPol>::type>::type;
+  typename update_policy_real_type<typename std::decay<UpdPol>::type>::type;

+/** @brief> get realtype from argument container
+ *  */
+template<class Container>
+struct argument_container_t;
+
+template<template<typename, typename...> class Container,
+         typename ValueType,
+         typename... Args>
+struct argument_container_t<Container<ValueType, Args...>>
+{
+  using type = ValueType;
+};
+template<template<typename, typename...> class Container,
+         typename RealType,
+         int N,
+         typename... Args>
+struct argument_container_t<Container<rdiff::rvar<RealType, N>, Args...>>
+{
+  using type = RealType;
+};
+template<typename ValueType, std::size_t N>
+struct argument_container_t<std::array<ValueType, N>>
+{
+  using type = ValueType;
+};
+
+template<typename RealType, int M, std::size_t N>
+struct argument_container_t<std::array<rdiff::rvar<RealType, M>, N>>
+{
+  using type = RealType;
+};
 /******************************************************************************/
 /** @brief simple blas helpers
 * may optimize later if benchmarks show its needed, or just switch to Eigen
 */
-template <typename Container>
-auto dot(const Container &x, const Container &y) ->
-    typename Container::value_type {
+template<typename Container>
+auto
+dot(const Container& x, const Container& y) -> typename Container::value_type
+{
  using T = typename Container::value_type;
  BOOST_MATH_ASSERT(x.size() == y.size());
  return std::inner_product(x.begin(), x.end(), y.begin(), T(0));
 }

-template <typename Container>
-auto norm_2(const Container &x) -> typename Container::value_type {
+template<typename Container>
+auto
+norm_2(const Container& x) -> typename Container::value_type
+{
  return sqrt(dot(x, x));
 }

-template <typename Container>
-auto norm_1(const Container &x) -> typename Container::value_type {
+template<typename Container>
+auto
+norm_1(const Container& x) -> typename Container::value_type
+{
  using T = typename Container::value_type;
-  T ret{0};
-  for (auto &xi : x) {
+  T ret{ 0 };
+  for (auto& xi : x) {
    ret += abs(xi);
  }
  return ret;
 }

-template <typename T> T norm_inf(const std::vector<T> &x) {
+template<typename T>
+T
+norm_inf(const std::vector<T>& x)
+{
  assert(!x.empty());

  T max_val = std::abs(x[0]);
@@ -64,17 +114,21 @@ template <typename T> T norm_inf(const std::vector<T> &x) {
  return max_val;
 }
 /** @brief alpha*x (alpha is scalar, x is vector */
-template <typename Container, typename RealType>
-void scale(Container &x, const RealType &alpha) {
-  for (auto &xi : x) {
+template<typename Container, typename RealType>
+void
+scale(Container& x, const RealType& alpha)
+{
+  for (auto& xi : x) {
    xi *= alpha;
  }
 }

 /** @brief y += alpha * x
 */
-template <typename Container, typename RealType>
-void axpy(RealType alpha, const Container &x, Container &y) {
+template<typename ContainerX, typename ContainerY, typename RealType>
+void
+axpy(RealType alpha, const ContainerX& x, ContainerY& y)
+{
  BOOST_MATH_ASSERT(x.size() == y.size());
  const size_t n = x.size();
  for (size_t i = 0; i < n; ++i) {
@@ -82,7 +136,10 @@ void axpy(RealType alpha, const Container &x, Container &y) {
  }
 }
 /******************************************************************************/
-template <typename RealType> std::vector<RealType> random_vector(size_t n) {
+template<typename RealType>
+std::vector<RealType>
+random_vector(size_t n)
+{
  /** @brief> generates a random std::vector<RealType> of size n
   * using mt19937 algorithm
   */
@@ -92,7 +149,7 @@ template <typename RealType> std::vector<RealType> random_vector(size_t n) {
   *
   * TODO: benchmark.
   */
-  static boost::random::mt19937 rng{std::random_device{}()};
+  static boost::random::mt19937 rng{ std::random_device{}() };
  static boost::random::uniform_real_distribution<RealType> dist(0.0, 1.0);

  std::vector<RealType> result(n);
--- a/include/boost/math/optimization/detail/gradient_opt_base.hpp
+++ b/include/boost/math/optimization/detail/gradient_opt_base.hpp
@@ -1,3 +1,7 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #ifndef GRADIENT_OPT_BASE_HPP
 #define GRADIENT_OPT_BASE_HPP
 #include <boost/math/differentiation/autodiff_reverse.hpp>
@@ -11,15 +15,27 @@ namespace rdiff = boost::math::differentiation::reverse_mode;
 /**
 * @brief The abstract_optimizer class implementing common variables
 * and methods across optimizers
+ *
+ * @tparam> ArgumentContainer
+ * @tparam> RealType
+ * @tparam> Objective
+ * @tparam> InitializationPolicy
+ *
 */

-template <typename ArgumentContainer, typename RealType, class Objective,
-          class InitializationPolicy, class ObjectiveEvalPolicy,
-          class GradEvalPolicy, class UpdatePolicy, typename DerivedOptimizer>
-class abstract_optimizer {
+template<typename ArgumentContainer,
+         typename RealType,
+         class Objective,
+         class InitializationPolicy,
+         class ObjectiveEvalPolicy,
+         class GradEvalPolicy,
+         class UpdatePolicy,
+         typename DerivedOptimizer>
+class abstract_optimizer
+{
 protected:
  Objective objective_;          // obj function
-  ArgumentContainer &x_;         // arguments to objective function
+  ArgumentContainer& x_;         // arguments to objective function
  std::vector<RealType> g_;      // container of references to gradients
  ObjectiveEvalPolicy obj_eval_; // how to evaluate your funciton
  GradEvalPolicy grad_eval_;     // how to evaluate/bind gradients
@@ -27,12 +43,14 @@ protected:
  UpdatePolicy update_;          // update step
  RealType obj_v_;               // objective value (for history)
  // access derived class
-  DerivedOptimizer &derived() { return static_cast<DerivedOptimizer &>(*this); }
-  const DerivedOptimizer &derived() const {
-    return static_cast<const DerivedOptimizer &>(*this);
+  DerivedOptimizer& derived() { return static_cast<DerivedOptimizer&>(*this); }
+  const DerivedOptimizer& derived() const
+  {
+    return static_cast<const DerivedOptimizer&>(*this);
  }

-  void step_impl() {
+  void step_impl()
+  {
    grad_eval_(objective_, x_, obj_eval_, obj_v_, g_);
    for (size_t i = 0; i < x_.size(); ++i) {
      update_(x_[i], g_[i]);
@@ -43,25 +61,30 @@ public:
  using argument_container_t = ArgumentContainer;
  using real_type_t = RealType;

-  abstract_optimizer(Objective &&objective, ArgumentContainer &x,
-                     InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
-                     GradEvalPolicy &&gep, UpdatePolicy &&up)
-      : objective_(std::forward<Objective>(objective)), x_(x),
-        obj_eval_(std::forward<ObjectiveEvalPolicy>(oep)),
-        grad_eval_(std::forward<GradEvalPolicy>(gep)),
-        init_(std::forward<InitializationPolicy>(ip)),
-        update_(std::forward<UpdatePolicy>(up)) {
+  abstract_optimizer(Objective&& objective,
+                     ArgumentContainer& x,
+                     InitializationPolicy&& ip,
+                     ObjectiveEvalPolicy&& oep,
+                     GradEvalPolicy&& gep,
+                     UpdatePolicy&& up)
+    : objective_(std::forward<Objective>(objective))
+    , x_(x)
+    , obj_eval_(std::forward<ObjectiveEvalPolicy>(oep))
+    , grad_eval_(std::forward<GradEvalPolicy>(gep))
+    , init_(std::forward<InitializationPolicy>(ip))
+    , update_(std::forward<UpdatePolicy>(up))
+  {
    init_(x_);            // initialize your problem
    g_.resize(x_.size()); // initialize space for gradients
  }

-  ArgumentContainer &arguments() { return derived().x_; }
-  const ArgumentContainer &arguments() const { return derived().x_; }
+  ArgumentContainer& arguments() { return derived().x_; }
+  const ArgumentContainer& arguments() const { return derived().x_; }

-  RealType &objective_value() { return derived().obj_v_; }
-  const RealType &objective_value() const { return derived().obj_v_; }
-  std::vector<RealType> &gradients() { return derived().g_; }
-  const std::vector<RealType> &gradients() const { return derived().g_; }
+  RealType& objective_value() { return derived().obj_v_; }
+  const RealType& objective_value() const { return derived().obj_v_; }
+  std::vector<RealType>& gradients() { return derived().g_; }
+  const std::vector<RealType>& gradients() const { return derived().g_; }
 };
 } // namespace optimization
 } // namespace math
--- a/include/boost/math/optimization/detail/line_search_policies.hpp
+++ b/include/boost/math/optimization/detail/line_search_policies.hpp
@@ -1,6 +1,12 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
+
 #ifndef LINE_SEARCH_POLICIES_HPP
 #define LINE_SEARCH_POLICIES_HPP

+#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
 #include <cmath>
 #include <iostream>
 #include <numeric>
@@ -13,9 +19,18 @@ namespace optimization {
 * @brief> Armijo condition backtracking line search
 * https://en.wikipedia.org/wiki/Backtracking_line_search
 *
- *  f(x+alpha p) <= f(x) + alpha * c * grad(f)^T p
- * */
-template <typename RealType> class armijo_line_search_policy {
+ *  f(x+alpha p) <= f(x) + alpha * c * grad(f)^T
+ *
+ * Jorge Nocedal and Stephen J. Wright,
+ *  Numerical Optimization, 2nd Edition,
+ *  Springer, 2006.
+ *
+ *  Algorithm 3.1: Backtracking Line Search
+ *  (Page 37)
+ */
+template<typename RealType>
+class armijo_line_search_policy
+{
 private:
  RealType alpha0_; // initial step size
  RealType c_;      // sufficient decrease constant
@@ -23,35 +38,170 @@ private:
  int max_iter_;    // maximum backtracking steps

 public:
-  armijo_line_search_policy(RealType alpha0 = 1.0, RealType c = 1e-4,
-                            RealType rho = 0.5, int max_iter = 20)
-      : alpha0_(alpha0), c_(c), rho_(rho), max_iter_(max_iter) {}
+  armijo_line_search_policy(RealType alpha0 = 1.0,
+                            RealType c = 1e-4,
+                            RealType rho = 0.5,
+                            int max_iter = 20)
+    : alpha0_(alpha0)
+    , c_(c)
+    , rho_(rho)
+    , max_iter_(max_iter)
+  {
+  }

-  template <class Objective, class ObjectiveEvalPolicy,
-            class GradientEvalPolicy, class ArgumentContainer>
-  RealType operator()(Objective &objective, ObjectiveEvalPolicy &obj_eval,
-                      GradientEvalPolicy &grad_eval, ArgumentContainer &x,
-                      const std::vector<RealType> &g,
-                      const std::vector<RealType> &p, RealType f_x) const {
+  template<class Objective,
+           class ObjectiveEvalPolicy,
+           class GradientEvalPolicy,
+           class ArgumentContainer>
+  RealType operator()(Objective& objective,
+                      ObjectiveEvalPolicy& obj_eval,
+                      GradientEvalPolicy& grad_eval,
+                      ArgumentContainer& x,
+                      const std::vector<RealType>& g,
+                      const std::vector<RealType>& p,
+                      RealType f_x) const
+  {
    /** @brief> line search
     * */
    RealType alpha = alpha0_;
-    ArgumentContainer x_trial = x; // copy
+    ArgumentContainer x_trial; // = x; // copy
    const RealType gTp = dot(g, p);

    for (int iter = 0; iter < max_iter_; ++iter) {
-      for (size_t i = 0; i < x.size(); ++i)
-        x_trial[i] = x[i] + alpha * p[i];
+      x_trial = x;
+      axpy(alpha, p, x_trial);
      auto f_trial = obj_eval(objective, x_trial);
      if (f_trial <=
          f_x + c_ * alpha * gTp) // check if armijo condition is satisfied
        return alpha;
-      alpha *= rho_; // half by default
+      alpha *= rho_;
    }
    return alpha;
  }
 };

+/** @brief> Strong-Wolfe line search:
+ *  Jorge Nocedal and Stephen J. Wright,
+ *  Numerical Optimization, 2nd Edition,
+ *  Springer, 2006.
+ *
+ *  Algorithm 3.5 — Line Search Algorithm (Strong Wolfe Conditions)
+ *  Pages 60–61
+ */
+template<typename RealType>
+class strong_wolfe_line_search_policy
+{
+private:
+  RealType alpha0_; // initial step size
+  RealType c1_;     // Armijo constant (sufficient decrease)
+  RealType c2_;     // curvature constant
+  RealType rho_;    // backtracking factor
+  int max_iter_;    // maximum iterations
+public:
+  strong_wolfe_line_search_policy(RealType alpha0 = 1.0,
+                                  RealType c1 = 1e-4,
+                                  RealType c2 = 0.9,
+                                  RealType rho = 2.0,
+                                  int max_iter = 20)
+    : alpha0_(alpha0)
+    , c1_(c1)
+    , c2_(c2)
+    , rho_(rho)
+    , max_iter_(max_iter)
+  {
+  }
+  template<class Objective,
+           class ObjectiveEvalPolicy,
+           class GradientEvalPolicy,
+           class ArgumentContainer>
+  RealType operator()(Objective& objective,
+                      ObjectiveEvalPolicy& obj_eval,
+                      GradientEvalPolicy& grad_eval,
+                      ArgumentContainer& x,
+                      const std::vector<RealType>& g,
+                      const std::vector<RealType>& p,
+                      RealType f_x) const
+  {
+    RealType gTp0 = dot(g, p);
+    RealType alpha_prev = 0;
+    RealType f_prev = f_x;
+    RealType alpha = alpha0_;
+
+    ArgumentContainer x_trial;
+    std::vector<RealType> g_trial(g.size());
+    for (int i = 0; i < max_iter_; ++i) {
+      x_trial = x; // explicit copy
+      axpy(alpha, p, x_trial);
+      RealType f_trial = static_cast<RealType>(obj_eval(objective, x_trial));
+      if ((f_trial > f_x + c1_ * alpha * gTp0) ||
+          (i > 0 && f_trial >= f_prev)) {
+        return zoom(
+          objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha_prev, alpha);
+      }
+      grad_eval(objective, x_trial, obj_eval, f_trial, g_trial);
+      RealType gTp = dot(g_trial, p);
+
+      if (fabs(gTp) <= c2_ * fabs(gTp0)) {
+        return alpha;
+      }
+
+      if (gTp >= 0) {
+        return zoom(
+          objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha, alpha_prev);
+      }
+      alpha_prev = alpha;
+      f_prev = f_trial;
+      alpha *= rho_;
+    }
+
+    return alpha;
+  }
+
+private:
+  template<class Objective,
+           class ObjectiveEvalPolicy,
+           class GradientEvalPolicy,
+           class ArgumentContainer>
+  RealType zoom(Objective& objective,
+                ObjectiveEvalPolicy& obj_eval,
+                GradientEvalPolicy& grad_eval,
+                ArgumentContainer& x,
+                const std::vector<RealType>& p,
+                RealType f_x,
+                RealType gTp0,
+                RealType alpha_lo,
+                RealType alpha_hi) const
+  {
+    ArgumentContainer x_trial;
+    std::vector<RealType> g_trial(p.size());
+
+    for (int iter = 0; iter < max_iter_; ++iter) {
+      const RealType alpha_mid = 0.5 * (alpha_lo + alpha_hi);
+      x_trial = x;
+      axpy(alpha_mid, p, x_trial);
+      RealType f_mid;
+      grad_eval(objective, x_trial, obj_eval, f_mid, g_trial);
+      RealType gTp_mid = dot(g_trial, p);
+      ArgumentContainer x_lo = x;
+      axpy(alpha_lo, p, x_lo);
+      RealType f_lo = static_cast<RealType>(obj_eval(objective, x_lo));
+      if ((f_mid > f_x + c1_ * alpha_mid * gTp0) || (f_mid >= f_lo)) {
+        alpha_hi = alpha_mid;
+      } else {
+        if (fabs(gTp_mid) <= c2_ * fabs(gTp0)) {
+          return alpha_mid;
+        }
+        if (gTp_mid * (alpha_hi - alpha_lo) >= 0) {
+          alpha_hi = alpha_lo;
+        }
+        alpha_lo = alpha_mid;
+      }
+    }
+
+    return 0.5 * (alpha_lo + alpha_hi);
+  }
+};
+
 } // namespace optimization
 } // namespace math
 } // namespace boost
--- a/include/boost/math/optimization/detail/rdiff_optimization_policies.hpp
+++ b/include/boost/math/optimization/detail/rdiff_optimization_policies.hpp
@@ -1,3 +1,7 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #ifndef RDIFF_OPTIMIZATION_POLICIES_HPP__
 #define RDIFF_OPTIMIZATION_POLICIES_HPP__

@@ -11,24 +15,29 @@ namespace optimization {

 namespace rdiff = boost::math::differentiation::reverse_mode;

-/******************************************************************
+/******************************************************************/
+/**
 * @brief> function evaluation policy for reverse mode autodiff
 * @arg objective> objective function to evaluate
 * @arg x> argument list
 */
-template <typename RealType>
-struct reverse_mode_function_eval_policy {
-  template <typename Objective, class ArgumentContainer>
-  rdiff::rvar<RealType, 1> operator()(Objective &&objective,
-                                      ArgumentContainer &x) {
-    auto &tape = rdiff::get_active_tape<RealType, 1>();
+template<typename RealType>
+struct reverse_mode_function_eval_policy
+{
+  template<typename Objective, class ArgumentContainer>
+  rdiff::rvar<RealType, 1> operator()(Objective&& objective,
+                                      ArgumentContainer& x)
+  {
+    auto& tape = rdiff::get_active_tape<RealType, 1>();
    tape.zero_grad();
    tape.rewind_to_last_checkpoint();

    return objective(x);
  }
 };
-/******************************************************************
+/******************************************************************/
+
+/**
 * @brief> gradient evaluation policy
 * @arg obj_f> objective
 * @arg x> argument list
@@ -36,13 +45,18 @@ struct reverse_mode_function_eval_policy {
 *                    done in tandem
 * @arg obj_v> reference to variable inside gradient class
 */
-template <typename RealType>
-struct reverse_mode_gradient_evaluation_policy {
-  template <class Objective, class ArgumentContainer,
-            class FunctionEvaluationPolicy>
-  void operator()(Objective &&obj_f, ArgumentContainer &x,
-                  FunctionEvaluationPolicy &&f_eval_pol, RealType &obj_v,
-                  std::vector<RealType> &g) {
+template<typename RealType>
+struct reverse_mode_gradient_evaluation_policy
+{
+  template<class Objective,
+           class ArgumentContainer,
+           class FunctionEvaluationPolicy>
+  void operator()(Objective&& obj_f,
+                  ArgumentContainer& x,
+                  FunctionEvaluationPolicy&& f_eval_pol,
+                  RealType& obj_v,
+                  std::vector<RealType>& g)
+  {
    // compute objective via eval policy that takes care of tape
    rdiff::rvar<RealType, 1> v = f_eval_pol(obj_f, x);
    v.backward();
@@ -58,54 +72,64 @@ struct reverse_mode_gradient_evaluation_policy {
 /******************************************************************
 * init policies
 */
-template <typename RealType>
-struct tape_initializer_rvar {
-  template <class ArgumentContainer>
-  void operator()(ArgumentContainer &) const noexcept {
+template<typename RealType>
+struct tape_initializer_rvar
+{
+  template<class ArgumentContainer>
+  void operator()(ArgumentContainer&) const noexcept
+  {
    static_assert(
-        std::is_same<typename ArgumentContainer::value_type,
-                     rdiff::rvar<RealType, 1>>::value,
-        "ArgumentContainer::value_type must be rdiff::rvar<RealType,1>");
-    auto &tape = rdiff::get_active_tape<RealType, 1>();
+      std::is_same<typename ArgumentContainer::value_type,
+                   rdiff::rvar<RealType, 1>>::value,
+      "ArgumentContainer::value_type must be rdiff::rvar<RealType,1>");
+    auto& tape = rdiff::get_active_tape<RealType, 1>();
    tape.add_checkpoint();
  }
 };

-template <typename RealType>
-struct random_uniform_initializer_rvar {
+template<typename RealType>
+struct random_uniform_initializer_rvar
+{
  RealType low_, high_;
  size_t seed_;
-  random_uniform_initializer_rvar(RealType low = 0, RealType high = 1,
+  random_uniform_initializer_rvar(RealType low = 0,
+                                  RealType high = 1,
                                  size_t seed = std::random_device{}())
-      : low_(low), high_(high), seed_(seed) {};
+    : low_(low)
+    , high_(high)
+    , seed_(seed) {};

-  template <class ArgumentContainer>
-  void operator()(ArgumentContainer &x) const {
+  template<class ArgumentContainer>
+  void operator()(ArgumentContainer& x) const
+  {
    boost::random::mt19937 gen(seed_);
    boost::random::uniform_real_distribution<RealType> dist(low_, high_);
-    for (auto &xi : x) {
+    for (auto& xi : x) {
      xi = rdiff::rvar<RealType, 1>(dist(gen));
    }
-    auto &tape = rdiff::get_active_tape<RealType, 1>();
+    auto& tape = rdiff::get_active_tape<RealType, 1>();
    tape.add_checkpoint();
  }
 };

-template <typename RealType>
-struct costant_initializer_rvar {
+template<typename RealType>
+struct costant_initializer_rvar
+{
  RealType constant;
-  explicit costant_initializer_rvar(RealType v = 0) : constant(v) {};
-  template <class ArgumentContainer>
-  void operator()(ArgumentContainer &x) const {
-    for (auto &xi : x) {
+  explicit costant_initializer_rvar(RealType v = 0)
+    : constant(v) {};
+  template<class ArgumentContainer>
+  void operator()(ArgumentContainer& x) const
+  {
+    for (auto& xi : x) {
      xi = rdiff::rvar<RealType, 1>(constant);
    }
-    auto &tape = rdiff::get_active_tape<RealType, 1>();
+    auto& tape = rdiff::get_active_tape<RealType, 1>();
    tape.add_checkpoint();
  }
 };
-}  // namespace optimization
-}  // namespace math
-}  // namespace boost
+} // namespace optimization
+} // namespace math
+} // namespace boost

 #endif
--- a/include/boost/math/optimization/gradient_descent.hpp
+++ b/include/boost/math/optimization/gradient_descent.hpp
@@ -1,3 +1,7 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #ifndef GRADIENT_DESCENT_HPP
 #define GRADIENT_DESCENT_HPP
 #include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
@@ -9,27 +13,31 @@ namespace boost {
 namespace math {
 namespace optimization {

-template <typename RealType> struct gradient_descent_update_policy {
+template<typename RealType>
+struct gradient_descent_update_policy
+{
  RealType lr_;
-  gradient_descent_update_policy(RealType lr) : lr_(lr) {};
+  gradient_descent_update_policy(RealType lr)
+    : lr_(lr) {};

-  template <typename ArgumentType,
-            typename = typename std::enable_if<
-                boost::math::differentiation::reverse_mode::detail::
-                    is_expression<ArgumentType>::value>::type>
-  void operator()(ArgumentType &x, RealType &g) {
+  template<typename ArgumentType,
+           typename = typename std::enable_if<
+             boost::math::differentiation::reverse_mode::detail::is_expression<
+               ArgumentType>::value>::type>
+  void operator()(ArgumentType& x, RealType& g)
+  {
    // this update effectively "mutes" the tape
    // TODO: add a tape scope guard method so that
    // you can do math on autodiff types without
    // accumulating gradients
    x.get_value() -= lr_ * g;
  }
-  template <
-      typename ArgumentType,
-      typename std::enable_if<!boost::math::differentiation::reverse_mode::
-                                  detail::is_expression<ArgumentType>::value,
-                              int>::type = 0>
-  void operator()(ArgumentType &x, RealType &g) const {
+  template<typename ArgumentType,
+           typename std::enable_if<!boost::math::differentiation::reverse_mode::
+                                     detail::is_expression<ArgumentType>::value,
+                                   int>::type = 0>
+  void operator()(ArgumentType& x, RealType& g) const
+  {
    x -= lr_ * g;
  }
 };
@@ -43,34 +51,49 @@ template <typename RealType> struct gradient_descent_update_policy {
 * general optimization framework.
 *
 * @tparam> ArgumentContainer Type of the parameter container (e.g.
- * std::vector<SomeDifferentiableType or RealType>).
+ *          std::vector<SomeDifferentiableType or RealType>).
 * @tparam> RealType          Floating-point type
 * @tparam> Objective         Objective function type (functor or callable).
 * @tparam> InitializationPolicy Policy controlling initialization of
- * differentiable variables.
+ *          differentiable variables.
 * @tparam> ObjectiveEvalPolicy  Policy defining how the objective is evaluated.
 * @tparam> GradEvalPolicy       Policy defining how the gradient is computed.
 */

-template <typename ArgumentContainer, typename RealType, class Objective,
-          class InitializationPolicy, class ObjectiveEvalPolicy,
-          class GradEvalPolicy>
+template<typename ArgumentContainer,
+         typename RealType,
+         class Objective,
+         class InitializationPolicy,
+         class ObjectiveEvalPolicy,
+         class GradEvalPolicy>
 class gradient_descent
-    : public abstract_optimizer<
-          ArgumentContainer, RealType, Objective, InitializationPolicy,
-          ObjectiveEvalPolicy, GradEvalPolicy,
-          gradient_descent_update_policy<RealType>,
-          gradient_descent<ArgumentContainer, RealType, Objective,
-                           InitializationPolicy, ObjectiveEvalPolicy,
-                           GradEvalPolicy>> {
-  using base_opt =
-      abstract_optimizer<ArgumentContainer, RealType, Objective,
-                         InitializationPolicy, ObjectiveEvalPolicy,
-                         GradEvalPolicy,
-                         gradient_descent_update_policy<RealType>,
-                         gradient_descent<ArgumentContainer, RealType,
-                                          Objective, InitializationPolicy,
-                                          ObjectiveEvalPolicy, GradEvalPolicy>>;
+  : public abstract_optimizer<ArgumentContainer,
+                              RealType,
+                              Objective,
+                              InitializationPolicy,
+                              ObjectiveEvalPolicy,
+                              GradEvalPolicy,
+                              gradient_descent_update_policy<RealType>,
+                              gradient_descent<ArgumentContainer,
+                                               RealType,
+                                               Objective,
+                                               InitializationPolicy,
+                                               ObjectiveEvalPolicy,
+                                               GradEvalPolicy>>
+{
+  using base_opt = abstract_optimizer<ArgumentContainer,
+                                      RealType,
+                                      Objective,
+                                      InitializationPolicy,
+                                      ObjectiveEvalPolicy,
+                                      GradEvalPolicy,
+                                      gradient_descent_update_policy<RealType>,
+                                      gradient_descent<ArgumentContainer,
+                                                       RealType,
+                                                       Objective,
+                                                       InitializationPolicy,
+                                                       ObjectiveEvalPolicy,
+                                                       GradEvalPolicy>>;

 public:
  using base_opt::base_opt;
@@ -106,60 +129,76 @@ public:
 *      custom learning rate
 */

-template <class Objective, typename ArgumentContainer, typename RealType>
-auto make_gradient_descent(Objective &&obj, ArgumentContainer &x,
-                           RealType lr = RealType{0.01}) {
-  return gradient_descent<ArgumentContainer, RealType, std::decay_t<Objective>,
+template<class Objective, typename ArgumentContainer, typename RealType>
+auto
+make_gradient_descent(Objective&& obj,
+                      ArgumentContainer& x,
+                      RealType lr = RealType{ 0.01 })
+{
+  return gradient_descent<ArgumentContainer,
+                          RealType,
+                          std::decay_t<Objective>,
                          tape_initializer_rvar<RealType>,
                          reverse_mode_function_eval_policy<RealType>,
                          reverse_mode_gradient_evaluation_policy<RealType>>(
-      std::forward<Objective>(obj), x, tape_initializer_rvar<RealType>{},
-      reverse_mode_function_eval_policy<RealType>{},
-      reverse_mode_gradient_evaluation_policy<RealType>{},
-      gradient_descent_update_policy<RealType>(lr));
+    std::forward<Objective>(obj),
+    x,
+    tape_initializer_rvar<RealType>{},
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    gradient_descent_update_policy<RealType>(lr));
 }

-/**
- * @brief> convenience factory
- *
- * make_gradient_descent(objective, x, learning rate, initialization policy)
- *
- *      possible initialization policies implemented:
- *              tape_initializer_rvar
- *              random_uniform_initializer_rvar
- *              costant_initializer_rvar
- *
- *      these are structs that initialize x via () operator
- * Default parameters:
- *      reverse_mode_function_eval_policy
- *      reverse_mode_gradient_evaluation_policy
- * */
-template <class Objective, typename ArgumentContainer, typename RealType,
-          class InitializationPolicy>
-auto make_gradient_descent(Objective &&obj, ArgumentContainer &x, RealType lr,
-                           InitializationPolicy &&ip) {
-  return gradient_descent<ArgumentContainer, RealType, std::decay_t<Objective>,
+template<class Objective,
+         typename ArgumentContainer,
+         typename RealType,
+         class InitializationPolicy>
+auto
+make_gradient_descent(Objective&& obj,
+                      ArgumentContainer& x,
+                      RealType lr,
+                      InitializationPolicy&& ip)
+{
+  return gradient_descent<ArgumentContainer,
+                          RealType,
+                          std::decay_t<Objective>,
                          InitializationPolicy,
                          reverse_mode_function_eval_policy<RealType>,
                          reverse_mode_gradient_evaluation_policy<RealType>>(
-      std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
-      reverse_mode_function_eval_policy<RealType>{},
-      reverse_mode_gradient_evaluation_policy<RealType>{},
-      gradient_descent_update_policy<RealType>(lr));
+    std::forward<Objective>(obj),
+    x,
+    std::forward<InitializationPolicy>(ip),
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    gradient_descent_update_policy<RealType>(lr));
 }

-template <typename ArgumentContainer, typename RealType, class Objective,
-          class InitializationPolicy, class ObjectiveEvalPolicy,
-          class GradEvalPolicy>
-auto make_gradient_descent(Objective &&obj, ArgumentContainer &x, RealType &lr,
-                           InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
-                           GradEvalPolicy &&gep) {
-  return gradient_descent<ArgumentContainer, RealType, std::decay_t<Objective>,
-                          InitializationPolicy, ObjectiveEvalPolicy,
+template<typename ArgumentContainer,
+         typename RealType,
+         class Objective,
+         class InitializationPolicy,
+         class ObjectiveEvalPolicy,
+         class GradEvalPolicy>
+auto
+make_gradient_descent(Objective&& obj,
+                      ArgumentContainer& x,
+                      RealType& lr,
+                      InitializationPolicy&& ip,
+                      ObjectiveEvalPolicy&& oep,
+                      GradEvalPolicy&& gep)
+{
+  return gradient_descent<ArgumentContainer,
+                          RealType,
+                          std::decay_t<Objective>,
+                          InitializationPolicy,
+                          ObjectiveEvalPolicy,
                          GradEvalPolicy>(
-      std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
-      std::forward<ObjectiveEvalPolicy>(oep), std::forward<GradEvalPolicy>(gep),
-      gradient_descent_update_policy<RealType>{lr});
+    std::forward<Objective>(obj),
+    x,
+    std::forward<InitializationPolicy>(ip),
+    std::forward<ObjectiveEvalPolicy>(oep),
+    std::forward<GradEvalPolicy>(gep),
+    gradient_descent_update_policy<RealType>{ lr });
 }

 } // namespace optimization
--- a/include/boost/math/optimization/gradient_optimizers.hpp
+++ b/include/boost/math/optimization/gradient_optimizers.hpp
@@ -1,3 +1,7 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #ifndef GRADIENT_OPTIMIZERS_HPP
 #define GRADIENT_OPTIMIZERS_HPP
 #include <boost/math/differentiation/autodiff_reverse.hpp>
@@ -7,7 +11,8 @@

 namespace boost {
 namespace math {
-namespace optimization {} // namespace optimization
+namespace optimization {
+} // namespace optimization
 } // namespace math
 } // namespace boost
 #endif
--- a/include/boost/math/optimization/lbfgs.hpp
+++ b/include/boost/math/optimization/lbfgs.hpp
@@ -1,5 +1,9 @@
-#ifndef LBFGS_HPP__
-#define LBFGS_HPP__
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
+#ifndef LBFGS_HPP
+#define LBFGS_HPP
 #include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
 #include <boost/math/optimization/detail/gradient_opt_base.hpp>
 #include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
@@ -24,8 +28,17 @@ namespace rdiff = boost::math::differentiation::reverse_mode;
 *  @param -> f_prev - > previous function value
 *
 *  https://en.wikipedia.org/wiki/Limited-memory_BFGS
+ *
+ *  Jorge Nocedal and Stephen J. Wright,
+ *  Numerical Optimization, 2nd Edition,
+ *  Springer, 2006.
+ *
+ *  pages 176-180
+ *  algorithms 7.4/7.5
 *  */
-template <typename RealType> struct lbfgs_optimizer_state {
+template<typename RealType>
+struct lbfgs_optimizer_state
+{
  size_t m = 10; // default history length
  std::deque<std::vector<RealType>> S, Y;
  std::deque<RealType> rho;
@@ -33,18 +46,23 @@ template <typename RealType> struct lbfgs_optimizer_state {
  RealType f_prev = std::numeric_limits<RealType>::quiet_NaN();
  const RealType EPS = std::numeric_limits<RealType>::epsilon();

-  template <typename ArgumentContainer>
-  void update_state(ArgumentContainer &x, std::vector<RealType> &g_k,
-                    RealType fk) {
+  template<typename ArgumentContainer>
+  void update_state(ArgumentContainer& x,
+                    std::vector<RealType>& g_k,
+                    RealType fk)
+  {

+    // iteration 0
    if (g_prev.empty()) {
      g_prev.assign(g_k.begin(), g_k.end());
      x_prev.resize(x.size());
-      std::transform(x.begin(), x.end(), x_prev.begin(),
-                     [](const auto &xi) { return static_cast<RealType>(xi); });
+      std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
+        return static_cast<RealType>(xi);
+      });
      f_prev = fk;
      return;
    }
+
    std::vector<RealType> s_k(x.size()), y_k(g_k.size());
    for (size_t i = 0; i < x.size(); ++i) {
      s_k[i] = static_cast<RealType>(x[i]) - x_prev[i];
@@ -56,8 +74,8 @@ template <typename RealType> struct lbfgs_optimizer_state {
    RealType yn = sqrt(dot(y_k, y_k));

    const RealType threshold = EPS * sn * yn;
-    if (ys > threshold && ys > RealType(0)) {
-      if (S.size() == m) {
+    if (ys > threshold && ys > RealType(0)) { // check if curvature if non-zero
+      if (S.size() == m) {                    // iteration > m
        S.pop_front();
        Y.pop_front();
        rho.pop_front();
@@ -68,8 +86,10 @@ template <typename RealType> struct lbfgs_optimizer_state {
    }

    g_prev.assign(g_k.begin(), g_k.end());
-    std::transform(x.begin(), x.end(), x_prev.begin(),
-                   [](const auto &xi) { return static_cast<RealType>(xi); });
+    // safely cast to realtype
+    std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
+      return static_cast<RealType>(xi);
+    });
    f_prev = fk;
  }
 };
@@ -77,20 +97,23 @@ template <typename RealType> struct lbfgs_optimizer_state {
 /** @brief> helper update for l-bfgs
 * x +=  alpha  * search direction
 * */
-template <typename RealType> struct lbfgs_update_policy {
-  template <typename ArgumentType,
-            typename = typename std::enable_if<
-                boost::math::differentiation::reverse_mode::detail::
-                    is_expression<ArgumentType>::value>::type>
-  void operator()(ArgumentType &x, RealType pk, RealType alpha) {
+template<typename RealType>
+struct lbfgs_update_policy
+{
+  template<typename ArgumentType,
+           typename = typename std::enable_if<
+             boost::math::differentiation::reverse_mode::detail::is_expression<
+               ArgumentType>::value>::type>
+  void operator()(ArgumentType& x, RealType pk, RealType alpha)
+  {
    x.get_value() += alpha * pk;
  }
-  template <
-      typename ArgumentType,
-      typename std::enable_if<!boost::math::differentiation::reverse_mode::
-                                  detail::is_expression<ArgumentType>::value,
-                              int>::type = 0>
-  void operator()(ArgumentType &x, RealType pk, RealType alpha) {
+  template<typename ArgumentType,
+           typename std::enable_if<!boost::math::differentiation::reverse_mode::
+                                     detail::is_expression<ArgumentType>::value,
+                                   int>::type = 0>
+  void operator()(ArgumentType& x, RealType pk, RealType alpha)
+  {
    x += alpha * pk;
  }
 };
@@ -116,32 +139,57 @@ template <typename RealType> struct lbfgs_update_policy {
 * https://en.wikipedia.org/wiki/Limited-memory_BFGS
 */

-template <typename ArgumentContainer, typename RealType, class Objective,
-          class InitializationPolicy, class ObjectiveEvalPolicy,
-          class GradEvalPolicy, class LineSearchPolicy>
+template<typename ArgumentContainer,
+         typename RealType,
+         class Objective,
+         class InitializationPolicy,
+         class ObjectiveEvalPolicy,
+         class GradEvalPolicy,
+         class LineSearchPolicy>
 class lbfgs
-    : public abstract_optimizer<
-          ArgumentContainer, RealType, Objective, InitializationPolicy,
-          ObjectiveEvalPolicy, GradEvalPolicy, lbfgs_update_policy<RealType>,
-          lbfgs<ArgumentContainer, RealType, Objective, InitializationPolicy,
-                ObjectiveEvalPolicy, GradEvalPolicy, LineSearchPolicy>> {
-  using base_opt = abstract_optimizer<
-      ArgumentContainer, RealType, Objective, InitializationPolicy,
-      ObjectiveEvalPolicy, GradEvalPolicy, lbfgs_update_policy<RealType>,
-      lbfgs<ArgumentContainer, RealType, Objective, InitializationPolicy,
-            ObjectiveEvalPolicy, GradEvalPolicy, LineSearchPolicy>>;
+  : public abstract_optimizer<ArgumentContainer,
+                              RealType,
+                              Objective,
+                              InitializationPolicy,
+                              ObjectiveEvalPolicy,
+                              GradEvalPolicy,
+                              lbfgs_update_policy<RealType>,
+                              lbfgs<ArgumentContainer,
+                                    RealType,
+                                    Objective,
+                                    InitializationPolicy,
+                                    ObjectiveEvalPolicy,
+                                    GradEvalPolicy,
+                                    LineSearchPolicy>>
+{
+  using base_opt = abstract_optimizer<ArgumentContainer,
+                                      RealType,
+                                      Objective,
+                                      InitializationPolicy,
+                                      ObjectiveEvalPolicy,
+                                      GradEvalPolicy,
+                                      lbfgs_update_policy<RealType>,
+                                      lbfgs<ArgumentContainer,
+                                            RealType,
+                                            Objective,
+                                            InitializationPolicy,
+                                            ObjectiveEvalPolicy,
+                                            GradEvalPolicy,
+                                            LineSearchPolicy>>;
+
  const RealType EPS = std::numeric_limits<RealType>::epsilon();
  lbfgs_optimizer_state<RealType> state_;
  LineSearchPolicy line_search_;

-  std::vector<RealType> compute_direction(const std::vector<RealType> &gk) {
+  std::vector<RealType> compute_direction(const std::vector<RealType>& gk)
+  {
    const size_t n = gk.size();
    const size_t L = state_.S.size(); // since S changes when iter < m

    if (L == 0) {
      std::vector<RealType> p(n);
-      std::transform(gk.begin(), gk.end(), p.begin(),
-                     [](RealType gi) { return -gi; });
+      std::transform(
+        gk.begin(), gk.end(), p.begin(), [](RealType gi) { return -gi; });
      return p;
    }

@@ -166,22 +214,28 @@ class lbfgs
      const RealType beta = state_.rho[i] * yTr;
      axpy(alpha[i] - beta, state_.S[i], r);
    }
-    scale(r, RealType{-1});
+    scale(r, RealType{ -1 });
    return r;
  }

 public:
  using base_opt::base_opt;
-  lbfgs(Objective &&objective, ArgumentContainer &x, size_t m,
-        InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
-        GradEvalPolicy &&gep, lbfgs_update_policy<RealType> &&up,
-        LineSearchPolicy &&lsp)
-      : base_opt(std::forward<Objective>(objective), x,
-                 std::forward<InitializationPolicy>(ip),
-                 std::forward<ObjectiveEvalPolicy>(oep),
-                 std::forward<GradEvalPolicy>(gep),
-                 std::forward<lbfgs_update_policy<RealType>>(up)),
-        line_search_(lsp) {
+  lbfgs(Objective&& objective,
+        ArgumentContainer& x,
+        size_t m,
+        InitializationPolicy&& ip,
+        ObjectiveEvalPolicy&& oep,
+        GradEvalPolicy&& gep,
+        lbfgs_update_policy<RealType>&& up,
+        LineSearchPolicy&& lsp)
+    : base_opt(std::forward<Objective>(objective),
+               x,
+               std::forward<InitializationPolicy>(ip),
+               std::forward<ObjectiveEvalPolicy>(oep),
+               std::forward<GradEvalPolicy>(gep),
+               std::forward<lbfgs_update_policy<RealType>>(up))
+    , line_search_(lsp)
+  {
    state_.m = m;

    state_.S.clear();
@@ -191,14 +245,15 @@ public:
    state_.f_prev = std::numeric_limits<RealType>::quiet_NaN();
  }

-  void step() {
-    auto &x = this->arguments();
-    auto &g = this->gradients();
-    auto &obj = this->objective_value();
-    auto &obj_eval = this->obj_eval_;
-    auto &grad_eval = this->grad_eval_;
-    auto &objective = this->objective_;
-    auto &update = this->update_;
+  void step()
+  {
+    auto& x = this->arguments();
+    auto& g = this->gradients();
+    auto& obj = this->objective_value();
+    auto& obj_eval = this->obj_eval_;
+    auto& grad_eval = this->grad_eval_;
+    auto& objective = this->objective_;
+    auto& update = this->update_;

    grad_eval(objective, x, obj_eval, obj, g);
    state_.update_state(x, g, obj);
@@ -210,17 +265,84 @@ public:
  }
 };

-template <class Objective, typename ArgumentContainer, typename RealType>
-auto make_lbfgs(Objective &&obj, ArgumentContainer &x, std::size_t m = 10) {
-  return lbfgs<ArgumentContainer, RealType, std::decay_t<Objective>,
+template<class Objective, typename ArgumentContainer>
+auto
+make_lbfgs(Objective&& obj, ArgumentContainer& x, std::size_t m = 10)
+{
+  using RealType = typename argument_container_t<ArgumentContainer>::type;
+  return lbfgs<ArgumentContainer,
+               RealType,
+               std::decay_t<Objective>,
               tape_initializer_rvar<RealType>,
               reverse_mode_function_eval_policy<RealType>,
               reverse_mode_gradient_evaluation_policy<RealType>,
-               armijo_line_search_policy<RealType>>(
-      std::forward<Objective>(obj), x, m, tape_initializer_rvar<RealType>{},
-      reverse_mode_function_eval_policy<RealType>{},
-      reverse_mode_gradient_evaluation_policy<RealType>{},
-      lbfgs_update_policy<RealType>{}, armijo_line_search_policy<RealType>{});
+               strong_wolfe_line_search_policy<RealType>>(
+    std::forward<Objective>(obj),
+    x,
+    m,
+    tape_initializer_rvar<RealType>{},
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    lbfgs_update_policy<RealType>{},
+    strong_wolfe_line_search_policy<RealType>{});
+}
+
+template<class Objective,
+         typename ArgumentContainer,
+         class InitializationPolicy>
+auto
+make_lbfgs(Objective&& obj,
+           ArgumentContainer& x,
+           std::size_t m,
+           InitializationPolicy&& ip)
+{
+  using RealType = typename argument_container_t<ArgumentContainer>::type;
+
+  return lbfgs<ArgumentContainer,
+               RealType,
+               std::decay_t<Objective>,
+               InitializationPolicy,
+               reverse_mode_function_eval_policy<RealType>,
+               reverse_mode_gradient_evaluation_policy<RealType>,
+               strong_wolfe_line_search_policy<RealType>>(
+    std::forward<Objective>(obj),
+    x,
+    m,
+    std::forward<InitializationPolicy>(ip),
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    lbfgs_update_policy<RealType>{},
+    strong_wolfe_line_search_policy<RealType>{});
+}
+
+template<class Objective,
+         typename ArgumentContainer,
+         class InitializationPolicy,
+         class LineSearchPolicy>
+auto
+make_lbfgs(Objective&& obj,
+           ArgumentContainer& x,
+           std::size_t m,
+           InitializationPolicy&& ip,
+           LineSearchPolicy&& lsp)
+{
+  using RealType = typename argument_container_t<ArgumentContainer>::type;
+
+  return lbfgs<ArgumentContainer,
+               RealType,
+               std::decay_t<Objective>,
+               InitializationPolicy,
+               reverse_mode_function_eval_policy<RealType>,
+               reverse_mode_gradient_evaluation_policy<RealType>,
+               LineSearchPolicy>(
+    std::forward<Objective>(obj),
+    x,
+    m,
+    std::forward<InitializationPolicy>(ip),
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    lbfgs_update_policy<RealType>{},
+    std::forward<LineSearchPolicy>(lsp));
 }
 } // namespace optimization
 } // namespace math
--- a/include/boost/math/optimization/minimizer.hpp
+++ b/include/boost/math/optimization/minimizer.hpp
@@ -1,3 +1,7 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #ifndef MINIMIZER_HPP
 #define MINIMIZER_HPP
 #include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
@@ -6,16 +10,19 @@
 namespace boost {
 namespace math {
 namespace optimization {
-template <typename RealType> struct optimization_result {
+template<typename RealType>
+struct optimization_result
+{
  size_t num_iter = 0;
  RealType objective_value;
  std::vector<RealType> objective_history;
  bool converged;
 };

-template <typename RealType>
-std::ostream &operator<<(std::ostream &os,
-                         const optimization_result<RealType> &r) {
+template<typename RealType>
+std::ostream&
+operator<<(std::ostream& os, const optimization_result<RealType>& r)
+{
  os << "optimization_result {\n"
     << "  num_iter        = " << r.num_iter << "\n"
     << "  objective_value = " << r.objective_value << "\n"
@@ -32,26 +39,39 @@ std::ostream &operator<<(std::ostream &os,
  return os;
 }
 /*****************************************************************************************/
-template <typename RealType> struct gradient_norm_convergence_policy {
+template<typename RealType>
+struct gradient_norm_convergence_policy
+{
  RealType tol_;
-  explicit gradient_norm_convergence_policy(RealType tol) : tol_(tol) {}
+  explicit gradient_norm_convergence_policy(RealType tol)
+    : tol_(tol)
+  {
+  }

-  template <class GradientContainer>
-  bool operator()(const GradientContainer &g, RealType /*objective_v*/) const {
+  template<class GradientContainer>
+  bool operator()(const GradientContainer& g, RealType /*objective_v*/) const
+  {
    return norm_2(g) < tol_;
  }
 };

-template <typename RealType> struct objective_tol_convergence_policy {
+template<typename RealType>
+struct objective_tol_convergence_policy
+{
  RealType tol_;
  mutable RealType last_value_;
  mutable bool first_call_;

  explicit objective_tol_convergence_policy(RealType tol)
-      : tol_(tol), last_value_(0), first_call_(true) {}
+    : tol_(tol)
+    , last_value_(0)
+    , first_call_(true)
+  {
+  }

-  template <class GradientContainer>
-  bool operator()(const GradientContainer &, RealType objective_v) const {
+  template<class GradientContainer>
+  bool operator()(const GradientContainer&, RealType objective_v) const
+  {
    if (first_call_) {
      last_value_ = objective_v;
      first_call_ = false;
@@ -63,16 +83,23 @@ template <typename RealType> struct objective_tol_convergence_policy {
  }
 };

-template <typename RealType> struct relative_objective_tol_policy {
+template<typename RealType>
+struct relative_objective_tol_policy
+{
  RealType rel_tol_;
  mutable RealType last_value_;
  mutable bool first_call_;

  explicit relative_objective_tol_policy(RealType rel_tol)
-      : rel_tol_(rel_tol), last_value_(0), first_call_(true) {}
+    : rel_tol_(rel_tol)
+    , last_value_(0)
+    , first_call_(true)
+  {
+  }

-  template <class GradientContainer>
-  bool operator()(const GradientContainer &, RealType objective_v) const {
+  template<class GradientContainer>
+  bool operator()(const GradientContainer&, RealType objective_v) const
+  {
    if (first_call_) {
      last_value_ = objective_v;
      first_call_ = false;
@@ -85,23 +112,33 @@ template <typename RealType> struct relative_objective_tol_policy {
  }
 };

-template <class Policy1, class Policy2> struct combined_convergence_policy {
+template<class Policy1, class Policy2>
+struct combined_convergence_policy
+{
  Policy1 p1_;
  Policy2 p2_;

-  combined_convergence_policy(Policy1 p1, Policy2 p2) : p1_(p1), p2_(p2) {}
+  combined_convergence_policy(Policy1 p1, Policy2 p2)
+    : p1_(p1)
+    , p2_(p2)
+  {
+  }

-  template <class GradientContainer, class RealType>
-  bool operator()(const GradientContainer &g, RealType obj) const {
+  template<class GradientContainer, class RealType>
+  bool operator()(const GradientContainer& g, RealType obj) const
+  {
    return p1_(g, obj) || p2_(g, obj);
  }
 };

 /*****************************************************************************************/
-struct max_iter_termination_policy {
+struct max_iter_termination_policy
+{
  size_t max_iter_;
-  max_iter_termination_policy(size_t max_iter) : max_iter_(max_iter) {};
-  bool operator()(size_t iter) {
+  max_iter_termination_policy(size_t max_iter)
+    : max_iter_(max_iter) {};
+  bool operator()(size_t iter)
+  {
    if (iter < max_iter_) {
      return false;
    }
@@ -109,118 +146,156 @@ struct max_iter_termination_policy {
  }
 };

-struct wallclock_termination_policy {
+struct wallclock_termination_policy
+{
  std::chrono::steady_clock::time_point start_;
  std::chrono::milliseconds max_time_;

  explicit wallclock_termination_policy(std::chrono::milliseconds max_time)
-      : start_(std::chrono::steady_clock::now()), max_time_(max_time) {}
+    : start_(std::chrono::steady_clock::now())
+    , max_time_(max_time)
+  {
+  }

-  bool operator()(size_t /*iter*/) const {
+  bool operator()(size_t /*iter*/) const
+  {
    return std::chrono::steady_clock::now() - start_ > max_time_;
  }
 };

 /*****************************************************************************************/
-template <typename ArgumentContainer> struct unconstrained_policy {
-  void operator()(ArgumentContainer &) {}
+template<typename ArgumentContainer>
+struct unconstrained_policy
+{
+  void operator()(ArgumentContainer&) {}
 };

-template <typename ArgumentContainer, typename RealType>
-struct box_constraints {
+template<typename ArgumentContainer, typename RealType>
+struct box_constraints
+{
  RealType min_, max_;
-  box_constraints(RealType min, RealType max) : min_(min), max_(max) {};
-  void operator()(ArgumentContainer &x) {
-    for (auto &xi : x) {
+  box_constraints(RealType min, RealType max)
+    : min_(min)
+    , max_(max) {};
+  void operator()(ArgumentContainer& x)
+  {
+    for (auto& xi : x) {
      xi = (xi < min_) ? min_ : (max_ < xi) ? max_ : xi;
    }
  }
 };

-template <typename ArgumentContainer, typename RealType>
-struct nonnegativity_constraint {
-  void operator()(ArgumentContainer &x) const {
-    for (auto &xi : x) {
-      if (xi < RealType{0})
-        xi = RealType{0};
+template<typename ArgumentContainer, typename RealType>
+struct nonnegativity_constraint
+{
+  void operator()(ArgumentContainer& x) const
+  {
+    for (auto& xi : x) {
+      if (xi < RealType{ 0 })
+        xi = RealType{ 0 };
    }
  }
 };
-template <typename ArgumentContainer, typename RealType>
-struct l2_ball_constraint {
+template<typename ArgumentContainer, typename RealType>
+struct l2_ball_constraint
+{
  RealType radius_;

-  explicit l2_ball_constraint(RealType radius) : radius_(radius) {}
+  explicit l2_ball_constraint(RealType radius)
+    : radius_(radius)
+  {
+  }

-  void operator()(ArgumentContainer &x) const {
+  void operator()(ArgumentContainer& x) const
+  {
    RealType norm2v = norm_2(x);
    if (norm2v > radius_) {
      RealType scale = radius_ / norm2v;
-      for (auto &xi : x)
+      for (auto& xi : x)
        xi *= scale;
    }
  }
 };

-template <typename ArgumentContainer, typename RealType>
-struct l1_ball_constraint {
+template<typename ArgumentContainer, typename RealType>
+struct l1_ball_constraint
+{
  RealType radius_;

-  explicit l1_ball_constraint(RealType radius) : radius_(radius) {}
+  explicit l1_ball_constraint(RealType radius)
+    : radius_(radius)
+  {
+  }

-  void operator()(ArgumentContainer &x) const {
+  void operator()(ArgumentContainer& x) const
+  {
    RealType norm1v = norm_1(x);

    if (norm1v > radius_) {
      RealType scale = radius_ / norm1v;
-      for (auto &xi : x)
+      for (auto& xi : x)
        xi *= scale;
    }
  }
 };
-template <typename ArgumentContainer, typename RealType>
-struct simplex_constraint {
-  void operator()(ArgumentContainer &x) const {
-    RealType sum = RealType{0};
-    for (auto &xi : x) {
-      if (xi < RealType{0})
-        xi = RealType{0}; // clip negatives
+template<typename ArgumentContainer, typename RealType>
+struct simplex_constraint
+{
+  void operator()(ArgumentContainer& x) const
+  {
+    RealType sum = RealType{ 0 };
+    for (auto& xi : x) {
+      if (xi < RealType{ 0 })
+        xi = RealType{ 0 }; // clip negatives
      sum += xi;
    }
-    if (sum > RealType{0}) {
-      for (auto &xi : x)
+    if (sum > RealType{ 0 }) {
+      for (auto& xi : x)
        xi /= sum;
    }
  }
 };

-template <typename ArgumentContainer> struct function_constraint {
-  using func_t = void (*)(ArgumentContainer &);
+template<typename ArgumentContainer>
+struct function_constraint
+{
+  using func_t = void (*)(ArgumentContainer&);

  func_t f_;

-  explicit function_constraint(func_t f) : f_(f) {}
+  explicit function_constraint(func_t f)
+    : f_(f)
+  {
+  }

-  void operator()(ArgumentContainer &x) const { f_(x); }
+  void operator()(ArgumentContainer& x) const { f_(x); }
 };
-template <typename ArgumentContainer, typename RealType>
-struct unit_sphere_constraint {
-  void operator()(ArgumentContainer &x) const {
+template<typename ArgumentContainer, typename RealType>
+struct unit_sphere_constraint
+{
+  void operator()(ArgumentContainer& x) const
+  {
    RealType norm2v = norm_2(x);
    RealType norm = sqrt(norm2v);
-    if (norm > RealType{0}) {
-      for (auto &xi : x)
+    if (norm > RealType{ 0 }) {
+      for (auto& xi : x)
        xi /= norm;
    }
  }
 };
 /*****************************************************************************************/

-template <class Optimizer, class ConstraintPolicy, class ConvergencePolicy,
-          class TerminationPolicy>
-auto minimize_impl(Optimizer &opt, ConstraintPolicy project,
-                   ConvergencePolicy converged, TerminationPolicy terminate,
-                   bool history) {
+template<class Optimizer,
+         class ConstraintPolicy,
+         class ConvergencePolicy,
+         class TerminationPolicy>
+auto
+minimize_impl(Optimizer& opt,
+              ConstraintPolicy project,
+              ConvergencePolicy converged,
+              TerminationPolicy terminate,
+              bool history)
+{
  optimization_result<typename Optimizer::real_type_t> result;
  size_t iter = 0;
  do {
@@ -238,18 +313,21 @@ auto minimize_impl(Optimizer &opt, ConstraintPolicy project,
  result.converged = converged(opt.gradients(), opt.objective_value());
  return result;
 }
-template <class Optimizer,
-          class ConstraintPolicy =
-              unconstrained_policy<typename Optimizer::argument_container_t>,
-          class ConvergencePolicy =
-              gradient_norm_convergence_policy<typename Optimizer::real_type_t>,
-          class TerminationPolicy = max_iter_termination_policy>
-auto minimize(Optimizer &opt, ConstraintPolicy project = ConstraintPolicy{},
-              ConvergencePolicy converged =
-                  ConvergencePolicy{
-                      static_cast<typename Optimizer::real_type_t>(1e-8)},
-              TerminationPolicy terminate = TerminationPolicy(10000),
-              bool history = true) {
+template<class Optimizer,
+         class ConstraintPolicy =
+           unconstrained_policy<typename Optimizer::argument_container_t>,
+         class ConvergencePolicy =
+           gradient_norm_convergence_policy<typename Optimizer::real_type_t>,
+         class TerminationPolicy = max_iter_termination_policy>
+auto
+minimize(Optimizer& opt,
+         ConstraintPolicy project = ConstraintPolicy{},
+         ConvergencePolicy converged =
+           ConvergencePolicy{
+             static_cast<typename Optimizer::real_type_t>(1e-8) },
+         TerminationPolicy terminate = TerminationPolicy(10000),
+         bool history = true)
+{
  return minimize_impl(opt, project, converged, terminate, history);
 }
 } // namespace optimization
--- a/include/boost/math/optimization/nesterov.hpp
+++ b/include/boost/math/optimization/nesterov.hpp
@@ -1,5 +1,9 @@
-#ifndef NESTEROV_HPP__
-#define NESTEROV_HPP__
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
+#ifndef NESTEROV_HPP
+#define NESTEROV_HPP
 #include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
 #include <boost/math/optimization/detail/gradient_opt_base.hpp>
 #include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
@@ -11,69 +15,109 @@ namespace optimization {

 namespace rdiff = boost::math::differentiation::reverse_mode;

-template <typename RealType> struct nesterov_update_policy {
+/**
+ * @brief The nesterov_update_policy class
+ */
+template<typename RealType>
+struct nesterov_update_policy
+{
  RealType lr_, mu_;
-  nesterov_update_policy(RealType lr, RealType mu) : lr_(lr), mu_(mu) {};
+  nesterov_update_policy(RealType lr, RealType mu)
+    : lr_(lr)
+    , mu_(mu) {};

-  template <typename ArgumentType,
-            typename = typename std::enable_if<
-                boost::math::differentiation::reverse_mode::detail::
-                    is_expression<ArgumentType>::value>::type>
-  void operator()(ArgumentType &x, RealType &g, RealType &v) {
+  template<typename ArgumentType,
+           typename = typename std::enable_if<
+             boost::math::differentiation::reverse_mode::detail::is_expression<
+               ArgumentType>::value>::type>
+  void operator()(ArgumentType& x, RealType& g, RealType& v)
+  {
    v = mu_ * v - lr_ * g;
    x.get_value() += v;
  }
-  template <
-      typename ArgumentType,
-      typename std::enable_if<!boost::math::differentiation::reverse_mode::
-                                  detail::is_expression<ArgumentType>::value,
-                              int>::type = 0>
-  void operator()(ArgumentType &x, RealType &g, RealType &v) const {
+  template<typename ArgumentType,
+           typename std::enable_if<!boost::math::differentiation::reverse_mode::
+                                     detail::is_expression<ArgumentType>::value,
+                                   int>::type = 0>
+  void operator()(ArgumentType& x, RealType& g, RealType& v) const
+  {
    v = mu_ * v - lr_ * g;
    x += v;
  }
  RealType lr() const noexcept { return lr_; }
  RealType mu() const noexcept { return mu_; }
 };
-template <typename ArgumentContainer, typename RealType, class Objective,
-          class InitializationPolicy, class ObjectiveEvalPolicy,
-          class GradEvalPolicy>
+
+/**
+ * @brief The nesterov_accelerated_gradient class
+ *
+ * https://jlmelville.github.io/mize/nesterov.html
+ */
+template<typename ArgumentContainer,
+         typename RealType,
+         class Objective,
+         class InitializationPolicy,
+         class ObjectiveEvalPolicy,
+         class GradEvalPolicy>
 class nesterov_accelerated_gradient
-    : public abstract_optimizer<
-          ArgumentContainer, RealType, Objective, InitializationPolicy,
-          ObjectiveEvalPolicy, GradEvalPolicy, nesterov_update_policy<RealType>,
-          nesterov_accelerated_gradient<ArgumentContainer, RealType, Objective,
-                                        InitializationPolicy,
-                                        ObjectiveEvalPolicy, GradEvalPolicy>> {
-  using base_opt = abstract_optimizer<
-      ArgumentContainer, RealType, Objective, InitializationPolicy,
-      ObjectiveEvalPolicy, GradEvalPolicy, nesterov_update_policy<RealType>,
-      nesterov_accelerated_gradient<ArgumentContainer, RealType, Objective,
-                                    InitializationPolicy, ObjectiveEvalPolicy,
-                                    GradEvalPolicy>>;
+  : public abstract_optimizer<
+      ArgumentContainer,
+      RealType,
+      Objective,
+      InitializationPolicy,
+      ObjectiveEvalPolicy,
+      GradEvalPolicy,
+      nesterov_update_policy<RealType>,
+      nesterov_accelerated_gradient<ArgumentContainer,
+                                    RealType,
+                                    Objective,
+                                    InitializationPolicy,
+                                    ObjectiveEvalPolicy,
+                                    GradEvalPolicy>>
+{
+  using base_opt =
+    abstract_optimizer<ArgumentContainer,
+                       RealType,
+                       Objective,
+                       InitializationPolicy,
+                       ObjectiveEvalPolicy,
+                       GradEvalPolicy,
+                       nesterov_update_policy<RealType>,
+                       nesterov_accelerated_gradient<ArgumentContainer,
+                                                     RealType,
+                                                     Objective,
+                                                     InitializationPolicy,
+                                                     ObjectiveEvalPolicy,
+                                                     GradEvalPolicy>>;
  std::vector<RealType> v_;

 public:
  using base_opt::base_opt;
-  nesterov_accelerated_gradient(Objective &&objective, ArgumentContainer &x,
-                                InitializationPolicy &&ip,
-                                ObjectiveEvalPolicy &&oep, GradEvalPolicy &&gep,
-                                nesterov_update_policy<RealType> &&up)
-      : base_opt(std::forward<Objective>(objective), x,
-                 std::forward<InitializationPolicy>(ip),
-                 std::forward<ObjectiveEvalPolicy>(oep),
-                 std::forward<GradEvalPolicy>(gep),
-                 std::forward<nesterov_update_policy<RealType>>(up)),
-        v_(x.size(), RealType(0)) {}
+  nesterov_accelerated_gradient(Objective&& objective,
+                                ArgumentContainer& x,
+                                InitializationPolicy&& ip,
+                                ObjectiveEvalPolicy&& oep,
+                                GradEvalPolicy&& gep,
+                                nesterov_update_policy<RealType>&& up)
+    : base_opt(std::forward<Objective>(objective),
+               x,
+               std::forward<InitializationPolicy>(ip),
+               std::forward<ObjectiveEvalPolicy>(oep),
+               std::forward<GradEvalPolicy>(gep),
+               std::forward<nesterov_update_policy<RealType>>(up))
+    , v_(x.size(), RealType(0))
+  {
+  }

-  void step() {
-    auto &x = this->arguments();
-    auto &g = this->gradients();
-    auto &obj = this->objective_value();
-    auto &obj_eval = this->obj_eval_;
-    auto &grad_eval = this->grad_eval_;
-    auto &objective = this->objective_;
-    auto &update = this->update_;
+  void step()
+  {
+    auto& x = this->arguments();
+    auto& g = this->gradients();
+    auto& obj = this->objective_value();
+    auto& obj_eval = this->obj_eval_;
+    auto& grad_eval = this->grad_eval_;
+    auto& objective = this->objective_;
+    auto& update = this->update_;

    grad_eval(objective, x, obj_eval, obj, g);

@@ -82,44 +126,79 @@ public:
    }
  }
 };
-template <class Objective, typename ArgumentContainer, typename RealType>
-auto make_nag(Objective &&obj, ArgumentContainer &x,
-              RealType lr = RealType{0.01}, RealType mu = RealType{0.95}) {
+template<class Objective, typename ArgumentContainer, typename RealType>
+auto
+make_nag(Objective&& obj,
+         ArgumentContainer& x,
+         RealType lr = RealType{ 0.01 },
+         RealType mu = RealType{ 0.95 })
+{
  return nesterov_accelerated_gradient<
-      ArgumentContainer, RealType, std::decay_t<Objective>,
-      tape_initializer_rvar<RealType>,
-      reverse_mode_function_eval_policy<RealType>,
-      reverse_mode_gradient_evaluation_policy<RealType>>(
-      std::forward<Objective>(obj), x, tape_initializer_rvar<RealType>{},
-      reverse_mode_function_eval_policy<RealType>{},
-      reverse_mode_gradient_evaluation_policy<RealType>{},
-      nesterov_update_policy<RealType>(lr, mu));
+    ArgumentContainer,
+    RealType,
+    std::decay_t<Objective>,
+    tape_initializer_rvar<RealType>,
+    reverse_mode_function_eval_policy<RealType>,
+    reverse_mode_gradient_evaluation_policy<RealType>>(
+    std::forward<Objective>(obj),
+    x,
+    tape_initializer_rvar<RealType>{},
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    nesterov_update_policy<RealType>(lr, mu));
 }
-template <class Objective, typename ArgumentContainer, typename RealType,
-          class InitializationPolicy>
-auto make_nag(Objective &&obj, ArgumentContainer &x, RealType lr, RealType mu,
-              InitializationPolicy &&ip) {
+template<class Objective,
+         typename ArgumentContainer,
+         typename RealType,
+         class InitializationPolicy>
+auto
+make_nag(Objective&& obj,
+         ArgumentContainer& x,
+         RealType lr,
+         RealType mu,
+         InitializationPolicy&& ip)
+{
  return nesterov_accelerated_gradient<
-      ArgumentContainer, RealType, std::decay_t<Objective>,
-      InitializationPolicy, reverse_mode_function_eval_policy<RealType>,
-      reverse_mode_gradient_evaluation_policy<RealType>>(
-      std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
-      reverse_mode_function_eval_policy<RealType>{},
-      reverse_mode_gradient_evaluation_policy<RealType>{},
-      nesterov_update_policy<RealType>(lr, mu));
+    ArgumentContainer,
+    RealType,
+    std::decay_t<Objective>,
+    InitializationPolicy,
+    reverse_mode_function_eval_policy<RealType>,
+    reverse_mode_gradient_evaluation_policy<RealType>>(
+    std::forward<Objective>(obj),
+    x,
+    std::forward<InitializationPolicy>(ip),
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    nesterov_update_policy<RealType>(lr, mu));
 }
-template <typename ArgumentContainer, typename RealType, class Objective,
-          class InitializationPolicy, class ObjectiveEvalPolicy,
-          class GradEvalPolicy>
-auto make_nag(Objective &&obj, ArgumentContainer &x, RealType lr, RealType mu,
-              InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
-              GradEvalPolicy &&gep) {
-  return nesterov_accelerated_gradient<
-      ArgumentContainer, RealType, std::decay_t<Objective>,
-      InitializationPolicy, ObjectiveEvalPolicy, GradEvalPolicy>(
-      std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
-      std::forward<ObjectiveEvalPolicy>(oep), std::forward<GradEvalPolicy>(gep),
-      nesterov_update_policy<RealType>{lr, mu});
+template<typename ArgumentContainer,
+         typename RealType,
+         class Objective,
+         class InitializationPolicy,
+         class ObjectiveEvalPolicy,
+         class GradEvalPolicy>
+auto
+make_nag(Objective&& obj,
+         ArgumentContainer& x,
+         RealType lr,
+         RealType mu,
+         InitializationPolicy&& ip,
+         ObjectiveEvalPolicy&& oep,
+         GradEvalPolicy&& gep)
+{
+  return nesterov_accelerated_gradient<ArgumentContainer,
+                                       RealType,
+                                       std::decay_t<Objective>,
+                                       InitializationPolicy,
+                                       ObjectiveEvalPolicy,
+                                       GradEvalPolicy>(
+    std::forward<Objective>(obj),
+    x,
+    std::forward<InitializationPolicy>(ip),
+    std::forward<ObjectiveEvalPolicy>(oep),
+    std::forward<GradEvalPolicy>(gep),
+    nesterov_update_policy<RealType>{ lr, mu });
 }
 } // namespace optimization
 } // namespace math
--- a/test/test_gradient_descent_optimizer.cpp
+++ b/test/test_gradient_descent_optimizer.cpp
@@ -1,318 +1,336 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #include "test_autodiff_reverse.hpp" // reuse for some basic options
 #include "test_functions_for_optimization.hpp"
 #include <boost/math/differentiation/autodiff_reverse.hpp>
 #include <boost/math/optimization/gradient_descent.hpp>
 #include <boost/math/optimization/minimizer.hpp>
 namespace rdiff = boost::math::differentiation::reverse_mode;
-namespace bopt  = boost::math::optimization;
+namespace bopt = boost::math::optimization;
 BOOST_AUTO_TEST_SUITE(basic_gradient_descent)

 BOOST_AUTO_TEST_CASE_TEMPLATE(default_gd_test, T, all_float_types)
 {
-    size_t                         NITER = 2000;
-    size_t                         N     = 15;
-    T                              lr    = T{1e-2};
-    RandomSample<T>                rng{T(-100), (100)};
-    std::vector<rdiff::rvar<T, 1>> x_ad;
-    T                              eps = T{1e-3};
-    for (size_t i = 0; i < N; ++i) {
-        x_ad.push_back(rng.next());
-    }
-    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
-    for (size_t i = 0; i < NITER; ++i) {
-        gdopt.step();
-    }
-    for (auto& x : x_ad) {
-        BOOST_REQUIRE_SMALL(x.item(), eps);
-    }
+  size_t NITER = 2000;
+  size_t N = 15;
+  T lr = T{ 1e-2 };
+  RandomSample<T> rng{ T(-100), (100) };
+  std::vector<rdiff::rvar<T, 1>> x_ad;
+  T eps = T{ 1e-3 };
+  for (size_t i = 0; i < N; ++i) {
+    x_ad.push_back(rng.next());
+  }
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
+  for (size_t i = 0; i < NITER; ++i) {
+    gdopt.step();
+  }
+  for (auto& x : x_ad) {
+    BOOST_REQUIRE_SMALL(x.item(), eps);
+  }
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(test_minimize, T, all_float_types)
 {
-    size_t                         NITER = 2000;
-    size_t                         N     = 15;
-    T                              lr    = T{1e-2};
-    RandomSample<T>                rng{T(-100), (100)};
-    std::vector<rdiff::rvar<T, 1>> x_ad;
-    T                              eps = T{1e-3};
-    for (size_t i = 0; i < N; ++i) {
-        x_ad.push_back(rng.next());
-    }
-    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
-    auto z     = minimize(gdopt);
-    for (auto& x : x_ad) {
-        BOOST_REQUIRE_SMALL(x.item(), eps);
-    }
+  size_t NITER = 2000;
+  size_t N = 15;
+  T lr = T{ 1e-2 };
+  RandomSample<T> rng{ T(-100), (100) };
+  std::vector<rdiff::rvar<T, 1>> x_ad;
+  T eps = T{ 1e-3 };
+  for (size_t i = 0; i < N; ++i) {
+    x_ad.push_back(rng.next());
+  }
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
+  auto z = minimize(gdopt);
+  for (auto& x : x_ad) {
+    BOOST_REQUIRE_SMALL(x.item(), eps);
+  }
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(random_initializer_test, T, all_float_types)
 {
-    size_t                         N  = 10;
-    T                              lr = T{1e-2};
-    std::vector<rdiff::rvar<T, 1>> x(N);
+  size_t N = 10;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T, 1>> x(N);

-    auto                           gdopt = bopt::make_gradient_descent(
-        &quadratic<rdiff::rvar<T, 1>>,
-        x,
-        lr,
-        bopt::random_uniform_initializer_rvar<T>(-2.0, 2.0, 1234)); // all initialized to 5
-    for (auto& xi : x) {
-        T v = xi.item();
-        BOOST_TEST(v >= -2);
-        BOOST_TEST(v <= 2);
-    }
-    gdopt.step();
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>,
+                                x,
+                                lr,
+                                bopt::random_uniform_initializer_rvar<T>(
+                                  -2.0, 2.0, 1234)); // all initialized to 5
+  for (auto& xi : x) {
+    T v = xi.item();
+    BOOST_TEST(v >= -2);
+    BOOST_TEST(v <= 2);
+  }
+  gdopt.step();
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(const_initializer_test, T, all_float_types)
 {
-    size_t                         N  = 10;
-    T                              lr = T{1e-2};
-    std::vector<rdiff::rvar<T, 1>> x(N);
+  size_t N = 10;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T, 1>> x(N);

-    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>,
-                                             x,
-                                             lr,
-                                             bopt::costant_initializer_rvar<T>(
-                                                 T{5.0})); // all initialized to 5
+  auto gdopt = bopt::make_gradient_descent(
+    &quadratic<rdiff::rvar<T, 1>>,
+    x,
+    lr,
+    bopt::costant_initializer_rvar<T>(T{ 5.0 })); // all initialized to 5

-    for (auto& xi : x) {
-        T v = xi.item();
-        BOOST_REQUIRE_CLOSE(v, T{5.0}, T{1e-3});
-    }
-    gdopt.step();
+  for (auto& xi : x) {
+    T v = xi.item();
+    BOOST_REQUIRE_CLOSE(v, T{ 5.0 }, T{ 1e-3 });
+  }
+  gdopt.step();
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(box_constraint_test, T, all_float_types)
 {
-    size_t                         N  = 5;
-    T                              lr = T{1e-2};
-    std::vector<rdiff::rvar<T, 1>> x(N, T{10});
+  size_t N = 5;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T, 1>> x(N, T{ 10 });

-    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);

-    auto res   = bopt::minimize(gdopt,
-                              bopt::box_constraints<std::vector<rdiff::rvar<T, 1>>, T>(-1.0, 1.0));
+  auto res = bopt::minimize(
+    gdopt, bopt::box_constraints<std::vector<rdiff::rvar<T, 1>>, T>(-1.0, 1.0));

-    for (auto& xi : x) {
-        BOOST_TEST(xi.item() >= -1.0);
-        BOOST_TEST(xi.item() <= 1.0);
-    }
+  for (auto& xi : x) {
+    BOOST_TEST(xi.item() >= -1.0);
+    BOOST_TEST(xi.item() <= 1.0);
+  }
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(max_iter_test, T, all_float_types)
 {
-    size_t                         N  = 2;
-    T                              lr = T{1e-6}; // very slow learning
-    std::vector<rdiff::rvar<T, 1>> x  = {T{5}, T{5}};
+  size_t N = 2;
+  T lr = T{ 1e-6 }; // very slow learning
+  std::vector<rdiff::rvar<T, 1>> x = { T{ 5 }, T{ 5 } };

-    auto   gdopt    = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);

-    size_t max_iter = 50;
-    auto   res      = bopt::minimize(gdopt,
-                              bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
-                              bopt::gradient_norm_convergence_policy<T>(T{1e-20}),
-                              bopt::max_iter_termination_policy(max_iter));
+  size_t max_iter = 50;
+  auto res =
+    bopt::minimize(gdopt,
+                   bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
+                   bopt::gradient_norm_convergence_policy<T>(T{ 1e-20 }),
+                   bopt::max_iter_termination_policy(max_iter));

-    BOOST_TEST(!res.converged); // should not converge with tiny lr
-    BOOST_REQUIRE_EQUAL(res.num_iter, max_iter);
+  BOOST_TEST(!res.converged); // should not converge with tiny lr
+  BOOST_REQUIRE_EQUAL(res.num_iter, max_iter);
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(history_tracking_test, T, all_float_types)
 {
-    size_t                         N  = 3;
-    T                              lr = T{1e-2};
-    std::vector<rdiff::rvar<T, 1>> x  = {T{3}, T{-4}, T{5}};
+  size_t N = 3;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T, 1>> x = { T{ 3 }, T{ -4 }, T{ 5 } };

-    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);

-    auto res   = bopt::minimize(gdopt,
-                              bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
-                              bopt::gradient_norm_convergence_policy<T>(T{1e-6}),
-                              bopt::max_iter_termination_policy(1000),
-                              true); // enable history
+  auto res =
+    bopt::minimize(gdopt,
+                   bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
+                   bopt::gradient_norm_convergence_policy<T>(T{ 1e-6 }),
+                   bopt::max_iter_termination_policy(1000),
+                   true); // enable history

-    BOOST_TEST(!res.objective_history.empty());
-    BOOST_TEST(res.objective_history.front() > res.objective_history.back());
+  BOOST_TEST(!res.objective_history.empty());
+  BOOST_TEST(res.objective_history.front() > res.objective_history.back());
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(rosenbrock_test, T, all_float_types)
 {
-    std::array<rdiff::rvar<T, 1>, 2> x  = {T{-1.2}, T{1.0}}; // bad start
-    T                                lr = T{1e-3};
+  std::array<rdiff::rvar<T, 1>, 2> x = { T{ -1.2 }, T{ 1.0 } }; // bad start
+  T lr = T{ 1e-3 };

-    auto gdopt = bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, lr);
+  auto gdopt =
+    bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, lr);

-    auto res   = bopt::minimize(gdopt,
-                              bopt::unconstrained_policy<std::array<rdiff::rvar<T, 1>, 2>>{},
-                              bopt::gradient_norm_convergence_policy<T>(T{1e-4}),
-                              bopt::max_iter_termination_policy(50000));
-    BOOST_TEST(res.converged);
-    BOOST_REQUIRE_CLOSE(x[0].item(), T{1.0}, T{1e-1});
-    BOOST_REQUIRE_CLOSE(x[1].item(), T{1.0}, T{1e-1});
+  auto res = bopt::minimize(
+    gdopt,
+    bopt::unconstrained_policy<std::array<rdiff::rvar<T, 1>, 2>>{},
+    bopt::gradient_norm_convergence_policy<T>(T{ 1e-4 }),
+    bopt::max_iter_termination_policy(50000));
+  BOOST_TEST(res.converged);
+  BOOST_REQUIRE_CLOSE(x[0].item(), T{ 1.0 }, T{ 1e-1 });
+  BOOST_REQUIRE_CLOSE(x[1].item(), T{ 1.0 }, T{ 1e-1 });
 }
-BOOST_AUTO_TEST_CASE_TEMPLATE(objective_tol_convergence_test, T, all_float_types)
+BOOST_AUTO_TEST_CASE_TEMPLATE(objective_tol_convergence_test,
+                              T,
+                              all_float_types)
 {
-    using policy_t = bopt::objective_tol_convergence_policy<T>;
-    policy_t       pol(1e-3);
-    std::vector<T> dummy_grad;
+  using policy_t = bopt::objective_tol_convergence_policy<T>;
+  policy_t pol(1e-3);
+  std::vector<T> dummy_grad;

-    BOOST_TEST(!pol(dummy_grad, 100.0));
-    BOOST_TEST(!pol(dummy_grad, 99.0));
-    BOOST_TEST(pol(dummy_grad, 99.0005));
+  BOOST_TEST(!pol(dummy_grad, 100.0));
+  BOOST_TEST(!pol(dummy_grad, 99.0));
+  BOOST_TEST(pol(dummy_grad, 99.0005));
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(relative_objective_tol_test, T, all_float_types)
 {
-    using policy_t = bopt::relative_objective_tol_policy<T>;
-    policy_t       pol(1e-3);
+  using policy_t = bopt::relative_objective_tol_policy<T>;
+  policy_t pol(1e-3);

-    std::vector<T> dummy_grad;
-    BOOST_TEST(!pol(dummy_grad, 1000.0));
-    BOOST_TEST(!pol(dummy_grad, 1010.0));
-    BOOST_TEST(pol(dummy_grad, 1010.5));
+  std::vector<T> dummy_grad;
+  BOOST_TEST(!pol(dummy_grad, 1000.0));
+  BOOST_TEST(!pol(dummy_grad, 1010.0));
+  BOOST_TEST(pol(dummy_grad, 1010.5));
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(combined_policy_test, T, all_float_types)
 {
-    using pol_abs  = bopt::objective_tol_convergence_policy<T>;
-    using pol_rel  = bopt::relative_objective_tol_policy<T>;
-    using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;
+  using pol_abs = bopt::objective_tol_convergence_policy<T>;
+  using pol_rel = bopt::relative_objective_tol_policy<T>;
+  using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;

-    pol_abs        abs_pol(1e-6);
-    pol_rel        rel_pol(1e-3);
-    pol_comb       comb(abs_pol, rel_pol);
+  pol_abs abs_pol(1e-6);
+  pol_rel rel_pol(1e-3);
+  pol_comb comb(abs_pol, rel_pol);

-    std::vector<T> dummy_grad;
+  std::vector<T> dummy_grad;

-    BOOST_TEST(!comb(dummy_grad, 100.0));
-    BOOST_TEST(!comb(dummy_grad, 110.0));
-    BOOST_TEST(comb(dummy_grad, 110.1));
-    BOOST_TEST(comb(dummy_grad, 110.1000001));
+  BOOST_TEST(!comb(dummy_grad, 100.0));
+  BOOST_TEST(!comb(dummy_grad, 110.0));
+  BOOST_TEST(comb(dummy_grad, 110.1));
+  BOOST_TEST(comb(dummy_grad, 110.1000001));
 }
 BOOST_AUTO_TEST_CASE_TEMPLATE(nonnegativity_constraint_test, T, all_float_types)
 {
-    std::vector<T>                                    x = {1.0, -2.0, 3.0, -4.0};
-    bopt::nonnegativity_constraint<std::vector<T>, T> proj;
-    proj(x);
+  std::vector<T> x = { 1.0, -2.0, 3.0, -4.0 };
+  bopt::nonnegativity_constraint<std::vector<T>, T> proj;
+  proj(x);

-    for (auto& xi : x)
-        BOOST_TEST(xi >= 0.0);
-    BOOST_TEST(x == std::vector<T>({1.0, 0.0, 3.0, 0.0}));
+  for (auto& xi : x)
+    BOOST_TEST(xi >= 0.0);
+  BOOST_TEST(x == std::vector<T>({ 1.0, 0.0, 3.0, 0.0 }));
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(l2_ball_constraint_test, T, all_float_types)
 {
-    std::vector<T>                              x = {3.0, 4.0}; // norm = 5
-    bopt::l2_ball_constraint<std::vector<T>, T> proj(1.0);
-    proj(x);
+  std::vector<T> x = { 3.0, 4.0 }; // norm = 5
+  bopt::l2_ball_constraint<std::vector<T>, T> proj(1.0);
+  proj(x);

-    T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
-    BOOST_TEST(abs(norm - 1.0) < 1e-12); // projected to unit circle
+  T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
+  BOOST_TEST(abs(norm - 1.0) < 1e-12); // projected to unit circle
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(l1_ball_constraint_test, T, all_float_types)
 {
-    std::vector<T>                              x = {3.0, 4.0}; // L1 norm = 7
-    bopt::l1_ball_constraint<std::vector<T>, T> proj(2.0);
-    proj(x);
+  std::vector<T> x = { 3.0, 4.0 }; // L1 norm = 7
+  bopt::l1_ball_constraint<std::vector<T>, T> proj(2.0);
+  proj(x);

-    T norm1 = abs(x[0]) + abs(x[1]);
-    BOOST_TEST(abs(norm1 - 2.0) < T{1e-12});
+  T norm1 = abs(x[0]) + abs(x[1]);
+  BOOST_TEST(abs(norm1 - 2.0) < T{ 1e-12 });
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(simplex_constraint_test, T, all_float_types)
 {
-    std::vector<T>                              x = {-1.0, 2.0, 3.0}; // has negative and sum != 1
-    bopt::simplex_constraint<std::vector<T>, T> proj;
-    proj(x);
+  std::vector<T> x = { -1.0, 2.0, 3.0 }; // has negative and sum != 1
+  bopt::simplex_constraint<std::vector<T>, T> proj;
+  proj(x);

-    T sum = 0.0;
-    for (auto& xi : x) {
-        BOOST_TEST(xi >= 0.0); // all nonnegative
-        sum += xi;
-    }
-    BOOST_TEST(abs(sum - 1.0) < 1e-12); // normalized to sum=1
+  T sum = 0.0;
+  for (auto& xi : x) {
+    BOOST_TEST(xi >= 0.0); // all nonnegative
+    sum += xi;
+  }
+  BOOST_TEST(abs(sum - 1.0) < 1e-12); // normalized to sum=1
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(unit_sphere_constraint_test, T, all_float_types)
 {
-    std::vector<T>                                  x = {0.3, 0.4}; // norm = 0.5
-    bopt::unit_sphere_constraint<std::vector<T>, T> proj;
-    proj(x);
+  std::vector<T> x = { 0.3, 0.4 }; // norm = 0.5
+  bopt::unit_sphere_constraint<std::vector<T>, T> proj;
+  proj(x);

-    T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
-    BOOST_TEST(abs(norm - 1.0) < 1e-12); // always projected to sphere
+  T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
+  BOOST_TEST(abs(norm - 1.0) < 1e-12); // always projected to sphere
 }

 BOOST_AUTO_TEST_CASE_TEMPLATE(function_constraint_test, T, all_float_types)
 {
-    auto clip_to_half = [](std::vector<T>& v) {
-        for (auto& xi : v)
-            if (xi > 0.5)
-                xi = 0.5;
-    };
+  auto clip_to_half = [](std::vector<T>& v) {
+    for (auto& xi : v)
+      if (xi > 0.5)
+        xi = 0.5;
+  };

-    bopt::function_constraint<std::vector<T>> proj(clip_to_half);
-    std::vector<T>                            x = {0.2, 0.7, 1.5};
-    proj(x);
+  bopt::function_constraint<std::vector<T>> proj(clip_to_half);
+  std::vector<T> x = { 0.2, 0.7, 1.5 };
+  proj(x);

-    BOOST_TEST(x == std::vector<T>({0.2, 0.5, 0.5}));
+  BOOST_TEST(x == std::vector<T>({ 0.2, 0.5, 0.5 }));
 }

 template<typename RealType>
 struct no_init_policy
 {
-    void operator()(std::vector<RealType>& x) const noexcept {}
+  void operator()(std::vector<RealType>& x) const noexcept {}
 };

 template<typename RealType>
 struct analytic_objective_eval_pol
 {
-    template<typename Objective, typename ArgumentContainer>
-    RealType operator()(Objective&& objective, ArgumentContainer& x)
-    {
-        return objective(x);
-    }
+  template<typename Objective, typename ArgumentContainer>
+  RealType operator()(Objective&& objective, ArgumentContainer& x)
+  {
+    return objective(x);
+  }
 };

 template<typename RealType>
 struct analytic_gradient_eval_pol
 {
-    template<class Objective, class ArgumentContainer, class FunctionEvaluationPolicy>
-    void operator()(Objective&&                obj_f,
-                    ArgumentContainer&         x,
-                    FunctionEvaluationPolicy&& f_eval_pol,
-                    RealType&                  obj_v,
-                    std::vector<RealType>&     grad_container)
-    {
-        RealType v = f_eval_pol(obj_f, x);
-        obj_v      = v;
-        grad_container.resize(x.size());
-        for (size_t i = 0; i < x.size(); ++i) {
-            grad_container[i] = 2 * x[i];
-        }
+  template<class Objective,
+           class ArgumentContainer,
+           class FunctionEvaluationPolicy>
+  void operator()(Objective&& obj_f,
+                  ArgumentContainer& x,
+                  FunctionEvaluationPolicy&& f_eval_pol,
+                  RealType& obj_v,
+                  std::vector<RealType>& grad_container)
+  {
+    RealType v = f_eval_pol(obj_f, x);
+    obj_v = v;
+    grad_container.resize(x.size());
+    for (size_t i = 0; i < x.size(); ++i) {
+      grad_container[i] = 2 * x[i];
    }
+  }
 };

 BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_derivative_policies, T, all_float_types)
 {
-    std::vector<T>  x;
-    size_t          NITER = 2000;
-    size_t          N     = 15;
-    T               lr    = T{1e-2};
-    RandomSample<T> rng{T(-100), (100)};
-    T               eps = T{1e-3};
-    for (size_t i = 0; i < N; ++i) {
-        x.push_back(rng.next());
-    }
+  std::vector<T> x;
+  size_t NITER = 2000;
+  size_t N = 15;
+  T lr = T{ 1e-2 };
+  RandomSample<T> rng{ T(-100), (100) };
+  T eps = T{ 1e-3 };
+  for (size_t i = 0; i < N; ++i) {
+    x.push_back(rng.next());
+  }

-    auto gdopt = bopt::make_gradient_descent(&quadratic<T>,
-                                             x,
-                                             lr,
-                                             no_init_policy<T>{},
-                                             analytic_objective_eval_pol<T>{},
-                                             analytic_gradient_eval_pol<T>{});
+  auto gdopt = bopt::make_gradient_descent(&quadratic<T>,
+                                           x,
+                                           lr,
+                                           no_init_policy<T>{},
+                                           analytic_objective_eval_pol<T>{},
+                                           analytic_gradient_eval_pol<T>{});

-    for (size_t i = 0; i < NITER; ++i) {
-        gdopt.step();
-    }
-    for (auto& xi : x) {
-        BOOST_REQUIRE_SMALL(xi, eps);
-    }
+  for (size_t i = 0; i < NITER; ++i) {
+    gdopt.step();
+  }
+  for (auto& xi : x) {
+    BOOST_REQUIRE_SMALL(xi, eps);
+  }
 }
 BOOST_AUTO_TEST_SUITE_END()
--- a/test/test_lbfgs.cpp
+++ b/test/test_lbfgs.cpp
@@ -1,4 +1,8 @@
-#include "test_autodiff_reverse.hpp" // reuse for same test infra
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
+#include "test_autodiff_reverse.hpp"
 #include "test_functions_for_optimization.hpp"
 #include <boost/math/differentiation/autodiff_reverse.hpp>
 #include <boost/math/optimization/lbfgs.hpp>
@@ -9,26 +13,175 @@ namespace bopt = boost::math::optimization;

 BOOST_AUTO_TEST_SUITE(basic_lbfgs)

-BOOST_AUTO_TEST_CASE(default_lbfgs_test) {
-  using T = double;
+BOOST_AUTO_TEST_CASE_TEMPLATE(default_lbfgs_test, T, all_float_types)
+{
  constexpr size_t NITER = 10;
  constexpr size_t M = 10;
-  const T eps = T{1e-8};
+  const T eps = T{ 1e-8 };

-  RandomSample<T> rng{T(-10), T(10)};
+  RandomSample<T> rng{ T(-10), T(10) };
  std::array<rdiff::rvar<T, 1>, 2> x;
  x[0] = rng.next();
  x[1] = rng.next();

-  auto opt = bopt::make_lbfgs<decltype(&rosenbrock_saddle<rdiff::rvar<T, 1>>),
-                              std::array<rdiff::rvar<T, 1>, 2>, T>(
-      &rosenbrock_saddle<rdiff::rvar<T, 1>>, x, M);
+  auto opt = bopt::make_lbfgs(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, M);

  auto result = minimize(opt);
-  std::cout << result << std::endl;
-  for (auto &xi : x) {
-    BOOST_REQUIRE_CLOSE(xi, T{1.0}, eps);
+  for (auto& xi : x) {
+    BOOST_REQUIRE_CLOSE(xi, T{ 1.0 }, eps);
  }
 }

+// Custom initialization policy that zeros out the parameters
+template<typename RealType>
+struct zero_init_policy
+{
+  void operator()(std::vector<RealType>& x) const noexcept
+  {
+    std::fill(x.begin(), x.end(), RealType{ 0 });
+  }
+};
+
+template<typename RealType>
+struct analytic_objective_eval_pol
+{
+  template<typename Objective, typename ArgumentContainer>
+  RealType operator()(Objective&& objective, ArgumentContainer& x)
+  {
+    return objective(x);
+  }
+};
+template<typename RealType>
+struct analytic_gradient_eval_pol
+{
+  template<class Objective,
+           class ArgumentContainer,
+           class FunctionEvaluationPolicy>
+  void operator()(Objective&& obj_f,
+                  ArgumentContainer& x,
+                  FunctionEvaluationPolicy&& f_eval_pol,
+                  RealType& obj_v,
+                  std::vector<RealType>& grad_container)
+  {
+    RealType v = f_eval_pol(obj_f, x);
+    obj_v = v;
+    grad_container.resize(x.size());
+    for (size_t i = 0; i < x.size(); ++i) {
+      grad_container[i] = 2 * x[i];
+    }
+  }
+};
+
+// -- Test L-BFGS with custom initialization policy (zero_init_policy)
+BOOST_AUTO_TEST_CASE_TEMPLATE(custom_init_lbfgs_test, T, all_float_types)
+{
+  constexpr size_t M = 8;
+  const T eps = T{ 1e-6 };
+
+  RandomSample<T> rng{ T(-5), T(5) };
+  std::array<rdiff::rvar<T, 1>, 2> x;
+  x[0] = rng.next();
+  x[1] = rng.next();
+
+  auto opt = bopt::make_lbfgs(&rosenbrock_saddle<rdiff::rvar<T, 1>>,
+                              x,
+                              M,
+                              bopt::costant_initializer_rvar<T>(0.0));
+  auto result = minimize(opt);
+
+  for (auto& xi : x) {
+    BOOST_REQUIRE_CLOSE(xi, T{ 1.0 }, eps);
+  }
+}
+
+// // -- Test L-BFGS with analytic derivative policies
+// BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_lbfgs_test, T, all_float_types)
+// {
+//   constexpr size_t M = 10;
+//   const T eps = T{ 1e-3 };
+
+//   RandomSample<T> rng{ T(-5), T(5) };
+//   std::vector<T> x(3);
+//   for (auto& xi : x)
+//     xi = rng.next();
+
+//   auto opt = bopt::make_lbfgs(
+//     &quadratic<rdiff::rvar<T, 1>>,              // Objective
+//     x,                                          // Arguments
+//     M,                                          // History size
+//     bopt::random_uniform_initializer_rvar<T>{}, // Initialization
+//     analytic_objective_eval_pol<T>{},           // Function eval
+//     analytic_gradient_eval_pol<T>{}             // Gradient eval
+//   );
+
+//   // Run optimization (manual loop or minimize wrapper)
+//   auto result = minimize(opt);
+
+//   for (auto& xi : x) {
+//     BOOST_REQUIRE_SMALL(xi, eps);
+//   }
+// }
+
+// // -- Test L-BFGS with analytic policies and custom line search
+// // (strong_wolfe_line_search_policy)
+// BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_lbfgs_strong_wolfe_test,
+//                               T,
+//                               all_float_types)
+// {
+//   constexpr size_t M = 12;
+//   const T eps = T{ 1e-4 };
+
+//   RandomSample<T> rng{ T(-10), T(10) };
+//   std::vector<T> x(5);
+//   for (auto& xi : x)
+//     xi = rng.next();
+
+//   auto opt = bopt::make_lbfgs(
+//     &quadratic<T>,
+//     x,
+//     M,
+//     boost::math::optimization::random_uniform_initializer_rvar<T>{},
+//     analytic_objective_eval_pol<T>{},
+//     analytic_gradient_eval_pol<T>{},
+//     boost::math::optimization::armijo_line_search_policy<T>{});
+
+//   auto result = minimize(opt);
+
+//   for (auto& xi : x) {
+//     BOOST_REQUIRE_SMALL(xi, eps);
+//   }
+// }
+
+// // -- Test L-BFGS with random init policy (demonstrates flexible
+// initialization) template<typename RealType> struct random_init_policy
+// {
+//   RandomSample<RealType> rng{ RealType(-1), RealType(1) };
+//   void operator()(std::vector<RealType>& x) const noexcept
+//   {
+//     for (auto& xi : x)
+//       xi = rng.next();
+//   }
+// };
+
+// BOOST_AUTO_TEST_CASE_TEMPLATE(random_init_lbfgs_test, T, all_float_types)
+// {
+//   constexpr size_t M = 6;
+//   const T eps = T{ 1e-4 };
+
+//   std::vector<T> x(4);
+//   random_init_policy<T>{}(x); // Apply initialization manually
+
+//   auto opt = bopt::make_lbfgs(&quadratic<T>,
+//                               x,
+//                               M,
+//                               random_init_policy<T>{},
+//                               analytic_objective_eval_pol<T>{},
+//                               analytic_gradient_eval_pol<T>{});
+
+//   auto result = minimize(opt);
+
+//   for (auto& xi : x) {
+//     BOOST_REQUIRE_SMALL(xi, eps);
+//   }
+// }
 BOOST_AUTO_TEST_SUITE_END()
--- a/test/test_nesterov_optimizer.cpp
+++ b/test/test_nesterov_optimizer.cpp
@@ -1,26 +1,31 @@
+//           Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+//      (See accompanying file LICENSE_1_0.txt or copy at
+//           https://www.boost.org/LICENSE_1_0.txt)
 #include "test_autodiff_reverse.hpp" // reuse for some basic options
 #include "test_functions_for_optimization.hpp"
 #include <boost/math/differentiation/autodiff_reverse.hpp>
 #include <boost/math/optimization/minimizer.hpp>
 #include <boost/math/optimization/nesterov.hpp>
 namespace rdiff = boost::math::differentiation::reverse_mode;
-namespace bopt  = boost::math::optimization;
+namespace bopt = boost::math::optimization;
 BOOST_AUTO_TEST_SUITE(basic_gradient_descent)

 BOOST_AUTO_TEST_CASE_TEMPLATE(default_nesterov_test, T, all_float_types)
 {
-    size_t                         NITER = 5;
-    T                              lr    = T{1e-3};
-    T                              mu    = T{0.95};
-    RandomSample<T>                rng{T(-10), (10)};
-    std::vector<rdiff::rvar<T, 1>> x;
-    x.push_back(rng.next());
-    x.push_back(rng.next());
-    T    eps = T{1e-8};
-    auto nag = bopt::make_nag(&quadratic_high_cond_2D<rdiff::rvar<T, 1>>, x, lr, mu);
-    auto z   = minimize(nag);
-    for (auto& xi : x) {
-        BOOST_REQUIRE_SMALL(xi.item(), eps);
-    }
+  size_t NITER = 5;
+  T lr = T{ 1e-3 };
+  T mu = T{ 0.95 };
+  RandomSample<T> rng{ T(-10), (10) };
+  std::vector<rdiff::rvar<T, 1>> x;
+  x.push_back(rng.next());
+  x.push_back(rng.next());
+  T eps = T{ 1e-8 };
+  auto nag =
+    bopt::make_nag(&quadratic_high_cond_2D<rdiff::rvar<T, 1>>, x, lr, mu);
+  auto z = minimize(nag);
+  for (auto& xi : x) {
+    BOOST_REQUIRE_SMALL(xi.item(), eps);
+  }
 }
 BOOST_AUTO_TEST_SUITE_END()