mirror of
https://github.com/boostorg/math.git
synced 2026-02-24 04:02:18 +00:00
updates
This commit is contained in:
@@ -1,5 +1,10 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef DIFFERENTIABLE_OPT_UTILITIES_HPP
|
||||
#define DIFFERENTIABLE_OPT_UTILITIES_HPP
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_real_distribution.hpp>
|
||||
#include <cmath>
|
||||
@@ -10,47 +15,92 @@
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
template <typename UpdPol> struct update_policy_real_type;
|
||||
|
||||
template <typename UpdPol> struct update_policy_real_type;
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
template <template <typename> class UpdPol, typename RealType>
|
||||
struct update_policy_real_type<UpdPol<RealType>> {
|
||||
/** @brief> helper to get the underlying realtype from
|
||||
* update policy
|
||||
* */
|
||||
template<typename UpdPol>
|
||||
struct update_policy_real_type;
|
||||
|
||||
template<template<typename> class UpdPol, typename RealType>
|
||||
struct update_policy_real_type<UpdPol<RealType>>
|
||||
{
|
||||
using type = RealType;
|
||||
};
|
||||
|
||||
template <typename UpdPol>
|
||||
template<typename UpdPol>
|
||||
using update_policy_real_type_t =
|
||||
typename update_policy_real_type<typename std::decay<UpdPol>::type>::type;
|
||||
typename update_policy_real_type<typename std::decay<UpdPol>::type>::type;
|
||||
|
||||
/** @brief> get realtype from argument container
|
||||
* */
|
||||
template<class Container>
|
||||
struct argument_container_t;
|
||||
|
||||
template<template<typename, typename...> class Container,
|
||||
typename ValueType,
|
||||
typename... Args>
|
||||
struct argument_container_t<Container<ValueType, Args...>>
|
||||
{
|
||||
using type = ValueType;
|
||||
};
|
||||
template<template<typename, typename...> class Container,
|
||||
typename RealType,
|
||||
int N,
|
||||
typename... Args>
|
||||
struct argument_container_t<Container<rdiff::rvar<RealType, N>, Args...>>
|
||||
{
|
||||
using type = RealType;
|
||||
};
|
||||
template<typename ValueType, std::size_t N>
|
||||
struct argument_container_t<std::array<ValueType, N>>
|
||||
{
|
||||
using type = ValueType;
|
||||
};
|
||||
|
||||
template<typename RealType, int M, std::size_t N>
|
||||
struct argument_container_t<std::array<rdiff::rvar<RealType, M>, N>>
|
||||
{
|
||||
using type = RealType;
|
||||
};
|
||||
/******************************************************************************/
|
||||
/** @brief simple blas helpers
|
||||
* may optimize later if benchmarks show its needed, or just switch to Eigen
|
||||
*/
|
||||
template <typename Container>
|
||||
auto dot(const Container &x, const Container &y) ->
|
||||
typename Container::value_type {
|
||||
template<typename Container>
|
||||
auto
|
||||
dot(const Container& x, const Container& y) -> typename Container::value_type
|
||||
{
|
||||
using T = typename Container::value_type;
|
||||
BOOST_MATH_ASSERT(x.size() == y.size());
|
||||
return std::inner_product(x.begin(), x.end(), y.begin(), T(0));
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
auto norm_2(const Container &x) -> typename Container::value_type {
|
||||
template<typename Container>
|
||||
auto
|
||||
norm_2(const Container& x) -> typename Container::value_type
|
||||
{
|
||||
return sqrt(dot(x, x));
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
auto norm_1(const Container &x) -> typename Container::value_type {
|
||||
template<typename Container>
|
||||
auto
|
||||
norm_1(const Container& x) -> typename Container::value_type
|
||||
{
|
||||
using T = typename Container::value_type;
|
||||
T ret{0};
|
||||
for (auto &xi : x) {
|
||||
T ret{ 0 };
|
||||
for (auto& xi : x) {
|
||||
ret += abs(xi);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename T> T norm_inf(const std::vector<T> &x) {
|
||||
template<typename T>
|
||||
T
|
||||
norm_inf(const std::vector<T>& x)
|
||||
{
|
||||
assert(!x.empty());
|
||||
|
||||
T max_val = std::abs(x[0]);
|
||||
@@ -64,17 +114,21 @@ template <typename T> T norm_inf(const std::vector<T> &x) {
|
||||
return max_val;
|
||||
}
|
||||
/** @brief alpha*x (alpha is scalar, x is vector */
|
||||
template <typename Container, typename RealType>
|
||||
void scale(Container &x, const RealType &alpha) {
|
||||
for (auto &xi : x) {
|
||||
template<typename Container, typename RealType>
|
||||
void
|
||||
scale(Container& x, const RealType& alpha)
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
xi *= alpha;
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief y += alpha * x
|
||||
*/
|
||||
template <typename Container, typename RealType>
|
||||
void axpy(RealType alpha, const Container &x, Container &y) {
|
||||
template<typename ContainerX, typename ContainerY, typename RealType>
|
||||
void
|
||||
axpy(RealType alpha, const ContainerX& x, ContainerY& y)
|
||||
{
|
||||
BOOST_MATH_ASSERT(x.size() == y.size());
|
||||
const size_t n = x.size();
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
@@ -82,7 +136,10 @@ void axpy(RealType alpha, const Container &x, Container &y) {
|
||||
}
|
||||
}
|
||||
/******************************************************************************/
|
||||
template <typename RealType> std::vector<RealType> random_vector(size_t n) {
|
||||
template<typename RealType>
|
||||
std::vector<RealType>
|
||||
random_vector(size_t n)
|
||||
{
|
||||
/** @brief> generates a random std::vector<RealType> of size n
|
||||
* using mt19937 algorithm
|
||||
*/
|
||||
@@ -92,7 +149,7 @@ template <typename RealType> std::vector<RealType> random_vector(size_t n) {
|
||||
*
|
||||
* TODO: benchmark.
|
||||
*/
|
||||
static boost::random::mt19937 rng{std::random_device{}()};
|
||||
static boost::random::mt19937 rng{ std::random_device{}() };
|
||||
static boost::random::uniform_real_distribution<RealType> dist(0.0, 1.0);
|
||||
|
||||
std::vector<RealType> result(n);
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef GRADIENT_OPT_BASE_HPP
|
||||
#define GRADIENT_OPT_BASE_HPP
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
@@ -11,15 +15,27 @@ namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
/**
|
||||
* @brief The abstract_optimizer class implementing common variables
|
||||
* and methods across optimizers
|
||||
*
|
||||
* @tparam> ArgumentContainer
|
||||
* @tparam> RealType
|
||||
* @tparam> Objective
|
||||
* @tparam> InitializationPolicy
|
||||
*
|
||||
*/
|
||||
|
||||
template <typename ArgumentContainer, typename RealType, class Objective,
|
||||
class InitializationPolicy, class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy, class UpdatePolicy, typename DerivedOptimizer>
|
||||
class abstract_optimizer {
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy,
|
||||
class UpdatePolicy,
|
||||
typename DerivedOptimizer>
|
||||
class abstract_optimizer
|
||||
{
|
||||
protected:
|
||||
Objective objective_; // obj function
|
||||
ArgumentContainer &x_; // arguments to objective function
|
||||
ArgumentContainer& x_; // arguments to objective function
|
||||
std::vector<RealType> g_; // container of references to gradients
|
||||
ObjectiveEvalPolicy obj_eval_; // how to evaluate your funciton
|
||||
GradEvalPolicy grad_eval_; // how to evaluate/bind gradients
|
||||
@@ -27,12 +43,14 @@ protected:
|
||||
UpdatePolicy update_; // update step
|
||||
RealType obj_v_; // objective value (for history)
|
||||
// access derived class
|
||||
DerivedOptimizer &derived() { return static_cast<DerivedOptimizer &>(*this); }
|
||||
const DerivedOptimizer &derived() const {
|
||||
return static_cast<const DerivedOptimizer &>(*this);
|
||||
DerivedOptimizer& derived() { return static_cast<DerivedOptimizer&>(*this); }
|
||||
const DerivedOptimizer& derived() const
|
||||
{
|
||||
return static_cast<const DerivedOptimizer&>(*this);
|
||||
}
|
||||
|
||||
void step_impl() {
|
||||
void step_impl()
|
||||
{
|
||||
grad_eval_(objective_, x_, obj_eval_, obj_v_, g_);
|
||||
for (size_t i = 0; i < x_.size(); ++i) {
|
||||
update_(x_[i], g_[i]);
|
||||
@@ -43,25 +61,30 @@ public:
|
||||
using argument_container_t = ArgumentContainer;
|
||||
using real_type_t = RealType;
|
||||
|
||||
abstract_optimizer(Objective &&objective, ArgumentContainer &x,
|
||||
InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
|
||||
GradEvalPolicy &&gep, UpdatePolicy &&up)
|
||||
: objective_(std::forward<Objective>(objective)), x_(x),
|
||||
obj_eval_(std::forward<ObjectiveEvalPolicy>(oep)),
|
||||
grad_eval_(std::forward<GradEvalPolicy>(gep)),
|
||||
init_(std::forward<InitializationPolicy>(ip)),
|
||||
update_(std::forward<UpdatePolicy>(up)) {
|
||||
abstract_optimizer(Objective&& objective,
|
||||
ArgumentContainer& x,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep,
|
||||
UpdatePolicy&& up)
|
||||
: objective_(std::forward<Objective>(objective))
|
||||
, x_(x)
|
||||
, obj_eval_(std::forward<ObjectiveEvalPolicy>(oep))
|
||||
, grad_eval_(std::forward<GradEvalPolicy>(gep))
|
||||
, init_(std::forward<InitializationPolicy>(ip))
|
||||
, update_(std::forward<UpdatePolicy>(up))
|
||||
{
|
||||
init_(x_); // initialize your problem
|
||||
g_.resize(x_.size()); // initialize space for gradients
|
||||
}
|
||||
|
||||
ArgumentContainer &arguments() { return derived().x_; }
|
||||
const ArgumentContainer &arguments() const { return derived().x_; }
|
||||
ArgumentContainer& arguments() { return derived().x_; }
|
||||
const ArgumentContainer& arguments() const { return derived().x_; }
|
||||
|
||||
RealType &objective_value() { return derived().obj_v_; }
|
||||
const RealType &objective_value() const { return derived().obj_v_; }
|
||||
std::vector<RealType> &gradients() { return derived().g_; }
|
||||
const std::vector<RealType> &gradients() const { return derived().g_; }
|
||||
RealType& objective_value() { return derived().obj_v_; }
|
||||
const RealType& objective_value() const { return derived().obj_v_; }
|
||||
std::vector<RealType>& gradients() { return derived().g_; }
|
||||
const std::vector<RealType>& gradients() const { return derived().g_; }
|
||||
};
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef LINE_SEARCH_POLICIES_HPP
|
||||
#define LINE_SEARCH_POLICIES_HPP
|
||||
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
@@ -13,9 +19,18 @@ namespace optimization {
|
||||
* @brief> Armijo condition backtracking line search
|
||||
* https://en.wikipedia.org/wiki/Backtracking_line_search
|
||||
*
|
||||
* f(x+alpha p) <= f(x) + alpha * c * grad(f)^T p
|
||||
* */
|
||||
template <typename RealType> class armijo_line_search_policy {
|
||||
* f(x+alpha p) <= f(x) + alpha * c * grad(f)^T
|
||||
*
|
||||
* Jorge Nocedal and Stephen J. Wright,
|
||||
* Numerical Optimization, 2nd Edition,
|
||||
* Springer, 2006.
|
||||
*
|
||||
* Algorithm 3.1: Backtracking Line Search
|
||||
* (Page 37)
|
||||
*/
|
||||
template<typename RealType>
|
||||
class armijo_line_search_policy
|
||||
{
|
||||
private:
|
||||
RealType alpha0_; // initial step size
|
||||
RealType c_; // sufficient decrease constant
|
||||
@@ -23,35 +38,170 @@ private:
|
||||
int max_iter_; // maximum backtracking steps
|
||||
|
||||
public:
|
||||
armijo_line_search_policy(RealType alpha0 = 1.0, RealType c = 1e-4,
|
||||
RealType rho = 0.5, int max_iter = 20)
|
||||
: alpha0_(alpha0), c_(c), rho_(rho), max_iter_(max_iter) {}
|
||||
armijo_line_search_policy(RealType alpha0 = 1.0,
|
||||
RealType c = 1e-4,
|
||||
RealType rho = 0.5,
|
||||
int max_iter = 20)
|
||||
: alpha0_(alpha0)
|
||||
, c_(c)
|
||||
, rho_(rho)
|
||||
, max_iter_(max_iter)
|
||||
{
|
||||
}
|
||||
|
||||
template <class Objective, class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy, class ArgumentContainer>
|
||||
RealType operator()(Objective &objective, ObjectiveEvalPolicy &obj_eval,
|
||||
GradientEvalPolicy &grad_eval, ArgumentContainer &x,
|
||||
const std::vector<RealType> &g,
|
||||
const std::vector<RealType> &p, RealType f_x) const {
|
||||
template<class Objective,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class ArgumentContainer>
|
||||
RealType operator()(Objective& objective,
|
||||
ObjectiveEvalPolicy& obj_eval,
|
||||
GradientEvalPolicy& grad_eval,
|
||||
ArgumentContainer& x,
|
||||
const std::vector<RealType>& g,
|
||||
const std::vector<RealType>& p,
|
||||
RealType f_x) const
|
||||
{
|
||||
/** @brief> line search
|
||||
* */
|
||||
RealType alpha = alpha0_;
|
||||
ArgumentContainer x_trial = x; // copy
|
||||
ArgumentContainer x_trial; // = x; // copy
|
||||
const RealType gTp = dot(g, p);
|
||||
|
||||
for (int iter = 0; iter < max_iter_; ++iter) {
|
||||
for (size_t i = 0; i < x.size(); ++i)
|
||||
x_trial[i] = x[i] + alpha * p[i];
|
||||
x_trial = x;
|
||||
axpy(alpha, p, x_trial);
|
||||
auto f_trial = obj_eval(objective, x_trial);
|
||||
if (f_trial <=
|
||||
f_x + c_ * alpha * gTp) // check if armijo condition is satisfied
|
||||
return alpha;
|
||||
alpha *= rho_; // half by default
|
||||
alpha *= rho_;
|
||||
}
|
||||
return alpha;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief> Strong-Wolfe line search:
|
||||
* Jorge Nocedal and Stephen J. Wright,
|
||||
* Numerical Optimization, 2nd Edition,
|
||||
* Springer, 2006.
|
||||
*
|
||||
* Algorithm 3.5 — Line Search Algorithm (Strong Wolfe Conditions)
|
||||
* Pages 60–61
|
||||
*/
|
||||
template<typename RealType>
|
||||
class strong_wolfe_line_search_policy
|
||||
{
|
||||
private:
|
||||
RealType alpha0_; // initial step size
|
||||
RealType c1_; // Armijo constant (sufficient decrease)
|
||||
RealType c2_; // curvature constant
|
||||
RealType rho_; // backtracking factor
|
||||
int max_iter_; // maximum iterations
|
||||
public:
|
||||
strong_wolfe_line_search_policy(RealType alpha0 = 1.0,
|
||||
RealType c1 = 1e-4,
|
||||
RealType c2 = 0.9,
|
||||
RealType rho = 2.0,
|
||||
int max_iter = 20)
|
||||
: alpha0_(alpha0)
|
||||
, c1_(c1)
|
||||
, c2_(c2)
|
||||
, rho_(rho)
|
||||
, max_iter_(max_iter)
|
||||
{
|
||||
}
|
||||
template<class Objective,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class ArgumentContainer>
|
||||
RealType operator()(Objective& objective,
|
||||
ObjectiveEvalPolicy& obj_eval,
|
||||
GradientEvalPolicy& grad_eval,
|
||||
ArgumentContainer& x,
|
||||
const std::vector<RealType>& g,
|
||||
const std::vector<RealType>& p,
|
||||
RealType f_x) const
|
||||
{
|
||||
RealType gTp0 = dot(g, p);
|
||||
RealType alpha_prev = 0;
|
||||
RealType f_prev = f_x;
|
||||
RealType alpha = alpha0_;
|
||||
|
||||
ArgumentContainer x_trial;
|
||||
std::vector<RealType> g_trial(g.size());
|
||||
for (int i = 0; i < max_iter_; ++i) {
|
||||
x_trial = x; // explicit copy
|
||||
axpy(alpha, p, x_trial);
|
||||
RealType f_trial = static_cast<RealType>(obj_eval(objective, x_trial));
|
||||
if ((f_trial > f_x + c1_ * alpha * gTp0) ||
|
||||
(i > 0 && f_trial >= f_prev)) {
|
||||
return zoom(
|
||||
objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha_prev, alpha);
|
||||
}
|
||||
grad_eval(objective, x_trial, obj_eval, f_trial, g_trial);
|
||||
RealType gTp = dot(g_trial, p);
|
||||
|
||||
if (fabs(gTp) <= c2_ * fabs(gTp0)) {
|
||||
return alpha;
|
||||
}
|
||||
|
||||
if (gTp >= 0) {
|
||||
return zoom(
|
||||
objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha, alpha_prev);
|
||||
}
|
||||
alpha_prev = alpha;
|
||||
f_prev = f_trial;
|
||||
alpha *= rho_;
|
||||
}
|
||||
|
||||
return alpha;
|
||||
}
|
||||
|
||||
private:
|
||||
template<class Objective,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class ArgumentContainer>
|
||||
RealType zoom(Objective& objective,
|
||||
ObjectiveEvalPolicy& obj_eval,
|
||||
GradientEvalPolicy& grad_eval,
|
||||
ArgumentContainer& x,
|
||||
const std::vector<RealType>& p,
|
||||
RealType f_x,
|
||||
RealType gTp0,
|
||||
RealType alpha_lo,
|
||||
RealType alpha_hi) const
|
||||
{
|
||||
ArgumentContainer x_trial;
|
||||
std::vector<RealType> g_trial(p.size());
|
||||
|
||||
for (int iter = 0; iter < max_iter_; ++iter) {
|
||||
const RealType alpha_mid = 0.5 * (alpha_lo + alpha_hi);
|
||||
x_trial = x;
|
||||
axpy(alpha_mid, p, x_trial);
|
||||
RealType f_mid;
|
||||
grad_eval(objective, x_trial, obj_eval, f_mid, g_trial);
|
||||
RealType gTp_mid = dot(g_trial, p);
|
||||
ArgumentContainer x_lo = x;
|
||||
axpy(alpha_lo, p, x_lo);
|
||||
RealType f_lo = static_cast<RealType>(obj_eval(objective, x_lo));
|
||||
if ((f_mid > f_x + c1_ * alpha_mid * gTp0) || (f_mid >= f_lo)) {
|
||||
alpha_hi = alpha_mid;
|
||||
} else {
|
||||
if (fabs(gTp_mid) <= c2_ * fabs(gTp0)) {
|
||||
return alpha_mid;
|
||||
}
|
||||
if (gTp_mid * (alpha_hi - alpha_lo) >= 0) {
|
||||
alpha_hi = alpha_lo;
|
||||
}
|
||||
alpha_lo = alpha_mid;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.5 * (alpha_lo + alpha_hi);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef RDIFF_OPTIMIZATION_POLICIES_HPP__
|
||||
#define RDIFF_OPTIMIZATION_POLICIES_HPP__
|
||||
|
||||
@@ -11,24 +15,29 @@ namespace optimization {
|
||||
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
/******************************************************************
|
||||
/******************************************************************/
|
||||
/**
|
||||
* @brief> function evaluation policy for reverse mode autodiff
|
||||
* @arg objective> objective function to evaluate
|
||||
* @arg x> argument list
|
||||
*/
|
||||
template <typename RealType>
|
||||
struct reverse_mode_function_eval_policy {
|
||||
template <typename Objective, class ArgumentContainer>
|
||||
rdiff::rvar<RealType, 1> operator()(Objective &&objective,
|
||||
ArgumentContainer &x) {
|
||||
auto &tape = rdiff::get_active_tape<RealType, 1>();
|
||||
template<typename RealType>
|
||||
struct reverse_mode_function_eval_policy
|
||||
{
|
||||
template<typename Objective, class ArgumentContainer>
|
||||
rdiff::rvar<RealType, 1> operator()(Objective&& objective,
|
||||
ArgumentContainer& x)
|
||||
{
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.zero_grad();
|
||||
tape.rewind_to_last_checkpoint();
|
||||
|
||||
return objective(x);
|
||||
}
|
||||
};
|
||||
/******************************************************************
|
||||
/******************************************************************/
|
||||
|
||||
/**
|
||||
* @brief> gradient evaluation policy
|
||||
* @arg obj_f> objective
|
||||
* @arg x> argument list
|
||||
@@ -36,13 +45,18 @@ struct reverse_mode_function_eval_policy {
|
||||
* done in tandem
|
||||
* @arg obj_v> reference to variable inside gradient class
|
||||
*/
|
||||
template <typename RealType>
|
||||
struct reverse_mode_gradient_evaluation_policy {
|
||||
template <class Objective, class ArgumentContainer,
|
||||
class FunctionEvaluationPolicy>
|
||||
void operator()(Objective &&obj_f, ArgumentContainer &x,
|
||||
FunctionEvaluationPolicy &&f_eval_pol, RealType &obj_v,
|
||||
std::vector<RealType> &g) {
|
||||
template<typename RealType>
|
||||
struct reverse_mode_gradient_evaluation_policy
|
||||
{
|
||||
template<class Objective,
|
||||
class ArgumentContainer,
|
||||
class FunctionEvaluationPolicy>
|
||||
void operator()(Objective&& obj_f,
|
||||
ArgumentContainer& x,
|
||||
FunctionEvaluationPolicy&& f_eval_pol,
|
||||
RealType& obj_v,
|
||||
std::vector<RealType>& g)
|
||||
{
|
||||
// compute objective via eval policy that takes care of tape
|
||||
rdiff::rvar<RealType, 1> v = f_eval_pol(obj_f, x);
|
||||
v.backward();
|
||||
@@ -58,54 +72,64 @@ struct reverse_mode_gradient_evaluation_policy {
|
||||
/******************************************************************
|
||||
* init policies
|
||||
*/
|
||||
template <typename RealType>
|
||||
struct tape_initializer_rvar {
|
||||
template <class ArgumentContainer>
|
||||
void operator()(ArgumentContainer &) const noexcept {
|
||||
template<typename RealType>
|
||||
struct tape_initializer_rvar
|
||||
{
|
||||
template<class ArgumentContainer>
|
||||
void operator()(ArgumentContainer&) const noexcept
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<typename ArgumentContainer::value_type,
|
||||
rdiff::rvar<RealType, 1>>::value,
|
||||
"ArgumentContainer::value_type must be rdiff::rvar<RealType,1>");
|
||||
auto &tape = rdiff::get_active_tape<RealType, 1>();
|
||||
std::is_same<typename ArgumentContainer::value_type,
|
||||
rdiff::rvar<RealType, 1>>::value,
|
||||
"ArgumentContainer::value_type must be rdiff::rvar<RealType,1>");
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.add_checkpoint();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename RealType>
|
||||
struct random_uniform_initializer_rvar {
|
||||
template<typename RealType>
|
||||
struct random_uniform_initializer_rvar
|
||||
{
|
||||
RealType low_, high_;
|
||||
size_t seed_;
|
||||
random_uniform_initializer_rvar(RealType low = 0, RealType high = 1,
|
||||
random_uniform_initializer_rvar(RealType low = 0,
|
||||
RealType high = 1,
|
||||
size_t seed = std::random_device{}())
|
||||
: low_(low), high_(high), seed_(seed) {};
|
||||
: low_(low)
|
||||
, high_(high)
|
||||
, seed_(seed) {};
|
||||
|
||||
template <class ArgumentContainer>
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
template<class ArgumentContainer>
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
boost::random::mt19937 gen(seed_);
|
||||
boost::random::uniform_real_distribution<RealType> dist(low_, high_);
|
||||
for (auto &xi : x) {
|
||||
for (auto& xi : x) {
|
||||
xi = rdiff::rvar<RealType, 1>(dist(gen));
|
||||
}
|
||||
auto &tape = rdiff::get_active_tape<RealType, 1>();
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.add_checkpoint();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename RealType>
|
||||
struct costant_initializer_rvar {
|
||||
template<typename RealType>
|
||||
struct costant_initializer_rvar
|
||||
{
|
||||
RealType constant;
|
||||
explicit costant_initializer_rvar(RealType v = 0) : constant(v) {};
|
||||
template <class ArgumentContainer>
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
for (auto &xi : x) {
|
||||
explicit costant_initializer_rvar(RealType v = 0)
|
||||
: constant(v) {};
|
||||
template<class ArgumentContainer>
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
xi = rdiff::rvar<RealType, 1>(constant);
|
||||
}
|
||||
auto &tape = rdiff::get_active_tape<RealType, 1>();
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.add_checkpoint();
|
||||
}
|
||||
};
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef GRADIENT_DESCENT_HPP
|
||||
#define GRADIENT_DESCENT_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
@@ -9,27 +13,31 @@ namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
template <typename RealType> struct gradient_descent_update_policy {
|
||||
template<typename RealType>
|
||||
struct gradient_descent_update_policy
|
||||
{
|
||||
RealType lr_;
|
||||
gradient_descent_update_policy(RealType lr) : lr_(lr) {};
|
||||
gradient_descent_update_policy(RealType lr)
|
||||
: lr_(lr) {};
|
||||
|
||||
template <typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::
|
||||
is_expression<ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType &x, RealType &g) {
|
||||
template<typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::is_expression<
|
||||
ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType& x, RealType& g)
|
||||
{
|
||||
// this update effectively "mutes" the tape
|
||||
// TODO: add a tape scope guard method so that
|
||||
// you can do math on autodiff types without
|
||||
// accumulating gradients
|
||||
x.get_value() -= lr_ * g;
|
||||
}
|
||||
template <
|
||||
typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType &x, RealType &g) const {
|
||||
template<typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType& x, RealType& g) const
|
||||
{
|
||||
x -= lr_ * g;
|
||||
}
|
||||
};
|
||||
@@ -43,34 +51,49 @@ template <typename RealType> struct gradient_descent_update_policy {
|
||||
* general optimization framework.
|
||||
*
|
||||
* @tparam> ArgumentContainer Type of the parameter container (e.g.
|
||||
* std::vector<SomeDifferentiableType or RealType>).
|
||||
* std::vector<SomeDifferentiableType or RealType>).
|
||||
* @tparam> RealType Floating-point type
|
||||
* @tparam> Objective Objective function type (functor or callable).
|
||||
* @tparam> InitializationPolicy Policy controlling initialization of
|
||||
* differentiable variables.
|
||||
* differentiable variables.
|
||||
* @tparam> ObjectiveEvalPolicy Policy defining how the objective is evaluated.
|
||||
* @tparam> GradEvalPolicy Policy defining how the gradient is computed.
|
||||
*/
|
||||
|
||||
template <typename ArgumentContainer, typename RealType, class Objective,
|
||||
class InitializationPolicy, class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
class gradient_descent
|
||||
: public abstract_optimizer<
|
||||
ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy,
|
||||
gradient_descent_update_policy<RealType>,
|
||||
gradient_descent<ArgumentContainer, RealType, Objective,
|
||||
InitializationPolicy, ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>> {
|
||||
using base_opt =
|
||||
abstract_optimizer<ArgumentContainer, RealType, Objective,
|
||||
InitializationPolicy, ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
gradient_descent_update_policy<RealType>,
|
||||
gradient_descent<ArgumentContainer, RealType,
|
||||
Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy>>;
|
||||
: public abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
gradient_descent_update_policy<RealType>,
|
||||
gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>
|
||||
{
|
||||
using base_opt = abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
gradient_descent_update_policy<RealType>,
|
||||
gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>;
|
||||
|
||||
public:
|
||||
using base_opt::base_opt;
|
||||
@@ -106,60 +129,76 @@ public:
|
||||
* custom learning rate
|
||||
*/
|
||||
|
||||
template <class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto make_gradient_descent(Objective &&obj, ArgumentContainer &x,
|
||||
RealType lr = RealType{0.01}) {
|
||||
return gradient_descent<ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
template<class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto
|
||||
make_gradient_descent(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr = RealType{ 0.01 })
|
||||
{
|
||||
return gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj), x, tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
gradient_descent_update_policy<RealType>(lr));
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
gradient_descent_update_policy<RealType>(lr));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief> convenience factory
|
||||
*
|
||||
* make_gradient_descent(objective, x, learning rate, initialization policy)
|
||||
*
|
||||
* possible initialization policies implemented:
|
||||
* tape_initializer_rvar
|
||||
* random_uniform_initializer_rvar
|
||||
* costant_initializer_rvar
|
||||
*
|
||||
* these are structs that initialize x via () operator
|
||||
* Default parameters:
|
||||
* reverse_mode_function_eval_policy
|
||||
* reverse_mode_gradient_evaluation_policy
|
||||
* */
|
||||
template <class Objective, typename ArgumentContainer, typename RealType,
|
||||
class InitializationPolicy>
|
||||
auto make_gradient_descent(Objective &&obj, ArgumentContainer &x, RealType lr,
|
||||
InitializationPolicy &&ip) {
|
||||
return gradient_descent<ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class InitializationPolicy>
|
||||
auto
|
||||
make_gradient_descent(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr,
|
||||
InitializationPolicy&& ip)
|
||||
{
|
||||
return gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
gradient_descent_update_policy<RealType>(lr));
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
gradient_descent_update_policy<RealType>(lr));
|
||||
}
|
||||
|
||||
template <typename ArgumentContainer, typename RealType, class Objective,
|
||||
class InitializationPolicy, class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
auto make_gradient_descent(Objective &&obj, ArgumentContainer &x, RealType &lr,
|
||||
InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
|
||||
GradEvalPolicy &&gep) {
|
||||
return gradient_descent<ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
InitializationPolicy, ObjectiveEvalPolicy,
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
auto
|
||||
make_gradient_descent(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType& lr,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep)
|
||||
{
|
||||
return gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>(
|
||||
std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep), std::forward<GradEvalPolicy>(gep),
|
||||
gradient_descent_update_policy<RealType>{lr});
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
gradient_descent_update_policy<RealType>{ lr });
|
||||
}
|
||||
|
||||
} // namespace optimization
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef GRADIENT_OPTIMIZERS_HPP
|
||||
#define GRADIENT_OPTIMIZERS_HPP
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
@@ -7,7 +11,8 @@
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {} // namespace optimization
|
||||
namespace optimization {
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
#ifndef LBFGS_HPP__
|
||||
#define LBFGS_HPP__
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef LBFGS_HPP
|
||||
#define LBFGS_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
|
||||
#include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
|
||||
@@ -24,8 +28,17 @@ namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
* @param -> f_prev - > previous function value
|
||||
*
|
||||
* https://en.wikipedia.org/wiki/Limited-memory_BFGS
|
||||
*
|
||||
* Jorge Nocedal and Stephen J. Wright,
|
||||
* Numerical Optimization, 2nd Edition,
|
||||
* Springer, 2006.
|
||||
*
|
||||
* pages 176-180
|
||||
* algorithms 7.4/7.5
|
||||
* */
|
||||
template <typename RealType> struct lbfgs_optimizer_state {
|
||||
template<typename RealType>
|
||||
struct lbfgs_optimizer_state
|
||||
{
|
||||
size_t m = 10; // default history length
|
||||
std::deque<std::vector<RealType>> S, Y;
|
||||
std::deque<RealType> rho;
|
||||
@@ -33,18 +46,23 @@ template <typename RealType> struct lbfgs_optimizer_state {
|
||||
RealType f_prev = std::numeric_limits<RealType>::quiet_NaN();
|
||||
const RealType EPS = std::numeric_limits<RealType>::epsilon();
|
||||
|
||||
template <typename ArgumentContainer>
|
||||
void update_state(ArgumentContainer &x, std::vector<RealType> &g_k,
|
||||
RealType fk) {
|
||||
template<typename ArgumentContainer>
|
||||
void update_state(ArgumentContainer& x,
|
||||
std::vector<RealType>& g_k,
|
||||
RealType fk)
|
||||
{
|
||||
|
||||
// iteration 0
|
||||
if (g_prev.empty()) {
|
||||
g_prev.assign(g_k.begin(), g_k.end());
|
||||
x_prev.resize(x.size());
|
||||
std::transform(x.begin(), x.end(), x_prev.begin(),
|
||||
[](const auto &xi) { return static_cast<RealType>(xi); });
|
||||
std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
|
||||
return static_cast<RealType>(xi);
|
||||
});
|
||||
f_prev = fk;
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<RealType> s_k(x.size()), y_k(g_k.size());
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
s_k[i] = static_cast<RealType>(x[i]) - x_prev[i];
|
||||
@@ -56,8 +74,8 @@ template <typename RealType> struct lbfgs_optimizer_state {
|
||||
RealType yn = sqrt(dot(y_k, y_k));
|
||||
|
||||
const RealType threshold = EPS * sn * yn;
|
||||
if (ys > threshold && ys > RealType(0)) {
|
||||
if (S.size() == m) {
|
||||
if (ys > threshold && ys > RealType(0)) { // check if curvature if non-zero
|
||||
if (S.size() == m) { // iteration > m
|
||||
S.pop_front();
|
||||
Y.pop_front();
|
||||
rho.pop_front();
|
||||
@@ -68,8 +86,10 @@ template <typename RealType> struct lbfgs_optimizer_state {
|
||||
}
|
||||
|
||||
g_prev.assign(g_k.begin(), g_k.end());
|
||||
std::transform(x.begin(), x.end(), x_prev.begin(),
|
||||
[](const auto &xi) { return static_cast<RealType>(xi); });
|
||||
// safely cast to realtype
|
||||
std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
|
||||
return static_cast<RealType>(xi);
|
||||
});
|
||||
f_prev = fk;
|
||||
}
|
||||
};
|
||||
@@ -77,20 +97,23 @@ template <typename RealType> struct lbfgs_optimizer_state {
|
||||
/** @brief> helper update for l-bfgs
|
||||
* x += alpha * search direction
|
||||
* */
|
||||
template <typename RealType> struct lbfgs_update_policy {
|
||||
template <typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::
|
||||
is_expression<ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType &x, RealType pk, RealType alpha) {
|
||||
template<typename RealType>
|
||||
struct lbfgs_update_policy
|
||||
{
|
||||
template<typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::is_expression<
|
||||
ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType& x, RealType pk, RealType alpha)
|
||||
{
|
||||
x.get_value() += alpha * pk;
|
||||
}
|
||||
template <
|
||||
typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType &x, RealType pk, RealType alpha) {
|
||||
template<typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType& x, RealType pk, RealType alpha)
|
||||
{
|
||||
x += alpha * pk;
|
||||
}
|
||||
};
|
||||
@@ -116,32 +139,57 @@ template <typename RealType> struct lbfgs_update_policy {
|
||||
* https://en.wikipedia.org/wiki/Limited-memory_BFGS
|
||||
*/
|
||||
|
||||
template <typename ArgumentContainer, typename RealType, class Objective,
|
||||
class InitializationPolicy, class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy, class LineSearchPolicy>
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy,
|
||||
class LineSearchPolicy>
|
||||
class lbfgs
|
||||
: public abstract_optimizer<
|
||||
ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy, lbfgs_update_policy<RealType>,
|
||||
lbfgs<ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy, LineSearchPolicy>> {
|
||||
using base_opt = abstract_optimizer<
|
||||
ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy, lbfgs_update_policy<RealType>,
|
||||
lbfgs<ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy, LineSearchPolicy>>;
|
||||
: public abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
lbfgs_update_policy<RealType>,
|
||||
lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
LineSearchPolicy>>
|
||||
{
|
||||
using base_opt = abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
lbfgs_update_policy<RealType>,
|
||||
lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
LineSearchPolicy>>;
|
||||
|
||||
const RealType EPS = std::numeric_limits<RealType>::epsilon();
|
||||
lbfgs_optimizer_state<RealType> state_;
|
||||
LineSearchPolicy line_search_;
|
||||
|
||||
std::vector<RealType> compute_direction(const std::vector<RealType> &gk) {
|
||||
std::vector<RealType> compute_direction(const std::vector<RealType>& gk)
|
||||
{
|
||||
const size_t n = gk.size();
|
||||
const size_t L = state_.S.size(); // since S changes when iter < m
|
||||
|
||||
if (L == 0) {
|
||||
std::vector<RealType> p(n);
|
||||
std::transform(gk.begin(), gk.end(), p.begin(),
|
||||
[](RealType gi) { return -gi; });
|
||||
std::transform(
|
||||
gk.begin(), gk.end(), p.begin(), [](RealType gi) { return -gi; });
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -166,22 +214,28 @@ class lbfgs
|
||||
const RealType beta = state_.rho[i] * yTr;
|
||||
axpy(alpha[i] - beta, state_.S[i], r);
|
||||
}
|
||||
scale(r, RealType{-1});
|
||||
scale(r, RealType{ -1 });
|
||||
return r;
|
||||
}
|
||||
|
||||
public:
|
||||
using base_opt::base_opt;
|
||||
lbfgs(Objective &&objective, ArgumentContainer &x, size_t m,
|
||||
InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
|
||||
GradEvalPolicy &&gep, lbfgs_update_policy<RealType> &&up,
|
||||
LineSearchPolicy &&lsp)
|
||||
: base_opt(std::forward<Objective>(objective), x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
std::forward<lbfgs_update_policy<RealType>>(up)),
|
||||
line_search_(lsp) {
|
||||
lbfgs(Objective&& objective,
|
||||
ArgumentContainer& x,
|
||||
size_t m,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep,
|
||||
lbfgs_update_policy<RealType>&& up,
|
||||
LineSearchPolicy&& lsp)
|
||||
: base_opt(std::forward<Objective>(objective),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
std::forward<lbfgs_update_policy<RealType>>(up))
|
||||
, line_search_(lsp)
|
||||
{
|
||||
state_.m = m;
|
||||
|
||||
state_.S.clear();
|
||||
@@ -191,14 +245,15 @@ public:
|
||||
state_.f_prev = std::numeric_limits<RealType>::quiet_NaN();
|
||||
}
|
||||
|
||||
void step() {
|
||||
auto &x = this->arguments();
|
||||
auto &g = this->gradients();
|
||||
auto &obj = this->objective_value();
|
||||
auto &obj_eval = this->obj_eval_;
|
||||
auto &grad_eval = this->grad_eval_;
|
||||
auto &objective = this->objective_;
|
||||
auto &update = this->update_;
|
||||
void step()
|
||||
{
|
||||
auto& x = this->arguments();
|
||||
auto& g = this->gradients();
|
||||
auto& obj = this->objective_value();
|
||||
auto& obj_eval = this->obj_eval_;
|
||||
auto& grad_eval = this->grad_eval_;
|
||||
auto& objective = this->objective_;
|
||||
auto& update = this->update_;
|
||||
|
||||
grad_eval(objective, x, obj_eval, obj, g);
|
||||
state_.update_state(x, g, obj);
|
||||
@@ -210,17 +265,84 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto make_lbfgs(Objective &&obj, ArgumentContainer &x, std::size_t m = 10) {
|
||||
return lbfgs<ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
template<class Objective, typename ArgumentContainer>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj, ArgumentContainer& x, std::size_t m = 10)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>,
|
||||
armijo_line_search_policy<RealType>>(
|
||||
std::forward<Objective>(obj), x, m, tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{}, armijo_line_search_policy<RealType>{});
|
||||
strong_wolfe_line_search_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{},
|
||||
strong_wolfe_line_search_policy<RealType>{});
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
class InitializationPolicy>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
std::size_t m,
|
||||
InitializationPolicy&& ip)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>,
|
||||
strong_wolfe_line_search_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{},
|
||||
strong_wolfe_line_search_policy<RealType>{});
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
class InitializationPolicy,
|
||||
class LineSearchPolicy>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
std::size_t m,
|
||||
InitializationPolicy&& ip,
|
||||
LineSearchPolicy&& lsp)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>,
|
||||
LineSearchPolicy>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{},
|
||||
std::forward<LineSearchPolicy>(lsp));
|
||||
}
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef MINIMIZER_HPP
|
||||
#define MINIMIZER_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
@@ -6,16 +10,19 @@
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
template <typename RealType> struct optimization_result {
|
||||
template<typename RealType>
|
||||
struct optimization_result
|
||||
{
|
||||
size_t num_iter = 0;
|
||||
RealType objective_value;
|
||||
std::vector<RealType> objective_history;
|
||||
bool converged;
|
||||
};
|
||||
|
||||
template <typename RealType>
|
||||
std::ostream &operator<<(std::ostream &os,
|
||||
const optimization_result<RealType> &r) {
|
||||
template<typename RealType>
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, const optimization_result<RealType>& r)
|
||||
{
|
||||
os << "optimization_result {\n"
|
||||
<< " num_iter = " << r.num_iter << "\n"
|
||||
<< " objective_value = " << r.objective_value << "\n"
|
||||
@@ -32,26 +39,39 @@ std::ostream &operator<<(std::ostream &os,
|
||||
return os;
|
||||
}
|
||||
/*****************************************************************************************/
|
||||
template <typename RealType> struct gradient_norm_convergence_policy {
|
||||
template<typename RealType>
|
||||
struct gradient_norm_convergence_policy
|
||||
{
|
||||
RealType tol_;
|
||||
explicit gradient_norm_convergence_policy(RealType tol) : tol_(tol) {}
|
||||
explicit gradient_norm_convergence_policy(RealType tol)
|
||||
: tol_(tol)
|
||||
{
|
||||
}
|
||||
|
||||
template <class GradientContainer>
|
||||
bool operator()(const GradientContainer &g, RealType /*objective_v*/) const {
|
||||
template<class GradientContainer>
|
||||
bool operator()(const GradientContainer& g, RealType /*objective_v*/) const
|
||||
{
|
||||
return norm_2(g) < tol_;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename RealType> struct objective_tol_convergence_policy {
|
||||
template<typename RealType>
|
||||
struct objective_tol_convergence_policy
|
||||
{
|
||||
RealType tol_;
|
||||
mutable RealType last_value_;
|
||||
mutable bool first_call_;
|
||||
|
||||
explicit objective_tol_convergence_policy(RealType tol)
|
||||
: tol_(tol), last_value_(0), first_call_(true) {}
|
||||
: tol_(tol)
|
||||
, last_value_(0)
|
||||
, first_call_(true)
|
||||
{
|
||||
}
|
||||
|
||||
template <class GradientContainer>
|
||||
bool operator()(const GradientContainer &, RealType objective_v) const {
|
||||
template<class GradientContainer>
|
||||
bool operator()(const GradientContainer&, RealType objective_v) const
|
||||
{
|
||||
if (first_call_) {
|
||||
last_value_ = objective_v;
|
||||
first_call_ = false;
|
||||
@@ -63,16 +83,23 @@ template <typename RealType> struct objective_tol_convergence_policy {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename RealType> struct relative_objective_tol_policy {
|
||||
template<typename RealType>
|
||||
struct relative_objective_tol_policy
|
||||
{
|
||||
RealType rel_tol_;
|
||||
mutable RealType last_value_;
|
||||
mutable bool first_call_;
|
||||
|
||||
explicit relative_objective_tol_policy(RealType rel_tol)
|
||||
: rel_tol_(rel_tol), last_value_(0), first_call_(true) {}
|
||||
: rel_tol_(rel_tol)
|
||||
, last_value_(0)
|
||||
, first_call_(true)
|
||||
{
|
||||
}
|
||||
|
||||
template <class GradientContainer>
|
||||
bool operator()(const GradientContainer &, RealType objective_v) const {
|
||||
template<class GradientContainer>
|
||||
bool operator()(const GradientContainer&, RealType objective_v) const
|
||||
{
|
||||
if (first_call_) {
|
||||
last_value_ = objective_v;
|
||||
first_call_ = false;
|
||||
@@ -85,23 +112,33 @@ template <typename RealType> struct relative_objective_tol_policy {
|
||||
}
|
||||
};
|
||||
|
||||
template <class Policy1, class Policy2> struct combined_convergence_policy {
|
||||
template<class Policy1, class Policy2>
|
||||
struct combined_convergence_policy
|
||||
{
|
||||
Policy1 p1_;
|
||||
Policy2 p2_;
|
||||
|
||||
combined_convergence_policy(Policy1 p1, Policy2 p2) : p1_(p1), p2_(p2) {}
|
||||
combined_convergence_policy(Policy1 p1, Policy2 p2)
|
||||
: p1_(p1)
|
||||
, p2_(p2)
|
||||
{
|
||||
}
|
||||
|
||||
template <class GradientContainer, class RealType>
|
||||
bool operator()(const GradientContainer &g, RealType obj) const {
|
||||
template<class GradientContainer, class RealType>
|
||||
bool operator()(const GradientContainer& g, RealType obj) const
|
||||
{
|
||||
return p1_(g, obj) || p2_(g, obj);
|
||||
}
|
||||
};
|
||||
|
||||
/*****************************************************************************************/
|
||||
struct max_iter_termination_policy {
|
||||
struct max_iter_termination_policy
|
||||
{
|
||||
size_t max_iter_;
|
||||
max_iter_termination_policy(size_t max_iter) : max_iter_(max_iter) {};
|
||||
bool operator()(size_t iter) {
|
||||
max_iter_termination_policy(size_t max_iter)
|
||||
: max_iter_(max_iter) {};
|
||||
bool operator()(size_t iter)
|
||||
{
|
||||
if (iter < max_iter_) {
|
||||
return false;
|
||||
}
|
||||
@@ -109,118 +146,156 @@ struct max_iter_termination_policy {
|
||||
}
|
||||
};
|
||||
|
||||
struct wallclock_termination_policy {
|
||||
struct wallclock_termination_policy
|
||||
{
|
||||
std::chrono::steady_clock::time_point start_;
|
||||
std::chrono::milliseconds max_time_;
|
||||
|
||||
explicit wallclock_termination_policy(std::chrono::milliseconds max_time)
|
||||
: start_(std::chrono::steady_clock::now()), max_time_(max_time) {}
|
||||
: start_(std::chrono::steady_clock::now())
|
||||
, max_time_(max_time)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator()(size_t /*iter*/) const {
|
||||
bool operator()(size_t /*iter*/) const
|
||||
{
|
||||
return std::chrono::steady_clock::now() - start_ > max_time_;
|
||||
}
|
||||
};
|
||||
|
||||
/*****************************************************************************************/
|
||||
template <typename ArgumentContainer> struct unconstrained_policy {
|
||||
void operator()(ArgumentContainer &) {}
|
||||
template<typename ArgumentContainer>
|
||||
struct unconstrained_policy
|
||||
{
|
||||
void operator()(ArgumentContainer&) {}
|
||||
};
|
||||
|
||||
template <typename ArgumentContainer, typename RealType>
|
||||
struct box_constraints {
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct box_constraints
|
||||
{
|
||||
RealType min_, max_;
|
||||
box_constraints(RealType min, RealType max) : min_(min), max_(max) {};
|
||||
void operator()(ArgumentContainer &x) {
|
||||
for (auto &xi : x) {
|
||||
box_constraints(RealType min, RealType max)
|
||||
: min_(min)
|
||||
, max_(max) {};
|
||||
void operator()(ArgumentContainer& x)
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
xi = (xi < min_) ? min_ : (max_ < xi) ? max_ : xi;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ArgumentContainer, typename RealType>
|
||||
struct nonnegativity_constraint {
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
for (auto &xi : x) {
|
||||
if (xi < RealType{0})
|
||||
xi = RealType{0};
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct nonnegativity_constraint
|
||||
{
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
if (xi < RealType{ 0 })
|
||||
xi = RealType{ 0 };
|
||||
}
|
||||
}
|
||||
};
|
||||
template <typename ArgumentContainer, typename RealType>
|
||||
struct l2_ball_constraint {
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct l2_ball_constraint
|
||||
{
|
||||
RealType radius_;
|
||||
|
||||
explicit l2_ball_constraint(RealType radius) : radius_(radius) {}
|
||||
explicit l2_ball_constraint(RealType radius)
|
||||
: radius_(radius)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType norm2v = norm_2(x);
|
||||
if (norm2v > radius_) {
|
||||
RealType scale = radius_ / norm2v;
|
||||
for (auto &xi : x)
|
||||
for (auto& xi : x)
|
||||
xi *= scale;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ArgumentContainer, typename RealType>
|
||||
struct l1_ball_constraint {
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct l1_ball_constraint
|
||||
{
|
||||
RealType radius_;
|
||||
|
||||
explicit l1_ball_constraint(RealType radius) : radius_(radius) {}
|
||||
explicit l1_ball_constraint(RealType radius)
|
||||
: radius_(radius)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType norm1v = norm_1(x);
|
||||
|
||||
if (norm1v > radius_) {
|
||||
RealType scale = radius_ / norm1v;
|
||||
for (auto &xi : x)
|
||||
for (auto& xi : x)
|
||||
xi *= scale;
|
||||
}
|
||||
}
|
||||
};
|
||||
template <typename ArgumentContainer, typename RealType>
|
||||
struct simplex_constraint {
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
RealType sum = RealType{0};
|
||||
for (auto &xi : x) {
|
||||
if (xi < RealType{0})
|
||||
xi = RealType{0}; // clip negatives
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct simplex_constraint
|
||||
{
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType sum = RealType{ 0 };
|
||||
for (auto& xi : x) {
|
||||
if (xi < RealType{ 0 })
|
||||
xi = RealType{ 0 }; // clip negatives
|
||||
sum += xi;
|
||||
}
|
||||
if (sum > RealType{0}) {
|
||||
for (auto &xi : x)
|
||||
if (sum > RealType{ 0 }) {
|
||||
for (auto& xi : x)
|
||||
xi /= sum;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ArgumentContainer> struct function_constraint {
|
||||
using func_t = void (*)(ArgumentContainer &);
|
||||
template<typename ArgumentContainer>
|
||||
struct function_constraint
|
||||
{
|
||||
using func_t = void (*)(ArgumentContainer&);
|
||||
|
||||
func_t f_;
|
||||
|
||||
explicit function_constraint(func_t f) : f_(f) {}
|
||||
explicit function_constraint(func_t f)
|
||||
: f_(f)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(ArgumentContainer &x) const { f_(x); }
|
||||
void operator()(ArgumentContainer& x) const { f_(x); }
|
||||
};
|
||||
template <typename ArgumentContainer, typename RealType>
|
||||
struct unit_sphere_constraint {
|
||||
void operator()(ArgumentContainer &x) const {
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct unit_sphere_constraint
|
||||
{
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType norm2v = norm_2(x);
|
||||
RealType norm = sqrt(norm2v);
|
||||
if (norm > RealType{0}) {
|
||||
for (auto &xi : x)
|
||||
if (norm > RealType{ 0 }) {
|
||||
for (auto& xi : x)
|
||||
xi /= norm;
|
||||
}
|
||||
}
|
||||
};
|
||||
/*****************************************************************************************/
|
||||
|
||||
template <class Optimizer, class ConstraintPolicy, class ConvergencePolicy,
|
||||
class TerminationPolicy>
|
||||
auto minimize_impl(Optimizer &opt, ConstraintPolicy project,
|
||||
ConvergencePolicy converged, TerminationPolicy terminate,
|
||||
bool history) {
|
||||
template<class Optimizer,
|
||||
class ConstraintPolicy,
|
||||
class ConvergencePolicy,
|
||||
class TerminationPolicy>
|
||||
auto
|
||||
minimize_impl(Optimizer& opt,
|
||||
ConstraintPolicy project,
|
||||
ConvergencePolicy converged,
|
||||
TerminationPolicy terminate,
|
||||
bool history)
|
||||
{
|
||||
optimization_result<typename Optimizer::real_type_t> result;
|
||||
size_t iter = 0;
|
||||
do {
|
||||
@@ -238,18 +313,21 @@ auto minimize_impl(Optimizer &opt, ConstraintPolicy project,
|
||||
result.converged = converged(opt.gradients(), opt.objective_value());
|
||||
return result;
|
||||
}
|
||||
template <class Optimizer,
|
||||
class ConstraintPolicy =
|
||||
unconstrained_policy<typename Optimizer::argument_container_t>,
|
||||
class ConvergencePolicy =
|
||||
gradient_norm_convergence_policy<typename Optimizer::real_type_t>,
|
||||
class TerminationPolicy = max_iter_termination_policy>
|
||||
auto minimize(Optimizer &opt, ConstraintPolicy project = ConstraintPolicy{},
|
||||
ConvergencePolicy converged =
|
||||
ConvergencePolicy{
|
||||
static_cast<typename Optimizer::real_type_t>(1e-8)},
|
||||
TerminationPolicy terminate = TerminationPolicy(10000),
|
||||
bool history = true) {
|
||||
template<class Optimizer,
|
||||
class ConstraintPolicy =
|
||||
unconstrained_policy<typename Optimizer::argument_container_t>,
|
||||
class ConvergencePolicy =
|
||||
gradient_norm_convergence_policy<typename Optimizer::real_type_t>,
|
||||
class TerminationPolicy = max_iter_termination_policy>
|
||||
auto
|
||||
minimize(Optimizer& opt,
|
||||
ConstraintPolicy project = ConstraintPolicy{},
|
||||
ConvergencePolicy converged =
|
||||
ConvergencePolicy{
|
||||
static_cast<typename Optimizer::real_type_t>(1e-8) },
|
||||
TerminationPolicy terminate = TerminationPolicy(10000),
|
||||
bool history = true)
|
||||
{
|
||||
return minimize_impl(opt, project, converged, terminate, history);
|
||||
}
|
||||
} // namespace optimization
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
#ifndef NESTEROV_HPP__
|
||||
#define NESTEROV_HPP__
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef NESTEROV_HPP
|
||||
#define NESTEROV_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
|
||||
#include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
|
||||
@@ -11,69 +15,109 @@ namespace optimization {
|
||||
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
template <typename RealType> struct nesterov_update_policy {
|
||||
/**
|
||||
* @brief The nesterov_update_policy class
|
||||
*/
|
||||
template<typename RealType>
|
||||
struct nesterov_update_policy
|
||||
{
|
||||
RealType lr_, mu_;
|
||||
nesterov_update_policy(RealType lr, RealType mu) : lr_(lr), mu_(mu) {};
|
||||
nesterov_update_policy(RealType lr, RealType mu)
|
||||
: lr_(lr)
|
||||
, mu_(mu) {};
|
||||
|
||||
template <typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::
|
||||
is_expression<ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType &x, RealType &g, RealType &v) {
|
||||
template<typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::is_expression<
|
||||
ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType& x, RealType& g, RealType& v)
|
||||
{
|
||||
v = mu_ * v - lr_ * g;
|
||||
x.get_value() += v;
|
||||
}
|
||||
template <
|
||||
typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType &x, RealType &g, RealType &v) const {
|
||||
template<typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType& x, RealType& g, RealType& v) const
|
||||
{
|
||||
v = mu_ * v - lr_ * g;
|
||||
x += v;
|
||||
}
|
||||
RealType lr() const noexcept { return lr_; }
|
||||
RealType mu() const noexcept { return mu_; }
|
||||
};
|
||||
template <typename ArgumentContainer, typename RealType, class Objective,
|
||||
class InitializationPolicy, class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
|
||||
/**
|
||||
* @brief The nesterov_accelerated_gradient class
|
||||
*
|
||||
* https://jlmelville.github.io/mize/nesterov.html
|
||||
*/
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
class nesterov_accelerated_gradient
|
||||
: public abstract_optimizer<
|
||||
ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy, nesterov_update_policy<RealType>,
|
||||
nesterov_accelerated_gradient<ArgumentContainer, RealType, Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy>> {
|
||||
using base_opt = abstract_optimizer<
|
||||
ArgumentContainer, RealType, Objective, InitializationPolicy,
|
||||
ObjectiveEvalPolicy, GradEvalPolicy, nesterov_update_policy<RealType>,
|
||||
nesterov_accelerated_gradient<ArgumentContainer, RealType, Objective,
|
||||
InitializationPolicy, ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>;
|
||||
: public abstract_optimizer<
|
||||
ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
nesterov_update_policy<RealType>,
|
||||
nesterov_accelerated_gradient<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>
|
||||
{
|
||||
using base_opt =
|
||||
abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
nesterov_update_policy<RealType>,
|
||||
nesterov_accelerated_gradient<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>;
|
||||
std::vector<RealType> v_;
|
||||
|
||||
public:
|
||||
using base_opt::base_opt;
|
||||
nesterov_accelerated_gradient(Objective &&objective, ArgumentContainer &x,
|
||||
InitializationPolicy &&ip,
|
||||
ObjectiveEvalPolicy &&oep, GradEvalPolicy &&gep,
|
||||
nesterov_update_policy<RealType> &&up)
|
||||
: base_opt(std::forward<Objective>(objective), x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
std::forward<nesterov_update_policy<RealType>>(up)),
|
||||
v_(x.size(), RealType(0)) {}
|
||||
nesterov_accelerated_gradient(Objective&& objective,
|
||||
ArgumentContainer& x,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep,
|
||||
nesterov_update_policy<RealType>&& up)
|
||||
: base_opt(std::forward<Objective>(objective),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
std::forward<nesterov_update_policy<RealType>>(up))
|
||||
, v_(x.size(), RealType(0))
|
||||
{
|
||||
}
|
||||
|
||||
void step() {
|
||||
auto &x = this->arguments();
|
||||
auto &g = this->gradients();
|
||||
auto &obj = this->objective_value();
|
||||
auto &obj_eval = this->obj_eval_;
|
||||
auto &grad_eval = this->grad_eval_;
|
||||
auto &objective = this->objective_;
|
||||
auto &update = this->update_;
|
||||
void step()
|
||||
{
|
||||
auto& x = this->arguments();
|
||||
auto& g = this->gradients();
|
||||
auto& obj = this->objective_value();
|
||||
auto& obj_eval = this->obj_eval_;
|
||||
auto& grad_eval = this->grad_eval_;
|
||||
auto& objective = this->objective_;
|
||||
auto& update = this->update_;
|
||||
|
||||
grad_eval(objective, x, obj_eval, obj, g);
|
||||
|
||||
@@ -82,44 +126,79 @@ public:
|
||||
}
|
||||
}
|
||||
};
|
||||
template <class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto make_nag(Objective &&obj, ArgumentContainer &x,
|
||||
RealType lr = RealType{0.01}, RealType mu = RealType{0.95}) {
|
||||
template<class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto
|
||||
make_nag(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr = RealType{ 0.01 },
|
||||
RealType mu = RealType{ 0.95 })
|
||||
{
|
||||
return nesterov_accelerated_gradient<
|
||||
ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj), x, tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
nesterov_update_policy<RealType>(lr, mu));
|
||||
ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
nesterov_update_policy<RealType>(lr, mu));
|
||||
}
|
||||
template <class Objective, typename ArgumentContainer, typename RealType,
|
||||
class InitializationPolicy>
|
||||
auto make_nag(Objective &&obj, ArgumentContainer &x, RealType lr, RealType mu,
|
||||
InitializationPolicy &&ip) {
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class InitializationPolicy>
|
||||
auto
|
||||
make_nag(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr,
|
||||
RealType mu,
|
||||
InitializationPolicy&& ip)
|
||||
{
|
||||
return nesterov_accelerated_gradient<
|
||||
ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
InitializationPolicy, reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
nesterov_update_policy<RealType>(lr, mu));
|
||||
ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
nesterov_update_policy<RealType>(lr, mu));
|
||||
}
|
||||
template <typename ArgumentContainer, typename RealType, class Objective,
|
||||
class InitializationPolicy, class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
auto make_nag(Objective &&obj, ArgumentContainer &x, RealType lr, RealType mu,
|
||||
InitializationPolicy &&ip, ObjectiveEvalPolicy &&oep,
|
||||
GradEvalPolicy &&gep) {
|
||||
return nesterov_accelerated_gradient<
|
||||
ArgumentContainer, RealType, std::decay_t<Objective>,
|
||||
InitializationPolicy, ObjectiveEvalPolicy, GradEvalPolicy>(
|
||||
std::forward<Objective>(obj), x, std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep), std::forward<GradEvalPolicy>(gep),
|
||||
nesterov_update_policy<RealType>{lr, mu});
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
auto
|
||||
make_nag(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr,
|
||||
RealType mu,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep)
|
||||
{
|
||||
return nesterov_accelerated_gradient<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
nesterov_update_policy<RealType>{ lr, mu });
|
||||
}
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
|
||||
@@ -1,318 +1,336 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#include "test_autodiff_reverse.hpp" // reuse for some basic options
|
||||
#include "test_functions_for_optimization.hpp"
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <boost/math/optimization/gradient_descent.hpp>
|
||||
#include <boost/math/optimization/minimizer.hpp>
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
namespace bopt = boost::math::optimization;
|
||||
namespace bopt = boost::math::optimization;
|
||||
BOOST_AUTO_TEST_SUITE(basic_gradient_descent)
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(default_gd_test, T, all_float_types)
|
||||
{
|
||||
size_t NITER = 2000;
|
||||
size_t N = 15;
|
||||
T lr = T{1e-2};
|
||||
RandomSample<T> rng{T(-100), (100)};
|
||||
std::vector<rdiff::rvar<T, 1>> x_ad;
|
||||
T eps = T{1e-3};
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x_ad.push_back(rng.next());
|
||||
}
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
|
||||
for (size_t i = 0; i < NITER; ++i) {
|
||||
gdopt.step();
|
||||
}
|
||||
for (auto& x : x_ad) {
|
||||
BOOST_REQUIRE_SMALL(x.item(), eps);
|
||||
}
|
||||
size_t NITER = 2000;
|
||||
size_t N = 15;
|
||||
T lr = T{ 1e-2 };
|
||||
RandomSample<T> rng{ T(-100), (100) };
|
||||
std::vector<rdiff::rvar<T, 1>> x_ad;
|
||||
T eps = T{ 1e-3 };
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x_ad.push_back(rng.next());
|
||||
}
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
|
||||
for (size_t i = 0; i < NITER; ++i) {
|
||||
gdopt.step();
|
||||
}
|
||||
for (auto& x : x_ad) {
|
||||
BOOST_REQUIRE_SMALL(x.item(), eps);
|
||||
}
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(test_minimize, T, all_float_types)
|
||||
{
|
||||
size_t NITER = 2000;
|
||||
size_t N = 15;
|
||||
T lr = T{1e-2};
|
||||
RandomSample<T> rng{T(-100), (100)};
|
||||
std::vector<rdiff::rvar<T, 1>> x_ad;
|
||||
T eps = T{1e-3};
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x_ad.push_back(rng.next());
|
||||
}
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
|
||||
auto z = minimize(gdopt);
|
||||
for (auto& x : x_ad) {
|
||||
BOOST_REQUIRE_SMALL(x.item(), eps);
|
||||
}
|
||||
size_t NITER = 2000;
|
||||
size_t N = 15;
|
||||
T lr = T{ 1e-2 };
|
||||
RandomSample<T> rng{ T(-100), (100) };
|
||||
std::vector<rdiff::rvar<T, 1>> x_ad;
|
||||
T eps = T{ 1e-3 };
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x_ad.push_back(rng.next());
|
||||
}
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
|
||||
auto z = minimize(gdopt);
|
||||
for (auto& x : x_ad) {
|
||||
BOOST_REQUIRE_SMALL(x.item(), eps);
|
||||
}
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(random_initializer_test, T, all_float_types)
|
||||
{
|
||||
size_t N = 10;
|
||||
T lr = T{1e-2};
|
||||
std::vector<rdiff::rvar<T, 1>> x(N);
|
||||
size_t N = 10;
|
||||
T lr = T{ 1e-2 };
|
||||
std::vector<rdiff::rvar<T, 1>> x(N);
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(
|
||||
&quadratic<rdiff::rvar<T, 1>>,
|
||||
x,
|
||||
lr,
|
||||
bopt::random_uniform_initializer_rvar<T>(-2.0, 2.0, 1234)); // all initialized to 5
|
||||
for (auto& xi : x) {
|
||||
T v = xi.item();
|
||||
BOOST_TEST(v >= -2);
|
||||
BOOST_TEST(v <= 2);
|
||||
}
|
||||
gdopt.step();
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>,
|
||||
x,
|
||||
lr,
|
||||
bopt::random_uniform_initializer_rvar<T>(
|
||||
-2.0, 2.0, 1234)); // all initialized to 5
|
||||
for (auto& xi : x) {
|
||||
T v = xi.item();
|
||||
BOOST_TEST(v >= -2);
|
||||
BOOST_TEST(v <= 2);
|
||||
}
|
||||
gdopt.step();
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(const_initializer_test, T, all_float_types)
|
||||
{
|
||||
size_t N = 10;
|
||||
T lr = T{1e-2};
|
||||
std::vector<rdiff::rvar<T, 1>> x(N);
|
||||
size_t N = 10;
|
||||
T lr = T{ 1e-2 };
|
||||
std::vector<rdiff::rvar<T, 1>> x(N);
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>,
|
||||
x,
|
||||
lr,
|
||||
bopt::costant_initializer_rvar<T>(
|
||||
T{5.0})); // all initialized to 5
|
||||
auto gdopt = bopt::make_gradient_descent(
|
||||
&quadratic<rdiff::rvar<T, 1>>,
|
||||
x,
|
||||
lr,
|
||||
bopt::costant_initializer_rvar<T>(T{ 5.0 })); // all initialized to 5
|
||||
|
||||
for (auto& xi : x) {
|
||||
T v = xi.item();
|
||||
BOOST_REQUIRE_CLOSE(v, T{5.0}, T{1e-3});
|
||||
}
|
||||
gdopt.step();
|
||||
for (auto& xi : x) {
|
||||
T v = xi.item();
|
||||
BOOST_REQUIRE_CLOSE(v, T{ 5.0 }, T{ 1e-3 });
|
||||
}
|
||||
gdopt.step();
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(box_constraint_test, T, all_float_types)
|
||||
{
|
||||
size_t N = 5;
|
||||
T lr = T{1e-2};
|
||||
std::vector<rdiff::rvar<T, 1>> x(N, T{10});
|
||||
size_t N = 5;
|
||||
T lr = T{ 1e-2 };
|
||||
std::vector<rdiff::rvar<T, 1>> x(N, T{ 10 });
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
|
||||
|
||||
auto res = bopt::minimize(gdopt,
|
||||
bopt::box_constraints<std::vector<rdiff::rvar<T, 1>>, T>(-1.0, 1.0));
|
||||
auto res = bopt::minimize(
|
||||
gdopt, bopt::box_constraints<std::vector<rdiff::rvar<T, 1>>, T>(-1.0, 1.0));
|
||||
|
||||
for (auto& xi : x) {
|
||||
BOOST_TEST(xi.item() >= -1.0);
|
||||
BOOST_TEST(xi.item() <= 1.0);
|
||||
}
|
||||
for (auto& xi : x) {
|
||||
BOOST_TEST(xi.item() >= -1.0);
|
||||
BOOST_TEST(xi.item() <= 1.0);
|
||||
}
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(max_iter_test, T, all_float_types)
|
||||
{
|
||||
size_t N = 2;
|
||||
T lr = T{1e-6}; // very slow learning
|
||||
std::vector<rdiff::rvar<T, 1>> x = {T{5}, T{5}};
|
||||
size_t N = 2;
|
||||
T lr = T{ 1e-6 }; // very slow learning
|
||||
std::vector<rdiff::rvar<T, 1>> x = { T{ 5 }, T{ 5 } };
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
|
||||
|
||||
size_t max_iter = 50;
|
||||
auto res = bopt::minimize(gdopt,
|
||||
bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
|
||||
bopt::gradient_norm_convergence_policy<T>(T{1e-20}),
|
||||
bopt::max_iter_termination_policy(max_iter));
|
||||
size_t max_iter = 50;
|
||||
auto res =
|
||||
bopt::minimize(gdopt,
|
||||
bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
|
||||
bopt::gradient_norm_convergence_policy<T>(T{ 1e-20 }),
|
||||
bopt::max_iter_termination_policy(max_iter));
|
||||
|
||||
BOOST_TEST(!res.converged); // should not converge with tiny lr
|
||||
BOOST_REQUIRE_EQUAL(res.num_iter, max_iter);
|
||||
BOOST_TEST(!res.converged); // should not converge with tiny lr
|
||||
BOOST_REQUIRE_EQUAL(res.num_iter, max_iter);
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(history_tracking_test, T, all_float_types)
|
||||
{
|
||||
size_t N = 3;
|
||||
T lr = T{1e-2};
|
||||
std::vector<rdiff::rvar<T, 1>> x = {T{3}, T{-4}, T{5}};
|
||||
size_t N = 3;
|
||||
T lr = T{ 1e-2 };
|
||||
std::vector<rdiff::rvar<T, 1>> x = { T{ 3 }, T{ -4 }, T{ 5 } };
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
|
||||
|
||||
auto res = bopt::minimize(gdopt,
|
||||
bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
|
||||
bopt::gradient_norm_convergence_policy<T>(T{1e-6}),
|
||||
bopt::max_iter_termination_policy(1000),
|
||||
true); // enable history
|
||||
auto res =
|
||||
bopt::minimize(gdopt,
|
||||
bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
|
||||
bopt::gradient_norm_convergence_policy<T>(T{ 1e-6 }),
|
||||
bopt::max_iter_termination_policy(1000),
|
||||
true); // enable history
|
||||
|
||||
BOOST_TEST(!res.objective_history.empty());
|
||||
BOOST_TEST(res.objective_history.front() > res.objective_history.back());
|
||||
BOOST_TEST(!res.objective_history.empty());
|
||||
BOOST_TEST(res.objective_history.front() > res.objective_history.back());
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(rosenbrock_test, T, all_float_types)
|
||||
{
|
||||
std::array<rdiff::rvar<T, 1>, 2> x = {T{-1.2}, T{1.0}}; // bad start
|
||||
T lr = T{1e-3};
|
||||
std::array<rdiff::rvar<T, 1>, 2> x = { T{ -1.2 }, T{ 1.0 } }; // bad start
|
||||
T lr = T{ 1e-3 };
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, lr);
|
||||
auto gdopt =
|
||||
bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, lr);
|
||||
|
||||
auto res = bopt::minimize(gdopt,
|
||||
bopt::unconstrained_policy<std::array<rdiff::rvar<T, 1>, 2>>{},
|
||||
bopt::gradient_norm_convergence_policy<T>(T{1e-4}),
|
||||
bopt::max_iter_termination_policy(50000));
|
||||
BOOST_TEST(res.converged);
|
||||
BOOST_REQUIRE_CLOSE(x[0].item(), T{1.0}, T{1e-1});
|
||||
BOOST_REQUIRE_CLOSE(x[1].item(), T{1.0}, T{1e-1});
|
||||
auto res = bopt::minimize(
|
||||
gdopt,
|
||||
bopt::unconstrained_policy<std::array<rdiff::rvar<T, 1>, 2>>{},
|
||||
bopt::gradient_norm_convergence_policy<T>(T{ 1e-4 }),
|
||||
bopt::max_iter_termination_policy(50000));
|
||||
BOOST_TEST(res.converged);
|
||||
BOOST_REQUIRE_CLOSE(x[0].item(), T{ 1.0 }, T{ 1e-1 });
|
||||
BOOST_REQUIRE_CLOSE(x[1].item(), T{ 1.0 }, T{ 1e-1 });
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(objective_tol_convergence_test, T, all_float_types)
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(objective_tol_convergence_test,
|
||||
T,
|
||||
all_float_types)
|
||||
{
|
||||
using policy_t = bopt::objective_tol_convergence_policy<T>;
|
||||
policy_t pol(1e-3);
|
||||
std::vector<T> dummy_grad;
|
||||
using policy_t = bopt::objective_tol_convergence_policy<T>;
|
||||
policy_t pol(1e-3);
|
||||
std::vector<T> dummy_grad;
|
||||
|
||||
BOOST_TEST(!pol(dummy_grad, 100.0));
|
||||
BOOST_TEST(!pol(dummy_grad, 99.0));
|
||||
BOOST_TEST(pol(dummy_grad, 99.0005));
|
||||
BOOST_TEST(!pol(dummy_grad, 100.0));
|
||||
BOOST_TEST(!pol(dummy_grad, 99.0));
|
||||
BOOST_TEST(pol(dummy_grad, 99.0005));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(relative_objective_tol_test, T, all_float_types)
|
||||
{
|
||||
using policy_t = bopt::relative_objective_tol_policy<T>;
|
||||
policy_t pol(1e-3);
|
||||
using policy_t = bopt::relative_objective_tol_policy<T>;
|
||||
policy_t pol(1e-3);
|
||||
|
||||
std::vector<T> dummy_grad;
|
||||
BOOST_TEST(!pol(dummy_grad, 1000.0));
|
||||
BOOST_TEST(!pol(dummy_grad, 1010.0));
|
||||
BOOST_TEST(pol(dummy_grad, 1010.5));
|
||||
std::vector<T> dummy_grad;
|
||||
BOOST_TEST(!pol(dummy_grad, 1000.0));
|
||||
BOOST_TEST(!pol(dummy_grad, 1010.0));
|
||||
BOOST_TEST(pol(dummy_grad, 1010.5));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(combined_policy_test, T, all_float_types)
|
||||
{
|
||||
using pol_abs = bopt::objective_tol_convergence_policy<T>;
|
||||
using pol_rel = bopt::relative_objective_tol_policy<T>;
|
||||
using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;
|
||||
using pol_abs = bopt::objective_tol_convergence_policy<T>;
|
||||
using pol_rel = bopt::relative_objective_tol_policy<T>;
|
||||
using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;
|
||||
|
||||
pol_abs abs_pol(1e-6);
|
||||
pol_rel rel_pol(1e-3);
|
||||
pol_comb comb(abs_pol, rel_pol);
|
||||
pol_abs abs_pol(1e-6);
|
||||
pol_rel rel_pol(1e-3);
|
||||
pol_comb comb(abs_pol, rel_pol);
|
||||
|
||||
std::vector<T> dummy_grad;
|
||||
std::vector<T> dummy_grad;
|
||||
|
||||
BOOST_TEST(!comb(dummy_grad, 100.0));
|
||||
BOOST_TEST(!comb(dummy_grad, 110.0));
|
||||
BOOST_TEST(comb(dummy_grad, 110.1));
|
||||
BOOST_TEST(comb(dummy_grad, 110.1000001));
|
||||
BOOST_TEST(!comb(dummy_grad, 100.0));
|
||||
BOOST_TEST(!comb(dummy_grad, 110.0));
|
||||
BOOST_TEST(comb(dummy_grad, 110.1));
|
||||
BOOST_TEST(comb(dummy_grad, 110.1000001));
|
||||
}
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(nonnegativity_constraint_test, T, all_float_types)
|
||||
{
|
||||
std::vector<T> x = {1.0, -2.0, 3.0, -4.0};
|
||||
bopt::nonnegativity_constraint<std::vector<T>, T> proj;
|
||||
proj(x);
|
||||
std::vector<T> x = { 1.0, -2.0, 3.0, -4.0 };
|
||||
bopt::nonnegativity_constraint<std::vector<T>, T> proj;
|
||||
proj(x);
|
||||
|
||||
for (auto& xi : x)
|
||||
BOOST_TEST(xi >= 0.0);
|
||||
BOOST_TEST(x == std::vector<T>({1.0, 0.0, 3.0, 0.0}));
|
||||
for (auto& xi : x)
|
||||
BOOST_TEST(xi >= 0.0);
|
||||
BOOST_TEST(x == std::vector<T>({ 1.0, 0.0, 3.0, 0.0 }));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(l2_ball_constraint_test, T, all_float_types)
|
||||
{
|
||||
std::vector<T> x = {3.0, 4.0}; // norm = 5
|
||||
bopt::l2_ball_constraint<std::vector<T>, T> proj(1.0);
|
||||
proj(x);
|
||||
std::vector<T> x = { 3.0, 4.0 }; // norm = 5
|
||||
bopt::l2_ball_constraint<std::vector<T>, T> proj(1.0);
|
||||
proj(x);
|
||||
|
||||
T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
|
||||
BOOST_TEST(abs(norm - 1.0) < 1e-12); // projected to unit circle
|
||||
T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
|
||||
BOOST_TEST(abs(norm - 1.0) < 1e-12); // projected to unit circle
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(l1_ball_constraint_test, T, all_float_types)
|
||||
{
|
||||
std::vector<T> x = {3.0, 4.0}; // L1 norm = 7
|
||||
bopt::l1_ball_constraint<std::vector<T>, T> proj(2.0);
|
||||
proj(x);
|
||||
std::vector<T> x = { 3.0, 4.0 }; // L1 norm = 7
|
||||
bopt::l1_ball_constraint<std::vector<T>, T> proj(2.0);
|
||||
proj(x);
|
||||
|
||||
T norm1 = abs(x[0]) + abs(x[1]);
|
||||
BOOST_TEST(abs(norm1 - 2.0) < T{1e-12});
|
||||
T norm1 = abs(x[0]) + abs(x[1]);
|
||||
BOOST_TEST(abs(norm1 - 2.0) < T{ 1e-12 });
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(simplex_constraint_test, T, all_float_types)
|
||||
{
|
||||
std::vector<T> x = {-1.0, 2.0, 3.0}; // has negative and sum != 1
|
||||
bopt::simplex_constraint<std::vector<T>, T> proj;
|
||||
proj(x);
|
||||
std::vector<T> x = { -1.0, 2.0, 3.0 }; // has negative and sum != 1
|
||||
bopt::simplex_constraint<std::vector<T>, T> proj;
|
||||
proj(x);
|
||||
|
||||
T sum = 0.0;
|
||||
for (auto& xi : x) {
|
||||
BOOST_TEST(xi >= 0.0); // all nonnegative
|
||||
sum += xi;
|
||||
}
|
||||
BOOST_TEST(abs(sum - 1.0) < 1e-12); // normalized to sum=1
|
||||
T sum = 0.0;
|
||||
for (auto& xi : x) {
|
||||
BOOST_TEST(xi >= 0.0); // all nonnegative
|
||||
sum += xi;
|
||||
}
|
||||
BOOST_TEST(abs(sum - 1.0) < 1e-12); // normalized to sum=1
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(unit_sphere_constraint_test, T, all_float_types)
|
||||
{
|
||||
std::vector<T> x = {0.3, 0.4}; // norm = 0.5
|
||||
bopt::unit_sphere_constraint<std::vector<T>, T> proj;
|
||||
proj(x);
|
||||
std::vector<T> x = { 0.3, 0.4 }; // norm = 0.5
|
||||
bopt::unit_sphere_constraint<std::vector<T>, T> proj;
|
||||
proj(x);
|
||||
|
||||
T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
|
||||
BOOST_TEST(abs(norm - 1.0) < 1e-12); // always projected to sphere
|
||||
T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
|
||||
BOOST_TEST(abs(norm - 1.0) < 1e-12); // always projected to sphere
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(function_constraint_test, T, all_float_types)
|
||||
{
|
||||
auto clip_to_half = [](std::vector<T>& v) {
|
||||
for (auto& xi : v)
|
||||
if (xi > 0.5)
|
||||
xi = 0.5;
|
||||
};
|
||||
auto clip_to_half = [](std::vector<T>& v) {
|
||||
for (auto& xi : v)
|
||||
if (xi > 0.5)
|
||||
xi = 0.5;
|
||||
};
|
||||
|
||||
bopt::function_constraint<std::vector<T>> proj(clip_to_half);
|
||||
std::vector<T> x = {0.2, 0.7, 1.5};
|
||||
proj(x);
|
||||
bopt::function_constraint<std::vector<T>> proj(clip_to_half);
|
||||
std::vector<T> x = { 0.2, 0.7, 1.5 };
|
||||
proj(x);
|
||||
|
||||
BOOST_TEST(x == std::vector<T>({0.2, 0.5, 0.5}));
|
||||
BOOST_TEST(x == std::vector<T>({ 0.2, 0.5, 0.5 }));
|
||||
}
|
||||
|
||||
template<typename RealType>
|
||||
struct no_init_policy
|
||||
{
|
||||
void operator()(std::vector<RealType>& x) const noexcept {}
|
||||
void operator()(std::vector<RealType>& x) const noexcept {}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct analytic_objective_eval_pol
|
||||
{
|
||||
template<typename Objective, typename ArgumentContainer>
|
||||
RealType operator()(Objective&& objective, ArgumentContainer& x)
|
||||
{
|
||||
return objective(x);
|
||||
}
|
||||
template<typename Objective, typename ArgumentContainer>
|
||||
RealType operator()(Objective&& objective, ArgumentContainer& x)
|
||||
{
|
||||
return objective(x);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct analytic_gradient_eval_pol
|
||||
{
|
||||
template<class Objective, class ArgumentContainer, class FunctionEvaluationPolicy>
|
||||
void operator()(Objective&& obj_f,
|
||||
ArgumentContainer& x,
|
||||
FunctionEvaluationPolicy&& f_eval_pol,
|
||||
RealType& obj_v,
|
||||
std::vector<RealType>& grad_container)
|
||||
{
|
||||
RealType v = f_eval_pol(obj_f, x);
|
||||
obj_v = v;
|
||||
grad_container.resize(x.size());
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
grad_container[i] = 2 * x[i];
|
||||
}
|
||||
template<class Objective,
|
||||
class ArgumentContainer,
|
||||
class FunctionEvaluationPolicy>
|
||||
void operator()(Objective&& obj_f,
|
||||
ArgumentContainer& x,
|
||||
FunctionEvaluationPolicy&& f_eval_pol,
|
||||
RealType& obj_v,
|
||||
std::vector<RealType>& grad_container)
|
||||
{
|
||||
RealType v = f_eval_pol(obj_f, x);
|
||||
obj_v = v;
|
||||
grad_container.resize(x.size());
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
grad_container[i] = 2 * x[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_derivative_policies, T, all_float_types)
|
||||
{
|
||||
std::vector<T> x;
|
||||
size_t NITER = 2000;
|
||||
size_t N = 15;
|
||||
T lr = T{1e-2};
|
||||
RandomSample<T> rng{T(-100), (100)};
|
||||
T eps = T{1e-3};
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x.push_back(rng.next());
|
||||
}
|
||||
std::vector<T> x;
|
||||
size_t NITER = 2000;
|
||||
size_t N = 15;
|
||||
T lr = T{ 1e-2 };
|
||||
RandomSample<T> rng{ T(-100), (100) };
|
||||
T eps = T{ 1e-3 };
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x.push_back(rng.next());
|
||||
}
|
||||
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<T>,
|
||||
x,
|
||||
lr,
|
||||
no_init_policy<T>{},
|
||||
analytic_objective_eval_pol<T>{},
|
||||
analytic_gradient_eval_pol<T>{});
|
||||
auto gdopt = bopt::make_gradient_descent(&quadratic<T>,
|
||||
x,
|
||||
lr,
|
||||
no_init_policy<T>{},
|
||||
analytic_objective_eval_pol<T>{},
|
||||
analytic_gradient_eval_pol<T>{});
|
||||
|
||||
for (size_t i = 0; i < NITER; ++i) {
|
||||
gdopt.step();
|
||||
}
|
||||
for (auto& xi : x) {
|
||||
BOOST_REQUIRE_SMALL(xi, eps);
|
||||
}
|
||||
for (size_t i = 0; i < NITER; ++i) {
|
||||
gdopt.step();
|
||||
}
|
||||
for (auto& xi : x) {
|
||||
BOOST_REQUIRE_SMALL(xi, eps);
|
||||
}
|
||||
}
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
#include "test_autodiff_reverse.hpp" // reuse for same test infra
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#include "test_autodiff_reverse.hpp"
|
||||
#include "test_functions_for_optimization.hpp"
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <boost/math/optimization/lbfgs.hpp>
|
||||
@@ -9,26 +13,175 @@ namespace bopt = boost::math::optimization;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(basic_lbfgs)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(default_lbfgs_test) {
|
||||
using T = double;
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(default_lbfgs_test, T, all_float_types)
|
||||
{
|
||||
constexpr size_t NITER = 10;
|
||||
constexpr size_t M = 10;
|
||||
const T eps = T{1e-8};
|
||||
const T eps = T{ 1e-8 };
|
||||
|
||||
RandomSample<T> rng{T(-10), T(10)};
|
||||
RandomSample<T> rng{ T(-10), T(10) };
|
||||
std::array<rdiff::rvar<T, 1>, 2> x;
|
||||
x[0] = rng.next();
|
||||
x[1] = rng.next();
|
||||
|
||||
auto opt = bopt::make_lbfgs<decltype(&rosenbrock_saddle<rdiff::rvar<T, 1>>),
|
||||
std::array<rdiff::rvar<T, 1>, 2>, T>(
|
||||
&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, M);
|
||||
auto opt = bopt::make_lbfgs(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, M);
|
||||
|
||||
auto result = minimize(opt);
|
||||
std::cout << result << std::endl;
|
||||
for (auto &xi : x) {
|
||||
BOOST_REQUIRE_CLOSE(xi, T{1.0}, eps);
|
||||
for (auto& xi : x) {
|
||||
BOOST_REQUIRE_CLOSE(xi, T{ 1.0 }, eps);
|
||||
}
|
||||
}
|
||||
|
||||
// Custom initialization policy that zeros out the parameters
|
||||
template<typename RealType>
|
||||
struct zero_init_policy
|
||||
{
|
||||
void operator()(std::vector<RealType>& x) const noexcept
|
||||
{
|
||||
std::fill(x.begin(), x.end(), RealType{ 0 });
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct analytic_objective_eval_pol
|
||||
{
|
||||
template<typename Objective, typename ArgumentContainer>
|
||||
RealType operator()(Objective&& objective, ArgumentContainer& x)
|
||||
{
|
||||
return objective(x);
|
||||
}
|
||||
};
|
||||
template<typename RealType>
|
||||
struct analytic_gradient_eval_pol
|
||||
{
|
||||
template<class Objective,
|
||||
class ArgumentContainer,
|
||||
class FunctionEvaluationPolicy>
|
||||
void operator()(Objective&& obj_f,
|
||||
ArgumentContainer& x,
|
||||
FunctionEvaluationPolicy&& f_eval_pol,
|
||||
RealType& obj_v,
|
||||
std::vector<RealType>& grad_container)
|
||||
{
|
||||
RealType v = f_eval_pol(obj_f, x);
|
||||
obj_v = v;
|
||||
grad_container.resize(x.size());
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
grad_container[i] = 2 * x[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// -- Test L-BFGS with custom initialization policy (zero_init_policy)
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(custom_init_lbfgs_test, T, all_float_types)
|
||||
{
|
||||
constexpr size_t M = 8;
|
||||
const T eps = T{ 1e-6 };
|
||||
|
||||
RandomSample<T> rng{ T(-5), T(5) };
|
||||
std::array<rdiff::rvar<T, 1>, 2> x;
|
||||
x[0] = rng.next();
|
||||
x[1] = rng.next();
|
||||
|
||||
auto opt = bopt::make_lbfgs(&rosenbrock_saddle<rdiff::rvar<T, 1>>,
|
||||
x,
|
||||
M,
|
||||
bopt::costant_initializer_rvar<T>(0.0));
|
||||
auto result = minimize(opt);
|
||||
|
||||
for (auto& xi : x) {
|
||||
BOOST_REQUIRE_CLOSE(xi, T{ 1.0 }, eps);
|
||||
}
|
||||
}
|
||||
|
||||
// // -- Test L-BFGS with analytic derivative policies
|
||||
// BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_lbfgs_test, T, all_float_types)
|
||||
// {
|
||||
// constexpr size_t M = 10;
|
||||
// const T eps = T{ 1e-3 };
|
||||
|
||||
// RandomSample<T> rng{ T(-5), T(5) };
|
||||
// std::vector<T> x(3);
|
||||
// for (auto& xi : x)
|
||||
// xi = rng.next();
|
||||
|
||||
// auto opt = bopt::make_lbfgs(
|
||||
// &quadratic<rdiff::rvar<T, 1>>, // Objective
|
||||
// x, // Arguments
|
||||
// M, // History size
|
||||
// bopt::random_uniform_initializer_rvar<T>{}, // Initialization
|
||||
// analytic_objective_eval_pol<T>{}, // Function eval
|
||||
// analytic_gradient_eval_pol<T>{} // Gradient eval
|
||||
// );
|
||||
|
||||
// // Run optimization (manual loop or minimize wrapper)
|
||||
// auto result = minimize(opt);
|
||||
|
||||
// for (auto& xi : x) {
|
||||
// BOOST_REQUIRE_SMALL(xi, eps);
|
||||
// }
|
||||
// }
|
||||
|
||||
// // -- Test L-BFGS with analytic policies and custom line search
|
||||
// // (strong_wolfe_line_search_policy)
|
||||
// BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_lbfgs_strong_wolfe_test,
|
||||
// T,
|
||||
// all_float_types)
|
||||
// {
|
||||
// constexpr size_t M = 12;
|
||||
// const T eps = T{ 1e-4 };
|
||||
|
||||
// RandomSample<T> rng{ T(-10), T(10) };
|
||||
// std::vector<T> x(5);
|
||||
// for (auto& xi : x)
|
||||
// xi = rng.next();
|
||||
|
||||
// auto opt = bopt::make_lbfgs(
|
||||
// &quadratic<T>,
|
||||
// x,
|
||||
// M,
|
||||
// boost::math::optimization::random_uniform_initializer_rvar<T>{},
|
||||
// analytic_objective_eval_pol<T>{},
|
||||
// analytic_gradient_eval_pol<T>{},
|
||||
// boost::math::optimization::armijo_line_search_policy<T>{});
|
||||
|
||||
// auto result = minimize(opt);
|
||||
|
||||
// for (auto& xi : x) {
|
||||
// BOOST_REQUIRE_SMALL(xi, eps);
|
||||
// }
|
||||
// }
|
||||
|
||||
// // -- Test L-BFGS with random init policy (demonstrates flexible
|
||||
// initialization) template<typename RealType> struct random_init_policy
|
||||
// {
|
||||
// RandomSample<RealType> rng{ RealType(-1), RealType(1) };
|
||||
// void operator()(std::vector<RealType>& x) const noexcept
|
||||
// {
|
||||
// for (auto& xi : x)
|
||||
// xi = rng.next();
|
||||
// }
|
||||
// };
|
||||
|
||||
// BOOST_AUTO_TEST_CASE_TEMPLATE(random_init_lbfgs_test, T, all_float_types)
|
||||
// {
|
||||
// constexpr size_t M = 6;
|
||||
// const T eps = T{ 1e-4 };
|
||||
|
||||
// std::vector<T> x(4);
|
||||
// random_init_policy<T>{}(x); // Apply initialization manually
|
||||
|
||||
// auto opt = bopt::make_lbfgs(&quadratic<T>,
|
||||
// x,
|
||||
// M,
|
||||
// random_init_policy<T>{},
|
||||
// analytic_objective_eval_pol<T>{},
|
||||
// analytic_gradient_eval_pol<T>{});
|
||||
|
||||
// auto result = minimize(opt);
|
||||
|
||||
// for (auto& xi : x) {
|
||||
// BOOST_REQUIRE_SMALL(xi, eps);
|
||||
// }
|
||||
// }
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
||||
@@ -1,26 +1,31 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#include "test_autodiff_reverse.hpp" // reuse for some basic options
|
||||
#include "test_functions_for_optimization.hpp"
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <boost/math/optimization/minimizer.hpp>
|
||||
#include <boost/math/optimization/nesterov.hpp>
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
namespace bopt = boost::math::optimization;
|
||||
namespace bopt = boost::math::optimization;
|
||||
BOOST_AUTO_TEST_SUITE(basic_gradient_descent)
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(default_nesterov_test, T, all_float_types)
|
||||
{
|
||||
size_t NITER = 5;
|
||||
T lr = T{1e-3};
|
||||
T mu = T{0.95};
|
||||
RandomSample<T> rng{T(-10), (10)};
|
||||
std::vector<rdiff::rvar<T, 1>> x;
|
||||
x.push_back(rng.next());
|
||||
x.push_back(rng.next());
|
||||
T eps = T{1e-8};
|
||||
auto nag = bopt::make_nag(&quadratic_high_cond_2D<rdiff::rvar<T, 1>>, x, lr, mu);
|
||||
auto z = minimize(nag);
|
||||
for (auto& xi : x) {
|
||||
BOOST_REQUIRE_SMALL(xi.item(), eps);
|
||||
}
|
||||
size_t NITER = 5;
|
||||
T lr = T{ 1e-3 };
|
||||
T mu = T{ 0.95 };
|
||||
RandomSample<T> rng{ T(-10), (10) };
|
||||
std::vector<rdiff::rvar<T, 1>> x;
|
||||
x.push_back(rng.next());
|
||||
x.push_back(rng.next());
|
||||
T eps = T{ 1e-8 };
|
||||
auto nag =
|
||||
bopt::make_nag(&quadratic_high_cond_2D<rdiff::rvar<T, 1>>, x, lr, mu);
|
||||
auto z = minimize(nag);
|
||||
for (auto& xi : x) {
|
||||
BOOST_REQUIRE_SMALL(xi.item(), eps);
|
||||
}
|
||||
}
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
||||
Reference in New Issue
Block a user