2
0
mirror of https://github.com/boostorg/math.git synced 2026-02-24 04:02:18 +00:00

saving progress on linux machine

This commit is contained in:
mzhelyez
2025-10-04 17:28:01 +02:00
parent 2697069329
commit e2ce13e3fb
11 changed files with 1124 additions and 1 deletions

View File

@@ -580,6 +580,8 @@ public:
}
it->backward();
}
// Overwrite the stored value in place via inner_t's converting constructor.
// NOTE(review): this bypasses the tape (no operation is recorded) — confirm
// that callers only use it for (re)initialization, not mid-computation.
void set_item(RealType value) { value_ = inner_t(value); }
};
template<typename RealType, size_t DerivativeOrder>

View File

@@ -266,7 +266,7 @@ public:
void add_checkpoint()
{
if (total_size_ > 0) {
checkpoints_.push_back(total_size_ - 1);
checkpoints_.push_back(total_size_); //- 1);
} else {
checkpoints_.push_back(0);
}

View File

@@ -0,0 +1,80 @@
#ifndef DIFFERENTIABLE_OPT_UTILITIES_HPP
#define DIFFERENTIABLE_OPT_UTILITIES_HPP
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <random>
#include <type_traits>
#include <vector>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
namespace boost {
namespace math {
namespace optimization {
template<typename UpdPol>
struct update_policy_real_type;
template<typename UpdPol>
struct update_policy_real_type;
template<template<typename> class UpdPol, typename RealType>
struct update_policy_real_type<UpdPol<RealType>>
{
using type = RealType;
};
template<typename UpdPol>
using update_policy_real_type_t =
typename update_policy_real_type<typename std::decay<UpdPol>::type>::type;
/// @brief Computes the Euclidean (2-)norm of a vector of reference-wrapped
/// RealTypes: sqrt(sum_i g_i^2).
/// @param g references to gradient components
/// @return ||g||_2
template<class RealType>
RealType gradient_norm2(const std::vector<std::reference_wrapper<RealType>>& g)
{
    // Bring std::sqrt into scope so builtin types resolve, while ADL still
    // finds overloads for user-defined RealTypes.
    using std::sqrt;
    RealType sum = RealType(0);
    for (const auto& gi : g) {
        const RealType val = gi.get();
        sum += val * val;
    }
    return sqrt(sum);
}
/// @brief Computes the 1-norm of a vector of reference-wrapped RealTypes:
/// sum_i |g_i|.
/// Fixes two defects in the previous version: it returned sqrt(sum|g_i|)
/// (a copy-paste from gradient_norm2 — the 1-norm takes no square root),
/// and it called unqualified abs, which can bind the integer ::abs overload
/// for builtin floating types and silently truncate.
/// @param g references to gradient components
/// @return ||g||_1
template<class RealType>
RealType gradient_norm1(const std::vector<std::reference_wrapper<RealType>>& g)
{
    using std::abs; // builtin types use std::abs; ADL covers user types
    RealType sum = RealType(0);
    for (const auto& gi : g) {
        sum += abs(gi.get());
    }
    return sum;
}
template<typename RealType>
std::vector<RealType> random_vector(size_t n)
{
    /* @brief Generates a random std::vector<RealType> of size n, entries
     * drawn from a uniform distribution over [0, 1) using the mt19937 engine.
     */
    /* TODO: these may need to be marked thread local
     * in the future
     *
     * TODO: benchmark.
     */
    // NOTE(review): function-local statics mean ALL callers share one engine,
    // seeded once per process from std::random_device — concurrent callers
    // race on rng state; confirm single-threaded use until made thread_local.
    static boost::random::mt19937 rng{std::random_device{}()};
    static boost::random::uniform_real_distribution<RealType> dist(0.0, 1.0);
    std::vector<RealType> result(n);
    std::generate(result.begin(), result.end(), [&] { return dist(rng); });
    return result;
}
} // namespace optimization
} // namespace math
} // namespace boost
#endif

View File

@@ -0,0 +1,72 @@
#ifndef GRADIENT_OPT_BASE_HPP
#define GRADIENT_OPT_BASE_HPP
#include <boost/math/differentiation/autodiff_reverse.hpp>
namespace boost {
namespace math {
namespace optimization {
namespace rdiff = boost::math::differentiation::reverse_mode;
/// @brief CRTP base for gradient-based optimizers.
///
/// Wires together the policy objects that define an optimization problem:
///  - InitializationPolicy: called once on the argument container,
///  - GradEvalPolicy: binds gradient references into g_ and evaluates them,
///  - ObjectiveEvalPolicy: evaluates the objective (tape handling etc.),
///  - UpdatePolicy: applies one update per argument per step().
///
/// Fixes: obj_v_ is now value-initialized — previously objective_value()
/// read an indeterminate value before the first step(); also removed a stray
/// `;` after step()'s body.
template<typename ArgumentContainer,
         typename RealType,
         class Objective,
         class InitializationPolicy,
         class ObjectiveEvalPolicy,
         class GradEvalPolicy,
         class UpdatePolicy,
         typename DerivedOptimizer>
class abstract_optimizer
{
private:
    Objective objective_;  // objective function
    ArgumentContainer& x_; // arguments to the objective function (not owned)
    std::vector<std::reference_wrapper<RealType>> g_; // references to gradients
    ObjectiveEvalPolicy obj_eval_; // how to evaluate the objective
    GradEvalPolicy grad_eval_;     // how to evaluate/bind gradients
    InitializationPolicy init_;    // how to initialize the problem
    UpdatePolicy update_;          // per-coordinate update step
    RealType obj_v_{};             // last objective value; value-initialized

    // CRTP helpers to reach the concrete optimizer type.
    DerivedOptimizer& derived() { return static_cast<DerivedOptimizer&>(*this); }
    const DerivedOptimizer& derived() const { return static_cast<const DerivedOptimizer&>(*this); }

public:
    using argument_container_t = ArgumentContainer;
    using real_type_t = RealType;

    /// Constructs the optimizer, runs the initialization policy on x, and
    /// binds gradient references into g_ via the gradient-evaluation policy.
    abstract_optimizer(Objective&& objective,
                       ArgumentContainer& x,
                       InitializationPolicy&& ip,
                       ObjectiveEvalPolicy&& oep,
                       GradEvalPolicy&& gep,
                       UpdatePolicy&& up)
        : objective_(std::forward<Objective>(objective))
        , x_(x)
        , obj_eval_(std::forward<ObjectiveEvalPolicy>(oep))
        , grad_eval_(std::forward<GradEvalPolicy>(gep))
        , init_(std::forward<InitializationPolicy>(ip))
        , update_(std::forward<UpdatePolicy>(up))
    {
        init_(x_);               // initialize the problem
        grad_eval_.bind(x_, g_); // bind gradient references into g_
    }

    /// One optimization step: evaluate objective + gradients, then apply the
    /// update policy to every coordinate.
    void step()
    {
        grad_eval_(objective_, x_, obj_eval_, obj_v_);
        for (std::size_t i = 0; i < x_.size(); ++i) {
            update_(x_[i], g_[i].get());
        }
    }

    ArgumentContainer& arguments() { return x_; }
    const ArgumentContainer& arguments() const { return x_; }
    RealType& objective_value() { return obj_v_; }
    const RealType& objective_value() const { return obj_v_; }
    std::vector<std::reference_wrapper<RealType>>& gradients() { return g_; }
    const std::vector<std::reference_wrapper<RealType>>& gradients() const { return g_; }
};
} // namespace optimization
} // namespace math
} // namespace boost
#endif

View File

@@ -0,0 +1,140 @@
#ifndef RDIFF_OPTIMIZATION_POLICIES_HPP__
#define RDIFF_OPTIMIZATION_POLICIES_HPP__
#include <boost/math/differentiation/autodiff_reverse.hpp>
#include <boost/random.hpp>
#include <random>
namespace boost {
namespace math {
namespace optimization {
namespace rdiff = boost::math::differentiation::reverse_mode;
/******************************************************************
 * @brief Function-evaluation policy for reverse-mode autodiff.
 * Resets the active tape before re-recording the objective so that
 * repeated evaluations do not accumulate stale state.
 * @param objective objective function to evaluate
 * @param x argument container
 * @return the objective value as an rvar recorded on the tape
 */
template<typename RealType>
struct reverse_mode_function_eval_policy
{
    template<typename Objective, class ArgumentContainer>
    rdiff::rvar<RealType, 1> operator()(Objective &&objective, ArgumentContainer &x)
    {
        auto &tape = rdiff::get_active_tape<RealType, 1>();
        // Order matters: clear gradient state, then rewind the recorded
        // operations to the last checkpoint, then re-record the objective.
        tape.zero_grad();
        tape.rewind_to_last_checkpoint();
        return objective(x);
    }
};
/******************************************************************
 * @brief Gradient-evaluation policy for reverse-mode autodiff.
 * bind() wires references to each argument's adjoint into the shared
 * gradient container; operator() evaluates the objective through the
 * function-evaluation policy (which manages the tape) and then runs
 * the backward pass to populate those adjoints.
 * @param obj_f objective
 * @param x argument container
 * @param f_eval_pol function-evaluation policy; must be used in tandem
 *        with this policy so tape state stays consistent
 * @param obj_v out-parameter receiving the objective value
 */
template<typename RealType>
struct reverse_mode_gradient_evaluation_policy
{
    // NOTE(review): bind() appends without clearing g — calling it twice
    // would duplicate entries; confirm it is only ever called once.
    template<typename ArgumentContainer>
    void bind(ArgumentContainer &x, std::vector<std::reference_wrapper<RealType>> &g)
    {
        g.reserve(x.size());
        for (auto &xi : x) {
            g.push_back(std::ref(xi.adjoint()));
        }
    }
    template<class Objective, class ArgumentContainer, class FunctionEvaluationPolicy>
    void operator()(Objective &&obj_f,
                    ArgumentContainer &x,
                    FunctionEvaluationPolicy &&f_eval_pol,
                    RealType &obj_v)
    {
        // compute objective via eval policy that takes care of tape
        rdiff::rvar<RealType, 1> v = f_eval_pol(obj_f, x);
        v.backward();       // populate adjoints referenced by bind()
        obj_v = v.item();   // store the plain scalar value
    }
};
/*************************************************************************
 * Update policies.
 */
/// @brief Nesterov-momentum update policy — STUB: operator() is currently
/// empty and performs no update. lr_ and mu_ hold the intended learning
/// rate and momentum coefficient. TODO: implement the update.
template<typename RealType>
struct nesterov_update_policy
{
    RealType lr_; // learning rate (unused until implemented)
    RealType mu_; // momentum coefficient (unused until implemented)
    nesterov_update_policy(RealType lr, RealType mu)
        : lr_(lr)
        , mu_(mu)
    {}
    void operator()(rdiff::rvar<RealType, 1> &x) {}
};
/******************************************************************
 * Initialization policies.
 */
/// @brief Initialization policy that leaves argument values untouched and
/// only records a checkpoint on the active tape, so later evaluations can
/// rewind to this point.
template<typename RealType>
struct tape_initializer_rvar
{
    template<class ArgumentContainer>
    void operator()(ArgumentContainer &) const noexcept
    {
        // Guard against using this policy with non-autodiff containers.
        static_assert(std::is_same<typename ArgumentContainer::value_type,
                                   rdiff::rvar<RealType, 1> >::value,
                      "ArgumentContainer::value_type must be rdiff::rvar<RealType,1>");
        auto &tape = rdiff::get_active_tape<RealType, 1>();
        tape.add_checkpoint();
    }
};
/// @brief Initialization policy that assigns each argument a value drawn
/// uniformly from [low_, high_) and then records a tape checkpoint.
/// A fixed seed gives reproducible initializations; the default seeds from
/// std::random_device.
template<typename RealType>
struct random_uniform_initializer_rvar
{
    RealType low_, high_; // bounds of the uniform distribution
    size_t seed_;         // engine seed (deterministic when supplied)
    random_uniform_initializer_rvar(RealType low = 0,
                                    RealType high = 1,
                                    size_t seed = std::random_device{}())
        : low_(low)
        , high_(high)
        , seed_(seed){};
    template<class ArgumentContainer>
    void operator()(ArgumentContainer &x) const
    {
        // Local engine: each call replays the same sequence for a given seed.
        boost::random::mt19937 gen(seed_);
        boost::random::uniform_real_distribution<RealType> dist(low_, high_);
        for (auto &xi : x) {
            xi = rdiff::rvar<RealType, 1>(dist(gen));
        }
        auto &tape = rdiff::get_active_tape<RealType, 1>();
        tape.add_checkpoint();
    }
};
/// @brief Initialization policy that sets every argument to a fixed constant
/// and then records a tape checkpoint.
/// NOTE: the struct name keeps its historical misspelling ("costant") for
/// backward compatibility; new code should use the correctly spelled alias
/// constant_initializer_rvar below.
template<typename RealType>
struct costant_initializer_rvar
{
    RealType constant; // value assigned to every argument
    explicit costant_initializer_rvar(RealType v = 0)
        : constant(v){};
    template<class ArgumentContainer>
    void operator()(ArgumentContainer &x) const
    {
        for (auto &xi : x) {
            xi = rdiff::rvar<RealType, 1>(constant);
        }
        auto &tape = rdiff::get_active_tape<RealType, 1>();
        tape.add_checkpoint();
    }
};
/// Correctly spelled alias for costant_initializer_rvar.
template<typename RealType>
using constant_initializer_rvar = costant_initializer_rvar<RealType>;
} // namespace optimization
} // namespace math
} // namespace boost
#endif

View File

@@ -0,0 +1,143 @@
#ifndef GRADIENT_DESCENT_HPP
#define GRADIENT_DESCENT_HPP
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
#include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
#include <functional>
#include <vector>
namespace rdiff = boost::math::differentiation::reverse_mode;
namespace boost {
namespace math {
namespace optimization {
/// @brief Plain gradient-descent update: x <- x - lr * g.
/// Two overloads are selected by SFINAE on whether the argument type is a
/// reverse-mode autodiff expression.
template<typename RealType>
struct gradient_descent_update_policy
{
    RealType lr_; // learning rate
    gradient_descent_update_policy(RealType lr)
        : lr_(lr){};
    // Overload for autodiff expression types: writes through get_value() so
    // the parameter update itself is not recorded on the tape.
    template<typename ArgumentType,
             typename = typename std::enable_if<boost::math::differentiation::reverse_mode::detail::
                                                    is_expression<ArgumentType>::value>::type>
    void operator()(ArgumentType &x, RealType &g)
    {
        // this update effectively "mutes" the tape
        // TODO: add a tape scope guard method so that
        // you can do math on autodiff types without
        // accumulating gradients
        x.get_value() -= lr_ * g;
    }
    // Overload for plain arithmetic types: ordinary in-place update.
    // NOTE(review): this overload is const while the autodiff one is not —
    // confirm the asymmetry is intentional.
    template<
        typename ArgumentType,
        typename std::enable_if<
            !boost::math::differentiation::reverse_mode::detail::is_expression<ArgumentType>::value,
            int>::type
        = 0>
    void operator()(ArgumentType &x, RealType &g) const
    {
        x -= lr_ * g;
    }
};
/// @brief Concrete gradient-descent optimizer: abstract_optimizer with the
/// gradient_descent_update_policy plugged in and itself as the CRTP derived
/// type. Adds no state of its own; construction is inherited from the base.
template<typename ArgumentContainer,
         typename RealType,
         class Objective,
         class InitializationPolicy,
         class ObjectiveEvalPolicy,
         class GradEvalPolicy>
class gradient_descent : public abstract_optimizer<ArgumentContainer,
                                                   RealType,
                                                   Objective,
                                                   InitializationPolicy,
                                                   ObjectiveEvalPolicy,
                                                   GradEvalPolicy,
                                                   gradient_descent_update_policy<RealType>,
                                                   gradient_descent<ArgumentContainer,
                                                                    RealType,
                                                                    Objective,
                                                                    InitializationPolicy,
                                                                    ObjectiveEvalPolicy,
                                                                    GradEvalPolicy>>
{
    // Convenience alias for the (long) base specialization.
    using base_opt = abstract_optimizer<ArgumentContainer,
                                        RealType,
                                        Objective,
                                        InitializationPolicy,
                                        ObjectiveEvalPolicy,
                                        GradEvalPolicy,
                                        gradient_descent_update_policy<RealType>,
                                        gradient_descent<ArgumentContainer,
                                                         RealType,
                                                         Objective,
                                                         InitializationPolicy,
                                                         ObjectiveEvalPolicy,
                                                         GradEvalPolicy>>;

public:
    using base_opt::base_opt; // inherit the policy-wiring constructor
};
/// @brief Convenience factory: gradient descent with the default reverse-mode
/// autodiff policies (tape initializer, rdiff function/gradient evaluation).
/// @param obj objective callable over ArgumentContainer
/// @param x   container of rdiff::rvar<RealType, 1> arguments
/// @param lr  learning rate, defaults to 0.01
/// Fix: RealType now defaults to double. Template parameters are never
/// deduced from default function arguments, so previously a call that
/// omitted `lr` failed to compile and the default was unusable.
template<class Objective, typename ArgumentContainer, typename RealType = double>
auto make_gradient_descent(Objective &&obj, ArgumentContainer &x, RealType lr = RealType{0.01})
{
    return gradient_descent<ArgumentContainer,
                            RealType,
                            Objective,
                            tape_initializer_rvar<RealType>,
                            reverse_mode_function_eval_policy<RealType>,
                            reverse_mode_gradient_evaluation_policy<RealType>>(
        std::forward<Objective>(obj),
        x,
        tape_initializer_rvar<RealType>{},
        reverse_mode_function_eval_policy<RealType>{},
        reverse_mode_gradient_evaluation_policy<RealType>{},
        gradient_descent_update_policy<RealType>(lr));
}
/// @brief Factory overload with a caller-supplied initialization policy;
/// function/gradient evaluation still use the reverse-mode autodiff defaults.
/// @param obj objective callable
/// @param x   container of rdiff::rvar<RealType, 1> arguments
/// @param lr  learning rate
/// @param ip  initialization policy invoked once on x
template<class Objective, typename ArgumentContainer, typename RealType, class InitializationPolicy>
auto make_gradient_descent(Objective &&obj,
                           ArgumentContainer &x,
                           RealType lr,
                           InitializationPolicy &&ip)
{
    return gradient_descent<ArgumentContainer,
                            RealType,
                            Objective,
                            InitializationPolicy,
                            reverse_mode_function_eval_policy<RealType>,
                            reverse_mode_gradient_evaluation_policy<RealType>>(
        std::forward<Objective>(obj),
        x,
        std::forward<InitializationPolicy>(ip),
        reverse_mode_function_eval_policy<RealType>{},
        reverse_mode_gradient_evaluation_policy<RealType>{},
        gradient_descent_update_policy<RealType>(lr));
}
/// @brief Fully customizable factory: the caller supplies every policy.
/// @param obj objective callable
/// @param x   argument container
/// @param lr  learning rate
/// @param ip  initialization policy
/// @param oep objective-evaluation policy
/// @param gep gradient-evaluation policy
/// Fix: `lr` is now taken by value like in the other overloads — the old
/// non-const reference rejected literals and rvalues (e.g. T{1e-2}) and was
/// inconsistent with the rest of the factory family.
template<typename ArgumentContainer,
         typename RealType,
         class Objective,
         class InitializationPolicy,
         class ObjectiveEvalPolicy,
         class GradEvalPolicy>
auto make_gradient_descent(Objective &&obj,
                           ArgumentContainer &x,
                           RealType lr,
                           InitializationPolicy &&ip,
                           ObjectiveEvalPolicy &&oep,
                           GradEvalPolicy &&gep)
{
    return gradient_descent<ArgumentContainer,
                            RealType,
                            Objective,
                            InitializationPolicy,
                            ObjectiveEvalPolicy,
                            GradEvalPolicy>(std::forward<Objective>(obj),
                                            x,
                                            std::forward<InitializationPolicy>(ip),
                                            std::forward<ObjectiveEvalPolicy>(oep),
                                            std::forward<GradEvalPolicy>(gep),
                                            gradient_descent_update_policy<RealType>{lr});
}
} // namespace optimization
} // namespace math
} // namespace boost
#endif

View File

@@ -0,0 +1,12 @@
#ifndef GRADIENT_OPTIMIZERS_HPP
#define GRADIENT_OPTIMIZERS_HPP
#include <boost/math/differentiation/autodiff_reverse.hpp>
#include <boost/math/optimization/gradient_descent.hpp>
namespace boost {
namespace math {
namespace optimization {
} // namespace optimization
} // namespace math
} // namespace boost
#endif

View File

@@ -0,0 +1,321 @@
#ifndef MINIMIZER_HPP
#define MINIMIZER_HPP
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
#include <boost/math/optimization/gradient_optimizers.hpp>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <functional>
#include <ostream>
#include <vector>
namespace boost {
namespace math {
namespace optimization {
/// @brief Aggregate returned by minimize(): iteration count, final objective
/// value, optional per-iteration objective history, and a convergence flag.
/// Fix: objective_value and converged are now initialized — previously a
/// default-constructed result held indeterminate values for both.
template<typename RealType>
struct optimization_result
{
    size_t num_iter = 0;                     // iterations actually performed
    RealType objective_value{};              // final objective value
    std::vector<RealType> objective_history; // filled only when history is on
    bool converged = false;                  // did the convergence policy fire?
};
/// Streams a human-readable summary of an optimization_result.
template<typename RealType>
std::ostream& operator<<(std::ostream& os, const optimization_result<RealType>& r)
{
    os << "optimization_result {\n"
       << " num_iter = " << r.num_iter << "\n"
       << " objective_value = " << r.objective_value << "\n"
       << " converged = " << std::boolalpha << r.converged << "\n"
       << " objective_history = [";
    // Comma-separate history entries without a trailing separator.
    const char* sep = "";
    for (const auto& value : r.objective_history) {
        os << sep << value;
        sep = ", ";
    }
    os << "]\n}\n";
    return os;
}
/*****************************************************************************************/
/// @brief Convergence policy: stop when the 2-norm of the gradient drops
/// below tol_. The objective value is ignored. Relies on gradient_norm2()
/// from differentiable_opt_utilties.hpp.
template<typename RealType>
struct gradient_norm_convergence_policy
{
    RealType tol_; // threshold on ||g||_2
    explicit gradient_norm_convergence_policy(RealType tol)
        : tol_(tol)
    {}
    template<class GradientContainer>
    bool operator()(const GradientContainer& g, RealType /*objective_v*/) const
    {
        return gradient_norm2(g) < tol_;
    }
};
/// @brief Convergence policy: stop when successive objective values differ
/// by less than an absolute tolerance. Stateful — it remembers the previous
/// objective value, so one instance must not be shared across independent
/// optimization runs. The first call only primes the state and returns false.
/// Fix: uses std::abs — unqualified abs could resolve to the integer ::abs
/// overload and silently truncate the difference for builtin floating types.
template<typename RealType>
struct objective_tol_convergence_policy
{
    RealType tol_;                // absolute tolerance on |f_k - f_{k-1}|
    mutable RealType last_value_; // objective value from the previous call
    mutable bool first_call_;     // true until the first observation
    explicit objective_tol_convergence_policy(RealType tol)
        : tol_(tol)
        , last_value_(0)
        , first_call_(true)
    {}
    template<class GradientContainer>
    bool operator()(const GradientContainer&, RealType objective_v) const
    {
        if (first_call_) {
            last_value_ = objective_v;
            first_call_ = false;
            return false;
        }
        RealType diff = std::abs(objective_v - last_value_);
        last_value_ = objective_v;
        return diff < tol_;
    }
};
/// @brief Convergence policy on the relative change of the objective:
/// |f_k - f_{k-1}| / max(1, |f_{k-1}|) < rel_tol_. Stateful; the first call
/// only records the value and reports "not converged".
template<typename RealType>
struct relative_objective_tol_policy
{
    RealType rel_tol_;            // relative tolerance
    mutable RealType last_value_; // previous objective value
    mutable bool first_call_;     // true until the first observation
    explicit relative_objective_tol_policy(RealType rel_tol)
        : rel_tol_(rel_tol)
        , last_value_(0)
        , first_call_(true)
    {}
    template<class GradientContainer>
    bool operator()(const GradientContainer&, RealType objective_v) const
    {
        const bool primed = !first_call_;
        const RealType previous = last_value_;
        last_value_ = objective_v;
        first_call_ = false;
        if (!primed) {
            return false;
        }
        const RealType scale = std::max<RealType>(1, std::abs(previous));
        return std::abs(objective_v - previous) / scale < rel_tol_;
    }
};
/// @brief Logical OR of two convergence policies: reports convergence as
/// soon as either sub-policy does. Evaluation short-circuits, so p2_ is not
/// consulted (and its state not advanced) when p1_ already fires.
template<class Policy1, class Policy2>
struct combined_convergence_policy
{
    Policy1 p1_; // first policy, checked first
    Policy2 p2_; // second policy, checked only if the first declines
    combined_convergence_policy(Policy1 p1, Policy2 p2)
        : p1_(p1)
        , p2_(p2)
    {}
    template<class GradientContainer, class RealType>
    bool operator()(const GradientContainer& g, RealType obj) const
    {
        if (p1_(g, obj)) {
            return true;
        }
        return p2_(g, obj);
    }
};
/*****************************************************************************************/
/// @brief Termination policy: stop once the iteration count reaches the cap.
struct max_iter_termination_policy
{
    size_t max_iter_; // iteration cap
    max_iter_termination_policy(size_t max_iter)
        : max_iter_(max_iter)
    {}
    /// @return true when iter has reached (or passed) the cap.
    bool operator()(size_t iter) { return iter >= max_iter_; }
};
/// @brief Termination policy: stop once wall-clock time since construction
/// exceeds max_time_. The iteration count is ignored.
struct wallclock_termination_policy
{
    std::chrono::steady_clock::time_point start_; // set at construction
    std::chrono::milliseconds max_time_;          // time budget
    explicit wallclock_termination_policy(std::chrono::milliseconds max_time)
        : start_(std::chrono::steady_clock::now())
        , max_time_(max_time)
    {}
    bool operator()(size_t /*iter*/) const
    {
        const auto elapsed = std::chrono::steady_clock::now() - start_;
        return elapsed > max_time_;
    }
};
/*****************************************************************************************/
/// @brief No-op projection for unconstrained problems: leaves the arguments
/// exactly as they are.
template<typename ArgumentContainer>
struct unconstrained_policy
{
    void operator()(ArgumentContainer&) { /* intentionally empty */ }
};
/// @brief Projection onto the box [min_, max_]: each coordinate is clamped
/// into the interval.
template<typename ArgumentContainer, typename RealType>
struct box_constraints
{
    RealType min_, max_; // box bounds
    box_constraints(RealType min, RealType max)
        : min_(min)
        , max_(max)
    {}
    void operator()(ArgumentContainer& x)
    {
        for (auto& xi : x) {
            if (xi < min_) {
                xi = min_;
            } else if (max_ < xi) {
                xi = max_;
            }
        }
    }
};
/// @brief Projection onto the nonnegative orthant: negative coordinates are
/// clipped to zero, the rest are untouched.
template<typename ArgumentContainer, typename RealType>
struct nonnegativity_constraint
{
    void operator()(ArgumentContainer& x) const
    {
        for (auto& xi : x) {
            xi = (xi < RealType{0}) ? RealType{0} : xi;
        }
    }
};
/// @brief Projection onto the L2 ball of the given radius: if ||x||_2
/// exceeds the radius the vector is scaled back onto the sphere; otherwise
/// it is left unchanged.
template<typename ArgumentContainer, typename RealType>
struct l2_ball_constraint
{
    RealType radius_; // ball radius
    explicit l2_ball_constraint(RealType radius)
        : radius_(radius)
    {}
    void operator()(ArgumentContainer& x) const
    {
        RealType sq = RealType{0};
        for (const auto& xi : x) {
            sq += xi * xi;
        }
        const RealType len = std::sqrt(sq);
        if (!(len > radius_)) {
            return; // already inside the ball
        }
        const RealType factor = radius_ / len;
        for (auto& xi : x) {
            xi *= factor;
        }
    }
};
/// @brief Rescaling onto the L1 ball of the given radius: if ||x||_1 exceeds
/// the radius the vector is uniformly scaled so its 1-norm equals the radius.
/// (Note: uniform scaling, not the Euclidean projection onto the L1 ball.)
template<typename ArgumentContainer, typename RealType>
struct l1_ball_constraint
{
    RealType radius_; // ball radius
    explicit l1_ball_constraint(RealType radius)
        : radius_(radius)
    {}
    void operator()(ArgumentContainer& x) const
    {
        RealType abs_sum = RealType{0};
        for (const auto& xi : x) {
            abs_sum += std::abs(xi);
        }
        if (!(abs_sum > radius_)) {
            return; // already inside the ball
        }
        const RealType factor = radius_ / abs_sum;
        for (auto& xi : x) {
            xi *= factor;
        }
    }
};
/// @brief Maps x onto the probability simplex by clipping negatives to zero
/// and normalizing by the resulting sum. (This is a clip-and-rescale, not
/// the exact Euclidean projection onto the simplex.)
template<typename ArgumentContainer, typename RealType>
struct simplex_constraint
{
    void operator()(ArgumentContainer& x) const
    {
        RealType total = RealType{0};
        for (auto& xi : x) {
            if (xi < RealType{0}) {
                xi = RealType{0}; // clip negatives
            }
            total += xi;
        }
        if (!(total > RealType{0})) {
            return; // all-zero vector: nothing to normalize
        }
        for (auto& xi : x) {
            xi /= total;
        }
    }
};
/// @brief Projection defined by an arbitrary user-supplied callable.
/// Generalized from a raw function pointer to std::function so capturing
/// lambdas and other callables are accepted; plain function pointers and
/// captureless lambdas still convert implicitly, so existing callers compile
/// unchanged.
template<typename ArgumentContainer>
struct function_constraint
{
    using func_t = std::function<void(ArgumentContainer&)>;
    func_t f_; // user-supplied projection
    explicit function_constraint(func_t f)
        : f_(std::move(f))
    {}
    void operator()(ArgumentContainer& x) const { f_(x); }
};
/// @brief Normalizes x onto the unit sphere (||x||_2 = 1). The zero vector
/// is left unchanged since it has no direction.
template<typename ArgumentContainer, typename RealType>
struct unit_sphere_constraint
{
    void operator()(ArgumentContainer& x) const
    {
        RealType sq = RealType{0};
        for (const auto& xi : x) {
            sq += xi * xi;
        }
        const RealType len = std::sqrt(sq);
        if (!(len > RealType{0})) {
            return; // zero vector: nothing to normalize
        }
        for (auto& xi : x) {
            xi /= len;
        }
    }
};
/*****************************************************************************************/
/// @brief Core optimization loop shared by all minimize() overloads:
/// step -> project -> (optionally record history) -> check convergence /
/// termination.
/// @param opt       optimizer exposing step()/arguments()/gradients()/objective_value()
/// @param project   constraint policy applied to the arguments after each step
/// @param converged convergence policy over (gradients, objective value)
/// @param terminate termination policy over the iteration count
/// @param history   when true, the objective value is recorded every iteration
/// @return populated optimization_result
/// Fix: the convergence policy is evaluated exactly once per iteration and
/// the result cached. Previously it was called a second time after the loop
/// to fill result.converged, which advanced the internal state of stateful
/// policies (e.g. objective_tol_convergence_policy's last_value_) and could
/// misreport convergence.
template<class Optimizer, class ConstraintPolicy, class ConvergencePolicy, class TerminationPolicy>
auto minimize_impl(Optimizer& opt,
                   ConstraintPolicy project,
                   ConvergencePolicy converged,
                   TerminationPolicy terminate,
                   bool history)
{
    optimization_result<typename Optimizer::real_type_t> result;
    size_t iter = 0;
    bool has_converged = false;
    do {
        opt.step();               // one optimizer update
        project(opt.arguments()); // project back onto the feasible set
        ++iter;
        if (history) {
            result.objective_history.push_back(opt.objective_value());
        }
        has_converged = converged(opt.gradients(), opt.objective_value());
    } while (!has_converged && !terminate(iter));
    result.num_iter = iter;
    result.objective_value = opt.objective_value();
    result.converged = has_converged;
    return result;
}
/// @brief User-facing entry point: runs minimize_impl with sensible defaults
/// — no constraints, gradient-2-norm convergence at 1e-5, a 10000-iteration
/// cap, and no history recording. Any policy can be overridden positionally.
template<class Optimizer,
         class ConstraintPolicy = unconstrained_policy<typename Optimizer::argument_container_t>,
         class ConvergencePolicy = gradient_norm_convergence_policy<typename Optimizer::real_type_t>,
         class TerminationPolicy = max_iter_termination_policy>
auto minimize(Optimizer& opt,
              ConstraintPolicy project = ConstraintPolicy{},
              ConvergencePolicy converged
              = ConvergencePolicy{static_cast<typename Optimizer::real_type_t>(1e-5)},
              TerminationPolicy terminate = TerminationPolicy(10000),
              bool history = false)
{
    return minimize_impl(opt, project, converged, terminate, history);
}
} // namespace optimization
} // namespace math
} // namespace boost
#endif

View File

@@ -0,0 +1,15 @@
#ifndef NESTEROV_HPP
#define NESTEROV_HPP
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
#include <functional>
#include <vector>
namespace boost {
namespace math {
namespace optimization {
} // namespace optimization
} // namespace math
} // namespace boost
#endif // NESTEROV_HPP

View File

@@ -12,6 +12,16 @@
#include <array>
#include <vector>
/* simple n-d quadratic function */
/* n-dimensional quadratic bowl: f(x) = sum_i x_i^2, minimized at the origin.
 * Takes x by non-const reference so autodiff argument types work unchanged. */
template<typename RealType>
RealType quadratic(std::vector<RealType> &x)
{
    RealType total{0.0};
    for (auto &xi : x) {
        total += xi * xi;
    }
    return total;
}
// Taken from: https://en.wikipedia.org/wiki/Test_functions_for_optimization
template <typename Real> Real ackley(std::array<Real, 2> const &v) {
using std::sqrt;

View File

@@ -0,0 +1,328 @@
#include "test_autodiff_reverse.hpp" // reuse for some basic options
#include "test_functions_for_optimization.hpp"
#include <boost/math/differentiation/autodiff_reverse.hpp>
#include <boost/math/optimization/gradient_descent.hpp>
#include <boost/math/optimization/minimizer.hpp>
namespace rdiff = boost::math::differentiation::reverse_mode;
namespace bopt = boost::math::optimization;
BOOST_AUTO_TEST_SUITE(basic_gradient_descent)
// Plain gradient descent on f(x) = sum x_i^2 with the default reverse-mode
// autodiff policies: after NITER explicit steps every coordinate must be ~0.
BOOST_AUTO_TEST_CASE_TEMPLATE(default_gd_test, T, all_float_types)
{
    size_t NITER = 2000; // number of explicit optimizer steps
    size_t N = 15;       // problem dimension
    T lr = T{1e-2};      // learning rate
    // Fix: second bound was the bare literal (100); spell both bounds as T
    // for consistency and to avoid an implicit int -> T conversion.
    RandomSample<T> rng{T(-100), T(100)};
    std::vector<rdiff::rvar<T, 1>> x_ad;
    T eps = T{1e-3};
    for (size_t i = 0; i < N; ++i) {
        x_ad.push_back(rng.next());
    }
    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
    for (size_t i = 0; i < NITER; ++i) {
        gdopt.step();
    }
    for (auto& x : x_ad) {
        BOOST_REQUIRE_SMALL(x.item(), eps);
    }
}
// minimize() with all-default policies must drive the quadratic's arguments
// to ~0 without an explicit step loop.
BOOST_AUTO_TEST_CASE_TEMPLATE(test_minimize, T, all_float_types)
{
    size_t N = 15;  // problem dimension
    T lr = T{1e-2}; // learning rate
    // Fix: second bound was the bare literal (100); also removed the unused
    // NITER local and the unused result variable `z`.
    RandomSample<T> rng{T(-100), T(100)};
    std::vector<rdiff::rvar<T, 1>> x_ad;
    T eps = T{1e-3};
    for (size_t i = 0; i < N; ++i) {
        x_ad.push_back(rng.next());
    }
    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x_ad, lr);
    // Qualify the call: the other tests spell out bopt::minimize, and the
    // previous unqualified call only worked through ADL.
    bopt::minimize(gdopt);
    for (auto& x : x_ad) {
        BOOST_REQUIRE_SMALL(x.item(), eps);
    }
}
// Uniform random initializer with a fixed seed: every coordinate must land
// inside [-2, 2]; then one step() as a smoke check.
BOOST_AUTO_TEST_CASE_TEMPLATE(random_initializer_test, T, all_float_types)
{
    size_t N = 10;
    T lr = T{1e-2};
    std::vector<rdiff::rvar<T, 1>> x(N);
    auto gdopt = bopt::make_gradient_descent(
        &quadratic<rdiff::rvar<T, 1>>,
        x,
        lr,
        bopt::random_uniform_initializer_rvar<T>(-2.0, 2.0, 1234)); // uniform in [-2, 2], fixed seed
    for (auto& xi : x) {
        T v = xi.item();
        BOOST_TEST(v >= -2);
        BOOST_TEST(v <= 2);
    }
    gdopt.step(); // smoke-check the optimizer still steps after init
}
// Constant initializer: every coordinate must start at exactly 5; then one
// step() as a smoke check.
BOOST_AUTO_TEST_CASE_TEMPLATE(const_initializer_test, T, all_float_types)
{
    size_t N = 10;
    T lr = T{1e-2};
    std::vector<rdiff::rvar<T, 1>> x(N);
    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>,
                                             x,
                                             lr,
                                             bopt::costant_initializer_rvar<T>(
                                                 T{5.0})); // all initialized to 5
    for (auto& xi : x) {
        T v = xi.item();
        BOOST_REQUIRE_CLOSE(v, T{5.0}, T{1e-3});
    }
    gdopt.step(); // smoke-check one optimizer step after initialization
}
// minimize() under box constraints: starting well outside the box, the final
// iterate must lie inside [-1, 1] in every coordinate.
BOOST_AUTO_TEST_CASE_TEMPLATE(box_constraint_test, T, all_float_types)
{
    size_t N = 5;
    T lr = T{1e-2};
    std::vector<rdiff::rvar<T, 1>> x(N, T{10}); // start outside the box
    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
    auto res = bopt::minimize(gdopt,
                              bopt::box_constraints<std::vector<rdiff::rvar<T, 1>>, T>(-1.0, 1.0));
    for (auto& xi : x) {
        BOOST_TEST(xi.item() >= -1.0);
        BOOST_TEST(xi.item() <= 1.0);
    }
}
// With a tiny learning rate and a near-impossible gradient tolerance, the
// optimizer must hit the iteration cap and report non-convergence.
// (Removed the unused local N; the start point is the explicit {5, 5}.)
BOOST_AUTO_TEST_CASE_TEMPLATE(max_iter_test, T, all_float_types)
{
    T lr = T{1e-6}; // very slow learning
    std::vector<rdiff::rvar<T, 1>> x = {T{5}, T{5}};
    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
    size_t max_iter = 50;
    auto res = bopt::minimize(gdopt,
                              bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
                              bopt::gradient_norm_convergence_policy<T>(T{1e-20}),
                              bopt::max_iter_termination_policy(max_iter));
    BOOST_TEST(!res.converged); // should not converge with tiny lr
    BOOST_REQUIRE_EQUAL(res.num_iter, max_iter);
}
// History tracking: with recording enabled, minimize() must produce a
// nonempty objective trace that decreases from front to back.
// (Removed the unused local N; the start point is the explicit initializer.)
BOOST_AUTO_TEST_CASE_TEMPLATE(history_tracking_test, T, all_float_types)
{
    T lr = T{1e-2};
    std::vector<rdiff::rvar<T, 1>> x = {T{3}, T{-4}, T{5}};
    auto gdopt = bopt::make_gradient_descent(&quadratic<rdiff::rvar<T, 1>>, x, lr);
    auto res = bopt::minimize(gdopt,
                              bopt::unconstrained_policy<std::vector<rdiff::rvar<T, 1>>>{},
                              bopt::gradient_norm_convergence_policy<T>(T{1e-6}),
                              bopt::max_iter_termination_policy(1000),
                              true); // enable history
    BOOST_TEST(!res.objective_history.empty());
    BOOST_TEST(res.objective_history.front() > res.objective_history.back());
}
// Rosenbrock function from the classic bad starting point (-1.2, 1): slow
// but steady gradient descent must reach the global minimum at (1, 1).
BOOST_AUTO_TEST_CASE_TEMPLATE(rosenbrock_test, T, all_float_types)
{
    std::array<rdiff::rvar<T, 1>, 2> x = {T{-1.2}, T{1.0}}; // bad start
    T lr = T{1e-3};
    auto gdopt = bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T, 1>>, x, lr);
    auto res = bopt::minimize(gdopt,
                              bopt::unconstrained_policy<std::array<rdiff::rvar<T, 1>, 2>>{},
                              bopt::gradient_norm_convergence_policy<T>(T{1e-4}),
                              bopt::max_iter_termination_policy(50000));
    BOOST_TEST(res.converged);
    BOOST_REQUIRE_CLOSE(x[0].item(), T{1.0}, T{1e-1});
    BOOST_REQUIRE_CLOSE(x[1].item(), T{1.0}, T{1e-1});
}
// Absolute objective-difference policy: the first call only primes state;
// then |delta| = 1 > 1e-3 keeps going, |delta| = 5e-4 < 1e-3 converges.
BOOST_AUTO_TEST_CASE(objective_tol_convergence_test)
{
    using policy_t = bopt::objective_tol_convergence_policy<double>;
    policy_t pol(1e-3);
    std::vector<double> dummy_grad; // gradients ignored by this policy
    BOOST_TEST(!pol(dummy_grad, 100.0));
    BOOST_TEST(!pol(dummy_grad, 99.0));
    BOOST_TEST(pol(dummy_grad, 99.0005));
}
// Relative objective-difference policy: first call primes; then
// 10/max(1,1000) = 1e-2 > 1e-3 keeps going, 0.5/1010 ~= 5e-4 < 1e-3 converges.
BOOST_AUTO_TEST_CASE(relative_objective_tol_test)
{
    using policy_t = bopt::relative_objective_tol_policy<double>;
    policy_t pol(1e-3);
    std::vector<double> dummy_grad; // gradients ignored by this policy
    BOOST_TEST(!pol(dummy_grad, 1000.0));
    BOOST_TEST(!pol(dummy_grad, 1010.0));
    BOOST_TEST(pol(dummy_grad, 1010.5));
}
// OR-combination of absolute (1e-6) and relative (1e-3) policies: must fire
// as soon as either sub-policy does.
BOOST_AUTO_TEST_CASE(combined_policy_test)
{
    using pol_abs = bopt::objective_tol_convergence_policy<double>;
    using pol_rel = bopt::relative_objective_tol_policy<double>;
    using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;
    pol_abs abs_pol(1e-6);
    pol_rel rel_pol(1e-3);
    pol_comb comb(abs_pol, rel_pol);
    std::vector<double> dummy_grad;
    BOOST_TEST(!comb(dummy_grad, 100.0));      // first call primes both
    BOOST_TEST(!comb(dummy_grad, 110.0));      // |delta|=10, rel=0.1: neither fires
    BOOST_TEST(comb(dummy_grad, 110.1));       // rel ~= 9.1e-4 < 1e-3
    BOOST_TEST(comb(dummy_grad, 110.1000001)); // |delta|=1e-7 < 1e-6
}
// Nonnegativity projection: negatives clipped to zero, positives untouched.
BOOST_AUTO_TEST_CASE(nonnegativity_constraint_test)
{
    std::vector<double> x = {1.0, -2.0, 3.0, -4.0};
    bopt::nonnegativity_constraint<std::vector<double>, double> proj;
    proj(x);
    for (auto& xi : x)
        BOOST_TEST(xi >= 0.0);
    BOOST_TEST(x == std::vector<double>({1.0, 0.0, 3.0, 0.0}));
}
// L2-ball projection: a vector of norm 5 is scaled back onto the unit circle.
BOOST_AUTO_TEST_CASE(l2_ball_constraint_test)
{
    std::vector<double> x = {3.0, 4.0}; // norm = 5
    bopt::l2_ball_constraint<std::vector<double>, double> proj(1.0);
    proj(x);
    double norm = std::sqrt(x[0] * x[0] + x[1] * x[1]);
    BOOST_TEST(std::abs(norm - 1.0) < 1e-12); // projected to unit circle
}
// L1-ball rescaling: a vector with 1-norm 7 is scaled so its 1-norm equals 2.
BOOST_AUTO_TEST_CASE(l1_ball_constraint_test)
{
    std::vector<double> x = {3.0, 4.0}; // L1 norm = 7
    bopt::l1_ball_constraint<std::vector<double>, double> proj(2.0);
    proj(x);
    double norm1 = std::abs(x[0]) + std::abs(x[1]);
    BOOST_TEST(std::abs(norm1 - 2.0) < 1e-12);
}
// Simplex constraint: negatives are clipped, then the vector is normalized
// so the coordinates are nonnegative and sum to 1.
BOOST_AUTO_TEST_CASE(simplex_constraint_test)
{
    std::vector<double> x = {-1.0, 2.0, 3.0}; // has negative and sum != 1
    bopt::simplex_constraint<std::vector<double>, double> proj;
    proj(x);
    double sum = 0.0;
    for (auto& xi : x) {
        BOOST_TEST(xi >= 0.0); // all nonnegative
        sum += xi;
    }
    BOOST_TEST(std::abs(sum - 1.0) < 1e-12); // normalized to sum=1
}
// Unit-sphere constraint: a nonzero vector is normalized to norm 1 even when
// it starts strictly inside the sphere.
BOOST_AUTO_TEST_CASE(unit_sphere_constraint_test)
{
    std::vector<double> x = {0.3, 0.4}; // norm = 0.5
    bopt::unit_sphere_constraint<std::vector<double>, double> proj;
    proj(x);
    double norm = std::sqrt(x[0] * x[0] + x[1] * x[1]);
    BOOST_TEST(std::abs(norm - 1.0) < 1e-12); // always projected to sphere
}
// function_constraint: wraps an arbitrary callable (here, clip-to-0.5) and
// applies it as a projection.
BOOST_AUTO_TEST_CASE(function_constraint_test)
{
    auto clip_to_half = [](std::vector<double>& v) {
        for (auto& xi : v)
            if (xi > 0.5)
                xi = 0.5;
    };
    bopt::function_constraint<std::vector<double>> proj(clip_to_half);
    std::vector<double> x = {0.2, 0.7, 1.5};
    proj(x);
    BOOST_TEST(x == std::vector<double>({0.2, 0.5, 0.5}));
}
// Initialization policy that deliberately leaves the arguments untouched
// (for tests that pre-populate their own starting point).
// Fix: the parameter is now unnamed — it was named `x` but never used,
// triggering -Wunused-parameter.
template<typename RealType>
struct no_init_policy
{
    void operator()(std::vector<RealType>&) const noexcept {}
};
// Objective-evaluation policy for plain (non-autodiff) scalar types: simply
// invokes the objective — there is no tape to manage.
template<typename RealType>
struct analytic_objective_eval_pol
{
    template<typename Objective, typename ArgumentContainer>
    RealType operator()(Objective&& objective, ArgumentContainer& x)
    {
        RealType value = objective(x);
        return value;
    }
};
// Gradient-evaluation policy with a hand-coded derivative for the quadratic
// objective: grad_i = 2 * x_i. Owns the gradient storage and hands out
// reference_wrappers into it via bind().
template<typename RealType>
struct analytic_gradient_eval_pol
{
    std::vector<RealType> grad_container; // owning storage for the gradient

    // Size the storage to match x and rebuild g as references into it.
    template<typename ArgumentContainer>
    void bind(ArgumentContainer& x, std::vector<std::reference_wrapper<RealType>>& g)
    {
        grad_container.resize(x.size(), RealType{0.0});
        g.clear();
        g.reserve(x.size());
        for (auto& slot : grad_container) {
            g.push_back(std::ref(slot));
        }
    }

    // Evaluate the objective through the eval policy, then overwrite the
    // gradient analytically (2 * x_i for the quadratic).
    template<class Objective, class ArgumentContainer, class FunctionEvaluationPolicy>
    void operator()(Objective&& obj_f,
                    ArgumentContainer& x,
                    FunctionEvaluationPolicy&& f_eval_pol,
                    RealType& obj_v)
    {
        obj_v = f_eval_pol(obj_f, x);
        for (size_t idx = 0; idx < x.size(); ++idx) {
            grad_container[idx] = 2 * x[idx];
        }
    }
};
// End-to-end run with plain (non-autodiff) scalars and the hand-written
// analytic policies above: gradient descent must still drive the quadratic's
// arguments to ~0.
BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_derivative_policies, T, all_float_types)
{
    std::vector<T> x;
    size_t NITER = 2000;
    size_t N = 15;
    T lr = T{1e-2};
    RandomSample<T> rng{T(-100), (100)};
    T eps = T{1e-3};
    for (size_t i = 0; i < N; ++i) {
        x.push_back(rng.next());
    }
    auto gdopt = bopt::make_gradient_descent(&quadratic<T>,
                                             x,
                                             lr,
                                             no_init_policy<T>{},
                                             analytic_objective_eval_pol<T>{},
                                             analytic_gradient_eval_pol<T>{});
    for (size_t i = 0; i < NITER; ++i) {
        gdopt.step();
    }
    for (auto& xi : x) {
        BOOST_REQUIRE_SMALL(xi, eps);
    }
}
BOOST_AUTO_TEST_SUITE_END()