diff --git a/doc/html/math_toolkit/gd_opt.html b/doc/html/math_toolkit/gd_opt.html new file mode 100644 index 000000000..58f10c8ff --- /dev/null +++ b/doc/html/math_toolkit/gd_opt.html @@ -0,0 +1,59 @@ + + + +Gradient Based Optimizers + + + + + + + + + + + + + + + + +
Boost C++ LibrariesHomeLibrariesPeopleFAQMore
+
+
+PrevUpHomeNext +
+
+

+Gradient Based Optimizers +

+
+
Introduction
+
Gradient Descent
+
Nesterov Gradient Descent
+
L-BFGS
+
+

+ Gradient based optimizers are algorithms that use the gradient of a function + to iteratively find locally extreme points of functions over a set of parameters. + This section provides a description of a set of gradient optimizers. The optimizers + are written with boost::math::differentiation::reverse_mode::rvar in + mind; however, if a way to evaluate the function and its gradient is provided, + the optimizers should work in exactly the same way. +

+
+ +
+
+PrevUpHomeNext +
+ + diff --git a/doc/html/math_toolkit/gd_opt/gradient_descent.html b/doc/html/math_toolkit/gd_opt/gradient_descent.html new file mode 100644 index 000000000..416a19d1e --- /dev/null +++ b/doc/html/math_toolkit/gd_opt/gradient_descent.html @@ -0,0 +1,129 @@ + + + +Gradient Desccent + + + + + + + + + + + + + + + + +
Boost C++ LibrariesHomeLibrariesPeopleFAQMore
+
+
+PrevUpHomeNext +
+
+

+Gradient Descent

+
+ + Synopsis +
+
#include <boost/math/optimization/gradient_descent.hpp>
+
+template<typename ArgumentContainer,
+         typename RealType,
+     class Objective,
+     class InitializationPolicy,
+     class ObjectiveEvalPolicy,
+     class GradEvalPolicy>
+class gradient_descent {
+  public:
+    void step();
+};
+
+/* Convenience overloads  */
+/* make gradient descent by providing
+ ** objective function
+ ** variables to optimize over
+ ** optionally learning rate
+ *
+ * requires that code is written using boost::math::differentiation::rvar
+ */
+template<class Objective, typename ArgumentContainer, typename RealType>
+auto make_gradient_descent(Objective&& obj, ArgumentContainer& x, RealType lr = RealType{ 0.01 });
+
+/* make gradient descent by providing
+ * objective function
+ ** variables to optimize over
+ ** learning rate (not optional)
+ ** initialization policy
+ *
+ * requires that code is written using boost::math::differentiation::rvar
+ */
+
+template<class Objective, typename ArgumentContainer, typename RealType, class InitializationPolicy>
+    auto make_gradient_descent(Objective&& obj,
+                               ArgumentContainer& x,
+                               RealType lr,
+                               InitializationPolicy&& ip);
+/* make gradient descent by providing
+** objective function
+** variables to optimize over
+** learning rate (not optional)
+** variable initialization policy
+** objective evaluation policy
+** gradient evaluation policy
+*
+* code does not have to use boost::math::differentiation::rvar
+*/
+template<typename ArgumentContainer,
+             typename RealType,
+             class Objective,
+         class InitializationPolicy,
+             class ObjectiveEvalPolicy,
+             class GradEvalPolicy>
+auto make_gradient_descent(Objective&& obj,
+                           ArgumentContainer& x,
+                               RealType& lr,
+                               InitializationPolicy&& ip,
+                               ObjectiveEvalPolicy&& oep,
+                               GradEvalPolicy&& gep)
+
+

+ Gradient descent iteratively updates parameters x + in the direction opposite to the gradient of the objective function (minimizing + the objective). +

+
x[i] -= lr * g[i]
+
+

+ where lr is a user defined + learning rate. For a more complete description of the theoretical principle + check the Wikipedia + page +

+

+ The implementation delegates: - the initialization of differentiable variables + to an initialization policy - objective evaluation to an objective evaluation + policy - the gradient computation to a gradient evaluation policy - the parameter + updates to an update policy +

+
+ +
+
+PrevUpHomeNext +
+ + diff --git a/doc/html/math_toolkit/gd_opt/introduction.html b/doc/html/math_toolkit/gd_opt/introduction.html new file mode 100644 index 000000000..95fbfa23f --- /dev/null +++ b/doc/html/math_toolkit/gd_opt/introduction.html @@ -0,0 +1,43 @@ + + + +Introduction + + + + + + + + + + + + + + + + +
Boost C++ LibrariesHomeLibrariesPeopleFAQMore
+
+
+PrevUpHomeNext +
+

+Introduction +

+ +
+
+PrevUpHomeNext +
+ + diff --git a/doc/html/math_toolkit/gd_opt/lbfgs.html b/doc/html/math_toolkit/gd_opt/lbfgs.html new file mode 100644 index 000000000..f2ae74c3b --- /dev/null +++ b/doc/html/math_toolkit/gd_opt/lbfgs.html @@ -0,0 +1,43 @@ + + + +L-BFGS + + + + + + + + + + + + + + + + +
Boost C++ LibrariesHomeLibrariesPeopleFAQMore
+
+
+PrevUpHomeNext +
+

+L-BFGS +

+ +
+
+PrevUpHomeNext +
+ + diff --git a/doc/html/math_toolkit/gd_opt/nesterov.html b/doc/html/math_toolkit/gd_opt/nesterov.html new file mode 100644 index 000000000..1ba82e55a --- /dev/null +++ b/doc/html/math_toolkit/gd_opt/nesterov.html @@ -0,0 +1,43 @@ + + + +Nesterov Gradient Desccent + + + + + + + + + + + + + + + + +
Boost C++ LibrariesHomeLibrariesPeopleFAQMore
+
+
+PrevUpHomeNext +
+

+Nesterov Gradient Descent

+ +
+
+PrevUpHomeNext +
+ + diff --git a/doc/math.qbk b/doc/math.qbk index 1f479b2d4..046720e26 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -739,6 +739,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include optimization/jso.qbk] [include optimization/random_search.qbk] [include optimization/cma_es.qbk] +[include optimization/gradient_optimizers.qbk] [endmathpart] [/mathpart optimization Optimization] [mathpart poly Polynomials and Rational Functions] diff --git a/doc/optimization/gradient_optimizers.qbk b/doc/optimization/gradient_optimizers.qbk new file mode 100644 index 000000000..bf1120340 --- /dev/null +++ b/doc/optimization/gradient_optimizers.qbk @@ -0,0 +1,101 @@ +[/ +Copyright (c) 2025-2026 Maksym Zhelyeznyakov +Use, modification and distribution are subject to the +Boost Software License, Version 1.0. (See accompanying file +LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:gd_opt Gradient Based Optimizers] + +Gradient based optimizers are algorithms that use the gradient of a function to iteratively find locally extreme points of functions over a set of parameters. This section provides a description of a set of gradient optimizers. The optimizers are written with `boost::math::differentiation::reverse_mode::rvar` in mind; however, if a way to evaluate the function and its gradient is provided, the optimizers should work in exactly the same way. 
+ +[section:introduction Introduction] +[endsect] [/section:introduction] + +[section:gradient_descent Gradient Desccent] +[heading Synopsis] +`` + #include + + template + class gradient_descent { + public: + void step(); + } + + /* Convenience overloads */ + /* make gradient descent by providing + ** objective function + ** variables to optimize over + ** optionally learing rate + * + * requires that code is written using boost::math::differentiation::rvar + */ + template + auto make_gradient_descent(Objective&& obj, ArgumentContainer& x, RealType lr = RealType{ 0.01 }); + + /* make gradient descent by providing + * objective function + ** variables to optimize over + ** learning rate (not optional) + ** initialization policy + * + * requires that code is written using boost::math::differentiation::rvar + */ + + template + auto make_gradient_descent(Objective&& obj, + ArgumentContainer& x, + RealType lr, + InitializationPolicy&& ip); + /* make gradient descent by providing + ** objective function + ** variables to optimize over + ** learning rate (not optional) + ** variable initialization policy + ** objective evaluation policy + ** gradient evaluation policy + * + * code does not have to use boost::math::differentiation::rvar + */ + template + auto make_gradient_descent(Objective&& obj, + ArgumentContainer& x, + RealType& lr, + InitializationPolicy&& ip, + ObjectiveEvalPolicy&& oep, + GradEvalPolicy&& gep) +`` + +Gradient descent iteratively updates parameters `x` in the direction opposite to the gradient of the objective function (minimizing the objective). +`` +x[i] -= lr * g[i] +`` +where `lr` is a user defined learning rate. 
For a more complete description of the theoretical principle check [@https://en.wikipedia.org/wiki/Gradient_descent the Wikipedia page]. + +The implementation delegates: +- the initialization of differentiable variables to an initialization policy +- objective evaluation to an objective evaluation policy +- the gradient computation to a gradient evaluation policy +- the parameter updates to an update policy + +[endsect] [/section:gradient_descent] + +[section:nesterov Nesterov Gradient Descent] +[endsect] [/section:nesterov] + +[section:lbfgs L-BFGS] +[endsect] [/section:lbfgs] + + +[endsect] [/section:gd_opt]