mirror of
https://github.com/boostorg/math.git
synced 2026-02-25 16:32:15 +00:00
Merge branch 'boostorg:develop' into non-central-f-inverse
This commit is contained in:
@@ -14,7 +14,6 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <boost/math/tools/assert.hpp>
|
||||
#include <boost/math/special_functions/detail/fp_traits.hpp>
|
||||
#include <boost/math/ccmath/isnan.hpp>
|
||||
#include <boost/math/ccmath/abs.hpp>
|
||||
|
||||
|
||||
@@ -580,6 +580,8 @@ public:
|
||||
}
|
||||
it->backward();
|
||||
}
|
||||
|
||||
void set_item(RealType value) { value_ = inner_t(value); }
|
||||
};
|
||||
|
||||
template<typename RealType, size_t DerivativeOrder>
|
||||
|
||||
@@ -23,407 +23,435 @@ namespace detail {
|
||||
template<typename allocator_type, size_t buffer_size>
|
||||
class flat_linear_allocator_iterator
|
||||
{
|
||||
/**
|
||||
/**
|
||||
* @brief enables iterating over linear allocator with
|
||||
* c++ iterators
|
||||
*/
|
||||
public:
|
||||
using raw_allocator_type = std::remove_const_t<allocator_type>;
|
||||
using value_type = typename allocator_type::value_type;
|
||||
using pointer = typename allocator_type::value_type *;
|
||||
using const_ptr_type = const value_type *;
|
||||
using reference = typename allocator_type::value_type &;
|
||||
using const_reference_type = const value_type &;
|
||||
using iterator_category = std::random_access_iterator_tag;
|
||||
using difference_type = ptrdiff_t;
|
||||
using raw_allocator_type = std::remove_const_t<allocator_type>;
|
||||
using value_type = typename allocator_type::value_type;
|
||||
using pointer = typename allocator_type::value_type*;
|
||||
using const_ptr_type = const value_type*;
|
||||
using reference = typename allocator_type::value_type&;
|
||||
using const_reference_type = const value_type&;
|
||||
using iterator_category = std::random_access_iterator_tag;
|
||||
using difference_type = ptrdiff_t;
|
||||
|
||||
private:
|
||||
const allocator_type *storage_ = nullptr;
|
||||
size_t index_ = 0;
|
||||
size_t begin_ = 0;
|
||||
size_t end_ = 0;
|
||||
const allocator_type* storage_ = nullptr;
|
||||
size_t index_ = 0;
|
||||
size_t begin_ = 0;
|
||||
size_t end_ = 0;
|
||||
|
||||
public:
|
||||
flat_linear_allocator_iterator() = default;
|
||||
flat_linear_allocator_iterator() = default;
|
||||
|
||||
explicit flat_linear_allocator_iterator(allocator_type *storage, size_t index)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(0)
|
||||
, end_(storage->size())
|
||||
{}
|
||||
explicit flat_linear_allocator_iterator(allocator_type* storage, size_t index)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(0)
|
||||
, end_(storage->size())
|
||||
{
|
||||
}
|
||||
|
||||
explicit flat_linear_allocator_iterator(allocator_type *storage,
|
||||
size_t index,
|
||||
size_t begin,
|
||||
size_t end)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(begin)
|
||||
, end_(end)
|
||||
{}
|
||||
explicit flat_linear_allocator_iterator(allocator_type* storage,
|
||||
size_t index,
|
||||
size_t begin,
|
||||
size_t end)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(begin)
|
||||
, end_(end)
|
||||
{
|
||||
}
|
||||
|
||||
explicit flat_linear_allocator_iterator(const allocator_type *storage, size_t index)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(0)
|
||||
, end_(storage->size())
|
||||
{}
|
||||
explicit flat_linear_allocator_iterator(const allocator_type* storage,
|
||||
size_t index)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(0)
|
||||
, end_(storage->size())
|
||||
{
|
||||
}
|
||||
|
||||
explicit flat_linear_allocator_iterator(const allocator_type *storage,
|
||||
size_t index,
|
||||
size_t begin,
|
||||
size_t end)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(begin)
|
||||
, end_(end)
|
||||
{}
|
||||
reference operator*()
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size];
|
||||
}
|
||||
explicit flat_linear_allocator_iterator(const allocator_type* storage,
|
||||
size_t index,
|
||||
size_t begin,
|
||||
size_t end)
|
||||
: storage_(storage)
|
||||
, index_(index)
|
||||
, begin_(begin)
|
||||
, end_(end)
|
||||
{
|
||||
}
|
||||
reference operator*()
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size];
|
||||
}
|
||||
|
||||
const_reference_type operator*() const
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size];
|
||||
}
|
||||
const_reference_type operator*() const
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size];
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return &operator*();
|
||||
}
|
||||
pointer operator->()
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return &operator*();
|
||||
}
|
||||
|
||||
const_ptr_type operator->() const
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return &operator*();
|
||||
}
|
||||
flat_linear_allocator_iterator &operator++()
|
||||
{
|
||||
++index_;
|
||||
return *this;
|
||||
}
|
||||
const_ptr_type operator->() const
|
||||
{
|
||||
BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_);
|
||||
return &operator*();
|
||||
}
|
||||
flat_linear_allocator_iterator& operator++()
|
||||
{
|
||||
++index_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
flat_linear_allocator_iterator operator++(int)
|
||||
{
|
||||
auto tmp = *this;
|
||||
++(*this);
|
||||
return tmp;
|
||||
}
|
||||
flat_linear_allocator_iterator operator++(int)
|
||||
{
|
||||
auto tmp = *this;
|
||||
++(*this);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
flat_linear_allocator_iterator &operator--()
|
||||
{
|
||||
--index_;
|
||||
return *this;
|
||||
}
|
||||
flat_linear_allocator_iterator& operator--()
|
||||
{
|
||||
--index_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
flat_linear_allocator_iterator operator--(int)
|
||||
{
|
||||
auto tmp = *this;
|
||||
--(*this);
|
||||
return tmp;
|
||||
}
|
||||
flat_linear_allocator_iterator operator--(int)
|
||||
{
|
||||
auto tmp = *this;
|
||||
--(*this);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool operator==(const flat_linear_allocator_iterator &other) const
|
||||
{
|
||||
return index_ == other.index_ && storage_ == other.storage_;
|
||||
}
|
||||
bool operator==(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return index_ == other.index_ && storage_ == other.storage_;
|
||||
}
|
||||
|
||||
bool operator!=(const flat_linear_allocator_iterator &other) const { return !(*this == other); }
|
||||
bool operator!=(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
flat_linear_allocator_iterator operator+(difference_type n) const
|
||||
{
|
||||
return flat_linear_allocator_iterator(storage_, index_ + static_cast<size_t>(n), begin_, end_);
|
||||
}
|
||||
flat_linear_allocator_iterator operator+(difference_type n) const
|
||||
{
|
||||
return flat_linear_allocator_iterator(
|
||||
storage_, index_ + static_cast<size_t>(n), begin_, end_);
|
||||
}
|
||||
|
||||
flat_linear_allocator_iterator &operator+=(difference_type n)
|
||||
{
|
||||
index_ += n;
|
||||
return *this;
|
||||
}
|
||||
flat_linear_allocator_iterator& operator+=(difference_type n)
|
||||
{
|
||||
index_ += n;
|
||||
return *this;
|
||||
}
|
||||
|
||||
flat_linear_allocator_iterator operator-(difference_type n) const
|
||||
{
|
||||
return flat_linear_allocator_iterator(storage_, index_ - n, begin_, end_);
|
||||
}
|
||||
flat_linear_allocator_iterator &operator-=(difference_type n)
|
||||
{
|
||||
index_ -= n;
|
||||
return *this;
|
||||
}
|
||||
flat_linear_allocator_iterator operator-(difference_type n) const
|
||||
{
|
||||
return flat_linear_allocator_iterator(storage_, index_ - n, begin_, end_);
|
||||
}
|
||||
flat_linear_allocator_iterator& operator-=(difference_type n)
|
||||
{
|
||||
index_ -= n;
|
||||
return *this;
|
||||
}
|
||||
|
||||
difference_type operator-(const flat_linear_allocator_iterator &other) const
|
||||
{
|
||||
return static_cast<difference_type>(index_) - static_cast<difference_type>(other.index_);
|
||||
}
|
||||
difference_type operator-(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return static_cast<difference_type>(index_) -
|
||||
static_cast<difference_type>(other.index_);
|
||||
}
|
||||
|
||||
reference operator[](difference_type n) { return *(*this + n); }
|
||||
reference operator[](difference_type n) { return *(*this + n); }
|
||||
|
||||
const_reference_type operator[](difference_type n) const { return *(*this + n); }
|
||||
const_reference_type operator[](difference_type n) const
|
||||
{
|
||||
return *(*this + n);
|
||||
}
|
||||
|
||||
bool operator<(const flat_linear_allocator_iterator &other) const
|
||||
{
|
||||
return index_ < other.index_;
|
||||
}
|
||||
bool operator<(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return index_ < other.index_;
|
||||
}
|
||||
|
||||
bool operator>(const flat_linear_allocator_iterator &other) const
|
||||
{
|
||||
return index_ > other.index_;
|
||||
}
|
||||
bool operator>(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return index_ > other.index_;
|
||||
}
|
||||
|
||||
bool operator<=(const flat_linear_allocator_iterator &other) const
|
||||
{
|
||||
return index_ <= other.index_;
|
||||
}
|
||||
bool operator<=(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return index_ <= other.index_;
|
||||
}
|
||||
|
||||
bool operator>=(const flat_linear_allocator_iterator &other) const
|
||||
{
|
||||
return index_ >= other.index_;
|
||||
}
|
||||
bool operator>=(const flat_linear_allocator_iterator& other) const
|
||||
{
|
||||
return index_ >= other.index_;
|
||||
}
|
||||
|
||||
bool operator!() const noexcept { return storage_ == nullptr; }
|
||||
bool operator!() const noexcept { return storage_ == nullptr; }
|
||||
};
|
||||
/* memory management helps for tape */
|
||||
template<typename RealType, size_t buffer_size>
|
||||
class flat_linear_allocator
|
||||
{
|
||||
/** @brief basically a vector<array<T*, size>>
|
||||
/** @brief basically a vector<array<T*, size>>
|
||||
* intended to work like a vector that allocates memory in chunks
|
||||
* and doesn't invalidate references
|
||||
* */
|
||||
public:
|
||||
// store vector of unique pointers to arrays
|
||||
// to avoid vector reference invalidation
|
||||
using buffer_type = std::array<RealType, buffer_size>;
|
||||
using buffer_ptr = std::unique_ptr<std::array<RealType, buffer_size>>;
|
||||
// store vector of unique pointers to arrays
|
||||
// to avoid vector reference invalidation
|
||||
using buffer_type = std::array<RealType, buffer_size>;
|
||||
using buffer_ptr = std::unique_ptr<std::array<RealType, buffer_size>>;
|
||||
|
||||
private:
|
||||
std::vector<buffer_ptr> data_;
|
||||
size_t total_size_ = 0;
|
||||
std::vector<size_t> checkpoints_; //{0};
|
||||
std::vector<buffer_ptr> data_;
|
||||
size_t total_size_ = 0;
|
||||
std::vector<size_t> checkpoints_; //{0};
|
||||
|
||||
public:
|
||||
friend class flat_linear_allocator_iterator<flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
friend class flat_linear_allocator_iterator<const flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
using value_type = RealType;
|
||||
using iterator
|
||||
= flat_linear_allocator_iterator<flat_linear_allocator<RealType, buffer_size>, buffer_size>;
|
||||
using const_iterator
|
||||
= flat_linear_allocator_iterator<const flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
friend class flat_linear_allocator_iterator<
|
||||
flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
friend class flat_linear_allocator_iterator<
|
||||
const flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
using value_type = RealType;
|
||||
using iterator =
|
||||
flat_linear_allocator_iterator<flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
using const_iterator = flat_linear_allocator_iterator<
|
||||
const flat_linear_allocator<RealType, buffer_size>,
|
||||
buffer_size>;
|
||||
|
||||
size_t buffer_id() const noexcept { return total_size_ / buffer_size; }
|
||||
size_t item_id() const noexcept { return total_size_ % buffer_size; }
|
||||
size_t buffer_id() const noexcept { return total_size_ / buffer_size; }
|
||||
size_t item_id() const noexcept { return total_size_ % buffer_size; }
|
||||
|
||||
private:
|
||||
void allocate_buffer()
|
||||
{
|
||||
data_.emplace_back(std::make_unique<buffer_type>());
|
||||
}
|
||||
void allocate_buffer()
|
||||
{
|
||||
data_.emplace_back(std::make_unique<buffer_type>());
|
||||
}
|
||||
|
||||
public:
|
||||
flat_linear_allocator() { allocate_buffer(); }
|
||||
flat_linear_allocator(const flat_linear_allocator &) = delete;
|
||||
flat_linear_allocator &operator=(const flat_linear_allocator &) = delete;
|
||||
flat_linear_allocator(flat_linear_allocator &&) = delete;
|
||||
flat_linear_allocator &operator=(flat_linear_allocator &&) = delete;
|
||||
~flat_linear_allocator()
|
||||
{
|
||||
destroy_all();
|
||||
data_.clear();
|
||||
}
|
||||
flat_linear_allocator() { allocate_buffer(); }
|
||||
flat_linear_allocator(const flat_linear_allocator&) = delete;
|
||||
flat_linear_allocator& operator=(const flat_linear_allocator&) = delete;
|
||||
flat_linear_allocator(flat_linear_allocator&&) = delete;
|
||||
flat_linear_allocator& operator=(flat_linear_allocator&&) = delete;
|
||||
~flat_linear_allocator()
|
||||
{
|
||||
destroy_all();
|
||||
data_.clear();
|
||||
}
|
||||
|
||||
void destroy_all()
|
||||
{
|
||||
for (size_t i = 0; i < total_size_; ++i) {
|
||||
size_t bid = i / buffer_size;
|
||||
size_t iid = i % buffer_size;
|
||||
(*data_[bid])[iid].~RealType();
|
||||
}
|
||||
void destroy_all()
|
||||
{
|
||||
for (size_t i = 0; i < total_size_; ++i) {
|
||||
size_t bid = i / buffer_size;
|
||||
size_t iid = i % buffer_size;
|
||||
(*data_[bid])[iid].~RealType();
|
||||
}
|
||||
/** @brief
|
||||
}
|
||||
/** @brief
|
||||
* helper functions to clear tape and create block in tape
|
||||
*/
|
||||
void clear()
|
||||
{
|
||||
data_.clear();
|
||||
total_size_ = 0;
|
||||
checkpoints_.clear();
|
||||
allocate_buffer();
|
||||
}
|
||||
void clear()
|
||||
{
|
||||
data_.clear();
|
||||
total_size_ = 0;
|
||||
checkpoints_.clear();
|
||||
allocate_buffer();
|
||||
}
|
||||
|
||||
// doesn't delete anything, only sets the current index to zero
|
||||
void reset() { total_size_ = 0; }
|
||||
void rewind() { total_size_ = 0; };
|
||||
// doesn't delete anything, only sets the current index to zero
|
||||
void reset() { total_size_ = 0; }
|
||||
void rewind() { total_size_ = 0; };
|
||||
|
||||
// adds current index as a checkpoint to be able to walk back to
|
||||
void add_checkpoint()
|
||||
{
|
||||
if (total_size_ > 0) {
|
||||
checkpoints_.push_back(total_size_ - 1);
|
||||
} else {
|
||||
checkpoints_.push_back(0);
|
||||
}
|
||||
};
|
||||
// adds current index as a checkpoint to be able to walk back to
|
||||
void add_checkpoint()
|
||||
{
|
||||
checkpoints_.push_back(total_size_);
|
||||
/*
|
||||
* if (total_size_ > 0) {
|
||||
checkpoints_.push_back(total_size_ - 1);
|
||||
} else {
|
||||
checkpoints_.push_back(0);
|
||||
}*/
|
||||
};
|
||||
|
||||
/** @brief clears all checkpoints
|
||||
/** @brief clears all checkpoints
|
||||
* */
|
||||
void reset_checkpoints() { checkpoints_.clear(); }
|
||||
void reset_checkpoints() { checkpoints_.clear(); }
|
||||
|
||||
void rewind_to_last_checkpoint() { total_size_ = checkpoints_.back(); }
|
||||
void rewind_to_checkpoint_at(size_t index) { total_size_ = checkpoints_[index]; }
|
||||
void rewind_to_last_checkpoint() { total_size_ = checkpoints_.back(); }
|
||||
void rewind_to_checkpoint_at(size_t index)
|
||||
{
|
||||
total_size_ = checkpoints_[index];
|
||||
}
|
||||
|
||||
void fill(const RealType &val)
|
||||
{
|
||||
for (size_t i = 0; i < total_size_; ++i) {
|
||||
size_t bid = i / buffer_size;
|
||||
size_t iid = i % buffer_size;
|
||||
(*data_[bid])[iid] = val;
|
||||
}
|
||||
void fill(const RealType& val)
|
||||
{
|
||||
for (size_t i = 0; i < total_size_; ++i) {
|
||||
size_t bid = i / buffer_size;
|
||||
size_t iid = i % buffer_size;
|
||||
(*data_[bid])[iid] = val;
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief emplaces back object at the end of the
|
||||
/** @brief emplaces back object at the end of the
|
||||
* data structure, calls default constructor */
|
||||
iterator emplace_back()
|
||||
{
|
||||
if (item_id() == 0 && total_size_ != 0) {
|
||||
allocate_buffer();
|
||||
}
|
||||
size_t bid = buffer_id();
|
||||
size_t iid = item_id();
|
||||
iterator emplace_back()
|
||||
{
|
||||
if (item_id() == 0 && total_size_ != 0) {
|
||||
allocate_buffer();
|
||||
}
|
||||
size_t bid = buffer_id();
|
||||
size_t iid = item_id();
|
||||
|
||||
RealType *ptr = &(*data_[bid])[iid];
|
||||
new (ptr) RealType();
|
||||
++total_size_;
|
||||
return iterator(this, total_size_ - 1);
|
||||
};
|
||||
RealType* ptr = &(*data_[bid])[iid];
|
||||
new (ptr) RealType();
|
||||
++total_size_;
|
||||
return iterator(this, total_size_ - 1);
|
||||
};
|
||||
|
||||
/** @brief, emplaces back object at end of data structure,
|
||||
/** @brief, emplaces back object at end of data structure,
|
||||
* passes arguments to constructor */
|
||||
template<typename... Args>
|
||||
iterator emplace_back(Args &&...args)
|
||||
{
|
||||
if (item_id() == 0 && total_size_ != 0) {
|
||||
allocate_buffer();
|
||||
}
|
||||
BOOST_MATH_ASSERT(buffer_id() < data_.size());
|
||||
BOOST_MATH_ASSERT(item_id() < buffer_size);
|
||||
RealType *ptr = &(*data_[buffer_id()])[item_id()];
|
||||
new (ptr) RealType(std::forward<Args>(args)...);
|
||||
++total_size_;
|
||||
return iterator(this, total_size_ - 1);
|
||||
template<typename... Args>
|
||||
iterator emplace_back(Args&&... args)
|
||||
{
|
||||
if (item_id() == 0 && total_size_ != 0) {
|
||||
allocate_buffer();
|
||||
}
|
||||
/** @brief default constructs n objects at end of
|
||||
BOOST_MATH_ASSERT(buffer_id() < data_.size());
|
||||
BOOST_MATH_ASSERT(item_id() < buffer_size);
|
||||
RealType* ptr = &(*data_[buffer_id()])[item_id()];
|
||||
new (ptr) RealType(std::forward<Args>(args)...);
|
||||
++total_size_;
|
||||
return iterator(this, total_size_ - 1);
|
||||
}
|
||||
/** @brief default constructs n objects at end of
|
||||
* data structure, n known at compile time */
|
||||
template<size_t n>
|
||||
iterator emplace_back_n()
|
||||
{
|
||||
size_t bid = buffer_id();
|
||||
size_t iid = item_id();
|
||||
if (iid + n < buffer_size) {
|
||||
RealType *ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
total_size_ += n;
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
} else {
|
||||
size_t allocs_in_curr_buffer = buffer_size - iid;
|
||||
size_t allocs_in_next_buffer = n - (buffer_size - iid);
|
||||
RealType *ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_curr_buffer; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
allocate_buffer();
|
||||
bid = data_.size() - 1;
|
||||
iid = 0;
|
||||
total_size_ += n;
|
||||
template<size_t n>
|
||||
iterator emplace_back_n()
|
||||
{
|
||||
size_t bid = buffer_id();
|
||||
size_t iid = item_id();
|
||||
if (iid + n < buffer_size) {
|
||||
RealType* ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
total_size_ += n;
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
} else {
|
||||
size_t allocs_in_curr_buffer = buffer_size - iid;
|
||||
size_t allocs_in_next_buffer = n - (buffer_size - iid);
|
||||
RealType* ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_curr_buffer; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
allocate_buffer();
|
||||
bid = data_.size() - 1;
|
||||
iid = 0;
|
||||
total_size_ += n;
|
||||
|
||||
RealType *ptr2 = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_next_buffer; i++) {
|
||||
new (ptr2 + i) RealType();
|
||||
}
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
}
|
||||
RealType* ptr2 = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_next_buffer; i++) {
|
||||
new (ptr2 + i) RealType();
|
||||
}
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
}
|
||||
/** @brief default constructs n objects at end of
|
||||
}
|
||||
/** @brief default constructs n objects at end of
|
||||
* data structure, n known at run time
|
||||
*/
|
||||
iterator emplace_back_n(size_t n)
|
||||
{
|
||||
size_t bid = buffer_id();
|
||||
size_t iid = item_id();
|
||||
if (iid + n < buffer_size) {
|
||||
RealType *ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
total_size_ += n;
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
} else {
|
||||
size_t allocs_in_curr_buffer = buffer_size - iid;
|
||||
size_t allocs_in_next_buffer = n - (buffer_size - iid);
|
||||
RealType *ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_curr_buffer; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
allocate_buffer();
|
||||
bid = data_.size() - 1;
|
||||
iid = 0;
|
||||
total_size_ += n;
|
||||
iterator emplace_back_n(size_t n)
|
||||
{
|
||||
size_t bid = buffer_id();
|
||||
size_t iid = item_id();
|
||||
if (iid + n < buffer_size) {
|
||||
RealType* ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
total_size_ += n;
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
} else {
|
||||
size_t allocs_in_curr_buffer = buffer_size - iid;
|
||||
size_t allocs_in_next_buffer = n - (buffer_size - iid);
|
||||
RealType* ptr = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_curr_buffer; ++i) {
|
||||
new (ptr + i) RealType();
|
||||
}
|
||||
allocate_buffer();
|
||||
bid = data_.size() - 1;
|
||||
iid = 0;
|
||||
total_size_ += n;
|
||||
|
||||
RealType *ptr2 = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_next_buffer; i++) {
|
||||
new (ptr2 + i) RealType();
|
||||
}
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
}
|
||||
RealType* ptr2 = &(*data_[bid])[iid];
|
||||
for (size_t i = 0; i < allocs_in_next_buffer; i++) {
|
||||
new (ptr2 + i) RealType();
|
||||
}
|
||||
return iterator(this, total_size_ - n, total_size_ - n, total_size_);
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief number of elements */
|
||||
size_t size() const { return total_size_; }
|
||||
/** @brief number of elements */
|
||||
size_t size() const { return total_size_; }
|
||||
|
||||
/** @brief total capacity */
|
||||
size_t capacity() const { return data_.size() * buffer_size; }
|
||||
/** @brief total capacity */
|
||||
size_t capacity() const { return data_.size() * buffer_size; }
|
||||
|
||||
/** @brief iterator helpers */
|
||||
iterator begin() { return iterator(this, 0); }
|
||||
iterator end() { return iterator(this, total_size_); }
|
||||
const_iterator begin() const { return const_iterator(this, 0); }
|
||||
const_iterator end() const { return const_iterator(this, total_size_); }
|
||||
/** @brief iterator helpers */
|
||||
iterator begin() { return iterator(this, 0); }
|
||||
iterator end() { return iterator(this, total_size_); }
|
||||
const_iterator begin() const { return const_iterator(this, 0); }
|
||||
const_iterator end() const { return const_iterator(this, total_size_); }
|
||||
|
||||
iterator last_checkpoint() { return iterator(this, checkpoints_.back(), 0, total_size_); }
|
||||
iterator first_checkpoint() { return iterator(this, checkpoints_[0], 0, total_size_); };
|
||||
iterator checkpoint_at(size_t index)
|
||||
{
|
||||
return iterator(this, checkpoints_[index], 0, total_size_);
|
||||
};
|
||||
iterator last_checkpoint()
|
||||
{
|
||||
return iterator(this, checkpoints_.back(), 0, total_size_);
|
||||
}
|
||||
iterator first_checkpoint()
|
||||
{
|
||||
return iterator(this, checkpoints_[0], 0, total_size_);
|
||||
};
|
||||
iterator checkpoint_at(size_t index)
|
||||
{
|
||||
return iterator(this, checkpoints_[index], 0, total_size_);
|
||||
};
|
||||
|
||||
/** @brief searches for item in allocator
|
||||
/** @brief searches for item in allocator
|
||||
* only used to find gradient nodes for propagation */
|
||||
iterator find(const RealType *const item)
|
||||
{
|
||||
return std::find_if(begin(), end(), [&](const RealType &val) { return &val == item; });
|
||||
}
|
||||
/** @brief vector like access,
|
||||
iterator find(const RealType* const item)
|
||||
{
|
||||
return std::find_if(
|
||||
begin(), end(), [&](const RealType& val) { return &val == item; });
|
||||
}
|
||||
/** @brief vector like access,
|
||||
* currently unused anywhere but very useful for debugging
|
||||
*/
|
||||
RealType &operator[](std::size_t i)
|
||||
{
|
||||
BOOST_MATH_ASSERT(i < total_size_);
|
||||
return (*data_[i / buffer_size])[i % buffer_size];
|
||||
}
|
||||
const RealType &operator[](std::size_t i) const
|
||||
{
|
||||
BOOST_MATH_ASSERT(i < total_size_);
|
||||
return (*data_[i / buffer_size])[i % buffer_size];
|
||||
}
|
||||
RealType& operator[](std::size_t i)
|
||||
{
|
||||
BOOST_MATH_ASSERT(i < total_size_);
|
||||
return (*data_[i / buffer_size])[i % buffer_size];
|
||||
}
|
||||
const RealType& operator[](std::size_t i) const
|
||||
{
|
||||
BOOST_MATH_ASSERT(i < total_size_);
|
||||
return (*data_[i / buffer_size])[i % buffer_size];
|
||||
}
|
||||
};
|
||||
} // namespace detail
|
||||
} // namespace reverse_mode
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_DETAIL_DIFFERENTIABLE_OPT_UTILITIES_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_DETAIL_DIFFERENTIABLE_OPT_UTILITIES_HPP
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <cmath>
|
||||
#include <random>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
/** @brief> helper to get the underlying realtype from
|
||||
* update policy
|
||||
* */
|
||||
template<typename UpdPol>
|
||||
struct update_policy_real_type;
|
||||
|
||||
template<template<typename> class UpdPol, typename RealType>
|
||||
struct update_policy_real_type<UpdPol<RealType>>
|
||||
{
|
||||
using type = RealType;
|
||||
};
|
||||
|
||||
template<typename UpdPol>
|
||||
using update_policy_real_type_t =
|
||||
typename update_policy_real_type<typename std::decay<UpdPol>::type>::type;
|
||||
|
||||
/** @brief> get realtype from argument container
|
||||
* */
|
||||
template<class Container>
|
||||
struct argument_container_t
|
||||
{
|
||||
using type = typename argument_container_t<typename std::decay<Container>::type>::type;
|
||||
};
|
||||
|
||||
template<typename ValueType, std::size_t N>
|
||||
struct argument_container_t<std::array<ValueType, N>>
|
||||
{
|
||||
using type = ValueType;
|
||||
};
|
||||
|
||||
template<typename RealType, std::size_t M, std::size_t N>
|
||||
struct argument_container_t<std::array<rdiff::rvar<RealType, M>, N>>
|
||||
{
|
||||
using type = RealType;
|
||||
};
|
||||
|
||||
template<template<typename, typename...> class Container, typename ValueType, typename... Args>
|
||||
struct argument_container_t<Container<ValueType, Args...>>
|
||||
{
|
||||
using type = ValueType;
|
||||
};
|
||||
|
||||
template<template<typename, typename...> class Container,
|
||||
typename RealType,
|
||||
std::size_t N,
|
||||
typename... Args>
|
||||
struct argument_container_t<Container<rdiff::rvar<RealType, N>, Args...>>
|
||||
{
|
||||
using type = RealType;
|
||||
};
|
||||
/******************************************************************************/
|
||||
/** @brief simple blas helpers
|
||||
*/
|
||||
template<typename Container>
|
||||
auto
|
||||
dot(const Container& x, const Container& y) -> typename Container::value_type
|
||||
{
|
||||
using T = typename Container::value_type;
|
||||
BOOST_MATH_ASSERT(x.size() == y.size());
|
||||
return std::inner_product(x.begin(), x.end(), y.begin(), T(0));
|
||||
}
|
||||
|
||||
template<typename Container>
|
||||
auto
|
||||
norm_2(const Container& x) -> typename Container::value_type
|
||||
{
|
||||
return sqrt(dot(x, x));
|
||||
}
|
||||
|
||||
template<typename Container>
|
||||
auto
|
||||
norm_1(const Container& x) -> typename Container::value_type
|
||||
{
|
||||
using T = typename Container::value_type;
|
||||
T ret{ 0 };
|
||||
for (auto& xi : x) {
|
||||
ret += abs(xi);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T
|
||||
norm_inf(const std::vector<T>& x)
|
||||
{
|
||||
BOOST_ASSERT(!x.empty());
|
||||
|
||||
T max_val = std::abs(x[0]);
|
||||
const std::size_t n = x.size();
|
||||
|
||||
for (std::size_t i = 1; i < n; ++i) {
|
||||
const T abs_val = std::abs(x[i]);
|
||||
if (abs_val > max_val)
|
||||
max_val = abs_val;
|
||||
}
|
||||
return max_val;
|
||||
}
|
||||
/** @brief alpha*x (alpha is scalar, x is vector */
|
||||
template<typename Container, typename RealType>
|
||||
void
|
||||
scale(Container& x, const RealType& alpha)
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
xi *= alpha;
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief y += alpha * x
|
||||
*/
|
||||
template<typename ContainerX, typename ContainerY, typename RealType>
|
||||
void
|
||||
axpy(RealType alpha, const ContainerX& x, ContainerY& y)
|
||||
{
|
||||
BOOST_MATH_ASSERT(x.size() == y.size());
|
||||
const size_t n = x.size();
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
y[i] += alpha * x[i];
|
||||
}
|
||||
}
|
||||
/******************************************************************************/
|
||||
template<typename RealType>
|
||||
std::vector<RealType>
|
||||
random_vector(size_t n)
|
||||
{
|
||||
/** @brief> generates a random std::vector<RealType> of size n
|
||||
* using mt19937 algorithm
|
||||
*/
|
||||
static std::mt19937 rng{ std::random_device{}() };
|
||||
static std::uniform_real_distribution<double> dist(0.0, 1.0);
|
||||
|
||||
std::vector<RealType> result(n);
|
||||
std::generate(result.begin(), result.end(), [&] { return static_cast<RealType>(dist(rng)); });
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
92
include/boost/math/optimization/detail/gradient_opt_base.hpp
Normal file
92
include/boost/math/optimization/detail/gradient_opt_base.hpp
Normal file
@@ -0,0 +1,92 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_DETAIL_GRADIENT_OPT_BASE_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_DETAIL_GRADIENT_OPT_BASE_HPP
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
/**
|
||||
* @brief The abstract_optimizer class implementing common variables
|
||||
* and methods across optimizers
|
||||
*
|
||||
* @tparam> ArgumentContainer
|
||||
* @tparam> RealType
|
||||
* @tparam> Objective
|
||||
* @tparam> InitializationPolicy
|
||||
*
|
||||
*/
|
||||
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy,
|
||||
class UpdatePolicy,
|
||||
typename DerivedOptimizer>
|
||||
class abstract_optimizer
|
||||
{
|
||||
protected:
|
||||
Objective objective_; // obj function
|
||||
ArgumentContainer& x_; // arguments to objective function
|
||||
std::vector<RealType> g_; // container of references to gradients
|
||||
ObjectiveEvalPolicy obj_eval_; // how to evaluate your funciton
|
||||
GradEvalPolicy grad_eval_; // how to evaluate/bind gradients
|
||||
InitializationPolicy init_; // how to initialize the problem
|
||||
UpdatePolicy update_; // update step
|
||||
RealType obj_v_; // objective value (for history)
|
||||
// access derived class
|
||||
DerivedOptimizer& derived() { return static_cast<DerivedOptimizer&>(*this); }
|
||||
const DerivedOptimizer& derived() const
|
||||
{
|
||||
return static_cast<const DerivedOptimizer&>(*this);
|
||||
}
|
||||
|
||||
void step_impl()
|
||||
{
|
||||
grad_eval_(objective_, x_, obj_eval_, obj_v_, g_);
|
||||
for (size_t i = 0; i < x_.size(); ++i) {
|
||||
update_(x_[i], g_[i]);
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
using argument_container_t = ArgumentContainer;
|
||||
using real_type_t = RealType;
|
||||
|
||||
abstract_optimizer(Objective&& objective,
|
||||
ArgumentContainer& x,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep,
|
||||
UpdatePolicy&& up)
|
||||
: objective_(std::forward<Objective>(objective))
|
||||
, x_(x)
|
||||
, obj_eval_(std::forward<ObjectiveEvalPolicy>(oep))
|
||||
, grad_eval_(std::forward<GradEvalPolicy>(gep))
|
||||
, init_(std::forward<InitializationPolicy>(ip))
|
||||
, update_(std::forward<UpdatePolicy>(up))
|
||||
{
|
||||
init_(x_); // initialize your problem
|
||||
g_.resize(x_.size()); // initialize space for gradients
|
||||
}
|
||||
|
||||
ArgumentContainer& arguments() { return derived().x_; }
|
||||
const ArgumentContainer& arguments() const { return derived().x_; }
|
||||
|
||||
RealType& objective_value() { return derived().obj_v_; }
|
||||
const RealType& objective_value() const { return derived().obj_v_; }
|
||||
std::vector<RealType>& gradients() { return derived().g_; }
|
||||
const std::vector<RealType>& gradients() const { return derived().g_; }
|
||||
};
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
206
include/boost/math/optimization/detail/line_search_policies.hpp
Normal file
206
include/boost/math/optimization/detail/line_search_policies.hpp
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_DETAIL_LINE_SEARCH_POLICIES_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_DETAIL_LINE_SEARCH_POLICIES_HPP
|
||||
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
/**
|
||||
* @brief> Armijo condition backtracking line search
|
||||
* https://en.wikipedia.org/wiki/Backtracking_line_search
|
||||
*
|
||||
* f(x+alpha p) <= f(x) + alpha * c * grad(f)^T
|
||||
*
|
||||
* Jorge Nocedal and Stephen J. Wright,
|
||||
* Numerical Optimization, 2nd Edition,
|
||||
* Springer, 2006.
|
||||
*
|
||||
* Algorithm 3.1: Backtracking Line Search
|
||||
* (Page 37)
|
||||
*/
|
||||
template<typename RealType>
|
||||
class armijo_line_search_policy
|
||||
{
|
||||
private:
|
||||
RealType alpha0_; // initial step size
|
||||
RealType c_; // sufficient decrease constant
|
||||
RealType rho_; // backtracking factor
|
||||
int max_iter_; // maximum backtracking steps
|
||||
|
||||
public:
|
||||
armijo_line_search_policy(RealType alpha0 = 1.0,
|
||||
RealType c = 1e-4,
|
||||
RealType rho = 0.5,
|
||||
int max_iter = 20)
|
||||
: alpha0_(alpha0)
|
||||
, c_(c)
|
||||
, rho_(rho)
|
||||
, max_iter_(max_iter)
|
||||
{
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class ArgumentContainer>
|
||||
RealType operator()(Objective& objective,
|
||||
ObjectiveEvalPolicy& obj_eval,
|
||||
GradientEvalPolicy& grad_eval,
|
||||
ArgumentContainer& x,
|
||||
const std::vector<RealType>& g,
|
||||
const std::vector<RealType>& p,
|
||||
RealType f_x) const
|
||||
{
|
||||
/** @brief> line search
|
||||
* */
|
||||
RealType alpha = alpha0_;
|
||||
ArgumentContainer x_trial; // = x; // copy
|
||||
const RealType gTp = dot(g, p);
|
||||
|
||||
for (int iter = 0; iter < max_iter_; ++iter) {
|
||||
x_trial = x;
|
||||
axpy(alpha, p, x_trial);
|
||||
auto f_trial = obj_eval(objective, x_trial);
|
||||
if (f_trial <=
|
||||
f_x + c_ * alpha * gTp) // check if armijo condition is satisfied
|
||||
return alpha;
|
||||
alpha *= rho_;
|
||||
}
|
||||
return alpha;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief> Strong-Wolfe line search:
|
||||
* Jorge Nocedal and Stephen J. Wright,
|
||||
* Numerical Optimization, 2nd Edition,
|
||||
* Springer, 2006.
|
||||
*
|
||||
* Algorithm 3.5 Line Search Algorithm (Strong Wolfe Conditions)
|
||||
* pages 60-61
|
||||
*/
|
||||
template<typename RealType>
|
||||
class strong_wolfe_line_search_policy
|
||||
{
|
||||
private:
|
||||
RealType alpha0_; // initial step size
|
||||
RealType c1_; // Armijo constant (sufficient decrease)
|
||||
RealType c2_; // curvature constant
|
||||
RealType rho_; // backtracking factor
|
||||
int max_iter_; // maximum iterations
|
||||
public:
|
||||
strong_wolfe_line_search_policy(RealType alpha0 = 1.0,
|
||||
RealType c1 = 1e-4,
|
||||
RealType c2 = 0.9,
|
||||
RealType rho = 2.0,
|
||||
int max_iter = 20)
|
||||
: alpha0_(alpha0)
|
||||
, c1_(c1)
|
||||
, c2_(c2)
|
||||
, rho_(rho)
|
||||
, max_iter_(max_iter)
|
||||
{
|
||||
}
|
||||
template<class Objective,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class ArgumentContainer>
|
||||
RealType operator()(Objective& objective,
|
||||
ObjectiveEvalPolicy& obj_eval,
|
||||
GradientEvalPolicy& grad_eval,
|
||||
ArgumentContainer& x,
|
||||
const std::vector<RealType>& g,
|
||||
const std::vector<RealType>& p,
|
||||
RealType f_x) const
|
||||
{
|
||||
RealType gTp0 = dot(g, p);
|
||||
RealType alpha_prev{0};
|
||||
RealType f_prev = f_x;
|
||||
RealType alpha = alpha0_;
|
||||
|
||||
ArgumentContainer x_trial;
|
||||
std::vector<RealType> g_trial(g.size());
|
||||
for (int i = 0; i < max_iter_; ++i) {
|
||||
x_trial = x; // explicit copy
|
||||
axpy(alpha, p, x_trial);
|
||||
RealType f_trial = static_cast<RealType>(obj_eval(objective, x_trial));
|
||||
if ((f_trial > f_x + c1_ * alpha * gTp0) ||
|
||||
(i > 0 && f_trial >= f_prev)) {
|
||||
return zoom(
|
||||
objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha_prev, alpha);
|
||||
}
|
||||
grad_eval(objective, x_trial, obj_eval, f_trial, g_trial);
|
||||
RealType gTp = dot(g_trial, p);
|
||||
|
||||
if (fabs(gTp) <= c2_ * fabs(gTp0)) {
|
||||
return alpha;
|
||||
}
|
||||
|
||||
if (gTp >= 0) {
|
||||
return zoom(
|
||||
objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha, alpha_prev);
|
||||
}
|
||||
alpha_prev = alpha;
|
||||
f_prev = f_trial;
|
||||
alpha *= rho_;
|
||||
}
|
||||
|
||||
return alpha;
|
||||
}
|
||||
|
||||
private:
|
||||
template<class Objective,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class ArgumentContainer>
|
||||
RealType zoom(Objective& objective,
|
||||
ObjectiveEvalPolicy& obj_eval,
|
||||
GradientEvalPolicy& grad_eval,
|
||||
ArgumentContainer& x,
|
||||
const std::vector<RealType>& p,
|
||||
RealType f_x,
|
||||
RealType gTp0,
|
||||
RealType alpha_lo,
|
||||
RealType alpha_hi) const
|
||||
{
|
||||
ArgumentContainer x_trial;
|
||||
std::vector<RealType> g_trial(p.size());
|
||||
|
||||
for (int iter = 0; iter < max_iter_; ++iter) {
|
||||
const RealType alpha_mid = 0.5 * (alpha_lo + alpha_hi);
|
||||
x_trial = x;
|
||||
axpy(alpha_mid, p, x_trial);
|
||||
RealType f_mid;
|
||||
grad_eval(objective, x_trial, obj_eval, f_mid, g_trial);
|
||||
RealType gTp_mid = dot(g_trial, p);
|
||||
ArgumentContainer x_lo = x;
|
||||
axpy(alpha_lo, p, x_lo);
|
||||
RealType f_lo = static_cast<RealType>(obj_eval(objective, x_lo));
|
||||
if ((f_mid > f_x + c1_ * alpha_mid * gTp0) || (f_mid >= f_lo)) {
|
||||
alpha_hi = alpha_mid;
|
||||
} else {
|
||||
if (fabs(gTp_mid) <= c2_ * fabs(gTp0)) {
|
||||
return alpha_mid;
|
||||
}
|
||||
if (gTp_mid * (alpha_hi - alpha_lo) >= 0) {
|
||||
alpha_hi = alpha_lo;
|
||||
}
|
||||
alpha_lo = alpha_mid;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.5 * (alpha_lo + alpha_hi);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif // LINE_SEARCH_POLICIES_HPP
|
||||
@@ -0,0 +1,134 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_DETAIL_RDIFF_OPTIMIZATION_POLICIES_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_DETAIL_RDIFF_OPTIMIZATION_POLICIES_HPP
|
||||
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <random>
|
||||
#include <type_traits>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
/******************************************************************/
|
||||
/**
|
||||
* @brief> function evaluation policy for reverse mode autodiff
|
||||
* @arg objective> objective function to evaluate
|
||||
* @arg x> argument list
|
||||
*/
|
||||
template<typename RealType>
|
||||
struct reverse_mode_function_eval_policy
|
||||
{
|
||||
template<typename Objective, class ArgumentContainer>
|
||||
rdiff::rvar<RealType, 1> operator()(Objective&& objective,
|
||||
ArgumentContainer& x)
|
||||
{
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.zero_grad();
|
||||
tape.rewind_to_last_checkpoint();
|
||||
|
||||
return objective(x);
|
||||
}
|
||||
};
|
||||
/******************************************************************/
|
||||
|
||||
/**
|
||||
* @brief> gradient evaluation policy
|
||||
* @arg obj_f> objective
|
||||
* @arg x> argument list
|
||||
* @arg f_eval_olicy> funciton evaluation policy. These need to be
|
||||
* done in tandem
|
||||
* @arg obj_v> reference to variable inside gradient class
|
||||
*/
|
||||
template<typename RealType>
|
||||
struct reverse_mode_gradient_evaluation_policy
|
||||
{
|
||||
template<class Objective,
|
||||
class ArgumentContainer,
|
||||
class FunctionEvaluationPolicy>
|
||||
void operator()(Objective&& obj_f,
|
||||
ArgumentContainer& x,
|
||||
FunctionEvaluationPolicy&& f_eval_pol,
|
||||
RealType& obj_v,
|
||||
std::vector<RealType>& g)
|
||||
{
|
||||
// compute objective via eval policy that takes care of tape
|
||||
rdiff::rvar<RealType, 1> v = f_eval_pol(obj_f, x);
|
||||
v.backward();
|
||||
obj_v = v.item();
|
||||
g.resize(x.size());
|
||||
|
||||
// copy gradients into gradient vector
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
g[i] = x[i].adjoint();
|
||||
}
|
||||
}
|
||||
};
|
||||
/******************************************************************
|
||||
* init policies
|
||||
*/
|
||||
template<typename RealType>
|
||||
struct tape_initializer_rvar
|
||||
{
|
||||
template<class ArgumentContainer>
|
||||
void operator()(ArgumentContainer& x) const noexcept
|
||||
{
|
||||
static_assert(std::is_same<typename ArgumentContainer::value_type,
|
||||
rdiff::rvar<RealType, 1>>::value,
|
||||
"ArgumentContainer::value_type must be rdiff::rvar<RealType,1>");
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.add_checkpoint();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct random_uniform_initializer_rvar
|
||||
{
|
||||
RealType low_, high_;
|
||||
size_t seed_;
|
||||
random_uniform_initializer_rvar(RealType low = 0,
|
||||
RealType high = 1,
|
||||
size_t seed = std::random_device{}())
|
||||
: low_(low)
|
||||
, high_(high)
|
||||
, seed_(seed) {};
|
||||
|
||||
template<class ArgumentContainer>
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
static std::mt19937 gen{ std::random_device{}() };
|
||||
static std::uniform_real_distribution<double> dist(0.0, 1.0);
|
||||
for (auto& xi : x) {
|
||||
xi = rdiff::rvar<RealType, 1>(static_cast<RealType>(dist(gen)));
|
||||
}
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.add_checkpoint();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct costant_initializer_rvar
|
||||
{
|
||||
RealType constant;
|
||||
explicit costant_initializer_rvar(RealType v = 0)
|
||||
: constant(v) {};
|
||||
template<class ArgumentContainer>
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
xi = rdiff::rvar<RealType, 1>(constant);
|
||||
}
|
||||
auto& tape = rdiff::get_active_tape<RealType, 1>();
|
||||
tape.add_checkpoint();
|
||||
}
|
||||
};
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
|
||||
#endif
|
||||
202
include/boost/math/optimization/gradient_descent.hpp
Normal file
202
include/boost/math/optimization/gradient_descent.hpp
Normal file
@@ -0,0 +1,202 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_GRADIENT_DESCENT_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_GRADIENT_DESCENT_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
|
||||
#include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
template<typename RealType>
|
||||
struct gradient_descent_update_policy
|
||||
{
|
||||
RealType lr_;
|
||||
gradient_descent_update_policy(RealType lr)
|
||||
: lr_(lr) {};
|
||||
|
||||
template<typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::is_expression<
|
||||
ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType& x, RealType& g)
|
||||
{
|
||||
x.get_value() -= lr_ * g;
|
||||
}
|
||||
template<typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType& x, RealType& g) const
|
||||
{
|
||||
x -= lr_ * g;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief> Gradient Descent Optimizer.
|
||||
*
|
||||
* This class implements a gradient descent optimization strategy using the
|
||||
* policies provided for initialization, objective evaluation, and gradient
|
||||
* computation. It inherits from `abstract_optimizer`, which provides the
|
||||
* general optimization framework.
|
||||
*
|
||||
* @tparam> ArgumentContainer Type of the parameter container (e.g.
|
||||
* std::vector<SomeDifferentiableType or RealType>).
|
||||
* @tparam> RealType Floating-point type
|
||||
* @tparam> Objective Objective function type (functor or callable).
|
||||
* @tparam> InitializationPolicy Policy controlling initialization of
|
||||
* differentiable variables.
|
||||
* @tparam> ObjectiveEvalPolicy Policy defining how the objective is evaluated.
|
||||
* @tparam> GradEvalPolicy Policy defining how the gradient is computed.
|
||||
*/
|
||||
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
class gradient_descent
|
||||
: public abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
gradient_descent_update_policy<RealType>,
|
||||
gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>
|
||||
{
|
||||
using base_opt = abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
gradient_descent_update_policy<RealType>,
|
||||
gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>;
|
||||
|
||||
public:
|
||||
using base_opt::base_opt;
|
||||
/**
|
||||
* @brief Perform one optimization step.
|
||||
*
|
||||
* defaults to a regular update inside abstract optimizer
|
||||
* x -= lr * grad(f)
|
||||
*/
|
||||
void step() { this->step_impl(); }
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Create a gradient descent optimizer with default reverse-mode autodiff
|
||||
policies.
|
||||
*
|
||||
* make_gradient_descent(objective, x)
|
||||
* constructs gradient descent with objective function, and parameters x
|
||||
*
|
||||
* learning rate set to 0.01 by default
|
||||
*
|
||||
* initialization strategy : use specified, gradient tape taken care of by
|
||||
* optimizer
|
||||
* function eval policy : rvar function eval policy, to be used with
|
||||
* boost::math::differentiation::reverse_mode::rvar
|
||||
*
|
||||
* gradient eval policy : rvar gradient evaluation policy
|
||||
*
|
||||
* gradient descent update policy :
|
||||
* basically x -= lr * grad(f);
|
||||
*
|
||||
* make_gradient_descent(objective, x, lr)
|
||||
* custom learning rate
|
||||
*/
|
||||
|
||||
template<class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto
|
||||
make_gradient_descent(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr = RealType{ 0.01 })
|
||||
{
|
||||
return gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
gradient_descent_update_policy<RealType>(lr));
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class InitializationPolicy>
|
||||
auto
|
||||
make_gradient_descent(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr,
|
||||
InitializationPolicy&& ip)
|
||||
{
|
||||
return gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
gradient_descent_update_policy<RealType>(lr));
|
||||
}
|
||||
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
auto
|
||||
make_gradient_descent(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType& lr,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep)
|
||||
{
|
||||
return gradient_descent<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
gradient_descent_update_policy<RealType>{ lr });
|
||||
}
|
||||
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
18
include/boost/math/optimization/gradient_optimizers.hpp
Normal file
18
include/boost/math/optimization/gradient_optimizers.hpp
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_GRADIENT_OPTIMIZERS_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_GRADIENT_OPTIMIZERS_HPP
|
||||
#include <boost/math/differentiation/autodiff_reverse.hpp>
|
||||
#include <boost/math/optimization/gradient_descent.hpp>
|
||||
#include <boost/math/optimization/lbfgs.hpp>
|
||||
#include <boost/math/optimization/nesterov.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
382
include/boost/math/optimization/lbfgs.hpp
Normal file
382
include/boost/math/optimization/lbfgs.hpp
Normal file
@@ -0,0 +1,382 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_LBFGS_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_LBFGS_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
|
||||
#include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/math/optimization/detail/line_search_policies.hpp>
|
||||
#include <deque>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
/** @brief> Helper struct for L-BFGS
|
||||
*
|
||||
* stores state of L-BFGS optimizer
|
||||
* @param> m = 10 -> history (how far back to look)
|
||||
* @param> S - > x_k - x_{k-1} ( m states )
|
||||
* @param> Y - > g_k - g_{k-1} (m states)
|
||||
* @param> rho - > 1/(y^T y)
|
||||
* @param> g_prev, x_prev - > previous state of argument and gradient
|
||||
* @param -> f_prev - > previous function value
|
||||
*
|
||||
* https://en.wikipedia.org/wiki/Limited-memory_BFGS
|
||||
*
|
||||
* Jorge Nocedal and Stephen J. Wright,
|
||||
* Numerical Optimization, 2nd Edition,
|
||||
* Springer, 2006.
|
||||
*
|
||||
* pages 176-180
|
||||
* algorithms 7.4/7.5
|
||||
* */
|
||||
template<typename RealType>
|
||||
struct lbfgs_optimizer_state
|
||||
{
|
||||
size_t m = 10; // default history length
|
||||
std::deque<std::vector<RealType>> S, Y;
|
||||
std::deque<RealType> rho;
|
||||
std::vector<RealType> g_prev, x_prev;
|
||||
RealType f_prev = std::numeric_limits<RealType>::quiet_NaN();
|
||||
const RealType EPS = std::numeric_limits<RealType>::epsilon();
|
||||
|
||||
template<typename ArgumentContainer>
|
||||
void update_state(ArgumentContainer& x,
|
||||
std::vector<RealType>& g_k,
|
||||
RealType fk)
|
||||
{
|
||||
|
||||
// iteration 0
|
||||
if (g_prev.empty()) {
|
||||
g_prev.assign(g_k.begin(), g_k.end());
|
||||
x_prev.resize(x.size());
|
||||
std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
|
||||
return static_cast<RealType>(xi);
|
||||
});
|
||||
f_prev = fk;
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<RealType> s_k(x.size()), y_k(g_k.size());
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
s_k[i] = static_cast<RealType>(x[i]) - x_prev[i];
|
||||
y_k[i] = g_k[i] - g_prev[i];
|
||||
}
|
||||
|
||||
RealType ys = dot(y_k, s_k);
|
||||
RealType sn = sqrt(dot(s_k, s_k));
|
||||
RealType yn = sqrt(dot(y_k, y_k));
|
||||
|
||||
const RealType threshold = EPS * sn * yn;
|
||||
if (ys > threshold && ys > RealType(0)) { // check if curvature if non-zero
|
||||
if (S.size() == m) { // iteration > m
|
||||
S.pop_front();
|
||||
Y.pop_front();
|
||||
rho.pop_front();
|
||||
}
|
||||
S.push_back(std::move(s_k));
|
||||
Y.push_back(std::move(y_k));
|
||||
rho.push_back(RealType(1) / ys);
|
||||
}
|
||||
|
||||
g_prev.assign(g_k.begin(), g_k.end());
|
||||
// safely cast to realtype
|
||||
std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
|
||||
return static_cast<RealType>(xi);
|
||||
});
|
||||
f_prev = fk;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief> helper update for l-bfgs
|
||||
* x += alpha * search direction
|
||||
* */
|
||||
template<typename RealType>
|
||||
struct lbfgs_update_policy
|
||||
{
|
||||
template<typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::is_expression<
|
||||
ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType& x, RealType pk, RealType alpha)
|
||||
{
|
||||
x.get_value() += alpha * pk;
|
||||
}
|
||||
template<typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType& x, RealType pk, RealType alpha)
|
||||
{
|
||||
x += alpha * pk;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Limited-memory BFGS (L-BFGS) optimizer
|
||||
*
|
||||
* The `lbfgs` class implements the Limited-memory BFGS optimization algorithm,
|
||||
* a quasi-Newton method that approximates the inverse Hessian using a rolling
|
||||
* window of the last `m` updates. It is suitable for medium- to large-scale
|
||||
* optimization problems where full Hessian storage is infeasible.
|
||||
*
|
||||
* @tparam> ArgumentContainer: container type for parameters, e.g.
|
||||
* std::vector<RealType>
|
||||
* @tparam> RealType scalar floating type (e.g. double, float)
|
||||
* @tparam> Objective: objective function. must support "f(x)" evaluation
|
||||
* @tparam> InitializationPolicy: policy for initializing x
|
||||
* @tparam> ObjectiveEvalPolicy: policy for computing the objective value
|
||||
* @tparam> GradEvalPolicy: policy for computing gradients
|
||||
* @tparam> LineaSearchPolicy: e.g. Armijo, StrongWolfe
|
||||
*
|
||||
* https://en.wikipedia.org/wiki/Limited-memory_BFGS
|
||||
*/
|
||||
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy,
|
||||
class LineSearchPolicy>
|
||||
class lbfgs
|
||||
: public abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
lbfgs_update_policy<RealType>,
|
||||
lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
LineSearchPolicy>>
|
||||
{
|
||||
using base_opt = abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
lbfgs_update_policy<RealType>,
|
||||
lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
LineSearchPolicy>>;
|
||||
|
||||
const RealType EPS = std::numeric_limits<RealType>::epsilon();
|
||||
lbfgs_optimizer_state<RealType> state_;
|
||||
LineSearchPolicy line_search_;
|
||||
|
||||
std::vector<RealType> compute_direction(const std::vector<RealType>& gk)
|
||||
{
|
||||
const size_t n = gk.size();
|
||||
const size_t L = state_.S.size(); // since S changes when iter < m
|
||||
|
||||
if (L == 0) {
|
||||
std::vector<RealType> p(n);
|
||||
std::transform(
|
||||
gk.begin(), gk.end(), p.begin(), [](RealType gi) { return -gi; });
|
||||
return p;
|
||||
}
|
||||
|
||||
std::vector<RealType> q = gk;
|
||||
std::vector<RealType> alpha(L, RealType(0));
|
||||
for (size_t t = 0; t < L; ++t) {
|
||||
const std::size_t i = L - 1 - t; // newest first
|
||||
const RealType sTq = dot(state_.S[i], q);
|
||||
alpha[i] = state_.rho[i] * sTq;
|
||||
axpy(-alpha[i], state_.Y[i], q);
|
||||
}
|
||||
|
||||
const RealType sTy = dot(state_.S.back(), state_.Y.back());
|
||||
const RealType yTy = dot(state_.Y.back(), state_.Y.back());
|
||||
const RealType gamma = (yTy > RealType(0)) ? (sTy / yTy) : RealType(1);
|
||||
|
||||
std::vector<RealType> r = q;
|
||||
scale(r, gamma);
|
||||
|
||||
for (std::size_t i = 0; i < L; ++i) {
|
||||
const RealType yTr = dot(state_.Y[i], r);
|
||||
const RealType beta = state_.rho[i] * yTr;
|
||||
axpy(alpha[i] - beta, state_.S[i], r);
|
||||
}
|
||||
scale(r, RealType{ -1 });
|
||||
return r;
|
||||
}
|
||||
|
||||
public:
|
||||
using base_opt::base_opt;
|
||||
lbfgs(Objective&& objective,
|
||||
ArgumentContainer& x,
|
||||
size_t m,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep,
|
||||
lbfgs_update_policy<RealType>&& up,
|
||||
LineSearchPolicy&& lsp)
|
||||
: base_opt(std::forward<Objective>(objective),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
std::forward<lbfgs_update_policy<RealType>>(up))
|
||||
, line_search_(lsp)
|
||||
{
|
||||
state_.m = m;
|
||||
|
||||
state_.S.clear();
|
||||
state_.Y.clear();
|
||||
state_.rho.clear();
|
||||
state_.g_prev.clear();
|
||||
state_.f_prev = std::numeric_limits<RealType>::quiet_NaN();
|
||||
}
|
||||
|
||||
void step()
|
||||
{
|
||||
auto& x = this->arguments();
|
||||
auto& g = this->gradients();
|
||||
auto& obj = this->objective_value();
|
||||
auto& obj_eval = this->obj_eval_;
|
||||
auto& grad_eval = this->grad_eval_;
|
||||
auto& objective = this->objective_;
|
||||
auto& update = this->update_;
|
||||
|
||||
grad_eval(objective, x, obj_eval, obj, g);
|
||||
state_.update_state(x, g, obj);
|
||||
std::vector<RealType> p = compute_direction(g);
|
||||
RealType alpha = line_search_(objective, obj_eval, grad_eval, x, g, p, obj);
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
update(x[i], p[i], alpha);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class Objective, typename ArgumentContainer>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj, ArgumentContainer& x, std::size_t m = 10)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>,
|
||||
strong_wolfe_line_search_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{},
|
||||
strong_wolfe_line_search_policy<RealType>{});
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
class InitializationPolicy>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
std::size_t m,
|
||||
InitializationPolicy&& ip)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>,
|
||||
strong_wolfe_line_search_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{},
|
||||
strong_wolfe_line_search_policy<RealType>{});
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
class InitializationPolicy,
|
||||
class LineSearchPolicy>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
std::size_t m,
|
||||
InitializationPolicy&& ip,
|
||||
LineSearchPolicy&& lsp)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>,
|
||||
LineSearchPolicy>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
lbfgs_update_policy<RealType>{},
|
||||
std::forward<LineSearchPolicy>(lsp));
|
||||
}
|
||||
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
class InitializationPolicy,
|
||||
class FunctionEvalPolicy,
|
||||
class GradientEvalPolicy,
|
||||
class LineSearchPolicy>
|
||||
auto
|
||||
make_lbfgs(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
std::size_t m,
|
||||
InitializationPolicy&& ip,
|
||||
FunctionEvalPolicy&& fep,
|
||||
GradientEvalPolicy&& gep,
|
||||
LineSearchPolicy&& lsp)
|
||||
{
|
||||
using RealType = typename argument_container_t<ArgumentContainer>::type;
|
||||
return lbfgs<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
FunctionEvalPolicy,
|
||||
GradientEvalPolicy,
|
||||
LineSearchPolicy>(std::forward<Objective>(obj),
|
||||
x,
|
||||
m,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<FunctionEvalPolicy>(fep),
|
||||
std::forward<GradientEvalPolicy>(gep),
|
||||
lbfgs_update_policy<RealType>{},
|
||||
std::forward<LineSearchPolicy>(lsp));
|
||||
}
|
||||
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
336
include/boost/math/optimization/minimizer.hpp
Normal file
336
include/boost/math/optimization/minimizer.hpp
Normal file
@@ -0,0 +1,336 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_MINIMIZER_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_MINIMIZER_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <boost/math/optimization/gradient_optimizers.hpp>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
template<typename RealType>
|
||||
struct optimization_result
|
||||
{
|
||||
size_t num_iter = 0;
|
||||
RealType objective_value;
|
||||
std::vector<RealType> objective_history;
|
||||
bool converged;
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, const optimization_result<RealType>& r)
|
||||
{
|
||||
os << "optimization_result {\n"
|
||||
<< " num_iter = " << r.num_iter << "\n"
|
||||
<< " objective_value = " << r.objective_value << "\n"
|
||||
<< " converged = " << std::boolalpha << r.converged << "\n"
|
||||
<< " objective_history = [";
|
||||
|
||||
for (std::size_t i = 0; i < r.objective_history.size(); ++i) {
|
||||
os << r.objective_history[i];
|
||||
if (i + 1 < r.objective_history.size()) {
|
||||
os << ", ";
|
||||
}
|
||||
}
|
||||
os << "]\n}\n";
|
||||
return os;
|
||||
}
|
||||
/*****************************************************************************************/
|
||||
template<typename RealType>
|
||||
struct gradient_norm_convergence_policy
|
||||
{
|
||||
RealType tol_;
|
||||
explicit gradient_norm_convergence_policy(RealType tol)
|
||||
: tol_(tol)
|
||||
{
|
||||
}
|
||||
|
||||
template<class GradientContainer>
|
||||
bool operator()(const GradientContainer& g, RealType /*objective_v*/) const
|
||||
{
|
||||
return norm_2(g) < tol_;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct objective_tol_convergence_policy
|
||||
{
|
||||
RealType tol_;
|
||||
mutable RealType last_value_;
|
||||
mutable bool first_call_;
|
||||
|
||||
explicit objective_tol_convergence_policy(RealType tol)
|
||||
: tol_(tol)
|
||||
, last_value_(0)
|
||||
, first_call_(true)
|
||||
{
|
||||
}
|
||||
|
||||
template<class GradientContainer>
|
||||
bool operator()(const GradientContainer&, RealType objective_v) const
|
||||
{
|
||||
if (first_call_) {
|
||||
last_value_ = objective_v;
|
||||
first_call_ = false;
|
||||
return false;
|
||||
}
|
||||
RealType diff = abs(objective_v - last_value_);
|
||||
last_value_ = objective_v;
|
||||
return diff < tol_;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RealType>
|
||||
struct relative_objective_tol_policy
|
||||
{
|
||||
RealType rel_tol_;
|
||||
mutable RealType last_value_;
|
||||
mutable bool first_call_;
|
||||
|
||||
explicit relative_objective_tol_policy(RealType rel_tol)
|
||||
: rel_tol_(rel_tol)
|
||||
, last_value_(0)
|
||||
, first_call_(true)
|
||||
{
|
||||
}
|
||||
|
||||
template<class GradientContainer>
|
||||
bool operator()(const GradientContainer&, RealType objective_v) const
|
||||
{
|
||||
if (first_call_) {
|
||||
last_value_ = objective_v;
|
||||
first_call_ = false;
|
||||
return false;
|
||||
}
|
||||
RealType denom = max<RealType>(1, abs(last_value_));
|
||||
RealType rel_diff = abs(objective_v - last_value_) / denom;
|
||||
last_value_ = objective_v;
|
||||
return rel_diff < rel_tol_;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Policy1, class Policy2>
|
||||
struct combined_convergence_policy
|
||||
{
|
||||
Policy1 p1_;
|
||||
Policy2 p2_;
|
||||
|
||||
combined_convergence_policy(Policy1 p1, Policy2 p2)
|
||||
: p1_(p1)
|
||||
, p2_(p2)
|
||||
{
|
||||
}
|
||||
|
||||
template<class GradientContainer, class RealType>
|
||||
bool operator()(const GradientContainer& g, RealType obj) const
|
||||
{
|
||||
return p1_(g, obj) || p2_(g, obj);
|
||||
}
|
||||
};
|
||||
|
||||
/*****************************************************************************************/
|
||||
struct max_iter_termination_policy
|
||||
{
|
||||
size_t max_iter_;
|
||||
max_iter_termination_policy(size_t max_iter)
|
||||
: max_iter_(max_iter) {};
|
||||
bool operator()(size_t iter)
|
||||
{
|
||||
if (iter < max_iter_) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct wallclock_termination_policy
|
||||
{
|
||||
std::chrono::steady_clock::time_point start_;
|
||||
std::chrono::milliseconds max_time_;
|
||||
|
||||
explicit wallclock_termination_policy(std::chrono::milliseconds max_time)
|
||||
: start_(std::chrono::steady_clock::now())
|
||||
, max_time_(max_time)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator()(size_t /*iter*/) const
|
||||
{
|
||||
return std::chrono::steady_clock::now() - start_ > max_time_;
|
||||
}
|
||||
};
|
||||
|
||||
/*****************************************************************************************/
|
||||
template<typename ArgumentContainer>
|
||||
struct unconstrained_policy
|
||||
{
|
||||
void operator()(ArgumentContainer&) {}
|
||||
};
|
||||
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct box_constraints
|
||||
{
|
||||
RealType min_, max_;
|
||||
box_constraints(RealType min, RealType max)
|
||||
: min_(min)
|
||||
, max_(max) {};
|
||||
void operator()(ArgumentContainer& x)
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
xi = (xi < min_) ? min_ : (max_ < xi) ? max_ : xi;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct nonnegativity_constraint
|
||||
{
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
for (auto& xi : x) {
|
||||
if (xi < RealType{ 0 })
|
||||
xi = RealType{ 0 };
|
||||
}
|
||||
}
|
||||
};
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct l2_ball_constraint
|
||||
{
|
||||
RealType radius_;
|
||||
|
||||
explicit l2_ball_constraint(RealType radius)
|
||||
: radius_(radius)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType norm2v = norm_2(x);
|
||||
if (norm2v > radius_) {
|
||||
RealType scale = radius_ / norm2v;
|
||||
for (auto& xi : x)
|
||||
xi *= scale;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct l1_ball_constraint
|
||||
{
|
||||
RealType radius_;
|
||||
|
||||
explicit l1_ball_constraint(RealType radius)
|
||||
: radius_(radius)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType norm1v = norm_1(x);
|
||||
|
||||
if (norm1v > radius_) {
|
||||
RealType scale = radius_ / norm1v;
|
||||
for (auto& xi : x)
|
||||
xi *= scale;
|
||||
}
|
||||
}
|
||||
};
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct simplex_constraint
|
||||
{
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType sum = RealType{ 0 };
|
||||
for (auto& xi : x) {
|
||||
if (xi < RealType{ 0 })
|
||||
xi = RealType{ 0 }; // clip negatives
|
||||
sum += xi;
|
||||
}
|
||||
if (sum > RealType{ 0 }) {
|
||||
for (auto& xi : x)
|
||||
xi /= sum;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ArgumentContainer>
|
||||
struct function_constraint
|
||||
{
|
||||
using func_t = void (*)(ArgumentContainer&);
|
||||
|
||||
func_t f_;
|
||||
|
||||
explicit function_constraint(func_t f)
|
||||
: f_(f)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(ArgumentContainer& x) const { f_(x); }
|
||||
};
|
||||
template<typename ArgumentContainer, typename RealType>
|
||||
struct unit_sphere_constraint
|
||||
{
|
||||
void operator()(ArgumentContainer& x) const
|
||||
{
|
||||
RealType norm = norm_2(x);
|
||||
if (norm > RealType{ 0 }) {
|
||||
for (auto& xi : x)
|
||||
xi /= norm;
|
||||
}
|
||||
}
|
||||
};
|
||||
/*****************************************************************************************/
|
||||
|
||||
template<class Optimizer,
|
||||
class ConstraintPolicy,
|
||||
class ConvergencePolicy,
|
||||
class TerminationPolicy>
|
||||
auto
|
||||
minimize_impl(Optimizer& opt,
|
||||
ConstraintPolicy project,
|
||||
ConvergencePolicy converged,
|
||||
TerminationPolicy terminate,
|
||||
bool history)
|
||||
{
|
||||
optimization_result<typename Optimizer::real_type_t> result;
|
||||
size_t iter = 0;
|
||||
do {
|
||||
opt.step();
|
||||
project(opt.arguments());
|
||||
++iter;
|
||||
if (history) {
|
||||
result.objective_history.push_back(opt.objective_value());
|
||||
}
|
||||
|
||||
} while (!converged(opt.gradients(), opt.objective_value()) &&
|
||||
!terminate(iter));
|
||||
result.num_iter = iter;
|
||||
result.objective_value = opt.objective_value();
|
||||
result.converged = converged(opt.gradients(), opt.objective_value());
|
||||
return result;
|
||||
}
|
||||
template<class Optimizer,
|
||||
class ConstraintPolicy =
|
||||
unconstrained_policy<typename Optimizer::argument_container_t>,
|
||||
class ConvergencePolicy =
|
||||
gradient_norm_convergence_policy<typename Optimizer::real_type_t>,
|
||||
class TerminationPolicy = max_iter_termination_policy>
|
||||
auto
|
||||
minimize(Optimizer& opt,
|
||||
ConstraintPolicy project = ConstraintPolicy{},
|
||||
ConvergencePolicy converged =
|
||||
ConvergencePolicy{
|
||||
static_cast<typename Optimizer::real_type_t>(1e-3) },
|
||||
TerminationPolicy terminate = TerminationPolicy(100000),
|
||||
bool history = true)
|
||||
{
|
||||
return minimize_impl(opt, project, converged, terminate, history);
|
||||
}
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
208
include/boost/math/optimization/nesterov.hpp
Normal file
208
include/boost/math/optimization/nesterov.hpp
Normal file
@@ -0,0 +1,208 @@
|
||||
// Copyright Maksym Zhelyenzyakov 2025-2026.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// https://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_MATH_OPTIMIZATION_NESTEROV_HPP
|
||||
#define BOOST_MATH_OPTIMIZATION_NESTEROV_HPP
|
||||
#include <boost/math/optimization/detail/differentiable_opt_utilties.hpp>
|
||||
#include <boost/math/optimization/detail/gradient_opt_base.hpp>
|
||||
#include <boost/math/optimization/detail/rdiff_optimization_policies.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace boost {
|
||||
namespace math {
|
||||
namespace optimization {
|
||||
|
||||
namespace rdiff = boost::math::differentiation::reverse_mode;
|
||||
|
||||
/**
|
||||
* @brief The nesterov_update_policy class
|
||||
*/
|
||||
template<typename RealType>
|
||||
struct nesterov_update_policy
|
||||
{
|
||||
RealType lr_, mu_;
|
||||
nesterov_update_policy(RealType lr, RealType mu)
|
||||
: lr_(lr)
|
||||
, mu_(mu) {};
|
||||
|
||||
template<typename ArgumentType,
|
||||
typename = typename std::enable_if<
|
||||
boost::math::differentiation::reverse_mode::detail::is_expression<
|
||||
ArgumentType>::value>::type>
|
||||
void operator()(ArgumentType& x, RealType& g, RealType& v)
|
||||
{
|
||||
RealType v_prev = v;
|
||||
v = mu_ * v - lr_ * g;
|
||||
x.get_value() += -mu_ * v_prev + (static_cast<RealType>(1) + mu_) * v;
|
||||
}
|
||||
template<typename ArgumentType,
|
||||
typename std::enable_if<!boost::math::differentiation::reverse_mode::
|
||||
detail::is_expression<ArgumentType>::value,
|
||||
int>::type = 0>
|
||||
void operator()(ArgumentType& x, RealType& g, RealType& v) const
|
||||
{
|
||||
const RealType v_prev = v;
|
||||
v = mu_ * v - lr_ * g;
|
||||
x += -mu_ * v_prev + (static_cast<RealType>(1) + mu_) * v;
|
||||
}
|
||||
RealType lr() const noexcept { return lr_; }
|
||||
RealType mu() const noexcept { return mu_; }
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The nesterov_accelerated_gradient class
|
||||
*
|
||||
* https://jlmelville.github.io/mize/nesterov.html
|
||||
*/
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
class nesterov_accelerated_gradient
|
||||
: public abstract_optimizer<
|
||||
ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
nesterov_update_policy<RealType>,
|
||||
nesterov_accelerated_gradient<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>
|
||||
{
|
||||
using base_opt =
|
||||
abstract_optimizer<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy,
|
||||
nesterov_update_policy<RealType>,
|
||||
nesterov_accelerated_gradient<ArgumentContainer,
|
||||
RealType,
|
||||
Objective,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>>;
|
||||
std::vector<RealType> v_;
|
||||
|
||||
public:
|
||||
using base_opt::base_opt;
|
||||
nesterov_accelerated_gradient(Objective&& objective,
|
||||
ArgumentContainer& x,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep,
|
||||
nesterov_update_policy<RealType>&& up)
|
||||
: base_opt(std::forward<Objective>(objective),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
std::forward<nesterov_update_policy<RealType>>(up))
|
||||
, v_(x.size(), RealType(0))
|
||||
{
|
||||
}
|
||||
|
||||
void step()
|
||||
{
|
||||
auto& x = this->arguments();
|
||||
auto& g = this->gradients();
|
||||
auto& obj = this->objective_value();
|
||||
auto& obj_eval = this->obj_eval_;
|
||||
auto& grad_eval = this->grad_eval_;
|
||||
auto& objective = this->objective_;
|
||||
auto& update = this->update_;
|
||||
|
||||
grad_eval(objective, x, obj_eval, obj, g);
|
||||
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
update(x[i], g[i], v_[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
template<class Objective, typename ArgumentContainer, typename RealType>
|
||||
auto
|
||||
make_nag(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr = RealType{ 0.01 },
|
||||
RealType mu = RealType{ 0.95 })
|
||||
{
|
||||
return nesterov_accelerated_gradient<
|
||||
ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
tape_initializer_rvar<RealType>,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
tape_initializer_rvar<RealType>{},
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
nesterov_update_policy<RealType>(lr, mu));
|
||||
}
|
||||
template<class Objective,
|
||||
typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class InitializationPolicy>
|
||||
auto
|
||||
make_nag(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr,
|
||||
RealType mu,
|
||||
InitializationPolicy&& ip)
|
||||
{
|
||||
return nesterov_accelerated_gradient<
|
||||
ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
reverse_mode_function_eval_policy<RealType>,
|
||||
reverse_mode_gradient_evaluation_policy<RealType>>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
reverse_mode_function_eval_policy<RealType>{},
|
||||
reverse_mode_gradient_evaluation_policy<RealType>{},
|
||||
nesterov_update_policy<RealType>(lr, mu));
|
||||
}
|
||||
template<typename ArgumentContainer,
|
||||
typename RealType,
|
||||
class Objective,
|
||||
class InitializationPolicy,
|
||||
class ObjectiveEvalPolicy,
|
||||
class GradEvalPolicy>
|
||||
auto
|
||||
make_nag(Objective&& obj,
|
||||
ArgumentContainer& x,
|
||||
RealType lr,
|
||||
RealType mu,
|
||||
InitializationPolicy&& ip,
|
||||
ObjectiveEvalPolicy&& oep,
|
||||
GradEvalPolicy&& gep)
|
||||
{
|
||||
return nesterov_accelerated_gradient<ArgumentContainer,
|
||||
RealType,
|
||||
std::decay_t<Objective>,
|
||||
InitializationPolicy,
|
||||
ObjectiveEvalPolicy,
|
||||
GradEvalPolicy>(
|
||||
std::forward<Objective>(obj),
|
||||
x,
|
||||
std::forward<InitializationPolicy>(ip),
|
||||
std::forward<ObjectiveEvalPolicy>(oep),
|
||||
std::forward<GradEvalPolicy>(gep),
|
||||
nesterov_update_policy<RealType>{ lr, mu });
|
||||
}
|
||||
} // namespace optimization
|
||||
} // namespace math
|
||||
} // namespace boost
|
||||
#endif
|
||||
@@ -24,6 +24,7 @@ With these techniques, the code could be simplified.
|
||||
#include <cstring>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <climits>
|
||||
#include <type_traits>
|
||||
#include <boost/math/tools/config.hpp>
|
||||
#include <boost/math/tools/is_standalone.hpp>
|
||||
@@ -270,7 +271,12 @@ template<> struct fp_traits_non_native<double, double_precision>
|
||||
// long double (64 bits) -------------------------------------------------------
|
||||
|
||||
#if defined(BOOST_NO_INT64_T) || defined(BOOST_NO_INCLASS_MEMBER_INITIALIZATION)\
|
||||
|| defined(BOOST_BORLANDC) || defined(__CODEGEAR__) || (defined(__APPLE__) && defined(__aarch64__)) || defined(_MSC_VER)
|
||||
|| defined(BOOST_BORLANDC) || defined(__CODEGEAR__) || (defined(__APPLE__) && defined(__aarch64__)) || defined(_MSC_VER)\
|
||||
|| (defined(__GNUC__) && defined(__aarch64__) && defined(_WIN32))\
|
||||
|| (defined(__GNUC__) && (defined(__arm__) || defined(__thumb__)))\
|
||||
|| defined(__SYCL_DEVICE_ONLY__) || (LDBL_MANT_DIG == 53)
|
||||
|
||||
static_assert(LDBL_MANT_DIG == 53, "Oops, assumption that long double is a 64-bit quantity is incorrect!!");
|
||||
|
||||
template<> struct fp_traits_non_native<long double, double_precision>
|
||||
{
|
||||
@@ -305,6 +311,8 @@ private:
|
||||
|
||||
// Intel extended double precision format (80 bits)
|
||||
|
||||
static_assert(LDBL_MANT_DIG == 64, "Oops, assumption that long double is an 80-bit quantity is incorrect!!");
|
||||
|
||||
template<>
|
||||
struct fp_traits_non_native<long double, extended_double_precision>
|
||||
{
|
||||
@@ -356,6 +364,8 @@ struct fp_traits_non_native<long double, extended_double_precision>
|
||||
|
||||
// PowerPC extended double precision format (128 bits)
|
||||
|
||||
static_assert(LDBL_MANT_DIG == 113, "Oops, assumption that long double is a 128-bit quantity is incorrect!!");
|
||||
|
||||
template<>
|
||||
struct fp_traits_non_native<long double, extended_double_precision>
|
||||
{
|
||||
@@ -426,10 +436,12 @@ struct fp_traits_non_native<long double, extended_double_precision>
|
||||
|
||||
// long double (>64 bits), All other processors --------------------------------
|
||||
|
||||
#else
|
||||
#elif (LDBL_MANT_DIG == 113)
|
||||
|
||||
// IEEE extended double precision format with 15 exponent bits (128 bits)
|
||||
|
||||
static_assert(LDBL_MANT_DIG == 113, "Oops, assumption that long double is a 128-bit quantity is incorrect!!");
|
||||
|
||||
template<>
|
||||
struct fp_traits_non_native<long double, extended_double_precision>
|
||||
{
|
||||
@@ -456,6 +468,10 @@ private:
|
||||
BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12;
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
// Nothing in here, we don't understand the format!
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -1290,7 +1290,6 @@ BOOST_MATH_GPU_ENABLED T incomplete_tgamma_large_x(const T& a, const T& x, const
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Main incomplete gamma entry point, handles all four incomplete gamma's:
|
||||
//
|
||||
@@ -1813,6 +1812,46 @@ BOOST_MATH_GPU_ENABLED T lgamma_incomplete_imp(T a, T x, const Policy& pol)
|
||||
return log(gamma_q(a, x, pol));
|
||||
}
|
||||
|
||||
// Calculate log of incomplete gamma function
|
||||
template <class T, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED T lgamma_incomplete_lower_imp(T a, T x, const Policy& pol)
|
||||
{
|
||||
using namespace boost::math; // temporary until we're in the right namespace
|
||||
|
||||
BOOST_MATH_STD_USING_CORE
|
||||
|
||||
// Check for invalid inputs (a < 0 or x < 0)
|
||||
constexpr auto function = "boost::math::lgamma_p<%1%>(%1%, %1%)";
|
||||
if(a <= 0)
|
||||
return policies::raise_domain_error<T>(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol);
|
||||
if(x < 0)
|
||||
return policies::raise_domain_error<T>(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol);
|
||||
|
||||
// Taken from conditions on Line 1709. There are also
|
||||
// conditions on Line 1368, but didn't implement that one here.
|
||||
// The second condition ensures floats do not return -inf for small
|
||||
// values of x.
|
||||
if (((a > 4 * x) && (a >= max_factorial<T>::value)) || ((T(-0.4) / log(x) < a) && (x < T(1.0)))){
|
||||
return log(detail::lower_gamma_series(a, x, pol)) - log(a) + a * log(x) - x - lgamma(a, pol);
|
||||
}
|
||||
|
||||
//
|
||||
// Can't do better than taking the log of P, but...
|
||||
//
|
||||
// Figure out whether we need P or Q, since if we calculate P and it's too close to unity
|
||||
// we will lose precision in the result, selection logic here is extracted from gamma_incomplete_imp_final:
|
||||
//
|
||||
bool need_p = false;
|
||||
if ((x < 1.1) && (x >= 0.5) && (x * 0.75f < a))
|
||||
need_p = true;
|
||||
else if ((x < a) && (x >= 1.1))
|
||||
need_p = true;
|
||||
|
||||
if (need_p)
|
||||
return log(gamma_p(a, x, pol));
|
||||
return log1p(-gamma_q(a, x, pol), pol);
|
||||
}
|
||||
|
||||
//
|
||||
// Ratios of two gamma functions:
|
||||
//
|
||||
@@ -2454,6 +2493,29 @@ BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2> lgamma_q(T1 a, T2 z)
|
||||
{
|
||||
return lgamma_q(a, z, policies::policy<>());
|
||||
}
|
||||
|
||||
template <class T1, class T2, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2> lgamma_p(T1 a, T2 z, const Policy& /* pol */)
|
||||
{
|
||||
typedef tools::promote_args_t<T1, T2> result_type;
|
||||
typedef typename policies::evaluation<result_type, Policy>::type value_type;
|
||||
typedef typename policies::normalise<
|
||||
Policy,
|
||||
policies::promote_float<false>,
|
||||
policies::promote_double<false>,
|
||||
policies::discrete_quantile<>,
|
||||
policies::assert_undefined<> >::type forwarding_policy;
|
||||
|
||||
return policies::checked_narrowing_cast<result_type, forwarding_policy>(
|
||||
detail::lgamma_incomplete_lower_imp(static_cast<value_type>(a),
|
||||
static_cast<value_type>(z), forwarding_policy()), "lgamma_p<%1%>(%1%, %1%)");
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2> lgamma_p(T1 a, T2 z)
|
||||
{
|
||||
return lgamma_p(a, z, policies::policy<>());
|
||||
}
|
||||
//
|
||||
// Regularised lower incomplete gamma:
|
||||
//
|
||||
|
||||
@@ -567,6 +567,12 @@ namespace boost
|
||||
template <class RT1, class RT2, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> lgamma_q(RT1 a, RT2 z, const Policy&);
|
||||
|
||||
template <class RT1, class RT2>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> lgamma_p(RT1 a, RT2 z);
|
||||
|
||||
template <class RT1, class RT2, class Policy>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> lgamma_p(RT1 a, RT2 z, const Policy&);
|
||||
|
||||
template <class RT1, class RT2>
|
||||
BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z);
|
||||
|
||||
@@ -1525,6 +1531,9 @@ namespace boost
|
||||
\
|
||||
template <class RT1, class RT2>\
|
||||
BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> lgamma_q(RT1 a, RT2 z){ return boost::math::lgamma_q(a, z, Policy()); }\
|
||||
\
|
||||
template <class RT1, class RT2>\
|
||||
BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> lgamma_p(RT1 a, RT2 z){ return boost::math::lgamma_p(a, z, Policy()); }\
|
||||
\
|
||||
template <class RT1, class RT2>\
|
||||
BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\
|
||||
|
||||
@@ -178,7 +178,7 @@ BOOST_MATH_GPU_ENABLED U evaluate_polynomial(const T* poly, U const& z, boost::m
|
||||
namespace detail{
|
||||
|
||||
template <class T, class V, class Tag>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
return evaluate_polynomial(a, val, Tag::value);
|
||||
}
|
||||
@@ -207,7 +207,7 @@ BOOST_MATH_GPU_ENABLED inline U evaluate_polynomial(const T* poly, U const& z, b
|
||||
// implementations above:
|
||||
//
|
||||
template <boost::math::size_t N, class T, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
typedef boost::math::integral_constant<int, static_cast<int>(N)> tag_type;
|
||||
return detail::evaluate_polynomial_c_imp(static_cast<const T*>(a), val, static_cast<tag_type const*>(nullptr));
|
||||
@@ -215,7 +215,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
template <boost::math::size_t N, class T, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const std::array<T,N>& a, const V& val) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const std::array<T,N>& a, const V& val) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
typedef boost::math::integral_constant<int, static_cast<int>(N)> tag_type;
|
||||
return detail::evaluate_polynomial_c_imp(static_cast<const T*>(a.data()), val, static_cast<tag_type const*>(nullptr));
|
||||
@@ -231,14 +231,14 @@ BOOST_MATH_GPU_ENABLED inline U evaluate_even_polynomial(const T* poly, U z, boo
|
||||
}
|
||||
|
||||
template <boost::math::size_t N, class T, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
return evaluate_polynomial(a, V(z*z));
|
||||
}
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
template <boost::math::size_t N, class T, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const std::array<T,N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const std::array<T,N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
return evaluate_polynomial(a, V(z*z));
|
||||
}
|
||||
@@ -253,7 +253,7 @@ BOOST_MATH_GPU_ENABLED inline U evaluate_odd_polynomial(const T* poly, U z, boos
|
||||
}
|
||||
|
||||
template <boost::math::size_t N, class T, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
typedef boost::math::integral_constant<int, static_cast<int>(N-1)> tag_type;
|
||||
return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast<const T*>(a) + 1, V(z*z), static_cast<tag_type const*>(nullptr));
|
||||
@@ -261,7 +261,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(c
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
template <boost::math::size_t N, class T, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const std::array<T,N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const std::array<T,N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
typedef boost::math::integral_constant<int, static_cast<int>(N-1)> tag_type;
|
||||
return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast<const T*>(a.data()) + 1, V(z*z), static_cast<tag_type const*>(nullptr));
|
||||
@@ -274,7 +274,7 @@ BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V
|
||||
namespace detail{
|
||||
|
||||
template <class T, class U, class V, class Tag>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
return boost::math::tools::evaluate_rational(num, denom, z, Tag::value);
|
||||
}
|
||||
@@ -322,14 +322,14 @@ BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V
|
||||
}
|
||||
|
||||
template <boost::math::size_t N, class T, class U, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
return detail::evaluate_rational_c_imp(a, b, z, static_cast<const boost::math::integral_constant<int, static_cast<int>(N)>*>(nullptr));
|
||||
}
|
||||
|
||||
#ifndef BOOST_MATH_HAS_NVRTC
|
||||
template <boost::math::size_t N, class T, class U, class V>
|
||||
BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const std::array<T,N>& a, const std::array<U,N>& b, const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const std::array<T,N>& a, const std::array<U,N>& b, const V& z) BOOST_MATH_NOEXCEPT(V)
|
||||
{
|
||||
return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast<boost::math::integral_constant<int, static_cast<int>(N)>*>(nullptr));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user