super fast regular_axis and code cleanup

This commit is contained in:
Hans Dembinski
2016-07-16 22:07:24 -04:00
parent dee97bca86
commit 2e5294e1fa
5 changed files with 99 additions and 201 deletions

View File

@@ -6,7 +6,6 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
# setup build
option(BUILD_CHECKS "Build auxillary checks" OFF)
option(COVERAGE "Build with coverage instrumentation" OFF)
if(${CMAKE_BUILD_TYPE})
STRING(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
@@ -24,11 +23,11 @@ include_directories(../include ${Boost_INCLUDE_DIRS})
if(CMAKE_BUILD_TYPE STREQUAL "debug")
add_definitions(-O0 -g)
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} --coverage)
message(STATUS "Build type: COVERAGE [optimizations off]")
message(STATUS "Build type: DEBUG [optimizations off]")
elseif(CMAKE_BUILD_TYPE STREQUAL "cov")
add_definitions(-O0 -g)
message(STATUS "Build type: DEBUG [optimizations off]")
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} --coverage)
message(STATUS "Build type: COVERAGE [optimizations off]")
elseif(CMAKE_BUILD_TYPE STREQUAL "perf")
message(STATUS "Build type: PERF [optimizations on]")
add_definitions(-O3 -g -fno-omit-frame-pointer)

View File

@@ -9,7 +9,6 @@
#include <boost/algorithm/clamp.hpp>
#include <boost/variant.hpp>
#include <boost/scoped_array.hpp>
#include <boost/math/constants/constants.hpp>
#include <type_traits>
#include <string>
@@ -31,25 +30,28 @@ namespace histogram {
class axis_base {
public:
///Returns the number of bins.
inline int bins() const { return size_ > 0 ? size_ : -size_; }
inline int bins() const { return size_ ; }
///Returns whether overflow and underflow bins will be added in the histogram.
inline bool uoflow() const { return size_ > 0; }
inline bool uoflow() const { return uoflow_; }
///Change the label of an axis (not implemented for category_axis).
const std::string& label() const { return label_; }
///Returns the axis label, which is a name or description (not implemented for category_axis).
void label(const std::string& label) { label_ = label; }
protected:
axis_base(int, const std::string&, bool);
axis_base(unsigned, const std::string&, bool);
axis_base() : size_(0) {}
axis_base(const axis_base&);
axis_base& operator=(const axis_base&);
axis_base() = default;
axis_base(const axis_base&) = default;
axis_base(axis_base&&) = default;
axis_base& operator=(const axis_base&) = default;
axis_base& operator=(axis_base&&) = default;
bool operator==(const axis_base&) const;
private:
int size_;
bool uoflow_;
std::string label_;
template <class Archive>
@@ -86,24 +88,27 @@ public:
* \param label description of the axis
* \param uoflow add underflow and overflow bins to the histogram for this axis or not
*/
regular_axis(int n, double min, double max,
regular_axis(unsigned n, double min, double max,
const std::string& label = std::string(),
bool uoflow = true);
regular_axis() : min_(0), range_(0) {}
regular_axis(const regular_axis&);
regular_axis& operator=(const regular_axis&);
regular_axis() : min_(0), delta_(0) {}
regular_axis(const regular_axis&) = default;
regular_axis(regular_axis&&) = default;
regular_axis& operator=(const regular_axis&) = default;
regular_axis& operator=(regular_axis&&) = default;
///Returns the bin index for the passed argument.
inline int index(double x) const {
const double z = (x - min_) / range_;
return algorithm::clamp(static_cast<int>(floor(z * bins())), -1, bins());
///Returns the bin index for the passed argument (optimized code).
inline int index(double x) const
{
const double z = (x - min_) / delta_;
return z >= 0.0 ? std::min(static_cast<int>(z), bins()) : -1;
}
double operator[](int idx) const;
bool operator==(const regular_axis&) const;
private:
double min_, range_;
double min_, delta_;
template <class Archive>
friend void serialize(Archive& ar, regular_axis & axis ,unsigned version);
@@ -113,17 +118,19 @@ private:
class polar_axis: public axis_base, public real_axis<polar_axis> {
public:
/** Constructor
\param n number of bins
\param start starting phase of the angle
\param label description of the axis
*/
explicit
polar_axis(int n, double start = 0.0,
* \param n number of bins
* \param start starting phase of the angle
* \param label description of the axis
*/
explicit
polar_axis(unsigned n, double start = 0.0,
const std::string& label = std::string());
polar_axis() : start_(0) {}
polar_axis(const polar_axis&);
polar_axis& operator=(const polar_axis&);
polar_axis(const polar_axis&) = default;
polar_axis(polar_axis&&) = default;
polar_axis& operator=(const polar_axis&) = default;
polar_axis& operator=(polar_axis&&) = default;
///Returns the bin index for the passed argument.
inline int index(double x) const {
@@ -147,9 +154,9 @@ class variable_axis : public axis_base, public real_axis<variable_axis> {
public:
/** Constructor
*
* \param x An axis for real-valued data and bins of varying width. Binning is a O(log(N)) operation. If speed matters and the problem domain allows it, prefer a regular_axis.
* \param x An axis for real-valued data and bins of varying width. Binning is a O(log(N)) operation. If speed matters and the problem domain allows it, prefer a regular_axis.
* \param label description of the axis
* \param uoflow add underflow and overflow bins to the histogram for this axis or not
* \param uoflow add under-/overflow bins for this axis or not
*/
explicit
variable_axis(const std::initializer_list<double>& x,
@@ -187,7 +194,7 @@ public:
double operator[](int idx) const;
bool operator==(const variable_axis&) const;
private:
boost::scoped_array<double> x_;
std::unique_ptr<double[]> x_; // smaller size compared to std::vector
template <class Archive>
friend void serialize(Archive& ar, variable_axis & axis, unsigned version);
@@ -207,15 +214,16 @@ public:
category_axis(const std::initializer_list<std::string>& categories);
category_axis() {}
category_axis(const category_axis&);
category_axis(const category_axis&) = default;
category_axis(category_axis&&) = default;
category_axis& operator=(const category_axis&);
category_axis& operator=(const category_axis&) = default;
category_axis& operator=(category_axis&&) = default;
inline int bins() const { return categories_.size(); }
///Returns the bin index for the passed argument.
inline int index(double x) const { return static_cast<int>(x + 0.5); }
inline int index(double x) const
{ return static_cast<int>(x + 0.5); }
///Returns the category for the bin index.
const std::string& operator[](int idx) const
{ return categories_[idx]; }
@@ -247,8 +255,10 @@ public:
bool uoflow = true);
integer_axis() : min_(0) {}
integer_axis(const integer_axis&);
integer_axis& operator=(const integer_axis&);
integer_axis(const integer_axis&) = default;
integer_axis(integer_axis&&) = default;
integer_axis& operator=(const integer_axis&) = default;
integer_axis& operator=(integer_axis&&) = default;
///Returns the bin index for the passed argument.
inline int index(double x) const
@@ -274,20 +284,13 @@ namespace detail {
template <typename A>
void operator()(const A& a) {
int j = std::is_same<T, double>::value ? a.index(in) : in;
const int bins = a.bins();
const int range = bins + 2 * a.uoflow();
// the following three lines worout for any overflow setting
j += (j < 0) * (bins + 2); // wrap around if in < 0
if (j < range) {
out += j * stride;
stride *= range;
} else {
out = std::size_t(-1);
stride = 0;
}
const int shape = a.bins() + 2 * a.uoflow();
j += (j < 0) * (a.bins() + 2); // wrap around if j < 0
out += j * stride;
#pragma GCC diagnostic ignored "-Wstrict-overflow"
stride *= (j < shape) * shape; // stride == 0 indicates out-of-range
}
};
typedef linearize_t<int> linearize;
typedef linearize_t<double> linearize_x;
}

View File

@@ -10,7 +10,10 @@
#include <boost/config.hpp>
#include <boost/histogram/axis.hpp>
#include <boost/histogram/visitors.hpp>
#include <boost/histogram/storage.hpp>
#include <type_traits>
#include <array>
#include <vector>
#include <stdexcept>
#include <iterator>
#include <iostream>
@@ -21,29 +24,10 @@
namespace boost {
namespace histogram {
template <typename T>
struct static_storage_policy {
std::vector<T> data_;
typedef T value_t;
typedef T variance_t;
std::size_t size() const { return data_.size(); }
void allocate(std::size_t n) { data_.resize(n, 0); }
void increase(std::size_t i) { ++data_[i]; }
value_t value(std::size_t i) const { return data_[i]; }
variance_t variance(std::size_t i) const { return data_[i]; }
bool operator==(const static_storage_policy& other) const
{ return data_ == other.data_; }
void operator+=(const static_storage_policy& other)
{
for (std::size_t i = 0, n = size(); i < n; ++i)
data_[i] += other.data_[i];
}
};
///Use dynamic dimension
constexpr unsigned Dynamic = 0;
template <unsigned Dim, typename StoragePolicy = static_storage_policy<unsigned> >
template <unsigned Dim, typename StoragePolicy = static_storage<unsigned> >
class histogram_t : private StoragePolicy
{
public:
@@ -95,7 +79,7 @@ namespace histogram {
"number of arguments does not match histogram dimension");
detail::linearize_x lin;
fill_impl(lin, std::forward<Args>(args)...);
if (lin.out < size())
if (lin.stride)
StoragePolicy::increase(lin.out);
}
@@ -106,7 +90,7 @@ namespace histogram {
"number of arguments does not match histogram dimension");
detail::linearize lin;
index_impl(lin, std::forward<Args>(args)...);
if (lin.out >= size())
if (lin.stride == 0)
throw std::out_of_range("invalid index");
return StoragePolicy::value(lin.out);
}
@@ -118,7 +102,7 @@ namespace histogram {
"number of arguments does not match histogram dimension");
detail::linearize lin;
index_impl(lin, std::forward<Args>(args)...);
if (lin.out >= size())
if (lin.stride == 0)
throw std::out_of_range("invalid index");
return StoragePolicy::variance(lin.out);
}
@@ -158,7 +142,7 @@ namespace histogram {
}
private:
std::array<axis_t, Dim> axes_;
std::array<axis_t, Dim> axes_;
std::size_t field_count() const
{

View File

@@ -29,62 +29,27 @@ namespace {
}
}
axis_base::axis_base(int size,
axis_base::axis_base(unsigned size,
const std::string& label,
bool uoflow) :
size_(size),
uoflow_(uoflow),
label_(label)
{
if (size <= 0)
throw std::logic_error("size must be positive");
if (!uoflow) size_ = -size_;
}
axis_base::axis_base(const axis_base& o) :
size_(o.size_),
label_(o.label_)
{}
axis_base&
axis_base::operator=(const axis_base& o)
{
if (this != &o) {
size_ = o.size_;
label_ = o.label_;
}
return *this;
}
bool axis_base::operator==(const axis_base& o) const
{ return size_ == o.size_ && label_ == o.label_; }
regular_axis::regular_axis(int n, double min, double max,
regular_axis::regular_axis(unsigned n, double min, double max,
const std::string& label, bool uoflow):
axis_base(n, label, uoflow),
min_(min),
range_(max - min)
delta_((max - min) / n)
{
if (min >= max)
throw std::logic_error("regular_axis: min must be less than max");
}
regular_axis::regular_axis(const regular_axis& o) :
axis_base(o),
min_(o.min_),
range_(o.range_)
{}
regular_axis&
regular_axis::operator=(const regular_axis& o)
{
if (this != &o) {
axis_base::operator=(o);
min_ = o.min_;
range_ = o.range_;
}
return *this;
}
double
regular_axis::operator[](int idx)
const
@@ -94,7 +59,7 @@ regular_axis::operator[](int idx)
if (idx > bins())
return std::numeric_limits<double>::infinity();
const double z = double(idx) / bins();
return (1.0 - z) * min_ + z * (min_ + range_);
return (1.0 - z) * min_ + z * (min_ + delta_ * bins());
}
bool
@@ -102,30 +67,15 @@ regular_axis::operator==(const regular_axis& o) const
{
return axis_base::operator==(o) &&
min_ == o.min_ &&
range_ == o.range_;
delta_ == o.delta_;
}
polar_axis::polar_axis(int n, double start,
polar_axis::polar_axis(unsigned n, double start,
const std::string& label) :
axis_base(n, label, false),
start_(start)
{}
polar_axis::polar_axis(const polar_axis& o) :
axis_base(o),
start_(o.start_)
{}
polar_axis&
polar_axis::operator=(const polar_axis& o)
{
if (this != &o) {
axis_base::operator=(o);
start_ = o.start_;
}
return *this;
}
double
polar_axis::operator[](int idx)
const
@@ -186,18 +136,6 @@ category_axis::category_axis(const std::initializer_list<std::string>& c) :
categories_(c)
{}
category_axis::category_axis(const category_axis& o) :
categories_(o.categories_)
{}
category_axis&
category_axis::operator=(const category_axis& o)
{
if (this != &o)
categories_ = o.categories_;
return *this;
}
bool
category_axis::operator==(const category_axis& o) const
{ return categories_ == o.categories_; }
@@ -209,21 +147,6 @@ integer_axis::integer_axis(int min, int max,
min_(min)
{}
integer_axis::integer_axis(const integer_axis& a) :
axis_base(a),
min_(a.min_)
{}
integer_axis&
integer_axis::operator=(const integer_axis& o)
{
if (this != &o) {
axis_base::operator=(o);
min_ = o.min_;
}
return *this;
}
bool
integer_axis::operator==(const integer_axis& o) const
{

View File

@@ -6,54 +6,44 @@
#include <boost/histogram/histogram.hpp>
#include <boost/histogram/axis.hpp>
#include <boost/random.hpp>
#include <boost/array.hpp>
#include <random>
#include <algorithm>
#include <limits>
#include <vector>
#include <array>
#include <ctime>
#include <cstdio>
using namespace std;
using namespace boost::histogram;
template <typename D>
struct rng {
boost::random::mt19937 r;
D d;
rng(double a, double b) : d(a, b) {}
double operator()() { return d(r); }
};
vector<double> random_array(unsigned n, int type) {
using namespace boost::random;
std::vector<double> result;
switch (type) {
case 0:
std::generate_n(std::back_inserter(result), n, rng<uniform_real_distribution<> >(0.0, 1.0));
break;
case 1:
std::generate_n(std::back_inserter(result), n, rng<normal_distribution<> >(0.0, 0.3));
break;
std::vector<double> random_array(unsigned n, int type) {
std::vector<double> result(n);
std::default_random_engine gen(1);
if (type) { // type == 1
std::normal_distribution<> d(0.0, 0.3);
for (auto& x : result)
x = d(gen);
}
else { // type == 0
std::uniform_real_distribution<> d(0.0, 1.0);
for (auto& x: result)
x = d(gen);
}
return result;
}
void compare_1d(unsigned n, int distrib)
{
vector<double> r = random_array(n, distrib);
auto r = random_array(n, distrib);
double best_boost = std::numeric_limits<double>::max();
auto best_boost = std::numeric_limits<double>::max();
for (unsigned k = 0; k < 10; ++k) {
histogram h(regular_axis(100, 0, 1));
t = clock();
auto h = histogram(regular_axis(100, 0, 1));
auto t = clock();
for (unsigned i = 0; i < n; ++i)
h.fill(r[i]);
t = clock() - t;
best_boost = std::min(best_boost, double(t) / CLOCKS_PER_SEC);
// printf("root %g this %g\n", hroot.GetSum(), h.sum());
assert(hroot.GetSum() == h.sum());
}
printf("1D\n");
@@ -62,19 +52,18 @@ void compare_1d(unsigned n, int distrib)
void compare_3d(unsigned n, int distrib)
{
vector<double> r = random_array(3 * n, distrib);
auto r = random_array(3 * n, distrib);
double best_boost = std::numeric_limits<double>::max();
auto best_boost = std::numeric_limits<double>::max();
for (unsigned k = 0; k < 10; ++k) {
histogram h(regular_axis(100, 0, 1),
regular_axis(100, 0, 1),
regular_axis(100, 0, 1));
t = clock();
auto h = histogram(regular_axis(100, 0, 1),
regular_axis(100, 0, 1),
regular_axis(100, 0, 1));
auto t = clock();
for (unsigned i = 0; i < n; ++i)
h.fill(r[3 * i], r[3 * i + 1], r[3 * i + 2]);
t = clock() - t;
best_boost = std::min(best_boost, double(t) / CLOCKS_PER_SEC);
assert(hroot.GetSum() == h.sum());
}
printf("3D\n");
@@ -83,24 +72,24 @@ void compare_3d(unsigned n, int distrib)
void compare_6d(unsigned n, int distrib)
{
vector<double> r = random_array(6 * n, distrib);
auto r = random_array(6 * n, distrib);
double best_boost = std::numeric_limits<double>::max();
auto best_boost = std::numeric_limits<double>::max();
for (unsigned k = 0; k < 10; ++k) {
double x[6];
histogram h(regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1));
auto h = histogram(regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1),
regular_axis(10, 0, 1));
t = clock();
auto t = clock();
for (unsigned i = 0; i < n; ++i) {
for (unsigned k = 0; k < 6; ++k)
x[k] = r[6 * i + k];
h.fill(x, x+6);
h.fill(x[0], x[1], x[2], x[3], x[4], x[5]);
}
t = clock() - t;
best_boost = std::min(best_boost, double(t) / CLOCKS_PER_SEC);