From 41a767947ad356326dfa9632db08881bc5c95bc1 Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Sat, 18 Mar 2017 18:16:21 +0100 Subject: [PATCH] added axis iterators --- README.md | 25 +- doc/tutorial.qbk | 9 +- include/boost/histogram/axis.hpp | 280 ++++++++++++++++----- include/boost/histogram/detail/utility.hpp | 12 +- include/boost/histogram/serialization.hpp | 23 +- src/python/axis.cpp | 27 +- test/detail_test.cpp | 6 +- test/static_histogram_test.cpp | 9 +- 8 files changed, 258 insertions(+), 133 deletions(-) diff --git a/README.md b/README.md index ed69f23a..435aab11 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,6 @@ Example 1: Fill a 1d-histogram in C++ ```cpp #include // proposed for inclusion in Boost #include // proposed for inclusion in Boost - #include // proposed for inclusion in Boost #include #include @@ -82,11 +81,10 @@ Example 1: Fill a 1d-histogram in C++ h.wfill(0.1, 5.0); // fill with a weighted entry, weight is 5.0 // access histogram counts, loop includes under- and overflow bin - const auto& a = h.axis<0>(); - for (int i = -1, n = bh::bins(a) + 1; i < n; ++i) { - std::cout << "bin " << i - << " x in [" << bh::left(a, i) << ", " << bh::right(a, i) << "): " - << h.value(i) << " +/- " << std::sqrt(h.variance(i)) + for (const auto& b : h.axis<0>()) { + std::cout << "bin " << b.idx + << " x in [" << b.left << ", " << b.right << "): " + << h.value(b.idx) << " +/- " << std::sqrt(h.variance(b.idx)) << std::endl; } @@ -165,20 +163,7 @@ The benchmark against ROOT is implemented in C++, the benchmark against numpy in Test system: Intel Core i7-4500U CPU clocked at 1.8 GHz, 8 GB of DDR3 RAM -``` -====================================== ======= ======= ======= ======= ======= ======= -distribution uniform normal --------------------------------------- ------------------------- ------------------------- -dimension 1D 3D 6D 1D 3D 6D -====================================== ======= ======= ======= ======= ======= ======= -No. of fills 12M 4M 2M 12M 4M 2M -C++: ROOT [t/s] 0.13 0.21 0.19 0.17 0.14 0.18 -C++: boost/static_storage [t/s] 0.07 0.14 0.15 0.09 0.13 0.17 -C++: boost/dynamic_storage [t/s] 0.12 0.10 0.09 0.13 0.12 0.12 -Py: numpy [t/s] 0.83 0.73 0.44 0.82 0.43 0.40 -Py: boost [t/s] 0.21 0.23 0.19 0.21 0.19 0.17 -====================================== ======= ======= ======= ======= ======= ======= -``` +![alt benchmark](https://github.com/hdembinski/doc/benchmark.png "") `boost::histogram` is faster than the respective ROOT histograms, while being richer in core features and easier to use. The performance of `boost::histogram` is similar in C++ and Python, showing only a small overhead in Python. It is by a factor 2-4 faster than numpy's histogram functions. diff --git a/doc/tutorial.qbk b/doc/tutorial.qbk index aea0d00e..39e8f950 100644 --- a/doc/tutorial.qbk +++ b/doc/tutorial.qbk @@ -27,11 +27,10 @@ Example 1: How to make a 1d-histogram in C++ and to fill it. h.wfill(5.0, 0.1); // fill with a weighted entry, weight is 5.0 // access histogram counts, loop includes under- and overflow bin - const auto& a = h.axis<0>(); - for (int i = -1, n = bh::bins(a) + 1; i < n; ++i) { - std::cout << "bin " << i - << " x in [" << bh::left(a, i) << ", " << bh::right(a, i) << "): " - << h.value(i) << " +/- " << std::sqrt(h.variance(i)) + for (const auto& b : h.axis<0>()) { + std::cout << "bin " << b.idx + << " x in [" << b.left << ", " << b.right << "): " + << h.value(b.idx) << " +/- " << std::sqrt(h.variance(b.idx)) << std::endl; } diff --git a/include/boost/histogram/axis.hpp b/include/boost/histogram/axis.hpp index 52763034..dc2931af 100644 --- a/include/boost/histogram/axis.hpp +++ b/include/boost/histogram/axis.hpp @@ -9,6 +9,8 @@ #include #include +#include +#include #include #include #include @@ -20,8 +22,78 @@ namespace boost { namespace histogram { -/// Common base class for most axes. -class axis_base { +namespace detail { + + template + struct bin + { + int idx; + Value value; + }; + + template <> + struct bin + { + int idx; + boost::string_ref value; + }; + + template + struct real_bin + { + int idx; + Value left, right; + }; + + template + using axis_bin = typename std::conditional< + std::is_floating_point::value, + real_bin, + bin + >::type; + + template + class axis_iterator : public iterator_facade< + axis_iterator, + const axis_bin, + random_access_traversal_tag + > + { + using bin_type = axis_bin; + + public: + explicit axis_iterator(const Axis& axis, int idx) : + axis_(axis), value_() + { value_.idx = idx; set_impl(value_); } + + private: + void increment() { ++value_.idx; set_impl(value_); } + void decrement() { --value_.idx; set_impl(value_); } + void advance(int n) { value_.idx += n; set_impl(value_); } + int distance_to(const axis_iterator& other) const + { return other.value_.idx - value_.idx; } + bool equal(const axis_iterator& other) const + { return value_.idx == other.value_.idx; } + const bin_type& dereference() const { return value_; } + template + void set_impl(bin& v) + { v.value = axis_[v.idx]; } + template + void set_impl(real_bin& v) + { v.left = axis_[v.idx]; v.right = axis_[v.idx + 1]; } + const Axis& axis_; + bin_type value_; + friend class boost::iterator_core_access; + }; +} // NS detail + +/// Common base class for axes. +template +class axis_base; + +template <> +class axis_base +{ public: /// Returns the number of bins, excluding overflow/underflow. inline int bins() const { return size_ ; } @@ -45,34 +117,82 @@ protected: axis_base() = default; axis_base(const axis_base&) = default; axis_base& operator=(const axis_base&) = default; - axis_base(axis_base&&) = default; - axis_base& operator=(axis_base&&) = default; + axis_base(axis_base&& other) : + size_(other.size_), + shape_(other.shape_), + label_(std::move(other.label_)) + { other.size_ = 0; other.shape_ = 0; } + axis_base& operator=(axis_base&& other) { + if (this != &other) { + size_ = other.size_; + shape_ = other.shape_; + label_ = std::move(other.label_); + other.size_ = 0; + other.shape_ = 0; + } + return *this; + } bool operator==(const axis_base& o) const { return size_ == o.size_ && shape_ == o.shape_ && label_ == o.label_; } private: - int size_; - int shape_; + int size_ = 0; + int shape_ = 0; std::string label_; template - friend void serialize(Archive&, axis_base&, unsigned); + friend void serialize(Archive&, axis_base&, unsigned); }; -/// Mixin for real-valued axes. -template -class real_axis { +template <> +class axis_base +{ public: - /// Lower edge of the bin (left side). - RealType left(int idx) const { - return static_cast(*this)[idx]; + /// Returns the number of bins, excluding overflow/underflow. + inline int bins() const { return size_ ; } + /// Returns the number of bins, including overflow/underflow. + inline int shape() const { return size_; } + /// Returns whether axis has extra overflow and underflow bins. + inline bool uoflow() const { return false; } + /// Returns the axis label, which is a name or description (not implemented for category_axis). + const std::string& label() const { return label_; } + /// Change the label of an axis (not implemented for category_axis). + void label(const std::string& label) { label_ = label; } + +protected: + axis_base(unsigned n, const std::string& label) : + size_(n), label_(label) + { + if (n == 0) + throw std::logic_error("bins > 0 required"); } - /// Upper edge of the bin (right side). - RealType right(int idx) const { - return static_cast(*this)[idx + 1]; + axis_base() = default; + axis_base(const axis_base&) = default; + axis_base& operator=(const axis_base&) = default; + axis_base(axis_base&& other) : + size_(other.size_), + label_(std::move(other.label_)) + { other.size_ = 0; } + axis_base& operator=(axis_base&& other) { + if (this != &other) { + size_ = other.size_; + label_ = std::move(other.label_); + other.size_ = 0; + } + return *this; } + + bool operator==(const axis_base& other) const + { return size_ == other.size_ && label_ == other.label_; } + +private: + int size_ = 0; + std::string label_; + + template + friend void serialize(Archive&, axis_base&, unsigned); }; /** Axis for binning real-valued data into equidistant bins. @@ -81,10 +201,10 @@ public: * Very fast. Binning is a O(1) operation. */ template -class regular_axis: public axis_base, - public real_axis> { +class regular_axis: public axis_base { public: using value_type = RealType; + using const_iterator = detail::axis_iterator; /** Construct axis with n bins over range [min, max). * @@ -97,7 +217,7 @@ public: regular_axis(unsigned n, value_type min, value_type max, const std::string& label = std::string(), bool uoflow = true) : - axis_base(n, label, uoflow), + axis_base(n, label, uoflow), min_(min), delta_((max - min) / n) { @@ -132,11 +252,17 @@ public: bool operator==(const regular_axis& o) const { - return axis_base::operator==(o) && + return axis_base::operator==(o) && min_ == o.min_ && delta_ == o.delta_; } + const_iterator begin() const + { return const_iterator(*this, uoflow() ? -1 : 0); } + + const_iterator end() const + { return const_iterator(*this, uoflow() ? bins() + 1 : bins()); } + private: value_type min_ = 0.0, delta_ = 1.0; @@ -152,10 +278,10 @@ private: * Binning is a O(1) operation. */ template -class circular_axis: public axis_base, - public real_axis> { +class circular_axis: public axis_base { public: using value_type = RealType; + using const_iterator = detail::axis_iterator; /** Constructor for n bins with an optional offset. * @@ -168,7 +294,7 @@ public: circular_axis(unsigned n, value_type phase = 0.0, value_type perimeter = math::double_constants::two_pi, const std::string& label = std::string()) : - axis_base(n, label, false), + axis_base(n, label), phase_(phase), perimeter_(perimeter) {} @@ -194,7 +320,7 @@ public: bool operator==(const circular_axis& o) const { - return axis_base::operator==(o) && + return axis_base::operator==(o) && phase_ == o.phase_ && perimeter_ == o.perimeter_; } @@ -202,6 +328,13 @@ public: value_type perimeter() const { return perimeter_; } value_type phase() const { return phase_; } + const_iterator begin() const + { return const_iterator(*this, 0); } + + const_iterator end() const + { return const_iterator(*this, bins()); } + + private: value_type phase_ = 0.0, perimeter_ = 1.0; @@ -215,10 +348,10 @@ private: * and the problem domain allows it, prefer a regular_axis. */ template -class variable_axis : public axis_base, - public real_axis> { +class variable_axis : public axis_base { public: using value_type = RealType; + using const_iterator = detail::axis_iterator; /** Construct an axis from bin edges. * @@ -230,7 +363,7 @@ public: variable_axis(const std::initializer_list& x, const std::string& label = std::string(), bool uoflow = true) : - axis_base(x.size() - 1, label, uoflow), + axis_base(x.size() - 1, label, uoflow), x_(new value_type[x.size()]) { if (x.size() < 2) @@ -242,7 +375,7 @@ public: variable_axis(const std::vector& x, const std::string& label = std::string(), bool uoflow = true) : - axis_base(x.size() - 1, label, uoflow), + axis_base(x.size() - 1, label, uoflow), x_(new value_type[x.size()]) { std::copy(x.begin(), x.end(), x_.get()); @@ -253,7 +386,7 @@ public: variable_axis(Iterator begin, Iterator end, const std::string& label = std::string(), bool uoflow = true) : - axis_base(std::distance(begin, end) - 1, label, uoflow), + axis_base(std::distance(begin, end) - 1, label, uoflow), x_(new value_type[std::distance(begin, end)]) { std::copy(begin, end, x_.get()); @@ -262,7 +395,7 @@ public: variable_axis() = default; variable_axis(const variable_axis& o) : - axis_base(o), + axis_base(o), x_(new value_type[bins() + 1]) { std::copy(o.x_.get(), o.x_.get() + bins() + 1, x_.get()); @@ -270,7 +403,7 @@ public: variable_axis& operator=(const variable_axis& o) { if (this != &o) { - axis_base::operator=(o); + axis_base::operator=(o); x_.reset(new value_type[bins() + 1]); std::copy(o.x_.get(), o.x_.get() + bins() + 1, x_.get()); } @@ -297,11 +430,20 @@ public: bool operator==(const variable_axis& o) const { - if (!axis_base::operator==(o)) + if (!axis_base::operator==(o)) return false; return std::equal(x_.get(), x_.get() + bins() + 1, o.x_.get()); } + const_iterator begin() const + { return const_iterator(*this, uoflow() ? -1 : 0); } + + const_iterator end() const + { return const_iterator(*this, uoflow() ? bins() + 1 : bins()); } + + value_type left(int idx) const { return operator[](idx); } + value_type right(int idx) const { return operator[](idx+1); } + private: std::unique_ptr x_; // smaller size compared to std::vector @@ -311,12 +453,13 @@ private: /** An axis for a contiguous range of integers. * - * There are no underflow/overflow bins for this axis. - * Binning is a O(1) operation. + * Binning is a O(1) operation. This axis operates + * faster than a regular_axis. */ -class integer_axis: public axis_base { +class integer_axis: public axis_base { public: using value_type = int; + using const_iterator = detail::axis_iterator; /** Construct axis over integer range [min, max]. * @@ -326,7 +469,7 @@ public: integer_axis(value_type min, value_type max, const std::string& label = std::string(), bool uoflow = true) : - axis_base(max + 1 - min, label, uoflow), + axis_base(max + 1 - min, label, uoflow), min_(min) { if (min > max) @@ -351,9 +494,15 @@ public: bool operator==(const integer_axis& o) const { - return axis_base::operator==(o) && min_ == o.min_; + return axis_base::operator==(o) && min_ == o.min_; } + const_iterator begin() const + { return const_iterator(*this, uoflow() ? -1 : 0); } + + const_iterator end() const + { return const_iterator(*this, uoflow() ? bins() + 1 : bins()); } + private: value_type min_ = 0; @@ -368,16 +517,18 @@ private: * There are no underflow/overflow bins for this axis. * Binning is a O(1) operation. */ -class category_axis { +class category_axis : public axis_base { public: using value_type = const std::string&; + using const_iterator = detail::axis_iterator; template - category_axis(Iterator begin, Iterator end) : - size_(std::distance(begin, end)), - ptr_(new std::string[size_]) + category_axis(Iterator begin, Iterator end, + const std::string& label = std::string()) : + axis_base(std::distance(begin, end), label), + ptr_(new std::string[bins()]) { - if (size_ == 0) + if (bins() == 0) throw std::logic_error("at least one argument required"); std::copy(begin, end, ptr_.get()); } @@ -387,54 +538,50 @@ public: * \param categories sequence of labeled categories. */ explicit - category_axis(const std::initializer_list& categories) : - category_axis(categories.begin(), categories.end()) + category_axis(const std::initializer_list& categories, + const std::string& label = std::string()) : + category_axis(categories.begin(), categories.end(), label) {} explicit - category_axis(const std::vector& categories) : - category_axis(categories.begin(), categories.end()) + category_axis(const std::vector& categories, + const std::string& label = std::string()) : + category_axis(categories.begin(), categories.end(), label) {} category_axis() = default; category_axis(const category_axis& other) : category_axis(other.ptr_.get(), - other.ptr_.get() + other.size_) + other.ptr_.get() + other.bins(), + other.label()) {} category_axis& operator=(const category_axis& other) { if (this != &other) { - size_ = other.size_; - ptr_.reset(new std::string[other.size_]); - std::copy(other.ptr_.get(), other.ptr_.get() + other.size_, ptr_.get()); + axis_base::operator=(other); + ptr_.reset(new std::string[other.bins()]); + std::copy(other.ptr_.get(), other.ptr_.get() + other.bins(), ptr_.get()); } return *this; } category_axis(category_axis&& other) : - size_(other.size_), + axis_base(std::move(other)), ptr_(std::move(other.ptr_)) - { - other.size_ = 0; - } + {} category_axis& operator=(category_axis&& other) { if (this != &other) { - size_ = other.size_; + axis_base::operator=(std::move(other)); ptr_ = std::move(other.ptr_); - other.size_ = 0; } return *this; } - inline int bins() const { return size_; } - inline int shape() const { return size_; } - inline bool uoflow() const { return false; } - /// Returns the bin index for the passed argument. /// Performs a range check. inline int index(int x) const - { if (!(0 <= x && x < size_)) + { if (!(0 <= x && x < bins())) throw std::out_of_range("category index is out of range"); return x; } @@ -444,12 +591,17 @@ public: bool operator==(const category_axis& other) const { - return size_ == other.size_ && - std::equal(ptr_.get(), ptr_.get() + size_, other.ptr_.get()); + return axis_base::operator==(other) && + std::equal(ptr_.get(), ptr_.get() + bins(), other.ptr_.get()); } + const_iterator begin() const + { return const_iterator(*this, 0); } + + const_iterator end() const + { return const_iterator(*this, bins()); } + private: - int size_ = 0; std::unique_ptr ptr_; template diff --git a/include/boost/histogram/detail/utility.hpp b/include/boost/histogram/detail/utility.hpp index d3fa41a7..145d1fe7 100644 --- a/include/boost/histogram/detail/utility.hpp +++ b/include/boost/histogram/detail/utility.hpp @@ -13,11 +13,12 @@ namespace boost { namespace histogram { namespace detail { + template inline - void escape(std::ostream& os, const char* s) { + void escape(std::ostream& os, const String& s) { os << '\''; - for (const char* sit = s; *sit; ++sit) { - if (*sit == '\'' && (sit == s || *(sit-1) != '\\')) + for (auto sit = s.begin(); sit != s.end(); ++sit) { + if (*sit == '\'' && (sit == s.begin() || *(sit-1) != '\\')) os << "\\\'"; else os << *sit; @@ -25,11 +26,6 @@ namespace detail { os << '\''; } - inline - void escape(std::ostream& os, const std::string& s) { - escape(os, s.c_str()); - } - inline bool empty(const std::string& s) { return s.empty(); } } diff --git a/include/boost/histogram/serialization.hpp b/include/boost/histogram/serialization.hpp index 31f9c95d..3d2223f5 100644 --- a/include/boost/histogram/serialization.hpp +++ b/include/boost/histogram/serialization.hpp @@ -76,7 +76,14 @@ void adaptive_storage::serialize(Archive& ar, unsigned /* version */) } template -inline void serialize(Archive& ar, axis_base & base, unsigned /* version */) +inline void serialize(Archive& ar, axis_base & base, unsigned /* version */) +{ + ar & base.size_; + ar & base.label_; +} + +template +inline void serialize(Archive& ar, axis_base & base, unsigned /* version */) { ar & base.size_; ar & base.shape_; @@ -86,7 +93,7 @@ inline void serialize(Archive& ar, axis_base & base, unsigned /* version */) template inline void serialize(Archive& ar, regular_axis & axis, unsigned /* version */) { - ar & boost::serialization::base_object(axis); + ar & boost::serialization::base_object>(axis); ar & axis.min_; ar & axis.delta_; } @@ -94,7 +101,7 @@ inline void serialize(Archive& ar, regular_axis & axis, unsigned /* ve template inline void serialize(Archive& ar, circular_axis & axis, unsigned /* version */) { - ar & boost::serialization::base_object(axis); + ar & boost::serialization::base_object>(axis); ar & axis.phase_; ar & axis.perimeter_; } @@ -102,7 +109,7 @@ inline void serialize(Archive& ar, circular_axis & axis, unsigned /* v template inline void serialize(Archive& ar, variable_axis & axis, unsigned /* version */) { - ar & boost::serialization::base_object(axis); + ar & boost::serialization::base_object>(axis); if (Archive::is_loading::value) axis.x_.reset(new RealType[axis.bins() + 1]); ar & boost::serialization::make_array(axis.x_.get(), axis.bins() + 1); @@ -111,18 +118,18 @@ inline void serialize(Archive& ar, variable_axis & axis, unsigned /* v template inline void serialize(Archive& ar, integer_axis & axis, unsigned /* version */) { - ar & boost::serialization::base_object(axis); + ar & boost::serialization::base_object>(axis); ar & axis.min_; } template inline void serialize(Archive& ar, category_axis & axis, unsigned /* version */) { - ar & axis.size_; + ar & boost::serialization::base_object>(axis); if (Archive::is_loading::value) { - axis.ptr_.reset(new std::string[axis.size_]); + axis.ptr_.reset(new std::string[axis.bins()]); } - ar & boost::serialization::make_array(axis.ptr_.get(), axis.size_); + ar & boost::serialization::make_array(axis.ptr_.get(), axis.bins()); } namespace { diff --git a/src/python/axis.cpp b/src/python/axis.cpp index 2c0d2f74..55c5e881 100644 --- a/src/python/axis.cpp +++ b/src/python/axis.cpp @@ -130,30 +130,17 @@ axis_repr(const T& t) { template struct axis_suite : public python::def_visitor > { - - template - static - typename std::enable_if::value>::type - label(Class& cl) { - cl.add_property("label", - make_function((const std::string&(U::*)() const) &U::label, - python::return_value_policy()), - (void(U::*)(const std::string&)) &U::label, - "Name or description for the axis."); - } - - template - static - typename std::enable_if::value>::type - label(Class& cl) {} - template static void visit(Class& cl) { - label(cl); - cl.add_property("bins", &T::bins); - cl.add_property("shape", &T::shape); + cl.add_property("bins", &T::bins, "Number of bins."); + cl.add_property("shape", &T::shape, "Number of bins, including possible over- and underflow bins."); + cl.add_property("label", + make_function((const std::string&(T::*)() const) &T::label, + python::return_value_policy()), + (void(T::*)(const std::string&)) &T::label, + "Name or description for the axis."); cl.def("index", &T::index, ":param float x: value" "\n:returns: bin index for the passed value", diff --git a/test/detail_test.cpp b/test/detail_test.cpp index ec7d8cb5..facafb28 100644 --- a/test/detail_test.cpp +++ b/test/detail_test.cpp @@ -32,21 +32,21 @@ int main () // escape0 { std::ostringstream os; - escape(os, "abc"); + escape(os, std::string("abc")); BOOST_TEST_EQ(os.str(), std::string("'abc'")); } // escape1 { std::ostringstream os; - escape(os, "abc\n"); + escape(os, std::string("abc\n")); BOOST_TEST_EQ(os.str(), std::string("'abc\n'")); } // escape2 { std::ostringstream os; - escape(os, "'abc'"); + escape(os, std::string("'abc'")); BOOST_TEST_EQ(os.str(), std::string("'\\\'abc\\\''")); } diff --git a/test/static_histogram_test.cpp b/test/static_histogram_test.cpp index 0252d229..507f25e4 100644 --- a/test/static_histogram_test.cpp +++ b/test/static_histogram_test.cpp @@ -490,11 +490,10 @@ int main() { std::ostringstream os1; // access histogram counts - const auto& a = h.axis<0>(); - for (int i = -1, n = bins(a) + 1; i < n; ++i) { - os1 << "bin " << i - << " x in [" << left(a, i) << ", " << right(a ,i) << "): " - << h.value(i) << " +/- " << std::sqrt(h.variance(i)) + for (const auto& b : h.axis<0>()) { + os1 << "bin " << b.idx + << " x in [" << b.left << ", " << b.right << "): " + << h.value(b.idx) << " +/- " << std::sqrt(h.variance(b.idx)) << "\n"; }