Leverage core::popcount() in the implementation of count()

Reason: This significantly simplifies the code, replacing the previous
implementation, which was complex and error-prone.
This commit is contained in:
Gennaro Prota
2025-06-22 18:51:41 +02:00
parent fffe155495
commit 5459cdf373
2 changed files with 6 additions and 208 deletions

View File

@@ -21,10 +21,6 @@
#include <cstddef>
#include <memory>
#if ((defined(BOOST_MSVC) && (BOOST_MSVC >= 1600)) || (defined(__clang__) && defined(__c2__)) || (defined(BOOST_INTEL) && defined(_MSC_VER))) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
#endif
namespace boost {
namespace detail {
@@ -38,16 +34,6 @@ namespace boost {
template<class T>
BOOST_CONSTEXPR_OR_CONST T max_limit<T>::value;
// Exposes the bytes that make up an object of type T as a read-only
// sequence of unsigned char (the "object representation", 3.9p4).
// Must NOT be applied to a base-class sub-object.
//
template <typename T>
inline const unsigned char * object_representation (T* p)
{
    // Any object pointer converts implicitly to const void *; from there
    // a static_cast yields the byte-wise view.
    const void * const untyped = p;
    return static_cast<const unsigned char *>(untyped);
}
template<typename T, int amount, int width /* = default */>
struct shifter
{
@@ -57,170 +43,11 @@ namespace boost {
}
};
// ------- count function implementation --------------
// Byte-sized unsigned type: the element type of the bit-count lookup
// table and of the byte-wise view used when counting by bytes.
typedef unsigned char byte_type;
// Tag type that turns a compile-time bool into a distinct type, so that
// an overload of do_count can be selected by passing a (null) pointer to
// value_to_type<...>.
//
// These two entities
//
// enum mode { access_by_bytes, access_by_blocks };
// template <mode> struct mode_to_type {};
//
// were removed, since the regression logs (as of 24 Aug 2008)
// showed that several compilers had trouble recognizing
//
// const mode m = access_by_bytes
//
// as a constant expression.
//
// * So, we'll use bool, instead of enum *.
//
template <bool value>
struct value_to_type
{
// NOTE(review): the reason for the user-provided empty constructor is not
// visible here; the code in view only ever forms null pointers to this type.
value_to_type() {}
};
// Named values for the value_to_type<> tag:
// - access_by_bytes selects the do_count overload that walks the object
//   representation one byte at a time;
// - access_by_blocks selects the do_count overload that calls popcount on
//   each element.
const bool access_by_bytes = true;
const bool access_by_blocks = false;
// the table: wrapped in a class template, so
// that it is only instantiated if/when needed
//
// count_table<> (i.e. count_table<true>) declares the lookup table; its
// definition is provided separately as a static member of the primary
// template.
template <bool dummy_name = true>
struct count_table { static const byte_type table[]; };
// The <false> specialization deliberately carries no table.
template <>
struct count_table<false> { /* no table */ };
// Number of bits consumed by a single table lookup; the lookup table
// therefore has 2^table_width (= 256) entries.
const unsigned int table_width = 8;
// Definition of the lookup table for the primary count_table template.
// It holds 256 entries; entry i is the number of 1 bits in the binary
// representation of i (0 for index 0, 8 for index 255).
template <bool b>
const byte_type count_table<b>::table[] =
{
// Automatically generated by GPTableGen.exe v.1.0
//
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
// Generic fallback: counts the set bits of `value` by consuming it
// table_width bits at a time and summing the matching entries of
// count_table<>::table.
//
// Some platforms have a fast popcount operation, which allows counting
// bits much more efficiently; specializations may be provided for those.
//
template <typename ValueType>
BOOST_FORCEINLINE std::size_t popcount(ValueType value) BOOST_NOEXCEPT
{
    // Mask selecting the lowest table_width bits of the value.
    const unsigned int chunk_mask = (1u << table_width) - 1;

    std::size_t total = 0u;
    for (; value; value >>= table_width) {
        total += count_table<>::table[value & chunk_mask];
    }
    return total;
}
// Specializations of popcount for compilers that expose a population-count
// intrinsic or builtin. Types without a specialization fall back to the
// generic table-driven popcount.
//
// First branch: MSVC-compatible compilers targeting x86/x64, and only when
// __POPCNT__ or __AVX__ is defined.
#if (((defined(BOOST_MSVC) && (BOOST_MSVC >= 1600)) || (defined(__clang__) && defined(__c2__)) || (defined(BOOST_INTEL) && defined(_MSC_VER))) && (defined(_M_IX86) || defined(_M_X64))) \
&& (defined(__POPCNT__) || defined(__AVX__))
// 16-bit values: __popcnt16 intrinsic.
template <>
BOOST_FORCEINLINE std::size_t popcount<unsigned short>(unsigned short value) BOOST_NOEXCEPT
{
return static_cast<std::size_t>(__popcnt16(value));
}
// 32-bit values: __popcnt intrinsic.
template <>
BOOST_FORCEINLINE std::size_t popcount<unsigned int>(unsigned int value) BOOST_NOEXCEPT
{
return static_cast<std::size_t>(__popcnt(value));
}
// 64-bit values: __popcnt64 when targeting x64; otherwise the low and high
// 32-bit halves are counted separately with __popcnt and added.
template <>
BOOST_FORCEINLINE std::size_t popcount<unsigned __int64>(unsigned __int64 value) BOOST_NOEXCEPT
{
#if defined(_M_X64)
return static_cast<std::size_t>(__popcnt64(value));
#else
return static_cast<std::size_t>(__popcnt(static_cast< unsigned int >(value))) + static_cast<std::size_t>(__popcnt(static_cast< unsigned int >(value >> 32)));
#endif
}
// Second branch: GCC, clang, and Intel in GNU mode, using the
// __builtin_popcount family.
#elif defined(BOOST_GCC) || defined(__clang__) || (defined(BOOST_INTEL) && defined(__GNUC__))
// Note: gcc builtins are implemented by compiler runtime when the target CPU may not support the necessary instructions
// unsigned short is widened to unsigned int before calling the builtin.
template <>
BOOST_FORCEINLINE std::size_t popcount<unsigned short>(unsigned short value) BOOST_NOEXCEPT
{
return static_cast<unsigned int>(__builtin_popcount(static_cast<unsigned int>(value)));
}
template <>
BOOST_FORCEINLINE std::size_t popcount<unsigned int>(unsigned int value) BOOST_NOEXCEPT
{
return static_cast<unsigned int>(__builtin_popcount(value));
}
// unsigned long uses the "l" variant of the builtin.
template <>
BOOST_FORCEINLINE std::size_t popcount<unsigned long>(unsigned long value) BOOST_NOEXCEPT
{
return static_cast<unsigned int>(__builtin_popcountl(value));
}
// unsigned long long uses the "ll" variant of the builtin.
template <>
BOOST_FORCEINLINE std::size_t popcount<boost::ulong_long_type>(boost::ulong_long_type value) BOOST_NOEXCEPT
{
return static_cast<unsigned int>(__builtin_popcountll(value));
}
#endif
// Block-wise overload: adds up popcount<ValueType> over the `length`
// elements starting at `first`.
//
// Elements are handled two at a time with two independent running sums;
// a trailing odd element, if any, is added at the end. The last two
// parameters are used for overload selection / type deduction only.
//
template <typename Iterator, typename ValueType>
inline std::size_t do_count(Iterator first, std::size_t length, ValueType,
                            value_to_type<access_by_blocks>*)
{
    std::size_t even_total = 0u;
    std::size_t odd_total = 0u;

    for (std::size_t pairs_left = length / 2u; pairs_left != 0u; --pairs_left) {
        even_total += popcount<ValueType>(*first);
        ++first;
        odd_total += popcount<ValueType>(*first);
        ++first;
    }

    // One element remains when length is odd.
    if (length % 2u != 0u) {
        even_total += popcount<ValueType>(*first);
    }

    return even_total + odd_total;
}
// overload for access by bytes
//
template <typename Iterator>
inline std::size_t do_count(Iterator first, std::size_t length,
int /*dummy param*/,
value_to_type<access_by_bytes>*)
{
if (length > 0u) {
const byte_type* p = object_representation(&*first);
length *= sizeof(*first);
return do_count(p, length, static_cast<byte_type>(0u),
static_cast< value_to_type<access_by_blocks>* >(0));
}
return 0u;
}
// -------------------------------------------------------
// Some library implementations simply return a dummy
// value such as
//

View File

@@ -20,6 +20,7 @@
#define BOOST_DYNAMIC_BITSET_DYNAMIC_BITSET_HPP
#include "boost/assert.hpp"
#include "boost/core/bit.hpp"
#include "boost/core/no_exceptions_support.hpp"
#include "boost/dynamic_bitset/config.hpp"
#include "boost/dynamic_bitset/detail/dynamic_bitset.hpp"
@@ -1174,41 +1175,11 @@ template <typename Block, typename Allocator>
// Counts set bits by summing a per-block population count over m_bits.
//
// NOTE(review): this listing carries no added/removed markers, so two
// bodies appear back to back. The part up to and including
// `return do_count(...)` looks like the earlier implementation, and the
// trailing core::popcount loop looks like its replacement. Confirm against
// the real header. Read literally, everything after the first return is
// unreachable.
typename dynamic_bitset<Block, Allocator>::size_type
dynamic_bitset<Block, Allocator>::count() const BOOST_NOEXCEPT
{
using detail::dynamic_bitset_impl::table_width;
using detail::dynamic_bitset_impl::access_by_bytes;
using detail::dynamic_bitset_impl::access_by_blocks;
using detail::dynamic_bitset_impl::value_to_type;
// no_padding: true when every bit of Block's storage is a value bit.
#if BOOST_WORKAROUND(__GNUC__, == 4) && (__GNUC_MINOR__ == 3) && (__GNUC_PATCHLEVEL__ == 3)
// NOTE: Explicit qualification of "bits_per_block"
// breaks compilation on gcc 4.3.3
enum { no_padding = bits_per_block == CHAR_BIT * sizeof(Block) };
#else
// NOTE: Explicitly qualifying "bits_per_block" to workaround
// regressions of gcc 3.4.x
enum { no_padding =
dynamic_bitset<Block, Allocator>::bits_per_block
== CHAR_BIT * sizeof(Block) };
#endif
// enough_table_width: one table lookup covers a whole byte.
enum { enough_table_width = table_width >= CHAR_BIT };
// uneffective_popcount: true when Block is narrower than the smallest type
// treated as efficiently countable on this compiler.
#if ((defined(BOOST_MSVC) && (BOOST_MSVC >= 1600)) || (defined(__clang__) && defined(__c2__)) || (defined(BOOST_INTEL) && defined(_MSC_VER))) && (defined(_M_IX86) || defined(_M_X64))
// Windows popcount is effective starting from the unsigned short type
enum { uneffective_popcount = sizeof(Block) < sizeof(unsigned short) };
#elif defined(BOOST_GCC) || defined(__clang__) || (defined(BOOST_INTEL) && defined(__GNUC__))
// GCC popcount is effective starting from the unsigned int type
enum { uneffective_popcount = sizeof(Block) < sizeof(unsigned int) };
#else
enum { uneffective_popcount = true };
#endif
// Byte-wise counting is chosen only when all three conditions hold;
// otherwise each block is counted as a whole.
enum { mode = (no_padding && enough_table_width && uneffective_popcount)
? access_by_bytes
: access_by_blocks };
// The null value_to_type<...> pointer only selects the do_count overload.
return do_count(m_bits.begin(), num_blocks(), Block(0),
static_cast<value_to_type<(bool)mode> *>(0));
// Sum core::popcount over every block stored in m_bits.
size_type result = 0;
for (block_type block : m_bits) {
result += core::popcount(block);
}
return result;
}