Add minimal_perfect_hash policy and test suite

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-12-18 04:43:21 +00:00
parent b420157916
commit 530de74f27
2 changed files with 542 additions and 0 deletions

View File

@@ -0,0 +1,290 @@
// Copyright (c) 2018-2025 Jean-Louis Leroy
// Distributed under the Boost Software License, Version 1.0.
// See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP
#define BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP
#include <boost/openmethod/preamble.hpp>
#include <limits>
#include <random>
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4702) // unreachable code
#endif
namespace boost::openmethod {
namespace detail {
//! Per-registry control table used by `policies::minimal_perfect_hash`.
//!
//! When `Registry::has_runtime_checks` is true, `minimal_perfect_hash::fn`
//! passes this vector to its factor search, which stores each type id in the
//! slot it hashes to. `fn::check` then compares the slot selected by a hash
//! against the type id being hashed, and `fn::finalize` clears the vector.
template<class Registry>
std::vector<type_id> minimal_perfect_hash_control;
} // namespace detail
namespace policies {
//! Hash type ids using a minimal perfect hash function.
//!
//! `minimal_perfect_hash` implements the @ref type_hash policy using a hash
//! function in the form `H(x)=(M*x)>>N`. Initialization tries random odd
//! multipliers `M`, for a few shift amounts `N`, until it finds a pair for
//! which the function is collision-free over the registered type_ids and
//! leaves no bucket of the table unused. The codomain is then exactly the
//! size of the domain: a dense range [0, n-1] for n inputs.
struct minimal_perfect_hash : type_hash {
    //! Reported when no suitable hash factors could be found.
    struct search_error : openmethod_error {
        //! Total number of multipliers that were tried
        std::size_t attempts;
        //! Number of buckets in the table that could not be filled
        std::size_t buckets;

        //! Write a short description to an output stream
        //! @param os The output stream
        //! @tparam Registry The registry
        //! @tparam Stream A @ref LightweightOutputStream
        template<class Registry, class Stream>
        auto write(Stream& os) const -> void;
    };

    using errors = std::variant<search_error>;

    //! A TypeHashFn metafunction.
    //!
    //! @tparam Registry The registry containing this policy
    template<class Registry>
    class fn {
        // Factors of the hash function, and the bounds of the values it
        // produced for the registered type ids.
        static std::size_t mult;
        static std::size_t shift;
        static std::size_t min_value;
        static std::size_t max_value;

        // Reports a `missing_class` and aborts unless `index` designates
        // the control-table slot that holds `type`.
        static void check(std::size_t index, type_id type);

        // Performs the factor search, using `buckets` as the table.
        template<class InitializeContext, class... Options>
        static void initialize(
            const InitializeContext& ctx, std::vector<type_id>& buckets,
            const std::tuple<Options...>& options);

      public:
        //! Find the hash factors
        //!
        //! Attempts to find suitable values for the multiplication factor
        //! `M` and the shift amount `N` that result in a minimal perfect
        //! hash function for the specified input values.
        //!
        //! If no suitable values are found, calls the error handler (if the
        //! registry has one) with a @ref search_error object, then calls
        //! `abort`.
        //!
        //! With runtime checks, the search fills the registry's control
        //! table, which @ref hash consults later; otherwise it works on a
        //! temporary table that is discarded on return.
        //!
        //! @tparam Context An @ref InitializeContext.
        //! @tparam Options... Zero or more option types, deduced from the
        //! function arguments.
        //! @param ctx A Context object.
        //! @param options A tuple of option objects.
        //! @return A pair containing the minimum and maximum hash values.
        template<class Context, class... Options>
        static auto
        initialize(const Context& ctx, const std::tuple<Options...>& options) {
            if constexpr (!Registry::has_runtime_checks) {
                std::vector<type_id> scratch;
                initialize(ctx, scratch, options);
            } else {
                initialize(
                    ctx, detail::minimal_perfect_hash_control<Registry>,
                    options);
            }

            return std::pair{min_value, max_value};
        }

        //! Hash a type id
        //!
        //! Multiplies the type id by the multiplier found by
        //! @ref initialize and shifts the product right by the shift
        //! amount.
        //!
        //! If `Registry` contains the @ref runtime_checks policy, checks
        //! that the type id is valid, i.e. that it is the one stored in the
        //! control table at the computed position. Its absence indicates
        //! that a class involved in a method definition, method overrider,
        //! or method call was not registered. In this case, signals a
        //! @ref missing_class using the registry's @ref error_handler if
        //! present; then calls `abort`.
        //!
        //! @param type The type_id to hash
        //! @return The hash value
        BOOST_FORCEINLINE
        static auto hash(type_id type) -> std::size_t {
            const auto key = reinterpret_cast<detail::uintptr>(type);
            auto slot = (mult * key) >> shift;

            if constexpr (Registry::has_runtime_checks) {
                check(slot, type);
            }

            return slot;
        }

        //! Empties the control table filled by `initialize`.
        //!
        //! @tparam Options... Zero or more option types, deduced from the
        //! function arguments.
        //! @param options A tuple of option objects (unused).
        template<class... Options>
        static auto finalize(const std::tuple<Options...>&) -> void {
            detail::minimal_perfect_hash_control<Registry>.clear();
        }
    };
};
// Definitions of the static data members of `minimal_perfect_hash::fn`; each
// registry gets its own set.

// Multiplier `M` of the hash function; `hash` multiplies the type id by it.
template<class Registry>
std::size_t minimal_perfect_hash::fn<Registry>::mult;
// Right-shift amount `N` that `hash` applies to the product.
template<class Registry>
std::size_t minimal_perfect_hash::fn<Registry>::shift;
// Lower bound of the hash values; first element of the pair returned by the
// public `initialize`, and lower bound tested by `check`.
template<class Registry>
std::size_t minimal_perfect_hash::fn<Registry>::min_value;
// Upper bound of the hash values; second element of the pair returned by the
// public `initialize`, and upper bound tested by `check`.
template<class Registry>
std::size_t minimal_perfect_hash::fn<Registry>::max_value;
//! Search for hash factors that map the registered type ids onto `[0, n)`.
//!
//! Counts the type ids exposed by the classes of `ctx` (a class may expose
//! more than one), sizes `buckets` to that count `n`, then tries random odd
//! multipliers until one sends every type id to a distinct slot below `n`.
//! Because the table has exactly `n` slots, "no collision and no index out of
//! range" already implies that every slot is used.
//!
//! On success `mult` and `shift` hold the factors, `min_value` is 0,
//! `max_value` is `n - 1`, and `buckets[i]` is the type id that hashes to
//! `i`. If there are no type ids at all, the factors are set to the identity
//! (`mult = 1`, `shift = 0`), both bounds to 0, and `buckets` is not touched.
//!
//! On failure, reports a @ref search_error through the registry's error
//! handler (if it has one), then calls `abort`.
//!
//! NOTE(review): assuming multiply-shift spreads keys roughly uniformly, one
//! multiplier is a bijection onto `[0, n)` with probability about
//! `n! / 2^(bits * n)`, which shrinks very quickly as `n` grows; the search
//! should be expected to fail for all but small registries. A scheme with
//! per-bucket displacements would be needed to scale - TODO confirm and
//! follow up, since that also changes `hash`.
//!
//! @tparam InitializeContext An @ref InitializeContext.
//! @param ctx Provides the classes (and their type ids) and the trace stream.
//! @param buckets Table to fill; the registry's control table when runtime
//! checks are enabled, a temporary otherwise.
//! @param options Option objects; not used by this function.
template<class Registry>
template<class InitializeContext, class... Options>
void minimal_perfect_hash::fn<Registry>::initialize(
    const InitializeContext& ctx, std::vector<type_id>& buckets,
    const std::tuple<Options...>& options) {
    (void)options;

    // Size the table by the number of type ids rather than the number of
    // classes: the search below inserts every type id of every class, so
    // fewer slots than type ids could never be collision-free.
    std::size_t hash_size = 0;

    for (auto cls = ctx.classes_begin(); cls != ctx.classes_end(); ++cls) {
        for (auto id = cls->type_id_begin(); id != cls->type_id_end(); ++id) {
            ++hash_size;
        }
    }

    if constexpr (InitializeContext::template has_option<trace>) {
        ctx.tr << "Finding minimal perfect hash factors for " << hash_size
               << " types\n";
    }

    if (hash_size == 0) {
        min_value = 0;
        max_value = 0;
        shift = 0;
        mult = 1;
        return;
    }

    constexpr std::size_t bits = 8 * sizeof(type_id);

    // Smallest number of index bits whose range covers the table, i.e.
    // ceil(log2(hash_size)), but never less than 1: zero index bits would
    // require shifting by the full word width, which is undefined behavior.
    std::size_t index_bits = 1;

    while (index_bits + 1 < bits &&
           (std::size_t(1) << index_bits) < hash_size) {
        ++index_bits;
    }

    std::default_random_engine rnd(13081963);
    std::uniform_int_distribution<std::size_t> uniform_dist;
    std::size_t total_attempts = 0;
    buckets.resize(hash_size);

    // Tries the current `mult`/`shift`: true if every type id lands in its
    // own slot of `buckets`, false at the first out-of-range index or
    // collision.
    const auto fills_every_bucket = [&]() -> bool {
        std::fill(
            buckets.begin(), buckets.end(), type_id(detail::uintptr_max));

        for (auto cls = ctx.classes_begin(); cls != ctx.classes_end();
             ++cls) {
            for (auto id = cls->type_id_begin(); id != cls->type_id_end();
                 ++id) {
                const auto type = *id;
                const auto index = (detail::uintptr(type) * mult) >> shift;

                if (index >= hash_size ||
                    detail::uintptr(buckets[index]) != detail::uintptr_max) {
                    return false;
                }

                buckets[index] = type;
            }
        }

        return true;
    };

    // A few passes with progressively more index bits, each trying a fixed
    // number of random multipliers.
    for (std::size_t pass = 0; pass < 4 && index_bits < bits;
         ++pass, ++index_bits) {
        shift = bits - index_bits;

        if constexpr (InitializeContext::template has_option<trace>) {
            ctx.tr << " trying with M = " << index_bits << ", " << hash_size
                   << " buckets (minimal)\n";
        }

        for (std::size_t attempts = 0; attempts < 100000; ++attempts) {
            ++total_attempts;
            mult = uniform_dist(rnd) | 1; // odd multipliers only

            if (!fills_every_bucket()) {
                continue;
            }

            // All `hash_size` slots are in use, so the span is the whole
            // table.
            min_value = 0;
            max_value = hash_size - 1;

            if constexpr (InitializeContext::template has_option<trace>) {
                ctx.tr << " found " << mult << " after " << total_attempts
                       << " attempts; span = [" << min_value << ", "
                       << max_value
                       << "], size = " << (max_value - min_value + 1) << "\n";
            }

            return;
        }
    }

    search_error error;
    error.attempts = total_attempts;
    error.buckets = hash_size;

    if constexpr (Registry::has_error_handler) {
        Registry::error_handler::error(error);
    }

    abort();
}
//! Verify that a hash value designates the slot of the type it came from.
//!
//! Accepts `index` only if it lies within `[min_value, max_value]`, within
//! the control table, and the table holds `type` at that position. The
//! explicit size test matters because the table can be empty while the
//! bounds still admit index 0: after `finalize` has cleared it, or when
//! `initialize` found no type ids and left it untouched.
//!
//! Otherwise, reports a @ref missing_class for `type` through the registry's
//! error handler (if it has one), then calls `abort`.
//!
//! @param index The value computed by `hash` for `type`.
//! @param type The type id that was hashed.
template<class Registry>
void minimal_perfect_hash::fn<Registry>::check(std::size_t index, type_id type) {
    const auto& control = detail::minimal_perfect_hash_control<Registry>;

    const bool in_table = index >= min_value && index <= max_value &&
        index < control.size();

    if (in_table && control[index] == type) {
        return;
    }

    if constexpr (Registry::has_error_handler) {
        missing_class error;
        error.type = type;
        Registry::error_handler::error(error);
    }

    abort();
}
//! Write a one-line description of the failed search to `os`: the number of
//! attempts made and the number of buckets that were to be filled.
//!
//! @tparam Registry The registry (not used by the body).
//! @tparam Stream A @ref LightweightOutputStream
//! @param os The output stream
template<class Registry, class Stream>
auto minimal_perfect_hash::search_error::write(Stream& os) const -> void {
    os << "could not find minimal perfect hash factors after ";
    os << attempts;
    os << " attempts using ";
    os << buckets;
    os << " buckets\n";
}
} // namespace policies
} // namespace boost::openmethod

// Restore the warning state saved by the `#pragma warning(push)` near the top
// of this header, so that disabling C4702 does not leak into the files that
// include it.
#ifdef _MSC_VER
#pragma warning(pop)
#endif

#endif // BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP

View File

@@ -0,0 +1,252 @@
// Copyright (c) 2018-2025 Jean-Louis Leroy
// Distributed under the Boost Software License, Version 1.0.
// See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <string>
#include <set>
#define BOOST_TEST_MODULE minimal_perfect_hash
#include <boost/test/unit_test.hpp>
#include <boost/openmethod.hpp>
#include <boost/openmethod/policies/minimal_perfect_hash.hpp>
#include <boost/openmethod/policies/std_rtti.hpp>
#include <boost/openmethod/policies/vptr_vector.hpp>
#include <boost/openmethod/policies/stderr_output.hpp>
#include <boost/openmethod/policies/default_error_handler.hpp>
#include <boost/openmethod/initialize.hpp>
#include "test_util.hpp"
using namespace boost::openmethod;
using namespace boost::openmethod::policies;
// Registry under test: selects `minimal_perfect_hash` as the type hashing
// policy, together with std_rtti, vptr_vector, default_error_handler and
// stderr_output. It does not include runtime_checks.
// Several namespaces below register their classes and methods in this same
// registry type.
struct minimal_hash_registry
: registry<
std_rtti, vptr_vector, minimal_perfect_hash,
default_error_handler, stderr_output> {
};
// Same policy list as `minimal_hash_registry`, plus `runtime_checks`, which
// makes `minimal_perfect_hash` keep its control table and validate every
// hashed type id against it.
struct minimal_hash_registry_with_checks
: registry<
std_rtti, vptr_vector, minimal_perfect_hash,
default_error_handler, stderr_output, runtime_checks> {
};
// Smoke test: a base class with three derived classes, one open method with
// one overrider per derived class, all declared in `minimal_hash_registry`.
namespace test_basic {
struct Animal {
virtual ~Animal() {}
};
struct Dog : Animal {};
struct Cat : Animal {};
struct Bird : Animal {};
BOOST_OPENMETHOD_CLASSES(Animal, Dog, Cat, Bird, minimal_hash_registry);
// No overrider is defined for Animal itself; only the derived classes are
// called below.
BOOST_OPENMETHOD(get_sound, (virtual_<const Animal&>), std::string, minimal_hash_registry);
BOOST_OPENMETHOD_OVERRIDE(get_sound, (const Dog&), std::string) {
return "woof";
}
BOOST_OPENMETHOD_OVERRIDE(get_sound, (const Cat&), std::string) {
return "meow";
}
BOOST_OPENMETHOD_OVERRIDE(get_sound, (const Bird&), std::string) {
return "chirp";
}
// Initializes the registry, then checks that each derived object reaches its
// own overrider.
BOOST_AUTO_TEST_CASE(basic_functionality) {
initialize<minimal_hash_registry>();
Dog dog;
Cat cat;
Bird bird;
BOOST_TEST(get_sound(dog) == "woof");
BOOST_TEST(get_sound(cat) == "meow");
BOOST_TEST(get_sound(bird) == "chirp");
}
} // namespace test_basic
// Dispatch test over a base class with five derived classes, declared in
// `minimal_hash_registry`; each overrider returns a distinct integer.
namespace test_hash_properties {
struct Base {
virtual ~Base() {}
};
struct D1 : Base {};
struct D2 : Base {};
struct D3 : Base {};
struct D4 : Base {};
struct D5 : Base {};
BOOST_OPENMETHOD_CLASSES(Base, D1, D2, D3, D4, D5, minimal_hash_registry);
BOOST_OPENMETHOD(get_id, (virtual_<const Base&>), int, minimal_hash_registry);
BOOST_OPENMETHOD_OVERRIDE(get_id, (const D1&), int) {
return 1;
}
BOOST_OPENMETHOD_OVERRIDE(get_id, (const D2&), int) {
return 2;
}
BOOST_OPENMETHOD_OVERRIDE(get_id, (const D3&), int) {
return 3;
}
BOOST_OPENMETHOD_OVERRIDE(get_id, (const D4&), int) {
return 4;
}
BOOST_OPENMETHOD_OVERRIDE(get_id, (const D5&), int) {
return 5;
}
BOOST_AUTO_TEST_CASE(minimal_hash_properties) {
initialize<minimal_hash_registry>();
// Each derived object must reach the overrider defined for its own class.
D1 d1;
D2 d2;
D3 d3;
D4 d4;
D5 d5;
BOOST_TEST(get_id(d1) == 1);
BOOST_TEST(get_id(d2) == 2);
BOOST_TEST(get_id(d3) == 3);
BOOST_TEST(get_id(d4) == 4);
BOOST_TEST(get_id(d5) == 5);
// NOTE(review): despite the test name, nothing here inspects the hash values
// themselves (distinctness, density of the range); only dispatch results are
// asserted. TODO: add direct assertions on the hash function if the policy's
// properties are meant to be covered.
}
} // namespace test_hash_properties
// Dispatch test against `minimal_hash_registry_with_checks`, i.e. with the
// runtime_checks policy enabled, so every hash computed during these calls
// is validated against the policy's control table.
namespace test_with_runtime_checks {
struct Vehicle {
virtual ~Vehicle() {}
};
struct Car : Vehicle {};
struct Bike : Vehicle {};
BOOST_OPENMETHOD_CLASSES(Vehicle, Car, Bike, minimal_hash_registry_with_checks);
BOOST_OPENMETHOD(get_wheels, (virtual_<const Vehicle&>), int, minimal_hash_registry_with_checks);
BOOST_OPENMETHOD_OVERRIDE(get_wheels, (const Car&), int) {
return 4;
}
BOOST_OPENMETHOD_OVERRIDE(get_wheels, (const Bike&), int) {
return 2;
}
// Only the success path is covered: both classes are registered, so the
// check is expected to pass. The failure path (unregistered class) is not
// exercised here.
BOOST_AUTO_TEST_CASE(runtime_checks) {
initialize<minimal_hash_registry_with_checks>();
Car car;
Bike bike;
BOOST_TEST(get_wheels(car) == 4);
BOOST_TEST(get_wheels(bike) == 2);
}
} // namespace test_with_runtime_checks
namespace test_empty {
struct Empty {
virtual ~Empty() {}
};
BOOST_OPENMETHOD_CLASSES(Empty, minimal_hash_registry);
BOOST_OPENMETHOD(process, (virtual_<const Empty&>), int, minimal_hash_registry);
BOOST_OPENMETHOD_OVERRIDE(process, (const Empty&), int) {
return 42;
}
BOOST_AUTO_TEST_CASE(single_class) {
initialize<minimal_hash_registry>();
Empty e;
BOOST_TEST(process(e) == 42);
}
} // namespace test_empty
// Dispatch test over a wider hierarchy: one root and ten classes derived
// directly from it, declared in `minimal_hash_registry`. Each overrider
// returns the number in its class name.
namespace test_large_hierarchy {
struct Root {
virtual ~Root() {}
};
struct L1_1 : Root {};
struct L1_2 : Root {};
struct L1_3 : Root {};
struct L1_4 : Root {};
struct L1_5 : Root {};
struct L1_6 : Root {};
struct L1_7 : Root {};
struct L1_8 : Root {};
struct L1_9 : Root {};
struct L1_10 : Root {};
BOOST_OPENMETHOD_CLASSES(Root, L1_1, L1_2, L1_3, L1_4, L1_5, L1_6, L1_7, L1_8, L1_9, L1_10, minimal_hash_registry);
BOOST_OPENMETHOD(classify, (virtual_<const Root&>), int, minimal_hash_registry);
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_1&), int) { return 1; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_2&), int) { return 2; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_3&), int) { return 3; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_4&), int) { return 4; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_5&), int) { return 5; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_6&), int) { return 6; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_7&), int) { return 7; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_8&), int) { return 8; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_9&), int) { return 9; }
BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_10&), int) { return 10; }
// Initializes the registry, then checks that every derived object reaches
// the overrider defined for its own class.
BOOST_AUTO_TEST_CASE(larger_hierarchy) {
initialize<minimal_hash_registry>();
L1_1 o1;
L1_2 o2;
L1_3 o3;
L1_4 o4;
L1_5 o5;
L1_6 o6;
L1_7 o7;
L1_8 o8;
L1_9 o9;
L1_10 o10;
BOOST_TEST(classify(o1) == 1);
BOOST_TEST(classify(o2) == 2);
BOOST_TEST(classify(o3) == 3);
BOOST_TEST(classify(o4) == 4);
BOOST_TEST(classify(o5) == 5);
BOOST_TEST(classify(o6) == 6);
BOOST_TEST(classify(o7) == 7);
BOOST_TEST(classify(o8) == 8);
BOOST_TEST(classify(o9) == 9);
BOOST_TEST(classify(o10) == 10);
}
} // namespace test_large_hierarchy