mirror of
https://github.com/boostorg/graph.git
synced 2026-02-01 20:42:11 +00:00
Merged in r55156 from trunk; fixes to ER generator for large-scale graphs
[SVN r55157]
This commit is contained in:
@@ -17,14 +17,23 @@
|
||||
#include <boost/random/uniform_int.hpp>
|
||||
#include <boost/graph/graph_traits.hpp>
|
||||
#include <boost/random/geometric_distribution.hpp>
|
||||
#include <boost/type_traits/is_base_and_derived.hpp>
|
||||
#include <boost/type_traits/is_base_of.hpp>
|
||||
#include <boost/type_traits/is_same.hpp>
|
||||
#include <boost/config/no_tr1/cmath.hpp>
|
||||
#include <boost/iterator/iterator_facade.hpp>
|
||||
|
||||
namespace boost {
|
||||
|
||||
template<typename RandomGenerator, typename Graph>
|
||||
class erdos_renyi_iterator
|
||||
: public iterator_facade<
|
||||
erdos_renyi_iterator<RandomGenerator, Graph>,
|
||||
std::pair<typename graph_traits<Graph>::vertices_size_type,
|
||||
typename graph_traits<Graph>::vertices_size_type>,
|
||||
std::input_iterator_tag,
|
||||
const
|
||||
std::pair<typename graph_traits<Graph>::vertices_size_type,
|
||||
typename graph_traits<Graph>::vertices_size_type>&>
|
||||
{
|
||||
typedef typename graph_traits<Graph>::directed_category directed_category;
|
||||
typedef typename graph_traits<Graph>::vertices_size_type vertices_size_type;
|
||||
@@ -32,17 +41,9 @@ namespace boost {
|
||||
|
||||
BOOST_STATIC_CONSTANT
|
||||
(bool,
|
||||
is_undirected = (is_base_and_derived<undirected_tag,
|
||||
directed_category>::value
|
||||
|| is_same<undirected_tag, directed_category>::value));
|
||||
is_undirected = (is_base_of<undirected_tag, directed_category>::value));
|
||||
|
||||
public:
|
||||
typedef std::input_iterator_tag iterator_category;
|
||||
typedef std::pair<vertices_size_type, vertices_size_type> value_type;
|
||||
typedef const value_type& reference;
|
||||
typedef const value_type* pointer;
|
||||
typedef void difference_type;
|
||||
|
||||
erdos_renyi_iterator() : gen(), n(0), edges(0), allow_self_loops(false) {}
|
||||
erdos_renyi_iterator(RandomGenerator& gen, vertices_size_type n,
|
||||
double fraction = 0.0, bool allow_self_loops = false)
|
||||
@@ -61,29 +62,17 @@ namespace boost {
|
||||
next();
|
||||
}
|
||||
|
||||
reference operator*() const { return current; }
|
||||
pointer operator->() const { return ¤t; }
|
||||
const std::pair<vertices_size_type, vertices_size_type>&
|
||||
dereference() const { return current; }
|
||||
|
||||
erdos_renyi_iterator& operator++()
|
||||
{
|
||||
void increment() {
|
||||
--edges;
|
||||
next();
|
||||
return *this;
|
||||
}
|
||||
|
||||
erdos_renyi_iterator operator++(int)
|
||||
{
|
||||
erdos_renyi_iterator temp(*this);
|
||||
++(*this);
|
||||
return temp;
|
||||
}
|
||||
|
||||
bool operator==(const erdos_renyi_iterator& other) const
|
||||
bool equal(const erdos_renyi_iterator& other) const
|
||||
{ return edges == other.edges; }
|
||||
|
||||
bool operator!=(const erdos_renyi_iterator& other) const
|
||||
{ return !(*this == other); }
|
||||
|
||||
private:
|
||||
void next()
|
||||
{
|
||||
@@ -98,11 +87,19 @@ namespace boost {
|
||||
vertices_size_type n;
|
||||
edges_size_type edges;
|
||||
bool allow_self_loops;
|
||||
value_type current;
|
||||
std::pair<vertices_size_type, vertices_size_type> current;
|
||||
};
|
||||
|
||||
template<typename RandomGenerator, typename Graph>
|
||||
class sorted_erdos_renyi_iterator
|
||||
: public iterator_facade<
|
||||
sorted_erdos_renyi_iterator<RandomGenerator, Graph>,
|
||||
std::pair<typename graph_traits<Graph>::vertices_size_type,
|
||||
typename graph_traits<Graph>::vertices_size_type>,
|
||||
std::input_iterator_tag,
|
||||
const
|
||||
std::pair<typename graph_traits<Graph>::vertices_size_type,
|
||||
typename graph_traits<Graph>::vertices_size_type>&>
|
||||
{
|
||||
typedef typename graph_traits<Graph>::directed_category directed_category;
|
||||
typedef typename graph_traits<Graph>::vertices_size_type vertices_size_type;
|
||||
@@ -110,116 +107,86 @@ namespace boost {
|
||||
|
||||
BOOST_STATIC_CONSTANT
|
||||
(bool,
|
||||
is_undirected = (is_base_and_derived<undirected_tag,
|
||||
directed_category>::value
|
||||
|| is_same<undirected_tag, directed_category>::value));
|
||||
is_undirected = (is_base_of<undirected_tag, directed_category>::value));
|
||||
|
||||
public:
|
||||
typedef std::input_iterator_tag iterator_category;
|
||||
typedef std::pair<vertices_size_type, vertices_size_type> value_type;
|
||||
typedef const value_type& reference;
|
||||
typedef const value_type* pointer;
|
||||
typedef void difference_type;
|
||||
|
||||
sorted_erdos_renyi_iterator()
|
||||
: gen(), rand_vertex(0.5), n(0), allow_self_loops(false),
|
||||
src((std::numeric_limits<vertices_size_type>::max)()), tgt(0), prob(0) {}
|
||||
sorted_erdos_renyi_iterator(RandomGenerator& gen, vertices_size_type n,
|
||||
double prob = 0.0,
|
||||
bool allow_self_loops = false)
|
||||
: gen(),
|
||||
// The "1.0 - prob" in the next line is to work around a Boost.Random
|
||||
// (and TR1) bug in the specification of geometric_distribution. It
|
||||
// should be replaced by "prob" when the issue is fixed.
|
||||
rand_vertex(1.0 - prob),
|
||||
n(n), allow_self_loops(allow_self_loops), src(0), tgt(0), prob(prob)
|
||||
{
|
||||
: gen(), rand_vertex(0.5), n(0), allow_self_loops(false)
|
||||
, src((std::numeric_limits<vertices_size_type>::max)()),
|
||||
tgt_index(vertices_size_type(-1)), prob(.5)
|
||||
{ }
|
||||
|
||||
// NOTE: The default probability has been changed to be the same as that
|
||||
// used by the geometic distribution. It was previously 0.0, which would
|
||||
// cause an assertion.
|
||||
sorted_erdos_renyi_iterator(RandomGenerator& gen, vertices_size_type n,
|
||||
double prob = 0.5,
|
||||
bool loops = false)
|
||||
: gen(), rand_vertex(1. - prob), n(n), allow_self_loops(loops), src(0)
|
||||
, tgt_index(vertices_size_type(-1)), prob(prob)
|
||||
{
|
||||
this->gen.reset(new uniform_01<RandomGenerator>(gen));
|
||||
|
||||
if (prob == 0.0) {src = (std::numeric_limits<vertices_size_type>::max)(); return;}
|
||||
next();
|
||||
}
|
||||
|
||||
reference operator*() const { return current; }
|
||||
pointer operator->() const { return ¤t; }
|
||||
const std::pair<vertices_size_type, vertices_size_type>&
|
||||
dereference() const {
|
||||
return current;
|
||||
}
|
||||
|
||||
sorted_erdos_renyi_iterator& operator++()
|
||||
{
|
||||
bool equal(const sorted_erdos_renyi_iterator& o) const {
|
||||
return src == o.src && tgt_index == o.tgt_index;
|
||||
}
|
||||
|
||||
void increment() {
|
||||
next();
|
||||
return *this;
|
||||
}
|
||||
|
||||
sorted_erdos_renyi_iterator operator++(int)
|
||||
{
|
||||
sorted_erdos_renyi_iterator temp(*this);
|
||||
++(*this);
|
||||
return temp;
|
||||
}
|
||||
|
||||
bool operator==(const sorted_erdos_renyi_iterator& other) const
|
||||
{ return src == other.src && tgt == other.tgt; }
|
||||
|
||||
bool operator!=(const sorted_erdos_renyi_iterator& other) const
|
||||
{ return !(*this == other); }
|
||||
|
||||
private:
|
||||
void next()
|
||||
{
|
||||
using std::sqrt;
|
||||
using std::floor;
|
||||
|
||||
// In order to get the edges from the generator in sorted order, one
|
||||
// effective (but slow) procedure would be to use a
|
||||
// bernoulli_distribution for each legal (src, tgt) pair. Because of the
|
||||
// O(n^2) cost of that, a geometric distribution is used. The geometric
|
||||
// distribution tells how many times the bernoulli_distribution would
|
||||
// need to be run until it returns true. Thus, this distribution can be
|
||||
// used to step through the edges which are actually present. Everything
|
||||
// beyond "tgt += increment" is done to effectively convert linear
|
||||
// indexing (the partial sums of the geometric distribution output) into
|
||||
// graph edges.
|
||||
assert (src != (std::numeric_limits<vertices_size_type>::max)());
|
||||
vertices_size_type increment = rand_vertex(*gen);
|
||||
tgt += increment;
|
||||
if (is_undirected) {
|
||||
// Update src and tgt based on position of tgt
|
||||
// Basically, we want the greatest src_increment such that (in \bbQ):
|
||||
// src_increment * (src + allow_self_loops + src_increment - 1/2) <= tgt
|
||||
// The result of the LHS of this, evaluated with the computed
|
||||
// src_increment, is then subtracted from tgt
|
||||
double src_minus_half = (src + allow_self_loops) - 0.5;
|
||||
double disc = src_minus_half * src_minus_half + 2 * tgt;
|
||||
double src_increment_fp = floor(sqrt(disc) - src_minus_half);
|
||||
vertices_size_type src_increment = vertices_size_type(src_increment_fp);
|
||||
if (src + src_increment >= n) {
|
||||
src = n;
|
||||
// bernoulli_distribution for each legal (src, tgt_index) pair. Because of
|
||||
// the O(|V|^2) cost of that, a geometric distribution is used. The
|
||||
// geometric distribution tells how many times the
|
||||
// bernoulli_distribution would need to be run until it returns true.
|
||||
// Thus, this distribution can be used to step through the edges
|
||||
// which are actually present.
|
||||
assert (src != (std::numeric_limits<vertices_size_type>::max)() &&
|
||||
src != n);
|
||||
while (src != n) {
|
||||
vertices_size_type increment = rand_vertex(*gen);
|
||||
size_t tgt_index_limit =
|
||||
(is_undirected ? src + 1 : n) +
|
||||
(allow_self_loops ? 0 : -1);
|
||||
if (tgt_index + increment >= tgt_index_limit) {
|
||||
// Overflowed this source; go to the next one and try again.
|
||||
++src;
|
||||
// This bias is because the geometric distribution always returns
|
||||
// values >=1, and we want to allow 0 as a valid target.
|
||||
tgt_index = vertices_size_type(-1);
|
||||
continue;
|
||||
} else {
|
||||
tgt -= (src + allow_self_loops) * src_increment +
|
||||
src_increment * (src_increment - 1) / 2;
|
||||
src += src_increment;
|
||||
tgt_index += increment;
|
||||
current.first = src;
|
||||
current.second =
|
||||
tgt_index +
|
||||
(!allow_self_loops && !is_undirected && tgt_index >= src ? 1 : 0);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Number of out edge positions possible from each vertex in this graph
|
||||
vertices_size_type possible_out_edges = n - (allow_self_loops ? 0 : 1);
|
||||
src += (std::min)(n - src, tgt / possible_out_edges);
|
||||
tgt %= possible_out_edges;
|
||||
}
|
||||
// Set end of graph code so (src, tgt) will be the same as for the end
|
||||
// sorted_erdos_renyi_iterator
|
||||
if (src >= n) {src = (std::numeric_limits<vertices_size_type>::max)(); tgt = 0;}
|
||||
// Copy (src, tgt) into current
|
||||
current.first = src;
|
||||
current.second = tgt;
|
||||
// Adjust for (src, src) edge being forbidden
|
||||
if (!allow_self_loops && tgt >= src) ++current.second;
|
||||
if (src == n) src = (std::numeric_limits<vertices_size_type>::max)();
|
||||
}
|
||||
|
||||
shared_ptr<uniform_01<RandomGenerator> > gen;
|
||||
geometric_distribution<vertices_size_type> rand_vertex;
|
||||
vertices_size_type n;
|
||||
bool allow_self_loops;
|
||||
vertices_size_type src, tgt;
|
||||
value_type current;
|
||||
vertices_size_type src, tgt_index;
|
||||
std::pair<vertices_size_type, vertices_size_type> current;
|
||||
double prob;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user