From 1acdecb32fcfeec2d3d225b08f4e35fbe19d2c36 Mon Sep 17 00:00:00 2001 From: Jeremiah Willcock Date: Sat, 25 Jul 2009 23:30:30 +0000 Subject: [PATCH] Merged in r55156 from trunk; fixes to ER generator for large-scale graphs [SVN r55157] --- include/boost/graph/erdos_renyi_generator.hpp | 187 ++++++++---------- 1 file changed, 77 insertions(+), 110 deletions(-) diff --git a/include/boost/graph/erdos_renyi_generator.hpp b/include/boost/graph/erdos_renyi_generator.hpp index 0352284f..0d904149 100644 --- a/include/boost/graph/erdos_renyi_generator.hpp +++ b/include/boost/graph/erdos_renyi_generator.hpp @@ -17,14 +17,23 @@ #include #include #include -#include +#include #include #include +#include namespace boost { template class erdos_renyi_iterator + : public iterator_facade< + erdos_renyi_iterator, + std::pair::vertices_size_type, + typename graph_traits::vertices_size_type>, + std::input_iterator_tag, + const + std::pair::vertices_size_type, + typename graph_traits::vertices_size_type>&> { typedef typename graph_traits::directed_category directed_category; typedef typename graph_traits::vertices_size_type vertices_size_type; @@ -32,17 +41,9 @@ namespace boost { BOOST_STATIC_CONSTANT (bool, - is_undirected = (is_base_and_derived::value - || is_same::value)); + is_undirected = (is_base_of::value)); public: - typedef std::input_iterator_tag iterator_category; - typedef std::pair value_type; - typedef const value_type& reference; - typedef const value_type* pointer; - typedef void difference_type; - erdos_renyi_iterator() : gen(), n(0), edges(0), allow_self_loops(false) {} erdos_renyi_iterator(RandomGenerator& gen, vertices_size_type n, double fraction = 0.0, bool allow_self_loops = false) @@ -61,29 +62,17 @@ namespace boost { next(); } - reference operator*() const { return current; } - pointer operator->() const { return &current; } + const std::pair& + dereference() const { return current; } - erdos_renyi_iterator& operator++() - { + void increment() { --edges; next(); - 
return *this; } - erdos_renyi_iterator operator++(int) - { - erdos_renyi_iterator temp(*this); - ++(*this); - return temp; - } - - bool operator==(const erdos_renyi_iterator& other) const + bool equal(const erdos_renyi_iterator& other) const { return edges == other.edges; } - bool operator!=(const erdos_renyi_iterator& other) const - { return !(*this == other); } - private: void next() { @@ -98,11 +87,19 @@ namespace boost { vertices_size_type n; edges_size_type edges; bool allow_self_loops; - value_type current; + std::pair current; }; template class sorted_erdos_renyi_iterator + : public iterator_facade< + sorted_erdos_renyi_iterator, + std::pair::vertices_size_type, + typename graph_traits::vertices_size_type>, + std::input_iterator_tag, + const + std::pair::vertices_size_type, + typename graph_traits::vertices_size_type>&> { typedef typename graph_traits::directed_category directed_category; typedef typename graph_traits::vertices_size_type vertices_size_type; @@ -110,116 +107,86 @@ namespace boost { BOOST_STATIC_CONSTANT (bool, - is_undirected = (is_base_and_derived::value - || is_same::value)); + is_undirected = (is_base_of::value)); public: - typedef std::input_iterator_tag iterator_category; - typedef std::pair value_type; - typedef const value_type& reference; - typedef const value_type* pointer; - typedef void difference_type; - sorted_erdos_renyi_iterator() - : gen(), rand_vertex(0.5), n(0), allow_self_loops(false), - src((std::numeric_limits::max)()), tgt(0), prob(0) {} - sorted_erdos_renyi_iterator(RandomGenerator& gen, vertices_size_type n, - double prob = 0.0, - bool allow_self_loops = false) - : gen(), - // The "1.0 - prob" in the next line is to work around a Boost.Random - // (and TR1) bug in the specification of geometric_distribution. It - // should be replaced by "prob" when the issue is fixed. 
- rand_vertex(1.0 - prob), - n(n), allow_self_loops(allow_self_loops), src(0), tgt(0), prob(prob) - { + : gen(), rand_vertex(0.5), n(0), allow_self_loops(false) + , src((std::numeric_limits::max)()), + tgt_index(vertices_size_type(-1)), prob(.5) + { } + + // NOTE: The default probability has been changed to be the same as that + // used by the geometric distribution. It was previously 0.0, which would + // cause an assertion. + sorted_erdos_renyi_iterator(RandomGenerator& gen, vertices_size_type n, + double prob = 0.5, + bool loops = false) + : gen(), rand_vertex(1. - prob), n(n), allow_self_loops(loops), src(0) + , tgt_index(vertices_size_type(-1)), prob(prob) + { this->gen.reset(new uniform_01(gen)); if (prob == 0.0) {src = (std::numeric_limits::max)(); return;} next(); } - reference operator*() const { return current; } - pointer operator->() const { return &current; } + const std::pair& + dereference() const { + return current; + } - sorted_erdos_renyi_iterator& operator++() - { + bool equal(const sorted_erdos_renyi_iterator& o) const { + return src == o.src && tgt_index == o.tgt_index; + } + + void increment() { next(); - return *this; } - sorted_erdos_renyi_iterator operator++(int) - { - sorted_erdos_renyi_iterator temp(*this); - ++(*this); - return temp; - } - - bool operator==(const sorted_erdos_renyi_iterator& other) const - { return src == other.src && tgt == other.tgt; } - - bool operator!=(const sorted_erdos_renyi_iterator& other) const - { return !(*this == other); } - private: void next() { - using std::sqrt; - using std::floor; - // In order to get the edges from the generator in sorted order, one // effective (but slow) procedure would be to use a - // bernoulli_distribution for each legal (src, tgt) pair. Because of the - // O(n^2) cost of that, a geometric distribution is used. The geometric - // distribution tells how many times the bernoulli_distribution would - // need to be run until it returns true. 
Thus, this distribution can be - // used to step through the edges which are actually present. Everything - // beyond "tgt += increment" is done to effectively convert linear - // indexing (the partial sums of the geometric distribution output) into - // graph edges. - assert (src != (std::numeric_limits::max)()); - vertices_size_type increment = rand_vertex(*gen); - tgt += increment; - if (is_undirected) { - // Update src and tgt based on position of tgt - // Basically, we want the greatest src_increment such that (in \bbQ): - // src_increment * (src + allow_self_loops + src_increment - 1/2) <= tgt - // The result of the LHS of this, evaluated with the computed - // src_increment, is then subtracted from tgt - double src_minus_half = (src + allow_self_loops) - 0.5; - double disc = src_minus_half * src_minus_half + 2 * tgt; - double src_increment_fp = floor(sqrt(disc) - src_minus_half); - vertices_size_type src_increment = vertices_size_type(src_increment_fp); - if (src + src_increment >= n) { - src = n; + // bernoulli_distribution for each legal (src, tgt_index) pair. Because of + // the O(|V|^2) cost of that, a geometric distribution is used. The + // geometric distribution tells how many times the + // bernoulli_distribution would need to be run until it returns true. + // Thus, this distribution can be used to step through the edges + // which are actually present. + assert (src != (std::numeric_limits::max)() && + src != n); + while (src != n) { + vertices_size_type increment = rand_vertex(*gen); + size_t tgt_index_limit = + (is_undirected ? src + 1 : n) + + (allow_self_loops ? 0 : -1); + if (tgt_index + increment >= tgt_index_limit) { + // Overflowed this source; go to the next one and try again. + ++src; + // This bias is because the geometric distribution always returns + // values >=1, and we want to allow 0 as a valid target. 
+ tgt_index = vertices_size_type(-1); + continue; } else { - tgt -= (src + allow_self_loops) * src_increment + - src_increment * (src_increment - 1) / 2; - src += src_increment; + tgt_index += increment; + current.first = src; + current.second = + tgt_index + + (!allow_self_loops && !is_undirected && tgt_index >= src ? 1 : 0); + break; } - } else { - // Number of out edge positions possible from each vertex in this graph - vertices_size_type possible_out_edges = n - (allow_self_loops ? 0 : 1); - src += (std::min)(n - src, tgt / possible_out_edges); - tgt %= possible_out_edges; } - // Set end of graph code so (src, tgt) will be the same as for the end - // sorted_erdos_renyi_iterator - if (src >= n) {src = (std::numeric_limits::max)(); tgt = 0;} - // Copy (src, tgt) into current - current.first = src; - current.second = tgt; - // Adjust for (src, src) edge being forbidden - if (!allow_self_loops && tgt >= src) ++current.second; + if (src == n) src = (std::numeric_limits::max)(); } shared_ptr > gen; geometric_distribution rand_vertex; vertices_size_type n; bool allow_self_loops; - vertices_size_type src, tgt; - value_type current; + vertices_size_type src, tgt_index; + std::pair current; double prob; };