diff --git a/benchmark/parallel/runCLANG_benchmark_numbers.sh b/benchmark/parallel/runCLANG_benchmark_numbers.sh old mode 100644 new mode 100755 diff --git a/benchmark/parallel/runCLANG_benchmark_objects.sh b/benchmark/parallel/runCLANG_benchmark_objects.sh old mode 100644 new mode 100755 diff --git a/benchmark/parallel/runCLANG_benchmark_strings.sh b/benchmark/parallel/runCLANG_benchmark_strings.sh old mode 100644 new mode 100755 diff --git a/benchmark/parallel/runGCC_benchmark_numbers.sh b/benchmark/parallel/runGCC_benchmark_numbers.sh old mode 100644 new mode 100755 diff --git a/benchmark/parallel/runGCC_benchmark_objects.sh b/benchmark/parallel/runGCC_benchmark_objects.sh old mode 100644 new mode 100755 diff --git a/benchmark/parallel/runGCC_benchmark_strings.sh b/benchmark/parallel/runGCC_benchmark_strings.sh old mode 100644 new mode 100755 diff --git a/benchmark/single/runCLANG_benchmark_numbers.sh b/benchmark/single/runCLANG_benchmark_numbers.sh old mode 100644 new mode 100755 diff --git a/benchmark/single/runCLANG_benchmark_objects.sh b/benchmark/single/runCLANG_benchmark_objects.sh old mode 100644 new mode 100755 diff --git a/benchmark/single/runCLANG_benchmark_strings.sh b/benchmark/single/runCLANG_benchmark_strings.sh old mode 100644 new mode 100755 diff --git a/benchmark/single/runGCC_benchmark_numbers.sh b/benchmark/single/runGCC_benchmark_numbers.sh old mode 100644 new mode 100755 diff --git a/benchmark/single/runGCC_benchmark_objects.sh b/benchmark/single/runGCC_benchmark_objects.sh old mode 100644 new mode 100755 diff --git a/benchmark/single/runGCC_benchmark_strings.sh b/benchmark/single/runGCC_benchmark_strings.sh old mode 100644 new mode 100755 diff --git a/include/boost/sort/parallel/block_indirect_sort.hpp b/include/boost/sort/parallel/block_indirect_sort.hpp deleted file mode 100644 index 44b790d..0000000 --- a/include/boost/sort/parallel/block_indirect_sort.hpp +++ /dev/null @@ -1,504 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file block_indirect_sort.hpp -/// @brief block indirect sort algorithm -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// This value is the minimal number of threads for to use the -// block_indirect_sort algorithm -#define BOOST_NTHREAD_BORDER 6 - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace detail -{ -//--------------------------------------------------------------------------- -// USING SENTENCES -//--------------------------------------------------------------------------- -namespace bs = boost::sort; -namespace bsc = bs::common; -namespace bscu = bsc::util; -using bscu::compare_iter; -using bscu::value_iter; -using bsc::range; -using bsc::destroy; -using bsc::initialize; -using bscu::nbits64; -using bs::pdqsort; -using bscu::enable_if_string; -using bscu::enable_if_not_string; -using bscu::tmsb; -// -///--------------------------------------------------------------------------- -/// @struct block_indirect_sort -/// @brief This class is the entry point of the block indirect sort. The code -/// of this algorithm is divided in several classes: -/// bis/block.hpp : basic structures used in the algorithm -/// bis/backbone.hpp : data used by all the classes -/// bis/merge_blocks.hpp : merge the internal blocks -/// bis/move_blocks.hpp : move the blocks, and obtain all the elements -/// phisicaly sorted -/// bis/parallel_sort.hpp : make the parallel sort of each part in the -/// initial division of the data -/// -//---------------------------------------------------------------------------- -template > -struct block_indirect_sort -{ - //------------------------------------------------------------------------ - // D E F I N I T I O N S - //------------------------------------------------------------------------ - typedef typename std::iterator_traits::value_type value_t; - typedef std::atomic atomic_t; - typedef range range_pos; - typedef range range_it; - typedef range range_buf; - typedef std::function function_t; - - // classes used in the internal operations of the algorithm - typedef block_pos block_pos_t; - typedef block block_t; - typedef backbone backbone_t; - typedef parallel_sort parallel_sort_t; - - typedef merge_blocks merge_blocks_t; - typedef move_blocks move_blocks_t; - typedef compare_block_pos compare_block_pos_t; - // - //------------------------------------------------------------------------ - // V A R I A B L E S A N D C O N S T A N T S - //------------------------------------------------------------------------ - // contains the data and the internal data structures of the algorithm for - // to be shared between the classes which are part of the algorithm - backbone_t bk; - // atomic counter for to detect the end of the works created inside - // the object - atomic_t counter; - // pointer to the uninitialized memory used for the thread buffers - value_t *ptr; - // indicate if the memory pointed by ptr is initialized - bool construct; - // range from extract the buffers for the threads - range_buf rglobal_buf; - // number of threads to use - uint32_t nthread; - // - //------------------------------------------------------------------------ - // F U N C T I O N S - //------------------------------------------------------------------------ - - block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr); - - block_indirect_sort(Iter_t first, Iter_t last) : - block_indirect_sort(first, last, Compare(), - std::thread::hardware_concurrency()) { } - - - block_indirect_sort(Iter_t first, Iter_t last, Compare cmp) : - block_indirect_sort(first, last, cmp, - std::thread::hardware_concurrency()) { } - - - block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread) : - block_indirect_sort(first, last, Compare(), nthread){} - - - // - //------------------------------------------------------------------------ - // function :destroy_all - /// @brief destructor all the data structures of the class (if the memory - /// is constructed, is destroyed) and return the uninitialized - /// memory - //------------------------------------------------------------------------ - void destroy_all(void) - { - if (ptr != nullptr) - { - if (construct) - { - destroy(rglobal_buf); - construct = false; - }; - std::return_temporary_buffer(ptr); - ptr = nullptr; - }; - } - // - //------------------------------------------------------------------------ - // function :~block_indirect_sort - /// @brief destructor of the class (if the memory is constructed, is - /// destroyed) and return the uninitialized memory - //------------------------------------------------------------------------ - ~block_indirect_sort(void) - { - destroy_all(); - } - - void split_range(size_t pos_index1, size_t pos_index2, - uint32_t level_thread); - - void start_function(void); - -//------------------------------------------------------------------------- -}; // End class block_indirect_sort -//---------------------------------------------------------------------------- -// -//############################################################################ -// ## -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -//------------------------------------------------------------------------- -// function : block_indirect_sort -/// @brief begin with the execution of the functions stored in works -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -/// @param nthr : Number of threads to use in the process.When this value -/// is lower than 2, the sorting is done with 1 thread -//------------------------------------------------------------------------- -template -block_indirect_sort -::block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr) -: bk(first, last, cmp), counter(0), ptr(nullptr), construct(false), - nthread(nthr) -{ - try - { - assert((last - first) >= 0); - size_t nelem = size_t(last - first); - if (nelem == 0) return; - - //------------------- check if sort ----------------------------------- - bool sorted = true; - for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = - not bk.cmp(*it2, *it1)); it1 = it2++); - if (sorted) return; - - //------------------- check if reverse sort --------------------------- - sorted = true; - for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = - not bk.cmp(*it1, *it2)); it1 = it2++); - - if (sorted) - { - size_t nelem2 = nelem >> 1; - Iter_t it1 = first, it2 = last - 1; - for (size_t i = 0; i < nelem2; ++i) - { - std::swap(*(it1++), *(it2--)); - }; - return; - }; - - //---------------- check if only single thread ----------------------- - size_t nthreadmax = nelem / (Block_size * Group_size) + 1; - if (nthread > nthreadmax) nthread = (uint32_t) nthreadmax; - - uint32_t nbits_size = (nbits64(sizeof(value_t)) >> 1); - if (nbits_size > 5) nbits_size = 5; - size_t max_per_thread = 1 << (18 - nbits_size); - - if (nelem < (max_per_thread) or nthread < 2) - { - //intro_sort (first, last, bk.cmp); - pdqsort(first, last, bk.cmp); - return; - }; - - //----------- creation of the temporary buffer -------------------- - ptr = std::get_temporary_buffer(Block_size * nthread).first; - if (ptr == nullptr) - { - bk.error = true; - throw std::bad_alloc(); - }; - - rglobal_buf = range_buf(ptr, ptr + (Block_size * nthread)); - initialize(rglobal_buf, *first); - construct = true; - - // creation of the buffers for the threads - std::vector vbuf(nthread); - for (uint32_t i = 0; i < nthread; ++i) - { - vbuf[i] = ptr + (i * Block_size); - }; - - // Insert the first work in the stack - bscu::atomic_write(counter, 1); - function_t f1 = [&]( ) - { - start_function ( ); - bscu::atomic_sub (counter, 1); - }; - bk.works.emplace_back(f1); - - //--------------------------------------------------------------------- - // PROCESS - //--------------------------------------------------------------------- - std::vector > vfuture(nthread); - - // The function launched with the futures is "execute the functions of - // the stack until this->counter is zero - // vbuf[i] is the memory from the main thread for to configure the - // thread local buffer - for (uint32_t i = 0; i < nthread; ++i) - { - auto f1 = [=, &vbuf]( ) - { bk.exec (vbuf[i], this->counter);}; - vfuture[i] = std::async(std::launch::async, f1); - }; - for (uint32_t i = 0; i < nthread; ++i) - vfuture[i].get(); - if (bk.error) throw std::bad_alloc(); - } - catch (std::bad_alloc &) - { - destroy_all(); - throw; - } -}; -// -//----------------------------------------------------------------------------- -// function : split_rage -/// @brief this function splits a range of positions in the index, and -/// depending of the size, sort directly or make to a recursive call -/// to split_range -/// @param pos_index1 : first position in the index -/// @param pos_index2 : position after the last in the index -/// @param level_thread : depth of the call. When 0 sort the blocks -//----------------------------------------------------------------------------- -template -void block_indirect_sort -::split_range(size_t pos_index1, size_t pos_index2, uint32_t level_thread) -{ - size_t nblock = pos_index2 - pos_index1; - - //------------------------------------------------------------------------- - // In the blocks not sorted, the physical position is the logical position - //------------------------------------------------------------------------- - Iter_t first = bk.get_block(pos_index1).first; - Iter_t last = bk.get_range(pos_index2 - 1).last; - - if (nblock < Group_size) - { - pdqsort(first, last, bk.cmp); - return; - }; - - size_t pos_index_mid = pos_index1 + (nblock >> 1); - atomic_t son_counter(1); - - //------------------------------------------------------------------------- - // Insert in the stack the work for the second part, and the actual thread, - // execute the first part - //------------------------------------------------------------------------- - if (level_thread != 0) - { - auto f1 = [=, &son_counter]( ) - { - split_range (pos_index_mid, pos_index2, level_thread - 1); - bscu::atomic_sub (son_counter, 1); - }; - bk.works.emplace_back(f1); - if (bk.error) return; - split_range(pos_index1, pos_index_mid, level_thread - 1); - } - else - { - Iter_t mid = first + ((nblock >> 1) * Block_size); - auto f1 = [=, &son_counter]( ) - { - parallel_sort_t (bk, mid, last); - bscu::atomic_sub (son_counter, 1); - }; - bk.works.emplace_back(f1); - if (bk.error) return; - parallel_sort_t(bk, first, mid); - }; - bk.exec(son_counter); - if (bk.error) return; - merge_blocks_t(bk, pos_index1, pos_index_mid, pos_index2); -}; - -// -//----------------------------------------------------------------------------- -// function : start_function -/// @brief this function init the process. When the number of threads is lower -/// than a predefined value, sort the elements with a parallel pdqsort. -//----------------------------------------------------------------------------- -template -void block_indirect_sort -::start_function(void) -{ - if (nthread < BOOST_NTHREAD_BORDER) - { - parallel_sort_t(bk, bk.global_range.first, bk.global_range.last); - } - else - { - size_t level_thread = nbits64(nthread - 1) - 1; - split_range(0, bk.nblock, level_thread - 1); - if (bk.error) return; - move_blocks_t k(bk); - }; -}; - -///--------------------------------------------------------------------------- -// function block_indirect_sort_call -/// @brief This class is select the block size in the block_indirect_sort -/// algorithm depending of the type and size of the data to sort -/// -//---------------------------------------------------------------------------- -template > * = nullptr> -inline void block_indirect_sort_call(Iter_t first, Iter_t last, Compare cmp, - uint32_t nthr) -{ - block_indirect_sort<128, 128, Iter_t, Compare>(first, last, cmp, nthr); -}; - -template -struct block_size -{ - static constexpr const uint32_t BitsSize = - (Size == 0) ? 0 : (Size > 256) ? 9 : tmsb[Size - 1]; - static constexpr const uint32_t sz[10] = - { 4096, 4096, 4096, 4096, 2048, 1024, 768, 512, 256, 128 }; - static constexpr const uint32_t data = sz[BitsSize]; -}; -// -///--------------------------------------------------------------------------- -/// @struct block_indirect_sort_call -/// @brief This class is select the block size in the block_indirect_sort -/// algorithm depending of the type and size of the data to sort -/// -//---------------------------------------------------------------------------- -template > * = nullptr> -inline void block_indirect_sort_call (Iter_t first, Iter_t last, Compare cmp, - uint32_t nthr) -{ - block_indirect_sort )>::data, 64, - Iter_t, Compare> (first, last, cmp, nthr); -}; - -// -//**************************************************************************** -}; // End namespace block_detail -//**************************************************************************** -// -namespace bscu = boost::sort::common::util; -// -//############################################################################ -// ## -// ## -// B L O C K _ I N D I R E C T _ S O R T ## -// ## -// ## -//############################################################################ -// -//----------------------------------------------------------------------------- -// function : block_indirect_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -//----------------------------------------------------------------------------- -template -void block_indirect_sort(Iter_t first, Iter_t last) -{ - typedef bscu::compare_iter Compare; - detail::block_indirect_sort_call (first, last, Compare(), - std::thread::hardware_concurrency()); -} - -// -//----------------------------------------------------------------------------- -// function : block_indirect_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param nthread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -//----------------------------------------------------------------------------- -template -void block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread) -{ - typedef bscu::compare_iter Compare; - detail::block_indirect_sort_call(first, last, Compare(), nthread); -} -// -//----------------------------------------------------------------------------- -// function : block_indirect_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -//----------------------------------------------------------------------------- -template * = nullptr> -void block_indirect_sort(Iter_t first, Iter_t last, Compare comp) -{ - detail::block_indirect_sort_call (first, last, comp, - std::thread::hardware_concurrency()); -} - -// -//----------------------------------------------------------------------------- -// function : block_indirect_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -/// @param nthread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -//----------------------------------------------------------------------------- -template -void block_indirect_sort (Iter_t first, Iter_t last, Compare comp, - uint32_t nthread) -{ - detail::block_indirect_sort_call(first, last, comp, nthread); -} -// -//**************************************************************************** -}; // End namespace parallel -}; // End namespace sort -}; // End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/detail/backbone.hpp b/include/boost/sort/parallel/detail/backbone.hpp deleted file mode 100644 index 2374241..0000000 --- a/include/boost/sort/parallel/detail/backbone.hpp +++ /dev/null @@ -1,222 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file backbone.hpp -/// @brief This file constains the class backbone, which is part of the -/// block_indirect_sort algorithm -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_BACKBONE_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_BACKBONE_HPP - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace detail -{ - -//--------------------------------------------------------------------------- -// USING SENTENCES -//--------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -namespace bscu = bsc::util; -using bsc::stack_cnc; -using bsc::range; - -///--------------------------------------------------------------------------- -/// @struct backbone -/// @brief This contains all the information shared betwen the classes of the -/// block indirect sort algorithm - -//---------------------------------------------------------------------------- -template < uint32_t Block_size, class Iter_t, class Compare > -struct backbone -{ - //------------------------------------------------------------------------- - // D E F I N I T I O N S - //------------------------------------------------------------------------- - typedef typename std::iterator_traits< Iter_t >::value_type value_t; - typedef std::atomic< uint32_t > atomic_t; - typedef range< size_t > range_pos; - typedef range< Iter_t > range_it; - typedef range< value_t * > range_buf; - typedef std::function< void(void) > function_t; - typedef block< Block_size, Iter_t > block_t; - - //------------------------------------------------------------------------ - // V A R I A B L E S - //------------------------------------------------------------------------ - // range with all the element to sort - range< Iter_t > global_range; - - // index vector of block_pos elements - std::vector< block_pos > index; - - // Number of elements to sort - size_t nelem; - - // Number of blocks to sort - size_t nblock; - - // Number of elements in the last block (tail) - size_t ntail; - - // object for to compare two elements - Compare cmp; - - // range of elements of the last block (tail) - range_it range_tail; - - // thread local varible. It is a pointer to the buffer - static thread_local value_t *buf; - - // concurrent stack where store the function_t elements - stack_cnc< function_t > works; - - // global indicator of error - bool error; - // - //------------------------------------------------------------------------ - // F U N C T I O N S - //------------------------------------------------------------------------ - backbone (Iter_t first, Iter_t last, Compare comp); - - //------------------------------------------------------------------------ - // function : get_block - /// @brief obtain the block in the position pos - /// @param pos : position of the range - /// @return block required - //------------------------------------------------------------------------ - block_t get_block (size_t pos) const - { - return block_t (global_range.first + (pos * Block_size)); - }; - //------------------------------------------------------------------------- - // function : get_range - /// @brief obtain the range in the position pos - /// @param pos : position of the range - /// @return range required - //------------------------------------------------------------------------- - range_it get_range (size_t pos) const - { - Iter_t it1 = global_range.first + (pos * Block_size); - Iter_t it2 = - (pos == (nblock - 1)) ? global_range.last : it1 + Block_size; - return range_it (it1, it2); - }; - //------------------------------------------------------------------------- - // function : get_range_buf - /// @brief obtain the auxiliary buffer of the thread - //------------------------------------------------------------------------- - range_buf get_range_buf ( ) const - { - return range_buf (buf, buf + Block_size); - }; - - //------------------------------------------------------------------------- - // function : exec - /// @brief Initialize the thread local buffer with the ptr_buf pointer, - /// and begin with the execution of the functions stored in works - // - /// @param ptr_buf : Pointer to the memory assigned to the thread_local - /// buffer - /// @param counter : atomic counter for to invoke to the exec function - /// with only 1 parameter - //------------------------------------------------------------------------- - void exec (value_t *ptr_buf, atomic_t &counter) - { - buf = ptr_buf; - exec (counter); - }; - - void exec (atomic_t &counter); - -//--------------------------------------------------------------------------- -}; // end struct backbone -//--------------------------------------------------------------------------- -// -//############################################################################ -// ## -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -// initialization of the thread_local pointer to the auxiliary buffer -template < uint32_t Block_size, class Iter_t, class Compare > -thread_local typename std::iterator_traits< Iter_t > -::value_type *backbone< Block_size, Iter_t, Compare >::buf = nullptr; - -//------------------------------------------------------------------------ -// function : backbone -/// @brief constructor of the class -// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -//------------------------------------------------------------------------ -template < uint32_t Block_size, class Iter_t, class Compare > -backbone< Block_size, Iter_t, Compare > -::backbone (Iter_t first, Iter_t last, Compare comp) -: global_range (first, last), cmp (comp), error (false) -{ - assert ((last - first) >= 0); - if (first == last) return; // nothing to do - - nelem = size_t (last - first); - nblock = (nelem + Block_size - 1) / Block_size; - ntail = (nelem % Block_size); - index.reserve (nblock + 1); - - for (size_t i = 0; i < nblock; ++i) index.emplace_back (block_pos (i)); - - range_tail.first = - (ntail == 0) ? last : (first + ((nblock - 1) * Block_size)); - range_tail.last = last; -}; -// -//------------------------------------------------------------------------- -// function : exec -/// @brief execute the function_t stored in works, until counter is zero -// -/// @param counter : atomic counter. When 0 exits the function -//------------------------------------------------------------------------- -template < uint32_t Block_size, class Iter_t, class Compare > -void backbone< Block_size, Iter_t, Compare >::exec (atomic_t &counter) -{ - function_t func_exec; - while (bscu::atomic_read (counter) != 0) - { - if (works.pop_move_back (func_exec)) func_exec ( ); - else std::this_thread::yield ( ); - }; -}; -// -//**************************************************************************** -}; // End namespace block_detail -}; // End namespace parallel -}; // End namespace sort -}; // End namespace boost -//**************************************************************************** -#endif diff --git a/include/boost/sort/parallel/detail/block.hpp b/include/boost/sort/parallel/detail/block.hpp deleted file mode 100644 index e388a9c..0000000 --- a/include/boost/sort/parallel/detail/block.hpp +++ /dev/null @@ -1,183 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file block.hpp -/// @brief This file contains the internal data structures used in the -/// block_indirect_sort algorithm -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_HPP - -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace detail -{ -//--------------------------------------------------------------------------- -// USING SENTENCES -//--------------------------------------------------------------------------- -using namespace boost::sort::common; -// -//--------------------------------------------------------------------------- -/// @struct block_pos -/// @brief represent a pair of values, a position represented as an unsigned -/// variable ( position ), and a bool variable ( side ). They are packed -/// in a size_t variable. The Least Significant Bit is the bool variable, -/// and the others bits are the position -//---------------------------------------------------------------------------- -class block_pos -{ - //------------------------------------------------------------------------ - // VARIABLES - //----------------------------------------------------------------------- - size_t num; // number which store a position and a bool side - - public: - //----------------------------- FUNCTIONS ------------------------------ - block_pos (void) : num (0){}; - // - //------------------------------------------------------------------------- - // function : block_pos - /// @brief constructor from a position and a side - /// @param position : position to sotre - /// @param side : side to store - //------------------------------------------------------------------------- - block_pos (size_t position, bool side = false) - { - num = (position << 1) + ((side) ? 1 : 0); - }; - // - //------------------------------------------------------------------------- - // function : pos - /// @brief obtain the position stored inside the block_pos - /// @return position - //------------------------------------------------------------------------- - size_t pos (void) const { return (num >> 1); }; - // - //------------------------------------------------------------------------- - // function : pos - /// @brief store a position inside the block_pos - /// @param position : value to store - //------------------------------------------------------------------------- - void set_pos (size_t position) { num = (position << 1) + (num & 1); }; - // - //------------------------------------------------------------------------- - // function : side - /// @brief obtain the side stored inside the block_pos - /// @return bool value - //------------------------------------------------------------------------- - bool side (void) const { return ((num & 1) != 0); }; - // - //------------------------------------------------------------------------- - // function : side - /// @brief store a bool value the block_pos - /// @param sd : bool value to store - //------------------------------------------------------------------------- - void set_side (bool sd) { num = (num & ~1) + ((sd) ? 1 : 0); }; -}; // end struct block_pos - -// -//--------------------------------------------------------------------------- -/// @struct block -/// @brief represent a group of Block_size contiguous elements, beginning -/// with the pointed by first -//---------------------------------------------------------------------------- -template < uint32_t Block_size, class Iter_t > -struct block -{ - //---------------------------------------------------------------------- - // VARIABLES - //---------------------------------------------------------------------- - Iter_t first; // iterator to the first element of the block - - //------------------------------------------------------------------------- - // function : block - /// @brief constructor from an iterator to the first element of the block - /// @param it : iterator to the first element of the block - //------------------------------------------------------------------------- - block (Iter_t it) : first (it){}; - - //------------------------------------------------------------------------- - // function : get_range - /// @brief convert a block in a range - /// @return range - //------------------------------------------------------------------------- - range< Iter_t > get_range (void) - { - return range_it (first, first + Block_size); - }; - -}; // end struct block - -// -//------------------------------------------------------------------------- -// function : compare_block -/// @brief compare two blocks using the content of the pointed by first -/// @param block1 : first block to compare -/// @param block2 : second block to compare -/// @param cmp : comparison operator -//------------------------------------------------------------------------- -template < uint32_t Block_size, class Iter_t, class Compare > -bool compare_block (block< Block_size, Iter_t > block1, - block< Block_size, Iter_t > block2, - Compare cmp = Compare ( )) -{ - return cmp (*block1.first, *block2.first); -}; -// -///--------------------------------------------------------------------------- -/// @struct compare_block_pos -/// @brief This is a object for to compare two block_pos objects -//---------------------------------------------------------------------------- -template < uint32_t Block_size, class Iter_t, class Compare > -struct compare_block_pos -{ - //----------------------------------------------------------------------- - // VARIABLES - //----------------------------------------------------------------------- - Iter_t global_first; // iterator to the first element to sort - Compare comp; // comparison object for to compare two elements - - //------------------------------------------------------------------------- - // function : compare_block_pos - /// @brief constructor - /// @param g_first : itertor to the first element to sort - /// @param cmp : comparison operator - //------------------------------------------------------------------------- - compare_block_pos (Iter_t g_first, Compare cmp) - : global_first (g_first), comp (cmp){}; - // - //------------------------------------------------------------------------- - // function : operator () - /// @brief compare two blocks using the content of the pointed by - /// global_first - /// @param block_pos1 : first block to compare - /// @param block_pos2 : second block to compare - //------------------------------------------------------------------------- - bool operator( ) (block_pos block_pos1, block_pos block_pos2) const - { - return comp (*(global_first + (block_pos1.pos ( ) * Block_size)), - *(global_first + (block_pos2.pos ( ) * Block_size))); - }; - -}; // end struct compare_block_pos - -//**************************************************************************** -}; // End namespace block_detail -}; // End namespace parallel -}; // End namespace sort -}; // End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/detail/constants.hpp b/include/boost/sort/parallel/detail/constants.hpp deleted file mode 100644 index c407243..0000000 --- a/include/boost/sort/parallel/detail/constants.hpp +++ /dev/null @@ -1,26 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file constants.hpp -/// @brief This file contains the constants values used in the algorithms -/// -/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_CONSTANTS_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_CONSTANTS_HPP - -// This value is the block size in the block_indirect_sort algorithm -#define BOOST_BLOCK_SIZE 1024 - -// This value represent the group size in the block_indirect_sort algorithm -#define BOOST_GROUP_SIZE 64 - -// This value is the minimal number of threads for to use the -// block_indirect_sort algorithm -#define BOOST_NTHREAD_BORDER 6 - -#endif diff --git a/include/boost/sort/parallel/detail/merge_blocks.hpp b/include/boost/sort/parallel/detail/merge_blocks.hpp deleted file mode 100644 index ad11b51..0000000 --- a/include/boost/sort/parallel/detail/merge_blocks.hpp +++ /dev/null @@ -1,429 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file merge_blocks.hpp -/// @brief contains the class merge_blocks, which is part of the -/// block_indirect_sort algorithm -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_MERGE_BLOCKS_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_MERGE_BLOCKS_HPP - -#include -#include -#include -#include -#include -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace detail -{ -//---------------------------------------------------------------------------- -// USING SENTENCES -//---------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -namespace bscu = bsc::util; -using bsc::range; -using bsc::is_mergeable; -using bsc::merge_uncontiguous; -// -///--------------------------------------------------------------------------- -/// @struct merge_blocks -/// @brief This class merge the blocks. The blocks to merge are defined by two -/// ranges of positions in the index of the backbone -//---------------------------------------------------------------------------- -template -struct merge_blocks -{ - //----------------------------------------------------------------------- - // D E F I N I T I O N S - //----------------------------------------------------------------------- - typedef typename std::iterator_traits::value_type value_t; - typedef std::atomic atomic_t; - typedef range range_pos; - typedef range range_it; - typedef range range_buf; - typedef std::function function_t; - typedef backbone backbone_t; - typedef compare_block_pos compare_block_pos_t; - - //------------------------------------------------------------------------ - // V A R I A B L E S - //------------------------------------------------------------------------ - // Object with the elements to sort and all internal data structures of the - // algorithm - backbone_t &bk; - // - //------------------------------------------------------------------------ - // F U N C T I O N S - //------------------------------------------------------------------------ - merge_blocks(backbone_t &bkb, size_t pos_index1, size_t pos_index2, - size_t pos_index3); - - void tail_process(std::vector &vblkpos1, - std::vector &vblkpos2); - - void cut_range(range_pos rng); - - void merge_range_pos(range_pos rng); - - void extract_ranges(range_pos range_input); - // - //------------------------------------------------------------------------ - // function : function_merge_range_pos - /// @brief create a function_t with a call to merge_range_pos, and insert - /// in the stack of the backbone - // - /// @param rng_input : range of positions of blocks in the index to merge - /// @param son_counter : atomic variable which is decremented when finish - /// the function. This variable is used for to know - /// when are finished all the function_t created - /// inside an object - /// @param error : global indicator of error. - /// - //------------------------------------------------------------------------ - void function_merge_range_pos(const range_pos &rng_input, atomic_t &counter, - bool &error) - { - bscu::atomic_add(counter, 1); - function_t f1 = [this, rng_input, &counter, &error]( ) -> void - { - if (not error) - { - try - { - this->merge_range_pos (rng_input); - } - catch (std::bad_alloc &ba) - { - error = true; - }; - } - bscu::atomic_sub (counter, 1); - }; - bk.works.emplace_back(f1); - } - ; - // - //------------------------------------------------------------------------ - // function : function_cut_range - /// @brief create a function_t with a call to cut_range, and inser in - /// the stack of the backbone - // - /// @param rng_input : range of positions in the index to cut - /// @param counter : atomic variable which is decremented when finish - /// the function. This variable is used for to know - /// when are finished all the function_t created - /// inside an object - /// @param error : global indicator of error. - //------------------------------------------------------------------------ - void function_cut_range(const range_pos &rng_input, atomic_t &counter, - bool &error) - { - bscu::atomic_add(counter, 1); - function_t f1 = [this, rng_input, &counter, &error]( ) -> void - { - if (not error) - { - try - { - this->cut_range (rng_input); - } - catch (std::bad_alloc &) - { - error = true; - }; - } - bscu::atomic_sub (counter, 1); - }; - bk.works.emplace_back(f1); - } - - -//---------------------------------------------------------------------------- -}; -// end struct merge_blocks -//---------------------------------------------------------------------------- -// -//############################################################################ -// ## -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -//------------------------------------------------------------------------- -// function : merge_blocks -/// @brief make the indirect merge of the two range_pos defined by their index -/// position [pos_index1, pos_index2 ) and [ pos_index2, pos_index3 ) -// -/// @param bkb : backbone with all the data to sort , and the internal data -/// structures of the algorithm -/// @param pos_index1 : first position of the first range in the index -/// @param pos_index2 : last position of the first range and first position -/// of the second range in the index -/// @param pos_index3 : last position of the second range in the index -//------------------------------------------------------------------------- -template -merge_blocks -::merge_blocks( backbone_t &bkb, size_t pos_index1, size_t pos_index2, - size_t pos_index3) : bk(bkb) -{ - size_t nblock1 = pos_index2 - pos_index1; - size_t nblock2 = pos_index3 - pos_index2; - if (nblock1 == 0 or nblock2 == 0) return; - - //----------------------------------------------------------------------- - // Merging of the two intervals - //----------------------------------------------------------------------- - std::vector vpos1, vpos2; - vpos1.reserve(nblock1 + 1); - vpos2.reserve(nblock2 + 1); - - for (size_t i = pos_index1; i < pos_index2; ++i) - { - vpos1.emplace_back(bk.index[i].pos(), true); - }; - - for (size_t i = pos_index2; i < pos_index3; ++i) - { - vpos2.emplace_back(bk.index[i].pos(), false); - }; - //------------------------------------------------------------------- - // tail process - //------------------------------------------------------------------- - if (vpos2.back().pos() == (bk.nblock - 1) - and bk.range_tail.first != bk.range_tail.last) - { - tail_process(vpos1, vpos2); - nblock1 = vpos1.size(); - nblock2 = vpos2.size(); - }; - - compare_block_pos_t cmp_blk(bk.global_range.first, bk.cmp); - if (bk.error) return; - bscu::merge(vpos1.begin(), vpos1.end(), vpos2.begin(), vpos2.end(), - bk.index.begin() + pos_index1, cmp_blk); - if (bk.error) return; - // Extracting the ranges for to merge the elements - extract_ranges(range_pos(pos_index1, pos_index1 + nblock1 + nblock2)); -} - - -// -//------------------------------------------------------------------------- -// function : tail_process -/// @brief make the process when the second vector of block_pos to merge is -/// the last, and have an incomplete block ( tail) -// -/// @param vblkpos1 : first vector of block_pos elements to merge -/// @param vblkpos2 : second vector of block_pos elements to merge -//------------------------------------------------------------------------- -template -void merge_blocks -::tail_process( std::vector &vblkpos1, - std::vector &vblkpos2 ) -{ - if (vblkpos1.size() == 0 or vblkpos2.size() == 0) return; - - vblkpos2.pop_back(); - - size_t posback1 = vblkpos1.back().pos(); - range_it range_back1 = bk.get_range(posback1); - - if (bsc::is_mergeable(range_back1, bk.range_tail, bk.cmp)) - { - bsc::merge_uncontiguous(range_back1, bk.range_tail, bk.get_range_buf(), - bk.cmp); - if (vblkpos1.size() > 1) - { - size_t pos_aux = vblkpos1[vblkpos1.size() - 2].pos(); - range_it range_aux = bk.get_range(pos_aux); - - if (bsc::is_mergeable(range_aux, range_back1, bk.cmp)) - { - vblkpos2.emplace_back(posback1, false); - vblkpos1.pop_back(); - }; - }; - }; -} - -// -//------------------------------------------------------------------------- -// function : cut_range -/// @brief when the rng_input is greather than Group_size, this function divide -/// it in several parts creating function_t elements, which are inserted -/// in the concurrent stack of the backbone -// -/// @param rng_input : range to divide -//------------------------------------------------------------------------- -template -void merge_blocks -::cut_range(range_pos rng_input) -{ - if (rng_input.size() < Group_size) - { - merge_range_pos(rng_input); - return; - }; - - atomic_t counter(0); - size_t npart = (rng_input.size() + Group_size - 1) / Group_size; - size_t size_part = rng_input.size() / npart; - - size_t pos_ini = rng_input.first; - size_t pos_last = rng_input.last; - - while (pos_ini < pos_last) - { - size_t pos = pos_ini + size_part; - while (pos < pos_last - and bk.index[pos - 1].side() == bk.index[pos].side()) - { - ++pos; - }; - if (pos < pos_last) - { - merge_uncontiguous(bk.get_range(bk.index[pos - 1].pos()), - bk.get_range(bk.index[pos].pos()), - bk.get_range_buf(), bk.cmp); - } - else pos = pos_last; - if ((pos - pos_ini) > 1) - { - range_pos rng_aux(pos_ini, pos); - function_merge_range_pos(rng_aux, counter, bk.error); - }; - pos_ini = pos; - }; - bk.exec(counter); // wait until finish all the ranges -} - - -// -//------------------------------------------------------------------------- -// function : merge_range_pos -/// @brief make the indirect merge of the blocks inside the rng_input -// -/// @param rng_input : range of positions of the blocks to merge -//------------------------------------------------------------------------- -template -void merge_blocks -::merge_range_pos(range_pos rng_input) -{ - if (rng_input.size() < 2) return; - range_buf rbuf = bk.get_range_buf(); - - range_it rng_prev = bk.get_range(bk.index[rng_input.first].pos()); - move_forward(rbuf, rng_prev); - range_it rng_posx(rng_prev); - - for (size_t posx = rng_input.first + 1; posx != rng_input.last; ++posx) - { - rng_posx = bk.get_range(bk.index[posx].pos()); - bsc::merge_flow(rng_prev, rbuf, rng_posx, bk.cmp); - rng_prev = rng_posx; - - }; - move_forward(rng_posx, rbuf); -} -// -//------------------------------------------------------------------------- -// function : extract_ranges -/// @brief from a big range of positions of blocks in the index. Examine which -/// are mergeable, and generate a couple of ranges for to be merged. -/// With the ranges obtained generate function_t elements and are -/// inserted in the concurrent stack. -/// When the range obtained is smaller than Group_size, generate a -/// function_t calling to merge_range_pos, when is greater, generate a -/// function_t calling to cut_range -// -/// @param rpos range_input : range of the position in the index, where must -/// extract the ranges to merge -//------------------------------------------------------------------------- -template -void merge_blocks -::extract_ranges(range_pos range_input) -{ - if (range_input.size() < 2) return; - atomic_t counter(0); - - // The names with x are positions of the index - size_t posx_ini = range_input.first; - block_pos bp_posx_ini = bk.index[posx_ini]; - - range_it rng_max = bk.get_range(bp_posx_ini.pos()); - bool side_max = bp_posx_ini.side(); - - block_pos bp_posx; - range_it rng_posx = rng_max; - bool side_posx = side_max; - - for (size_t posx = posx_ini + 1; posx <= range_input.last; ++posx) - { - bool final = (posx == range_input.last); - bool mergeable = false; - - if (not final) - { - bp_posx = bk.index[posx]; - rng_posx = bk.get_range(bp_posx.pos()); - side_posx = bp_posx.side(); - mergeable = (side_max != side_posx - and is_mergeable(rng_max, rng_posx, bk.cmp)); - }; - if (bk.error) return; - if (final or not mergeable) - { - range_pos rp_final(posx_ini, posx); - if (rp_final.size() > 1) - { - if (rp_final.size() > Group_size) - { - function_cut_range(rp_final, counter, bk.error); - } - else - { - function_merge_range_pos(rp_final, counter, bk.error); - }; - }; - posx_ini = posx; - if (not final) - { - rng_max = rng_posx; - side_max = side_posx; - }; - } - else - { - if (bk.cmp(*(rng_max.back()), *(rng_posx.back()))) - { - rng_max = rng_posx; - side_max = side_posx; - }; - }; - }; - bk.exec(counter); -} -// -//**************************************************************************** -}; // End namespace block_detail -}; // End namespace parallel -}; // End namespace sort -}; // End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/detail/move_blocks.hpp b/include/boost/sort/parallel/detail/move_blocks.hpp deleted file mode 100644 index db909b4..0000000 --- a/include/boost/sort/parallel/detail/move_blocks.hpp +++ /dev/null @@ -1,287 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file move_blocks.hpp -/// @brief contains the class move_blocks, which is part of the -/// block_indirect_sort algorithm -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_MOVE_BLOCKS_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_MOVE_BLOCKS_HPP - -#include -#include -#include -#include -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace detail -{ -//---------------------------------------------------------------------------- -// USING SENTENCES -//---------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -// -///--------------------------------------------------------------------------- -/// @struct move_blocks -/// @brief This class move the blocks, trnasforming a logical sort by an index, -/// in physical sort -//---------------------------------------------------------------------------- -template -struct move_blocks -{ - //------------------------------------------------------------------------- - // D E F I N I T I O N S - //------------------------------------------------------------------------- - typedef move_blocks this_type; - typedef typename std::iterator_traits::value_type value_t; - typedef std::atomic atomic_t; - typedef bsc::range range_pos; - typedef bsc::range range_it; - typedef bsc::range range_buf; - typedef std::function function_t; - typedef backbone backbone_t; - - //------------------------------------------------------------------------ - // V A R I A B L E S - //------------------------------------------------------------------------ - // Object with the elements to sort and all internal data structures of the - // algorithm - backbone_t &bk; - - //------------------------------------------------------------------------ - // F U N C T I O N S - //------------------------------------------------------------------------ - move_blocks(backbone_t &bkb); - - void move_sequence(const std::vector &init_sequence); - - void move_long_sequence(const std::vector &init_sequence); - // - //------------------------------------------------------------------------ - // function : function_move_sequence - /// @brief create a function_t with a call to move_sequence, and insert - /// in the stack of the backbone - /// - /// @param sequence :sequence of positions for to move the blocks - /// @param counter : atomic variable which is decremented when finish - /// the function. This variable is used for to know - /// when are finished all the function_t created - /// inside an object - /// @param error : global indicator of error. - //------------------------------------------------------------------------ - void function_move_sequence(std::vector &sequence, - atomic_t &counter, bool &error) - { - bscu::atomic_add(counter, 1); - function_t f1 = [this, sequence, &counter, &error]( ) -> void - { - if (not error) - { - try - { - this->move_sequence (sequence); - } - catch (std::bad_alloc &) - { - error = true; - }; - } - bscu::atomic_sub (counter, 1); - }; - bk.works.emplace_back(f1); - } - - // - //------------------------------------------------------------------------ - // function : function_move_long_sequence - /// @brief create a function_t with a call to move_long_sequence, and - /// insert in the stack of the backbone - // - /// @param sequence :sequence of positions for to move the blocks - /// @param counter : atomic variable which is decremented when finish - /// the function. This variable is used for to know - /// when are finished all the function_t created - /// inside an object - /// @param error : global indicator of error. - //------------------------------------------------------------------------ - void function_move_long_sequence(std::vector &sequence, - atomic_t &counter, bool &error) - { - bscu::atomic_add(counter, 1); - function_t f1 = [this, sequence, &counter, &error]( ) -> void - { - if (not error) - { - try - { - this->move_long_sequence (sequence); - } - catch (std::bad_alloc &) - { - error = true; - }; - } - bscu::atomic_sub (counter, 1); - }; - bk.works.emplace_back(f1); - } - ; -//--------------------------------------------------------------------------- -}; // end of struct move_blocks -//--------------------------------------------------------------------------- -// -//############################################################################ -// ## -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -//------------------------------------------------------------------------- -// function : move_blocks -/// @brief constructor of the class for to move the blocks to their true -/// position obtained from the index -// -/// @param bkb : backbone with the index and the blocks -//------------------------------------------------------------------------- -template -move_blocks -::move_blocks(backbone_t &bkb) : bk(bkb) -{ - std::vector > vsequence; - vsequence.reserve(bk.index.size() >> 1); - std::vector sequence; - atomic_t counter(0); - - size_t pos_index_ini = 0, pos_index_src = 0, pos_index_dest = 0; - while (pos_index_ini < bk.index.size()) - { - while (pos_index_ini < bk.index.size() - and bk.index[pos_index_ini].pos() == pos_index_ini) - { - ++pos_index_ini; - }; - - if (pos_index_ini == bk.index.size()) break; - - sequence.clear(); - pos_index_src = pos_index_dest = pos_index_ini; - sequence.push_back(pos_index_ini); - - while (bk.index[pos_index_dest].pos() != pos_index_ini) - { - pos_index_src = bk.index[pos_index_dest].pos(); - sequence.push_back(pos_index_src); - - bk.index[pos_index_dest].set_pos(pos_index_dest); - pos_index_dest = pos_index_src; - }; - - bk.index[pos_index_dest].set_pos(pos_index_dest); - vsequence.push_back(sequence); - - if (sequence.size() < Group_size) - { - function_move_sequence(vsequence.back(), counter, bk.error); - } - else - { - function_move_long_sequence(vsequence.back(), counter, bk.error); - }; - }; - bk.exec(counter); -} -; -// -//------------------------------------------------------------------------- -// function : move_sequence -/// @brief move the blocks, following the positions of the init_sequence -// -/// @param init_sequence : vector with the positions from and where move the -/// blocks -//------------------------------------------------------------------------- -template -void move_blocks -::move_sequence(const std::vector &init_sequence) -{ - range_buf rbuf = bk.get_range_buf(); - size_t pos_range2 = init_sequence[0]; - - range_it range2 = bk.get_range(pos_range2); - move_forward(rbuf, range2); - - for (size_t i = 1; i < init_sequence.size(); ++i) - { - pos_range2 = init_sequence[i]; - range_it range1(range2); - range2 = bk.get_range(pos_range2); - move_forward(range1, range2); - }; - move_forward(range2, rbuf); -}; -// -//------------------------------------------------------------------------- -// function : move_long_sequence -/// @brief move the blocks, following the positions of the init_sequence. -/// if the sequence is greater than Group_size, it is divided in small -/// sequences, creating function_t elements, for to be inserted in the -/// concurrent stack -// -/// @param init_sequence : vector with the positions from and where move the -/// blocks -//------------------------------------------------------------------------- -template -void move_blocks -::move_long_sequence(const std::vector &init_sequence) -{ - if (init_sequence.size() < Group_size) return move_sequence(init_sequence); - - size_t npart = (init_sequence.size() + Group_size - 1) / Group_size; - size_t size_part = init_sequence.size() / npart; - atomic_t son_counter(0); - - std::vector sequence; - sequence.reserve(size_part); - - std::vector index_seq; - index_seq.reserve(npart); - - auto it_pos = init_sequence.begin(); - for (size_t i = 0; i < (npart - 1); ++i, it_pos += size_part) - { - sequence.assign(it_pos, it_pos + size_part); - index_seq.emplace_back(*(it_pos + size_part - 1)); - function_move_sequence(sequence, son_counter, bk.error); - }; - - sequence.assign(it_pos, init_sequence.end()); - index_seq.emplace_back(init_sequence.back()); - function_move_sequence(sequence, son_counter, bk.error); - - bk.exec(son_counter); - if (bk.error) return; - move_long_sequence(index_seq); -} - -// -//**************************************************************************** -}; // End namespace block_detail -}; // End namespace parallel -}; // End namespace sort -}; // End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/detail/parallel_sort.hpp b/include/boost/sort/parallel/detail/parallel_sort.hpp deleted file mode 100644 index 5a089d7..0000000 --- a/include/boost/sort/parallel/detail/parallel_sort.hpp +++ /dev/null @@ -1,239 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file parallel_sort.hpp -/// @brief Contains the parallel_sort class, which is part of the -/// block_indirect_sort algorithm -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP - -#include -#include -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace detail -{ - -//---------------------------------------------------------------------------- -// USING SENTENCES -//---------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -namespace bscu = bsc::util; -using bscu::nbits64; -using bsc::pivot9; -using boost::sort::pdqsort; -// -///--------------------------------------------------------------------------- -/// @struct parallel_sort -/// @brief This class do a parallel sort, using the quicksort filtering, -/// splitting the data until the number of elements is smaller than a -/// predefined value (max_per_thread) -//---------------------------------------------------------------------------- -template -struct parallel_sort -{ - //------------------------------------------------------------------------- - // D E F I N I T I O N S - //------------------------------------------------------------------------- - typedef typename std::iterator_traits::value_type value_t; - typedef std::atomic atomic_t; - typedef std::function function_t; - typedef backbone backbone_t; - - //------------------------------------------------------------------------ - // V A R I A B L E S - //------------------------------------------------------------------------ - // reference to a object with all the data to sort - backbone_t &bk; - - // maximun number of element to sort woth 1 thread - size_t max_per_thread; - - // atomic counter for to detect the end of the works created inside - // the object - atomic_t counter; - - //------------------------------------------------------------------------ - // F U N C T I O N S - //------------------------------------------------------------------------ - parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last); - - void divide_sort(Iter_t first, Iter_t last, uint32_t level); - // - //------------------------------------------------------------------------ - // function : function_divide_sort - /// @brief create a function_t with a call to divide_sort, and inser in - /// the stack of the backbone - // - /// @param first : iterator to the first element of the range to divide - /// @param last : iterator to the next element after the last element of - /// the range to divide - /// @param level : level of depth in the division.When zero call to - /// pdqsort - /// @param counter : atomic variable which is decremented when finish - /// the function. This variable is used for to know - /// when are finished all the function_t created - /// inside an object - /// @param error : global indicator of error. - //------------------------------------------------------------------------ - void function_divide_sort(Iter_t first, Iter_t last, uint32_t level, - atomic_t &counter, bool &error) - { - bscu::atomic_add(counter, 1); - function_t f1 = [this, first, last, level, &counter, &error]( ) - { - if (not error) - { - try - { - this->divide_sort (first, last, level); - } - catch (std::bad_alloc &) - { - error = true; - }; - }; - bscu::atomic_sub (counter, 1); - }; - bk.works.emplace_back(f1); - }; - -//-------------------------------------------------------------------------- -};// end struct parallel_sort -//-------------------------------------------------------------------------- -// -//############################################################################ -// ## -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -//------------------------------------------------------------------------ -// function : parallel_sort -/// @brief constructor of the class -/// @param [in] bkbn : backbone struct with all the information to sort -/// @param [in] first : iterator to the first element to sort -/// @param [in] last : iterator to the next element after the last -//------------------------------------------------------------------------ -template -parallel_sort -::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last) - : bk(bkbn), counter(0) -{ - assert((last - first) >= 0); - size_t nelem = size_t(last - first); - - //------------------- check if sort -------------------------------------- - bool sorted = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++); - if (sorted) return; - - //------------------- check if reverse sort --------------------------- - sorted = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++); - - if (sorted) - { - size_t nelem2 = nelem >> 1; - Iter_t it1 = first, it2 = last - 1; - for (size_t i = 0; i < nelem2; ++i) - std::swap(*(it1++), *(it2--)); - return; - }; - - //-------------------max_per_thread --------------------------- - uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1; - if (nbits_size > 5) nbits_size = 5; - max_per_thread = 1 << (18 - nbits_size); - - uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2; - - //---------------- check if only single thread ----------------------- - if (nelem < (max_per_thread)) - { - pdqsort(first, last, bk.cmp); - return; - }; - if (not bk.error) divide_sort(first, last, level); - - // wait until all the parts are finished - bk.exec(counter); -}; - -//------------------------------------------------------------------------ -// function : divide_sort -/// @brief this function divide the data in two part, for to be sorted in -/// a parallel mode -/// @param first : iterator to the first element to sort -/// @param last : iterator to the next element after the last -/// @param level : level of depth before call to pdqsort -//------------------------------------------------------------------------ -template -void parallel_sort -::divide_sort(Iter_t first, Iter_t last, uint32_t level) -{ - //------------------- check if sort ----------------------------------- - bool sorted = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++); - if (sorted) return; - - //---------------- check if finish the subdivision ------------------- - size_t nelem = last - first; - if (level == 0 or nelem < (max_per_thread)) - { - return pdqsort(first, last, bk.cmp); - }; - - //-------------------- pivoting ---------------------------------- - pivot9(first, last, bk.cmp); - const value_t &val = const_cast(*first); - Iter_t c_first = first + 1, c_last = last - 1; - - while (bk.cmp(*c_first, val)) ++c_first; - while (bk.cmp(val, *c_last)) --c_last; - - while (not (c_first > c_last)) - { - std::swap(*(c_first++), *(c_last--)); - while (bk.cmp(*c_first, val)) - ++c_first; - while (bk.cmp(val, *c_last)) - --c_last; - }; - - std::swap(*first, *c_last); - - // insert the work of the second half in the stack of works - function_divide_sort(c_first, last, level - 1, counter, bk.error); - if (bk.error) return; - - // The first half is done by the same thread - function_divide_sort(first, c_last, level - 1, counter, bk.error); -}; -// -//**************************************************************************** -};// End namespace block_detail -};// End namespace parallel -};// End namespace sort -};// End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/parallel_stable_sort.hpp b/include/boost/sort/parallel/parallel_stable_sort.hpp deleted file mode 100644 index 2885a35..0000000 --- a/include/boost/sort/parallel/parallel_stable_sort.hpp +++ /dev/null @@ -1,273 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file parallel_stable_sort.hpp -/// @brief This file contains the class parallel_stable_sort -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace stable_detail -{ - -//--------------------------------------------------------------------------- -// USING SENTENCES -//--------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -namespace bss = boost::sort::spin_detail; -using bsc::range; -using bsc::merge_half; -using boost::sort::parallel::sample_detail::sample_sort; -// -///--------------------------------------------------------------------------- -/// @struct parallel_stable_sort -/// @brief This a structure for to implement a parallel stable sort, exception -/// safe -//---------------------------------------------------------------------------- -template > -struct parallel_stable_sort -{ - //------------------------------------------------------------------------- - // DEFINITIONS - //------------------------------------------------------------------------- - typedef value_iter value_t; - - //------------------------------------------------------------------------- - // VARIABLES - //------------------------------------------------------------------------- - // Number of elements to sort - size_t nelem; - // Pointer to the auxiliary memory needed for the algorithm - value_t *ptr; - // Minimal number of elements for to be sorted in parallel mode - const size_t nelem_min = 1 << 16; - - //------------------------------------------------------------------------ - // F U N C T I O N S - //------------------------------------------------------------------------ - parallel_stable_sort (Iter_t first, Iter_t last) - : parallel_stable_sort (first, last, Compare(), - std::thread::hardware_concurrency()) { }; - - parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp) - : parallel_stable_sort (first, last, cmp, - std::thread::hardware_concurrency()) { }; - - parallel_stable_sort (Iter_t first, Iter_t last, uint32_t num_thread) - : parallel_stable_sort (first, last, Compare(), num_thread) { }; - - parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp, - uint32_t num_thread); - - // - //----------------------------------------------------------------------------- - // function : destroy_all - /// @brief The utility is to destroy the temporary buffer used in the - /// sorting process - //----------------------------------------------------------------------------- - void destroy_all() - { - if (ptr != nullptr) std::return_temporary_buffer(ptr); - }; - // - //----------------------------------------------------------------------------- - // function :~parallel_stable_sort - /// @brief destructor of the class. The utility is to destroy the temporary - /// buffer used in the sorting process - //----------------------------------------------------------------------------- - ~parallel_stable_sort() {destroy_all(); } ; -}; -// end struct parallel_stable_sort - -// -//############################################################################ -// ## -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -//----------------------------------------------------------------------------- -// function : parallel_stable_sort -/// @brief constructor of the class -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -/// @param nthread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -//----------------------------------------------------------------------------- -template -parallel_stable_sort -::parallel_stable_sort (Iter_t first, Iter_t last, Compare comp, - uint32_t nthread) : nelem(0), ptr(nullptr) -{ - range range_initial(first, last); - assert(range_initial.valid()); - - nelem = range_initial.size(); - size_t nptr = (nelem + 1) >> 1; - - if (nelem < nelem_min or nthread < 2) - { - bss::spinsort - (range_initial.first, range_initial.last, comp); - return; - }; - - //------------------- check if sort -------------------------------------- - bool sw = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++); - if (sw) return; - - //------------------- check if reverse sort --------------------------- - sw = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sw = comp(*it2, *it1)); it1 = it2++); - if (sw) - { - size_t nelem2 = nelem >> 1; - Iter_t it1 = first, it2 = last - 1; - for (size_t i = 0; i < nelem2; ++i) - std::swap(*(it1++), *(it2--)); - return; - }; - - ptr = std::get_temporary_buffer(nptr).first; - if (ptr == nullptr) throw std::bad_alloc(); - - //--------------------------------------------------------------------- - // Parallel Process - //--------------------------------------------------------------------- - range range_first(range_initial.first, range_initial.first + nptr); - - range range_second(range_initial.first + nptr, range_initial.last); - - range range_buffer(ptr, ptr + nptr); - - try - { - sample_sort - (range_initial.first, range_initial.first + nptr, - comp, nthread, range_buffer); - } catch (std::bad_alloc &) - { - destroy_all(); - throw std::bad_alloc(); - }; - - try - { - sample_sort - (range_initial.first + nptr, - range_initial.last, comp, nthread, range_buffer); - } catch (std::bad_alloc &) - { - destroy_all(); - throw std::bad_alloc(); - }; - - range_buffer = move_forward(range_buffer, range_first); - range_initial = merge_half(range_initial, range_buffer, range_second, comp); -}; // end of constructor - -// -//**************************************************************************** -};// End namespace stable_detail -//**************************************************************************** -// - -//--------------------------------------------------------------------------- -// USING SENTENCES -//--------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -namespace bscu = bsc::util; -namespace bss = boost::sort::spin_detail; -using bsc::range; -using bsc::merge_half; -// -//############################################################################ -// ## -// ## -// P A R A L L E L _ S T A B L E _ S O R T ## -// ## -// ## -//############################################################################ -// -//----------------------------------------------------------------------------- -// function : parallel_stable_sort -/// @brief : parallel stable sort algorithm. -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -//----------------------------------------------------------------------------- -template -void parallel_stable_sort(Iter_t first, Iter_t last) -{ - typedef bscu::compare_iter Compare; - stable_detail::parallel_stable_sort(first, last); -}; -// -//----------------------------------------------------------------------------- -// function : parallel_stable_sort -/// @brief parallel stable sort. -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param nthread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -//----------------------------------------------------------------------------- -template -void parallel_stable_sort(Iter_t first, Iter_t last, uint32_t nthread) -{ - typedef bscu::compare_iter Compare; - stable_detail::parallel_stable_sort(first, last, nthread); -}; -// -//----------------------------------------------------------------------------- -// function : parallel_stable_sort -/// @brief : parallel stable sort. -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -//----------------------------------------------------------------------------- -template * = nullptr> -void parallel_stable_sort(Iter_t first, Iter_t last, Compare comp) -{ - stable_detail::parallel_stable_sort(first, last, comp); -}; -// -//**************************************************************************** -};// End namespace parallel -};// End namespace sort -};// End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/sample_sort.hpp b/include/boost/sort/parallel/sample_sort.hpp deleted file mode 100644 index c901c9e..0000000 --- a/include/boost/sort/parallel/sample_sort.hpp +++ /dev/null @@ -1,563 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file sample_sort.hpp -/// @brief contains the class sample_sort -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP -#define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace boost -{ -namespace sort -{ -namespace parallel -{ -namespace sample_detail -{ -//--------------------------------------------------------------------------- -// USING SENTENCES -//--------------------------------------------------------------------------- -namespace bsc = boost::sort::common; -namespace bss = boost::sort::spin_detail; -namespace bscu = boost::sort::common::util; -using bsc::range; -using bscu::atomic_add; -using bsc::merge_vector4; -using bsc::uninit_merge_level4; -using bsc::less_ptr_no_null; - -// -///--------------------------------------------------------------------------- -/// @struct sample_sort -/// @brief This a structure for to implement a sample sort, exception -/// safe -/// @tparam -/// @remarks -//---------------------------------------------------------------------------- -template -struct sample_sort -{ - //------------------------------------------------------------------------ - // DEFINITIONS - //------------------------------------------------------------------------ - typedef value_iter value_t; - typedef range range_it; - typedef range range_buf; - typedef sample_sort this_t; - - //------------------------------------------------------------------------ - // VARIABLES AND CONSTANTS - //------------------------------------------------------------------------ - // minimun numbers of elements for to be sortd in parallel mode - static const uint32_t thread_min = (1 << 16); - - // Number of threads to use in the algorithm - // Number of intervals for to do the internal division of the data - uint32_t nthread, ninterval; - - // Bool variables indicating if the auxiliary memory is constructed - // and indicating in the auxiliary memory had been obtained inside the - /// algorithm or had been received as a parameter - bool construct = false, owner = false; - - // Comparison object for to compare two elements - Compare comp; - - // Range with all the elements to sort - range_it global_range; - - // range with the auxiliary memory - range_buf global_buf; - - // vector of futures - std::vector> vfuture; - - // vector of vectors which contains the ranges to merge obtained in the - // subdivision - std::vector> vv_range_it; - - // each vector of ranges of the vv_range_it, need their corresponding buffer - // for to do the merge - std::vector> vv_range_buf; - - // Initial vector of ranges - std::vector vrange_it_ini; - - // Initial vector of buffers - std::vector vrange_buf_ini; - - // atomic counter for to know when are finished the function_t created - // inside a function - std::atomic njob; - - // Indicate if an error in the algorithm for to undo all - bool error; - - //------------------------------------------------------------------------ - // FUNCTIONS OF THE STRUCT - //------------------------------------------------------------------------ - void initial_configuration(void); - - sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread, - value_t *paux, size_t naux); - - sample_sort(Iter_t first, Iter_t last) - : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(), - nullptr, 0) { }; - - sample_sort(Iter_t first, Iter_t last, Compare cmp) - : sample_sort(first, last, cmp, std::thread::hardware_concurrency(), - nullptr, 0) { }; - - sample_sort(Iter_t first, Iter_t last, uint32_t num_thread) - : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { }; - - sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread) - : sample_sort(first, last, cmp, num_thread, nullptr, 0) { }; - - sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread, - range_buf range_buf_initial) - : sample_sort(first, last, cmp, num_thread, - range_buf_initial.first, range_buf_initial.size()) { }; - - void destroy_all(void); - // - //----------------------------------------------------------------------------- - // function :~sample_sort - /// @brief destructor of the class. The utility is to destroy the temporary - /// buffer used in the sorting process - //----------------------------------------------------------------------------- - ~sample_sort(void) { destroy_all(); }; - // - //----------------------------------------------------------------------- - // function : execute first - /// @brief this a function to assign to each thread in the first merge - //----------------------------------------------------------------------- - void execute_first(void) - { - uint32_t job = 0; - while ((job = atomic_add(njob, 1)) < ninterval) - { - uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job], - vv_range_buf[job], comp); - }; - }; - // - //----------------------------------------------------------------------- - // function : execute - /// @brief this is a function to assignt each thread the final merge - //----------------------------------------------------------------------- - void execute(void) - { - uint32_t job = 0; - while ((job = atomic_add(njob, 1)) < ninterval) - { - merge_vector4(vrange_buf_ini[job], vrange_it_ini[job], - vv_range_buf[job], vv_range_it[job], comp); - }; - }; - // - //----------------------------------------------------------------------- - // function : first merge - /// @brief Implement the merge of the initially sparse ranges - //----------------------------------------------------------------------- - void first_merge(void) - { //---------------------------------- begin -------------------------- - njob = 0; - - for (uint32_t i = 0; i < nthread; ++i) - { - vfuture[i] = std::async(std::launch::async, &this_t::execute_first, - this); - }; - for (uint32_t i = 0; i < nthread; ++i) - vfuture[i].get(); - }; - // - //----------------------------------------------------------------------- - // function : final merge - /// @brief Implement the final merge of the ranges - //----------------------------------------------------------------------- - void final_merge(void) - { //---------------------------------- begin -------------------------- - njob = 0; - - for (uint32_t i = 0; i < nthread; ++i) - { - vfuture[i] = std::async(std::launch::async, &this_t::execute, this); - }; - for (uint32_t i = 0; i < nthread; ++i) - vfuture[i].get(); - }; - //---------------------------------------------------------------------------- -}; -// End class sample_sort -//---------------------------------------------------------------------------- -// -//############################################################################ -// ## -// N O N I N L I N E F U N C T I O N S ## -// ## -// ## -//############################################################################ -// -//----------------------------------------------------------------------------- -// function : sample_sort -/// @brief constructor of the class -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param cmp : object for to compare two elements pointed by Iter_t iterators -/// @param num_thread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -/// @param paux : pointer to the auxiliary memory. If nullptr, the memory is -/// created inside the class -/// @param naux : number of elements of the memory pointed by paux -//----------------------------------------------------------------------------- -template -sample_sort -::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread, - value_t *paux, size_t naux) -: nthread(num_thread), owner(false), comp(cmp), global_range(first, last), - global_buf(nullptr, nullptr), error(false) -{ - assert((last - first) >= 0); - size_t nelem = size_t(last - first); - construct = false; - njob = 0; - vfuture.resize(nthread); - - // Adjust when have many threads and only a few elements - while (nelem > thread_min and (nthread * nthread) > (nelem >> 3)) - { - nthread /= 2; - }; - ninterval = (nthread << 3); - - if (nthread < 2 or nelem <= (thread_min)) - { - bss::spinsort(first, last, comp); - return; - }; - - //------------------- check if sort -------------------------------------- - bool sw = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++); - if (sw) return; - - //------------------- check if reverse sort --------------------------- - sw = true; - for (Iter_t it1 = first, it2 = first + 1; - it2 != last and (sw = comp(*it2, *it1)); it1 = it2++); - if (sw) - { - size_t nelem2 = nelem >> 1; - Iter_t it1 = first, it2 = last - 1; - for (size_t i = 0; i < nelem2; ++i) - std::swap(*(it1++), *(it2--)); - return; - }; - - if (paux != nullptr) - { - assert(naux != 0); - global_buf.first = paux; - global_buf.last = paux + naux; - owner = false; - } - else - { - value_t *ptr = std::get_temporary_buffer(nelem).first; - if (ptr == nullptr) throw std::bad_alloc(); - owner = true; - global_buf = range_buf(ptr, ptr + nelem); - }; - //------------------------------------------------------------------------ - // PROCESS - //------------------------------------------------------------------------ - try - { - initial_configuration(); - } catch (std::bad_alloc &) - { - error = true; - }; - if (not error) - { - first_merge(); - construct = true; - final_merge(); - }; - if (error) - { - destroy_all(); - throw std::bad_alloc(); - }; -} -; -// -//----------------------------------------------------------------------------- -// function : destroy_all -/// @brief destructor of the class. The utility is to destroy the temporary -/// buffer used in the sorting process -//----------------------------------------------------------------------------- -template -void sample_sort::destroy_all(void) -{ - if (construct) - { - destroy(global_buf); - construct = false; - } - if (global_buf.first != nullptr and owner) - std::return_temporary_buffer(global_buf.first); -} -// -//----------------------------------------------------------------------------- -// function : initial_configuration -/// @brief Create the internal data structures, and obtain the inital set of -/// ranges to merge -//----------------------------------------------------------------------------- -template -void sample_sort::initial_configuration(void) -{ - std::vector vmem_thread; - std::vector vbuf_thread; - size_t nelem = global_range.size(); - - //------------------------------------------------------------------------ - size_t cupo = nelem / nthread; - Iter_t it_first = global_range.first; - value_t *buf_first = global_buf.first; - vmem_thread.reserve(nthread + 1); - vbuf_thread.reserve(nthread + 1); - - for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first += - cupo) - { - vmem_thread.emplace_back(it_first, it_first + cupo); - vbuf_thread.emplace_back(buf_first, buf_first + cupo); - }; - - vmem_thread.emplace_back(it_first, global_range.last); - vbuf_thread.emplace_back(buf_first, global_buf.last); - - //------------------------------------------------------------------------ - // Sorting of the ranges - //------------------------------------------------------------------------ - std::vector> vfuture(nthread); - - for (uint32_t i = 0; i < nthread; ++i) - { - auto func = [=]() - { - bss::spinsort (vmem_thread[i].first, - vmem_thread[i].last, comp, - vbuf_thread[i]); - }; - vfuture[i] = std::async(std::launch::async, func); - }; - - for (uint32_t i = 0; i < nthread; ++i) - vfuture[i].get(); - - //------------------------------------------------------------------------ - // Obtain the vector of milestones - //------------------------------------------------------------------------ - std::vector vsample; - vsample.reserve(nthread * (ninterval - 1)); - - for (uint32_t i = 0; i < nthread; ++i) - { - size_t distance = vmem_thread[i].size() / ninterval; - for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance) - { - vsample.push_back(vmem_thread[i].first + pos); - }; - }; - typedef less_ptr_no_null compare_ptr; - typedef typename std::vector::iterator it_to_it; - - bss::spinsort(vsample.begin(), vsample.end(), - compare_ptr(comp)); - - //------------------------------------------------------------------------ - // Create the final milestone vector - //------------------------------------------------------------------------ - std::vector vmilestone; - vmilestone.reserve(ninterval); - - for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread) - { - vmilestone.push_back(vsample[pos]); - }; - - //------------------------------------------------------------------------ - // Creation of the first vector of ranges - //------------------------------------------------------------------------ - std::vector>>vv_range_first (nthread); - - for (uint32_t i = 0; i < nthread; ++i) - { - Iter_t itaux = vmem_thread[i].first; - - for (uint32_t k = 0; k < (ninterval - 1); ++k) - { - Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last, - *vmilestone[k], comp); - - vv_range_first[i].emplace_back(itaux, it2); - itaux = it2; - }; - vv_range_first[i].emplace_back(itaux, vmem_thread[i].last); - }; - - //------------------------------------------------------------------------ - // Copy in buffer and creation of the final matrix of ranges - //------------------------------------------------------------------------ - vv_range_it.resize(ninterval); - vv_range_buf.resize(ninterval); - vrange_it_ini.reserve(ninterval); - vrange_buf_ini.reserve(ninterval); - - for (uint32_t i = 0; i < ninterval; ++i) - { - vv_range_it[i].reserve(nthread); - vv_range_buf[i].reserve(nthread); - }; - - Iter_t it = global_range.first; - value_t *it_buf = global_buf.first; - - for (uint32_t k = 0; k < ninterval; ++k) - { - size_t nelem_interval = 0; - - for (uint32_t i = 0; i < nthread; ++i) - { - size_t nelem_range = vv_range_first[i][k].size(); - if (nelem_range != 0) - { - vv_range_it[k].push_back(vv_range_first[i][k]); - }; - nelem_interval += nelem_range; - }; - - vrange_it_ini.emplace_back(it, it + nelem_interval); - vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval); - - it += nelem_interval; - it_buf += nelem_interval; - }; -} -; -// -//**************************************************************************** -} -; -// End namespace sample_detail -//**************************************************************************** -// -namespace bscu = boost::sort::common::util; -// -//############################################################################ -// ## -// ## -// S A M P L E _ S O R T ## -// ## -// ## -//############################################################################ -// -//----------------------------------------------------------------------------- -// function : sample_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -//----------------------------------------------------------------------------- -template -void sample_sort(Iter_t first, Iter_t last) -{ - typedef compare_iter Compare; - sample_detail::sample_sort(first, last); -}; -// -//----------------------------------------------------------------------------- -// function : sample_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param nthread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -//----------------------------------------------------------------------------- -template -void sample_sort(Iter_t first, Iter_t last, uint32_t nthread) -{ - typedef compare_iter Compare; - sample_detail::sample_sort(first, last, nthread); -}; -// -//----------------------------------------------------------------------------- -// function : sample_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -//----------------------------------------------------------------------------- -template * = - nullptr> -void sample_sort(Iter_t first, Iter_t last, Compare comp) -{ - sample_detail::sample_sort(first, last, comp); -}; -// -//----------------------------------------------------------------------------- -// function : sample_sort -/// @brief parallel sample sort algorithm (stable sort) -/// -/// @param first : iterator to the first element of the range to sort -/// @param last : iterator after the last element to the range to sort -/// @param comp : object for to compare two elements pointed by Iter_t -/// iterators -/// @param nthread : Number of threads to use in the process. When this value -/// is lower than 2, the sorting is done with 1 thread -//----------------------------------------------------------------------------- -template -void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread) -{ - sample_detail::sample_sort(first, last, comp, nthread); -}; -// -//**************************************************************************** -};// End namespace parallel -};// End namespace sort -};// End namespace boost -//**************************************************************************** -// -#endif diff --git a/include/boost/sort/parallel/sort.hpp b/include/boost/sort/parallel/sort.hpp deleted file mode 100644 index 303ed9b..0000000 --- a/include/boost/sort/parallel/sort.hpp +++ /dev/null @@ -1,20 +0,0 @@ -//---------------------------------------------------------------------------- -/// @file sort.hpp -/// @brief This file contains the sort functions of the sort library -/// -/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n -/// Distributed under the Boost Software License, Version 1.0.\n -/// ( See accompanying file LICENSE_1_0.txt or copy at -/// http://www.boost.org/LICENSE_1_0.txt ) -/// @version 0.1 -/// -/// @remarks -//----------------------------------------------------------------------------- -#ifndef __BOOST_SORT_PARALLEL_SORT_HPP -#define __BOOST_SORT_PARALLEL_SORT_HPP - -#include -#include -#include -// -#endif diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 32c1815..c348fc2 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -19,7 +19,20 @@ import testing ; : : : : string_sort ] [ run sort_detail_test.cpp : : : : sort_detail ] - - + + [ run test_flat_stable_sort.cpp + : : : speed : test_flat_stable_sort ] + [ run test_spinsort.cpp + : : : speed : test_spinsort ] + [ run test_insert_sort.cpp + : : : speed : test_insert_sort ] + + + [ run test_block_indirect_sort.cpp + : : : speed multi : test_block_indirect_sort ] + [ run test_sample_sort.cpp + : : : speed multi : test_sample_sort ] + [ run test_parallel_stable_sort.cpp + : : : speed multi : test_parallel_stable_sort ] ; } diff --git a/test/test_block_indirect_sort.cpp b/test/test_block_indirect_sort.cpp new file mode 100644 index 0000000..7ff043d --- /dev/null +++ b/test/test_block_indirect_sort.cpp @@ -0,0 +1,384 @@ +//---------------------------------------------------------------------------- +/// @file test_block_indirect_sort.cpp +/// @brief Test program of the block_indirect_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace bsc = boost::sort::common; +namespace bsp = boost::sort; +using boost::sort::block_indirect_sort; +using bsc::range; + + +void test1 (void) +{ + typedef std::less< uint64_t > compare; + + const uint32_t NElem = 500000; + std::vector< uint64_t > V1; + std::mt19937_64 my_rand (0); + compare comp; + + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (my_rand ( ) % NElem); + block_indirect_sort (V1.begin ( ), V1.end ( ), comp, 2); + + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (i); + + block_indirect_sort ( V1.begin ( ), V1.end ( ), comp, 2); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (NElem - i); + + block_indirect_sort ( V1.begin ( ), V1.end ( ), comp, 2); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (1000); + + block_indirect_sort (V1.begin ( ), V1.end ( ), comp, 2); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] == V1[ i ]); + }; +}; + +void test2 (void) +{ + std::less< uint64_t > comp; + std::vector< uint64_t > V; + + for (uint32_t i = 0; i < 2083333; ++i) V.push_back (i); + + block_indirect_sort ( V.begin ( ), V.end ( ),comp, 8); + for (uint32_t i = 0; i < V.size ( ); ++i) { + BOOST_CHECK (V[ i ] == i); + }; +}; + +void test3 (void) +{ + typedef typename std::vector< uint64_t >::iterator iter_t; + typedef range< iter_t > range_it; + + const uint32_t NELEM = 416667; + std::vector< uint64_t > A; + std::less< uint64_t > comp; + + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + + range_it R1 (A.begin ( ) + 1000, A.begin ( ) + (1000 + NELEM)); + block_indirect_sort ( A.begin () + 1000, + A.begin () + (1000 + NELEM), comp, 8); + for (iter_t it = A.begin ( ) + 1000; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 0 and A[ 999 ] == 0 and A[ 1000 + NELEM ] == 0 and + A[ 1001 + NELEM ] == 0); + + + A.clear ( ); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + + R1 = range_it (A.begin ( ) + 1000, A.begin ( ) + (1000 + NELEM)); + block_indirect_sort ( A.begin ( ) + 1000, + A.begin ( ) + (1000 + NELEM), comp, 4); + + for (iter_t it = A.begin ( ) + 1001; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 999999999 and A[ 999 ] == 999999999 and + A[ 1000 + NELEM ] == 999999999 and + A[ 1001 + NELEM ] == 999999999); +}; + +void test4 (void) +{ + typedef std::less< uint32_t > compare; + + const uint32_t NElem = 1000000; + std::vector< uint32_t > V1, V2, V3; + V1.reserve ( NElem ) ; + std::mt19937 my_rand (0); + + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (my_rand ( )); + + V2 = V1; + V3 = V1; + std::sort (V2.begin ( ), V2.end ( )); + + V1 = V3; + block_indirect_sort ( V1.begin ( ), V1.end ( ), compare(), 2); + for (unsigned i = 0; i < V1.size(); i++) + { + BOOST_CHECK (V1[ i ] == V2[ i ]); + }; + + V1 = V3; + block_indirect_sort (V1.begin ( ), V1.end ( ), compare(), 4); + for (unsigned i = 0; i < V1.size(); i++) + { BOOST_CHECK (V1[ i ] == V2[ i ]); + }; + + V1 = V3; + + block_indirect_sort (V1.begin ( ), V1.end ( ), compare(), 8); + + for (unsigned i = 0; i < V1.size(); i++) + { BOOST_CHECK (V1[ i ] == V2[ i ]); + }; + + V1 = V3; + block_indirect_sort ( V1.begin ( ), V1.end ( ), compare(), 16); + for (unsigned i = 0; i < V1.size(); i++) + { BOOST_CHECK (V1[ i ] == V2[ i ]); + }; + + V1 = V3; + block_indirect_sort ( V1.begin ( ), V1.end ( ), compare(), 100); + for (unsigned i = 1; i < V1.size(); i++) + { BOOST_CHECK (V1[ i ] == V2[ i ]); + }; +}; + +template +struct int_array +{ + uint64_t M[NN]; + + int_array(uint64_t number = 0) + { + for (uint32_t i = 0; i < NN; ++i) + M[i] = number; + } + + bool operator<(const int_array &A) const + { + return M[0] < A.M[0]; + } +}; + +void test5(void) +{ + namespace bspd = boost::sort::blk_detail; + BOOST_CHECK(bspd::block_size<0>::data == 4096); + + BOOST_CHECK(bspd::block_size<1>::data == 4096); + + BOOST_CHECK(bspd::block_size<2>::data == 4096); + + BOOST_CHECK(bspd::block_size<3>::data == 4096); + BOOST_CHECK(bspd::block_size<4>::data == 4096); + + BOOST_CHECK(bspd::block_size<5>::data == 4096); + BOOST_CHECK(bspd::block_size<6>::data == 4096); + BOOST_CHECK(bspd::block_size<7>::data == 4096); + BOOST_CHECK(bspd::block_size<8>::data == 4096); + + BOOST_CHECK(bspd::block_size<9>::data == 2048); + BOOST_CHECK(bspd::block_size<12>::data == 2048); + BOOST_CHECK(bspd::block_size<15>::data == 2048); + BOOST_CHECK(bspd::block_size<16>::data == 2048); + + BOOST_CHECK(bspd::block_size<17>::data == 1024); + BOOST_CHECK(bspd::block_size<24>::data == 1024); + BOOST_CHECK(bspd::block_size<31>::data == 1024); + BOOST_CHECK(bspd::block_size<32>::data == 1024); + + BOOST_CHECK(bspd::block_size<33>::data == 768); + BOOST_CHECK(bspd::block_size<50>::data == 768); + BOOST_CHECK(bspd::block_size<63>::data == 768); + BOOST_CHECK(bspd::block_size<64>::data == 768); + + BOOST_CHECK(bspd::block_size<65>::data == 512); + BOOST_CHECK(bspd::block_size<100>::data == 512); + BOOST_CHECK(bspd::block_size<127>::data == 512); + BOOST_CHECK(bspd::block_size<128>::data == 512); + + BOOST_CHECK(bspd::block_size<129>::data == 256); + BOOST_CHECK(bspd::block_size<200>::data == 256); + BOOST_CHECK(bspd::block_size<255>::data == 256); + BOOST_CHECK(bspd::block_size<256>::data == 256); + + BOOST_CHECK(bspd::block_size<257>::data == 128); + BOOST_CHECK(bspd::block_size<400>::data == 128); + BOOST_CHECK(bspd::block_size<511>::data == 128); + BOOST_CHECK(bspd::block_size<512>::data == 128); + BOOST_CHECK(bspd::block_size<513>::data == 128); + BOOST_CHECK(bspd::block_size<600>::data == 128); +}; + +void test6() +{ + std::less cmp64; + std::less cmp32; + std::less cmp16; + std::less cmp8; + + std::mt19937_64 my_rand(0); + + const uint32_t NELEM = (1 << 20); + std::vector V1, V2; + V1.reserve(NELEM); + V2.reserve(NELEM); + + for (uint32_t i = 0; i < NELEM; ++i) + V1.push_back(my_rand()); + V2 = V1; + + uint64_t *p64 = &V1[0]; + uint32_t *p32 = reinterpret_cast(p64); + uint16_t *p16 = reinterpret_cast(p64); + uint8_t *p8 = reinterpret_cast(p64); + + V1 = V2; + bsp::block_indirect_sort(p64, p64 + NELEM, cmp64, 8); + for (unsigned i = 1; i < NELEM; i++) + { + BOOST_CHECK(p64[i - 1] <= p64[i]); + }; + + V1 = V2; + bsp::block_indirect_sort(p32, p32 + (NELEM << 1), cmp32, 8); + for (unsigned i = 1; i < (NELEM << 1); i++) + { + BOOST_CHECK(p32[i - 1] <= p32[i]); + }; + + V1 = V2; + bsp::block_indirect_sort(p16, p16 + (NELEM << 2), cmp16, 8); + for (unsigned i = 1; i < (NELEM << 2); i++) + { + BOOST_CHECK(p16[i - 1] <= p16[i]); + }; + + V1 = V2; + bsp::block_indirect_sort(p8, p8 + (NELEM << 3), cmp8, 8); + for (unsigned i = 1; i < (NELEM << 3); i++) + { + BOOST_CHECK(p8[i - 1] <= p8[i]); + }; +}; + +template +void test_int_array(uint32_t NELEM) +{ + typedef std::less compare; + std::mt19937_64 my_rand(0); + + std::vector V1; + V1.reserve(NELEM); + for (uint32_t i = 0; i < NELEM; ++i) + V1.emplace_back(my_rand()); + + bsp::block_indirect_sort(V1.begin(), V1.end(), compare()); + for (unsigned i = 1; i < NELEM; i++) + { + BOOST_CHECK(not (V1[i] < V1[i - 1])); + }; +} + +void test7() +{ + test_int_array >(1u << 20); + test_int_array >(1u << 19); + test_int_array >(1u << 18); + test_int_array >(1u << 17); + test_int_array >(1u << 17); + test_int_array >(1u << 17); + test_int_array >(1u << 17); + test_int_array >(1u << 17); +} +void test8() +{ + std::mt19937_64 my_rand(0); + const uint32_t NELEM = 1 << 20; + const uint32_t NString = 100000; + std::vector V1; + V1.reserve(NELEM); + for (uint32_t i = 0; i < NELEM; ++i) + V1.push_back(my_rand()); + + uint64_t *p64 = &V1[0]; + char *pchar = reinterpret_cast(p64); + + std::string sinput(pchar, (NELEM << 3)); + + std::istringstream strm_input(sinput); + std::string inval; + std::vector V; + V.reserve(NString); + strm_input.seekg(0, std::ios_base::beg); + + strm_input.seekg(0, std::ios_base::beg); + + for (size_t i = 0; i < NString; ++i) + { + if (!strm_input.eof()) + { + strm_input >> inval; + V.push_back(inval); + inval.clear(); + } + else + { + throw std::ios_base::failure("Insuficient lenght of the file\n"); + }; + }; + + typedef std::less compare; + bsp::block_indirect_sort(V.begin(), V.end(), compare()); + for (unsigned i = 1; i < NString; i++) + { + BOOST_CHECK(not (V[i] < V[i - 1])); + }; + +} + +int test_main (int, char *[]) +{ + test1 ( ); + test2 ( ); + test3 ( ); + test4 ( ); + test5 ( ); + test6 ( ); + test7 ( ); + test8 ( ); + + return 0; +}; diff --git a/test/test_flat_stable_sort.cpp b/test/test_flat_stable_sort.cpp new file mode 100644 index 0000000..8e2d7b9 --- /dev/null +++ b/test/test_flat_stable_sort.cpp @@ -0,0 +1,269 @@ +//---------------------------------------------------------------------------- +/// @file test_flat_stable_sort.cpp +/// @brief test program of the flat_stable_sort algorithm +/// +/// @author Copyright (c) 2017 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace boost::sort; + +void test2 ( ); +void test3 ( ); +void test4 ( ); +void test5 ( ); +void test6 ( ); +void test7 ( ); + +struct xk +{ + unsigned tail : 4; + unsigned num : 28; + xk ( uint32_t n =0 , uint32_t t =0): tail (t), num(n){}; + bool operator< (xk A) const { return (num < A.num); }; +}; + +void test2 ( ) +{ + uint64_t V1[ 300 ]; + typedef std::less< uint64_t > compare_t; + compare_t comp; + + for (uint32_t i = 0; i < 200; ++i) V1[ i ] = i; + + indirect_flat_stable_sort (&V1[ 0 ], &V1[ 200 ], comp); + for (unsigned i = 1; i < 200; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + for (uint32_t i = 0; i < 200; ++i) V1[ i ] = 199 - i; + flat_stable_sort (&V1[ 0 ], &V1[ 200 ], comp); + for (unsigned i = 1; i < 200; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + for (uint32_t i = 0; i < 300; ++i) V1[ i ] = 299 - i; + + flat_stable_sort (&V1[ 0 ], &V1[ 300 ], comp); + for (unsigned i = 1; i < 300; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + for (uint32_t i = 0; i < 300; ++i) V1[ i ] = 88; + + flat_stable_sort (&V1[ 0 ], &V1[ 300 ], comp); + for (unsigned i = 1; i < 300; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; +}; + +void test3 ( ) +{ + typedef std::less< xk > compare_t; + std::mt19937_64 my_rand (0); + + const uint32_t NMAX = 500000; + + + std::vector< xk > V1, V2, V3; + V1.reserve (NMAX); + for (uint32_t i = 0; i < 8; ++i) { + for (uint32_t k = 0; k < NMAX; ++k) { + uint32_t NM = my_rand ( ); + xk G; + G.num = NM >> 3; + G.tail = i; + V1.push_back (G); + }; + }; + V3 = V2 = V1; + flat_stable_sort (V1.begin ( ), V1.end ( ), compare_t ( )); + std::stable_sort (V2.begin ( ), V2.end ( )); + + BOOST_CHECK (V1.size ( ) == V2.size ( )); + for (uint32_t i = 0; i < V1.size ( ); ++i) { + BOOST_CHECK (V1[ i ].num == V2[ i ].num and + V1[ i ].tail == V2[ i ].tail); + }; +}; + +void test4 (void) +{ + typedef std::less< uint64_t > compare_t; + const uint32_t NElem = 500000; + std::vector< uint64_t > V1; + std::mt19937_64 my_rand (0); + compare_t comp; + + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (my_rand ( ) % NElem); + + flat_stable_sort (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (i); + flat_stable_sort (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (NElem - i); + flat_stable_sort (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (1000); + flat_stable_sort (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] == V1[ i ]); + }; +}; + +void test5 (void) +{ + typedef std::less< uint64_t > compare; + + const uint32_t KMax = 500000; + + std::vector< uint64_t > K, M; + std::mt19937_64 my_rand (0); + compare comp; + + for (uint32_t i = 0; i < KMax; ++i) K.push_back (my_rand ( )); + M = K; + + flat_stable_sort (K.begin ( ), K.end ( ), comp); + std::stable_sort (M.begin ( ), M.end ( ), comp); + for (unsigned i = 0; i < KMax; i++) BOOST_CHECK (M[ i ] == K[ i ]); +}; + +void test6 (void) +{ + typedef std::less< uint64_t > compare_t; + std::vector< uint64_t > V; + + for (uint32_t i = 0; i < 2083333; ++i) V.push_back (i); + flat_stable_sort(V.begin ( ), V.end ( ), compare_t ( )); + for (uint32_t i = 0; i < V.size ( ); ++i) { + BOOST_CHECK (V[ i ] == i); + }; +}; + +void test7 (void) +{ + typedef typename std::vector< uint64_t >::iterator iter_t; + typedef std::less< uint64_t > compare_t; + + compare_t comp; + const uint32_t NELEM = 416667; + const uint32_t N1 = (NELEM + 1) / 2; + std::vector< uint64_t > A; + + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + + flat_stable_sort (A.begin ( ) + 1000, + A.begin ( ) + (1000 + NELEM), comp); + + for (iter_t it = A.begin ( ) + 1000; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 0 and A[ 999 ] == 0 and A[ 1000 + NELEM ] == 0 and + A[ 1001 + NELEM ] == 0); + + + A.clear ( ); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + + flat_stable_sort (A.begin ( ) + 1000, + A.begin ( ) + (1000 + NELEM), comp); + + for (iter_t it = A.begin ( ) + 1001; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 999999999 and A[ 999 ] == 999999999 and + A[ 1000 + NELEM ] == 999999999 and + A[ 1001 + NELEM ] == 999999999); + + std::vector< uint64_t > B (N1 + 2000, 0); + + A.clear ( ); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + + flat_stable_sort (A.begin ( ), A.end ( ), comp); + for (iter_t it = A.begin ( ) + 1; it != A.end ( ); ++it) { + if ((*(it - 1)) > (*it)) std::cout << "error 1\n"; + }; + BOOST_CHECK (B[ 998 ] == 0 and B[ 999 ] == 0 and B[ 1000 + N1 ] == 0 and + B[ 1001 + N1 ] == 0); + + for (uint32_t i = 0; i < B.size ( ); ++i) B[ i ] = 999999999; + A.clear ( ); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + flat_stable_sort (A.begin ( ), A.end ( ), comp); + + for (iter_t it = A.begin ( ) + 1; it != A.end ( ); ++it) { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (B[ 998 ] == 999999999 and B[ 999 ] == 999999999 and + B[ 1000 + N1 ] == 999999999 and B[ 1001 + N1 ] == 999999999); +}; +void test8 (void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare_t; + std::mt19937 my_rand (0); + std::vector V ; + const uint32_t NELEM = 1000000; + V.reserve(NELEM * 10); + + + for (uint32_t k =0 ; k < 10 ; ++k) + { for ( uint32_t i =0 ; i < NELEM ; ++i) + { V.emplace_back(i , k); + }; + iter_t first = V.begin() + (k * NELEM); + iter_t last = first + NELEM ; + std::shuffle( first, last, my_rand); + }; + flat_stable_sort( V.begin() , V.end(), compare_t()); + for ( uint32_t i =0 ; i < ( NELEM * 10); ++i) + { BOOST_CHECK ( V[i].num == (i / 10) and V[i].tail == (i %10) ); + }; +} +int test_main (int, char *[]) +{ + test2 ( ); + test3 ( ); + test4 ( ); + test5 ( ); + test6 ( ); + test7 ( ); + test8 ( ); + return 0; +}; diff --git a/test/test_insert_sort.cpp b/test/test_insert_sort.cpp new file mode 100644 index 0000000..c2b93db --- /dev/null +++ b/test/test_insert_sort.cpp @@ -0,0 +1,163 @@ +//---------------------------------------------------------------------------- +/// @file test_insert_sort.cpp +/// @brief Test program of the insert_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace boost::sort; +using namespace std; +using boost::sort::common::util::insert_sorted; + +void test01 (void) +{ + unsigned A[] = {7, 4, 23, 15, 17, 2, 24, 13, 8, 3, 11, + 16, 6, 14, 21, 5, 1, 12, 19, 22, 25, 8}; + + std::less< unsigned > comp; + // Insertion Sort Unordered, not repeated + insert_sort (&A[ 0 ], &A[ 22 ], comp); + for (unsigned i = 0; i < 21; i++) { + BOOST_CHECK (A[ i ] <= A[ i + 1 ]); + }; + + unsigned B[] = {1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 17, 18, 19, 20, 21, 23, 24, 25}; + // Insertion Sort Ordered, not repeated + insert_sort (&B[ 0 ], &B[ 22 ], comp); + for (unsigned i = 0; i < 21; i++) { + BOOST_CHECK (B[ i ] <= B[ i + 1 ]); + }; + + unsigned C[] = {27, 26, 25, 23, 22, 21, 19, 18, 17, 16, 15, + 14, 13, 11, 10, 9, 8, 7, 6, 5, 3, 2}; + // Insertion Sort reverse sorted, not repeated + insert_sort (&C[ 0 ], &C[ 22 ], comp); + for (unsigned i = 0; i < 21; i++) { + BOOST_CHECK (C[ i ] <= C[ i + 1 ]); + }; + + unsigned D[] = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; + // Insertion Sort equal elements + insert_sort (&D[ 0 ], &D[ 22 ], comp); + for (unsigned i = 0; i < 21; i++) { + BOOST_CHECK (D[ i ] <= D[ i + 1 ]); + }; + + // Insertion Sort 100 random elements + unsigned F[ 100 ]; + for (unsigned i = 0; i < 100; i++) F[ i ] = rand ( ) % 1000; + insert_sort (&F[ 0 ], &F[ 100 ], comp); + for (unsigned i = 0; i < 99; i++) { + BOOST_CHECK (F[ i ] <= F[ i + 1 ]); + }; + + const unsigned NG = 10000; + // Insertion Sort "<::iterator iter_t; + const uint32_t NELEM = 6667; + std::vector< uint64_t > A; + std::less< uint64_t > comp; + + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + + insert_sort (A.begin ( ) + 1000, A.begin ( ) + (1000 + NELEM), comp); + + for (iter_t it = A.begin ( ) + 1000; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 0 and A[ 999 ] == 0 and A[ 1000 + NELEM ] == 0 and + A[ 1001 + NELEM ] == 0); + + + A.clear ( ); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + + insert_sort (A.begin ( ) + 1000, A.begin ( ) + (1000 + NELEM), comp); + + for (iter_t it = A.begin ( ) + 1001; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 999999999 and A[ 999 ] == 999999999 and + A[ 1000 + NELEM ] == 999999999 and + A[ 1001 + NELEM ] == 999999999); +}; + + +void test03 ( void) +{ + std::vector V {1,3,5,2,4}; + std::less comp ; + uint32_t aux[10] ; + + insert_sorted ( V.begin() , V.begin()+3, V.end(), comp, aux); + //insert_partial_sort ( V.begin() , V.begin()+3, V.end() , comp); + for ( uint32_t i =0 ; i < V.size() ; ++i) + std::cout< +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +namespace bsort = boost::sort::stable_detail; + +typedef typename std::vector::iterator iter_t; + +std::mt19937_64 my_rand(0); + +struct xk +{ + unsigned tail : 4; + unsigned num : 28; + xk ( uint32_t n =0 , uint32_t t =0): tail (t), num(n){}; + bool operator< (xk A) const { return (num < A.num); }; +}; + +void test3() +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NMAX = 500000; + std::vector V1, V2, V3; + V1.reserve(NMAX); + + for (uint32_t i = 0; i < 8; ++i) + { + for (uint32_t k = 0; k < NMAX; ++k) + { + uint32_t NM = my_rand(); + xk G; + G.num = NM >> 3; + G.tail = i; + V1.push_back(G); + }; + }; + V3 = V2 = V1; + + bsort::parallel_stable_sort(V1.begin(), V1.end()); + std::stable_sort(V2.begin(), V2.end()); + bsort::parallel_stable_sort(V3.begin(), V3.end(), 0); + + BOOST_CHECK(V1.size() == V2.size()); + for (uint32_t i = 0; i < V1.size(); ++i) + { + BOOST_CHECK(V1[i].num == V2[i].num and V1[i].tail == V2[i].tail); + }; + + BOOST_CHECK(V3.size() == V2.size()); + for (uint32_t i = 0; i < V3.size(); ++i) + { + BOOST_CHECK(V3[i].num == V2[i].num and V3[i].tail == V2[i].tail); + }; +}; + +void test4(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NElem = 500000; + std::vector V1; + std::mt19937_64 my_rand(0); + + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(my_rand() % NElem); + + // parallel_stable_sort unsorted + bsort::parallel_stable_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] <= V1[i]); + }; + + V1.clear(); + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(i); + + // parallel_stable_sort sorted + bsort::parallel_stable_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] <= V1[i]); + }; + + V1.clear(); + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(NElem - i); + + // parallel_stable_sort reverse sorted + bsort::parallel_stable_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] <= V1[i]); + }; + + V1.clear(); + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(1000); + // parallel_stable_sort equals + bsort::parallel_stable_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] == V1[i]); + }; +}; + +void test5(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NELEM = 500000; + std::vector A, B; + A.reserve(NELEM); + + for (unsigned i = 0; i < NELEM; i++) + A.push_back(my_rand()); + B = A; + + bsort::parallel_stable_sort(A.begin(), A.end()); + for (unsigned i = 0; i < (NELEM - 1); i++) + { + BOOST_CHECK(A[i] <= A[i + 1]); + }; + std::stable_sort(B.begin(), B.end()); + BOOST_CHECK(A.size() == B.size()); + + for (uint32_t i = 0; i < A.size(); ++i) + BOOST_CHECK(A[i] == B[i]); +}; + +void test6(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NELEM = 500000; + std::vector A; + A.reserve(NELEM); + + for (unsigned i = 0; i < NELEM; i++) + A.push_back(NELEM - i); + + bsort::parallel_stable_sort(A.begin(), A.end()); + for (unsigned i = 1; i < NELEM; i++) + { + BOOST_CHECK(A[i - 1] <= A[i]); + }; +}; +void test7(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NELEM = 132000; + std::vector A, B; + A.reserve(NELEM); + + for (unsigned i = 0; i < NELEM; i++) + A.push_back(my_rand()); + B = A; + + bsort::parallel_stable_sort(A.begin(), A.end(), 200); + for (unsigned i = 0; i < (NELEM - 1); i++) + { + BOOST_CHECK(A[i] <= A[i + 1]); + }; + std::stable_sort(B.begin(), B.end()); + BOOST_CHECK(A.size() == B.size()); + + for (uint32_t i = 0; i < A.size(); ++i) + BOOST_CHECK(A[i] == B[i]); +}; +void test8 (void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare_t; + std::mt19937 my_rand (0); + std::vector V ; + const uint32_t NELEM = 500000; + V.reserve(NELEM * 10); + + + for (uint32_t k =0 ; k < 10 ; ++k) + { for ( uint32_t i =0 ; i < NELEM ; ++i) + { V.emplace_back(i , k); + }; + iter_t first = V.begin() + (k * NELEM); + iter_t last = first + NELEM ; + std::shuffle( first, last, my_rand); + }; + bsort::parallel_stable_sort + ( V.begin() , V.end(), compare_t()); + for ( uint32_t i =0 ; i < ( NELEM * 10); ++i) + { BOOST_CHECK ( V[i].num == (i / 10) and V[i].tail == (i %10) ); + }; +} +int test_main(int, char *[]) +{ + test3(); + test4(); + test5(); + test6(); + test7(); + test8(); + return 0; +} diff --git a/test/test_sample_sort.cpp b/test/test_sample_sort.cpp new file mode 100644 index 0000000..fae2474 --- /dev/null +++ b/test/test_sample_sort.cpp @@ -0,0 +1,320 @@ +//---------------------------------------------------------------------------- +/// @file test_sample_sort.cpp +/// @brief test sample_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace bss = boost::sort::sample_detail; +using namespace boost::sort::common; + +std::mt19937_64 my_rand(0); + +void test3(); +void test4(); +void test5(); +void test6(); +void test7(); +void test8(); +void test9(); + +struct xk +{ + unsigned tail : 4; + unsigned num : 28; + xk ( uint32_t n =0 , uint32_t t =0): tail (t), num(n){}; + bool operator< (xk A) const { return (num < A.num); }; +}; +void test3() +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + std::mt19937_64 my_rand(0); + + const uint32_t NMAX = 500000; + + std::vector V1, V2, V3; + V1.reserve(NMAX); + for (uint32_t i = 0; i < 8; ++i) + { + for (uint32_t k = 0; k < NMAX; ++k) + { + uint32_t NM = my_rand(); + xk G; + G.num = NM >> 3; + G.tail = i; + V1.push_back(G); + }; + }; + V3 = V2 = V1; + bss::sample_sort(V1.begin(), V1.end()); + std::stable_sort(V2.begin(), V2.end()); + bss::sample_sort(V3.begin(), V3.end(), 0); + + BOOST_CHECK(V1.size() == V2.size()); + for (uint32_t i = 0; i < V1.size(); ++i) + { + BOOST_CHECK(V1[i].num == V2[i].num and V1[i].tail == V2[i].tail); + }; + + BOOST_CHECK(V3.size() == V2.size()); + for (uint32_t i = 0; i < V3.size(); ++i) + { + BOOST_CHECK(V3[i].num == V2[i].num and V3[i].tail == V2[i].tail); + }; +} +; + +void test4(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NElem = 500000; + std::vector V1; + std::mt19937_64 my_rand(0); + + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(my_rand() % NElem); + + // bss::sample_sort unsorted + bss::sample_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] <= V1[i]); + }; + + V1.clear(); + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(i); + + // bss::sample_sort sorted + bss::sample_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] <= V1[i]); + }; + + V1.clear(); + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(NElem - i); + + // bss::sample_sort reverse sorted + bss::sample_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] <= V1[i]); + }; + + V1.clear(); + for (uint32_t i = 0; i < NElem; ++i) + V1.push_back(1000); + + // bss::sample_sort equals + bss::sample_sort(V1.begin(), V1.end()); + for (unsigned i = 1; i < NElem; i++) + { + BOOST_CHECK(V1[i - 1] == V1[i]); + }; +} +; + +void test5(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + const uint32_t KMax = 500000; + + std::vector K, M; + std::mt19937_64 my_rand(0); + std::less comp; + + for (uint32_t i = 0; i < KMax; ++i) + K.push_back(my_rand()); + M = K; + + // bss::sample_sort - random elements + uint64_t *Ptr = std::get_temporary_buffer(KMax).first; + if (Ptr == nullptr) throw std::bad_alloc(); + range Rbuf(Ptr, Ptr + KMax); + + bss::sample_sort(K.begin(), K.end(), comp, + std::thread::hardware_concurrency(), Rbuf); + + std::return_temporary_buffer(Ptr); + + std::stable_sort(M.begin(), M.end(), comp); + for (unsigned i = 0; i < KMax; i++) + BOOST_CHECK(M[i] == K[i]); +} +; + +void test6(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + std::vector V; + + for (uint32_t i = 0; i < 2083333; ++i) + V.push_back(i); + bss::sample_sort(V.begin(), V.end()); + for (uint32_t i = 0; i < V.size(); ++i) + { + BOOST_CHECK(V[i] == i); + }; +} +; + +void test7(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + + const uint32_t NELEM = 416667; + std::vector A; + + for (uint32_t i = 0; i < 1000; ++i) + A.push_back(0); + for (uint32_t i = 0; i < NELEM; ++i) + A.push_back(NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) + A.push_back(0); + + bss::sample_sort(A.begin() + 1000, + A.begin() + (1000 + NELEM)); + + for (iter_t it = A.begin() + 1000; it != A.begin() + (1000 + NELEM); ++it) + { + BOOST_CHECK((*(it - 1)) <= (*it)); + }; + + BOOST_CHECK (A[998] == 0 and A[999] == 0 and A[1000 + NELEM] == 0 + and A[1001 + NELEM] == 0); + + A.clear(); + for (uint32_t i = 0; i < 1000; ++i) + A.push_back(999999999); + for (uint32_t i = 0; i < NELEM; ++i) + A.push_back(NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) + A.push_back(999999999); + + bss::sample_sort + (A.begin() + 1000, A.begin() + (1000 + NELEM)); + + for (iter_t it = A.begin() + 1001; it != A.begin() + (1000 + NELEM); ++it) + { + BOOST_CHECK((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[998] == 999999999 + and A[999] == 999999999 + and A[1000 + NELEM] == 999999999 + and A[1001 + NELEM] == 999999999); + + std::vector B(NELEM + 2000, 0); + + A.clear(); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back(NELEM - i); + + range Rbuf(&B[1000], (&B[1000]) + NELEM); + bss::sample_sort + ( A.begin(), A.end(), std::less(), + std::thread::hardware_concurrency(), Rbuf); + + for (iter_t it = A.begin() + 1; it != A.end(); ++it) + { + BOOST_CHECK((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (B[998] == 0 and B[999] == 0 and B[1000 + NELEM] == 0 + and B[1001 + NELEM] == 0); + + for (uint32_t i = 0; i < B.size(); ++i) B[i] = 999999999; + A.clear(); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back(NELEM - i); + + bss::sample_sort > + (A.begin(), A.end(), std::less(), + std::thread::hardware_concurrency(), Rbuf); + + for (iter_t it = A.begin() + 1; it != A.end(); ++it) + { + if ((*(it - 1)) > (*it)) std::cout << "error 2\n"; + }; + BOOST_CHECK (B[998] == 999999999 and B[999] == 999999999 + and B[1000 + NELEM] == 999999999 + and B[1001 + NELEM] == 999999999); +} +; +void test8(void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare; + const uint32_t KMax = 66000; + + std::vector K, M; + std::mt19937_64 my_rand(0); + std::less comp; + + for (uint32_t i = 0; i < KMax; ++i) + K.push_back(my_rand()); + M = K; + + bss::sample_sort(K.begin(), K.end(), 300); + + std::stable_sort(M.begin(), M.end(), comp); + for (unsigned i = 0; i < KMax; i++) + BOOST_CHECK(M[i] == K[i]); +}; + +void test9 (void) +{ + typedef typename std::vector::iterator iter_t; + typedef std::less compare_t; + std::mt19937 my_rand (0); + std::vector V ; + const uint32_t NELEM = 500000; + V.reserve(NELEM * 10); + + + for (uint32_t k =0 ; k < 10 ; ++k) + { for ( uint32_t i =0 ; i < NELEM ; ++i) + { V.emplace_back(i , k); + }; + iter_t first = V.begin() + (k * NELEM); + iter_t last = first + NELEM ; + std::shuffle( first, last, my_rand); + }; + bss::sample_sort( V.begin() , V.end(), compare_t()); + for ( uint32_t i =0 ; i < ( NELEM * 10); ++i) + { BOOST_CHECK ( V[i].num == (i / 10) and V[i].tail == (i %10) ); + }; +} + + +int test_main(int, char *[]) +{ + test3(); + test4(); + test5(); + test6(); + test7(); + test8(); + test9(); + return 0; +}; diff --git a/test/test_spinsort.cpp b/test/test_spinsort.cpp new file mode 100644 index 0000000..31f2541 --- /dev/null +++ b/test/test_spinsort.cpp @@ -0,0 +1,430 @@ +//---------------------------------------------------------------------------- +/// @file test_spinsort.cpp +/// @brief test program of the spinsort algorithm +/// +/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace boost::sort; +using spin_detail::check_stable_sort; +using spin_detail::range_sort; +using common::range; + +void test2 ( ); +void test3 ( ); +void test4 ( ); +void test5 ( ); +void test6 ( ); +void test7 ( ); + +struct xk +{ + unsigned tail : 4; + unsigned num : 28; + xk ( uint32_t n =0 , uint32_t t =0): tail (t), num(n){}; + bool operator< (xk A) const { return (num < A.num); }; +}; + +void test1 ( ) +{ + + std::mt19937_64 my_rand (0); + typedef typename std::vector< uint64_t >::iterator iter_t; + typedef std::less< uint64_t > compare; + typedef range< iter_t > range_it; + + std::vector< uint64_t > V1, V2, V3; + uint32_t NELEM = 0; + + //---------------------------------------------------------------------- + // Range of 40 , randomly filled level 1 + //---------------------------------------------------------------------- + NELEM = 40; + for (uint32_t i = 0; i < NELEM; ++i) { + V1.push_back (my_rand ( ) % 10000); + }; + V2 = V1; + range_it R2 (V2.begin ( ), V2.end ( )); + + V3.resize (NELEM, 0); + range_it RAux (V3.begin ( ), V3.end ( )); + range_sort (R2, RAux, compare ( ), 1); + + std::sort (V1.begin ( ), V1.end ( )); + + for (uint32_t i = 0; i < NELEM; ++i) { + BOOST_CHECK (V1[ i ] == V3[ i ]); + }; + + //---------------------------------------------------------------------- + // Range of 75, randomly filled , level 2 + //--------------------------------------------------------------------- + V1.clear ( ); + V2.clear ( ); + V3.clear ( ); + NELEM = 75; + + for (uint32_t i = 0; i < NELEM; ++i) { + V1.push_back (my_rand ( ) % 10000); + }; + V2 = V1; + R2 = range_it (V2.begin ( ), V2.end ( )); + + V3.resize (NELEM, 0); + RAux = range_it (V3.begin ( ), V3.end ( )); + range_sort (RAux, R2, compare ( ), 2); + std::sort (V1.begin ( ), V1.end ( )); + + for (uint32_t i = 0; i < NELEM; ++i) { + BOOST_CHECK (V1[ i ] == V2[ i ]); + }; + + //---------------------------------------------------------------------- + // Range of 200, randomly filled , level 3 + //--------------------------------------------------------------------- + V1.clear ( ); + V2.clear ( ); + V3.clear ( ); + NELEM = 200; + + for (uint32_t i = 0; i < NELEM; ++i) { + V1.push_back (my_rand ( ) % 10000); + }; + V2 = V1; + R2 = range_it (V2.begin ( ), V2.end ( )); + + V3.resize (NELEM, 0); + RAux = range_it (V3.begin ( ), V3.end ( )); + + range_sort (R2, RAux, compare ( ), 3); + + std::sort (V1.begin ( ), V1.end ( )); + + for (uint32_t i = 0; i < NELEM; ++i) { + BOOST_CHECK (V1[ i ] == V3[ i ]); + }; + + //---------------------------------------------------------------------- + // Range of 400, randomly filled , level 4 + //--------------------------------------------------------------------- + V1.clear ( ); + V2.clear ( ); + V3.clear ( ); + NELEM = 400; + + for (uint32_t i = 0; i < NELEM; ++i) { + V1.push_back (my_rand ( ) % 10000); + }; + V2 = V1; + R2 = range_it (V2.begin ( ), V2.end ( )); + + V3.resize (NELEM, 0); + RAux = range_it (V3.begin ( ), V3.end ( )); + + range_sort (RAux, R2, compare ( ), 4); + std::sort (V1.begin ( ), V1.end ( )); + + for (uint32_t i = 0; i < NELEM; ++i) { + BOOST_CHECK (V1[ i ] == V2[ i ]); + }; + +}; + +void test2 ( ) +{ + uint64_t V1[ 300 ]; + typedef std::less< uint64_t > compare_t; + compare_t comp; + + for (uint32_t i = 0; i < 200; ++i) V1[ i ] = i; + indirect_spinsort (&V1[ 0 ], &V1[ 200 ], comp); + //spinsort< uint64_t *, compare_t > (&V1[ 0 ], &V1[ 200 ], comp); + for (unsigned i = 1; i < 200; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + for (uint32_t i = 0; i < 200; ++i) V1[ i ] = 199 - i; + spinsort< uint64_t *, compare_t > (&V1[ 0 ], &V1[ 200 ], comp); + for (unsigned i = 1; i < 200; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + for (uint32_t i = 0; i < 300; ++i) V1[ i ] = 299 - i; + + spinsort< uint64_t *, compare_t > (&V1[ 0 ], &V1[ 300 ], comp); + for (unsigned i = 1; i < 300; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + for (uint32_t i = 0; i < 300; ++i) V1[ i ] = 88; + + spinsort< uint64_t *, compare_t > (&V1[ 0 ], &V1[ 300 ], comp); + for (unsigned i = 1; i < 300; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; +}; + +void test3 ( ) +{ + typedef typename std::vector< xk >::iterator iter_t; + typedef std::less< xk > compare_t; + std::mt19937_64 my_rand (0); + + const uint32_t NMAX = 500000; + + + std::vector< xk > V1, V2, V3; + V1.reserve (NMAX); + for (uint32_t i = 0; i < 8; ++i) { + for (uint32_t k = 0; k < NMAX; ++k) { + uint32_t NM = my_rand ( ); + xk G; + G.num = NM >> 3; + G.tail = i; + V1.push_back (G); + }; + }; + V3 = V2 = V1; + spinsort< iter_t, compare_t > (V1.begin ( ), V1.end ( ), compare_t ( )); + std::stable_sort (V2.begin ( ), V2.end ( )); + + BOOST_CHECK (V1.size ( ) == V2.size ( )); + for (uint32_t i = 0; i < V1.size ( ); ++i) { + BOOST_CHECK (V1[ i ].num == V2[ i ].num and + V1[ i ].tail == V2[ i ].tail); + }; +}; + +void test4 (void) +{ + typedef std::less< uint64_t > compare_t; + typedef typename std::vector< uint64_t >::iterator iter_t; + + const uint32_t NElem = 500000; + std::vector< uint64_t > V1; + std::mt19937_64 my_rand (0); + compare_t comp; + + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (my_rand ( ) % NElem); + + spinsort< iter_t, compare_t > (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (i); + spinsort< iter_t, compare_t > (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (NElem - i); + spinsort< iter_t, compare_t > (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] <= V1[ i ]); + }; + + V1.clear ( ); + for (uint32_t i = 0; i < NElem; ++i) V1.push_back (1000); + spinsort< iter_t, compare_t > (V1.begin ( ), V1.end ( ), comp); + for (unsigned i = 1; i < NElem; i++) { + BOOST_CHECK (V1[ i - 1 ] == V1[ i ]); + }; +}; + +void test5 (void) +{ + typedef std::vector< uint64_t >::iterator iter_t; + typedef std::less< uint64_t > compare; + + const uint32_t KMax = 500000; + + std::vector< uint64_t > K, M; + std::mt19937_64 my_rand (0); + compare comp; + + for (uint32_t i = 0; i < KMax; ++i) K.push_back (my_rand ( )); + M = K; + + uint64_t *Ptr = std::get_temporary_buffer< uint64_t > (KMax >> 1).first; + if (Ptr == nullptr) throw std::bad_alloc ( ); + range< uint64_t * > Rbuf (Ptr, Ptr + (KMax >> 1)); + + spin_detail::spinsort< iter_t, compare > (K.begin ( ), K.end ( ), comp, Rbuf); + + std::return_temporary_buffer (Ptr); + + std::stable_sort (M.begin ( ), M.end ( ), comp); + for (unsigned i = 0; i < KMax; i++) BOOST_CHECK (M[ i ] == K[ i ]); +}; + +void test6 (void) +{ + typedef std::vector< uint64_t >::iterator iter_t; + typedef std::less< uint64_t > compare_t; + std::vector< uint64_t > V; + + for (uint32_t i = 0; i < 2083333; ++i) V.push_back (i); + spinsort< iter_t, compare_t > (V.begin ( ), V.end ( ), compare_t ( )); + for (uint32_t i = 0; i < V.size ( ); ++i) { + BOOST_CHECK (V[ i ] == i); + }; +}; + +void test7 (void) +{ + typedef typename std::vector< uint64_t >::iterator iter_t; + typedef std::less< uint64_t > compare_t; + + compare_t comp; + const uint32_t NELEM = 416667; + const uint32_t N1 = (NELEM + 1) / 2; + std::vector< uint64_t > A; + + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (0); + + spinsort< iter_t, compare_t > (A.begin ( ) + 1000, + A.begin ( ) + (1000 + NELEM), comp); + + for (iter_t it = A.begin ( ) + 1000; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 0 and A[ 999 ] == 0 and A[ 1000 + NELEM ] == 0 and + A[ 1001 + NELEM ] == 0); + + + A.clear ( ); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + for (uint32_t i = 0; i < 1000; ++i) A.push_back (999999999); + + spinsort< iter_t, compare_t > (A.begin ( ) + 1000, + A.begin ( ) + (1000 + NELEM), comp); + + for (iter_t it = A.begin ( ) + 1001; it != A.begin ( ) + (1000 + NELEM); + ++it) + { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (A[ 998 ] == 999999999 and A[ 999 ] == 999999999 and + A[ 1000 + NELEM ] == 999999999 and + A[ 1001 + NELEM ] == 999999999); + + std::vector< uint64_t > B (N1 + 2000, 0); + + A.clear ( ); + range< uint64_t * > Rbuf (&B[ 1000 ], (&B[ 1000 ]) + N1); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + + spin_detail::spinsort< iter_t, compare_t > (A.begin ( ), A.end ( ), + comp, Rbuf); + for (iter_t it = A.begin ( ) + 1; it != A.end ( ); ++it) { + if ((*(it - 1)) > (*it)) std::cout << "error 1\n"; + }; + BOOST_CHECK (B[ 998 ] == 0 and B[ 999 ] == 0 and B[ 1000 + N1 ] == 0 and + B[ 1001 + N1 ] == 0); + + for (uint32_t i = 0; i < B.size ( ); ++i) B[ i ] = 999999999; + A.clear ( ); + for (uint32_t i = 0; i < NELEM; ++i) A.push_back (NELEM - i); + spin_detail::spinsort< iter_t, compare_t > (A.begin ( ), A.end ( ), + comp, Rbuf); + + for (iter_t it = A.begin ( ) + 1; it != A.end ( ); ++it) { + BOOST_CHECK ((*(it - 1)) <= (*it)); + }; + BOOST_CHECK (B[ 998 ] == 999999999 and B[ 999 ] == 999999999 and + B[ 1000 + N1 ] == 999999999 and B[ 1001 + N1 ] == 999999999); +}; +void test8 (void) +{ + typedef typename std::vector::iterator iter_t ; + typedef range range_it ; + std::less comp ; + + std::vector V = { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, + 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, + 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, + 14, 2, 4, 6, 8, 10, 12, 16, 18, 20}; + range_it rdata (V.begin() , V.end()); + std::vector aux (40,0 ); + range_it raux ( aux.begin() , aux.end()); + + check_stable_sort ( rdata, raux, comp ); + for ( uint32_t i =0 ; i < V.size() ; ++i) + std::cout<::iterator iter_t; + typedef std::less compare_t; + std::mt19937 my_rand (0); + std::vector V ; + const uint32_t NELEM = 1000000; + V.reserve(NELEM * 10); + + + for (uint32_t k =0 ; k < 10 ; ++k) + { for ( uint32_t i =0 ; i < NELEM ; ++i) + { V.emplace_back(i , k); + }; + iter_t first = V.begin() + (k * NELEM); + iter_t last = first + NELEM ; + std::shuffle( first, last, my_rand); + }; + spinsort( V.begin() , V.end(), compare_t()); + for ( uint32_t i =0 ; i < ( NELEM * 10); ++i) + { BOOST_CHECK ( V[i].num == (i / 10) and V[i].tail == (i %10) ); + }; +} +int test_main (int, char *[]) +{ + test1 ( ); + test2 ( ); + test3 ( ); + test4 ( ); + test5 ( ); + test6 ( ); + test7 ( ); + test8 ( ); + test9 ( ); + return 0; +};