Mirror of https://github.com/boostorg/compute.git (last synced 2026-01-27 06:42:19 +00:00).
Merge pull request #239 from roshanr95/set_algorithms
Enforce same tile_size for all three kernels
This commit is contained in:
@@ -118,17 +118,17 @@ inline bool includes(InputIterator1 first1,
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
|
||||
|
||||
int tile_size = 4;
|
||||
int tile_size = 1024;
|
||||
|
||||
int count1 = detail::iterator_range_size(first1, last1);
|
||||
int count2 = detail::iterator_range_size(first2, last2);
|
||||
|
||||
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
|
||||
// Tile the sets
|
||||
detail::tile_sets_kernel tiling_kernel;
|
||||
|
||||
tiling_kernel.tile_size = tile_size;
|
||||
tiling_kernel.set_range(first1, last1, first2, last2,
|
||||
tile_a.begin()+1, tile_b.begin()+1);
|
||||
fill_n(tile_a.begin(), 1, 0, queue);
|
||||
@@ -138,11 +138,11 @@ inline bool includes(InputIterator1 first1,
|
||||
fill_n(tile_a.end()-1, 1, count1, queue);
|
||||
fill_n(tile_b.end()-1, 1, count2, queue);
|
||||
|
||||
vector<uint_> result((count1+count2+3)/tile_size, queue.get_context());
|
||||
vector<uint_> result((count1+count2+tile_size-1)/tile_size, queue.get_context());
|
||||
|
||||
// Find individually
|
||||
detail::serial_includes_kernel includes_kernel;
|
||||
|
||||
includes_kernel.tile_size = tile_size;
|
||||
includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
|
||||
tile_b.begin(), result.begin());
|
||||
|
||||
|
||||
@@ -133,17 +133,17 @@ inline OutputIterator set_difference(InputIterator1 first1,
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
|
||||
|
||||
int tile_size = 4;
|
||||
int tile_size = 1024;
|
||||
|
||||
int count1 = detail::iterator_range_size(first1, last1);
|
||||
int count2 = detail::iterator_range_size(first2, last2);
|
||||
|
||||
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
|
||||
// Tile the sets
|
||||
detail::tile_sets_kernel tiling_kernel;
|
||||
|
||||
tiling_kernel.tile_size = tile_size;
|
||||
tiling_kernel.set_range(first1, last1, first2, last2,
|
||||
tile_a.begin()+1, tile_b.begin()+1);
|
||||
fill_n(tile_a.begin(), 1, 0, queue);
|
||||
@@ -154,12 +154,12 @@ inline OutputIterator set_difference(InputIterator1 first1,
|
||||
fill_n(tile_b.end()-1, 1, count2, queue);
|
||||
|
||||
vector<value_type> temp_result(count1+count2, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
|
||||
fill_n(counts.end()-1, 1, 0, queue);
|
||||
|
||||
// Find individual differences
|
||||
detail::serial_set_difference_kernel difference_kernel;
|
||||
|
||||
difference_kernel.tile_size = tile_size;
|
||||
difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
|
||||
tile_b.begin(), temp_result.begin(), counts.begin());
|
||||
|
||||
@@ -169,7 +169,7 @@ inline OutputIterator set_difference(InputIterator1 first1,
|
||||
|
||||
// Compact the results
|
||||
detail::compact_kernel compact_kernel;
|
||||
|
||||
compact_kernel.tile_size = tile_size;
|
||||
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
|
||||
|
||||
compact_kernel.exec(queue);
|
||||
|
||||
@@ -121,17 +121,17 @@ inline OutputIterator set_intersection(InputIterator1 first1,
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
|
||||
|
||||
int tile_size = 4;
|
||||
int tile_size = 1024;
|
||||
|
||||
int count1 = detail::iterator_range_size(first1, last1);
|
||||
int count2 = detail::iterator_range_size(first2, last2);
|
||||
|
||||
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
|
||||
// Tile the sets
|
||||
detail::tile_sets_kernel tiling_kernel;
|
||||
|
||||
tiling_kernel.tile_size = tile_size;
|
||||
tiling_kernel.set_range(first1, last1, first2, last2,
|
||||
tile_a.begin()+1, tile_b.begin()+1);
|
||||
fill_n(tile_a.begin(), 1, 0, queue);
|
||||
@@ -142,12 +142,12 @@ inline OutputIterator set_intersection(InputIterator1 first1,
|
||||
fill_n(tile_b.end()-1, 1, count2, queue);
|
||||
|
||||
vector<value_type> temp_result(count1+count2, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
|
||||
fill_n(counts.end()-1, 1, 0, queue);
|
||||
|
||||
// Find individual intersections
|
||||
detail::serial_set_intersection_kernel intersection_kernel;
|
||||
|
||||
intersection_kernel.tile_size = tile_size;
|
||||
intersection_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
|
||||
tile_b.begin(), temp_result.begin(), counts.begin());
|
||||
|
||||
@@ -157,7 +157,7 @@ inline OutputIterator set_intersection(InputIterator1 first1,
|
||||
|
||||
// Compact the results
|
||||
detail::compact_kernel compact_kernel;
|
||||
|
||||
compact_kernel.tile_size = tile_size;
|
||||
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
|
||||
|
||||
compact_kernel.exec(queue);
|
||||
|
||||
@@ -144,17 +144,17 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1,
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
|
||||
|
||||
int tile_size = 4;
|
||||
int tile_size = 1024;
|
||||
|
||||
int count1 = detail::iterator_range_size(first1, last1);
|
||||
int count2 = detail::iterator_range_size(first2, last2);
|
||||
|
||||
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
|
||||
// Tile the sets
|
||||
detail::tile_sets_kernel tiling_kernel;
|
||||
|
||||
tiling_kernel.tile_size = tile_size;
|
||||
tiling_kernel.set_range(first1, last1, first2, last2,
|
||||
tile_a.begin()+1, tile_b.begin()+1);
|
||||
fill_n(tile_a.begin(), 1, 0, queue);
|
||||
@@ -165,12 +165,12 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1,
|
||||
fill_n(tile_b.end()-1, 1, count2, queue);
|
||||
|
||||
vector<value_type> temp_result(count1+count2, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
|
||||
fill_n(counts.end()-1, 1, 0, queue);
|
||||
|
||||
// Find individual symmetric differences
|
||||
detail::serial_set_symmetric_difference_kernel symmetric_difference_kernel;
|
||||
|
||||
symmetric_difference_kernel.tile_size = tile_size;
|
||||
symmetric_difference_kernel.set_range(first1, first2, tile_a.begin(),
|
||||
tile_a.end(), tile_b.begin(),
|
||||
temp_result.begin(), counts.begin());
|
||||
@@ -181,7 +181,7 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1,
|
||||
|
||||
// Compact the results
|
||||
detail::compact_kernel compact_kernel;
|
||||
|
||||
compact_kernel.tile_size = tile_size;
|
||||
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
|
||||
|
||||
compact_kernel.exec(queue);
|
||||
|
||||
@@ -146,17 +146,17 @@ inline OutputIterator set_union(InputIterator1 first1,
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
|
||||
|
||||
int tile_size = 4;
|
||||
int tile_size = 1024;
|
||||
|
||||
int count1 = detail::iterator_range_size(first1, last1);
|
||||
int count2 = detail::iterator_range_size(first2, last2);
|
||||
|
||||
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
|
||||
|
||||
// Tile the sets
|
||||
detail::tile_sets_kernel tiling_kernel;
|
||||
|
||||
tiling_kernel.tile_size = tile_size;
|
||||
tiling_kernel.set_range(first1, last1, first2, last2,
|
||||
tile_a.begin()+1, tile_b.begin()+1);
|
||||
fill_n(tile_a.begin(), 1, 0, queue);
|
||||
@@ -167,12 +167,12 @@ inline OutputIterator set_union(InputIterator1 first1,
|
||||
fill_n(tile_b.end()-1, 1, count2, queue);
|
||||
|
||||
vector<value_type> temp_result(count1+count2, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
|
||||
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
|
||||
fill_n(counts.end()-1, 1, 0, queue);
|
||||
|
||||
// Find individual unions
|
||||
detail::serial_set_union_kernel union_kernel;
|
||||
|
||||
union_kernel.tile_size = tile_size;
|
||||
union_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
|
||||
tile_b.begin(), temp_result.begin(), counts.begin());
|
||||
|
||||
@@ -182,7 +182,7 @@ inline OutputIterator set_union(InputIterator1 first1,
|
||||
|
||||
// Compact the results
|
||||
detail::compact_kernel compact_kernel;
|
||||
|
||||
compact_kernel.tile_size = tile_size;
|
||||
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
|
||||
|
||||
compact_kernel.exec(queue);
|
||||
|
||||
Reference in New Issue
Block a user