2
0
mirror of https://github.com/boostorg/compute.git synced 2026-01-27 06:42:19 +00:00

Merge pull request #239 from roshanr95/set_algorithms

Enforce same tile_size for all three kernels
This commit is contained in:
Kyle Lutz
2014-08-19 18:59:53 -07:00
5 changed files with 34 additions and 34 deletions

View File

@@ -118,17 +118,17 @@ inline bool includes(InputIterator1 first1,
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
int tile_size = 4;
int tile_size = 1024;
int count1 = detail::iterator_range_size(first1, last1);
int count2 = detail::iterator_range_size(first2, last2);
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
// Tile the sets
detail::tile_sets_kernel tiling_kernel;
tiling_kernel.tile_size = tile_size;
tiling_kernel.set_range(first1, last1, first2, last2,
tile_a.begin()+1, tile_b.begin()+1);
fill_n(tile_a.begin(), 1, 0, queue);
@@ -138,11 +138,11 @@ inline bool includes(InputIterator1 first1,
fill_n(tile_a.end()-1, 1, count1, queue);
fill_n(tile_b.end()-1, 1, count2, queue);
vector<uint_> result((count1+count2+3)/tile_size, queue.get_context());
vector<uint_> result((count1+count2+tile_size-1)/tile_size, queue.get_context());
// Find individually
detail::serial_includes_kernel includes_kernel;
includes_kernel.tile_size = tile_size;
includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
tile_b.begin(), result.begin());

View File

@@ -133,17 +133,17 @@ inline OutputIterator set_difference(InputIterator1 first1,
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
int tile_size = 4;
int tile_size = 1024;
int count1 = detail::iterator_range_size(first1, last1);
int count2 = detail::iterator_range_size(first2, last2);
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
// Tile the sets
detail::tile_sets_kernel tiling_kernel;
tiling_kernel.tile_size = tile_size;
tiling_kernel.set_range(first1, last1, first2, last2,
tile_a.begin()+1, tile_b.begin()+1);
fill_n(tile_a.begin(), 1, 0, queue);
@@ -154,12 +154,12 @@ inline OutputIterator set_difference(InputIterator1 first1,
fill_n(tile_b.end()-1, 1, count2, queue);
vector<value_type> temp_result(count1+count2, queue.get_context());
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
fill_n(counts.end()-1, 1, 0, queue);
// Find individual differences
detail::serial_set_difference_kernel difference_kernel;
difference_kernel.tile_size = tile_size;
difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
tile_b.begin(), temp_result.begin(), counts.begin());
@@ -169,7 +169,7 @@ inline OutputIterator set_difference(InputIterator1 first1,
// Compact the results
detail::compact_kernel compact_kernel;
compact_kernel.tile_size = tile_size;
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
compact_kernel.exec(queue);

View File

@@ -121,17 +121,17 @@ inline OutputIterator set_intersection(InputIterator1 first1,
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
int tile_size = 4;
int tile_size = 1024;
int count1 = detail::iterator_range_size(first1, last1);
int count2 = detail::iterator_range_size(first2, last2);
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
// Tile the sets
detail::tile_sets_kernel tiling_kernel;
tiling_kernel.tile_size = tile_size;
tiling_kernel.set_range(first1, last1, first2, last2,
tile_a.begin()+1, tile_b.begin()+1);
fill_n(tile_a.begin(), 1, 0, queue);
@@ -142,12 +142,12 @@ inline OutputIterator set_intersection(InputIterator1 first1,
fill_n(tile_b.end()-1, 1, count2, queue);
vector<value_type> temp_result(count1+count2, queue.get_context());
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
fill_n(counts.end()-1, 1, 0, queue);
// Find individual intersections
detail::serial_set_intersection_kernel intersection_kernel;
intersection_kernel.tile_size = tile_size;
intersection_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
tile_b.begin(), temp_result.begin(), counts.begin());
@@ -157,7 +157,7 @@ inline OutputIterator set_intersection(InputIterator1 first1,
// Compact the results
detail::compact_kernel compact_kernel;
compact_kernel.tile_size = tile_size;
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
compact_kernel.exec(queue);

View File

@@ -144,17 +144,17 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1,
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
int tile_size = 4;
int tile_size = 1024;
int count1 = detail::iterator_range_size(first1, last1);
int count2 = detail::iterator_range_size(first2, last2);
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
// Tile the sets
detail::tile_sets_kernel tiling_kernel;
tiling_kernel.tile_size = tile_size;
tiling_kernel.set_range(first1, last1, first2, last2,
tile_a.begin()+1, tile_b.begin()+1);
fill_n(tile_a.begin(), 1, 0, queue);
@@ -165,12 +165,12 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1,
fill_n(tile_b.end()-1, 1, count2, queue);
vector<value_type> temp_result(count1+count2, queue.get_context());
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
fill_n(counts.end()-1, 1, 0, queue);
// Find individual symmetric differences
detail::serial_set_symmetric_difference_kernel symmetric_difference_kernel;
symmetric_difference_kernel.tile_size = tile_size;
symmetric_difference_kernel.set_range(first1, first2, tile_a.begin(),
tile_a.end(), tile_b.begin(),
temp_result.begin(), counts.begin());
@@ -181,7 +181,7 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1,
// Compact the results
detail::compact_kernel compact_kernel;
compact_kernel.tile_size = tile_size;
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
compact_kernel.exec(queue);

View File

@@ -146,17 +146,17 @@ inline OutputIterator set_union(InputIterator1 first1,
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
int tile_size = 4;
int tile_size = 1024;
int count1 = detail::iterator_range_size(first1, last1);
int count2 = detail::iterator_range_size(first2, last2);
vector<uint_> tile_a((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+3)/tile_size+1, queue.get_context());
vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
// Tile the sets
detail::tile_sets_kernel tiling_kernel;
tiling_kernel.tile_size = tile_size;
tiling_kernel.set_range(first1, last1, first2, last2,
tile_a.begin()+1, tile_b.begin()+1);
fill_n(tile_a.begin(), 1, 0, queue);
@@ -167,12 +167,12 @@ inline OutputIterator set_union(InputIterator1 first1,
fill_n(tile_b.end()-1, 1, count2, queue);
vector<value_type> temp_result(count1+count2, queue.get_context());
vector<uint_> counts((count1+count2+3)/tile_size + 1, queue.get_context());
vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
fill_n(counts.end()-1, 1, 0, queue);
// Find individual unions
detail::serial_set_union_kernel union_kernel;
union_kernel.tile_size = tile_size;
union_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
tile_b.begin(), temp_result.begin(), counts.begin());
@@ -182,7 +182,7 @@ inline OutputIterator set_union(InputIterator1 first1,
// Compact the results
detail::compact_kernel compact_kernel;
compact_kernel.tile_size = tile_size;
compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
compact_kernel.exec(queue);