diff --git a/include/boost/compute/algorithm/includes.hpp b/include/boost/compute/algorithm/includes.hpp index 7c51ddf0..eeaec72a 100644 --- a/include/boost/compute/algorithm/includes.hpp +++ b/include/boost/compute/algorithm/includes.hpp @@ -118,17 +118,17 @@ inline bool includes(InputIterator1 first1, { typedef typename std::iterator_traits::value_type value_type; - int tile_size = 4; + int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); - vector tile_a((count1+count2+3)/tile_size+1, queue.get_context()); - vector tile_b((count1+count2+3)/tile_size+1, queue.get_context()); + vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::tile_sets_kernel tiling_kernel; - + tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); @@ -138,11 +138,11 @@ inline bool includes(InputIterator1 first1, fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); - vector result((count1+count2+3)/tile_size, queue.get_context()); + vector result((count1+count2+tile_size-1)/tile_size, queue.get_context()); // Find individually detail::serial_includes_kernel includes_kernel; - + includes_kernel.tile_size = tile_size; includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), result.begin()); diff --git a/include/boost/compute/algorithm/set_difference.hpp b/include/boost/compute/algorithm/set_difference.hpp index 6a119233..bb2123a8 100644 --- a/include/boost/compute/algorithm/set_difference.hpp +++ b/include/boost/compute/algorithm/set_difference.hpp @@ -133,17 +133,17 @@ inline OutputIterator set_difference(InputIterator1 first1, { typedef typename std::iterator_traits::value_type value_type; - int tile_size = 4; + int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); - vector tile_a((count1+count2+3)/tile_size+1, queue.get_context()); - vector tile_b((count1+count2+3)/tile_size+1, queue.get_context()); + vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::tile_sets_kernel tiling_kernel; - + tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); @@ -154,12 +154,12 @@ inline OutputIterator set_difference(InputIterator1 first1, fill_n(tile_b.end()-1, 1, count2, queue); vector temp_result(count1+count2, queue.get_context()); - vector counts((count1+count2+3)/tile_size + 1, queue.get_context()); + vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual differences detail::serial_set_difference_kernel difference_kernel; - + difference_kernel.tile_size = tile_size; difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); @@ -169,7 +169,7 @@ inline OutputIterator set_difference(InputIterator1 first1, // Compact the results detail::compact_kernel compact_kernel; - + compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue); diff --git a/include/boost/compute/algorithm/set_intersection.hpp b/include/boost/compute/algorithm/set_intersection.hpp index 1ab87925..822ec55c 100644 --- a/include/boost/compute/algorithm/set_intersection.hpp +++ b/include/boost/compute/algorithm/set_intersection.hpp @@ -121,17 +121,17 @@ inline OutputIterator set_intersection(InputIterator1 first1, { typedef typename std::iterator_traits::value_type value_type; - int tile_size = 4; + int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); - vector tile_a((count1+count2+3)/tile_size+1, queue.get_context()); - vector tile_b((count1+count2+3)/tile_size+1, queue.get_context()); + vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::tile_sets_kernel tiling_kernel; - + tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); @@ -142,12 +142,12 @@ inline OutputIterator set_intersection(InputIterator1 first1, fill_n(tile_b.end()-1, 1, count2, queue); vector temp_result(count1+count2, queue.get_context()); - vector counts((count1+count2+3)/tile_size + 1, queue.get_context()); + vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual intersections detail::serial_set_intersection_kernel intersection_kernel; - + intersection_kernel.tile_size = tile_size; intersection_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); @@ -157,7 +157,7 @@ inline OutputIterator set_intersection(InputIterator1 first1, // Compact the results detail::compact_kernel compact_kernel; - + compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue); diff --git a/include/boost/compute/algorithm/set_symmetric_difference.hpp b/include/boost/compute/algorithm/set_symmetric_difference.hpp index 907082f6..76abdb8f 100644 --- a/include/boost/compute/algorithm/set_symmetric_difference.hpp +++ b/include/boost/compute/algorithm/set_symmetric_difference.hpp @@ -144,17 +144,17 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1, { typedef typename std::iterator_traits::value_type value_type; - int tile_size = 4; + int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); - vector tile_a((count1+count2+3)/tile_size+1, queue.get_context()); - vector tile_b((count1+count2+3)/tile_size+1, queue.get_context()); + vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::tile_sets_kernel tiling_kernel; - + tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); @@ -165,12 +165,12 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1, fill_n(tile_b.end()-1, 1, count2, queue); vector temp_result(count1+count2, queue.get_context()); - vector counts((count1+count2+3)/tile_size + 1, queue.get_context()); + vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual symmetric differences detail::serial_set_symmetric_difference_kernel symmetric_difference_kernel; - + symmetric_difference_kernel.tile_size = tile_size; symmetric_difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); @@ -181,7 +181,7 @@ inline OutputIterator set_symmetric_difference(InputIterator1 first1, // Compact the results detail::compact_kernel compact_kernel; - + compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue); diff --git a/include/boost/compute/algorithm/set_union.hpp b/include/boost/compute/algorithm/set_union.hpp index 2e32c116..f45dbc38 100644 --- a/include/boost/compute/algorithm/set_union.hpp +++ b/include/boost/compute/algorithm/set_union.hpp @@ -146,17 +146,17 @@ inline OutputIterator set_union(InputIterator1 first1, { typedef typename std::iterator_traits::value_type value_type; - int tile_size = 4; + int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); - vector tile_a((count1+count2+3)/tile_size+1, queue.get_context()); - vector tile_b((count1+count2+3)/tile_size+1, queue.get_context()); + vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::tile_sets_kernel tiling_kernel; - + tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); @@ -167,12 +167,12 @@ inline OutputIterator set_union(InputIterator1 first1, fill_n(tile_b.end()-1, 1, count2, queue); vector temp_result(count1+count2, queue.get_context()); - vector counts((count1+count2+3)/tile_size + 1, queue.get_context()); + vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual unions detail::serial_set_union_kernel union_kernel; - + union_kernel.tile_size = tile_size; union_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); @@ -182,7 +182,7 @@ inline OutputIterator set_union(InputIterator1 first1, // Compact the results detail::compact_kernel compact_kernel; - + compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue);