2
0
mirror of https://github.com/boostorg/compute.git synced 2026-02-23 15:42:17 +00:00

Merge pull request #656 from Slonegg/master

transform_if_impl optimization: avoid reduction
This commit is contained in:
Kyle Lutz
2016-10-18 21:08:09 -07:00
committed by GitHub

View File

@@ -54,14 +54,12 @@ inline OutputIterator transform_if_impl(InputIterator first,
 << predicate(first[k1.get_global_id(0)]) << " ? 1 : 0;\n";
 k1.exec_1d(queue, 0, count);
-// count number of elements to be copied
-size_t copied_element_count =
-::boost::compute::count(indices.begin(), indices.end(), 1, queue);
 // scan indices
+size_t copied_element_count = (indices.cend() - 1).read(queue);
 ::boost::compute::exclusive_scan(
 indices.begin(), indices.end(), indices.begin(), queue
 );
+copied_element_count += (indices.cend() - 1).read(queue); // last scan element plus last mask element
 // copy values
 ::boost::compute::detail::meta_kernel k2("transform_if_do_copy");