2
0
mirror of https://github.com/boostorg/compute.git synced 2026-02-18 14:02:13 +00:00

Fix find_extrema_reduce kernel in find_extrema_with_reduce.hpp

It was not guaranteed that every barrier would be encountered by all
work-items in a work-group executing the kernel due to some work-items
returning too early, not entering the conditional if with a barrier
or not executing every iteration of the loop with a barrier.

See the barrier function description in the OpenCL documentation.
This commit is contained in:
Jakub Szuppe
2016-01-07 19:17:36 +01:00
parent d142a36e98
commit a6f258cf09

View File

@@ -103,22 +103,24 @@ inline void find_extrema_with_reduce(InputIterator input,
k <<
// Work item global id
k.decl<const uint_>("gid") << " = get_global_id(0);\n" <<
"if(gid >= count) {\n return;\n}\n" <<
// Index of element that will be read from input buffer
k.decl<uint_>("idx") << " = gid;\n" <<
k.decl<input_type>("acc") << ";\n" <<
// Real index of currently best element
"#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
k.decl<input_type>("acc_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" <<
"#else\n" <<
k.decl<uint_>("acc_idx") << " = idx;\n" <<
"#endif\n" <<
k.decl<uint_>("acc_idx") << ";\n" <<
"if(gid < count) {\n" <<
// Real index of currently best element
"#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" <<
k.var<uint_>("acc_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" <<
"#else\n" <<
k.var<uint_>("acc_idx") << " = idx;\n" <<
"#endif\n" <<
// Init accumulator with first[get_global_id(0)]
"acc = " << input[k.var<uint_>("idx")] << ";\n" <<
"idx += get_global_size(0);\n" <<
// Init accumulator with first[get_global_id(0)]
"acc = " << input[k.var<uint_>("idx")] << ";\n" <<
"idx += get_global_size(0);\n" <<
"}\n" <<
k.decl<bool>("compare_result") << ";\n" <<
k.decl<bool>("equal") << ";\n\n" <<