From a6f258cf098fe8e9877c8e96ff63a2e99010748c Mon Sep 17 00:00:00 2001 From: Jakub Szuppe Date: Thu, 7 Jan 2016 19:17:36 +0100 Subject: [PATCH] Fix find_extrema_reduce kernel in find_extrema_with_reduce.hpp It was not guaranteed that every barrier would be encountered by all work-items in a work-group executing the kernel due to some work-items returning too early, not entering the conditional if with a barrier or not executing every iteration of the loop with a barrier. See barrier function description in OpenCL documentation. --- .../detail/find_extrema_with_reduce.hpp | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/include/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp index a157e248..1fbb7dee 100644 --- a/include/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp +++ b/include/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp @@ -103,22 +103,24 @@ inline void find_extrema_with_reduce(InputIterator input, k << // Work item global id k.decl("gid") << " = get_global_id(0);\n" << - "if(gid >= count) {\n return;\n}\n" << // Index of element that will be read from input buffer k.decl("idx") << " = gid;\n" << k.decl("acc") << ";\n" << - // Real index of currently best element - "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << - k.decl("acc_idx") << " = " << input_idx[k.var("idx")] << ";\n" << - "#else\n" << - k.decl("acc_idx") << " = idx;\n" << - "#endif\n" << + k.decl("acc_idx") << ";\n" << + "if(gid < count) {\n" << + // Real index of currently best element + "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << + k.var("acc_idx") << " = " << input_idx[k.var("idx")] << ";\n" << + "#else\n" << + k.var("acc_idx") << " = idx;\n" << + "#endif\n" << - // Init accumulator with first[get_global_id(0)] - "acc = " << input[k.var("idx")] << ";\n" << - "idx += get_global_size(0);\n" << + // Init accumulator with first[get_global_id(0)] + "acc = " << 
input[k.var("idx")] << ";\n" << + "idx += get_global_size(0);\n" << + "}\n" << k.decl("compare_result") << ";\n" << k.decl("equal") << ";\n\n" <<