diff --git a/core/src/Kokkos_Parallel_Reduce.hpp b/core/src/Kokkos_Parallel_Reduce.hpp index 1ddd186ea01..3b89d184f2a 100644 --- a/core/src/Kokkos_Parallel_Reduce.hpp +++ b/core/src/Kokkos_Parallel_Reduce.hpp @@ -438,7 +438,12 @@ struct MinLoc { // Required KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { - if (src.val < dest.val) dest = src; + if (src.val < dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -493,7 +498,12 @@ struct MaxLoc { // Required KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { - if (src.val > dest.val) dest = src; + if (src.val > dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -620,10 +630,16 @@ struct MinMaxLoc { if (src.min_val < dest.min_val) { dest.min_val = src.min_val; dest.min_loc = src.min_loc; + } else if (dest.min_val == src.min_val && + dest.min_loc == reduction_identity::min()) { + dest.min_loc = src.min_loc; } if (src.max_val > dest.max_val) { dest.max_val = src.max_val; dest.max_loc = src.max_loc; + } else if (dest.max_val == src.max_val && + dest.max_loc == reduction_identity::min()) { + dest.max_loc = src.max_loc; } } diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp index 413abd841ec..4308fb042a3 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp @@ -193,7 +193,12 @@ struct OpenMPTargetReducerWrapper> { // Required KOKKOS_INLINE_FUNCTION static void join(value_type& dest, const value_type& src) { - if (src.val < dest.val) dest = src; + if (src.val < dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -215,7 +220,12 @@ struct OpenMPTargetReducerWrapper> { KOKKOS_INLINE_FUNCTION static void join(value_type& dest, const value_type& src) { - if (src.val > dest.val) dest = src; + if (src.val > dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -268,10 +278,16 @@ struct OpenMPTargetReducerWrapper> { if (src.min_val < dest.min_val) { dest.min_val = src.min_val; dest.min_loc = src.min_loc; + } else if (dest.min_val == src.min_val && + dest.min_loc == reduction_identity::min()) { + dest.min_loc = src.min_loc; } if (src.max_val > dest.max_val) { dest.max_val = src.max_val; dest.max_loc = src.max_loc; + } else if (dest.max_val == src.max_val && + dest.max_loc == reduction_identity::min()) { + dest.max_loc = src.max_loc; } } diff --git a/core/unit_test/TestReducers.hpp b/core/unit_test/TestReducers.hpp index a65a5c95c22..b6633d02c4a 100644 --- a/core/unit_test/TestReducers.hpp +++ b/core/unit_test/TestReducers.hpp @@ -823,6 +823,38 @@ struct TestReducers { } } + static void test_minloc_loc_init(int N) { + using reducer_type = Kokkos::MinLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x <= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); + } + static void test_maxloc(int N) { using value_type = typename Kokkos::MaxLoc::value_type; @@ -924,6 +956,38 @@ struct TestReducers { } } + static void test_maxloc_loc_init(int N) { + using reducer_type = Kokkos::MaxLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x >= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); + } + static void test_minmaxloc(int N) { using value_type = typename Kokkos::MinMaxLoc::value_type; @@ -1112,6 +1176,188 @@ struct TestReducers { } } + static void test_minmaxloc_loc_init(int N) { + using reducer_type = Kokkos::MinMaxLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + auto functor = KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) return; + if (x <= update.min_val) { + update.min_val = x; + update.min_loc = i; + } + if (x >= update.max_val) { + update.max_val = x; + update.max_loc = i; + } + }; + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); + } + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); + } + } + + static void test_minmaxfirstlastloc_loc_init(int N) { + using reducer_type = Kokkos::MinMaxFirstLastLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + auto functor = KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) return; + if (x <= update.min_val) { + update.min_val = x; + update.min_loc = i; + } + if (x >= update.max_val) { + update.max_val = x; + update.max_loc = i; + } + }; + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); + } + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); + } + } + + static void test_minfirstloc_loc_init(int N) { + using reducer_type = Kokkos::MinFirstLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x <= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); + } + + static void test_maxfirstloc_loc_init(int N) { + using reducer_type = Kokkos::MaxFirstLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x >= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); + } + static void test_BAnd(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); @@ -1320,6 +1566,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); + test_minloc_loc_init(3); // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_minloc_2d(100); @@ -1329,6 +1576,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); + test_maxloc_loc_init(3); // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_maxloc_2d(100); @@ -1348,7 +1596,12 @@ struct TestReducers { #endif #else test_minmaxloc(10007); + test_minmaxloc_loc_init(3); test_minmaxloc_2d(100); + + test_minmaxfirstlastloc_loc_init(3); + test_minfirstloc_loc_init(3); + test_maxfirstloc_loc_init(3); #endif #endif } @@ -1363,6 +1616,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); + test_minloc_loc_init(3); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif @@ -1375,6 +1629,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); + test_maxloc_loc_init(3); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif @@ -1396,7 +1651,12 @@ struct TestReducers { #endif #else test_minmaxloc(10007); + test_minmaxloc_loc_init(3); test_minmaxloc_2d(100); + + test_minmaxfirstlastloc_loc_init(3); + test_minfirstloc_loc_init(3); + test_maxfirstloc_loc_init(3); #endif #endif test_BAnd(35);