From 4d1d2e1308a0f7787514654b0826b9a183df6650 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 10 Sep 2024 14:18:21 -0600 Subject: [PATCH 1/6] Fix join operator of [Min][Max]Loc with location tiebreaker If all values are actually the Max/Min value we may otherwise end up with invalid location. --- core/src/Kokkos_Parallel_Reduce.hpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/core/src/Kokkos_Parallel_Reduce.hpp b/core/src/Kokkos_Parallel_Reduce.hpp index 1ddd186ea01..3b89d184f2a 100644 --- a/core/src/Kokkos_Parallel_Reduce.hpp +++ b/core/src/Kokkos_Parallel_Reduce.hpp @@ -438,7 +438,12 @@ struct MinLoc { // Required KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { - if (src.val < dest.val) dest = src; + if (src.val < dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -493,7 +498,12 @@ struct MaxLoc { // Required KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { - if (src.val > dest.val) dest = src; + if (src.val > dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -620,10 +630,16 @@ struct MinMaxLoc { if (src.min_val < dest.min_val) { dest.min_val = src.min_val; dest.min_loc = src.min_loc; + } else if (dest.min_val == src.min_val && + dest.min_loc == reduction_identity::min()) { + dest.min_loc = src.min_loc; } if (src.max_val > dest.max_val) { dest.max_val = src.max_val; dest.max_loc = src.max_loc; + } else if (dest.max_val == src.max_val && + dest.max_loc == reduction_identity::min()) { + dest.max_loc = src.max_loc; } } From 6c279950e4e4acda1dc9d7f1996c720ea2c83aaa Mon Sep 17 00:00:00 2001 From: Dong Hun Lee Date: Wed, 11 Sep 2024 22:45:22 -0600 Subject: [PATCH 2/6] Added unit tests Co-Authored-By: Christian Trott --- core/unit_test/TestReducers.hpp | 118 ++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/core/unit_test/TestReducers.hpp b/core/unit_test/TestReducers.hpp index a65a5c95c22..92b32fc86a2 100644 --- a/core/unit_test/TestReducers.hpp +++ b/core/unit_test/TestReducers.hpp @@ -823,6 +823,37 @@ struct TestReducers { } } + static void test_minloc_loc_init(int N) { + using reducer_type = Kokkos::MinLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + N, + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x <= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_TRUE(value_loc.loc >= 0 && value_loc.loc < N); + } + static void test_maxloc(int N) { using value_type = typename Kokkos::MaxLoc::value_type; @@ -924,6 +955,37 @@ struct TestReducers { } } + static void test_maxloc_loc_init(int N) { + using reducer_type = Kokkos::MaxLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + N, + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x >= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_TRUE(value_loc.loc >= 0 && value_loc.loc < N); + } + static void test_minmaxloc(int N) { using value_type = typename Kokkos::MinMaxLoc::value_type; @@ -1112,6 +1174,59 @@ struct TestReducers { } } + static void test_minmaxloc_loc_init(int N) { + using reducer_type = Kokkos::MinMaxLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + auto functor = KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) return; + if (x <= update.min_val) { + update.min_val = x; + update.min_loc = i; + } + if (x >= update.max_val) { + update.max_val = x; + update.max_loc = i; + } + }; + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_TRUE(value_loc.min_loc >= 0 && value_loc.min_loc < N); + ASSERT_TRUE(value_loc.max_loc >= 0 && value_loc.max_loc < N); + } + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_TRUE(value_loc.min_loc >= 0 && value_loc.min_loc < N); + ASSERT_TRUE(value_loc.max_loc >= 0 && value_loc.max_loc < N); + } + } + static void test_BAnd(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); @@ -1363,6 +1478,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); + test_minloc_loc_init(101); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif @@ -1375,6 +1491,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); + test_maxloc_loc_init(101); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif @@ -1396,6 +1513,7 @@ struct TestReducers { #endif #else test_minmaxloc(10007); + test_minmaxloc_loc_init(101); test_minmaxloc_2d(100); #endif #endif From 8e33cbc662c0d03c2e584f43cda23ddd79fac4ce Mon Sep 17 00:00:00 2001 From: Dong Hun Lee Date: Thu, 12 Sep 2024 10:06:47 -0600 Subject: [PATCH 3/6] Applying suggestions from reviews --- core/unit_test/TestReducers.hpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/core/unit_test/TestReducers.hpp b/core/unit_test/TestReducers.hpp index 92b32fc86a2..53e9bab6122 100644 --- a/core/unit_test/TestReducers.hpp +++ b/core/unit_test/TestReducers.hpp @@ -851,7 +851,8 @@ struct TestReducers { reducer_type(value_loc)); ASSERT_EQ(value_loc.val, h_values(0)); - ASSERT_TRUE(value_loc.loc >= 0 && value_loc.loc < N); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); } static void test_maxloc(int N) { @@ -983,7 +984,8 @@ struct TestReducers { reducer_type(value_loc)); ASSERT_EQ(value_loc.val, h_values(0)); - ASSERT_TRUE(value_loc.loc >= 0 && value_loc.loc < N); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); } static void test_minmaxloc(int N) { @@ -1206,8 +1208,10 @@ struct TestReducers { ASSERT_EQ(value_loc.min_val, h_values(0)); ASSERT_EQ(value_loc.max_val, h_values(0)); - ASSERT_TRUE(value_loc.min_loc >= 0 && value_loc.min_loc < N); - ASSERT_TRUE(value_loc.max_loc >= 0 && value_loc.max_loc < N); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); } { @@ -1222,8 +1226,10 @@ struct TestReducers { ASSERT_EQ(value_loc.min_val, h_values(0)); ASSERT_EQ(value_loc.max_val, h_values(0)); - ASSERT_TRUE(value_loc.min_loc >= 0 && value_loc.min_loc < N); - ASSERT_TRUE(value_loc.max_loc >= 0 && value_loc.max_loc < N); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); } } @@ -1478,7 +1484,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); - test_minloc_loc_init(101); + test_minloc_loc_init(3); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif @@ -1491,7 +1497,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); - test_maxloc_loc_init(101); + test_maxloc_loc_init(3); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif @@ -1513,7 +1519,7 @@ struct TestReducers { #endif #else test_minmaxloc(10007); - test_minmaxloc_loc_init(101); + test_minmaxloc_loc_init(3); test_minmaxloc_2d(100); #endif #endif From 27ca14d1c948ac2902b34cbb8d2c6a9493df661b Mon Sep 17 00:00:00 2001 From: Dong Hun Lee Date: Thu, 12 Sep 2024 18:43:09 -0600 Subject: [PATCH 4/6] Added unit tests for MinMaxFirstLastLoc, MinFirstLoc, MaxFirstLoc --- core/unit_test/TestReducers.hpp | 125 ++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/core/unit_test/TestReducers.hpp b/core/unit_test/TestReducers.hpp index 53e9bab6122..ade3484eb43 100644 --- a/core/unit_test/TestReducers.hpp +++ b/core/unit_test/TestReducers.hpp @@ -1233,6 +1233,127 @@ struct TestReducers { } } + static void test_minmaxfirstlastloc_loc_init(int N) { + using reducer_type = Kokkos::MinMaxFirstLastLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + auto functor = KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) return; + if (x <= update.min_val) { + update.min_val = x; + update.min_loc = i; + } + if (x >= update.max_val) { + update.max_val = x; + update.max_loc = i; + } + }; + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); + } + + { + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, 0, -1, -1}; + + Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + + ASSERT_EQ(value_loc.min_val, h_values(0)); + ASSERT_EQ(value_loc.max_val, h_values(0)); + ASSERT_GE(value_loc.min_loc, 0); + ASSERT_LT(value_loc.min_loc, N); + ASSERT_GE(value_loc.max_loc, 0); + ASSERT_LT(value_loc.max_loc, N); + } + } + + static void test_minfirstloc_loc_init(int N) { + using reducer_type = Kokkos::MinFirstLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::min(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + N, + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x <= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); + } + + static void test_maxfirstloc_loc_init(int N) { + using reducer_type = Kokkos::MaxFirstLoc; + using reducer_value_type = typename reducer_type::value_type; + + Kokkos::View values("Values", N); + auto h_values = Kokkos::create_mirror_view(values); + + for (int i = 0; i < N; ++i) { + h_values(i) = Kokkos::reduction_identity::max(); + } + Kokkos::deep_copy(values, h_values); + + reducer_value_type value_loc{0, -1}; + + Kokkos::parallel_reduce( + N, + KOKKOS_LAMBDA(const int i, reducer_value_type& update) { + auto x = values(i); + if (i % 2 == 0) + return; + else if (x >= update.val) { + update.val = x; + update.loc = i; + } + }, + reducer_type(value_loc)); + + ASSERT_EQ(value_loc.val, h_values(0)); + ASSERT_GE(value_loc.loc, 0); + ASSERT_LT(value_loc.loc, N); + } + static void test_BAnd(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); @@ -1521,6 +1642,10 @@ struct TestReducers { test_minmaxloc(10007); test_minmaxloc_loc_init(3); test_minmaxloc_2d(100); + + test_minmaxfirstlastloc_loc_init(3); + test_minfirstloc_loc_init(3); + test_maxfirstloc_loc_init(3); #endif #endif test_BAnd(35); From 00bc17de8e00291945d384e02b39f1a187556bc6 Mon Sep 17 00:00:00 2001 From: Dong Hun Lee Date: Mon, 16 Sep 2024 09:27:58 -0600 Subject: [PATCH 5/6] Fix errors found in CI --- core/unit_test/TestReducers.hpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/core/unit_test/TestReducers.hpp b/core/unit_test/TestReducers.hpp index ade3484eb43..b6633d02c4a 100644 --- a/core/unit_test/TestReducers.hpp +++ b/core/unit_test/TestReducers.hpp @@ -838,7 +838,7 @@ struct TestReducers { reducer_value_type value_loc{0, -1}; Kokkos::parallel_reduce( - N, + Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int i, reducer_value_type& update) { auto x = values(i); if (i % 2 == 0) @@ -971,7 +971,7 @@ struct TestReducers { reducer_value_type value_loc{0, -1}; Kokkos::parallel_reduce( - N, + Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int i, reducer_value_type& update) { auto x = values(i); if (i % 2 == 0) @@ -1204,7 +1204,8 @@ struct TestReducers { reducer_value_type value_loc{0, 0, -1, -1}; - Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); ASSERT_EQ(value_loc.min_val, h_values(0)); ASSERT_EQ(value_loc.max_val, h_values(0)); @@ -1222,7 +1223,8 @@ struct TestReducers { reducer_value_type value_loc{0, 0, -1, -1}; - Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); ASSERT_EQ(value_loc.min_val, h_values(0)); ASSERT_EQ(value_loc.max_val, h_values(0)); @@ -1261,7 +1263,8 @@ struct TestReducers { reducer_value_type value_loc{0, 0, -1, -1}; - Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); ASSERT_EQ(value_loc.min_val, h_values(0)); ASSERT_EQ(value_loc.max_val, h_values(0)); @@ -1279,7 +1282,8 @@ struct TestReducers { reducer_value_type value_loc{0, 0, -1, -1}; - Kokkos::parallel_reduce(N, functor, reducer_type(value_loc)); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), functor, + reducer_type(value_loc)); ASSERT_EQ(value_loc.min_val, h_values(0)); ASSERT_EQ(value_loc.max_val, h_values(0)); @@ -1305,7 +1309,7 @@ struct TestReducers { reducer_value_type value_loc{0, -1}; Kokkos::parallel_reduce( - N, + Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int i, reducer_value_type& update) { auto x = values(i); if (i % 2 == 0) @@ -1337,7 +1341,7 @@ struct TestReducers { reducer_value_type value_loc{0, -1}; Kokkos::parallel_reduce( - N, + Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int i, reducer_value_type& update) { auto x = values(i); if (i % 2 == 0) @@ -1562,6 +1566,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); + test_minloc_loc_init(3); // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_minloc_2d(100); @@ -1571,6 +1576,7 @@ struct TestReducers { #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); + test_maxloc_loc_init(3); // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_maxloc_2d(100); @@ -1590,7 +1596,12 @@ struct TestReducers { #endif #else test_minmaxloc(10007); + test_minmaxloc_loc_init(3); test_minmaxloc_2d(100); + + test_minmaxfirstlastloc_loc_init(3); + test_minfirstloc_loc_init(3); + test_maxfirstloc_loc_init(3); #endif #endif } From 70e6b5be7ee88ea696cd66ae2c06e24510c14a09 Mon Sep 17 00:00:00 2001 From: Daniel Arndt Date: Tue, 17 Sep 2024 17:52:22 +0000 Subject: [PATCH 6/6] Fix OpenMPTarget --- .../Kokkos_OpenMPTarget_Reducer.hpp | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp index 9b578aca112..8ab5f0f1a2c 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Reducer.hpp @@ -236,12 +236,12 @@ struct OpenMPTargetReducerWrapper> { // Required KOKKOS_INLINE_FUNCTION static void join(value_type& dest, const value_type& src) { - if (src.val < dest.val) dest = src; - } - - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& dest, const volatile value_type& src) { - if (src.val < dest.val) dest = src; + if (src.val < dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -263,12 +263,12 @@ struct OpenMPTargetReducerWrapper> { KOKKOS_INLINE_FUNCTION static void join(value_type& dest, const value_type& src) { - if (src.val > dest.val) dest = src; - } - - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& dest, const volatile value_type& src) { - if (src.val > dest.val) dest = src; + if (src.val > dest.val) + dest = src; + else if (src.val == dest.val && + dest.loc == reduction_identity::min()) { + dest.loc = src.loc; + } } KOKKOS_INLINE_FUNCTION @@ -331,22 +331,16 @@ struct OpenMPTargetReducerWrapper> { if (src.min_val < dest.min_val) { dest.min_val = src.min_val; dest.min_loc = src.min_loc; - } - if (src.max_val > dest.max_val) { - dest.max_val = src.max_val; - dest.max_loc = src.max_loc; - } - } - - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& dest, const volatile value_type& src) { - if (src.min_val < dest.min_val) { - dest.min_val = src.min_val; + } else if (dest.min_val == src.min_val && + dest.min_loc == reduction_identity::min()) { dest.min_loc = src.min_loc; } if (src.max_val > dest.max_val) { dest.max_val = src.max_val; dest.max_loc = src.max_loc; + } else if (dest.max_val == src.max_val && + dest.max_loc == reduction_identity::min()) { + dest.max_loc = src.max_loc; } }