From 5d4368859d02d23097bc16e932f1a66090ea7dd8 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Sun, 26 Jan 2025 15:33:13 -0600 Subject: [PATCH] Partially support data parallel for_loop --- .../hpx/parallel/datapar/iterator_helpers.hpp | 23 ++++ .../include/hpx/parallel/datapar/loop.hpp | 77 ++++++++++-- .../unit/datapar_algorithms/CMakeLists.txt | 1 + .../datapar_algorithms/for_loop_datapar.cpp | 119 ++++++++++++++++++ 4 files changed, 210 insertions(+), 10 deletions(-) create mode 100644 libs/core/algorithms/tests/unit/datapar_algorithms/for_loop_datapar.cpp diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp index fc9c73e02ed5..05eca20dd2ab 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp @@ -133,6 +133,29 @@ namespace hpx::parallel::util::detail { } }; + template + struct datapar_loop_step>> + { + using V1 = traits::vector_pack_type_t; + using V = traits::vector_pack_type_t; + + template + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void call1(F&& f, I& i) + { + V1 tmp(i); + HPX_INVOKE(f, tmp); + ++i; + } + + template + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void callv(F&& f, I& i) + { + V tmp([i](auto e) { return static_cast(i + e); }); + HPX_INVOKE(f, tmp); + i += traits::vector_pack_size_v; + } + }; + /////////////////////////////////////////////////////////////////////////// template struct datapar_loop_pred_step diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp index 3fb7af885c25..98c93b2a845a 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -238,8 +239,12 @@ namespace hpx::parallel::util { }; /////////////////////////////////////////////////////////////////////// + template + struct datapar_loop_n; + template - struct datapar_loop_n + struct datapar_loop_n>> { using iterator_type = std::decay_t; using value_type = @@ -258,8 +263,9 @@ namespace hpx::parallel::util { { std::size_t len = count; + // clang-format off for (/* */; !detail::is_data_aligned(first) && len != 0; - --len) + --len) { datapar_loop_step::call1(f, first); } @@ -268,16 +274,18 @@ namespace hpx::parallel::util { for (auto len_v = static_cast(len - (size + 1)); - len_v > 0; - len_v -= static_cast(size), len -= size) + len_v > 0; + len_v -= static_cast(size), len -= size) { datapar_loop_step::callv(f, first); } + // clang-format on for (/* */; len != 0; --len) { datapar_loop_step::call1(f, first); } + return first; } else @@ -302,6 +310,51 @@ namespace hpx::parallel::util { } }; + template + struct datapar_loop_n>> + { + using V = traits::vector_pack_type_t; + + template + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call( + Iter first, std::size_t count, F&& f) + { + std::size_t len = count; + constexpr std::size_t size = traits::vector_pack_size_v; + + for (size_t i = first % size; i != 0 && len != 0; --i, --len) + { + datapar_loop_step::call1(f, first); + } + + // clang-format off + for (auto len_v = static_cast(len - (size + 1)); + len_v > 0; + len_v -= static_cast(size), len -= size) + { + datapar_loop_step::callv(f, first); + } + // clang-format on + + for (/* */; len != 0; --len) + { + datapar_loop_step::call1(f, first); + } + return first; + } + + template + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call( + Iter first, std::size_t count, CancelToken& tok, F&& f) + { + // check at the start of a partition only + if (tok.was_cancelled()) + return first; + + return call(first, count, HPX_FORWARD(F, f)); + } + }; + /////////////////////////////////////////////////////////////////////// template struct datapar_loop_n_ind @@ -323,8 +376,9 @@ namespace hpx::parallel::util { { std::size_t len = count; + // clang-format off for (/* */; !detail::is_data_aligned(first) && len != 0; - --len) + --len) { datapar_loop_step_ind::call1(f, first); } @@ -333,11 +387,12 @@ namespace hpx::parallel::util { for (auto len_v = static_cast(len - (size + 1)); - len_v > 0; - len_v -= static_cast(size), len -= size) + len_v > 0; + len_v -= static_cast(size), len -= size) { datapar_loop_step_ind::callv(f, first); } + // clang-format on for (/* */; len != 0; --len) { @@ -381,14 +436,16 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; + // clang-format off for (auto len_v = static_cast(len - (size + 1)); - len_v > 0; - len_v -= static_cast(size), len -= size) + len_v > 0; + len_v -= static_cast(size), len -= size) { datapar_loop_idx_step::callv(f, it, base_idx); std::advance(it, size); base_idx += size; } + // clang-format on for (/* */; len != 0; --len) { diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt b/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt index cd15949a449c..b17436167fcc 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt @@ -29,6 +29,7 @@ if(HPX_WITH_DATAPAR) foreach_datapar foreach_datapar_zipiter foreachn_datapar + for_loop_datapar generate_datapar generaten_datapar mismatch_binary_datapar diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/for_loop_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/for_loop_datapar.cpp new file mode 100644 index 000000000000..ca7e7ab79400 --- /dev/null +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/for_loop_datapar.cpp @@ -0,0 +1,119 @@ +// Copyright (c) 2016-2025 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/////////////////////////////////////////////////////////////////////////////// +unsigned int seed = std::random_device{}(); +std::mt19937 gen(seed); + +/////////////////////////////////////////////////////////////////////////////// +template +void test_for_loop_idx(ExPolicy&& policy) +{ + static_assert(hpx::is_execution_policy_v, + "hpx::is_execution_policy_v"); + + std::vector c(10007); + std::iota(std::begin(c), std::end(c), gen()); + + hpx::experimental::for_loop( + std::forward(policy), 0, int(c.size()), [&c](auto i) { + for (int e = 0; e < i.size(); ++e) + c[hpx::parallel::traits::get(i, e)] = 42; + }); + + // verify values + std::size_t count = 0; + std::for_each(std::begin(c), std::end(c), [&count](std::size_t v) -> void { + HPX_TEST_EQ(v, std::size_t(42)); + ++count; + }); + HPX_TEST_EQ(count, c.size()); +} + +template +void test_for_loop_idx_async(ExPolicy&& p) +{ + std::vector c(10007); + std::iota(std::begin(c), std::end(c), gen()); + + auto f = hpx::experimental::for_loop( + std::forward(p), 0, int(c.size()), [&c](auto i) { + for (int e = 0; e < i.size(); ++e) + c[hpx::parallel::traits::get(i, e)] = 42; + }); + f.wait(); + + // verify values + std::size_t count = 0; + std::for_each(std::begin(c), std::end(c), [&count](std::size_t v) -> void { + HPX_TEST_EQ(v, std::size_t(42)); + ++count; + }); + HPX_TEST_EQ(count, c.size()); +} + +void for_loop_test_idx() +{ + using namespace hpx::execution; + + test_for_loop_idx(simd); + test_for_loop_idx(par_simd); + + test_for_loop_idx_async(simd(task)); + test_for_loop_idx_async(par_simd(task)); +} + +/////////////////////////////////////////////////////////////////////////////// +int hpx_main(hpx::program_options::variables_map& vm) +{ + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + gen.seed(seed); + + for_loop_test_idx(); + + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +}