Skip to content

Commit

Permalink
Partially support data parallel for_loop
Browse files Browse the repository at this point in the history
  • Loading branch information
hkaiser committed Jan 27, 2025
1 parent 714b7c8 commit 93bdacc
Show file tree
Hide file tree
Showing 8 changed files with 247 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,31 @@ namespace hpx::parallel::util::detail {
}
};

/// Loop-step helper for integral loop counters.
///
/// \c call1 passes the callable a \c V1 pack constructed from the current
/// counter value and advances the counter by one. \c callv passes the callable
/// a \c V pack whose element \c e holds <code>i + e</code> and advances the
/// counter by \c traits::vector_pack_size_v<V>.
template <typename I>
struct datapar_loop_step<I, std::enable_if_t<std::is_integral_v<I>>>
{
    using V1 = traits::vector_pack_type_t<I, 1>;
    using V = traits::vector_pack_type_t<I>;

    /// Invoke \a f on a \c V1 pack built from \a i, then increment \a i.
    template <typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void call1(F&& f, I& i)
    {
        V1 scalar_pack(i);
        HPX_INVOKE(f, scalar_pack);
        ++i;
    }

    /// Invoke \a f on a \c V pack filled with consecutive counter values
    /// starting at \a i, then move \a i past those values.
    template <typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void callv(F&& f, I& i)
    {
        V index_pack;

        // fill the pack element by element: element 'lane' receives i + lane
        std::size_t lane = 0;
        while (lane != traits::size(index_pack))
        {
            traits::set(index_pack, lane, static_cast<I>(i + lane));
            ++lane;
        }

        HPX_INVOKE(f, index_pack);
        i += traits::vector_pack_size_v<V>;
    }
};

///////////////////////////////////////////////////////////////////////////
template <typename Iter, typename Enable = void>
struct datapar_loop_pred_step
Expand Down
77 changes: 67 additions & 10 deletions libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2007-2023 Hartmut Kaiser
// Copyright (c) 2007-2025 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
Expand All @@ -15,6 +15,7 @@
#include <hpx/execution/traits/vector_pack_load_store.hpp>
#include <hpx/execution/traits/vector_pack_type.hpp>
#include <hpx/executors/datapar/execution_policy.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/parallel/datapar/iterator_helpers.hpp>
#include <hpx/parallel/util/loop.hpp>

Expand Down Expand Up @@ -238,8 +239,12 @@ namespace hpx::parallel::util {
};

///////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Enable = void>
struct datapar_loop_n;

template <typename Iterator>
struct datapar_loop_n
struct datapar_loop_n<Iterator,
std::enable_if_t<hpx::traits::is_iterator_v<Iterator>>>
{
using iterator_type = std::decay_t<Iterator>;
using value_type =
Expand All @@ -258,8 +263,9 @@ namespace hpx::parallel::util {
{
std::size_t len = count;

// clang-format off
for (/* */; !detail::is_data_aligned(first) && len != 0;
--len)
--len)
{
datapar_loop_step<InIter>::call1(f, first);
}
Expand All @@ -268,16 +274,18 @@ namespace hpx::parallel::util {

for (auto len_v =
static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_step<InIter>::callv(f, first);
}
// clang-format on

for (/* */; len != 0; --len)
{
datapar_loop_step<InIter>::call1(f, first);
}

return first;
}
else
Expand All @@ -302,6 +310,51 @@ namespace hpx::parallel::util {
}
};

/// Counted data-parallel loop over an integral index range.
///
/// Starting at \a first, the callable is invoked for \a count consecutive
/// index values. The work is split into three phases:
///   1. a scalar prologue that advances \a first to the next multiple of the
///      pack size (or until the range is exhausted),
///   2. a main loop that hands the callable full packs of indices,
///   3. a scalar epilogue that processes whatever is left.
/// Every index is passed to the callable exactly once, in increasing order.
template <typename I>
struct datapar_loop_n<I, std::enable_if_t<std::is_integral_v<I>>>
{
    using V = traits::vector_pack_type_t<I>;

    /// \param first  first index value to process
    /// \param count  number of consecutive index values to process
    /// \param f      callable invoked with scalar or vector packs of indices
    /// \returns the index value one past the last one processed
    template <typename Iter, typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
        Iter first, std::size_t count, F&& f)
    {
        std::size_t len = count;
        constexpr std::size_t size = traits::vector_pack_size_v<V>;

        // Scalar prologue: reaching the next multiple of 'size' from an
        // offset of (first % size) takes (size - offset) % size steps.
        // Running 'offset' steps instead would leave 'first' at
        // 2 * offset (mod size), which generally is not a multiple of 'size'.
        std::size_t const offset = first % size;
        for (std::size_t i = (size - offset) % size; i != 0 && len != 0;
             --i, --len)
        {
            datapar_loop_step<Iter>::call1(f, first);
        }

        // Main loop: full packs. The signed counter keeps the loop from
        // running when fewer than size + 2 elements remain (the unsigned
        // subtraction wraps around and is reinterpreted as a negative value).
        // clang-format off
        for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
             len_v > 0;
             len_v -= static_cast<std::int64_t>(size), len -= size)
        {
            datapar_loop_step<Iter>::callv(f, first);
        }
        // clang-format on

        // Scalar epilogue: remaining elements one at a time
        for (/* */; len != 0; --len)
        {
            datapar_loop_step<Iter>::call1(f, first);
        }
        return first;
    }

    /// Same as above, except that nothing is processed (and \a first is
    /// returned unchanged) if \a tok reports cancellation on entry.
    template <typename Iter, typename CancelToken, typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
        Iter first, std::size_t count, CancelToken& tok, F&& f)
    {
        // check at the start of a partition only
        if (tok.was_cancelled())
            return first;

        return call(first, count, HPX_FORWARD(F, f));
    }
};

///////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct datapar_loop_n_ind
Expand All @@ -323,8 +376,9 @@ namespace hpx::parallel::util {
{
std::size_t len = count;

// clang-format off
for (/* */; !detail::is_data_aligned(first) && len != 0;
--len)
--len)
{
datapar_loop_step_ind<InIter>::call1(f, first);
}
Expand All @@ -333,11 +387,12 @@ namespace hpx::parallel::util {

for (auto len_v =
static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_step_ind<InIter>::callv(f, first);
}
// clang-format on

for (/* */; len != 0; --len)
{
Expand Down Expand Up @@ -381,14 +436,16 @@ namespace hpx::parallel::util {

constexpr std::size_t size = traits::vector_pack_size_v<V>;

// clang-format off
for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_idx_step<Iter>::callv(f, it, base_idx);
std::advance(it, size);
base_idx += size;
}
// clang-format on

for (/* */; len != 0; --len)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ if(HPX_WITH_DATAPAR)
foreach_datapar
foreach_datapar_zipiter
foreachn_datapar
for_loop_datapar
generate_datapar
generaten_datapar
mismatch_binary_datapar
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (c) 2016-2025 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/algorithm.hpp>
#include <hpx/datapar.hpp>
#include <hpx/init.hpp>
#include <hpx/modules/testing.hpp>

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <random>
#include <string>
#include <utility>
#include <vector>

///////////////////////////////////////////////////////////////////////////////
// Seed for the random number generator. Initialized from std::random_device;
// hpx_main overwrites it when the "seed" command line option is given.
unsigned int seed = std::random_device{}();
// Random number generator shared by the tests below; hpx_main re-seeds it
// with the final value of 'seed' before any test runs.
std::mt19937 gen(seed);

///////////////////////////////////////////////////////////////////////////////
/// Runs hpx::experimental::for_loop over the integral index range
/// [0, c.size()) with the given execution policy. The loop body stores 42 at
/// every index contained in the pack it receives. Afterwards every element is
/// checked to be 42 and the number of checked elements is compared against
/// the size of the container.
template <typename ExPolicy>
void test_for_loop_idx(ExPolicy&& policy)
{
    static_assert(hpx::is_execution_policy_v<ExPolicy>,
        "hpx::is_execution_policy_v<ExPolicy>");

    // consecutive values starting at a random number
    std::vector<std::size_t> c(10007);
    std::iota(std::begin(c), std::end(c), gen());

    hpx::experimental::for_loop(std::forward<ExPolicy>(policy), 0,
        static_cast<int>(c.size()), [&c](auto idx) {
            // 'idx' is a pack of loop indices; write through each element
            std::size_t lane = 0;
            while (lane < hpx::parallel::traits::size(idx))
            {
                c[hpx::parallel::traits::get(idx, lane)] = 42;
                ++lane;
            }
        });

    // verify values
    std::size_t count = 0;
    for (std::size_t v : c)
    {
        HPX_TEST_EQ(v, std::size_t(42));
        ++count;
    }
    HPX_TEST_EQ(count, c.size());
}

/// Asynchronous counterpart of the index-based for_loop test: the call to
/// hpx::experimental::for_loop returns an object that is waited on before the
/// results are verified. The loop body stores 42 at every index contained in
/// the pack it receives.
template <typename ExPolicy>
void test_for_loop_idx_async(ExPolicy&& p)
{
    // consecutive values starting at a random number
    std::vector<std::size_t> c(10007);
    std::iota(std::begin(c), std::end(c), gen());

    auto pending = hpx::experimental::for_loop(std::forward<ExPolicy>(p), 0,
        static_cast<int>(c.size()), [&c](auto idx) {
            // 'idx' is a pack of loop indices; write through each element
            std::size_t lane = 0;
            while (lane < hpx::parallel::traits::size(idx))
            {
                c[hpx::parallel::traits::get(idx, lane)] = 42;
                ++lane;
            }
        });

    // block until the loop has finished before inspecting the data
    pending.wait();

    // verify values
    std::size_t count = 0;
    for (std::size_t v : c)
    {
        HPX_TEST_EQ(v, std::size_t(42));
        ++count;
    }
    HPX_TEST_EQ(count, c.size());
}

void for_loop_test_idx()
{
using namespace hpx::execution;

test_for_loop_idx(simd);
test_for_loop_idx(par_simd);

test_for_loop_idx_async(simd(task));
test_for_loop_idx_async(par_simd(task));
}

///////////////////////////////////////////////////////////////////////////////
/// HPX entry point: picks up an optional seed from the command line, reports
/// and applies the seed, runs the tests and finalizes the runtime.
///
/// \param vm  parsed command line options
/// \returns the result of hpx::local::finalize()
int hpx_main(hpx::program_options::variables_map& vm)
{
    // a seed given on the command line replaces the default one
    if (vm.count("seed") != 0)
    {
        seed = vm["seed"].as<unsigned int>();
    }

    // print the seed that is actually used
    std::cout << "using seed: " << seed << std::endl;
    gen.seed(seed);

    for_loop_test_idx();

    return hpx::local::finalize();
}

int main(int argc, char* argv[])
{
// add command line option which controls the random number generator seed
using namespace hpx::program_options;
options_description desc_commandline(
"Usage: " HPX_APPLICATION_STRING " [options]");

desc_commandline.add_options()("seed,s", value<unsigned int>(),
"the random number generator seed to use for this run");

// By default this test should run on all available cores
std::vector<std::string> const cfg = {"hpx.os_threads=all"};

// Initialize and run HPX
hpx::local::init_params init_args;
init_args.desc_cmdline = desc_commandline;
init_args.cfg = cfg;

HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0,
"HPX main exited with non-zero status");

return hpx::util::report_errors();
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,27 @@
#include <hpx/config.hpp>

#if defined(HPX_HAVE_DATAPAR_EVE)
#include <hpx/concepts/concepts.hpp>
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>

#include <cstddef>

namespace hpx::parallel::traits {

///////////////////////////////////////////////////////////////////////
template <typename Vector>
template <typename Vector,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
Vector& vec, std::size_t index) noexcept
{
return vec.get(index);
}

///////////////////////////////////////////////////////////////////////
template <typename Vector, typename T>
template <typename Vector, typename T,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
Vector& vec, std::size_t index, T val) noexcept
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@

#if defined(HPX_HAVE_DATAPAR_EXPERIMENTAL_SIMD)

#include <hpx/concepts/concepts.hpp>
#include <hpx/execution/traits/detail/simd/vector_pack_simd.hpp>
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>

#include <cstddef>

namespace hpx::parallel::traits {

///////////////////////////////////////////////////////////////////////
template <typename Vector>
template <typename Vector,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
Vector& vec, std::size_t index) noexcept
{
return vec[index];
}

///////////////////////////////////////////////////////////////////////
template <typename Vector, typename T>
template <typename Vector, typename T,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
Vector& vec, std::size_t index, T val) noexcept
{
Expand Down
Loading

0 comments on commit 93bdacc

Please sign in to comment.