Skip to content

Commit

Permalink
Merge pull request #1917 from e10harvey/gs_apply_non_blocking
Browse files (browse the repository at this point in the history)
sparse/impl: Make PSGS non-blocking
  • Loading branch information
e10harvey authored Aug 9, 2023
2 parents c1a6ca9 + 890f5ff commit a0684e1
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 26 deletions.
26 changes: 20 additions & 6 deletions perf_test/sparse/KokkosSparse_gs.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -219,14 +219,22 @@ void runGS(const GS_Parameters& params) {
KokkosSparse::Experimental::gauss_seidel_symbolic(
&kh, nrows, nrows, A.graph.row_map, A.graph.entries,
params.graph_symmetric);
double symbolicTime = timer.seconds();
std::cout << "\n*** Symbolic time: " << symbolicTime << '\n';
double symbolicLaunchTime = timer.seconds();
std::cout << "\n*** Symbolic launch time: " << symbolicLaunchTime << '\n';
timer.reset();
Kokkos::fence();
double symbolicComputeTime = timer.seconds();
std::cout << "\n*** Symbolic compute time: " << symbolicComputeTime << '\n';
timer.reset();
KokkosSparse::Experimental::gauss_seidel_numeric(
&kh, nrows, nrows, A.graph.row_map, A.graph.entries, A.values,
params.graph_symmetric);
double numericTime = timer.seconds();
std::cout << "\n*** Numeric time: " << numericTime << '\n';
double numericLaunchTime = timer.seconds();
std::cout << "\n*** Numeric launch time: " << numericLaunchTime << '\n';
timer.reset();
Kokkos::fence();
double numericComputeTime = timer.seconds();
std::cout << "\n*** Numeric compute time: " << numericComputeTime << '\n';
timer.reset();
// Last two parameters are damping factor (should be 1) and sweeps
switch (params.direction) {
Expand All @@ -246,8 +254,14 @@ void runGS(const GS_Parameters& params) {
true, true, 1.0, params.sweeps);
break;
}
double applyTime = timer.seconds();
std::cout << "\n*** Apply time: " << applyTime << '\n';

double applyLaunchTime = timer.seconds();
std::cout << "\n*** Apply launch time: " << applyLaunchTime << '\n';
timer.reset();
Kokkos::fence();
double applyComputeTime = timer.seconds();
std::cout << "\n*** Apply compute time: " << applyComputeTime << '\n';
timer.reset();
kh.destroy_gs_handle();
// Now, compute the 2-norm of residual
scalar_view_t res("Ax-b", nrows);
Expand Down
57 changes: 37 additions & 20 deletions sparse/impl/KokkosSparse_gauss_seidel_impl.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1778,25 +1778,31 @@ class PointGaussSeidel {
if (block_size == 1) {
Kokkos::parallel_for(
labelRegular,
team_policy_t((numRegularRows + team_row_chunk_size - 1) /
team_row_chunk_size,
suggested_team_size, vector_size),
Kokkos::Experimental::require(
team_policy_t((numRegularRows + team_row_chunk_size - 1) /
team_row_chunk_size,
suggested_team_size, vector_size),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
gs);
} else if (gs.num_max_vals_in_l2 == 0) {
Kokkos::parallel_for(
labelBlock,
block_apply_team_policy_t(
(numRegularRows + team_row_chunk_size - 1) /
team_row_chunk_size,
suggested_team_size, vector_size),
Kokkos::Experimental::require(
block_apply_team_policy_t(
(numRegularRows + team_row_chunk_size - 1) /
team_row_chunk_size,
suggested_team_size, vector_size),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
gs);
} else {
Kokkos::parallel_for(
labelBigBlock,
bigblock_apply_team_policy_t(
(numRegularRows + team_row_chunk_size - 1) /
team_row_chunk_size,
suggested_team_size, vector_size),
Kokkos::Experimental::require(
bigblock_apply_team_policy_t(
(numRegularRows + team_row_chunk_size - 1) /
team_row_chunk_size,
suggested_team_size, vector_size),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
gs);
}
}
Expand All @@ -1818,12 +1824,16 @@ class PointGaussSeidel {
Kokkos::deep_copy(long_row_x, nnz_scalar_t());
Kokkos::parallel_for(
labelLong,
longrow_apply_team_policy_t(numLongRows * teams_per_row,
longRowTeamSize),
Kokkos::Experimental::require(
longrow_apply_team_policy_t(numLongRows * teams_per_row,
longRowTeamSize),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
gs);
Kokkos::parallel_for(
"KokkosSparse::GaussSeidel::LongRows::x_update",
range_pol(color_index_end - numLongRows, color_index_end),
Kokkos::Experimental::require(
range_pol(color_index_end - numLongRows, color_index_end),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
LongRowUpdateFunctor<decltype(Xcol), decltype(Ycol)>(
Xcol, Ycol, long_row_x, gs._permuted_inverse_diagonal,
gs.omega, color_index_end - numLongRows));
Expand Down Expand Up @@ -1874,7 +1884,9 @@ class PointGaussSeidel {
if (numRegularRows) {
Kokkos::parallel_for(
labelShort,
range_pol(color_index_begin, color_index_end - numLongRows),
Kokkos::Experimental::require(
range_pol(color_index_begin, color_index_end - numLongRows),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
gs);
}
if (numLongRows) {
Expand All @@ -1890,13 +1902,18 @@ class PointGaussSeidel {
Kokkos::subview(gs._Yvector, Kokkos::ALL(), long_row_col);
gs._long_row_col = long_row_col;
Kokkos::deep_copy(long_row_x, nnz_scalar_t());
Kokkos::parallel_for(labelLong,
Kokkos::RangePolicy<MyExecSpace, LongRowTag>(
0, numLongRows * par_per_row),
gs);
Kokkos::parallel_for(
labelLong,
Kokkos::Experimental::require(
Kokkos::RangePolicy<MyExecSpace, LongRowTag>(
0, numLongRows * par_per_row),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
gs);
Kokkos::parallel_for(
"KokkosSparse::GaussSeidel::LongRows::x_update",
range_pol(color_index_end - numLongRows, color_index_end),
Kokkos::Experimental::require(
range_pol(color_index_end - numLongRows, color_index_end),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
LongRowUpdateFunctor<decltype(Xcol), decltype(Ycol)>(
Xcol, Ycol, long_row_x, gs._permuted_inverse_diagonal,
gs.omega, color_index_end - numLongRows));
Expand Down

0 comments on commit a0684e1

Please sign in to comment.