Skip to content

Commit

Permalink
Merge Pull Request #12259 from trilinos/Trilinos/csiefer-f1ae552
Browse files (browse the repository at this point in the history)
Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Tpetra: Adding labels to Kokkos::fence(), fixing bug
PR Author: csiefer2
  • Loading branch information
trilinos-autotester authored Sep 14, 2023
2 parents 91621c3 + f1ae552 commit 3a0be59
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2512,7 +2512,7 @@ namespace Tpetra {
//! Fence if necessary and set flag so we don't duplicate.
void execute_sync_host_uvm_access() const {
if(need_sync_host_uvm_access) {
Kokkos::fence();
Kokkos::fence("CrsGraph::execute_sync_host_uvm_access");
need_sync_host_uvm_access = false;
}
}
Expand Down
2 changes: 1 addition & 1 deletion packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1298,7 +1298,7 @@ namespace Tpetra {
row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
// DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
Kokkos::fence(); // TODO: Need to understand downstream failure points and move this fence.
Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
}

Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3435,7 +3435,7 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
Kokkos::deep_copy (execution_space(), valuesUnpacked_wdv.getDeviceView(Access::OverwriteAll),
theAlpha);
// CAG: This fence was found to be required on Cuda with UVM=on.
Kokkos::fence();
Kokkos::fence("CrsMatrix::setAllToScalar");
}
}

Expand Down Expand Up @@ -4909,7 +4909,7 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
RCP<const MV> X;

// some parameters for below
const bool Y_is_replicated = ! Y_in.isDistributed ();
const bool Y_is_replicated = (! Y_in.isDistributed () && this->getComm ()->getSize () != 1);
const bool Y_is_overwritten = (beta == ZERO);
if (Y_is_replicated && this->getComm ()->getRank () > 0) {
beta = ZERO;
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class WrappedDualView {
}
if(needsSyncPath()) {
throwIfDeviceViewAlive();
if (deviceMemoryIsHostAccessible) Kokkos::fence();
if (deviceMemoryIsHostAccessible) Kokkos::fence("WrappedDualView::getHostView");
dualView.clear_sync_state();
dualView.modify_host();
}
Expand Down Expand Up @@ -327,7 +327,7 @@ class WrappedDualView {
}
if(needsSyncPath()) {
throwIfHostViewAlive();
if (deviceMemoryIsHostAccessible) Kokkos::fence();
if (deviceMemoryIsHostAccessible) Kokkos::fence("WrappedDualView::getDeviceView");
dualView.clear_sync_state();
dualView.modify_device();
}
Expand Down
2 changes: 1 addition & 1 deletion packages/tpetra/core/src/Tpetra_Details_makeColMap_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
// DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
// CAG: This fence was found to be required on Cuda with UVM=on.
Kokkos::fence();
Kokkos::fence("Tpetra::makeColMap");
//Finally, populate the STL structures which hold the index lists
std::set<GO> RemoteGIDSet;
std::vector<GO> RemoteGIDUnorderedVector;
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_Details_residual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ void localResidualWithCommCompOverlap(const CrsMatrix<SC,LO,GO,NO> & A,
RCP<const import_type> importer = A.getGraph ()->getImporter ();
X_colmap.endImport (X_domainmap, *importer, INSERT, true);

Kokkos::fence();
Kokkos::fence("Tpetra::localResidualWithCommCompOverlap-1");

using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false>;
functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
Expand All @@ -565,7 +565,7 @@ void localResidualWithCommCompOverlap(const CrsMatrix<SC,LO,GO,NO> & A,
RCP<const import_type> importer = A.getGraph ()->getImporter ();
X_colmap.endImport (X_domainmap, *importer, INSERT, true);

Kokkos::fence();
Kokkos::fence("Tpetra::localResidualWithCommCompOverlap-2");

using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true>;
functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_DistObject_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1491,7 +1491,7 @@ namespace Tpetra {
numImportPacketsPerLID_av);
}
else { // pack on device
Kokkos::fence(); // for UVM
Kokkos::fence("DistObject::doPosts-1"); // for UVM
this->imports_.modify_device ();
distributorActor_.doPosts
(distributorPlan,
Expand Down Expand Up @@ -1536,7 +1536,7 @@ namespace Tpetra {
this->imports_.view_host ());
}
else { // pack on device
Kokkos::fence(); // for UVM
Kokkos::fence("DistObject::doPosts-2"); // for UVM
this->imports_.modify_device ();
distributorActor_.doPosts
(distributorPlan,
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_Map_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ namespace Tpetra {
nonContigGids_host.size ());
// DEEP_COPY REVIEW - HOST-TO-DEVICE
Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
Kokkos::fence(); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence

glMap_ = global_to_local_table_type(nonContigGids,
firstContiguousGID_,
Expand Down Expand Up @@ -1036,7 +1036,7 @@ namespace Tpetra {
entryList.extent(0));
// DEEP_COPY REVIEW - DEVICE-TO-HOST
Kokkos::deep_copy (execution_space(), entryList_host, entryList);
Kokkos::fence(); // UVM follows
Kokkos::fence("Map::Map"); // UVM follows
firstContiguousGID_ = entryList_host[0];
lastContiguousGID_ = firstContiguousGID_+1;

Expand Down
2 changes: 1 addition & 1 deletion packages/tpetra/core/src/Tpetra_MultiVector_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4356,7 +4356,7 @@ void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
// NOTE (mfh 17 Mar 2019) If we ever get rid of UVM, then device
// and host will be separate allocations. In that case, it may
// pay to do the all-reduce from device to host.
Kokkos::fence(); // for UVM getLocalViewDevice is UVM which can be read as host by allReduceView, so we must not read until device is fenced
Kokkos::fence("MultiVector::reduce"); // for UVM getLocalViewDevice is UVM which can be read as host by allReduceView, so we must not read until device is fenced
auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
allReduceView (X_lcl, X_lcl, *comm);
}
Expand Down

0 comments on commit 3a0be59

Please sign in to comment.