From 58ad750a61fe4b8916f5fbe38135885299b2dc6c Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Wed, 13 Sep 2023 15:04:02 -0600 Subject: [PATCH] Tpetra: Adding labels to 0-arg Kokkos::fence calls --- packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 2 +- packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp | 2 +- packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 2 +- packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp | 4 ++-- packages/tpetra/core/src/Tpetra_Details_makeColMap_def.hpp | 2 +- packages/tpetra/core/src/Tpetra_Details_residual.hpp | 4 ++-- packages/tpetra/core/src/Tpetra_DistObject_def.hpp | 4 ++-- packages/tpetra/core/src/Tpetra_Map_def.hpp | 4 ++-- packages/tpetra/core/src/Tpetra_MultiVector_def.hpp | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 6b77028ff90c..84a31d969815 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -2512,7 +2512,7 @@ namespace Tpetra { //! Fence if necessary and set flag so we don't duplicate. void execute_sync_host_uvm_access() const { if(need_sync_host_uvm_access) { - Kokkos::fence(); + Kokkos::fence("CrsGraph::execute_sync_host_uvm_access"); need_sync_host_uvm_access = false; } } diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index a3efd2938914..60ac1354d9f4 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -1298,7 +1298,7 @@ namespace Tpetra { row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows); // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR Kokkos::deep_copy (execution_space(), numRowEnt, static_cast (0)); // fill w/ 0s - Kokkos::fence(); // TODO: Need to understand downstream failure points and move this fence.
+ Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence. this->k_numRowEntries_ = numRowEnt; // "commit" our allocation } diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 3649b7dd6a1e..391804ac02c1 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -3435,7 +3435,7 @@ CrsMatrix:: Kokkos::deep_copy (execution_space(), valuesUnpacked_wdv.getDeviceView(Access::OverwriteAll), theAlpha); // CAG: This fence was found to be required on Cuda with UVM=on. - Kokkos::fence(); + Kokkos::fence("CrsMatrix::setAllToScalar"); } } diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index 5d498707699a..d597df4b191a 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -278,7 +278,7 @@ class WrappedDualView { } if(needsSyncPath()) { throwIfDeviceViewAlive(); - if (deviceMemoryIsHostAccessible) Kokkos::fence(); + if (deviceMemoryIsHostAccessible) Kokkos::fence("WrappedDualView::getHostView"); dualView.clear_sync_state(); dualView.modify_host(); } @@ -327,7 +327,7 @@ class WrappedDualView { } if(needsSyncPath()) { throwIfHostViewAlive(); - if (deviceMemoryIsHostAccessible) Kokkos::fence(); + if (deviceMemoryIsHostAccessible) Kokkos::fence("WrappedDualView::getDeviceView"); dualView.clear_sync_state(); dualView.modify_device(); } diff --git a/packages/tpetra/core/src/Tpetra_Details_makeColMap_def.hpp b/packages/tpetra/core/src/Tpetra_Details_makeColMap_def.hpp index 78fb9699b4ec..f27718868f46 100644 --- a/packages/tpetra/core/src/Tpetra_Details_makeColMap_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_makeColMap_def.hpp @@ -641,7 +641,7 @@ makeColMap (Teuchos::RCP>& colMap, // DEEP_COPY REVIEW - 
DEVICE-TO-HOSTMIRROR Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView); // CAG: This fence was found to be required on Cuda with UVM=on. - Kokkos::fence(); + Kokkos::fence("Tpetra::makeColMap"); //Finally, populate the STL structures which hold the index lists std::set RemoteGIDSet; std::vector RemoteGIDUnorderedVector; diff --git a/packages/tpetra/core/src/Tpetra_Details_residual.hpp b/packages/tpetra/core/src/Tpetra_Details_residual.hpp index 8dfad27ad83f..a3459e263119 100644 --- a/packages/tpetra/core/src/Tpetra_Details_residual.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_residual.hpp @@ -549,7 +549,7 @@ void localResidualWithCommCompOverlap(const CrsMatrix & A, RCP importer = A.getGraph ()->getImporter (); X_colmap.endImport (X_domainmap, *importer, INSERT, true); - Kokkos::fence(); + Kokkos::fence("Tpetra::localResidualWithCommCompOverlap-1"); using functor_type2 = OffRankUpdateFunctor; functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets); @@ -565,7 +565,7 @@ void localResidualWithCommCompOverlap(const CrsMatrix & A, RCP importer = A.getGraph ()->getImporter (); X_colmap.endImport (X_domainmap, *importer, INSERT, true); - Kokkos::fence(); + Kokkos::fence("Tpetra::localResidualWithCommCompOverlap-2"); using functor_type2 = OffRankUpdateFunctor; functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets); diff --git a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp index c9916760dfa1..4a85f8b78e3d 100644 --- a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp +++ b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp @@ -1491,7 +1491,7 @@ namespace Tpetra { numImportPacketsPerLID_av); } else { // pack on device - Kokkos::fence(); // for UVM + Kokkos::fence("DistObject::doPosts-1"); // for UVM this->imports_.modify_device (); distributorActor_.doPosts (distributorPlan, @@ -1536,7 +1536,7 @@ namespace Tpetra { this->imports_.view_host ()); } else { // pack on 
device - Kokkos::fence(); // for UVM + Kokkos::fence("DistObject::doPosts-2"); // for UVM this->imports_.modify_device (); distributorActor_.doPosts (distributorPlan, diff --git a/packages/tpetra/core/src/Tpetra_Map_def.hpp b/packages/tpetra/core/src/Tpetra_Map_def.hpp index 74e55919fc93..1595ce9e147c 100644 --- a/packages/tpetra/core/src/Tpetra_Map_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Map_def.hpp @@ -699,7 +699,7 @@ namespace Tpetra { nonContigGids_host.size ()); // DEEP_COPY REVIEW - HOST-TO-DEVICE Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host); - Kokkos::fence(); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence + Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refactored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence glMap_ = global_to_local_table_type(nonContigGids, firstContiguousGID_, @@ -1036,7 +1036,7 @@ namespace Tpetra { entryList.extent(0)); // DEEP_COPY REVIEW - DEVICE-TO-HOST Kokkos::deep_copy (execution_space(), entryList_host, entryList); - Kokkos::fence(); // UVM follows + Kokkos::fence("Map::Map"); // UVM follows firstContiguousGID_ = entryList_host[0]; lastContiguousGID_ = firstContiguousGID_+1; diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 44cd918003d4..34e670519fe8 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -4356,7 +4356,7 @@ void MultiVector::copyAndPermute( // NOTE (mfh 17 Mar 2019) If we ever get rid of UVM, then device // and host will be separate allocations. In that case, it may // pay to do the all-reduce from device to host.
- Kokkos::fence(); // for UVM getLocalViewDevice is UVM which can be read as host by allReduceView, so we must not read until device is fenced + Kokkos::fence("MultiVector::reduce"); // for UVM getLocalViewDevice is UVM which can be read as host by allReduceView, so we must not read until device is fenced auto X_lcl = this->getLocalViewDevice(Access::ReadWrite); allReduceView (X_lcl, X_lcl, *comm); }