Skip to content

Commit

Permalink
Revert "Tpetra: Don't access DualView's [h|d]_view directly (#13778)"
Browse files Browse the repository at this point in the history
This reverts commit 9a9ba84.

Trilinos performance dashboard shows regressions after this PR merged.

Signed-off-by: Jonathan Hu <[email protected]>
  • Loading branch information
jhux2 committed Feb 18, 2025
1 parent 751557e commit 025b09a
Show file tree
Hide file tree
Showing 21 changed files with 155 additions and 152 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -249,12 +249,12 @@ int main (int argc, char* argv[]) {
// DualView<double**> for now, rather than a View<double*>.
Kokkos::DualView<double**, Kokkos::LayoutLeft> b_lcl ("b", numLclRows, 1);
b_lcl.modify<Kokkos::DualView<double**, Kokkos::LayoutLeft>::t_dev::execution_space> ();
Kokkos::deep_copy (Kokkos::subview (b_lcl.view_device(), Kokkos::ALL (), 0), forcingTerm);
Kokkos::deep_copy (Kokkos::subview (b_lcl.d_view, Kokkos::ALL (), 0), forcingTerm);
Tpetra::Vector<> b (A.getRangeMap (), b_lcl);

Kokkos::DualView<double**, Kokkos::LayoutLeft> x_lcl ("b", numLclRows, 1);
x_lcl.modify<Kokkos::DualView<double**, Kokkos::LayoutLeft>::t_dev::execution_space> ();
Kokkos::deep_copy (Kokkos::subview (x_lcl.view_device(), Kokkos::ALL (), 0), temperature);
Kokkos::deep_copy (Kokkos::subview (x_lcl.d_view, Kokkos::ALL (), 0), temperature);
Tpetra::Vector<> x (A.getDomainMap (), x_lcl);

//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,12 @@ int main (int argc, char* argv[]) {
// DualView<double**> for now, rather than a View<double*>.
Kokkos::DualView<double**, Kokkos::LayoutLeft, device_type> b_lcl ("b", numLclRows, 1);
b_lcl.modify_device ();
Kokkos::deep_copy (Kokkos::subview (b_lcl.view_device(), Kokkos::ALL (), 0), forcingTerm);
Kokkos::deep_copy (Kokkos::subview (b_lcl.d_view, Kokkos::ALL (), 0), forcingTerm);
Tpetra::Vector<> b (A.getRangeMap (), b_lcl);

Kokkos::DualView<double**, Kokkos::LayoutLeft, device_type> x_lcl ("b", numLclRows, 1);
x_lcl.modify_device ();
Kokkos::deep_copy (Kokkos::subview (x_lcl.view_device(), Kokkos::ALL (), 0), temperature);
Kokkos::deep_copy (Kokkos::subview (x_lcl.d_view, Kokkos::ALL (), 0), temperature);
Tpetra::Vector<> x (A.getDomainMap (), x_lcl);

const int numIters = solve (x, A, b, dx); // solve the linear system
Expand All @@ -374,7 +374,7 @@ int main (int argc, char* argv[]) {
// means that we have to make a deep copy back into the
// 'temperature' output array.
x_lcl.sync_device ();
Kokkos::deep_copy (temperature, Kokkos::subview (b_lcl.view_device(), Kokkos::ALL (), 0));
Kokkos::deep_copy (temperature, Kokkos::subview (b_lcl.d_view, Kokkos::ALL (), 0));

// Correct the solution for the nonhomogenous Dirichlet boundary
// conditions.
Expand Down
16 changes: 8 additions & 8 deletions packages/tpetra/core/inout/Tpetra_Details_CooMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1221,13 +1221,13 @@ class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
numPacketsPerLID_tmp.modify_host ();
}
// Fill numPacketsPerLID with zeros.
Kokkos::deep_copy (numPacketsPerLID.view_host(), static_cast<size_t> (0));
Kokkos::deep_copy (numPacketsPerLID.h_view, static_cast<size_t> (0));
return;
}

const size_t numExports = exportLIDs.extent (0);
if (numExports == 0) {
Details::reallocDualViewIfNeeded (exports, 0, exports.view_host().label ());
Details::reallocDualViewIfNeeded (exports, 0, exports.h_view.label ());
return; // nothing to send
}
RCP<const Comm<int> > comm = src->getMap ().is_null () ?
Expand Down Expand Up @@ -1264,7 +1264,7 @@ class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
// Mark the error later; just count for now.
++numInvalidRowInds;
numPacketsPerLID.view_host()[k] = 0;
numPacketsPerLID.h_view[k] = 0;
continue;
}

Expand All @@ -1276,7 +1276,7 @@ class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
if (errCode != 0) {
std::ostream& err = this->markLocalErrorAndGetStream ();
err << prefix << errStrm.str () << endl;
numPacketsPerLID.view_host()[k] = 0;
numPacketsPerLID.h_view[k] = 0;
continue;
}

Expand All @@ -1294,11 +1294,11 @@ class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
// leave the output arguments in a rational state, we zero out
// all remaining entries of numPacketsPerLID before returning.
for (size_t k2 = k; k2 < numExports; ++k2) {
numPacketsPerLID.view_host()[k2] = 0;
numPacketsPerLID.h_view[k2] = 0;
}
return;
}
numPacketsPerLID.view_host()[k] = static_cast<size_t> (numPackets);
numPacketsPerLID.h_view[k] = static_cast<size_t> (numPackets);
totalNumPackets = static_cast<int> (newTotalNumPackets);
}

Expand Down Expand Up @@ -1352,9 +1352,9 @@ class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
std::vector<SC> vals;

int outBufCurPos = 0;
packet_type* outBuf = exports.view_host().data ();
packet_type* outBuf = exports.h_view.data ();
for (size_t k = 0; k < numExports; ++k) {
const LO lclRow = exportLIDs.view_host()[k];
const LO lclRow = exportLIDs.h_view[k];
// We're packing the source object's data, so we need to use the
// source object's Map to convert from local to global indices.
const GO gblRow = src->map_->getGlobalElement (lclRow);
Expand Down
2 changes: 1 addition & 1 deletion packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2074,7 +2074,7 @@ void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
// then all their owning process ranks, and then the values.
if(exports.extent(0) != totalNumBytes)
{
const std::string oldLabel = exports.view_device().label ();
const std::string oldLabel = exports.d_view.label ();
const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
}
Expand Down
8 changes: 4 additions & 4 deletions packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ namespace Tpetra {
const Teuchos::RCP<Teuchos::ParameterList>& params) :
dist_object_type (rowMap)
, rowMap_ (rowMap)
, k_numAllocPerRow_ (numEntPerRow.view_host())
, k_numAllocPerRow_ (numEntPerRow.h_view)
, numAllocForAllRows_ (0)
{
const char tfecfFuncName[] =
Expand All @@ -382,7 +382,7 @@ namespace Tpetra {

if (debug_) {
for (size_t r = 0; r < lclNumRows; ++r) {
const size_t curRowCount = numEntPerRow.view_host()(r);
const size_t curRowCount = numEntPerRow.h_view(r);
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
std::invalid_argument, "numEntPerRow(" << r << ") "
Expand All @@ -405,7 +405,7 @@ namespace Tpetra {
dist_object_type (rowMap)
, rowMap_ (rowMap)
, colMap_ (colMap)
, k_numAllocPerRow_ (numEntPerRow.view_host())
, k_numAllocPerRow_ (numEntPerRow.h_view)
, numAllocForAllRows_ (0)
{
const char tfecfFuncName[] =
Expand All @@ -422,7 +422,7 @@ namespace Tpetra {

if (debug_) {
for (size_t r = 0; r < lclNumRows; ++r) {
const size_t curRowCount = numEntPerRow.view_host()(r);
const size_t curRowCount = numEntPerRow.h_view(r);
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
std::invalid_argument, "numEntPerRow(" << r << ") "
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6435,7 +6435,7 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
// Allocate 'exports', and copy exports_a back into it.
const size_t newAllocSize = static_cast<size_t> (exports_a.size ());
if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
const std::string oldLabel = exports.view_device().label ();
const std::string oldLabel = exports.d_view.label ();
const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
exports = exports_type (newLabel, newAllocSize);
}
Expand Down Expand Up @@ -6738,7 +6738,7 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
if (static_cast<size_t> (exports.extent (0)) < allocSize) {
using exports_type = Kokkos::DualView<char*, buffer_device_type>;

const std::string oldLabel = exports.view_device().label ();
const std::string oldLabel = exports.d_view.label ();
const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
exports = exports_type (newLabel, allocSize);
}
Expand Down
22 changes: 13 additions & 9 deletions packages/tpetra/core/src/Tpetra_Details_DualViewUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,19 @@ makeDualViewFromOwningHostView
using execution_space = typename DeviceType::execution_space;
using dual_view_type = Kokkos::DualView<ElementType*, DeviceType>;

typename Kokkos::DualView<ElementType*, DeviceType>::t_dev devView;
if (dv.extent (0) == hostView.extent (0))
devView = dv.view_device();
else
devView = Kokkos::create_mirror_view (DeviceType (), hostView);
// DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
Kokkos::deep_copy (execution_space(), devView, hostView);
dv = dual_view_type (devView, hostView);
execution_space().fence();
if (dv.extent (0) == hostView.extent (0)) {
// We don't need to reallocate the device View.
dv.clear_sync_state ();
dv.modify_host ();
dv.h_view = hostView;
dv.sync_device ();
}
else {
auto devView = Kokkos::create_mirror_view (DeviceType (), hostView);
// DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
Kokkos::deep_copy (execution_space(), devView, hostView);
dv = dual_view_type (devView, hostView);
}
}

template<class ElementType, class DeviceType>
Expand Down
40 changes: 20 additions & 20 deletions packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
if (envVarSet && (std::strcmp(envVarSet,"1") == 0)) \
std::cout << (fn) << " called from " << callerstr \
<< " at " << filestr << ":"<<linnum \
<< " host cnt " << dualView.view_host().use_count() \
<< " device cnt " << dualView.view_device().use_count() \
<< " host cnt " << dualView.h_view.use_count() \
<< " device cnt " << dualView.d_view.use_count() \
<< std::endl; \
}

Expand Down Expand Up @@ -210,7 +210,7 @@ class WrappedDualView {
}

size_t extent(const int i) const {
return dualView.view_host().extent(i);
return dualView.h_view.extent(i);
}

void stride(size_t * stride_) const {
Expand All @@ -219,11 +219,11 @@ class WrappedDualView {


size_t origExtent(const int i) const {
return originalDualView.view_host().extent(i);
return originalDualView.h_view.extent(i);
}

const char * label() const {
return dualView.view_device().label();
return dualView.d_view.label();
}


Expand Down Expand Up @@ -551,11 +551,11 @@ class WrappedDualView {
}

int host_view_use_count() const {
return originalDualView.view_host().use_count();
return originalDualView.h_view.use_count();
}

int device_view_use_count() const {
return originalDualView.view_device().use_count();
return originalDualView.d_view.use_count();
}


Expand Down Expand Up @@ -597,7 +597,7 @@ class WrappedDualView {
}

bool memoryIsAliased() const {
return deviceMemoryIsHostAccessible && dualView.view_host().data() == dualView.view_device().data();
return deviceMemoryIsHostAccessible && dualView.h_view.data() == dualView.d_view.data();
}


Expand Down Expand Up @@ -639,41 +639,41 @@ class WrappedDualView {
void throwIfViewsAreDifferentSizes() const {
// Here we check *size* (the product of extents) rather than each extent individually.
// This is mostly designed to catch people resizing one view, but not the other.
if(dualView.view_device().size() != dualView.view_host().size()) {
if(dualView.d_view.size() != dualView.h_view.size()) {
std::ostringstream msg;
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.view_device().label()
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.d_view.label()
<< "; host and device views are different sizes: "
<< dualView.view_host().size() << " vs " <<dualView.view_host().size();
<< dualView.h_view.size() << " vs " <<dualView.h_view.size();
throw std::runtime_error(msg.str());
}
}

void throwIfHostViewAlive() const {
throwIfViewsAreDifferentSizes();
if (dualView.view_host().use_count() > dualView.view_device().use_count()) {
if (dualView.h_view.use_count() > dualView.d_view.use_count()) {
std::ostringstream msg;
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.view_device().label()
<< "; host use_count = " << dualView.view_host().use_count()
<< "; device use_count = " << dualView.view_device().use_count() << "): "
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.d_view.label()
<< "; host use_count = " << dualView.h_view.use_count()
<< "; device use_count = " << dualView.d_view.use_count() << "): "
<< "Cannot access data on device while a host view is alive";
throw std::runtime_error(msg.str());
}
}

void throwIfDeviceViewAlive() const {
throwIfViewsAreDifferentSizes();
if (dualView.view_device().use_count() > dualView.view_host().use_count()) {
if (dualView.d_view.use_count() > dualView.h_view.use_count()) {
std::ostringstream msg;
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.view_device().label()
<< "; host use_count = " << dualView.view_host().use_count()
<< "; device use_count = " << dualView.view_device().use_count() << "): "
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.d_view.label()
<< "; host use_count = " << dualView.h_view.use_count()
<< "; device use_count = " << dualView.d_view.use_count() << "): "
<< "Cannot access data on host while a device view is alive";
throw std::runtime_error(msg.str());
}
}

bool iAmASubview() {
return originalDualView.view_host() != dualView.view_host();
return originalDualView.h_view != dualView.h_view;
}

mutable DualViewType originalDualView;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ castAwayConstDualView (const Kokkos::DualView<const ValueType*, DeviceType>& inp
typedef typename input_dual_view_type::t_host::non_const_type out_host_view_type;

out_dev_view_type output_view_dev
(const_cast<ValueType*> (input_dv.view_device().data ()),
input_dv.view_device().extent (0));
(const_cast<ValueType*> (input_dv.d_view.data ()),
input_dv.d_view.extent (0));
out_host_view_type output_view_host
(const_cast<ValueType*> (input_dv.view_host().data ()),
input_dv.view_host().extent (0));
(const_cast<ValueType*> (input_dv.h_view.data ()),
input_dv.h_view.extent (0));

Kokkos::DualView<ValueType*, DeviceType> output_dv(output_view_dev,output_view_host);
if(input_dv.need_sync_host()) output_dv.modify_device();
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_Details_packCrsGraph_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ packCrsGraph (const CrsGraph<LO, GO, NT>& sourceGraph,
/// \param exports [in/out] Output pack buffer; resized if needed.
///
/// \param numPacketsPerLID [out] On output,
/// numPacketsPerLID.view_device()[k] is the number of bytes packed for row
/// exportLIDs.view_device()[k] of the local graph.
/// numPacketsPerLID.d_view[k] is the number of bytes packed for row
/// exportLIDs.d_view[k] of the local graph.
///
/// \param exportLIDs [in] Local indices of the rows to pack.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ packCrsGraph (const CrsGraph<LO, GO, NT>& sourceGraph,
View<packet_type*, HostSpace, MemoryUnmanaged>
exports_h (exports.getRawPtr (), exports.size ());
// DEEP_COPY REVIEW - DEVICE-TO-HOST
Kokkos::deep_copy (execution_space(), exports_h, exports_dv.view_device());
Kokkos::deep_copy (execution_space(), exports_h, exports_dv.d_view);
execution_space().fence();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
/// \param exports [in/out] Output pack buffer; resized if needed.
///
/// \param numPacketsPerLID [out] On output,
/// numPacketsPerLID.view_device()[k] is the number of bytes packed for row
/// exportLIDs.view_device()[k] of the local matrix.
/// numPacketsPerLID.d_view[k] is the number of bytes packed for row
/// exportLIDs.d_view[k] of the local matrix.
///
/// \param exportLIDs [in] Local indices of the rows to pack.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,7 @@ packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
exports.size ());
// DEEP_COPY REVIEW - DEVICE-TO-HOST
Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.view_device());
Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.d_view);
}

template<typename ST, typename LO, typename GO, typename NT>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
}
dv = dual_view_type (); // free first, in order to save memory
// If current size is 0, the DualView's Views likely lack a label.
dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), newSize);
dv = dual_view_type (curSize == 0 ? newLabel : dv.d_view.label (), newSize);
return true; // we did reallocate
}
else {
Expand All @@ -81,7 +81,7 @@ reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
execution_space().fence (); // keep this fence to respect needFenceBeforeRealloc
}
// If current size is 0, the DualView's Views likely lack a label.
dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), 0);
dv = dual_view_type (curSize == 0 ? newLabel : dv.d_view.label (), 0);
return true; // we did reallocate
}
// Instead of writing curSize >= tooBigFactor * newSize, express
Expand All @@ -95,13 +95,12 @@ reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
}
dv = dual_view_type (); // free first, in order to save memory
// If current size is 0, the DualView's Views likely lack a label.
dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), newSize);
dv = dual_view_type (curSize == 0 ? newLabel : dv.d_view.label (), newSize);
return true; // we did reallocate
}
else {
auto d_view = Kokkos::subview (dv.view_device(), range_type (0, newSize));
auto h_view = Kokkos::subview (dv.view_host(), range_type (0, newSize));
dv = Kokkos::DualView<ValueType*, DeviceType>(d_view, h_view);
dv.d_view = Kokkos::subview (dv.d_view, range_type (0, newSize));
dv.h_view = Kokkos::subview (dv.h_view, range_type (0, newSize));
return false; // we did not reallocate
}
}
Expand Down
Loading

0 comments on commit 025b09a

Please sign in to comment.