Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tpetra: Don't access DualView's [h|d]_view directly #13778

Merged
merged 2 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2074,7 +2074,7 @@ void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
// then all their owning process ranks, and then the values.
if(exports.extent(0) != totalNumBytes)
{
const std::string oldLabel = exports.d_view.label ();
const std::string oldLabel = exports.view_device().label ();
const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
}
Expand Down
8 changes: 4 additions & 4 deletions packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ namespace Tpetra {
const Teuchos::RCP<Teuchos::ParameterList>& params) :
dist_object_type (rowMap)
, rowMap_ (rowMap)
, k_numAllocPerRow_ (numEntPerRow.h_view)
, k_numAllocPerRow_ (numEntPerRow.view_host())
, numAllocForAllRows_ (0)
{
const char tfecfFuncName[] =
Expand All @@ -382,7 +382,7 @@ namespace Tpetra {

if (debug_) {
for (size_t r = 0; r < lclNumRows; ++r) {
const size_t curRowCount = numEntPerRow.h_view(r);
const size_t curRowCount = numEntPerRow.view_host()(r);
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
std::invalid_argument, "numEntPerRow(" << r << ") "
Expand All @@ -405,7 +405,7 @@ namespace Tpetra {
dist_object_type (rowMap)
, rowMap_ (rowMap)
, colMap_ (colMap)
, k_numAllocPerRow_ (numEntPerRow.h_view)
, k_numAllocPerRow_ (numEntPerRow.view_host())
, numAllocForAllRows_ (0)
{
const char tfecfFuncName[] =
Expand All @@ -422,7 +422,7 @@ namespace Tpetra {

if (debug_) {
for (size_t r = 0; r < lclNumRows; ++r) {
const size_t curRowCount = numEntPerRow.h_view(r);
const size_t curRowCount = numEntPerRow.view_host()(r);
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
std::invalid_argument, "numEntPerRow(" << r << ") "
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6269,7 +6269,7 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
// Allocate 'exports', and copy exports_a back into it.
const size_t newAllocSize = static_cast<size_t> (exports_a.size ());
if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
const std::string oldLabel = exports.d_view.label ();
const std::string oldLabel = exports.view_device().label ();
const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
exports = exports_type (newLabel, newAllocSize);
}
Expand Down Expand Up @@ -6572,7 +6572,7 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
if (static_cast<size_t> (exports.extent (0)) < allocSize) {
using exports_type = Kokkos::DualView<char*, buffer_device_type>;

const std::string oldLabel = exports.d_view.label ();
const std::string oldLabel = exports.view_device().label ();
const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
exports = exports_type (newLabel, allocSize);
}
Expand Down
22 changes: 9 additions & 13 deletions packages/tpetra/core/src/Tpetra_Details_DualViewUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,15 @@ makeDualViewFromOwningHostView
using execution_space = typename DeviceType::execution_space;
using dual_view_type = Kokkos::DualView<ElementType*, DeviceType>;

if (dv.extent (0) == hostView.extent (0)) {
// We don't need to reallocate the device View.
dv.clear_sync_state ();
dv.modify_host ();
dv.h_view = hostView;
dv.sync_device ();
}
else {
auto devView = Kokkos::create_mirror_view (DeviceType (), hostView);
// DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
Kokkos::deep_copy (execution_space(), devView, hostView);
dv = dual_view_type (devView, hostView);
}
typename Kokkos::DualView<ElementType*, DeviceType>::t_dev devView;
if (dv.extent (0) == hostView.extent (0))
devView = dv.view_device();
else
devView = Kokkos::create_mirror_view (DeviceType (), hostView);
// DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
Kokkos::deep_copy (execution_space(), devView, hostView);
dv = dual_view_type (devView, hostView);
execution_space().fence();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fence is new but seems necessary. The constructor will not fence and the deep_copy is asynchronous. The function doesn't take an execution space instance argument so it should be synchronous.

}

template<class ElementType, class DeviceType>
Expand Down
40 changes: 20 additions & 20 deletions packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
if (envVarSet && (std::strcmp(envVarSet,"1") == 0)) \
std::cout << (fn) << " called from " << callerstr \
<< " at " << filestr << ":"<<linnum \
<< " host cnt " << dualView.h_view.use_count() \
<< " device cnt " << dualView.d_view.use_count() \
<< " host cnt " << dualView.view_host().use_count() \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m not sure if it’s an issue, but here, behavior changes, right? Because the getter is making a copy, so we’ll now report the previous use count incremented by one.

Copy link
Contributor Author

@masterleinad masterleinad Feb 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in general all use counts will be larger but from what I understand only the difference between host and device is of relevance here.

<< " device cnt " << dualView.view_device().use_count() \
<< std::endl; \
}

Expand Down Expand Up @@ -210,7 +210,7 @@ class WrappedDualView {
}

size_t extent(const int i) const {
return dualView.h_view.extent(i);
return dualView.view_host().extent(i);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can probably become just dualView.extent(i).

}

void stride(size_t * stride_) const {
Expand All @@ -219,11 +219,11 @@ class WrappedDualView {


size_t origExtent(const int i) const {
return originalDualView.h_view.extent(i);
return originalDualView.view_host().extent(i);
}

const char * label() const {
return dualView.d_view.label();
return dualView.view_device().label();
}


Expand Down Expand Up @@ -551,11 +551,11 @@ class WrappedDualView {
}

int host_view_use_count() const {
return originalDualView.h_view.use_count();
return originalDualView.view_host().use_count();
Copy link
Contributor

@maartenarnst maartenarnst Feb 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, the increment by one may be worth considering. Similar cases below.

}

int device_view_use_count() const {
return originalDualView.d_view.use_count();
return originalDualView.view_device().use_count();
}


Expand Down Expand Up @@ -597,7 +597,7 @@ class WrappedDualView {
}

bool memoryIsAliased() const {
return deviceMemoryIsHostAccessible && dualView.h_view.data() == dualView.d_view.data();
return deviceMemoryIsHostAccessible && dualView.view_host().data() == dualView.view_device().data();
}


Expand Down Expand Up @@ -631,41 +631,41 @@ class WrappedDualView {
void throwIfViewsAreDifferentSizes() const {
// Here we check *size* (the product of extents) rather than each extent individually.
// This is mostly designed to catch people resizing one view, but not the other.
if(dualView.d_view.size() != dualView.h_view.size()) {
if(dualView.view_device().size() != dualView.view_host().size()) {
std::ostringstream msg;
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.d_view.label()
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.view_device().label()
<< "; host and device views are different sizes: "
<< dualView.h_view.size() << " vs " <<dualView.h_view.size();
<< dualView.view_host().size() << " vs " <<dualView.view_host().size();
throw std::runtime_error(msg.str());
}
}

void throwIfHostViewAlive() const {
throwIfViewsAreDifferentSizes();
if (dualView.h_view.use_count() > dualView.d_view.use_count()) {
if (dualView.view_host().use_count() > dualView.view_device().use_count()) {
std::ostringstream msg;
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.d_view.label()
<< "; host use_count = " << dualView.h_view.use_count()
<< "; device use_count = " << dualView.d_view.use_count() << "): "
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.view_device().label()
<< "; host use_count = " << dualView.view_host().use_count()
<< "; device use_count = " << dualView.view_device().use_count() << "): "
<< "Cannot access data on device while a host view is alive";
throw std::runtime_error(msg.str());
}
}

void throwIfDeviceViewAlive() const {
throwIfViewsAreDifferentSizes();
if (dualView.d_view.use_count() > dualView.h_view.use_count()) {
if (dualView.view_device().use_count() > dualView.view_host().use_count()) {
std::ostringstream msg;
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.d_view.label()
<< "; host use_count = " << dualView.h_view.use_count()
<< "; device use_count = " << dualView.d_view.use_count() << "): "
msg << "Tpetra::Details::WrappedDualView (name = " << dualView.view_device().label()
<< "; host use_count = " << dualView.view_host().use_count()
<< "; device use_count = " << dualView.view_device().use_count() << "): "
<< "Cannot access data on host while a device view is alive";
throw std::runtime_error(msg.str());
}
}

bool iAmASubview() {
return originalDualView.h_view != dualView.h_view;
return originalDualView.view_host() != dualView.view_host();
}

mutable DualViewType originalDualView;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ packCrsGraph (const CrsGraph<LO, GO, NT>& sourceGraph,
View<packet_type*, HostSpace, MemoryUnmanaged>
exports_h (exports.getRawPtr (), exports.size ());
// DEEP_COPY REVIEW - DEVICE-TO-HOST
Kokkos::deep_copy (execution_space(), exports_h, exports_dv.d_view);
Kokkos::deep_copy (execution_space(), exports_h, exports_dv.view_device());
execution_space().fence();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,7 @@ packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
exports.size ());
// DEEP_COPY REVIEW - DEVICE-TO-HOST
Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.d_view);
Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.view_device());
}

template<typename ST, typename LO, typename GO, typename NT>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
}
dv = dual_view_type (); // free first, in order to save memory
// If current size is 0, the DualView's Views likely lack a label.
dv = dual_view_type (curSize == 0 ? newLabel : dv.d_view.label (), newSize);
dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), newSize);
return true; // we did reallocate
}
else {
Expand All @@ -81,7 +81,7 @@ reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
execution_space().fence (); // keep this fence to respect needFenceBeforeRealloc
}
// If current size is 0, the DualView's Views likely lack a label.
dv = dual_view_type (curSize == 0 ? newLabel : dv.d_view.label (), 0);
dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), 0);
return true; // we did reallocate
}
// Instead of writing curSize >= tooBigFactor * newSize, express
Expand All @@ -95,12 +95,13 @@ reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
}
dv = dual_view_type (); // free first, in order to save memory
// If current size is 0, the DualView's Views likely lack a label.
dv = dual_view_type (curSize == 0 ? newLabel : dv.d_view.label (), newSize);
dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), newSize);
return true; // we did reallocate
}
else {
dv.d_view = Kokkos::subview (dv.d_view, range_type (0, newSize));
dv.h_view = Kokkos::subview (dv.h_view, range_type (0, newSize));
auto d_view = Kokkos::subview (dv.view_device(), range_type (0, newSize));
auto h_view = Kokkos::subview (dv.view_host(), range_type (0, newSize));
dv = Kokkos::DualView<ValueType*, DeviceType>(d_view, h_view);
return false; // we did not reallocate
}
}
Expand Down
32 changes: 16 additions & 16 deletions packages/tpetra/core/src/Tpetra_MultiVector_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ namespace Tpetra {
(LDA < lclNumRows, std::invalid_argument, "Map and DualView origView "
"do not match. LDA = " << LDA << " < this->getLocalLength() = " <<
lclNumRows << ". origView.extent(0) = " << origView.extent(0)
<< ", origView.stride(1) = " << origView.d_view.stride(1) << ".");
<< ", origView.stride(1) = " << origView.view_device().stride(1) << ".");

if (whichVectors.size () == 1) {
// If whichVectors has only one entry, we don't need to bother
Expand Down Expand Up @@ -1049,10 +1049,10 @@ namespace Tpetra {
aliases(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& other) const
{
//Don't actually get a view, just get pointers.
auto thisData = view_.getDualView().h_view.data();
auto otherData = other.view_.getDualView().h_view.data();
size_t thisSpan = view_.getDualView().h_view.span();
size_t otherSpan = other.view_.getDualView().h_view.span();
auto thisData = view_.getDualView().view_host().data();
auto otherData = other.view_.getDualView().view_host().data();
size_t thisSpan = view_.getDualView().view_host().span();
size_t otherSpan = other.view_.getDualView().view_host().span();
return (otherData <= thisData && thisData < otherData + otherSpan)
|| (thisData <= otherData && otherData < thisData + thisSpan);
}
Expand Down Expand Up @@ -1286,7 +1286,7 @@ namespace Tpetra {
Kokkos::DualView<size_t*, device_type> tmpTgt ("tgtWhichVecs", numCols);
tmpTgt.modify_host ();
for (size_t j = 0; j < numCols; ++j) {
tmpTgt.h_view(j) = j;
tmpTgt.view_host()(j) = j;
}
if (! copyOnHost) {
tmpTgt.sync_device ();
Expand All @@ -1311,7 +1311,7 @@ namespace Tpetra {
Kokkos::DualView<size_t*, device_type> tmpSrc ("srcWhichVecs", numCols);
tmpSrc.modify_host ();
for (size_t j = 0; j < numCols; ++j) {
tmpSrc.h_view(j) = j;
tmpSrc.view_host()(j) = j;
}
if (! copyOnHost) {
tmpSrc.sync_device ();
Expand Down Expand Up @@ -1804,7 +1804,7 @@ void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
(newSize > 0)) {

size_t offset = getLocalLength () - newSize;
reallocated = this->imports_.d_view.data() != view_.getDualView().d_view.data() + offset;
reallocated = this->imports_.view_device().data() != view_.getDualView().view_device().data() + offset;
if (reallocated) {
typedef std::pair<size_t, size_t> range_type;
this->imports_ = DV(view_.getDualView(),
Expand Down Expand Up @@ -1864,8 +1864,8 @@ void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
bool
MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
importsAreAliased() {
return (this->imports_.d_view.data() + this->imports_.d_view.extent(0) ==
view_.getDualView().d_view.data() + view_.getDualView().d_view.extent(0));
return (this->imports_.view_device().data() + this->imports_.view_device().extent(0) ==
view_.getDualView().view_device().data() + view_.getDualView().view_device().extent(0));
}


Expand Down Expand Up @@ -2885,7 +2885,7 @@ void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
k_alphas_type k_alphas ("alphas::tmp", numAlphas);
k_alphas.modify_host ();
for (size_t i = 0; i < numAlphas; ++i) {
k_alphas.h_view(i) = static_cast<impl_scalar_type> (alphas[i]);
k_alphas.view_host()(i) = static_cast<impl_scalar_type> (alphas[i]);
}
k_alphas.sync_device ();
// Invoke the scale() overload that takes a device View of coefficients.
Expand Down Expand Up @@ -3709,19 +3709,19 @@ void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
const size_t newSize = X.imports_.extent (0) / numCols;
const size_t offset = jj*newSize;
auto newImports = X.imports_;
newImports.d_view = subview (X.imports_.d_view,
newImports.view_device() = subview (X.imports_.view_device(),
range_type (offset, offset+newSize));
newImports.h_view = subview (X.imports_.h_view,
newImports.view_host() = subview (X.imports_.view_host(),
range_type (offset, offset+newSize));
this->imports_ = newImports;
}
{
const size_t newSize = X.exports_.extent (0) / numCols;
const size_t offset = jj*newSize;
auto newExports = X.exports_;
newExports.d_view = subview (X.exports_.d_view,
newExports.view_device() = subview (X.exports_.view_device(),
range_type (offset, offset+newSize));
newExports.h_view = subview (X.exports_.h_view,
newExports.view_host() = subview (X.exports_.view_host(),
range_type (offset, offset+newSize));
this->exports_ = newExports;
}
Expand Down Expand Up @@ -4488,7 +4488,7 @@ void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
const size_t col = isConstantStride () ? j : whichVectors_[j];
col_dual_view_type X_col =
Kokkos::subview (view_, Kokkos::ALL (), col);
return Kokkos::Compat::persistingView (X_col.d_view);
return Kokkos::Compat::persistingView (X_col.view_device());
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Expand Down
2 changes: 1 addition & 1 deletion packages/tpetra/core/test/Map/Map_LocalMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( LocalMap, KokkosView, LO, GO, NT )

dual_view_t my_dual_view("test view with local maps",1);

my_dual_view.h_view(0) = local_map_t();
my_dual_view.view_host()(0) = local_map_t();

my_dual_view.sync_device();
}
Expand Down
Loading
Loading