From 46dd9831b179d7b9ffdd99837ce077db0c8f8c88 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 28 Jun 2024 14:48:24 -0600 Subject: [PATCH] Phalanx: fix for changes to kokkos serial backend We now have to manually delete the inner views of the host-host mirror of a view-of-views. Otherwise we see deadlock. See Kokkos issue 7092 for details. --- .../phalanx/src/Phalanx_KokkosViewOfViews.hpp | 62 +++++++++++++++++-- .../test/ViewOfViews/tPhalanxViewOfViews.cpp | 9 +++ 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp b/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp index c3d91ce66cb5..76e0c953e040 100644 --- a/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp +++ b/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp @@ -211,7 +211,7 @@ namespace PHX { is_initialized_ = true; } - /// Set an inner device view on the outer view. Indices are the outer view indices. + /// Set an inner device view on the outer view. Indices are the outer view indices. template void setView(InnerViewType v,Indices... i) { @@ -488,7 +488,16 @@ namespace PHX { } }; - // Rank 1 outer view + /** \brief Returns a rank-1 view of views where both the outer and inner views are on host. Values are deep_copied from input v_of_v. + + IMPORTANT: The user must manually free the inner views + of the returned object by calling + PHX::freeInnerViewsOfHostHostViewOfViews() before destroying the + host-host view of views. Failure to do so will result in + deadlock. The outer view dtor calls a parallel_for and the inner + view dtor calls another parallel_for. Nested parallel_fors are + blocked by a mutex even on the Serial backend now! 
+ */ template auto createHostHostViewOfViews(const Kokkos::View*,OuterProps...>& v_of_v) { // Host outer view pointing to device inner views @@ -510,7 +519,16 @@ namespace PHX { return host_host; } - // Rank 2 outer view + /** \brief Returns a rank-2 view of views where both the outer and inner views are on host. Values are deep_copied from input v_of_v. + + IMPORTANT: The user must manually free the inner views + of the returned object by calling + PHX::freeInnerViewsOfHostHostViewOfViews() before destroying the + host-host view of views. Failure to do so will result in + deadlock. The outer view dtor calls a parallel_for and the inner + view dtor calls another parallel_for. Nested parallel_fors are + blocked by a mutex even on the Serial backend now! + */ template auto createHostHostViewOfViews(const Kokkos::View**,OuterProps...>& v_of_v) { // Host outer view pointing to device inner views @@ -534,7 +552,16 @@ namespace PHX { return host_host; } - // Rank 3 outer view + /** \brief Returns a rank-3 view of views where both the outer and inner views are on host. Values are deep_copied from input v_of_v. + + IMPORTANT: The user must manually free the inner views + of the returned object by calling + PHX::freeInnerViewsOfHostHostViewOfViews() before destroying the + host-host view of views. Failure to do so will result in + deadlock. The outer view dtor calls a parallel_for and the inner + view dtor calls another parallel_for. Nested parallel_fors are + blocked by a mutex even on the Serial backend now! 
+ */ template auto createHostHostViewOfViews(const Kokkos::View***,OuterProps...>& v_of_v) { // Host outer view pointing to device inner views @@ -561,6 +588,33 @@ namespace PHX { return host_host; } + template + auto freeInnerViewsOfHostHostViewOfViews(Kokkos::View*,OuterProps...>& v_of_v) { + for (std::size_t i=0; i < v_of_v.extent(0); ++i) { + v_of_v(i) = Kokkos::View(); + } + } + + template + auto freeInnerViewsOfHostHostViewOfViews(Kokkos::View**,OuterProps...>& v_of_v) { + for (std::size_t i=0; i < v_of_v.extent(0); ++i) { + for (std::size_t j=0; j < v_of_v.extent(1); ++j) { + v_of_v(i,j) = Kokkos::View(); + } + } + } + + template + auto freeInnerViewsOfHostHostViewOfViews(Kokkos::View***,OuterProps...>& v_of_v) { + for (std::size_t i=0; i < v_of_v.extent(0); ++i) { + for (std::size_t j=0; j < v_of_v.extent(1); ++j) { + for (std::size_t k=0; k < v_of_v.extent(2); ++k) { + v_of_v(i,j,k) = Kokkos::View(); + } + } + } + } + } // namespace PHX #endif diff --git a/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp b/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp index 2c5c20fec65c..e9f502987b99 100644 --- a/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp +++ b/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp @@ -590,6 +590,9 @@ TEUCHOS_UNIT_TEST(PhalanxViewOfViews,CreateHostHost) { TEST_FLOATING_EQUALITY(vov_host(2)(cell),4.0,tol); TEST_FLOATING_EQUALITY(vov_host(3)(cell),10.0,tol); } + + // NOTE: you must call this on the host-host version to avoid deadlock! + PHX::freeInnerViewsOfHostHostViewOfViews(vov_host); } // Rank 2 outer view @@ -616,6 +619,9 @@ TEUCHOS_UNIT_TEST(PhalanxViewOfViews,CreateHostHost) { TEST_FLOATING_EQUALITY(vov_host(1,0)(cell),4.0,tol); TEST_FLOATING_EQUALITY(vov_host(1,1)(cell),11.0,tol); } + + // NOTE: you must call this on the host-host version to avoid deadlock! 
+ PHX::freeInnerViewsOfHostHostViewOfViews(vov_host); } // Rank 3 outer view @@ -642,6 +648,9 @@ TEUCHOS_UNIT_TEST(PhalanxViewOfViews,CreateHostHost) { TEST_FLOATING_EQUALITY(vov_host(2,2,2)(cell),4.0,tol); TEST_FLOATING_EQUALITY(vov_host(0,1,2)(cell),12.0,tol); } + + // NOTE: you must call this on the host-host version to avoid deadlock! + PHX::freeInnerViewsOfHostHostViewOfViews(vov_host); } }