diff --git a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp
index 7dd95c747adb..a78d4294e231 100644
--- a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp
+++ b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp
@@ -38,11 +38,13 @@
 // ************************************************************************
 // @HEADER
 */
+// clang-format off
 #include "Tpetra_Details_KokkosCounter.hpp"
 #include "TpetraCore_config.h"
 #include "Kokkos_Core.hpp"
 #include "Teuchos_TestForException.hpp"
 #include
+#include
 
 namespace Tpetra {
 namespace Details {
@@ -201,6 +203,62 @@ namespace Details {
     TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
   }
 
+// clang-format on
+namespace KokkosRegionCounterDetails {
+// Labels handed to push_region_callback, in the order they arrived.
+std::vector regions;
+
+// Appends label to regions; start() installs this as the push-region callback.
+void push_region_callback(const char *label) { regions.push_back(label); }
+static_assert(std::is_same_v,
+              "Unexpected Kokkos profiling interface API. This is an internal "
+              "Tpetra developer error, please report this.");
+
+} // namespace KokkosRegionCounterDetails
+
+// Start recording: install push_region_callback as the push-region callback.
+void KokkosRegionCounter::start() {
+  Kokkos::Tools::Experimental::set_push_region_callback(
+      KokkosRegionCounterDetails::push_region_callback);
+}
+
+// Discard every recorded label; the installed callback is left untouched.
+void KokkosRegionCounter::reset() {
+  KokkosRegionCounterDetails::regions.clear();
+}
+
+// Stop recording: pass nullptr as the callback; recorded labels are kept.
+void KokkosRegionCounter::stop() {
+  Kokkos::Tools::Experimental::set_push_region_callback(nullptr);
+}
+
+// Number of recorded labels that contain needle as a substring.
+size_t
+KokkosRegionCounter::get_count_region_contains(const std::string &needle) {
+  size_t count = 0;
+  for (const auto &region : KokkosRegionCounterDetails::regions) {
+    count += (region.find(needle) != std::string::npos);
+  }
+  return count;
+}
+
+// Write every recorded label to os, one per line.
+void KokkosRegionCounter::dump_regions(Teuchos::FancyOStream &os) {
+  for (const auto &region : KokkosRegionCounterDetails::regions) {
+    os << region << "\n";
+  }
+}
+
+// Write every recorded label to os, one per line.
+void KokkosRegionCounter::dump_regions(std::ostream &os) {
+  for (const auto &region : KokkosRegionCounterDetails::regions) {
+    os << region << "\n";
+  }
+}
+
+
+// clang-format off
 
 
 } // namespace Details
diff --git a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.hpp b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.hpp
index e6a8bccb9375..702819fad61d 100644
--- a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.hpp
+++ b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.hpp
@@ -38,6 +38,7 @@
 // ************************************************************************
 // @HEADER
 */
+// clang-format off
 #ifndef TPETRA_DETAILS_KOKKOS_COUNTER_HPP
 #define TPETRA_DETAILS_KOKKOS_COUNTER_HPP
 
@@ -46,6 +47,7 @@
 /// types using the Kokkos Profiling Library
 
 #include
+#include
 
 namespace Tpetra {
 namespace Details {
@@ -87,6 +89,30 @@ namespace FenceCounter {
   size_t get_count_global(const std::string & device);
 }
 
+// clang-format on
+
+/// \brief Counter for Kokkos regions representing third-party library usage
+namespace KokkosRegionCounter {
+/// \brief Start recording the label of every Kokkos region that is pushed
+void start();
+
+/// \brief Discard all region labels recorded so far
+void reset();
+
+/// \brief Stop recording; labels already recorded are kept
+void stop();
+
+/// \brief Number of recorded region labels that contain `substr`
+size_t get_count_region_contains(const std::string &substr);
+
+/// \brief Print all observed region labels, separated by newline
+void dump_regions(std::ostream &os);
+void dump_regions(Teuchos::FancyOStream &os);
+} // namespace KokkosRegionCounter
+
+// clang-format off
+
+
 } // namespace Details
 } // namespace Tpetra
 
diff --git a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt
index 646ff80848a0..8c79f3c14bd3 100644
--- a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt
+++ b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt
@@ -490,8 +490,29 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
   STANDARD_PASS_OUTPUT
   )
 
+if (
+  # supported TPLs
+  (
+    (Tpetra_ENABLE_CUDA AND TPL_ENABLE_CUSPARSE ) OR
+    (Tpetra_ENABLE_HIP AND TPL_ENABLE_ROCSPARSE)
+  )
+  AND
+  # supported type combos
+  (
+    (Tpetra_INST_DOUBLE OR Tpetra_INST_FLOAT)
+  )
+)
+  TRIBITS_ADD_EXECUTABLE_AND_TEST(
+    CrsMatrix_ApplyUsesTPLs
+    SOURCES
+      CrsMatrix_ApplyUsesTPLs.cpp
+      ${TEUCHOS_STD_UNIT_TEST_MAIN}
+    COMM serial mpi
+    STANDARD_PASS_OUTPUT
+    )
+endif()
 
 
 SET(TIMING_INSTALLS "")
diff --git a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_ApplyUsesTPLs.cpp b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_ApplyUsesTPLs.cpp
new file mode 100644
index 000000000000..dff7928d1f12
--- /dev/null
+++ b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_ApplyUsesTPLs.cpp
@@ -0,0 +1,286 @@
+/*
+// @HEADER
+// ***********************************************************************
+//
+// Tpetra: Templated Linear Algebra Services Package
+// Copyright (2008) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov)
+//
+// ************************************************************************
+// @HEADER
+*/
+
+#include "Tpetra_TestingUtilities.hpp"
+#include "Tpetra_MultiVector.hpp"
+#include "Tpetra_CrsMatrix.hpp"
+#include "Tpetra_Details_KokkosCounter.hpp"
+
+// TODO: add test where some nodes have zero rows
+// TODO: add test where non-"zero" graph is used to build matrix; if no values are added to matrix, the operator effect should be zero. This tests that matrix values are initialized properly.
+// TODO: add test where dynamic profile initially has no allocation, then entries are added. this will test new view functionality.
+
+namespace Teuchos {
+  template <>
+    ScalarTraits::magnitudeType
+    relErr( const int &s1, const int &s2 )
+    {
+      typedef ScalarTraits ST;
+      return ST::magnitude(s1-s2);
+    }
+
+  template <>
+    ScalarTraits::magnitudeType
+    relErr( const char &s1, const char &s2 )
+    {
+      typedef ScalarTraits ST;
+      return ST::magnitude(s1-s2);
+    }
+}
+
+namespace {
+
+  // no ScalarTraits<>::eps() for integer types
+
+  template struct TestingTolGuts {};
+
+  template
+  struct TestingTolGuts {
+    static typename Teuchos::ScalarTraits::magnitudeType testingTol()
+      { return Teuchos::ScalarTraits::eps(); }
+  };
+
+  template
+  struct TestingTolGuts {
+    static typename Teuchos::ScalarTraits::magnitudeType testingTol()
+      { return 0; }
+  };
+
+  template
+  static typename Teuchos::ScalarTraits::magnitudeType testingTol()
+  {
+    return TestingTolGuts::hasMachineParameters>::
+      testingTol();
+  }
+
+  using Tpetra::TestingUtilities::getDefaultComm;
+
+  using std::endl;
+  using std::swap;
+
+  using std::string;
+
+  using Teuchos::as;
+  using Teuchos::FancyOStream;
+  using Teuchos::RCP;
+  using Teuchos::ArrayRCP;
+  using Teuchos::rcp;
+  using Teuchos::arcp;
+  using Teuchos::outArg;
+  using Teuchos::arcpClone;
+  using Teuchos::arrayView;
+  using Teuchos::broadcast;
+  using Teuchos::OrdinalTraits;
+  using Teuchos::ScalarTraits;
+  using Teuchos::Comm;
+  using Teuchos::Array;
+  using Teuchos::ArrayView;
+  using Teuchos::tuple;
+  using Teuchos::null;
+  using Teuchos::VERB_NONE;
+  using Teuchos::VERB_LOW;
+  using Teuchos::VERB_MEDIUM;
+  using Teuchos::VERB_HIGH;
+  using Teuchos::VERB_EXTREME;
+  using Teuchos::ETransp;
+  using Teuchos::NO_TRANS;
+  using Teuchos::TRANS;
+  using Teuchos::CONJ_TRANS;
+  using Teuchos::EDiag;
+  using Teuchos::UNIT_DIAG;
+  using Teuchos::NON_UNIT_DIAG;
+  using Teuchos::EUplo;
+  using Teuchos::UPPER_TRI;
+  using Teuchos::LOWER_TRI;
+  using Teuchos::ParameterList;
+  using Teuchos::parameterList;
+
+  using Tpetra::Map;
+  using Tpetra::MultiVector;
+  using Tpetra::Vector;
+  using Tpetra::Operator;
+  using Tpetra::CrsMatrix;
+  using Tpetra::CrsGraph;
+  using Tpetra::RowMatrix;
+  using Tpetra::Import;
+  using Tpetra::global_size_t;
+  using Tpetra::createContigMapWithNode;
+  using Tpetra::createLocalMapWithNode;
+  using Tpetra::createVector;
+  using Tpetra::OptimizeOption;
+  using Tpetra::DoOptimizeStorage;
+  using Tpetra::DoNotOptimizeStorage;
+  using Tpetra::GloballyDistributed;
+  using Tpetra::INSERT;
+
+  //
+  // UNIT TESTS
+  //
+
+  ////
+  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, NonSquare, LO, GO, Scalar, Node )
+  {
+    // Applies a non-square CrsMatrix once and expects exactly one recorded Kokkos region label containing "spmv[TPL_".
+    // skip test if Scalar is not (float or double)
+    if constexpr (!(std::is_same_v || std::is_same_v)) {
+      out << "SKIP: unsupported scalar type" << std::endl;
+      TEST_EQUALITY_CONST(1,1); // SKIP
+      return;
+    }
+    // skip test if LO != int
+    if constexpr (!std::is_same_v) {
+      out << "SKIP: unsupported local ordinal type" << std::endl;
+      TEST_EQUALITY_CONST(1,1); // SKIP
+      return;
+    }
+    // skip test if CUDA enabled and not CUDA space
+    #if defined(HAVE_TPETRA_CUDA) || defined(HAVE_TPETRACORE_CUDA)
+    if constexpr (!std::is_same_v) {
+      out << "SKIP: non-CUDA exec space" << std::endl;
+      TEST_EQUALITY_CONST(1,1); // SKIP
+      return;
+    }
+    #endif
+    // skip test if HIP enabled and not HIP space
+    #if defined(HAVE_TPETRA_HIP) || defined(HAVE_TPETRACORE_HIP)
+    if constexpr (!std::is_same_v) {
+      out << "SKIP: non-HIP exec space" << std::endl;
+      TEST_EQUALITY_CONST(1,1); // SKIP
+      return;
+    }
+    #endif
+
+    typedef CrsMatrix MAT;
+    typedef MultiVector MV;
+    typedef Map map_type;
+    const global_size_t INVALID = OrdinalTraits::invalid();
+    // get a comm
+    RCP > comm = getDefaultComm();
+    const int M = 3;
+    const int P = 5;
+    const int N = comm->getSize();
+    const int myImageID = comm->getRank();
+    // create Maps
+    // matrix is M*N-by-P
+    //                  col
+    //            0        1                  P-1
+    //    0  [0        MN              ... (P-1)MN     ]
+    //    .  [...      ...                 ...         ]
+    //    0  [M-1      MN+M-1              (P-1)MN+M-1 ]
+    //p   1  [M        MN+M                            ]
+    //r   .  [...      ...                             ] = [A_ij], where A_ij = i+jMN
+    //o   1  [2M-1     MN+2M-1                         ]
+    //c   .  [...                                      ]
+    //   N-1 [(N-1)M   MN+(N-1)M                       ]
+    //    .  [...      ...                             ]
+    //   N-1 [MN-1     MN+MN-1                         ]
+    //
+    // row map, range map is [0,M-1] [M,2M-1] [2M,3M-1] ... [MN-M,MN-1]
+    // domain map will be map for X (lclmap)
+    //
+    // input multivector X is not distributed:
+    //
+    //   X = [  0    P    ...  (numVecs-1)P ]
+    //       [ ...  ....  ...       ...     ] = [X_ij], where X_ij = i+jP
+    //       [ P-1  2P-1  ...   numVecs*P-1 ]
+    //
+    // the result of the non-transpose multiplication should be
+    //                              P-1
+    // (A*X)_ij = sum_k A_ik X_kj = sum (i+kMN)(k+jP) = jiP^2 + (i+jMNP)(P^2-P)/2 + MNP(P-1)(2P-1)/6
+    //                              k=0
+    //
+    //
+    //
+    const int numVecs = 3;
+    RCP rowmap (new map_type (INVALID, M, 0, comm));
+    RCP lclmap = createLocalMapWithNode (P, comm);
+
+    // create the matrix
+    MAT A(rowmap,P);
+    for (GO i=0; i(M); ++i) {
+      for (GO j=0; j(P); ++j) {
+        A.insertGlobalValues( M*myImageID+i, tuple(j), tuple(M*myImageID+i + j*M*N) );
+      }
+    }
+    // call fillComplete()
+    A.fillComplete(lclmap,rowmap);
+    // build the input multivector X
+    MV X(lclmap,numVecs);
+    for (GO i=0; i(P); ++i) {
+      for (GO j=0; j(numVecs); ++j) {
+        X.replaceGlobalValue(i,j,static_cast(i+j*P));
+      }
+    }
+    // allocate output multivec
+    MV Bout(rowmap,numVecs);
+    // run apply() while the region counter is recording
+    Bout.randomize();
+    Tpetra::Details::KokkosRegionCounter::reset(); // clear labels recorded earlier
+    Tpetra::Details::KokkosRegionCounter::start();
+    A.apply(X,Bout); // the only call made between start() and stop()
+    Tpetra::Details::KokkosRegionCounter::stop();
+    // exactly one recorded region label must contain "spmv[TPL_"
+    TEST_COMPARE(Tpetra::Details::KokkosRegionCounter::get_count_region_contains("spmv[TPL_"), ==, 1);
+
+    using Teuchos::outArg;
+    using Teuchos::REDUCE_MIN;
+    using Teuchos::reduceAll;
+    const int lclSuccess = success ? 1 : 0;
+    int gblSuccess = 0; // output argument
+    reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); // minimum over ranks: 1 only if every rank passed
+    TEST_EQUALITY_CONST( gblSuccess, 1 );
+    if (gblSuccess != 1) {
+      out << "KokkosKernels TPL use was not detected where it was expected!" << endl;
+    }
+  }
+
+//
+// INSTANTIATIONS
+//
+#define UNIT_TEST_GROUP( SCALAR, LO, GO, NODE ) \
+  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( CrsMatrix, NonSquare, LO, GO, SCALAR, NODE )
+
+  TPETRA_ETI_MANGLING_TYPEDEFS()
+
+  TPETRA_INSTANTIATE_SLGN( UNIT_TEST_GROUP )
+}