diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp index 4ad46301f9df..7e5daf357256 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp @@ -191,12 +191,12 @@ namespace MueLu { for(LO i=0; i; using device_type = DeviceType; using range_type = Kokkos::RangePolicy; + using LO_view = Kokkos::View; using aggregates_sizes_type = Kokkos::View; @@ -259,6 +260,12 @@ namespace MueLu { local_graph_type GetGraph() const; + /*! @brief Generates a compressed list of nodes in each aggregate, where + the entries in aggNodes[aggPtr[i]] up to aggNodes[aggPtr[i+1]-1] contain the nodes in aggregate i. + unaggregated contains the list of nodes which are, for whatever reason, not aggregated (e.g. Dirichlet) + */ + void ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const; + //! @name Overridden from Teuchos::Describable //@{ diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_kokkos_def.hpp index 80827cb480e8..f8cba41cfaee 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_kokkos_def.hpp @@ -180,6 +180,58 @@ namespace MueLu { return graph_; } + + template + void + Aggregates_kokkos >::ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const { + LO numAggs = GetNumAggregates(); + LO numNodes = vertex2AggId_->getLocalLength(); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + aggPtr = LO_view("aggPtr",numAggs+1); + aggNodes = LO_view("aggNodes",numNodes); + LO_view aggCurr("agg curr",numAggs+1); + + // Construct the "rowptr" and the counter + Kokkos::parallel_scan("MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0,numAggs+1), + KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) { + LO count = 0; + if(aggIdx < numAggs) + count = aggSizes(aggIdx); + if(final_pass) { + aggPtr(aggIdx) = aggOffset; + aggCurr(aggIdx) = aggOffset; + if(aggIdx==numAggs) + aggCurr(numAggs) = 0; // use this for counting unaggregated nodes + } + aggOffset += count; + }); + + // Preallocate unaggregated to the correct size + LO numUnaggregated = 0; + Kokkos::parallel_reduce("MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0,numNodes), + KOKKOS_LAMBDA(const LO nodeIdx, LO & count) { + if(vertex2AggId(nodeIdx,0)==INVALID) + count++; + }, numUnaggregated); + unaggregated = LO_view("unaggregated",numUnaggregated); + + // Stick the nodes in each aggregate's spot + Kokkos::parallel_for("MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0,numNodes), + KOKKOS_LAMBDA(const LO nodeIdx) { + LO aggIdx = vertex2AggId(nodeIdx,0); + if(aggIdx != INVALID) { + // atomic postincrement aggCurr(aggIdx) each time + aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx; + } else { + // same, but using last entry of aggCurr for unaggregated nodes + unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx; + } + }); + + } template std::string Aggregates_kokkos >::description() const { diff --git a/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp index 2d8deec90d7a..a9bb67efc595 100644 --- a/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp @@ -611,6 +611,20 @@ namespace MueLuTests { }, numBadAggregates); TEST_EQUALITY(numBadAggregates, 0); + // Check ComputeNodesInAggregate + typename Aggregates_kokkos::LO_view aggPtr, aggNodes, unaggregated; + aggregates->ComputeNodesInAggregate(aggPtr, aggNodes, unaggregated); + TEST_EQUALITY(aggPtr.extent_int(0), numAggs+1); + // TEST_EQUALITY(unaggregated.extent_int(0), 0); // 1 unaggregated node in the MPI_4 case + + // test aggPtr(i)+aggSizes(i)=aggPtr(i+1) + typename Aggregates_kokkos::LO_view::HostMirror aggPtr_h = Kokkos::create_mirror_view(aggPtr); + typename Aggregates_kokkos::aggregates_sizes_type::HostMirror aggSizes_h = Kokkos::create_mirror_view(aggSizes); + Kokkos::deep_copy(aggPtr_h, aggPtr); + Kokkos::deep_copy(aggSizes_h, aggSizes); + for(LO i=0; i