From 1b2eb9b8c49d1a05e60e388dac65d4193417290a Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 5 Nov 2024 15:22:02 -0700 Subject: [PATCH 1/3] MueLu: Fix for phase 2b Compute aggWeight locally instead of globally. Signed-off-by: Christian Glusa --- ...MueLu_AggregationPhase2bAlgorithm_decl.hpp | 18 +- .../MueLu_AggregationPhase2bAlgorithm_def.hpp | 357 ++++++++---------- .../MueLu_UncoupledAggregationFactory_def.hpp | 3 +- .../unit_tests_kokkos/Aggregates_kokkos.cpp | 1 + 4 files changed, 173 insertions(+), 206 deletions(-) diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp index b1dc9913bbb1..dd86190c590a 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp @@ -73,17 +73,13 @@ class AggregationPhase2bAlgorithm : public MueLu::AggregationAlgorithmBase::AggStatType& aggStat, LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - typename AggregationAlgorithmBase::AggStatType& aggStat, - LO& numNonAggregatedNodes) const; - - void BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - typename AggregationAlgorithmBase::AggStatType& aggStat, - LO& numNonAggregatedNodes) const; + template + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos graph, + Aggregates& aggregates, + typename AggregationAlgorithmBase::AggStatType aggStat, + LO& numNonAggregatedNodes) const; + //@} std::string description() const { return "Phase 2b (expansion)"; } diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp index ca6802c31f5f..7e2292cdccba 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp @@ -39,12 +39,12 @@ void AggregationPhase2bAlgorithm::BuildAggreg LO numLocalAggregates = aggregates.GetNumAggregates(); - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; + const LO defaultConnectWeight = 100; + const LO penaltyConnectWeight = 10; - std::vector aggWeight(numLocalAggregates, 0); - std::vector connectWeight(numRows, defaultConnectWeight); - std::vector aggPenalties(numRows, 0); + std::vector aggWeight(numLocalAggregates, 0); + std::vector connectWeight(numRows, defaultConnectWeight); + std::vector aggPenalties(numRows, 0); // We do this cycle twice. // I don't know why, but ML does it too @@ -118,24 +118,126 @@ void AggregationPhase2bAlgorithm:: LO& numNonAggregatedNodes) const { if (params.get("aggregation: deterministic")) { Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); + BuildAggregates(params, graph, aggregates, aggStat, numNonAggregatedNodes); } else { Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + BuildAggregates(params, graph, aggregates, aggStat, numNonAggregatedNodes); } } // BuildAggregates -template -void AggregationPhase2bAlgorithm:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - typename AggregationAlgorithmBase::AggStatType& aggStat, - LO& numNonAggregatedNodes) const { +template +class ExpansionFunctor { + private: + AggStatType aggStat; + ProcWinnerType procWinner; + Vertex2AggType vertex2AggId; + ColorsType colors; + LocalGraphType lclLWGraph; + AggPenaltyType aggPenalties; + AggPenaltyType aggPenaltyUpdates; + AggPenaltyType connectWeight; + LO penaltyConnectWeight; + LO color; + LO myRank; + + public: + ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& aggPenaltyUpdates_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_) + : aggStat(aggStat_) + , procWinner(procWinner_) + , vertex2AggId(vertex2AggId_) + , colors(colors_) + , lclLWGraph(lclLWGraph_) + , aggPenalties(aggPenalties_) + , connectWeight(connectWeight_) + , aggPenaltyUpdates(aggPenaltyUpdates_) + , penaltyConnectWeight(penaltyConnectWeight_) + , color(color_) + , myRank(rank_) {} + + ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_) + : aggStat(aggStat_) + , procWinner(procWinner_) + , vertex2AggId(vertex2AggId_) + , colors(colors_) + , lclLWGraph(lclLWGraph_) + , aggPenalties(aggPenalties_) + , connectWeight(connectWeight_) + , penaltyConnectWeight(penaltyConnectWeight_) + , color(color_) + , myRank(rank_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO& i, LO& tmpNumAggregated) const { + if (aggStat(i) != READY || colors(i) != color) + return; + + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; + + auto neighOfINode = lclLWGraph.getNeighborVertices(i); + + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); + + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == AGGREGATED)) { + auto aggId = vertex2AggId(neigh, 0); + LO aggWeight = 0; + for (int k = 0; k < neighOfINode.length; k++) { + LO neigh2 = neighOfINode(k); + if (lclLWGraph.isLocalNeighborVertex(neigh2) && + (aggStat(neigh2) == AGGREGATED) && + (vertex2AggId(neigh2, 0) == aggId)) + aggWeight += connectWeight(neigh2); + } + + if (matchMLbehavior && (aggWeight == 0)) + return; + + int score = aggWeight - aggPenalties(aggId); + + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight(neigh); + + } else if (aggId == bestAggId && + connectWeight(neigh) > bestConnect) { + bestConnect = connectWeight(neigh); + } + } + } + if (bestScore >= 0) { + aggStat(i) = AGGREGATED; + vertex2AggId(i, 0) = bestAggId; + procWinner(i, 0) = myRank; + + if constexpr (deterministic) { + Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1); + } else { + Kokkos::atomic_add(&aggPenalties(bestAggId), 1); + } + connectWeight(i) = bestConnect - penaltyConnectWeight; + tmpNumAggregated++; + } + } +}; + +template +template +void AggregationPhase2bAlgorithm:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos graph, + Aggregates& aggregates, + typename AggregationAlgorithmBase::AggStatType aggStat, + LO& numNonAggregatedNodes) const { using device_type = typename LWGraph_kokkos::device_type; using execution_space = typename LWGraph_kokkos::execution_space; + bool matchMLbehavior = params.get("aggregation: match ML phase2b"); + const LO numRows = graph.GetNodeNumVertices(); const int myRank = graph.GetComm()->getRank(); @@ -145,14 +247,14 @@ void AggregationPhase2bAlgorithm:: const LO numColors = aggregates.GetGraphNumColors(); const LO numLocalAggregates = aggregates.GetNumAggregates(); - auto lclLWGraph = graph; - const LO defaultConnectWeight = 100; const LO penaltyConnectWeight = 10; - Kokkos::View aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); Kokkos::View aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here + Kokkos::View aggPenaltyUpdates; + // if constexpr (deterministic) + aggPenaltyUpdates = Kokkos::View("aggPenaltyUpdates", numLocalAggregates); Kokkos::deep_copy(connectWeight, defaultConnectWeight); @@ -170,190 +272,59 @@ void AggregationPhase2bAlgorithm:: } for (int iter = 0; iter < maxIters; ++iter) { for (LO color = 1; color <= numColors; ++color) { - Kokkos::deep_copy(aggWeight, 0); - // the reduce counts how many nodes are aggregated by this phase, // which will then be subtracted from numNonAggregatedNodes LO numAggregated = 0; - Kokkos::parallel_reduce( - "Aggregation Phase 2b: aggregates expansion", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) { - if (aggStat(i) != READY || colors(i) != color) - return; - - auto neighOfINode = lclLWGraph.getNeighborVertices(i); - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - // We don't check (neigh != i), as it is covered by checking - // (aggStat[neigh] == AGGREGATED) - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) { - auto aggId = vertex2AggId(neigh, 0); - int score = aggWeight(aggId) - aggPenalties(aggId); - - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight(neigh); - - } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { - bestConnect = connectWeight(neigh); - } - } - } - if (bestScore >= 0) { - aggStat(i) = AGGREGATED; - vertex2AggId(i, 0) = bestAggId; - procWinner(i, 0) = myRank; - - Kokkos::atomic_add(&aggPenalties(bestAggId), 1); - connectWeight(i) = bestConnect - penaltyConnectWeight; - tmpNumAggregated++; - } - }, - numAggregated); // parallel_for - numNonAggregatedNodes -= numAggregated; - } - } // loop over maxIters -} // BuildAggregatesRandom - -template -void AggregationPhase2bAlgorithm:: - BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - typename AggregationAlgorithmBase::AggStatType& aggStat, - LO& numNonAggregatedNodes) const { - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph; - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); - Kokkos::View aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop - Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates); - Kokkos::View aggPenalties("aggPenalties", numLocalAggregates); - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); + if constexpr (deterministic) { + if (matchMLbehavior) { + auto functor = ExpansionFunctor(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank); + + Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + functor, + numAggregated); + } else { + auto functor = ExpansionFunctor(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank); + + Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + functor, + numAggregated); + } + } else { + if (matchMLbehavior) { + auto functor = ExpansionFunctor(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank); + + Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + functor, + numAggregated); + } else { + auto functor = ExpansionFunctor(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank); + + Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + functor, + numAggregated); + } + } - // We do this cycle twice. - // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if (maxNodesPerAggregate == std::numeric_limits::max()) { - maxIters = 1; - } - for (int iter = 0; iter < maxIters; ++iter) { - for (LO color = 1; color <= numColors; color++) { - Kokkos::deep_copy(aggWeight, 0); + if constexpr (deterministic) { + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg penalties", + Kokkos::RangePolicy(0, numLocalAggregates), + KOKKOS_LAMBDA(const LO agg) { + aggPenalties(agg) += aggPenaltyUpdates(agg); + aggPenaltyUpdates(agg) = 0; + }); + } - // the reduce counts how many nodes are aggregated by this phase, - // which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_for( - "Aggregation Phase 2b: updating agg weights", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO i) { - if (aggStat(i) != READY || colors(i) != color) - return; - auto neighOfINode = lclLWGraph.getNeighborVertices(i); - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - // We don't check (neigh != i), as it is covered by checking - // (aggStat[neigh] == AGGREGATED) - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); - } - }); - - Kokkos::parallel_reduce( - "Aggregation Phase 2b: aggregates expansion", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) { - if (aggStat(i) != READY || colors(i) != color) - return; - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - auto neighOfINode = lclLWGraph.getNeighborVertices(i); - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) { - auto aggId = vertex2AggId(neigh, 0); - int score = aggWeight(aggId) - aggPenalties(aggId); - - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight(neigh); - - } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { - bestConnect = connectWeight(neigh); - } - } - } - if (bestScore >= 0) { - aggStat(i) = AGGREGATED; - vertex2AggId(i, 0) = bestAggId; - procWinner(i, 0) = myRank; - - Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1); - connectWeight(i) = bestConnect - penaltyConnectWeight; - tmpNumAggregated++; - } - }, - numAggregated); // parallel_reduce - - Kokkos::parallel_for( - "Aggregation Phase 2b: updating agg penalties", - Kokkos::RangePolicy(0, numLocalAggregates), - KOKKOS_LAMBDA(const LO agg) { - aggPenalties(agg) += aggPenaltyUpdates(agg); - aggPenaltyUpdates(agg) = 0; - }); numNonAggregatedNodes -= numAggregated; } - } // loop over k -} // BuildAggregatesDeterministic + } // loop over maxIters + +} // BuildAggregates } // namespace MueLu diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp index 386451d1cfc3..170506f48d39 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp @@ -190,9 +190,8 @@ void UncoupledAggregationFactory::Build(Level runOnHost = false; TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: use interface aggregation"), std::invalid_argument, "Option: 'aggregation: use interface aggregation' is not supported in the Kokkos version of uncoupled aggregation"); - // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 or Phase 2b, but is in 2a + // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 , but it is in 2a and 2b TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: match ML phase1"), std::invalid_argument, "Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation"); - TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: match ML phase2b"), std::invalid_argument, "Option: 'aggregation: match ML phase2b' is not supported in the Kokkos version of uncoupled aggregation"); } // Build diff --git a/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp index cca61d42e331..5ea76cbaec3c 100644 --- a/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/Aggregates_kokkos.cpp @@ -102,6 +102,7 @@ void gimmeUncoupledAggregates(const Teuchos::RCP("aggregation: deterministic", false); params.set("aggregation: match ML phase2a", true); + params.set("aggregation: match ML phase2b", false); params.set("aggregation: error on nodes with no on-rank neighbors", false); params.set("aggregation: phase3 avoid singletons", false); From 7f3e984a4f14e36466fbb488c7bf9addbe42ff37 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 19 Nov 2024 17:31:32 -0700 Subject: [PATCH 2/3] MueLu: Rebase gold files Signed-off-by: Christian Glusa --- .../kokkos/Output/operator_solve_1_np1_tpetra.gold | 6 +++--- .../kokkos/Output/operator_solve_1_np4_tpetra.gold | 6 +++--- .../kokkos/Output/operator_solve_5_np1_tpetra.gold | 6 +++--- .../kokkos/Output/operator_solve_5_np4_tpetra.gold | 6 +++--- .../kokkos/Output/operator_solve_6_np1_tpetra.gold | 6 +++--- .../kokkos/Output/operator_solve_6_np4_tpetra.gold | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold index 61ecae9e7780..186ca4496970 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold @@ -120,14 +120,14 @@ Cycle type = V level rows nnz nnz/row c ratio procs 0 10000 49600 4.96 1 1 1700 14928 8.78 5.88 1 - 2 192 1682 8.76 8.85 1 - 3 24 200 8.33 8.00 1 + 2 192 1674 8.72 8.85 1 + 3 24 190 7.92 8.00 1 Smoother (level 0) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [10000, 10000], Global nnz: 49600} Smoother (level 1) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [1700, 1700], Global nnz: 14928} -Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [192, 192], Global nnz: 1682} +Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [192, 192], Global nnz: 1674} Smoother (level 3) pre : solver interface Smoother (level 3) post : no smoother diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold index 0117cfa2fd4d..df9604a89b67 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold @@ -120,14 +120,14 @@ Cycle type = V level rows nnz nnz/row c ratio procs 0 10000 49600 4.96 4 1 1700 15318 9.01 5.88 4 - 2 216 2158 9.99 7.87 4 - 3 32 446 13.94 6.75 4 + 2 216 2150 9.95 7.87 4 + 3 32 434 13.56 6.75 4 Smoother (level 0) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [10000, 10000], Global nnz: 49600} Smoother (level 1) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [1700, 1700], Global nnz: 15318} -Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [216, 216], Global nnz: 2158} +Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [216, 216], Global nnz: 2150} Smoother (level 3) pre : solver interface Smoother (level 3) post : no smoother diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold index 622442f2328f..0b51e98970e5 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold @@ -95,14 +95,14 @@ Cycle type = V level rows nnz nnz/row c ratio procs 0 10000 49600 4.96 1 1 1700 14928 8.78 5.88 1 - 2 192 1682 8.76 8.85 1 - 3 24 200 8.33 8.00 1 + 2 192 1674 8.72 8.85 1 + 3 24 190 7.92 8.00 1 Smoother (level 0) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [10000, 10000], Global nnz: 49600} Smoother (level 1) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [1700, 1700], Global nnz: 14928} -Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [192, 192], Global nnz: 1682} +Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [192, 192], Global nnz: 1674} Smoother (level 3) pre : solver interface Smoother (level 3) post : no smoother diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold index a747e7c913f7..baf6c048c339 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold @@ -95,14 +95,14 @@ Cycle type = V level rows nnz nnz/row c ratio procs 0 10000 49600 4.96 4 1 1700 15318 9.01 5.88 4 - 2 216 2158 9.99 7.87 4 - 3 32 446 13.94 6.75 4 + 2 216 2150 9.95 7.87 4 + 3 32 434 13.56 6.75 4 Smoother (level 0) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [10000, 10000], Global nnz: 49600} Smoother (level 1) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [1700, 1700], Global nnz: 15318} -Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [216, 216], Global nnz: 2158} +Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [216, 216], Global nnz: 2150} Smoother (level 3) pre : solver interface Smoother (level 3) post : no smoother diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold index fe5914900fb0..49df428e7ac1 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold @@ -100,14 +100,14 @@ Cycle type = V level rows nnz nnz/row c ratio procs 0 10000 49600 4.96 1 1 1700 14928 8.78 5.88 1 - 2 192 1682 8.76 8.85 1 - 3 24 200 8.33 8.00 1 + 2 192 1674 8.72 8.85 1 + 3 24 190 7.92 8.00 1 Smoother (level 0) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [10000, 10000], Global nnz: 49600} Smoother (level 1) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [1700, 1700], Global nnz: 14928} -Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [192, 192], Global nnz: 1682} +Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [192, 192], Global nnz: 1674} Smoother (level 3) pre : solver interface Smoother (level 3) post : no smoother diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold index 0d4663cf3256..9fb6a3101a8b 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold @@ -100,14 +100,14 @@ Cycle type = V level rows nnz nnz/row c ratio procs 0 10000 49600 4.96 4 1 1700 15318 9.01 5.88 4 - 2 216 2158 9.99 7.87 4 - 3 32 446 13.94 6.75 4 + 2 216 2150 9.95 7.87 4 + 3 32 434 13.56 6.75 4 Smoother (level 0) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [10000, 10000], Global nnz: 49600} Smoother (level 1) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [1700, 1700], Global nnz: 15318} -Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [216, 216], Global nnz: 2158} +Smoother (level 2) both : "Ifpack2::Relaxation": {Initialized: true, Computed: true, Type: Symmetric Gauss-Seidel, sweeps: 1, damping factor: 1, Global matrix dimensions: [216, 216], Global nnz: 2150} Smoother (level 3) pre : solver interface Smoother (level 3) post : no smoother From ff0636bbe636fcd49fd7c8b84809783060c6a1fd Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 19 Nov 2024 17:42:32 -0700 Subject: [PATCH 3/3] MueLu Regression test: adjust deepcopy counts Signed-off-by: Christian Glusa --- packages/muelu/test/unit_tests_kokkos/Regression.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/muelu/test/unit_tests_kokkos/Regression.cpp b/packages/muelu/test/unit_tests_kokkos/Regression.cpp index e9ab09ea37a2..46f4f88777ce 100644 --- a/packages/muelu/test/unit_tests_kokkos/Regression.cpp +++ b/packages/muelu/test/unit_tests_kokkos/Regression.cpp @@ -98,12 +98,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, H2D, Scalar, LocalOrdinal, GlobalO } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 19 : 34); + size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 19 : 31); TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else else { - TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), kkNativeDeepCopies + 34); + TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), kkNativeDeepCopies + 31); } #endif // KOKKOS_HAS_SHARED_SPACE @@ -130,7 +130,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, H2D, Scalar, LocalOrdinal, GlobalO } // H2D -TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregration, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregation, Scalar, LocalOrdinal, GlobalOrdinal, Node) { #include MUELU_TESTING_SET_OSTREAM; MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); @@ -175,12 +175,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregration, Scalar, LocalOrdinal } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = std::is_same_v ? 17 : 23; + size_t targetNumDeepCopies = std::is_same_v ? 17 : 16; TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else else { - TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), 23); + TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), 16); } #endif @@ -193,7 +193,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregration, Scalar, LocalOrdinal #define MUELU_ETI_GROUP(Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Regression, H2D, Scalar, LO, GO, Node) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Regression, Aggregration, Scalar, LO, GO, Node) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Regression, Aggregation, Scalar, LO, GO, Node) #include