From 9b82225697ac2530f416a5c17792fe071b8ebe12 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Sat, 21 Sep 2024 01:23:31 +0200 Subject: [PATCH 01/14] Use mold linker for CI testing --- .jenkins/dev-debian-12.Jenkinsfile | 2 +- .jenkins/dev-fedora-40.Jenkinsfile | 2 +- .jenkins/dev-ubuntu-22.04.Jenkinsfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.jenkins/dev-debian-12.Jenkinsfile b/.jenkins/dev-debian-12.Jenkinsfile index 5605ff412..25b91049f 100644 --- a/.jenkins/dev-debian-12.Jenkinsfile +++ b/.jenkins/dev-debian-12.Jenkinsfile @@ -49,7 +49,7 @@ node { } stage('Configure') { - cmake arguments: "-S " + runtimeSrcPath + " -B " + runtimeBuildPath + " -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=" + runtimeInstallPath, installation: 'InSearchPath', label: 'Configure' + cmake arguments: "-S " + runtimeSrcPath + " -B " + runtimeBuildPath + " -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_LINKER_TYPE=MOLD -DCMAKE_INSTALL_PREFIX=" + runtimeInstallPath, installation: 'InSearchPath', label: 'Configure' } stage('Build') { diff --git a/.jenkins/dev-fedora-40.Jenkinsfile b/.jenkins/dev-fedora-40.Jenkinsfile index 802dd90b8..322bdbc08 100644 --- a/.jenkins/dev-fedora-40.Jenkinsfile +++ b/.jenkins/dev-fedora-40.Jenkinsfile @@ -49,7 +49,7 @@ node { } stage('Configure') { - cmake arguments: "-S " + runtimeSrcPath + " -B " + runtimeBuildPath + " -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=" + runtimeInstallPath, installation: 'InSearchPath', label: 'Configure' + cmake arguments: "-S " + runtimeSrcPath + " -B " + runtimeBuildPath + " -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_LINKER_TYPE=MOLD -DCMAKE_INSTALL_PREFIX=" + runtimeInstallPath, installation: 'InSearchPath', label: 'Configure' } stage('Build') { diff --git a/.jenkins/dev-ubuntu-22.04.Jenkinsfile b/.jenkins/dev-ubuntu-22.04.Jenkinsfile index 772f81a1b..aaf058de8 100644 --- a/.jenkins/dev-ubuntu-22.04.Jenkinsfile +++ b/.jenkins/dev-ubuntu-22.04.Jenkinsfile @@ -49,7 +49,7 
@@ node { } stage('Configure') { - cmake arguments: "-S " + runtimeSrcPath + " -B " + runtimeBuildPath + " -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=" + runtimeInstallPath, installation: 'InSearchPath', label: 'Configure' + cmake arguments: "-S " + runtimeSrcPath + " -B " + runtimeBuildPath + " -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_LINKER_TYPE=MOLD -DCMAKE_INSTALL_PREFIX=" + runtimeInstallPath, installation: 'InSearchPath', label: 'Configure' } stage('Build') { From c520f319dc384ea80d8c700585803ca56e6ffb99 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Mon, 7 Oct 2024 23:00:00 +0200 Subject: [PATCH 02/14] Add safety check --- lib/Solvers/IDA/Instance.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index d5aeba741..c96432952 100644 --- a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -1539,6 +1539,21 @@ namespace marco::runtime::sundials::ida std::vector equationIndices = std::get<1>(chunk); do { + assert([&]() -> bool { + if (equationIndices.size() != equationRanges[equation].size()) { + return false; + } + + for (size_t i = 0, rank = equationIndices.size(); i < rank; ++i) { + if (equationIndices[i] < equationRanges[equation][i].begin || + equationIndices[i] >= equationRanges[equation][i].end) { + return false; + } + } + + return true; + }() && "Invalid equation indices"); + processFn(equation, equationIndices); } while (advanceEquationIndicesUntil( equationIndices, equationRanges[equation], std::get<2>(chunk))); From 6bf30a90a21866f1ed71dce339064669921ace39 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Mon, 7 Oct 2024 23:00:09 +0200 Subject: [PATCH 03/14] Reformat code --- lib/Solvers/IDA/Instance.cpp | 3348 +++++++++++++++++----------------- 1 file changed, 1639 insertions(+), 1709 deletions(-) diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index c96432952..c3534af3b 100644 --- 
a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -1,9 +1,9 @@ #ifdef SUNDIALS_ENABLE #include "marco/Runtime/Solvers/IDA/Instance.h" +#include "marco/Runtime/Simulation/Options.h" #include "marco/Runtime/Solvers/IDA/Options.h" #include "marco/Runtime/Solvers/IDA/Profiler.h" -#include "marco/Runtime/Simulation/Options.h" #include #include #include @@ -21,2174 +21,2134 @@ using namespace ::marco::runtime::sundials::ida; // Solver //===---------------------------------------------------------------------===// -namespace marco::runtime::sundials::ida -{ - IDAInstance::IDAInstance() - : startTime(simulation::getOptions().startTime), - endTime(simulation::getOptions().endTime), - timeStep(getOptions().timeStep) - { - // Initially there is no variable in the instance. - variableOffsets.push_back(0); - - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Instance created" << std::endl; - } +namespace marco::runtime::sundials::ida { +IDAInstance::IDAInstance() + : startTime(simulation::getOptions().startTime), + endTime(simulation::getOptions().endTime), + timeStep(getOptions().timeStep) { + // Initially there is no variable in the instance. 
+ variableOffsets.push_back(0); + + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Instance created" << std::endl; } +} - IDAInstance::~IDAInstance() - { - if (getNumOfScalarEquations() != 0) { - N_VDestroy(variablesVector); - N_VDestroy(derivativesVector); - N_VDestroy(idVector); - N_VDestroy(tolerancesVector); +IDAInstance::~IDAInstance() { + if (getNumOfScalarEquations() != 0) { + N_VDestroy(variablesVector); + N_VDestroy(derivativesVector); + N_VDestroy(idVector); + N_VDestroy(tolerancesVector); - IDAFree(&idaMemory); - SUNLinSolFree(linearSolver); - SUNMatDestroy(sparseMatrix); - } + IDAFree(&idaMemory); + SUNLinSolFree(linearSolver); + SUNMatDestroy(sparseMatrix); + } - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Instance destroyed" << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Instance destroyed" << std::endl; } +} - void IDAInstance::setStartTime(double time) - { - startTime = time; +void IDAInstance::setStartTime(double time) { + startTime = time; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Start time set to " << startTime << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Start time set to " << startTime << std::endl; } +} - void IDAInstance::setEndTime(double time) - { - endTime = time; +void IDAInstance::setEndTime(double time) { + endTime = time; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] End time set to " << endTime << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] End time set to " << endTime << std::endl; } +} - void IDAInstance::setTimeStep(double step) - { - assert(step > 0); - timeStep = step; +void IDAInstance::setTimeStep(double step) { + assert(step > 0); + timeStep = step; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Time step set to " << 
timeStep << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Time step set to " << timeStep << std::endl; } +} - Variable IDAInstance::addAlgebraicVariable( - uint64_t rank, - const uint64_t* dimensions, - VariableGetter getterFunction, - VariableSetter setterFunction, - const char* name) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Adding algebraic variable"; - - if (name != nullptr) { - std::cerr << " \"" << name << "\""; - } +Variable IDAInstance::addAlgebraicVariable(uint64_t rank, + const uint64_t *dimensions, + VariableGetter getterFunction, + VariableSetter setterFunction, + const char *name) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Adding algebraic variable"; - std::cerr << std::endl; + if (name != nullptr) { + std::cerr << " \"" << name << "\""; } - // Add variable offset and dimensions. - assert(variableOffsets.size() == variablesDimensions.size() + 1); + std::cerr << std::endl; + } - VariableDimensions varDimension(rank); - uint64_t flatSize = 1; + // Add variable offset and dimensions. + assert(variableOffsets.size() == variablesDimensions.size() + 1); - for (uint64_t i = 0; i < rank; ++i) { - flatSize *= dimensions[i]; - varDimension[i] = dimensions[i]; - } + VariableDimensions varDimension(rank); + uint64_t flatSize = 1; - variablesDimensions.push_back(std::move(varDimension)); + for (uint64_t i = 0; i < rank; ++i) { + flatSize *= dimensions[i]; + varDimension[i] = dimensions[i]; + } - size_t offset = variableOffsets.back(); - variableOffsets.push_back(offset + flatSize); + variablesDimensions.push_back(std::move(varDimension)); - // Store the getter and setter functions. - algebraicAndStateVariablesGetters.push_back(getterFunction); - algebraicAndStateVariablesSetters.push_back(setterFunction); + size_t offset = variableOffsets.back(); + variableOffsets.push_back(offset + flatSize); - // Return the index of the variable. 
- Variable id = getNumOfArrayVariables() - 1; + // Store the getter and setter functions. + algebraicAndStateVariablesGetters.push_back(getterFunction); + algebraicAndStateVariablesSetters.push_back(setterFunction); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - ID: " << id << std::endl; - std::cerr << " - Rank: " << rank << std::endl; - std::cerr << " - Dimensions: ["; + // Return the index of the variable. + Variable id = getNumOfArrayVariables() - 1; - for (uint64_t i = 0; i < rank; ++i) { - if (i != 0) { - std::cerr << ","; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - ID: " << id << std::endl; + std::cerr << " - Rank: " << rank << std::endl; + std::cerr << " - Dimensions: ["; - std::cerr << dimensions[i]; + for (uint64_t i = 0; i < rank; ++i) { + if (i != 0) { + std::cerr << ","; } - std::cerr << "]" << std::endl; - std::cerr << " - Getter function address: " - << reinterpret_cast(getterFunction) << std::endl; - std::cerr << " - Setter function address: " - << reinterpret_cast(setterFunction) << std::endl; + std::cerr << dimensions[i]; } - return id; + std::cerr << "]" << std::endl; + std::cerr << " - Getter function address: " + << reinterpret_cast(getterFunction) << std::endl; + std::cerr << " - Setter function address: " + << reinterpret_cast(setterFunction) << std::endl; } - Variable IDAInstance::addStateVariable( - uint64_t rank, - const uint64_t* dimensions, - VariableGetter stateGetterFunction, - VariableSetter stateSetterFunction, - VariableGetter derivativeGetterFunction, - VariableSetter derivativeSetterFunction, - const char* name) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Adding state variable"; + return id; +} - if (name != nullptr) { - std::cerr << " \"" << name << "\""; - } +Variable IDAInstance::addStateVariable(uint64_t rank, + const uint64_t *dimensions, + VariableGetter stateGetterFunction, + VariableSetter stateSetterFunction, + 
VariableGetter derivativeGetterFunction, + VariableSetter derivativeSetterFunction, + const char *name) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Adding state variable"; - std::cerr << std::endl; + if (name != nullptr) { + std::cerr << " \"" << name << "\""; } - assert(variableOffsets.size() == getNumOfArrayVariables() + 1); + std::cerr << std::endl; + } - // Add variable offset and dimensions. - VariableDimensions variableDimensions(rank); - uint64_t flatSize = 1; + assert(variableOffsets.size() == getNumOfArrayVariables() + 1); - for (uint64_t i = 0; i < rank; ++i) { - flatSize *= dimensions[i]; - variableDimensions[i] = dimensions[i]; - } + // Add variable offset and dimensions. + VariableDimensions variableDimensions(rank); + uint64_t flatSize = 1; - variablesDimensions.push_back(variableDimensions); + for (uint64_t i = 0; i < rank; ++i) { + flatSize *= dimensions[i]; + variableDimensions[i] = dimensions[i]; + } - // Store the position of the start of the flattened array. - uint64_t offset = variableOffsets.back(); - variableOffsets.push_back(offset + flatSize); + variablesDimensions.push_back(variableDimensions); - // Store the getter and setter functions for the state variable. - algebraicAndStateVariablesGetters.push_back(stateGetterFunction); - algebraicAndStateVariablesSetters.push_back(stateSetterFunction); + // Store the position of the start of the flattened array. + uint64_t offset = variableOffsets.back(); + variableOffsets.push_back(offset + flatSize); - // Store the getter and setter functions for the derivative variable. - derivativeVariablesGetters.push_back(derivativeGetterFunction); - derivativeVariablesSetters.push_back(derivativeSetterFunction); + // Store the getter and setter functions for the state variable. + algebraicAndStateVariablesGetters.push_back(stateGetterFunction); + algebraicAndStateVariablesSetters.push_back(stateSetterFunction); - // Return the index of the variable. 
- Variable id = getNumOfArrayVariables() - 1; - stateVariablesMapping[id] = derivativeVariablesGetters.size() - 1; + // Store the getter and setter functions for the derivative variable. + derivativeVariablesGetters.push_back(derivativeGetterFunction); + derivativeVariablesSetters.push_back(derivativeSetterFunction); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - ID: " << id << std::endl; - std::cerr << " - Rank: " << rank << std::endl; - std::cerr << " - Dimensions: ["; + // Return the index of the variable. + Variable id = getNumOfArrayVariables() - 1; + stateVariablesMapping[id] = derivativeVariablesGetters.size() - 1; - for (uint64_t i = 0; i < rank; ++i) { - if (i != 0) { - std::cerr << ","; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - ID: " << id << std::endl; + std::cerr << " - Rank: " << rank << std::endl; + std::cerr << " - Dimensions: ["; - std::cerr << dimensions[i]; + for (uint64_t i = 0; i < rank; ++i) { + if (i != 0) { + std::cerr << ","; } - std::cerr << "]" << std::endl; - std::cerr << " - State variable getter function address: " - << reinterpret_cast(stateGetterFunction) << std::endl; - std::cerr << " - State variable setter function address: " - << reinterpret_cast(stateSetterFunction) << std::endl; - std::cerr << " - Derivative variable getter function address: " - << reinterpret_cast(derivativeGetterFunction) - << std::endl; - std::cerr << " - Derivative variable setter function address: " - << reinterpret_cast(derivativeSetterFunction) - << std::endl; + std::cerr << dimensions[i]; } - return id; + std::cerr << "]" << std::endl; + std::cerr << " - State variable getter function address: " + << reinterpret_cast(stateGetterFunction) << std::endl; + std::cerr << " - State variable setter function address: " + << reinterpret_cast(stateSetterFunction) << std::endl; + std::cerr << " - Derivative variable getter function address: " + << reinterpret_cast(derivativeGetterFunction) + << 
std::endl; + std::cerr << " - Derivative variable setter function address: " + << reinterpret_cast(derivativeSetterFunction) + << std::endl; } - Equation IDAInstance::addEquation( - const int64_t* ranges, - uint64_t equationRank, - Variable writtenVariable, - AccessFunction writeAccess, - const char* stringRepresentation) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Adding equation"; + return id; +} - if (stringRepresentation != nullptr) { - std::cerr << " \"" << stringRepresentation << "\""; - } +Equation IDAInstance::addEquation(const int64_t *ranges, uint64_t equationRank, + Variable writtenVariable, + AccessFunction writeAccess, + const char *stringRepresentation) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Adding equation"; - std::cerr << std::endl; + if (stringRepresentation != nullptr) { + std::cerr << " \"" << stringRepresentation << "\""; } - // Add the start and end dimensions of the current equation. - MultidimensionalRange eqRanges = {}; + std::cerr << std::endl; + } - for (size_t i = 0, e = equationRank * 2; i < e; i += 2) { - int64_t begin = ranges[i]; - int64_t end = ranges[i + 1]; - eqRanges.push_back({ begin, end }); - } + // Add the start and end dimensions of the current equation. + MultidimensionalRange eqRanges = {}; - equationRanges.push_back(eqRanges); + for (size_t i = 0, e = equationRank * 2; i < e; i += 2) { + int64_t begin = ranges[i]; + int64_t end = ranges[i + 1]; + eqRanges.push_back({begin, end}); + } - // Add the write access. - writeAccesses.emplace_back(writtenVariable, writeAccess); + equationRanges.push_back(eqRanges); - // Return the index of the equation. - Equation id = getNumOfVectorizedEquations() - 1; + // Add the write access. 
+ writeAccesses.emplace_back(writtenVariable, writeAccess); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - ID: " << id << std::endl; - std::cerr << " - Rank: " << equationRank << std::endl; - std::cerr << " - Ranges: ["; + // Return the index of the equation. + Equation id = getNumOfVectorizedEquations() - 1; - for (uint64_t i = 0; i < equationRank; ++i) { - if (i != 0) { - std::cerr << ","; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - ID: " << id << std::endl; + std::cerr << " - Rank: " << equationRank << std::endl; + std::cerr << " - Ranges: ["; - std::cerr << "[" << ranges[i * 2] << "," << (ranges[i * 2 + 1] - 1) << "]"; + for (uint64_t i = 0; i < equationRank; ++i) { + if (i != 0) { + std::cerr << ","; } - std::cerr << "]" << std::endl; - std::cerr << " - Written variable ID: " << writtenVariable << std::endl; - std::cerr << " - Write access function address: " - << reinterpret_cast(writeAccess) << std::endl; + std::cerr << "[" << ranges[i * 2] << "," << (ranges[i * 2 + 1] - 1) + << "]"; } - return id; + std::cerr << "]" << std::endl; + std::cerr << " - Written variable ID: " << writtenVariable << std::endl; + std::cerr << " - Write access function address: " + << reinterpret_cast(writeAccess) << std::endl; } - void IDAInstance::addVariableAccess( - Equation equation, - Variable variable, - AccessFunction accessFunction) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Adding access information" << std::endl; - std::cerr << " - Equation: " << equation << std::endl; - std::cerr << " - Variable: " << variable << std::endl; - std::cerr << " - Access function address: " - << reinterpret_cast(accessFunction) << std::endl; - } + return id; +} - assert(equation < getNumOfVectorizedEquations()); - assert(variable < getNumOfArrayVariables()); +void IDAInstance::addVariableAccess(Equation equation, Variable variable, + AccessFunction accessFunction) { + if 
(marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Adding access information" << std::endl; + std::cerr << " - Equation: " << equation << std::endl; + std::cerr << " - Variable: " << variable << std::endl; + std::cerr << " - Access function address: " + << reinterpret_cast(accessFunction) << std::endl; + } - precomputedAccesses = true; + assert(equation < getNumOfVectorizedEquations()); + assert(variable < getNumOfArrayVariables()); - if (variableAccesses.size() <= (size_t) equation) { - variableAccesses.resize(equation + 1); - } + precomputedAccesses = true; - auto& varAccessList = variableAccesses[equation]; - varAccessList.emplace_back(variable, accessFunction); + if (variableAccesses.size() <= (size_t)equation) { + variableAccesses.resize(equation + 1); } - void IDAInstance::setResidualFunction( - Equation equation, - ResidualFunction residualFunction) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Setting residual function for equation " << equation - << ". Address: " << reinterpret_cast(residualFunction) - << std::endl; - } + auto &varAccessList = variableAccesses[equation]; + varAccessList.emplace_back(variable, accessFunction); +} - if (residualFunctions.size() <= equation) { - residualFunctions.resize(equation + 1, nullptr); - } +void IDAInstance::setResidualFunction(Equation equation, + ResidualFunction residualFunction) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Setting residual function for equation " << equation + << ". 
Address: " << reinterpret_cast(residualFunction) + << std::endl; + } - residualFunctions[equation] = residualFunction; + if (residualFunctions.size() <= equation) { + residualFunctions.resize(equation + 1, nullptr); } - void IDAInstance::addJacobianFunction( - Equation equation, - Variable variable, - JacobianFunction jacobianFunction) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Setting jacobian function for equation " << equation - << " and variable " << variable << ". Address: " - << reinterpret_cast(jacobianFunction) << std::endl; - } + residualFunctions[equation] = residualFunction; +} - if (jacobianFunctions.size() <= equation) { - jacobianFunctions.resize(equation + 1, {}); - } +void IDAInstance::addJacobianFunction(Equation equation, Variable variable, + JacobianFunction jacobianFunction) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Setting jacobian function for equation " << equation + << " and variable " << variable + << ". 
Address: " << reinterpret_cast(jacobianFunction) + << std::endl; + } - if (jacobianFunctions[equation].size() <= variable) { - jacobianFunctions[equation].resize(variable + 1, nullptr); - } + if (jacobianFunctions.size() <= equation) { + jacobianFunctions.resize(equation + 1, {}); + } - jacobianFunctions[equation][variable] = jacobianFunction; + if (jacobianFunctions[equation].size() <= variable) { + jacobianFunctions[equation].resize(variable + 1, nullptr); } - bool IDAInstance::initialize() - { - assert(!initialized && "The IDA instance has already been initialized"); + jacobianFunctions[equation][variable] = jacobianFunction; +} + +bool IDAInstance::initialize() { + assert(!initialized && "The IDA instance has already been initialized"); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Performing initialization" << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Performing initialization" << std::endl; + } - currentTime = startTime; + currentTime = startTime; - // Compute the number of scalar variables. - scalarVariablesNumber = 0; + // Compute the number of scalar variables. + scalarVariablesNumber = 0; - for (Variable var = 0, e = getNumOfArrayVariables(); var < e; ++var) { - scalarVariablesNumber += getVariableFlatSize(var); - } + for (Variable var = 0, e = getNumOfArrayVariables(); var < e; ++var) { + scalarVariablesNumber += getVariableFlatSize(var); + } - // Compute the number of scalar equations. - scalarEquationsNumber = 0; + // Compute the number of scalar equations. 
+ scalarEquationsNumber = 0; - for (Equation eq = 0, e = getNumOfVectorizedEquations(); eq < e; ++eq) { - scalarEquationsNumber += getEquationFlatSize(eq); - } + for (Equation eq = 0, e = getNumOfVectorizedEquations(); eq < e; ++eq) { + scalarEquationsNumber += getEquationFlatSize(eq); + } - assert(getNumOfScalarVariables() == getNumOfScalarEquations() && - "Unbalanced system"); + assert(getNumOfScalarVariables() == getNumOfScalarEquations() && + "Unbalanced system"); - if (scalarEquationsNumber == 0) { - // IDA has nothing to solve. - initialized = true; - return true; - } + if (scalarEquationsNumber == 0) { + // IDA has nothing to solve. + initialized = true; + return true; + } #if SUNDIALS_VERSION_MAJOR >= 6 - // Create the SUNDIALS context. - if (SUNContext_Create(nullptr, &ctx) != 0) { - return false; - } + // Create the SUNDIALS context. + if (SUNContext_Create(nullptr, &ctx) != 0) { + return false; + } #endif - // Create and initialize the variables vector. + // Create and initialize the variables vector. #if SUNDIALS_VERSION_MAJOR >= 6 - variablesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + variablesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - variablesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + variablesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - assert(checkAllocation( - static_cast(variablesVector), "N_VNew_Serial")); + assert( + checkAllocation(static_cast(variablesVector), "N_VNew_Serial")); - for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { - N_VGetArrayPointer(variablesVector)[i] = 0; - } + for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { + N_VGetArrayPointer(variablesVector)[i] = 0; + } - // Create and initialize the derivatives vector. + // Create and initialize the derivatives vector. 
#if SUNDIALS_VERSION_MAJOR >= 6 - derivativesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + derivativesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - derivativesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + derivativesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - assert(checkAllocation( - static_cast(derivativesVector), "N_VNew_Serial")); + assert( + checkAllocation(static_cast(derivativesVector), "N_VNew_Serial")); - for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { - N_VGetArrayPointer(derivativesVector)[i] = 0; - } + for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { + N_VGetArrayPointer(derivativesVector)[i] = 0; + } - // Create and initialize the IDs vector. + // Create and initialize the IDs vector. #if SUNDIALS_VERSION_MAJOR >= 6 - idVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + idVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - idVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + idVector = N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - assert(checkAllocation(static_cast(idVector), "N_VNew_Serial")); + assert(checkAllocation(static_cast(idVector), "N_VNew_Serial")); - for (Variable var = 0; var < getNumOfArrayVariables(); ++var) { - VariableKind variableKind = getVariableKind(var); - uint64_t arrayOffset = variableOffsets[var]; - uint64_t flatSize = getVariableFlatSize(var); + for (Variable var = 0; var < getNumOfArrayVariables(); ++var) { + VariableKind variableKind = getVariableKind(var); + uint64_t arrayOffset = variableOffsets[var]; + uint64_t flatSize = getVariableFlatSize(var); - for (uint64_t scalarOffset = 0; scalarOffset < flatSize; ++scalarOffset) { - uint64_t offset = arrayOffset + scalarOffset; + for (uint64_t scalarOffset = 0; scalarOffset < flatSize; ++scalarOffset) { + uint64_t offset = arrayOffset + scalarOffset; - if (variableKind == VariableKind::ALGEBRAIC) { - 
N_VGetArrayPointer(idVector)[offset] = 0; - } else if (variableKind == VariableKind::STATE) { - N_VGetArrayPointer(idVector)[offset] = 1; - } + if (variableKind == VariableKind::ALGEBRAIC) { + N_VGetArrayPointer(idVector)[offset] = 0; + } else if (variableKind == VariableKind::STATE) { + N_VGetArrayPointer(idVector)[offset] = 1; } } + } - // Create and initialize the tolerances vector. + // Create and initialize the tolerances vector. #if SUNDIALS_VERSION_MAJOR >= 6 - tolerancesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + tolerancesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - tolerancesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + tolerancesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - assert(checkAllocation( - static_cast(tolerancesVector), "N_VNew_Serial")); + assert( + checkAllocation(static_cast(tolerancesVector), "N_VNew_Serial")); - for (Variable var = 0; var < getNumOfArrayVariables(); ++var) { - VariableKind variableKind = getVariableKind(var); - uint64_t arrayOffset = variableOffsets[var]; - uint64_t flatSize = getVariableFlatSize(var); + for (Variable var = 0; var < getNumOfArrayVariables(); ++var) { + VariableKind variableKind = getVariableKind(var); + uint64_t arrayOffset = variableOffsets[var]; + uint64_t flatSize = getVariableFlatSize(var); - for (uint64_t scalarOffset = 0; scalarOffset < flatSize; ++scalarOffset) { - uint64_t offset = arrayOffset + scalarOffset; + for (uint64_t scalarOffset = 0; scalarOffset < flatSize; ++scalarOffset) { + uint64_t offset = arrayOffset + scalarOffset; - if (variableKind == VariableKind::ALGEBRAIC) { - N_VGetArrayPointer(tolerancesVector)[offset] = std::min( - getOptions().maxAlgebraicAbsoluteTolerance, - getOptions().absoluteTolerance); - } else if (variableKind == VariableKind::STATE) { - N_VGetArrayPointer(tolerancesVector)[offset] = - getOptions().absoluteTolerance; - } + if (variableKind == VariableKind::ALGEBRAIC) 
{ + N_VGetArrayPointer(tolerancesVector)[offset] = + std::min(getOptions().maxAlgebraicAbsoluteTolerance, + getOptions().absoluteTolerance); + } else if (variableKind == VariableKind::STATE) { + N_VGetArrayPointer(tolerancesVector)[offset] = + getOptions().absoluteTolerance; } } + } - // Determine the order in which the equations must be processed when - // computing residuals and jacobians. - assert(getNumOfVectorizedEquations() == writeAccesses.size()); - equationsProcessingOrder.resize(getNumOfVectorizedEquations()); - - for (size_t i = 0, e = getNumOfVectorizedEquations(); i < e; ++i) { - equationsProcessingOrder[i] = i; - } + // Determine the order in which the equations must be processed when + // computing residuals and jacobians. + assert(getNumOfVectorizedEquations() == writeAccesses.size()); + equationsProcessingOrder.resize(getNumOfVectorizedEquations()); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Equations processing order: ["; + for (size_t i = 0, e = getNumOfVectorizedEquations(); i < e; ++i) { + equationsProcessingOrder[i] = i; + } - for (size_t i = 0, e = equationsProcessingOrder.size(); i < e; ++i) { - if (i != 0) { - std::cerr << ", "; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Equations processing order: ["; - std::cerr << equationsProcessingOrder[i]; + for (size_t i = 0, e = equationsProcessingOrder.size(); i < e; ++i) { + if (i != 0) { + std::cerr << ", "; } - std::cerr << "]" << std::endl; + std::cerr << equationsProcessingOrder[i]; } - // Check that all the residual functions have been set. - assert(residualFunctions.size() == getNumOfVectorizedEquations()); - - assert(std::all_of( - residualFunctions.begin(), residualFunctions.end(), - [](const ResidualFunction& function) { - return function != nullptr; - })); - - // Check if the IDA instance is not informed about the accesses that all - // the jacobian functions have been set. 
- assert(precomputedAccesses || - jacobianFunctions.size() == getNumOfVectorizedEquations()); - - assert(precomputedAccesses || - std::all_of( - jacobianFunctions.begin(), jacobianFunctions.end(), - [&](std::vector functions) { - if (functions.size() != - algebraicAndStateVariablesGetters.size()) { - return false; - } - - return std::all_of( - functions.begin(), functions.end(), - [](const JacobianFunction& function) { - return function != nullptr; - }); - })); - - // Check that all the getters and setters have been set. - assert(std::none_of( - algebraicAndStateVariablesGetters.begin(), - algebraicAndStateVariablesGetters.end(), - [](VariableGetter getter) { - return getter == nullptr; - }) && "Not all the variable getters have been set"); - - assert(std::none_of( - algebraicAndStateVariablesSetters.begin(), - algebraicAndStateVariablesSetters.end(), - [](VariableSetter setter) { - return setter == nullptr; - }) && "Not all the variable setters have been set"); - - assert(std::none_of( - derivativeVariablesGetters.begin(), - derivativeVariablesGetters.end(), - [](VariableGetter getter) { - return getter == nullptr; - }) && "Not all the derivative getters have been set"); - - assert(std::none_of( - derivativeVariablesSetters.begin(), - derivativeVariablesSetters.end(), - [](VariableSetter setter) { - return setter == nullptr; - }) && "Not all the derivative setters have been set"); - - // Reserve the space for data of the jacobian matrix. - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Reserving space for the data of the Jacobian matrix" - << std::endl; - } + std::cerr << "]" << std::endl; + } - jacobianMatrixData.resize(scalarEquationsNumber); + // Check that all the residual functions have been set. 
+ assert(residualFunctions.size() == getNumOfVectorizedEquations()); - for (Equation eq : equationsProcessingOrder) { - std::vector equationIndices; - getEquationBeginIndices(eq, equationIndices); + assert(std::all_of( + residualFunctions.begin(), residualFunctions.end(), + [](const ResidualFunction &function) { return function != nullptr; })); - Variable writtenVariable = getWrittenVariable(eq); + // Check if the IDA instance is not informed about the accesses that all + // the jacobian functions have been set. + assert(precomputedAccesses || + jacobianFunctions.size() == getNumOfVectorizedEquations()); - uint64_t writtenVariableRank = getVariableRank(writtenVariable); - uint64_t writtenVariableArrayOffset = variableOffsets[writtenVariable]; + assert(precomputedAccesses || + std::all_of(jacobianFunctions.begin(), jacobianFunctions.end(), + [&](std::vector functions) { + if (functions.size() != + algebraicAndStateVariablesGetters.size()) { + return false; + } - do { - std::vector writtenVariableIndices; - writtenVariableIndices.resize(writtenVariableRank, 0); + return std::all_of(functions.begin(), functions.end(), + [](const JacobianFunction &function) { + return function != nullptr; + }); + })); - AccessFunction writeAccessFunction = getWriteAccessFunction(eq); + // Check that all the getters and setters have been set. 
+ assert( + std::none_of(algebraicAndStateVariablesGetters.begin(), + algebraicAndStateVariablesGetters.end(), + [](VariableGetter getter) { return getter == nullptr; }) && + "Not all the variable getters have been set"); - writeAccessFunction( - equationIndices.data(), - writtenVariableIndices.data()); + assert( + std::none_of(algebraicAndStateVariablesSetters.begin(), + algebraicAndStateVariablesSetters.end(), + [](VariableSetter setter) { return setter == nullptr; }) && + "Not all the variable setters have been set"); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " Variable indices: "; - printIndices(writtenVariableIndices); - std::cerr << std::endl; - } + assert( + std::none_of(derivativeVariablesGetters.begin(), + derivativeVariablesGetters.end(), + [](VariableGetter getter) { return getter == nullptr; }) && + "Not all the derivative getters have been set"); - uint64_t equationScalarVariableOffset = getVariableFlatIndex( - variablesDimensions[writtenVariable], - writtenVariableIndices); + assert( + std::none_of(derivativeVariablesSetters.begin(), + derivativeVariablesSetters.end(), + [](VariableSetter setter) { return setter == nullptr; }) && + "Not all the derivative setters have been set"); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + equationScalarVariableOffset; + // Reserve the space for data of the jacobian matrix. + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Reserving space for the data of the Jacobian matrix" + << std::endl; + } - // Compute the column indexes that may be non-zeros. 
- std::vector jacobianColumns = - computeJacobianColumns(eq, equationIndices.data()); + jacobianMatrixData.resize(scalarEquationsNumber); - jacobianMatrixData[scalarEquationIndex].resize(jacobianColumns.size()); + for (Equation eq : equationsProcessingOrder) { + std::vector equationIndices; + getEquationBeginIndices(eq, equationIndices); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - Equation " << eq << std::endl; - std::cerr << " Equation indices: "; - printIndices(equationIndices); - std::cerr << std::endl; + Variable writtenVariable = getWrittenVariable(eq); - std::cerr << " Variable indices: "; - printIndices(writtenVariableIndices); - std::cerr << std::endl; + uint64_t writtenVariableRank = getVariableRank(writtenVariable); + uint64_t writtenVariableArrayOffset = variableOffsets[writtenVariable]; - std::cerr << " Scalar equation index: " << scalarEquationIndex - << std::endl; + do { + std::vector writtenVariableIndices; + writtenVariableIndices.resize(writtenVariableRank, 0); - std::cerr << " Number of possibly non-zero columns: " - << jacobianColumns.size() << std::endl; - } - } while (advanceEquationIndices(equationIndices, equationRanges[eq])); - } + AccessFunction writeAccessFunction = getWriteAccessFunction(eq); + + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); + + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " Variable indices: "; + printIndices(writtenVariableIndices); + std::cerr << std::endl; + } + + uint64_t equationScalarVariableOffset = getVariableFlatIndex( + variablesDimensions[writtenVariable], writtenVariableIndices); + + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + equationScalarVariableOffset; - // Compute the total amount of non-zero values in the Jacobian Matrix. - computeNNZ(); + // Compute the column indexes that may be non-zeros. 
+ std::vector jacobianColumns = + computeJacobianColumns(eq, equationIndices.data()); - // Compute the equation chunks for each thread. - computeThreadChunks(); + jacobianMatrixData[scalarEquationIndex].resize(jacobianColumns.size()); - // Initialize the values of the variables living inside IDA. - copyVariablesFromMARCO(variablesVector, derivativesVector); + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - Equation " << eq << std::endl; + std::cerr << " Equation indices: "; + printIndices(equationIndices); + std::cerr << std::endl; + + std::cerr << " Variable indices: "; + printIndices(writtenVariableIndices); + std::cerr << std::endl; + + std::cerr << " Scalar equation index: " << scalarEquationIndex + << std::endl; + + std::cerr << " Number of possibly non-zero columns: " + << jacobianColumns.size() << std::endl; + } + } while (advanceEquationIndices(equationIndices, equationRanges[eq])); + } - // Create and initialize the memory for IDA. + // Compute the total amount of non-zero values in the Jacobian Matrix. + computeNNZ(); + + // Compute the equation chunks for each thread. + computeThreadChunks(); + + // Initialize the values of the variables living inside IDA. + copyVariablesFromMARCO(variablesVector, derivativesVector); + + // Create and initialize the memory for IDA. #if SUNDIALS_VERSION_MAJOR >= 6 - idaMemory = IDACreate(ctx); + idaMemory = IDACreate(ctx); #else - idaMemory = IDACreate(); + idaMemory = IDACreate(); #endif - if (!checkAllocation(idaMemory, "IDACreate")) { - return false; - } + if (!checkAllocation(idaMemory, "IDACreate")) { + return false; + } - if (!idaInit()) { - return false; - } + if (!idaInit()) { + return false; + } - if (!idaSVTolerances()) { - return false; - } + if (!idaSVTolerances()) { + return false; + } - // Create sparse SUNMatrix for use in linear solver. + // Create sparse SUNMatrix for use in linear solver. 
#if SUNDIALS_VERSION_MAJOR >= 6 - sparseMatrix = SUNSparseMatrix( - static_cast(scalarEquationsNumber), - static_cast(scalarEquationsNumber), - static_cast(nonZeroValuesNumber), - CSR_MAT, - ctx); + sparseMatrix = SUNSparseMatrix( + static_cast(scalarEquationsNumber), + static_cast(scalarEquationsNumber), + static_cast(nonZeroValuesNumber), CSR_MAT, ctx); #else - sparseMatrix = SUNSparseMatrix( - static_cast(scalarEquationsNumber), - static_cast(scalarEquationsNumber), - static_cast(nonZeroValuesNumber), - CSR_MAT); + sparseMatrix = + SUNSparseMatrix(static_cast(scalarEquationsNumber), + static_cast(scalarEquationsNumber), + static_cast(nonZeroValuesNumber), CSR_MAT); #endif - if (!checkAllocation( - static_cast(sparseMatrix), "SUNSparseMatrix")) { - return false; - } + if (!checkAllocation(static_cast(sparseMatrix), "SUNSparseMatrix")) { + return false; + } - // Create and attach a KLU SUNLinearSolver object. + // Create and attach a KLU SUNLinearSolver object. #if SUNDIALS_VERSION_MAJOR >= 6 - linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix, ctx); + linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix, ctx); #else - linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix); + linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix); #endif - if (!checkAllocation(static_cast(linearSolver), "SUNLinSol_KLU")) { - return false; - } + if (!checkAllocation(static_cast(linearSolver), "SUNLinSol_KLU")) { + return false; + } - if (!idaSetLinearSolver()) { - return false; - } + if (!idaSetLinearSolver()) { + return false; + } - if (!idaSetUserData() || - !idaSetMaxNumSteps() || - !idaSetInitialStepSize() || - !idaSetMinStepSize() || - !idaSetMaxStepSize() || - !idaSetStopTime() || - !idaSetMaxErrTestFails() || - !idaSetSuppressAlg() || - !idaSetId() || - !idaSetJacobianFunction() || - !idaSetMaxNonlinIters() || - !idaSetMaxConvFails() || - !idaSetNonlinConvCoef() || - !idaSetNonlinConvCoefIC() || - !idaSetMaxNumStepsIC() || - !idaSetMaxNumJacsIC() || 
- !idaSetMaxNumItersIC() || - !idaSetLineSearchOffIC()) { - return false; - } + if (!idaSetUserData() || !idaSetMaxNumSteps() || !idaSetInitialStepSize() || + !idaSetMinStepSize() || !idaSetMaxStepSize() || !idaSetStopTime() || + !idaSetMaxErrTestFails() || !idaSetSuppressAlg() || !idaSetId() || + !idaSetJacobianFunction() || !idaSetMaxNonlinIters() || + !idaSetMaxConvFails() || !idaSetNonlinConvCoef() || + !idaSetNonlinConvCoefIC() || !idaSetMaxNumStepsIC() || + !idaSetMaxNumJacsIC() || !idaSetMaxNumItersIC() || + !idaSetLineSearchOffIC()) { + return false; + } - initialized = true; + initialized = true; + + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Initialization completed" << std::endl; + } + + return true; +} - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Initialization completed" << std::endl; +bool IDAInstance::calcIC() { + if (!initialized) { + if (!initialize()) { + return false; } + } + if (getNumOfScalarEquations() == 0) { + // IDA has nothing to solve return true; } - bool IDAInstance::calcIC() - { - if (!initialized) { - if (!initialize()) { - return false; - } - } - - if (getNumOfScalarEquations() == 0) { - // IDA has nothing to solve - return true; - } + realtype firstOutTime = + (endTime - startTime) / getOptions().timeScalingFactorInit; - realtype firstOutTime = (endTime - startTime) / - getOptions().timeScalingFactorInit; - - IDA_PROFILER_IC_START; - auto calcICRetVal = IDACalcIC(idaMemory, IDA_YA_YDP_INIT, firstOutTime); - IDA_PROFILER_IC_STOP; - - if (calcICRetVal != IDA_SUCCESS) { - if (calcICRetVal == IDALS_MEM_NULL) { - std::cerr << "IDACalcIC - The ida_mem pointer is NULL" << std::endl; - } else if (calcICRetVal == IDA_NO_MALLOC) { - std::cerr << "IDACalcIC - The allocation function IDAInit has not been called" << std::endl; - } else if (calcICRetVal == IDA_ILL_INPUT) { - std::cerr << "IDACalcIC - One of the input arguments was illegal" << std::endl; - } else if (calcICRetVal == 
IDA_LSETUP_FAIL) { - std::cerr << "IDACalcIC - The linear solver’s setup function failed in an unrecoverable manner" << std::endl; - } else if (calcICRetVal == IDA_LINIT_FAIL) { - std::cerr << "IDACalcIC - The linear solver’s initialization function failed" << std::endl; - } else if (calcICRetVal == IDA_LSOLVE_FAIL) { - std::cerr << "IDACalcIC - The linear solver’s solve function failed in an unrecoverable manner" << std::endl; - } else if (calcICRetVal == IDA_BAD_EWT) { - std::cerr << "IDACalcIC - Some component of the error weight vector is zero (illegal), either for the input value of y0 or a corrected value" << std::endl; - } else if (calcICRetVal == IDA_FIRST_RES_FAIL) { - std::cerr << "IDACalcIC - The user’s residual function returned a recoverable error flag on the first call, but IDACalcIC was unable to recover" << std::endl; - } else if (calcICRetVal == IDA_RES_FAIL) { - std::cerr << "IDACalcIC - The user’s residual function returned a nonrecoverable error flag" << std::endl; - } else if (calcICRetVal == IDA_NO_RECOVERY) { - std::cerr << "IDACalcIC - The user’s residual function, or the linear solver’s setup or solve function had a recoverable error, but IDACalcIC was unable to recover" << std::endl; - } else if (calcICRetVal == IDA_CONSTR_FAIL) { - std::cerr << "IDACalcIC - IDACalcIC was unable to find a solution satisfying the inequality constraints" << std::endl; - } else if (calcICRetVal == IDA_LINESEARCH_FAIL) { - std::cerr << "IDACalcIC - The linesearch algorithm failed to find a solution with a step larger than steptol in weighted RMS norm, and within the allowed number of backtracks" << std::endl; - } else if (calcICRetVal == IDA_CONV_FAIL) { - std::cerr << "IDACalcIC - IDACalcIC failed to get convergence of the Newton iterations" << std::endl; - } + IDA_PROFILER_IC_START; + auto calcICRetVal = IDACalcIC(idaMemory, IDA_YA_YDP_INIT, firstOutTime); + IDA_PROFILER_IC_STOP; - return false; + if (calcICRetVal != IDA_SUCCESS) { + if (calcICRetVal == 
IDALS_MEM_NULL) { + std::cerr << "IDACalcIC - The ida_mem pointer is NULL" << std::endl; + } else if (calcICRetVal == IDA_NO_MALLOC) { + std::cerr + << "IDACalcIC - The allocation function IDAInit has not been called" + << std::endl; + } else if (calcICRetVal == IDA_ILL_INPUT) { + std::cerr << "IDACalcIC - One of the input arguments was illegal" + << std::endl; + } else if (calcICRetVal == IDA_LSETUP_FAIL) { + std::cerr << "IDACalcIC - The linear solver’s setup function failed in " + "an unrecoverable manner" + << std::endl; + } else if (calcICRetVal == IDA_LINIT_FAIL) { + std::cerr + << "IDACalcIC - The linear solver’s initialization function failed" + << std::endl; + } else if (calcICRetVal == IDA_LSOLVE_FAIL) { + std::cerr << "IDACalcIC - The linear solver’s solve function failed in " + "an unrecoverable manner" + << std::endl; + } else if (calcICRetVal == IDA_BAD_EWT) { + std::cerr + << "IDACalcIC - Some component of the error weight vector is zero " + "(illegal), either for the input value of y0 or a corrected value" + << std::endl; + } else if (calcICRetVal == IDA_FIRST_RES_FAIL) { + std::cerr + << "IDACalcIC - The user’s residual function returned a recoverable " + "error flag on the first call, but IDACalcIC was unable to recover" + << std::endl; + } else if (calcICRetVal == IDA_RES_FAIL) { + std::cerr << "IDACalcIC - The user’s residual function returned a " + "nonrecoverable error flag" + << std::endl; + } else if (calcICRetVal == IDA_NO_RECOVERY) { + std::cerr << "IDACalcIC - The user’s residual function, or the linear " + "solver’s setup or solve function had a recoverable error, " + "but IDACalcIC was unable to recover" + << std::endl; + } else if (calcICRetVal == IDA_CONSTR_FAIL) { + std::cerr << "IDACalcIC - IDACalcIC was unable to find a solution " + "satisfying the inequality constraints" + << std::endl; + } else if (calcICRetVal == IDA_LINESEARCH_FAIL) { + std::cerr << "IDACalcIC - The linesearch algorithm failed to find a " + "solution with a 
step larger than steptol in weighted RMS " + "norm, and within the allowed number of backtracks" + << std::endl; + } else if (calcICRetVal == IDA_CONV_FAIL) { + std::cerr << "IDACalcIC - IDACalcIC failed to get convergence of the " + "Newton iterations" + << std::endl; } - auto getConsistentIcRetVal = - IDAGetConsistentIC(idaMemory, variablesVector, derivativesVector); + return false; + } - if (getConsistentIcRetVal != IDA_SUCCESS) { - if (getConsistentIcRetVal == IDA_ILL_INPUT) { - std::cerr << "IDAGetConsistentIC - Called before the first IDASolve" << std::endl; - } else if (getConsistentIcRetVal == IDA_MEM_NULL) { - std::cerr << "IDAGetConsistentIC - The ida_mem pointer is NULL" << std::endl; - } + auto getConsistentIcRetVal = + IDAGetConsistentIC(idaMemory, variablesVector, derivativesVector); - return false; + if (getConsistentIcRetVal != IDA_SUCCESS) { + if (getConsistentIcRetVal == IDA_ILL_INPUT) { + std::cerr << "IDAGetConsistentIC - Called before the first IDASolve" + << std::endl; + } else if (getConsistentIcRetVal == IDA_MEM_NULL) { + std::cerr << "IDAGetConsistentIC - The ida_mem pointer is NULL" + << std::endl; } - copyVariablesIntoMARCO(variablesVector, derivativesVector); - return true; + return false; } - bool IDAInstance::step() - { - if (!initialized) { - if (!initialize()) { - return false; - } - } + copyVariablesIntoMARCO(variablesVector, derivativesVector); + return true; +} - if (getNumOfScalarEquations() == 0) { - // IDA has nothing to solve. Just increment the time. +bool IDAInstance::step() { + if (!initialized) { + if (!initialize()) { + return false; + } + } - if (getOptions().equidistantTimeGrid) { - currentTime += timeStep; - } else { - currentTime = endTime; - } + if (getNumOfScalarEquations() == 0) { + // IDA has nothing to solve. Just increment the time. - return true; + if (getOptions().equidistantTimeGrid) { + currentTime += timeStep; + } else { + currentTime = endTime; } - // Execute one step. 
- IDA_PROFILER_STEPS_COUNTER_INCREMENT; - IDA_PROFILER_STEP_START; - - realtype tout = getOptions().equidistantTimeGrid ? (currentTime + timeStep) : endTime; + return true; + } - auto solveRetVal = IDASolve( - idaMemory, - tout, - &currentTime, - variablesVector, - derivativesVector, - getOptions().equidistantTimeGrid ? IDA_NORMAL : IDA_ONE_STEP); + // Execute one step. + IDA_PROFILER_STEPS_COUNTER_INCREMENT; + IDA_PROFILER_STEP_START; - IDA_PROFILER_STEP_STOP; + realtype tout = + getOptions().equidistantTimeGrid ? (currentTime + timeStep) : endTime; - if (solveRetVal != IDA_SUCCESS) { - if (solveRetVal == IDA_TSTOP_RETURN) { - return true; - } + auto solveRetVal = IDASolve( + idaMemory, tout, &currentTime, variablesVector, derivativesVector, + getOptions().equidistantTimeGrid ? IDA_NORMAL : IDA_ONE_STEP); - if (solveRetVal == IDA_ROOT_RETURN) { - return true; - } + IDA_PROFILER_STEP_STOP; - if (solveRetVal == IDA_MEM_NULL) { - std::cerr << "IDASolve - The ida_mem pointer is NULL" << std::endl; - } else if (solveRetVal == IDA_ILL_INPUT) { - std::cerr << "IDASolve - One of the inputs to IDASolve was illegal, or some other input to the solver was either illegal or missing" << std::endl; - } else if (solveRetVal == IDA_TOO_MUCH_WORK) { - std::cerr << "IDASolve - The solver took mxstep internal steps but could not reach tout" << std::endl; - } else if (solveRetVal == IDA_TOO_MUCH_ACC) { - std::cerr << "IDASolve - The solver could not satisfy the accuracy demanded by the user for some internal step" << std::endl; - } else if (solveRetVal == IDA_ERR_FAIL) { - std::cerr << "IDASolve - Error test failures occurred too many times during one internal time step or occurred with |h| = hmin" << std::endl; - } else if (solveRetVal == IDA_CONV_FAIL) { - std::cerr << "IDASolve - Convergence test failures occurred too many times during one internal time step or occurred with |h| = hmin" << std::endl; - } else if (solveRetVal == IDA_LINIT_FAIL) { - std::cerr << "IDASolve - The linear solver’s
initialization function failed" << std::endl; - } else if (solveRetVal == IDA_LSETUP_FAIL) { - std::cerr << "IDASolve - The linear solver’s setup function failed in an unrecoverable manner" << std::endl; - } else if (solveRetVal == IDA_LSOLVE_FAIL) { - std::cerr << "IDASolve - The linear solver’s solve function failed in an unrecoverable manner" << std::endl; - } else if (solveRetVal == IDA_CONSTR_FAIL) { - std::cerr << "IDASolve - The inequality constraints were violated and the solver was unable to recover" << std::endl; - } else if (solveRetVal == IDA_REP_RES_ERR) { - std::cerr << "IDASolve - The user’s residual function repeatedly returned a recoverable error flag, but the solver was unable to recover" << std::endl; - } else if (solveRetVal == IDA_RES_FAIL) { - std::cerr << "IDASolve - The user’s residual function returned a nonrecoverable error flag" << std::endl; - } else if (solveRetVal == IDA_RTFUNC_FAIL) { - std::cerr << "IDASolve - The rootfinding function failed" << std::endl; - } + if (solveRetVal != IDA_SUCCESS) { + if (solveRetVal == IDA_TSTOP_RETURN) { + return true; + } - return false; + if (solveRetVal == IDA_ROOT_RETURN) { + return true; } - copyVariablesIntoMARCO(variablesVector, derivativesVector); + if (solveRetVal == IDA_MEM_NULL) { + std::cerr << "IDASolve - The ida_mem pointer is NULL" << std::endl; + } else if (solveRetVal == IDA_ILL_INPUT) { + std::cerr + << "IDASolve - One of the inputs to IDASolve was illegal, or some " + "other input to the solver was either illegal or missing" + << std::endl; + } else if (solveRetVal == IDA_TOO_MUCH_WORK) { + std::cerr << "IDASolve - The solver took mxstep internal steps but could " + "not reach tout" + << std::endl; + } else if (solveRetVal == IDA_TOO_MUCH_ACC) { + std::cerr << "IDASolve - The solver could not satisfy the accuracy " + "demanded by the user for some internal step" + << std::endl; + } else if (solveRetVal == IDA_ERR_FAIL) { + std::cerr << "IDASolve - Error test failures occurred too 
many times " + "during one internal time step or occurred with |h| = hmin" + << std::endl; + } else if (solveRetVal == IDA_CONV_FAIL) { + std::cerr + << "IDASolve - Convergence test failures occurred too many times " + "during one internal time step or occurred with |h| = hmin" + << std::endl; + } else if (solveRetVal == IDA_LINIT_FAIL) { + std::cerr + << "IDASolve - The linear solver’s initialization function failed" + << std::endl; + } else if (solveRetVal == IDA_LSETUP_FAIL) { + std::cerr << "IDASolve - The linear solver’s setup function failed in an " + "unrecoverable manner" + << std::endl; + } else if (solveRetVal == IDA_LSOLVE_FAIL) { + std::cerr << "IDASolve - The linear solver’s solve function failed in an " + "unrecoverable manner" + << std::endl; + } else if (solveRetVal == IDA_CONSTR_FAIL) { + std::cerr << "IDASolve - The inequality constraints were violated and " + "the solver was unable to recover" + << std::endl; + } else if (solveRetVal == IDA_REP_RES_ERR) { + std::cerr + << "IDASolve - The user’s residual function repeatedly returned a " + "recoverable error flag, but the solver was unable to recover" + << std::endl; + } else if (solveRetVal == IDA_RES_FAIL) { + std::cerr << "IDASolve - The user’s residual function returned a " + "nonrecoverable error flag" + << std::endl; + } else if (solveRetVal == IDA_RTFUNC_FAIL) { + std::cerr << "IDASolve - The rootfinding function failed" << std::endl; + } - return true; + return false; } - realtype IDAInstance::getCurrentTime() const - { - return currentTime; - } + copyVariablesIntoMARCO(variablesVector, derivativesVector); - int IDAInstance::residualFunction( - realtype time, - N_Vector variables, - N_Vector derivatives, - N_Vector residuals, - void* userData) - { - IDA_PROFILER_RESIDUALS_CALL_COUNTER_INCREMENT; + return true; +} - realtype* rval = N_VGetArrayPointer(residuals); - auto* instance = static_cast(userData); +realtype IDAInstance::getCurrentTime() const { return currentTime; } - // Copy the 
values of the variables and derivatives provided by IDA into - // the variables owned by MARCO, so that the residual functions operate on - // the current iteration values. - instance->copyVariablesIntoMARCO(variables, derivatives); +int IDAInstance::residualFunction(realtype time, N_Vector variables, + N_Vector derivatives, N_Vector residuals, + void *userData) { + IDA_PROFILER_RESIDUALS_CALL_COUNTER_INCREMENT; - // For every vectorized equation, set the residual values of the variables - // it writes into. - IDA_PROFILER_RESIDUALS_START; + realtype *rval = N_VGetArrayPointer(residuals); + auto *instance = static_cast(userData); - instance->equationsParallelIteration( - [&](Equation eq, const std::vector& equationIndices) { - uint64_t equationRank = instance->getEquationRank(eq); - assert(equationIndices.size() == equationRank); + // Copy the values of the variables and derivatives provided by IDA into + // the variables owned by MARCO, so that the residual functions operate on + // the current iteration values. + instance->copyVariablesIntoMARCO(variables, derivatives); - Variable writtenVariable = instance->getWrittenVariable(eq); + // For every vectorized equation, set the residual values of the variables + // it writes into. 
+ IDA_PROFILER_RESIDUALS_START; - uint64_t writtenVariableArrayOffset = - instance->variableOffsets[writtenVariable]; + instance->equationsParallelIteration( + [&](Equation eq, const std::vector &equationIndices) { + uint64_t equationRank = instance->getEquationRank(eq); + assert(equationIndices.size() == equationRank); - uint64_t writtenVariableRank = - instance->getVariableRank(writtenVariable); + Variable writtenVariable = instance->getWrittenVariable(eq); - std::vector writtenVariableIndices(writtenVariableRank, 0); + uint64_t writtenVariableArrayOffset = + instance->variableOffsets[writtenVariable]; - AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(eq); + uint64_t writtenVariableRank = + instance->getVariableRank(writtenVariable); - writeAccessFunction( - equationIndices.data(), - writtenVariableIndices.data()); + std::vector writtenVariableIndices(writtenVariableRank, 0); - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + AccessFunction writeAccessFunction = + instance->getWriteAccessFunction(eq); - uint64_t offset = - writtenVariableArrayOffset + writtenVariableScalarOffset; + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - auto residualFn = instance->residualFunctions[eq]; - auto* eqIndicesPtr = equationIndices.data(); + uint64_t writtenVariableScalarOffset = + getVariableFlatIndex(instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - auto residualFunctionResult = residualFn(time, eqIndicesPtr); - *(rval + offset) = residualFunctionResult; - }); + uint64_t offset = + writtenVariableArrayOffset + writtenVariableScalarOffset; - IDA_PROFILER_RESIDUALS_STOP; + auto residualFn = instance->residualFunctions[eq]; + auto *eqIndicesPtr = equationIndices.data(); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Residuals function called" << std::endl; - std::cerr << 
"Variables:" << std::endl; - instance->printVariablesVector(variables); - std::cerr << "Derivatives:" << std::endl; - instance->printDerivativesVector(derivatives); - std::cerr << "Residuals vector:" << std::endl; - instance->printResidualsVector(residuals); - } + auto residualFunctionResult = residualFn(time, eqIndicesPtr); + *(rval + offset) = residualFunctionResult; + }); + + IDA_PROFILER_RESIDUALS_STOP; - return IDA_SUCCESS; + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Residuals function called" << std::endl; + std::cerr << "Variables:" << std::endl; + instance->printVariablesVector(variables); + std::cerr << "Derivatives:" << std::endl; + instance->printDerivativesVector(derivatives); + std::cerr << "Residuals vector:" << std::endl; + instance->printResidualsVector(residuals); } - int IDAInstance::jacobianMatrix( - realtype time, realtype alpha, - N_Vector variables, N_Vector derivatives, N_Vector residuals, - SUNMatrix jacobianMatrix, - void* userData, - N_Vector tempv1, N_Vector tempv2, N_Vector tempv3) - { - IDA_PROFILER_PARTIAL_DERIVATIVES_CALL_COUNTER_INCREMENT; + return IDA_SUCCESS; +} - realtype* jacobian = SUNSparseMatrix_Data(jacobianMatrix); - auto* instance = static_cast(userData); +int IDAInstance::jacobianMatrix(realtype time, realtype alpha, + N_Vector variables, N_Vector derivatives, + N_Vector residuals, SUNMatrix jacobianMatrix, + void *userData, N_Vector tempv1, + N_Vector tempv2, N_Vector tempv3) { + IDA_PROFILER_PARTIAL_DERIVATIVES_CALL_COUNTER_INCREMENT; - // Copy the values of the variables and derivatives provided by IDA into - // the variables owned by MARCO, so that the jacobian functions operate on - // the current iteration values. - instance->copyVariablesIntoMARCO(variables, derivatives); + realtype *jacobian = SUNSparseMatrix_Data(jacobianMatrix); + auto *instance = static_cast(userData); - // For every vectorized equation, compute its row within the Jacobian - // matrix. 
- IDA_PROFILER_PARTIAL_DERIVATIVES_START; + // Copy the values of the variables and derivatives provided by IDA into + // the variables owned by MARCO, so that the jacobian functions operate on + // the current iteration values. + instance->copyVariablesIntoMARCO(variables, derivatives); - instance->equationsParallelIteration( - [&](Equation eq, const std::vector& equationIndices) { - Variable writtenVariable = instance->getWrittenVariable(eq); + // For every vectorized equation, compute its row within the Jacobian + // matrix. + IDA_PROFILER_PARTIAL_DERIVATIVES_START; - uint64_t writtenVariableArrayOffset = - instance->variableOffsets[writtenVariable]; + instance->equationsParallelIteration( + [&](Equation eq, const std::vector &equationIndices) { + Variable writtenVariable = instance->getWrittenVariable(eq); - uint64_t writtenVariableRank = - instance->getVariableRank(writtenVariable); + uint64_t writtenVariableArrayOffset = + instance->variableOffsets[writtenVariable]; - std::vector writtenVariableIndices; - writtenVariableIndices.resize(writtenVariableRank, 0); + uint64_t writtenVariableRank = + instance->getVariableRank(writtenVariable); - AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(eq); + std::vector writtenVariableIndices; + writtenVariableIndices.resize(writtenVariableRank, 0); - writeAccessFunction( - equationIndices.data(), - writtenVariableIndices.data()); + AccessFunction writeAccessFunction = + instance->getWriteAccessFunction(eq); - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t writtenVariableScalarOffset = + getVariableFlatIndex(instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - assert(scalarEquationIndex < 
instance->getNumOfScalarEquations()); + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; - // Compute the column indexes that may be non-zeros. - std::vector jacobianColumns = - instance->computeJacobianColumns(eq, equationIndices.data()); + assert(scalarEquationIndex < instance->getNumOfScalarEquations()); - // For every scalar variable with respect to which the equation must be - // partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn& column = jacobianColumns[i]; - Variable variable = column.first; - const auto& variableIndices = column.second; + // Compute the column indexes that may be non-zeros. + std::vector jacobianColumns = + instance->computeJacobianColumns(eq, equationIndices.data()); - uint64_t variableArrayOffset = instance->variableOffsets[variable]; + // For every scalar variable with respect to which the equation must be + // partially differentiated. + for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; - uint64_t variableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[variable], - column.second); + uint64_t variableArrayOffset = instance->variableOffsets[variable]; - assert(instance->jacobianFunctions[eq][variable] != nullptr); + uint64_t variableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[variable], column.second); - auto jacobianFunctionResult = - instance->jacobianFunctions[eq][variable]( - time, - equationIndices.data(), - variableIndices.data(), - alpha); + assert(instance->jacobianFunctions[eq][variable] != nullptr); - instance->jacobianMatrixData[scalarEquationIndex][i].second = - jacobianFunctionResult; + auto jacobianFunctionResult = + instance->jacobianFunctions[eq][variable]( + time, equationIndices.data(), variableIndices.data(), alpha); - auto 
index = static_cast( - variableArrayOffset + variableScalarOffset); + instance->jacobianMatrixData[scalarEquationIndex][i].second = + jacobianFunctionResult; - instance->jacobianMatrixData[scalarEquationIndex][i].first = - index; - } - }); + auto index = static_cast(variableArrayOffset + + variableScalarOffset); - sunindextype* rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); - sunindextype* columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); + instance->jacobianMatrixData[scalarEquationIndex][i].first = index; + } + }); - sunindextype offset = 0; - *rowPtrs++ = offset; + sunindextype *rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); + sunindextype *columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); - for (const auto& row : instance->jacobianMatrixData) { - offset += static_cast(row.size()); - *rowPtrs++ = offset; + sunindextype offset = 0; + *rowPtrs++ = offset; - for (const auto& column : row) { - *columnIndices++ = column.first; - *jacobian++ = column.second; - } - } + for (const auto &row : instance->jacobianMatrixData) { + offset += static_cast(row.size()); + *rowPtrs++ = offset; - assert(rowPtrs == SUNSparseMatrix_IndexPointers(jacobianMatrix) + instance->getNumOfScalarEquations() + 1); - assert(columnIndices == SUNSparseMatrix_IndexValues(jacobianMatrix) + instance->nonZeroValuesNumber); - assert(jacobian == SUNSparseMatrix_Data(jacobianMatrix) + instance->nonZeroValuesNumber); - - IDA_PROFILER_PARTIAL_DERIVATIVES_STOP; - - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Jacobian matrix function called" << std::endl; - std::cerr << "Time: " << time << std::endl; - std::cerr << "Alpha: " << alpha << std::endl; - std::cerr << "Variables:" << std::endl; - instance->printVariablesVector(variables); - std::cerr << "Derivatives:" << std::endl; - instance->printDerivativesVector(derivatives); - std::cerr << "Residuals vector:" << std::endl; - instance->printResidualsVector(residuals); - std::cerr << 
"Jacobian matrix:" << std::endl; - instance->printJacobianMatrix(jacobianMatrix); + for (const auto &column : row) { + *columnIndices++ = column.first; + *jacobian++ = column.second; } - - return IDA_SUCCESS; } - uint64_t IDAInstance::getNumOfArrayVariables() const - { - return variablesDimensions.size(); - } + assert(rowPtrs == SUNSparseMatrix_IndexPointers(jacobianMatrix) + + instance->getNumOfScalarEquations() + 1); + assert(columnIndices == SUNSparseMatrix_IndexValues(jacobianMatrix) + + instance->nonZeroValuesNumber); + assert(jacobian == + SUNSparseMatrix_Data(jacobianMatrix) + instance->nonZeroValuesNumber); - uint64_t IDAInstance::getNumOfScalarVariables() const - { - return scalarVariablesNumber; - } + IDA_PROFILER_PARTIAL_DERIVATIVES_STOP; - VariableKind IDAInstance::getVariableKind(Variable variable) const - { - auto it = stateVariablesMapping.find(variable); - auto endIt = stateVariablesMapping.end(); - return it == endIt ? VariableKind::ALGEBRAIC : VariableKind::STATE; + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Jacobian matrix function called" << std::endl; + std::cerr << "Time: " << time << std::endl; + std::cerr << "Alpha: " << alpha << std::endl; + std::cerr << "Variables:" << std::endl; + instance->printVariablesVector(variables); + std::cerr << "Derivatives:" << std::endl; + instance->printDerivativesVector(derivatives); + std::cerr << "Residuals vector:" << std::endl; + instance->printResidualsVector(residuals); + std::cerr << "Jacobian matrix:" << std::endl; + instance->printJacobianMatrix(jacobianMatrix); } - uint64_t IDAInstance::getVariableFlatSize(Variable variable) const - { - uint64_t result = 1; + return IDA_SUCCESS; +} - for (uint64_t dimension : variablesDimensions[variable]) { - result *= dimension; - } +uint64_t IDAInstance::getNumOfArrayVariables() const { + return variablesDimensions.size(); +} - return result; - } +uint64_t IDAInstance::getNumOfScalarVariables() const { + return 
scalarVariablesNumber; +} - uint64_t IDAInstance::getNumOfVectorizedEquations() const - { - return equationRanges.size(); - } +VariableKind IDAInstance::getVariableKind(Variable variable) const { + auto it = stateVariablesMapping.find(variable); + auto endIt = stateVariablesMapping.end(); + return it == endIt ? VariableKind::ALGEBRAIC : VariableKind::STATE; +} - uint64_t IDAInstance::getNumOfScalarEquations() const - { - return scalarEquationsNumber; - } +uint64_t IDAInstance::getVariableFlatSize(Variable variable) const { + uint64_t result = 1; - uint64_t IDAInstance::getEquationRank(Equation equation) const - { - return equationRanges[equation].size(); + for (uint64_t dimension : variablesDimensions[variable]) { + result *= dimension; } - uint64_t IDAInstance::getEquationFlatSize(Equation equation) const - { - assert(equation < getNumOfVectorizedEquations()); - uint64_t result = 1; + return result; +} - for (const Range& range : equationRanges[equation]) { - result *= range.end - range.begin; - } +uint64_t IDAInstance::getNumOfVectorizedEquations() const { + return equationRanges.size(); +} - return result; - } +uint64_t IDAInstance::getNumOfScalarEquations() const { + return scalarEquationsNumber; +} - Variable IDAInstance::getWrittenVariable(Equation equation) const - { - return writeAccesses[equation].first; - } +uint64_t IDAInstance::getEquationRank(Equation equation) const { + return equationRanges[equation].size(); +} - AccessFunction IDAInstance::getWriteAccessFunction(Equation equation) const - { - return writeAccesses[equation].second; - } +uint64_t IDAInstance::getEquationFlatSize(Equation equation) const { + assert(equation < getNumOfVectorizedEquations()); + uint64_t result = 1; - uint64_t IDAInstance::getVariableRank(Variable variable) const - { - return variablesDimensions[variable].rank(); + for (const Range &range : equationRanges[equation]) { + result *= range.end - range.begin; } - /// Determine which of the columns of the current Jacobian row 
has to be - /// populated, and with respect to which variable the partial derivative has - /// to be performed. The row is determined by the indices of the equation. - std::vector IDAInstance::computeJacobianColumns( - Equation eq, const int64_t* equationIndices) const - { - std::set uniqueColumns; + return result; +} - if (precomputedAccesses) { - for (const auto& access : variableAccesses[eq]) { - Variable variable = access.first; - AccessFunction accessFunction = access.second; +Variable IDAInstance::getWrittenVariable(Equation equation) const { + return writeAccesses[equation].first; +} - uint64_t variableRank = getVariableRank(variable); +AccessFunction IDAInstance::getWriteAccessFunction(Equation equation) const { + return writeAccesses[equation].second; +} - std::vector variableIndices; - variableIndices.resize(variableRank, 0); - accessFunction(equationIndices, variableIndices.data()); +uint64_t IDAInstance::getVariableRank(Variable variable) const { + return variablesDimensions[variable].rank(); +} - assert([&]() -> bool { - for (uint64_t i = 0; i < variableRank; ++i) { - if (variableIndices[i] >= variablesDimensions[variable][i]) { - return false; - } - } +/// Determine which of the columns of the current Jacobian row has to be +/// populated, and with respect to which variable the partial derivative has +/// to be performed. The row is determined by the indices of the equation. 
+std::vector +IDAInstance::computeJacobianColumns(Equation eq, + const int64_t *equationIndices) const { + std::set uniqueColumns; - return true; - }() && "Access out of bounds"); + if (precomputedAccesses) { + for (const auto &access : variableAccesses[eq]) { + Variable variable = access.first; + AccessFunction accessFunction = access.second; - uniqueColumns.insert({variable, variableIndices}); - } - } else { - for (size_t variableIndex = 0, e = getNumOfArrayVariables(); - variableIndex < e; ++variableIndex) { - const auto& dimensions = variablesDimensions[variableIndex]; + uint64_t variableRank = getVariableRank(variable); - for (auto indices = dimensions.indicesBegin(), - end = dimensions.indicesEnd(); - indices != end; ++indices) { - JacobianColumn column(variableIndex, {}); + std::vector variableIndices; + variableIndices.resize(variableRank, 0); + accessFunction(equationIndices, variableIndices.data()); - for (size_t dim = 0; dim < dimensions.rank(); ++dim) { - column.second.push_back((*indices)[dim]); + assert([&]() -> bool { + for (uint64_t i = 0; i < variableRank; ++i) { + if (variableIndices[i] >= variablesDimensions[variable][i]) { + return false; } - - uniqueColumns.insert(std::move(column)); } - } - } - std::vector orderedColumns; + return true; + }() && "Access out of bounds"); - for (const JacobianColumn& column : uniqueColumns) { - orderedColumns.push_back(column); + uniqueColumns.insert({variable, variableIndices}); } + } else { + for (size_t variableIndex = 0, e = getNumOfArrayVariables(); + variableIndex < e; ++variableIndex) { + const auto &dimensions = variablesDimensions[variableIndex]; - std::sort(orderedColumns.begin(), orderedColumns.end(), - [](const JacobianColumn& first, const JacobianColumn& second) { - if (first.first != second.first) { - return first.first < second.first; - } + for (auto indices = dimensions.indicesBegin(), + end = dimensions.indicesEnd(); + indices != end; ++indices) { + JacobianColumn column(variableIndex, {}); - 
assert(first.second.size() == second.second.size()); + for (size_t dim = 0; dim < dimensions.rank(); ++dim) { + column.second.push_back((*indices)[dim]); + } - for (size_t i = 0, e = first.second.size(); i < e; ++i) { - if (first.second[i] < second.second[i]) { - return true; - } - } + uniqueColumns.insert(std::move(column)); + } + } + } - return false; - }); + std::vector orderedColumns; - return orderedColumns; + for (const JacobianColumn &column : uniqueColumns) { + orderedColumns.push_back(column); } - /// Compute the number of non-zero values in the Jacobian Matrix. Also - /// compute the column indexes of all non-zero values in the Jacobian Matrix. - /// This allows to avoid the recomputation of such indexes during the - /// Jacobian evaluation. - void IDAInstance::computeNNZ() - { - nonZeroValuesNumber = 0; - std::vector equationIndices; + std::sort(orderedColumns.begin(), orderedColumns.end(), + [](const JacobianColumn &first, const JacobianColumn &second) { + if (first.first != second.first) { + return first.first < second.first; + } - for (size_t eq = 0; eq < getNumOfVectorizedEquations(); ++eq) { - // Initialize the multidimensional interval of the vector equation. - uint64_t equationRank = equationRanges[eq].size(); - equationIndices.resize(equationRank); + assert(first.second.size() == second.second.size()); - for (size_t i = 0; i < equationRank; ++i) { - const auto& iterationRange = equationRanges[eq][i]; - int64_t beginIndex = iterationRange.begin; - equationIndices[i] = beginIndex; - } + for (size_t i = 0, e = first.second.size(); i < e; ++i) { + if (first.second[i] < second.second[i]) { + return true; + } + } - // For every scalar equation in the vector equation. 
- do { - // Compute the column indexes that may be non-zeros - nonZeroValuesNumber += - computeJacobianColumns(eq, equationIndices.data()).size(); + return false; + }); - } while (advanceEquationIndices(equationIndices, equationRanges[eq])); + return orderedColumns; +} + +/// Compute the number of non-zero values in the Jacobian Matrix. Also +/// compute the column indexes of all non-zero values in the Jacobian Matrix. +/// This allows to avoid the recomputation of such indexes during the +/// Jacobian evaluation. +void IDAInstance::computeNNZ() { + nonZeroValuesNumber = 0; + std::vector equationIndices; + + for (size_t eq = 0; eq < getNumOfVectorizedEquations(); ++eq) { + // Initialize the multidimensional interval of the vector equation. + uint64_t equationRank = equationRanges[eq].size(); + equationIndices.resize(equationRank); + + for (size_t i = 0; i < equationRank; ++i) { + const auto &iterationRange = equationRanges[eq][i]; + int64_t beginIndex = iterationRange.begin; + equationIndices[i] = beginIndex; } - } - void IDAInstance::computeThreadChunks() - { - unsigned int numOfThreads = threadPool.getNumOfThreads(); + // For every scalar equation in the vector equation. 
+ do { + // Compute the column indexes that may be non-zeros + nonZeroValuesNumber += + computeJacobianColumns(eq, equationIndices.data()).size(); - int64_t chunksFactor = getOptions().equationsChunksFactor; - int64_t numOfChunks = numOfThreads * chunksFactor; + } while (advanceEquationIndices(equationIndices, equationRanges[eq])); + } +} - uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); - uint64_t numOfScalarEquations = getNumOfScalarEquations(); +void IDAInstance::computeThreadChunks() { + unsigned int numOfThreads = threadPool.getNumOfThreads(); - size_t chunkSize = - (numOfScalarEquations + numOfChunks - 1) / numOfChunks; + int64_t chunksFactor = getOptions().equationsChunksFactor; + int64_t numOfChunks = numOfThreads * chunksFactor; - // The number of vectorized equations whose indices have been completely - // assigned. - uint64_t processedEquations = 0; + uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); + uint64_t numOfScalarEquations = getNumOfScalarEquations(); - while (processedEquations < numOfVectorizedEquations) { - Equation equation = equationsProcessingOrder[processedEquations]; - uint64_t equationFlatSize = getEquationFlatSize(equation); - uint64_t equationFlatIndex = 0; + size_t chunkSize = (numOfScalarEquations + numOfChunks - 1) / numOfChunks; - // Divide the ranges into chunks. - while (equationFlatIndex < equationFlatSize) { - uint64_t beginFlatIndex = equationFlatIndex; + // The number of vectorized equations whose indices have been completely + // assigned. + uint64_t processedEquations = 0; - uint64_t endFlatIndex = std::min( - beginFlatIndex + static_cast(chunkSize), - equationFlatSize); + while (processedEquations < numOfVectorizedEquations) { + Equation equation = equationsProcessingOrder[processedEquations]; + uint64_t equationFlatSize = getEquationFlatSize(equation); + uint64_t equationFlatIndex = 0; - std::vector beginIndices; - std::vector endIndices; + // Divide the ranges into chunks. 
+ while (equationFlatIndex < equationFlatSize) { + uint64_t beginFlatIndex = equationFlatIndex; - getEquationIndicesFromFlatIndex( - beginFlatIndex, beginIndices, equationRanges[equation]); + uint64_t endFlatIndex = std::min( + beginFlatIndex + static_cast(chunkSize), equationFlatSize); - if (endFlatIndex == equationFlatSize) { - getEquationEndIndices(equation, endIndices); - } else { - getEquationIndicesFromFlatIndex( - endFlatIndex, endIndices, equationRanges[equation]); - } + std::vector beginIndices; + std::vector endIndices; - threadEquationsChunks.emplace_back( - equation, std::move(beginIndices), std::move(endIndices)); + getEquationIndicesFromFlatIndex(beginFlatIndex, beginIndices, + equationRanges[equation]); - // Move to the next chunk. - equationFlatIndex = endFlatIndex; + if (endFlatIndex == equationFlatSize) { + getEquationEndIndices(equation, endIndices); + } else { + getEquationIndicesFromFlatIndex(endFlatIndex, endIndices, + equationRanges[equation]); } - // Move to the next vectorized equation. - ++processedEquations; + threadEquationsChunks.emplace_back(equation, std::move(beginIndices), + std::move(endIndices)); + + // Move to the next chunk. + equationFlatIndex = endFlatIndex; } + + // Move to the next vectorized equation. 
+ ++processedEquations; } +} - void IDAInstance::copyVariablesFromMARCO( - N_Vector algebraicAndStateVariablesVector, - N_Vector derivativeVariablesVector) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Copying variables from MARCO" << std::endl; - } +void IDAInstance::copyVariablesFromMARCO( + N_Vector algebraicAndStateVariablesVector, + N_Vector derivativeVariablesVector) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Copying variables from MARCO" << std::endl; + } - IDA_PROFILER_COPY_VARS_FROM_MARCO_START; + IDA_PROFILER_COPY_VARS_FROM_MARCO_START; - realtype* varsPtr = N_VGetArrayPointer(algebraicAndStateVariablesVector); - realtype* dersPtr = N_VGetArrayPointer(derivativeVariablesVector); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); + realtype *varsPtr = N_VGetArrayPointer(algebraicAndStateVariablesVector); + realtype *dersPtr = N_VGetArrayPointer(derivativeVariablesVector); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - for (Variable var = 0; var < numOfArrayVariables; ++var) { - uint64_t variableArrayOffset = variableOffsets[var]; - const auto& dimensions = variablesDimensions[var]; + for (Variable var = 0; var < numOfArrayVariables; ++var) { + uint64_t variableArrayOffset = variableOffsets[var]; + const auto &dimensions = variablesDimensions[var]; - std::vector varIndices; - getVariableBeginIndices(var, varIndices); + std::vector varIndices; + getVariableBeginIndices(var, varIndices); - do { - uint64_t variableScalarOffset = - getVariableFlatIndex(dimensions, varIndices.data()); + do { + uint64_t variableScalarOffset = + getVariableFlatIndex(dimensions, varIndices.data()); - uint64_t offset = variableArrayOffset + variableScalarOffset; + uint64_t offset = variableArrayOffset + variableScalarOffset; - // Get the state / algebraic variable. 
- auto getterFn = algebraicAndStateVariablesGetters[var]; - auto value = static_cast(getterFn(varIndices.data())); - varsPtr[offset] = value; + // Get the state / algebraic variable. + auto getterFn = algebraicAndStateVariablesGetters[var]; + auto value = static_cast(getterFn(varIndices.data())); + varsPtr[offset] = value; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "Got var " << var << " "; - printIndices(varIndices); - std::cerr << " with value " << std::fixed << std::setprecision(9) - << value << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "Got var " << var << " "; + printIndices(varIndices); + std::cerr << " with value " << std::fixed << std::setprecision(9) + << value << std::endl; + } - // Get the derivative variable, if the variable was a state. - auto derivativeVariablePositionIt = stateVariablesMapping.find(var); + // Get the derivative variable, if the variable was a state. + auto derivativeVariablePositionIt = stateVariablesMapping.find(var); - if (derivativeVariablePositionIt != stateVariablesMapping.end()) { - auto derGetterFn = - derivativeVariablesGetters[derivativeVariablePositionIt->second]; + if (derivativeVariablePositionIt != stateVariablesMapping.end()) { + auto derGetterFn = + derivativeVariablesGetters[derivativeVariablePositionIt->second]; - auto derValue = - static_cast(derGetterFn(varIndices.data())); + auto derValue = static_cast(derGetterFn(varIndices.data())); - dersPtr[offset] = derValue; + dersPtr[offset] = derValue; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "Got der(var " << var << ") "; - printIndices(varIndices); - std::cerr << " with value " << std::fixed << std::setprecision(9) - << derValue << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "Got der(var " << var << ") "; + printIndices(varIndices); + std::cerr << " with value " << std::fixed << std::setprecision(9) + << derValue << std::endl; } 
- } while (advanceVariableIndices(varIndices, variablesDimensions[var])); - } - - IDA_PROFILER_COPY_VARS_FROM_MARCO_STOP; + } + } while (advanceVariableIndices(varIndices, variablesDimensions[var])); } - void IDAInstance::copyVariablesIntoMARCO( - N_Vector algebraicAndStateVariablesVector, - N_Vector derivativeVariablesVector) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[IDA] Copying variables into MARCO" << std::endl; - } + IDA_PROFILER_COPY_VARS_FROM_MARCO_STOP; +} - IDA_PROFILER_COPY_VARS_INTO_MARCO_START; +void IDAInstance::copyVariablesIntoMARCO( + N_Vector algebraicAndStateVariablesVector, + N_Vector derivativeVariablesVector) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[IDA] Copying variables into MARCO" << std::endl; + } - realtype* varsPtr = N_VGetArrayPointer(algebraicAndStateVariablesVector); - realtype* dersPtr = N_VGetArrayPointer(derivativeVariablesVector); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); + IDA_PROFILER_COPY_VARS_INTO_MARCO_START; - for (Variable var = 0; var < numOfArrayVariables; ++var) { - uint64_t variableArrayOffset = variableOffsets[var]; - const auto& dimensions = variablesDimensions[var]; + realtype *varsPtr = N_VGetArrayPointer(algebraicAndStateVariablesVector); + realtype *dersPtr = N_VGetArrayPointer(derivativeVariablesVector); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - std::vector varIndices; - getVariableBeginIndices(var, varIndices); + for (Variable var = 0; var < numOfArrayVariables; ++var) { + uint64_t variableArrayOffset = variableOffsets[var]; + const auto &dimensions = variablesDimensions[var]; - do { - uint64_t variableScalarOffset = - getVariableFlatIndex(dimensions, varIndices.data()); + std::vector varIndices; + getVariableBeginIndices(var, varIndices); - uint64_t offset = variableArrayOffset + variableScalarOffset; + do { + uint64_t variableScalarOffset = + getVariableFlatIndex(dimensions, varIndices.data()); - // Set the 
state / algebraic variable. - auto setterFn = algebraicAndStateVariablesSetters[var]; - auto value = static_cast(varsPtr[offset]); + uint64_t offset = variableArrayOffset + variableScalarOffset; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "Setting var " << var << " "; - printIndices(varIndices); - std::cerr << " to " << value << std::endl; - } + // Set the state / algebraic variable. + auto setterFn = algebraicAndStateVariablesSetters[var]; + auto value = static_cast(varsPtr[offset]); - setterFn(value, varIndices.data()); + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "Setting var " << var << " "; + printIndices(varIndices); + std::cerr << " to " << value << std::endl; + } - assert([&]() -> bool { - auto getterFn = algebraicAndStateVariablesGetters[var]; - return getterFn(varIndices.data()) == value; - }() && "Variable value not set correctly"); + setterFn(value, varIndices.data()); - // Set the derivative variable, if the variable was a state. - auto derivativeVariablePositionIt = stateVariablesMapping.find(var); + assert([&]() -> bool { + auto getterFn = algebraicAndStateVariablesGetters[var]; + return getterFn(varIndices.data()) == value; + }() && "Variable value not set correctly"); - if (derivativeVariablePositionIt != stateVariablesMapping.end()) { - auto derSetterFn = - derivativeVariablesSetters[derivativeVariablePositionIt->second]; + // Set the derivative variable, if the variable was a state. 
+ auto derivativeVariablePositionIt = stateVariablesMapping.find(var); - auto derValue = static_cast(dersPtr[offset]); + if (derivativeVariablePositionIt != stateVariablesMapping.end()) { + auto derSetterFn = + derivativeVariablesSetters[derivativeVariablePositionIt->second]; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "Setting der(var " << var << ") "; - printIndices(varIndices); - std::cerr << " to " << derValue << std::endl; - } + auto derValue = static_cast(dersPtr[offset]); - derSetterFn(derValue, varIndices.data()); + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "Setting der(var " << var << ") "; + printIndices(varIndices); + std::cerr << " to " << derValue << std::endl; + } - assert([&]() -> bool { - auto derGetterFn = derivativeVariablesGetters[ - derivativeVariablePositionIt->second]; + derSetterFn(derValue, varIndices.data()); - return derGetterFn(varIndices.data()) == derValue; - }() && "Derivative value not set correctly"); - } - } while (advanceVariableIndices(varIndices, variablesDimensions[var])); - } + assert([&]() -> bool { + auto derGetterFn = + derivativeVariablesGetters[derivativeVariablePositionIt->second]; - IDA_PROFILER_COPY_VARS_INTO_MARCO_STOP; + return derGetterFn(varIndices.data()) == derValue; + }() && "Derivative value not set correctly"); + } + } while (advanceVariableIndices(varIndices, variablesDimensions[var])); } - void IDAInstance::vectorEquationsParallelIteration( - std::function processFn) - { - std::mutex mutex; - size_t processedEquations = 0; + IDA_PROFILER_COPY_VARS_INTO_MARCO_STOP; +} - // Function to move to the next equation. - auto getEquationAndAdvance = - [&](Equation& eq) { - std::lock_guard lockGuard(mutex); +void IDAInstance::vectorEquationsParallelIteration( + std::function processFn) { + std::mutex mutex; + size_t processedEquations = 0; - if (processedEquations >= getNumOfVectorizedEquations()) { - return false; - } + // Function to move to the next equation. 
+ auto getEquationAndAdvance = [&](Equation &eq) { + std::lock_guard lockGuard(mutex); - eq = equationsProcessingOrder[processedEquations++]; - return true; - }; + if (processedEquations >= getNumOfVectorizedEquations()) { + return false; + } - for (unsigned int i = 0, e = threadPool.getNumOfThreads(); i < e; ++i) { - threadPool.async([&]() { - Equation equation; + eq = equationsProcessingOrder[processedEquations++]; + return true; + }; - while (getEquationAndAdvance(equation)) { - processFn(equation); - } - }); - } + for (unsigned int i = 0, e = threadPool.getNumOfThreads(); i < e; ++i) { + threadPool.async([&]() { + Equation equation; - threadPool.wait(); + while (getEquationAndAdvance(equation)) { + processFn(equation); + } + }); } - void IDAInstance::scalarEquationsParallelIteration( - std::function& equationIndices)> processFn) - { - size_t processedEquations = 0; - std::vector equationIndices; - std::mutex mutex; + threadPool.wait(); +} - // Function to advance the indices by one, or move to the next equation if - // the current one has been fully visited. - auto getEquationAndAdvance = - [&](Equation& eq, std::vector& indices) { - std::lock_guard lockGuard(mutex); +void IDAInstance::scalarEquationsParallelIteration( + std::function &equationIndices)> + processFn) { + size_t processedEquations = 0; + std::vector equationIndices; + std::mutex mutex; + + // Function to advance the indices by one, or move to the next equation if + // the current one has been fully visited. 
+ auto getEquationAndAdvance = [&](Equation &eq, + std::vector &indices) { + std::lock_guard lockGuard(mutex); + + if (processedEquations >= getNumOfVectorizedEquations()) { + return false; + } - if (processedEquations >= getNumOfVectorizedEquations()) { - return false; - } + eq = equationsProcessingOrder[processedEquations]; + indices = equationIndices; - eq = equationsProcessingOrder[processedEquations]; - indices = equationIndices; + if (!advanceEquationIndices(equationIndices, equationRanges[eq])) { + if (++processedEquations < getNumOfVectorizedEquations()) { + getEquationBeginIndices(equationsProcessingOrder[processedEquations], + equationIndices); + } + } - if (!advanceEquationIndices(equationIndices, equationRanges[eq])) { - if (++processedEquations < getNumOfVectorizedEquations()) { - getEquationBeginIndices( - equationsProcessingOrder[processedEquations], - equationIndices); - } - } + return true; + }; - return true; - }; + getEquationBeginIndices(equationsProcessingOrder[processedEquations], + equationIndices); - getEquationBeginIndices( - equationsProcessingOrder[processedEquations], equationIndices); + for (unsigned int i = 0, e = threadPool.getNumOfThreads(); i < e; ++i) { + threadPool.async([&]() { + Equation equation; + std::vector indices; - for (unsigned int i = 0, e = threadPool.getNumOfThreads(); i < e; ++i) { - threadPool.async([&]() { - Equation equation; - std::vector indices; + while (getEquationAndAdvance(equation, indices)) { + processFn(equation, indices); + } + }); + } - while (getEquationAndAdvance(equation, indices)) { - processFn(equation, indices); - } - }); - } + threadPool.wait(); +} - threadPool.wait(); - } +void IDAInstance::equationsParallelIteration( + std::function &equationIndices)> + processFn) { + // Shard the work among multiple threads. 
+ unsigned int numOfThreads = threadPool.getNumOfThreads(); + std::atomic_size_t chunkIndex = 0; - void IDAInstance::equationsParallelIteration( - std::function& equationIndices)> processFn) - { - // Shard the work among multiple threads. - unsigned int numOfThreads = threadPool.getNumOfThreads(); - std::atomic_size_t chunkIndex = 0; + for (unsigned int thread = 0; thread < numOfThreads; ++thread) { + threadPool.async([&]() { + size_t assignedChunk; - for (unsigned int thread = 0; thread < numOfThreads; ++thread) { - threadPool.async([&]() { - size_t assignedChunk; + while ((assignedChunk = chunkIndex++) < threadEquationsChunks.size()) { + const ThreadEquationsChunk &chunk = + threadEquationsChunks[assignedChunk]; - while ((assignedChunk = chunkIndex++) < threadEquationsChunks.size()) { - const ThreadEquationsChunk& chunk = - threadEquationsChunks[assignedChunk]; + Equation equation = std::get<0>(chunk); + std::vector equationIndices = std::get<1>(chunk); - Equation equation = std::get<0>(chunk); - std::vector equationIndices = std::get<1>(chunk); + do { + assert([&]() -> bool { + if (equationIndices.size() != equationRanges[equation].size()) { + return false; + } - do { - assert([&]() -> bool { - if (equationIndices.size() != equationRanges[equation].size()) { + for (size_t i = 0, rank = equationIndices.size(); i < rank; ++i) { + if (equationIndices[i] < equationRanges[equation][i].begin || + equationIndices[i] >= equationRanges[equation][i].end) { return false; } + } - for (size_t i = 0, rank = equationIndices.size(); i < rank; ++i) { - if (equationIndices[i] < equationRanges[equation][i].begin || - equationIndices[i] >= equationRanges[equation][i].end) { - return false; - } - } - - return true; - }() && "Invalid equation indices"); - - processFn(equation, equationIndices); - } while (advanceEquationIndicesUntil( - equationIndices, equationRanges[equation], std::get<2>(chunk))); - } - }); - } + return true; + }() && "Invalid equation indices"); - 
threadPool.wait(); + processFn(equation, equationIndices); + } while (advanceEquationIndicesUntil( + equationIndices, equationRanges[equation], std::get<2>(chunk))); + } + }); } - void IDAInstance::getVariableBeginIndices( - Variable variable, std::vector& indices) const - { - uint64_t variableRank = getVariableRank(variable); - indices.resize(variableRank); + threadPool.wait(); +} - for (uint64_t i = 0; i < variableRank; ++i) { - indices[i] = 0; - } +void IDAInstance::getVariableBeginIndices( + Variable variable, std::vector &indices) const { + uint64_t variableRank = getVariableRank(variable); + indices.resize(variableRank); + + for (uint64_t i = 0; i < variableRank; ++i) { + indices[i] = 0; } +} - void IDAInstance::getVariableEndIndices( - Variable variable, std::vector& indices) const - { - uint64_t variableRank = getVariableRank(variable); - indices.resize(variableRank); +void IDAInstance::getVariableEndIndices(Variable variable, + std::vector &indices) const { + uint64_t variableRank = getVariableRank(variable); + indices.resize(variableRank); - for (uint64_t i = 0; i < variableRank; ++i) { - indices[i] = variablesDimensions[variable][i]; - } + for (uint64_t i = 0; i < variableRank; ++i) { + indices[i] = variablesDimensions[variable][i]; } +} - void IDAInstance::getEquationBeginIndices( - Equation equation, std::vector& indices) const - { - uint64_t equationRank = getEquationRank(equation); - indices.resize(equationRank); +void IDAInstance::getEquationBeginIndices(Equation equation, + std::vector &indices) const { + uint64_t equationRank = getEquationRank(equation); + indices.resize(equationRank); - for (uint64_t i = 0; i < equationRank; ++i) { - indices[i] = equationRanges[equation][i].begin; - } + for (uint64_t i = 0; i < equationRank; ++i) { + indices[i] = equationRanges[equation][i].begin; } +} - void IDAInstance::getEquationEndIndices( - Equation equation, std::vector& indices) const - { - uint64_t equationRank = getEquationRank(equation); - 
indices.resize(equationRank); +void IDAInstance::getEquationEndIndices(Equation equation, + std::vector &indices) const { + uint64_t equationRank = getEquationRank(equation); + indices.resize(equationRank); - for (uint64_t i = 0; i < equationRank; ++i) { - indices[i] = equationRanges[equation][i].end; - } + for (uint64_t i = 0; i < equationRank; ++i) { + indices[i] = equationRanges[equation][i].end; } +} - void IDAInstance::printStatistics() const - { - if (getNumOfScalarEquations() == 0) { - return; - } - - long nst, nre, nje, nni, nli, netf, nncf; - realtype ais, ls; +void IDAInstance::printStatistics() const { + if (getNumOfScalarEquations() == 0) { + return; + } - IDAGetNumSteps(idaMemory, &nst); - IDAGetNumResEvals(idaMemory, &nre); - IDAGetNumJacEvals(idaMemory, &nje); - IDAGetNumNonlinSolvIters(idaMemory, &nni); - IDAGetNumLinIters(idaMemory, &nli); - IDAGetNumErrTestFails(idaMemory, &netf); - IDAGetNumNonlinSolvConvFails(idaMemory, &nncf); - IDAGetActualInitStep(idaMemory, &ais); - IDAGetLastStep(idaMemory, &ls); + long nst, nre, nje, nni, nli, netf, nncf; + realtype ais, ls; - std::cerr << std::endl << "Final Run Statistics:" << std::endl; + IDAGetNumSteps(idaMemory, &nst); + IDAGetNumResEvals(idaMemory, &nre); + IDAGetNumJacEvals(idaMemory, &nje); + IDAGetNumNonlinSolvIters(idaMemory, &nni); + IDAGetNumLinIters(idaMemory, &nli); + IDAGetNumErrTestFails(idaMemory, &netf); + IDAGetNumNonlinSolvConvFails(idaMemory, &nncf); + IDAGetActualInitStep(idaMemory, &ais); + IDAGetLastStep(idaMemory, &ls); - std::cerr << "Number of vector equations = "; - std::cerr << getNumOfVectorizedEquations() << std::endl; - std::cerr << "Number of scalar equations = "; - std::cerr << getNumOfScalarEquations() << std::endl; - std::cerr << "Number of non-zero values = "; - std::cerr << nonZeroValuesNumber << std::endl; + std::cerr << std::endl << "Final Run Statistics:" << std::endl; - std::cerr << "Number of steps = " << nst << std::endl; - std::cerr << "Number of residual 
evaluations = " << nre << std::endl; - std::cerr << "Number of Jacobian evaluations = " << nje << std::endl; + std::cerr << "Number of vector equations = "; + std::cerr << getNumOfVectorizedEquations() << std::endl; + std::cerr << "Number of scalar equations = "; + std::cerr << getNumOfScalarEquations() << std::endl; + std::cerr << "Number of non-zero values = "; + std::cerr << nonZeroValuesNumber << std::endl; - std::cerr << "Number of nonlinear iterations = " << nni << std::endl; - std::cerr << "Number of linear iterations = " << nli << std::endl; - std::cerr << "Number of error test failures = " << netf << std::endl; - std::cerr << "Number of nonlin. conv. failures = " << nncf << std::endl; + std::cerr << "Number of steps = " << nst << std::endl; + std::cerr << "Number of residual evaluations = " << nre << std::endl; + std::cerr << "Number of Jacobian evaluations = " << nje << std::endl; - std::cerr << "Actual initial step size used = " << ais << std::endl; - std::cerr << "Step size used for the last step = " << ls << std::endl; - } + std::cerr << "Number of nonlinear iterations = " << nni << std::endl; + std::cerr << "Number of linear iterations = " << nli << std::endl; + std::cerr << "Number of error test failures = " << netf << std::endl; + std::cerr << "Number of nonlin. conv. 
failures = " << nncf << std::endl; - bool IDAInstance::idaInit() - { - auto retVal = IDAInit(idaMemory, residualFunction, startTime, variablesVector, derivativesVector); + std::cerr << "Actual initial step size used = " << ais << std::endl; + std::cerr << "Step size used for the last step = " << ls << std::endl; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDAInit - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaInit() { + auto retVal = IDAInit(idaMemory, residualFunction, startTime, variablesVector, + derivativesVector); - if (retVal == IDA_MEM_FAIL) { - std::cerr << "IDAInit - A memory allocation request has failed" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDAInit - The ida_mem pointer is NULL" << std::endl; + return false; + } - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDAInit - An input argument to IDAInit has an illegal value" << std::endl; - return false; - } + if (retVal == IDA_MEM_FAIL) { + std::cerr << "IDAInit - A memory allocation request has failed" + << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDAInit - An input argument to IDAInit has an illegal value" + << std::endl; + return false; } - bool IDAInstance::idaSVTolerances() - { - auto retVal = IDASVtolerances(idaMemory, getOptions().relativeTolerance, tolerancesVector); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASVtolerances - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSVTolerances() { + auto retVal = IDASVtolerances(idaMemory, getOptions().relativeTolerance, + tolerancesVector); - if (retVal == IDA_NO_MALLOC) { - std::cerr << "IDASVtolerances - The allocation function IDAInit(has not been called" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASVtolerances - The ida_mem pointer is NULL" << std::endl; + return false; + } - 
if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASVtolerances - The relative error tolerance was negative or the absolute tolerance vector had a negative component" << std::endl; - return false; - } + if (retVal == IDA_NO_MALLOC) { + std::cerr << "IDASVtolerances - The allocation function IDAInit(has not " + "been called" + << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASVtolerances - The relative error tolerance was negative " + "or the absolute tolerance vector had a negative component" + << std::endl; + return false; } - bool IDAInstance::idaSetLinearSolver() - { - auto retVal = IDASetLinearSolver(idaMemory, linearSolver, sparseMatrix); + return retVal == IDA_SUCCESS; +} - if (retVal == IDALS_MEM_NULL) { - std::cerr << "IDASetLinearSolver - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetLinearSolver() { + auto retVal = IDASetLinearSolver(idaMemory, linearSolver, sparseMatrix); - if (retVal == IDALS_ILL_INPUT) { - std::cerr << "IDASetLinearSolver - The IDALS interface is not compatible with the LS or J input objects or is incompatible with the N_Vector object passed to IDAInit" << std::endl; - return false; - } + if (retVal == IDALS_MEM_NULL) { + std::cerr << "IDASetLinearSolver - The ida_mem pointer is NULL" + << std::endl; + return false; + } - if (retVal == IDALS_SUNLS_FAIL) { - std::cerr << "IDASetLinearSolver - A call to the LS object failed" << std::endl; - return false; - } + if (retVal == IDALS_ILL_INPUT) { + std::cerr << "IDASetLinearSolver - The IDALS interface is not compatible " + "with the LS or J input objects or is incompatible with the " + "N_Vector object passed to IDAInit" + << std::endl; + return false; + } - if (retVal == IDALS_MEM_FAIL) { - std::cerr << "IDASetLinearSolver - A memory allocation request failed" << std::endl; - return false; - } + if (retVal == IDALS_SUNLS_FAIL) { + std::cerr << "IDASetLinearSolver - A call to the LS 
object failed" + << std::endl; + return false; + } - return retVal == IDALS_SUCCESS; + if (retVal == IDALS_MEM_FAIL) { + std::cerr << "IDASetLinearSolver - A memory allocation request failed" + << std::endl; + return false; } - bool IDAInstance::idaSetUserData() - { - auto retVal = IDASetUserData(idaMemory, this); + return retVal == IDALS_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetUserData - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetUserData() { + auto retVal = IDASetUserData(idaMemory, this); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetUserData - The ida_mem pointer is NULL" << std::endl; + return false; } - bool IDAInstance::idaSetMaxNumSteps() - { - auto retVal = IDASetMaxNumSteps(idaMemory, getOptions().maxSteps); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxNumSteps - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetMaxNumSteps() { + auto retVal = IDASetMaxNumSteps(idaMemory, getOptions().maxSteps); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMaxNumSteps - Either hmax is not positive or it is smaller than the minimum allowable step" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxNumSteps - The ida_mem pointer is NULL" << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMaxNumSteps - Either hmax is not positive or it is " + "smaller than the minimum allowable step" + << std::endl; + return false; } - bool IDAInstance::idaSetInitialStepSize() - { - auto retVal = IDASetInitStep(idaMemory, getOptions().initialStepSize); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetInitStep - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetInitialStepSize() { + auto retVal = 
IDASetInitStep(idaMemory, getOptions().initialStepSize); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetInitStep - The ida_mem pointer is NULL" << std::endl; + return false; } - bool IDAInstance::idaSetMinStepSize() - { + return retVal == IDA_SUCCESS; +} + +bool IDAInstance::idaSetMinStepSize() { #if SUNDIALS_VERSION_MAJOR >= 6 && SUNDIALS_VERSION_MINOR >= 2 - auto retVal = IDASetMinStep(idaMemory, getOptions().minStepSize); + auto retVal = IDASetMinStep(idaMemory, getOptions().minStepSize); - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMinStep - The ida_mem pointer is NULL" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMinStep - The ida_mem pointer is NULL" << std::endl; + return false; + } - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMinStep - hmin is negative" << std::endl; - return false; - } + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMinStep - hmin is negative" << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + return retVal == IDA_SUCCESS; #else - return true; + return true; #endif - } +} - bool IDAInstance::idaSetMaxStepSize() - { - auto retVal = IDASetMaxStep(idaMemory, getOptions().maxStepSize); +bool IDAInstance::idaSetMaxStepSize() { + auto retVal = IDASetMaxStep(idaMemory, getOptions().maxStepSize); - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxStep - The ida_mem pointer is NULL" << std::endl; - return false; - } - - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMaxStep - Either hmax is not positive or it is smaller than the minimum allowable step" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxStep - The ida_mem pointer is NULL" << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMaxStep - Either hmax is not positive or it is smaller " + "than the minimum allowable step" + << std::endl; + return 
false; } - bool IDAInstance::idaSetStopTime() - { - auto retVal = IDASetStopTime(idaMemory, endTime); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxStep - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetStopTime() { + auto retVal = IDASetStopTime(idaMemory, endTime); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMaxStep - The value of tstop is not beyond the current t value" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxStep - The ida_mem pointer is NULL" << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMaxStep - The value of tstop is not beyond the current " + "t value" + << std::endl; + return false; } - bool IDAInstance::idaSetMaxErrTestFails() - { - auto retVal = IDASetMaxErrTestFails(idaMemory, getOptions().maxErrTestFails); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxErrTestFails - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetMaxErrTestFails() { + auto retVal = IDASetMaxErrTestFails(idaMemory, getOptions().maxErrTestFails); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxErrTestFails - The ida_mem pointer is NULL" + << std::endl; + return false; } - bool IDAInstance::idaSetSuppressAlg() - { - auto retVal = IDASetSuppressAlg(idaMemory, getOptions().suppressAlg); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetSuppressAlg - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetSuppressAlg() { + auto retVal = IDASetSuppressAlg(idaMemory, getOptions().suppressAlg); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetSuppressAlg - The ida_mem pointer is NULL" << std::endl; + return false; } - bool 
IDAInstance::idaSetId() - { - auto retVal = IDASetId(idaMemory, idVector); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetId - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetId() { + auto retVal = IDASetId(idaMemory, idVector); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetId - The ida_mem pointer is NULL" << std::endl; + return false; } - bool IDAInstance::idaSetJacobianFunction() - { - auto retVal = IDASetJacFn(idaMemory, jacobianMatrix); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetJacFn - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetJacobianFunction() { + auto retVal = IDASetJacFn(idaMemory, jacobianMatrix); - if (retVal == IDALS_LMEM_NULL) { - std::cerr << "IDASetJacFn - The IDALS linear solver interface has not been initialized" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetJacFn - The ida_mem pointer is NULL" << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDALS_LMEM_NULL) { + std::cerr << "IDASetJacFn - The IDALS linear solver interface has not been " + "initialized" + << std::endl; + return false; } - bool IDAInstance::idaSetMaxNonlinIters() - { - auto retVal = IDASetMaxNonlinIters(idaMemory, getOptions().maxNonlinIters); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxNonlinIters - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetMaxNonlinIters() { + auto retVal = IDASetMaxNonlinIters(idaMemory, getOptions().maxNonlinIters); - if (retVal == IDA_MEM_FAIL) { - std::cerr << "IDASetMaxNonlinIters - The SUNNonlinearSolver object is NULL" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxNonlinIters - The ida_mem pointer is NULL" + << std::endl; + return 
false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_FAIL) { + std::cerr << "IDASetMaxNonlinIters - The SUNNonlinearSolver object is NULL" + << std::endl; + return false; } - bool IDAInstance::idaSetMaxConvFails() - { - auto retVal = IDASetMaxConvFails(idaMemory, getOptions().maxConvFails); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxConvFails - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetMaxConvFails() { + auto retVal = IDASetMaxConvFails(idaMemory, getOptions().maxConvFails); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxConvFails - The ida_mem pointer is NULL" + << std::endl; + return false; } - bool IDAInstance::idaSetNonlinConvCoef() - { - auto retVal = IDASetNonlinConvCoef(idaMemory, getOptions().nonlinConvCoef); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetNonlinConvCoef - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetNonlinConvCoef() { + auto retVal = IDASetNonlinConvCoef(idaMemory, getOptions().nonlinConvCoef); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetNonlinConvCoef - The value of nlscoef is <= 0" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetNonlinConvCoef - The ida_mem pointer is NULL" + << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetNonlinConvCoef - The value of nlscoef is <= 0" + << std::endl; + return false; } - bool IDAInstance::idaSetNonlinConvCoefIC() - { - auto retVal = IDASetNonlinConvCoefIC(idaMemory, getOptions().nonlinConvCoefIC); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetNonlinConvCoefIC - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetNonlinConvCoefIC() { + auto retVal = + 
IDASetNonlinConvCoefIC(idaMemory, getOptions().nonlinConvCoefIC); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetNonlinConvCoefIC - The epiccon factor is <= 0" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetNonlinConvCoefIC - The ida_mem pointer is NULL" + << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetNonlinConvCoefIC - The epiccon factor is <= 0" + << std::endl; + return false; } - bool IDAInstance::idaSetMaxNumStepsIC() - { - auto retVal = IDASetMaxNumStepsIC( - idaMemory, static_cast(getOptions().maxStepsIC)); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxNumStepsIC - The ida_mem pointer is NULL" - << std::endl; - return false; - } +bool IDAInstance::idaSetMaxNumStepsIC() { + auto retVal = + IDASetMaxNumStepsIC(idaMemory, static_cast(getOptions().maxStepsIC)); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMaxNumStepsIC - maxnh is non-positive" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxNumStepsIC - The ida_mem pointer is NULL" + << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMaxNumStepsIC - maxnh is non-positive" << std::endl; + return false; } - bool IDAInstance::idaSetMaxNumJacsIC() - { - auto retVal = IDASetMaxNumJacsIC(idaMemory, getOptions().maxNumJacsIC); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxNumJacsIC - The ida_mem pointer is NULL" << std::endl; - return false; - } +bool IDAInstance::idaSetMaxNumJacsIC() { + auto retVal = IDASetMaxNumJacsIC(idaMemory, getOptions().maxNumJacsIC); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMaxNumJacsIC - maxnj is non-positive" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxNumJacsIC - The ida_mem pointer is NULL" + << 
std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMaxNumJacsIC - maxnj is non-positive" << std::endl; + return false; } - bool IDAInstance::idaSetMaxNumItersIC() - { - auto retVal = IDASetMaxNumItersIC(idaMemory, getOptions().maxNumItersIC); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetMaxNumItersIC - The ida_mem pointer is NULL" - << std::endl; - return false; - } +bool IDAInstance::idaSetMaxNumItersIC() { + auto retVal = IDASetMaxNumItersIC(idaMemory, getOptions().maxNumItersIC); - if (retVal == IDA_ILL_INPUT) { - std::cerr << "IDASetMaxNumItersIC - maxnit is non-positive" << std::endl; - return false; - } + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetMaxNumItersIC - The ida_mem pointer is NULL" + << std::endl; + return false; + } - return retVal == IDA_SUCCESS; + if (retVal == IDA_ILL_INPUT) { + std::cerr << "IDASetMaxNumItersIC - maxnit is non-positive" << std::endl; + return false; } - bool IDAInstance::idaSetLineSearchOffIC() - { - auto retVal = IDASetLineSearchOffIC(idaMemory, getOptions().lineSearchOff); + return retVal == IDA_SUCCESS; +} - if (retVal == IDA_MEM_NULL) { - std::cerr << "IDASetLineSearchOffIC - The ida_mem pointer is NULL" - << std::endl; - return false; - } +bool IDAInstance::idaSetLineSearchOffIC() { + auto retVal = IDASetLineSearchOffIC(idaMemory, getOptions().lineSearchOff); - return retVal == IDA_SUCCESS; + if (retVal == IDA_MEM_NULL) { + std::cerr << "IDASetLineSearchOffIC - The ida_mem pointer is NULL" + << std::endl; + return false; } - void IDAInstance::getWritingEquation( - Variable variable, - const std::vector& variableIndices, - Equation& equation, - std::vector& equationIndices) const - { - bool found = false; - uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); + return retVal == IDA_SUCCESS; +} - for (Equation eq = 0; eq < numOfVectorizedEquations; ++eq) { - Variable writtenVariable = 
getWrittenVariable(eq); +void IDAInstance::getWritingEquation( + Variable variable, const std::vector &variableIndices, + Equation &equation, std::vector &equationIndices) const { + bool found = false; + uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); - if (writtenVariable == variable) { - std::vector writingEquationIndices; - getEquationBeginIndices(eq, writingEquationIndices); + for (Equation eq = 0; eq < numOfVectorizedEquations; ++eq) { + Variable writtenVariable = getWrittenVariable(eq); - std::vector writtenVariableIndices( - getVariableRank(writtenVariable)); + if (writtenVariable == variable) { + std::vector writingEquationIndices; + getEquationBeginIndices(eq, writingEquationIndices); - AccessFunction writeAccessFunction = getWriteAccessFunction(eq); + std::vector writtenVariableIndices( + getVariableRank(writtenVariable)); - do { - writeAccessFunction(writingEquationIndices.data(), - writtenVariableIndices.data()); - - if (writtenVariableIndices == variableIndices) { - assert(!found && - "Multiple equations writing to the same variable"); - found = true; - equation = eq; - equationIndices = writingEquationIndices; - } - } while (advanceEquationIndices( - writingEquationIndices, equationRanges[eq])); - } + AccessFunction writeAccessFunction = getWriteAccessFunction(eq); + + do { + writeAccessFunction(writingEquationIndices.data(), + writtenVariableIndices.data()); + + if (writtenVariableIndices == variableIndices) { + assert(!found && "Multiple equations writing to the same variable"); + found = true; + equation = eq; + equationIndices = writingEquationIndices; + } + } while ( + advanceEquationIndices(writingEquationIndices, equationRanges[eq])); } + } - assert(found && "Writing equation not found"); + assert(found && "Writing equation not found"); +} + +void IDAInstance::printVariablesVector(N_Vector variables) const { + realtype *data = N_VGetArrayPointer(variables); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); + + for 
(Variable var = 0; var < numOfArrayVariables; ++var) { + std::vector indices; + getVariableBeginIndices(var, indices); + + do { + std::cerr << "var " << var << " "; + printIndices(indices); + std::cerr << "\t" << std::fixed << std::setprecision(9) << *data + << std::endl; + ++data; + } while (advanceVariableIndices(indices, variablesDimensions[var])); } +} - void IDAInstance::printVariablesVector(N_Vector variables) const - { - realtype* data = N_VGetArrayPointer(variables); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); +void IDAInstance::printDerivativesVector(N_Vector derivatives) const { + realtype *data = N_VGetArrayPointer(derivatives); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - for (Variable var = 0; var < numOfArrayVariables; ++var) { + for (Variable var = 0; var < numOfArrayVariables; ++var) { + auto it = stateVariablesMapping.find(var); + + if (it != stateVariablesMapping.end()) { std::vector indices; getVariableBeginIndices(var, indices); do { - std::cerr << "var " << var << " "; + std::cerr << "der(var " << var << ") "; printIndices(indices); - std::cerr << "\t" << std::fixed << std::setprecision(9) - << *data << std::endl; + std::cerr << "\t" << std::fixed << std::setprecision(9) << *data + << std::endl; ++data; } while (advanceVariableIndices(indices, variablesDimensions[var])); } } +} - void IDAInstance::printDerivativesVector(N_Vector derivatives) const - { - realtype* data = N_VGetArrayPointer(derivatives); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); +void IDAInstance::printResidualsVector(N_Vector residuals) const { + realtype *data = N_VGetArrayPointer(residuals); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - for (Variable var = 0; var < numOfArrayVariables; ++var) { - auto it = stateVariablesMapping.find(var); + for (Variable var = 0; var < numOfArrayVariables; ++var) { + std::vector variableIndices; + getVariableBeginIndices(var, variableIndices); - if (it != 
stateVariablesMapping.end()) { - std::vector indices; - getVariableBeginIndices(var, indices); + do { + Equation eq; + std::vector equationIndices; + getWritingEquation(var, variableIndices, eq, equationIndices); - do { - std::cerr << "der(var " << var << ") "; - printIndices(indices); - std::cerr << "\t" << std::fixed << std::setprecision(9) - << *data << std::endl; - ++data; - } while (advanceVariableIndices(indices, variablesDimensions[var])); - } - } + std::cerr << "eq " << eq << " "; + printIndices(equationIndices); + std::cerr << " (writing to var " << var; + printIndices(variableIndices); + std::cerr << ")" << "\t" << std::fixed << std::setprecision(9) << *data + << "\n"; + ++data; + } while (advanceVariableIndices(variableIndices, variablesDimensions[var])); } +} - void IDAInstance::printResidualsVector(N_Vector residuals) const - { - realtype* data = N_VGetArrayPointer(residuals); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); +// Highly inefficient, use only for debug purposes. 
+static double getCellFromSparseMatrix(SUNMatrix matrix, uint64_t rowIndex, + uint64_t columnIndex) { + realtype *data = SUNSparseMatrix_Data(matrix); - for (Variable var = 0; var < numOfArrayVariables; ++var) { - std::vector variableIndices; - getVariableBeginIndices(var, variableIndices); + sunindextype *rowPtrs = SUNSparseMatrix_IndexPointers(matrix); + sunindextype *columnIndices = SUNSparseMatrix_IndexValues(matrix); - do { - Equation eq; - std::vector equationIndices; - getWritingEquation(var, variableIndices, eq, equationIndices); + sunindextype beginIndex = rowPtrs[rowIndex]; + sunindextype endIndex = rowPtrs[rowIndex + 1]; - std::cerr << "eq " << eq << " "; - printIndices(equationIndices); - std::cerr << " (writing to var " << var; - printIndices(variableIndices); - std::cerr << ")" << "\t" << std::fixed << std::setprecision(9) - << *data << "\n"; - ++data; - } while (advanceVariableIndices( - variableIndices, variablesDimensions[var])); + for (sunindextype i = beginIndex; i < endIndex; ++i) { + if (columnIndices[i] == static_cast(columnIndex)) { + return data[i]; } } - // Highly inefficient, use only for debug purposes. - static double getCellFromSparseMatrix( - SUNMatrix matrix, - uint64_t rowIndex, - uint64_t columnIndex) - { - realtype* data = SUNSparseMatrix_Data(matrix); - - sunindextype* rowPtrs = SUNSparseMatrix_IndexPointers(matrix); - sunindextype* columnIndices = SUNSparseMatrix_IndexValues(matrix); + return 0; +} - sunindextype beginIndex = rowPtrs[rowIndex]; - sunindextype endIndex = rowPtrs[rowIndex + 1]; +void IDAInstance::printJacobianMatrix(SUNMatrix jacobianMatrix) const { + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - for (sunindextype i = beginIndex; i < endIndex; ++i) { - if (columnIndices[i] == static_cast(columnIndex)) { - return data[i]; - } - } + // Print the heading row. 
+ for (Variable var = 0; var < numOfArrayVariables; ++var) { + std::vector variableIndices; + getVariableBeginIndices(var, variableIndices); - return 0; + do { + std::cerr << "\tvar " << var << " "; + printIndices(variableIndices); + } while (advanceVariableIndices(variableIndices, variablesDimensions[var])); } - void IDAInstance::printJacobianMatrix(SUNMatrix jacobianMatrix) const - { - uint64_t numOfArrayVariables = getNumOfArrayVariables(); - - // Print the heading row. - for (Variable var = 0; var < numOfArrayVariables; ++var) { - std::vector variableIndices; - getVariableBeginIndices(var, variableIndices); - - do { - std::cerr << "\tvar " << var << " "; - printIndices(variableIndices); - } while (advanceVariableIndices( - variableIndices, variablesDimensions[var])); - } - - std::cerr << std::endl; + std::cerr << std::endl; - // Print the rows containing the values. - uint64_t rowFlatIndex = 0; + // Print the rows containing the values. + uint64_t rowFlatIndex = 0; - for (Variable eqVar = 0; eqVar < numOfArrayVariables; ++eqVar) { - std::vector eqVarIndices; - getVariableBeginIndices(eqVar, eqVarIndices); + for (Variable eqVar = 0; eqVar < numOfArrayVariables; ++eqVar) { + std::vector eqVarIndices; + getVariableBeginIndices(eqVar, eqVarIndices); - do { - Equation eq; - std::vector equationIndices; - getWritingEquation(eqVar, eqVarIndices, eq, equationIndices); + do { + Equation eq; + std::vector equationIndices; + getWritingEquation(eqVar, eqVarIndices, eq, equationIndices); - std::cerr << "eq " << eq << " "; - printIndices(equationIndices); - std::cerr << " (writing to var " << eqVar << " "; - printIndices(eqVarIndices); - std::cerr << ")"; + std::cerr << "eq " << eq << " "; + printIndices(equationIndices); + std::cerr << " (writing to var " << eqVar << " "; + printIndices(eqVarIndices); + std::cerr << ")"; - uint64_t columnFlatIndex = 0; + uint64_t columnFlatIndex = 0; - for (Variable indVar = 0; indVar < numOfArrayVariables; ++indVar) { - std::vector 
indVarIndices; - getVariableBeginIndices(indVar, indVarIndices); + for (Variable indVar = 0; indVar < numOfArrayVariables; ++indVar) { + std::vector indVarIndices; + getVariableBeginIndices(indVar, indVarIndices); - do { - auto value = getCellFromSparseMatrix( - jacobianMatrix, rowFlatIndex, columnFlatIndex); + do { + auto value = getCellFromSparseMatrix(jacobianMatrix, rowFlatIndex, + columnFlatIndex); - std::cerr << "\t" << std::fixed << std::setprecision(9) << value; - columnFlatIndex++; - } while (advanceVariableIndices( - indVarIndices, variablesDimensions[indVar])); - } + std::cerr << "\t" << std::fixed << std::setprecision(9) << value; + columnFlatIndex++; + } while ( + advanceVariableIndices(indVarIndices, variablesDimensions[indVar])); + } - std::cerr << std::endl; - rowFlatIndex++; - } while (advanceVariableIndices( - eqVarIndices, variablesDimensions[eqVar])); - } + std::cerr << std::endl; + rowFlatIndex++; + } while (advanceVariableIndices(eqVarIndices, variablesDimensions[eqVar])); } } +} // namespace marco::runtime::sundials::ida //===---------------------------------------------------------------------===// // Exported functions @@ -2197,10 +2157,9 @@ namespace marco::runtime::sundials::ida //===---------------------------------------------------------------------===// // idaCreate -static void* idaCreate_pvoid() -{ - auto* instance = new IDAInstance(); - return static_cast(instance); +static void *idaCreate_pvoid() { + auto *instance = new IDAInstance(); + return static_cast(instance); } RUNTIME_FUNC_DEF(idaCreate, PTR(void)) @@ -2208,9 +2167,8 @@ RUNTIME_FUNC_DEF(idaCreate, PTR(void)) //===---------------------------------------------------------------------===// // idaCalcIC -static void idaCalcIC_void(void* instance) -{ - [[maybe_unused]] bool result = static_cast(instance)->calcIC(); +static void idaCalcIC_void(void *instance) { + [[maybe_unused]] bool result = static_cast(instance)->calcIC(); assert(result && "Can't compute the initial values 
of the variables"); } @@ -2219,9 +2177,8 @@ RUNTIME_FUNC_DEF(idaCalcIC, void, PTR(void)) //===---------------------------------------------------------------------===// // idaStep -static void idaStep_void(void* instance) -{ - [[maybe_unused]] bool result = static_cast(instance)->step(); +static void idaStep_void(void *instance) { + [[maybe_unused]] bool result = static_cast(instance)->step(); assert(result && "IDA step failed"); } @@ -2230,9 +2187,8 @@ RUNTIME_FUNC_DEF(idaStep, void, PTR(void)) //===---------------------------------------------------------------------===// // idaFree -static void idaFree_void(void* instance) -{ - delete static_cast(instance); +static void idaFree_void(void *instance) { + delete static_cast(instance); } RUNTIME_FUNC_DEF(idaFree, void, PTR(void)) @@ -2240,9 +2196,8 @@ RUNTIME_FUNC_DEF(idaFree, void, PTR(void)) //===---------------------------------------------------------------------===// // idaSetStartTime -static void idaSetStartTime_void(void* instance, double startTime) -{ - static_cast(instance)->setStartTime(startTime); +static void idaSetStartTime_void(void *instance, double startTime) { + static_cast(instance)->setStartTime(startTime); } RUNTIME_FUNC_DEF(idaSetStartTime, void, PTR(void), double) @@ -2250,9 +2205,8 @@ RUNTIME_FUNC_DEF(idaSetStartTime, void, PTR(void), double) //===---------------------------------------------------------------------===// // idaSetEndTime -static void idaSetEndTime_void(void* instance, double endTime) -{ - static_cast(instance)->setEndTime(endTime); +static void idaSetEndTime_void(void *instance, double endTime) { + static_cast(instance)->setEndTime(endTime); } RUNTIME_FUNC_DEF(idaSetEndTime, void, PTR(void), double) @@ -2260,9 +2214,8 @@ RUNTIME_FUNC_DEF(idaSetEndTime, void, PTR(void), double) //===---------------------------------------------------------------------===// // idaSetTimeStep -static void idaSetTimeStep_void(void* instance, double timeStep) -{ - 
static_cast(instance)->setTimeStep(timeStep); +static void idaSetTimeStep_void(void *instance, double timeStep) { + static_cast(instance)->setTimeStep(timeStep); } RUNTIME_FUNC_DEF(idaSetTimeStep, void, PTR(void), double) @@ -2270,10 +2223,9 @@ RUNTIME_FUNC_DEF(idaSetTimeStep, void, PTR(void), double) //===---------------------------------------------------------------------===// // idaGetCurrentTime -static double idaGetCurrentTime_f64(void* instance) -{ +static double idaGetCurrentTime_f64(void *instance) { return static_cast( - static_cast(instance)->getCurrentTime()); + static_cast(instance)->getCurrentTime()); } RUNTIME_FUNC_DEF(idaGetCurrentTime, double, PTR(void)) @@ -2281,93 +2233,75 @@ RUNTIME_FUNC_DEF(idaGetCurrentTime, double, PTR(void)) //===---------------------------------------------------------------------===// // idaAddAlgebraicVariable -static uint64_t idaAddAlgebraicVariable_i64( - void* instance, - uint64_t rank, - uint64_t* dimensions, - void* getter, - void* setter, - void* name) -{ - return static_cast(instance)->addAlgebraicVariable( - rank, dimensions, - reinterpret_cast(getter), +static uint64_t idaAddAlgebraicVariable_i64(void *instance, uint64_t rank, + uint64_t *dimensions, void *getter, + void *setter, void *name) { + return static_cast(instance)->addAlgebraicVariable( + rank, dimensions, reinterpret_cast(getter), reinterpret_cast(setter), - static_cast(name)); + static_cast(name)); } -RUNTIME_FUNC_DEF(idaAddAlgebraicVariable, uint64_t, PTR(void), uint64_t, PTR(uint64_t), PTR(void), PTR(void), PTR(void)) +RUNTIME_FUNC_DEF(idaAddAlgebraicVariable, uint64_t, PTR(void), uint64_t, + PTR(uint64_t), PTR(void), PTR(void), PTR(void)) //===---------------------------------------------------------------------===// // idaAddStateVariable -static uint64_t idaAddStateVariable_i64( - void* instance, - uint64_t rank, - uint64_t* dimensions, - void* stateGetter, - void* stateSetter, - void* derivativeGetter, - void* derivativeSetter, - void* name) -{ 
- return static_cast(instance)->addStateVariable( - rank, dimensions, - reinterpret_cast(stateGetter), +static uint64_t idaAddStateVariable_i64(void *instance, uint64_t rank, + uint64_t *dimensions, void *stateGetter, + void *stateSetter, + void *derivativeGetter, + void *derivativeSetter, void *name) { + return static_cast(instance)->addStateVariable( + rank, dimensions, reinterpret_cast(stateGetter), reinterpret_cast(stateSetter), reinterpret_cast(derivativeGetter), reinterpret_cast(derivativeSetter), - static_cast(name)); + static_cast(name)); } -RUNTIME_FUNC_DEF(idaAddStateVariable, uint64_t, PTR(void), uint64_t, PTR(uint64_t), PTR(void), PTR(void), PTR(void), PTR(void), PTR(void)) +RUNTIME_FUNC_DEF(idaAddStateVariable, uint64_t, PTR(void), uint64_t, + PTR(uint64_t), PTR(void), PTR(void), PTR(void), PTR(void), + PTR(void)) //===---------------------------------------------------------------------===// // idaAddVariableAccess -static void idaAddVariableAccess_void( - void* instance, - uint64_t equationIndex, - uint64_t variableIndex, - void* accessFunction) -{ - static_cast(instance)->addVariableAccess( +static void idaAddVariableAccess_void(void *instance, uint64_t equationIndex, + uint64_t variableIndex, + void *accessFunction) { + static_cast(instance)->addVariableAccess( equationIndex, variableIndex, reinterpret_cast(accessFunction)); } -RUNTIME_FUNC_DEF(idaAddVariableAccess, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DEF(idaAddVariableAccess, void, PTR(void), uint64_t, uint64_t, + PTR(void)) //===---------------------------------------------------------------------===// // idaAddEquation -static uint64_t idaAddEquation_i64( - void* instance, - int64_t* ranges, - uint64_t rank, - uint64_t writtenVariable, - void* writeAccessFunction, - void* stringRepresentation) -{ - return static_cast(instance)->addEquation( +static uint64_t idaAddEquation_i64(void *instance, int64_t *ranges, + uint64_t rank, uint64_t writtenVariable, + void 
*writeAccessFunction, + void *stringRepresentation) { + return static_cast(instance)->addEquation( ranges, rank, writtenVariable, reinterpret_cast(writeAccessFunction), - static_cast(stringRepresentation)); + static_cast(stringRepresentation)); } -RUNTIME_FUNC_DEF(idaAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, uint64_t, PTR(void), PTR(void)) +RUNTIME_FUNC_DEF(idaAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, + uint64_t, PTR(void), PTR(void)) //===---------------------------------------------------------------------===// // idaSetResidual -static void idaSetResidual_void( - void* instance, - uint64_t equationIndex, - void* residualFunction) -{ - static_cast(instance)->setResidualFunction( - equationIndex, - reinterpret_cast(residualFunction)); +static void idaSetResidual_void(void *instance, uint64_t equationIndex, + void *residualFunction) { + static_cast(instance)->setResidualFunction( + equationIndex, reinterpret_cast(residualFunction)); } RUNTIME_FUNC_DEF(idaSetResidual, void, PTR(void), uint64_t, PTR(void)) @@ -2375,13 +2309,10 @@ RUNTIME_FUNC_DEF(idaSetResidual, void, PTR(void), uint64_t, PTR(void)) //===---------------------------------------------------------------------===// // idaAddJacobian -static void idaAddJacobian_void( - void* instance, - uint64_t equationIndex, - uint64_t variableIndex, - void* jacobianFunction) -{ - static_cast(instance)->addJacobianFunction( +static void idaAddJacobian_void(void *instance, uint64_t equationIndex, + uint64_t variableIndex, + void *jacobianFunction) { + static_cast(instance)->addJacobianFunction( equationIndex, variableIndex, reinterpret_cast(jacobianFunction)); } @@ -2391,9 +2322,8 @@ RUNTIME_FUNC_DEF(idaAddJacobian, void, PTR(void), uint64_t, uint64_t, PTR(void)) //===---------------------------------------------------------------------===// // idaPrintStatistics -static void printStatistics_void(void* instance) -{ - static_cast(instance)->printStatistics(); +static void 
printStatistics_void(void *instance) { + static_cast(instance)->printStatistics(); } RUNTIME_FUNC_DEF(printStatistics, void, PTR(void)) From ad9c7c1ea874a9d53c08fcb16431e1ee21c37f9e Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Tue, 8 Oct 2024 13:11:32 +0200 Subject: [PATCH 04/14] Update LLVM commit --- .jenkins/version_llvm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/version_llvm.txt b/.jenkins/version_llvm.txt index 626cfcf3e..09db847b1 100644 --- a/.jenkins/version_llvm.txt +++ b/.jenkins/version_llvm.txt @@ -1 +1 @@ -969ff8ef9aba8c17ddb53fd44bd8d3b82d47b3fa \ No newline at end of file +324b30d4cb13e3f9dde0163fdc647e5a1b420fbb \ No newline at end of file From b3f47667f07c431c03ff1d9d6019334824ee4bed Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Tue, 8 Oct 2024 17:10:28 +0200 Subject: [PATCH 05/14] Disable equations split when computing the Jacobian matrix to avoid race conditions on the seeds --- include/marco/Runtime/Solvers/IDA/Instance.h | 19 +- .../marco/Runtime/Solvers/KINSOL/Instance.h | 11 +- lib/Solvers/IDA/Instance.cpp | 193 ++++++------------ lib/Solvers/KINSOL/Instance.cpp | 135 +++++++----- 4 files changed, 162 insertions(+), 196 deletions(-) diff --git a/include/marco/Runtime/Solvers/IDA/Instance.h b/include/marco/Runtime/Solvers/IDA/Instance.h index 0b7fd43c2..d4e9d174b 100644 --- a/include/marco/Runtime/Solvers/IDA/Instance.h +++ b/include/marco/Runtime/Solvers/IDA/Instance.h @@ -169,7 +169,7 @@ namespace marco::runtime::sundials::ida void computeNNZ(); - void computeThreadChunks(); + void computeResidualThreadChunks(); void copyVariablesFromMARCO( N_Vector algebraicAndStateVariablesVector, @@ -179,15 +179,7 @@ namespace marco::runtime::sundials::ida N_Vector algebraicAndStateVariablesVector, N_Vector derivativeVariablesVector); - void vectorEquationsParallelIteration( - std::function processFn); - - void scalarEquationsParallelIteration( - std::function& equationIndices)> processFn); - - void 
equationsParallelIteration( + void residualsParallelIteration( std::function& equationIndices)> processFn); @@ -354,19 +346,20 @@ namespace marco::runtime::sundials::ida // Thread pool. ThreadPool threadPool; - // A chunk of equations to be processed by a thread. + // A chunk of equations to be processed by a thread while computing the + // residual values. // A chunk is composed of: // - the identifier (position) of the equation. // - the begin indices (included) // - the end indices (excluded) - using ThreadEquationsChunk = std::tuple< + using ResidualThreadEquationsChunk = std::tuple< Equation, std::vector, std::vector>; // The list of chunks the threads will process. Each thread elaborates // one chunk at a time. // The information is computed only once during the initialization to // save time during the actual simulation. - std::vector threadEquationsChunks; + std::vector residualThreadEquationsChunks; }; } diff --git a/include/marco/Runtime/Solvers/KINSOL/Instance.h b/include/marco/Runtime/Solvers/KINSOL/Instance.h index c42cbd4bf..684911993 100644 --- a/include/marco/Runtime/Solvers/KINSOL/Instance.h +++ b/include/marco/Runtime/Solvers/KINSOL/Instance.h @@ -119,13 +119,13 @@ namespace marco::runtime::sundials::kinsol void computeNNZ(); - void computeThreadChunks(); + void computeResidualThreadChunks(); void copyVariablesFromMARCO(N_Vector variables); void copyVariablesIntoMARCO(N_Vector variables); - void equationsParallelIteration( + void residualsParallelIteration( std::function& equationIndices)> processFn); @@ -259,19 +259,20 @@ namespace marco::runtime::sundials::kinsol // Thread pool. ThreadPool threadPool; - // A chunk of equations to be processed by a thread. + // A chunk of equations to be processed by a thread while computing the + // residual values. // A chunk is composed of: // - the identifier (position) of the equation. 
// - the begin indices (included) // - the end indices (exluded) - using ThreadEquationsChunk = std::tuple< + using ResidualThreadEquationsChunk = std::tuple< Equation, std::vector, std::vector>; // The list of chunks the threads will process. Each thread elaborates // one chunk at a time. // The information is computed only once during the initialization to // save time during the actual simulation. - std::vector threadEquationsChunks; + std::vector residualThreadEquationsChunks; }; } diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index c3534af3b..59f8a0217 100644 --- a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -592,8 +592,8 @@ bool IDAInstance::initialize() { // Compute the total amount of non-zero values in the Jacobian Matrix. computeNNZ(); - // Compute the equation chunks for each thread. - computeThreadChunks(); + // Compute the workload for each thread. + computeResidualThreadChunks(); // Initialize the values of the variables living inside IDA. copyVariablesFromMARCO(variablesVector, derivativesVector); @@ -887,7 +887,7 @@ int IDAInstance::residualFunction(realtype time, N_Vector variables, // it writes into. IDA_PROFILER_RESIDUALS_START; - instance->equationsParallelIteration( + instance->residualsParallelIteration( [&](Equation eq, const std::vector &equationIndices) { uint64_t equationRank = instance->getEquationRank(eq); assert(equationIndices.size() == equationRank); @@ -952,13 +952,23 @@ int IDAInstance::jacobianMatrix(realtype time, realtype alpha, // the current iteration values. instance->copyVariablesIntoMARCO(variables, derivatives); - // For every vectorized equation, compute its row within the Jacobian - // matrix. 
IDA_PROFILER_PARTIAL_DERIVATIVES_START; - instance->equationsParallelIteration( - [&](Equation eq, const std::vector &equationIndices) { - Variable writtenVariable = instance->getWrittenVariable(eq); + unsigned int numOfThreads = instance->threadPool.getNumOfThreads(); + + std::atomic_size_t currentEquation = 0; + uint64_t numOfVectorizedEquations = instance->getNumOfVectorizedEquations(); + + for (unsigned int thread = 0; thread < numOfThreads; ++thread) { + instance->threadPool.async([&]() { + size_t equationIndex = 0; + Equation equation; + std::vector equationIndices; + + while ((equationIndex = currentEquation++) < numOfVectorizedEquations) { + equation = instance->equationsProcessingOrder[equationIndex]; + instance->getEquationBeginIndices(equation, equationIndices); + Variable writtenVariable = instance->getWrittenVariable(equation); uint64_t writtenVariableArrayOffset = instance->variableOffsets[writtenVariable]; @@ -970,52 +980,62 @@ int IDAInstance::jacobianMatrix(realtype time, realtype alpha, writtenVariableIndices.resize(writtenVariableRank, 0); AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(eq); + instance->getWriteAccessFunction(equation); - writeAccessFunction(equationIndices.data(), - writtenVariableIndices.data()); + do { + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - uint64_t writtenVariableScalarOffset = - getVariableFlatIndex(instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + uint64_t writtenVariableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; - assert(scalarEquationIndex < instance->getNumOfScalarEquations()); + assert(scalarEquationIndex < instance->getNumOfScalarEquations()); - // Compute the column indexes 
that may be non-zeros. - std::vector jacobianColumns = - instance->computeJacobianColumns(eq, equationIndices.data()); + // Compute the column indices that may be non-zero. + std::vector jacobianColumns = + instance->computeJacobianColumns(equation, + equationIndices.data()); - // For every scalar variable with respect to which the equation must be - // partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn &column = jacobianColumns[i]; - Variable variable = column.first; - const auto &variableIndices = column.second; + // For every scalar variable with respect to which the equation must + // be partially differentiated. + for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; - uint64_t variableArrayOffset = instance->variableOffsets[variable]; + uint64_t variableArrayOffset = instance->variableOffsets[variable]; - uint64_t variableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[variable], column.second); + uint64_t variableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[variable], column.second); - assert(instance->jacobianFunctions[eq][variable] != nullptr); + assert(instance->jacobianFunctions[equation][variable] != nullptr); - auto jacobianFunctionResult = - instance->jacobianFunctions[eq][variable]( - time, equationIndices.data(), variableIndices.data(), alpha); + auto jacobianFunctionResult = + instance->jacobianFunctions[equation][variable]( + time, equationIndices.data(), variableIndices.data(), + alpha); - instance->jacobianMatrixData[scalarEquationIndex][i].second = - jacobianFunctionResult; + instance->jacobianMatrixData[scalarEquationIndex][i].second = + jacobianFunctionResult; - auto index = static_cast(variableArrayOffset + - variableScalarOffset); + auto index = static_cast(variableArrayOffset + + 
variableScalarOffset); - instance->jacobianMatrixData[scalarEquationIndex][i].first = index; - } - }); + instance->jacobianMatrixData[scalarEquationIndex][i].first = index; + } + } while (advanceEquationIndices(equationIndices, + instance->equationRanges[equation])); + } + }); + } + + instance->threadPool.wait(); + // Move the partial derivatives into the SUNDIALS sparse matrix. sunindextype *rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); sunindextype *columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); @@ -1222,7 +1242,7 @@ void IDAInstance::computeNNZ() { } } -void IDAInstance::computeThreadChunks() { +void IDAInstance::computeResidualThreadChunks() { unsigned int numOfThreads = threadPool.getNumOfThreads(); int64_t chunksFactor = getOptions().equationsChunksFactor; @@ -1262,8 +1282,8 @@ void IDAInstance::computeThreadChunks() { equationRanges[equation]); } - threadEquationsChunks.emplace_back(equation, std::move(beginIndices), - std::move(endIndices)); + residualThreadEquationsChunks.emplace_back( + equation, std::move(beginIndices), std::move(endIndices)); // Move to the next chunk. equationFlatIndex = endFlatIndex; @@ -1409,85 +1429,7 @@ void IDAInstance::copyVariablesIntoMARCO( IDA_PROFILER_COPY_VARS_INTO_MARCO_STOP; } -void IDAInstance::vectorEquationsParallelIteration( - std::function processFn) { - std::mutex mutex; - size_t processedEquations = 0; - - // Function to move to the next equation. 
- auto getEquationAndAdvance = [&](Equation &eq) { - std::lock_guard lockGuard(mutex); - - if (processedEquations >= getNumOfVectorizedEquations()) { - return false; - } - - eq = equationsProcessingOrder[processedEquations++]; - return true; - }; - - for (unsigned int i = 0, e = threadPool.getNumOfThreads(); i < e; ++i) { - threadPool.async([&]() { - Equation equation; - - while (getEquationAndAdvance(equation)) { - processFn(equation); - } - }); - } - - threadPool.wait(); -} - -void IDAInstance::scalarEquationsParallelIteration( - std::function &equationIndices)> - processFn) { - size_t processedEquations = 0; - std::vector equationIndices; - std::mutex mutex; - - // Function to advance the indices by one, or move to the next equation if - // the current one has been fully visited. - auto getEquationAndAdvance = [&](Equation &eq, - std::vector &indices) { - std::lock_guard lockGuard(mutex); - - if (processedEquations >= getNumOfVectorizedEquations()) { - return false; - } - - eq = equationsProcessingOrder[processedEquations]; - indices = equationIndices; - - if (!advanceEquationIndices(equationIndices, equationRanges[eq])) { - if (++processedEquations < getNumOfVectorizedEquations()) { - getEquationBeginIndices(equationsProcessingOrder[processedEquations], - equationIndices); - } - } - - return true; - }; - - getEquationBeginIndices(equationsProcessingOrder[processedEquations], - equationIndices); - - for (unsigned int i = 0, e = threadPool.getNumOfThreads(); i < e; ++i) { - threadPool.async([&]() { - Equation equation; - std::vector indices; - - while (getEquationAndAdvance(equation, indices)) { - processFn(equation, indices); - } - }); - } - - threadPool.wait(); -} - -void IDAInstance::equationsParallelIteration( +void IDAInstance::residualsParallelIteration( std::function &equationIndices)> processFn) { @@ -1499,9 +1441,10 @@ void IDAInstance::equationsParallelIteration( threadPool.async([&]() { size_t assignedChunk; - while ((assignedChunk = chunkIndex++) < 
threadEquationsChunks.size()) { - const ThreadEquationsChunk &chunk = - threadEquationsChunks[assignedChunk]; + while ((assignedChunk = chunkIndex++) < + residualThreadEquationsChunks.size()) { + const ResidualThreadEquationsChunk &chunk = + residualThreadEquationsChunks[assignedChunk]; Equation equation = std::get<0>(chunk); std::vector equationIndices = std::get<1>(chunk); diff --git a/lib/Solvers/KINSOL/Instance.cpp b/lib/Solvers/KINSOL/Instance.cpp index 0dcfccd0c..9ff592643 100644 --- a/lib/Solvers/KINSOL/Instance.cpp +++ b/lib/Solvers/KINSOL/Instance.cpp @@ -466,7 +466,7 @@ namespace marco::runtime::sundials::kinsol computeNNZ(); // Compute the equation chunks for each thread. - computeThreadChunks(); + computeResidualThreadChunks(); // Create and initialize the memory for KINSOL. #if SUNDIALS_VERSION_MAJOR >= 6 @@ -586,7 +586,7 @@ namespace marco::runtime::sundials::kinsol // it writes into. KINSOL_PROFILER_RESIDUALS_START; - instance->equationsParallelIteration( + instance->residualsParallelIteration( [&](Equation eq, const std::vector& equationIndices) { uint64_t equationRank = instance->getEquationRank(eq); assert(equationIndices.size() == equationRank); @@ -651,13 +651,23 @@ namespace marco::runtime::sundials::kinsol // the current iteration values. instance->copyVariablesIntoMARCO(variables); - // For every vectorized equation, compute its row within the Jacobian - // matrix. 
KINSOL_PROFILER_PARTIAL_DERIVATIVES_START; - instance->equationsParallelIteration( - [&](Equation eq, const std::vector& equationIndices) { - Variable writtenVariable = instance->getWrittenVariable(eq); + unsigned int numOfThreads = instance->threadPool.getNumOfThreads(); + + std::atomic_size_t currentEquation = 0; + uint64_t numOfVectorizedEquations = instance->getNumOfVectorizedEquations(); + + for (unsigned int thread = 0; thread < numOfThreads; ++thread) { + instance->threadPool.async([&]() { + size_t equationIndex = 0; + Equation equation; + std::vector equationIndices; + + while ((equationIndex = currentEquation++) < numOfVectorizedEquations) { + equation = instance->equationsProcessingOrder[equationIndex]; + instance->getEquationBeginIndices(equation, equationIndices); + Variable writtenVariable = instance->getWrittenVariable(equation); uint64_t writtenVariableArrayOffset = instance->variableOffsets[writtenVariable]; @@ -669,55 +679,59 @@ namespace marco::runtime::sundials::kinsol writtenVariableIndices.resize(writtenVariableRank, 0); AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(eq); + instance->getWriteAccessFunction(equation); - writeAccessFunction( - equationIndices.data(), - writtenVariableIndices.data()); + do { + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + uint64_t writtenVariableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; - assert(scalarEquationIndex < instance->getNumOfScalarEquations()); + assert(scalarEquationIndex < instance->getNumOfScalarEquations()); - // Compute the column indexes that may be 
non-zeros. - std::vector jacobianColumns = - instance->computeJacobianColumns(eq, equationIndices.data()); + // Compute the column indices that may be non-zero. + std::vector jacobianColumns = + instance->computeJacobianColumns(equation, + equationIndices.data()); - // For every scalar variable with respect to which the equation must be - // partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn& column = jacobianColumns[i]; - Variable variable = column.first; - const auto& variableIndices = column.second; + // For every scalar variable with respect to which the equation must + // be partially differentiated. + for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; - uint64_t variableArrayOffset = instance->variableOffsets[variable]; + uint64_t variableArrayOffset = instance->variableOffsets[variable]; - uint64_t variableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[variable], - column.second); + uint64_t variableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[variable], column.second); - assert(instance->jacobianFunctions[eq][variable] != nullptr); + assert(instance->jacobianFunctions[equation][variable] != nullptr); - auto jacobianFunctionResult = - instance->jacobianFunctions[eq][variable]( - equationIndices.data(), - variableIndices.data()); + auto jacobianFunctionResult = + instance->jacobianFunctions[equation][variable]( + equationIndices.data(), variableIndices.data()); - instance->jacobianMatrixData[scalarEquationIndex][i].second = - jacobianFunctionResult; + instance->jacobianMatrixData[scalarEquationIndex][i].second = + jacobianFunctionResult; - auto index = static_cast( - variableArrayOffset + variableScalarOffset); + auto index = static_cast(variableArrayOffset + + variableScalarOffset); - 
instance->jacobianMatrixData[scalarEquationIndex][i].first = - index; - } - }); + instance->jacobianMatrixData[scalarEquationIndex][i].first = index; + } + } while (advanceEquationIndices(equationIndices, + instance->equationRanges[equation])); + } + }); + } + + instance->threadPool.wait(); sunindextype* rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); sunindextype* columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); @@ -923,7 +937,7 @@ namespace marco::runtime::sundials::kinsol } } - void KINSOLInstance::computeThreadChunks() + void KINSOLInstance::computeResidualThreadChunks() { unsigned int numOfThreads = threadPool.getNumOfThreads(); @@ -966,7 +980,7 @@ namespace marco::runtime::sundials::kinsol endFlatIndex, endIndices, equationRanges[equation]); } - threadEquationsChunks.emplace_back( + residualThreadEquationsChunks.emplace_back( equation, std::move(beginIndices), std::move(endIndices)); // Move to the next chunk. @@ -1065,11 +1079,10 @@ namespace marco::runtime::sundials::kinsol KINSOL_PROFILER_COPY_VARS_INTO_MARCO_STOP; } - void KINSOLInstance::equationsParallelIteration( - std::function& equationIndices)> processFn) - { + void KINSOLInstance::residualsParallelIteration( + std::function &equationIndices)> + processFn) { // Shard the work among multiple threads. 
unsigned int numOfThreads = threadPool.getNumOfThreads(); std::atomic_size_t chunkIndex = 0; @@ -1078,14 +1091,30 @@ namespace marco::runtime::sundials::kinsol threadPool.async([&]() { size_t assignedChunk; - while ((assignedChunk = chunkIndex++) < threadEquationsChunks.size()) { - const ThreadEquationsChunk& chunk = - threadEquationsChunks[assignedChunk]; + while ((assignedChunk = chunkIndex++) < + residualThreadEquationsChunks.size()) { + const ResidualThreadEquationsChunk &chunk = + residualThreadEquationsChunks[assignedChunk]; Equation equation = std::get<0>(chunk); std::vector equationIndices = std::get<1>(chunk); do { + assert([&]() -> bool { + if (equationIndices.size() != equationRanges[equation].size()) { + return false; + } + + for (size_t i = 0, rank = equationIndices.size(); i < rank; ++i) { + if (equationIndices[i] < equationRanges[equation][i].begin || + equationIndices[i] >= equationRanges[equation][i].end) { + return false; + } + } + + return true; + }() && "Invalid equation indices"); + processFn(equation, equationIndices); } while (advanceEquationIndicesUntil( equationIndices, equationRanges[equation], std::get<2>(chunk))); From a98f5aab3875f9441f565ffc800e57be4360e231 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Tue, 8 Oct 2024 17:10:42 +0200 Subject: [PATCH 06/14] Reformat code --- include/marco/Runtime/Solvers/IDA/Instance.h | 664 +++-- .../marco/Runtime/Solvers/KINSOL/Instance.h | 387 ++- lib/Solvers/KINSOL/Instance.cpp | 2127 ++++++++--------- 3 files changed, 1534 insertions(+), 1644 deletions(-) diff --git a/include/marco/Runtime/Solvers/IDA/Instance.h b/include/marco/Runtime/Solvers/IDA/Instance.h index d4e9d174b..3857456b2 100644 --- a/include/marco/Runtime/Solvers/IDA/Instance.h +++ b/include/marco/Runtime/Solvers/IDA/Instance.h @@ -3,8 +3,8 @@ #ifdef SUNDIALS_ENABLE -#include "marco/Runtime/Solvers/SUNDIALS/Instance.h" #include "ida/ida.h" +#include "marco/Runtime/Solvers/SUNDIALS/Instance.h" #include "nvector/nvector_serial.h" 
#include "sundials/sundials_config.h" #include "sundials/sundials_types.h" @@ -14,354 +14,326 @@ #include #include -namespace marco::runtime::sundials::ida -{ - enum class VariableKind - { - ALGEBRAIC, - STATE - }; - - /// Signature of residual functions. - /// The 1st argument is the current time. - /// The 2nd argument is a pointer to the list of equation indices. - /// The result is the residual value. - using ResidualFunction = double(*)(double, const int64_t*); - - /// Signature of Jacobian functions. - /// The 1st argument is the current time. - /// The 2nd argument is a pointer to the list of equation indices. - /// The 3rd argument is a pointer to the list of variable indices. - /// The 4th argument is the 'alpha' value. - /// The result is the Jacobian value. - using JacobianFunction = double(*)( - double, const int64_t*, const uint64_t*, double); - - class IDAInstance - { - public: - IDAInstance(); - - ~IDAInstance(); - - void setStartTime(double time); - void setEndTime(double time); - void setTimeStep(double time); - - Variable addAlgebraicVariable( - uint64_t rank, - const uint64_t* dimensions, - VariableGetter getterFunction, - VariableSetter setterFunction, - const char* name); - - Variable addStateVariable( - uint64_t rank, - const uint64_t* dimensions, - VariableGetter stateGetterFunction, - VariableSetter stateSetterFunction, - VariableGetter derivativeGetterFunction, - VariableSetter derivativeSetterFunction, - const char* name); - - /// Add the information about an equation that is handled by IDA. - Equation addEquation( - const int64_t* ranges, - uint64_t rank, - Variable writtenVariable, - AccessFunction writeAccessFunction, - const char* stringRepresentation); - - void addVariableAccess( - Equation equation, - Variable variableIndex, - AccessFunction accessFunction); - - /// Add the function pointer that computes the residual value of an - /// equation. 
- void setResidualFunction( - Equation equationIndex, - ResidualFunction residualFunction); - - /// Add the function pointer that computes a partial derivative of an - /// equation. - void addJacobianFunction( - Equation equationIndex, - Variable variableIndex, - JacobianFunction jacobianFunction); - - /// Instantiate and initialize all the classes needed by IDA in order to - /// solve the given system of equations. It also sets optional simulation - /// parameters for IDA. It must be called before the first usage of - /// idaStep() and after a call to idaAllocData(). It may fail in case of - /// malformed model. - bool initialize(); - - /// Invoke IDA to perform the computation of the initial values of the - /// variables. Returns true if the computation was successful, false - /// otherwise. - bool calcIC(); - - /// Invoke IDA to perform one step of the computation. If a time step is - /// given, the output will show the variables in an equidistant time grid - /// based on the step time parameter. Otherwise, the output will show the - /// variables at every step of the computation. Returns true if the - /// computation was successful, false otherwise. - bool step(); - - /// Returns the time reached by the solver after the last step. - realtype getCurrentTime() const; - - /// Prints statistics regarding the computation of the system. - void printStatistics() const; - - /// IDAResFn user-defined residual function, passed to IDA through - /// IDAInit. It contains how to compute the Residual Function of the - /// system, starting from the provided UserData struct, iterating through - /// every equation. - static int residualFunction( - realtype time, - N_Vector variables, N_Vector derivatives, N_Vector residuals, - void* userData); - - /// IDALsJacFn user-defined Jacobian approximation function, passed to - /// IDA through IDASetJacFn. 
It contains how to compute the Jacobian - /// Matrix of the system, starting from the provided UserData struct, - /// iterating through every equation and variable. The matrix is - /// represented in CSR format. - static int jacobianMatrix( - realtype time, - realtype alpha, - N_Vector variables, - N_Vector derivatives, - N_Vector residuals, - SUNMatrix jacobianMatrix, - void* userData, - N_Vector tempv1, - N_Vector tempv2, - N_Vector tempv3); - - private: - [[nodiscard]] uint64_t getNumOfArrayVariables() const; - - [[nodiscard]] uint64_t getNumOfScalarVariables() const; - - [[nodiscard]] VariableKind getVariableKind(Variable variable) const; - - [[nodiscard]] uint64_t getVariableFlatSize(Variable variable) const; - - [[nodiscard]] uint64_t getNumOfVectorizedEquations() const; - - [[nodiscard]] uint64_t getNumOfScalarEquations() const; - - [[nodiscard]] uint64_t getEquationRank(Equation equation) const; - - [[nodiscard]] uint64_t getEquationFlatSize(Equation equation) const; - - [[nodiscard]] Variable getWrittenVariable(Equation equation) const; - - [[nodiscard]] AccessFunction getWriteAccessFunction(Equation equation) const; - - [[nodiscard]] uint64_t getVariableRank(Variable variable) const; - - std::vector computeJacobianColumns( - Equation eq, const int64_t* equationIndices) const; - - void computeNNZ(); - - void computeResidualThreadChunks(); - - void copyVariablesFromMARCO( - N_Vector algebraicAndStateVariablesVector, - N_Vector derivativeVariablesVector); - - void copyVariablesIntoMARCO( - N_Vector algebraicAndStateVariablesVector, - N_Vector derivativeVariablesVector); - - void residualsParallelIteration( - std::function& equationIndices)> processFn); - - void getVariableBeginIndices( - Variable variable, std::vector& indices) const; - - void getVariableEndIndices( - Variable variable, std::vector& indices) const; - - void getEquationBeginIndices( - Equation equation, std::vector& indices) const; - - void getEquationEndIndices( - Equation equation, 
std::vector& indices) const; - - private: - /// @name Forwarded methods - /// { - - bool idaInit(); - bool idaSVTolerances(); - bool idaSetLinearSolver(); - bool idaSetUserData(); - bool idaSetMaxNumSteps(); - bool idaSetInitialStepSize(); - bool idaSetMinStepSize(); - bool idaSetMaxStepSize(); - bool idaSetStopTime(); - bool idaSetMaxErrTestFails(); - bool idaSetSuppressAlg(); - bool idaSetId(); - bool idaSetJacobianFunction(); - bool idaSetMaxNonlinIters(); - bool idaSetMaxConvFails(); - bool idaSetNonlinConvCoef(); - bool idaSetNonlinConvCoefIC(); - bool idaSetMaxNumStepsIC(); - bool idaSetMaxNumJacsIC(); - bool idaSetMaxNumItersIC(); - bool idaSetLineSearchOffIC(); +namespace marco::runtime::sundials::ida { +enum class VariableKind { ALGEBRAIC, STATE }; + +/// Signature of residual functions. +/// The 1st argument is the current time. +/// The 2nd argument is a pointer to the list of equation indices. +/// The result is the residual value. +using ResidualFunction = double (*)(double, const int64_t *); + +/// Signature of Jacobian functions. +/// The 1st argument is the current time. +/// The 2nd argument is a pointer to the list of equation indices. +/// The 3rd argument is a pointer to the list of variable indices. +/// The 4th argument is the 'alpha' value. +/// The result is the Jacobian value. 
+using JacobianFunction = double (*)(double, const int64_t *, const uint64_t *, + double); + +class IDAInstance { +public: + IDAInstance(); + + ~IDAInstance(); + + void setStartTime(double time); + void setEndTime(double time); + void setTimeStep(double time); + + Variable addAlgebraicVariable(uint64_t rank, const uint64_t *dimensions, + VariableGetter getterFunction, + VariableSetter setterFunction, + const char *name); + + Variable addStateVariable(uint64_t rank, const uint64_t *dimensions, + VariableGetter stateGetterFunction, + VariableSetter stateSetterFunction, + VariableGetter derivativeGetterFunction, + VariableSetter derivativeSetterFunction, + const char *name); + + /// Add the information about an equation that is handled by IDA. + Equation addEquation(const int64_t *ranges, uint64_t rank, + Variable writtenVariable, + AccessFunction writeAccessFunction, + const char *stringRepresentation); + + void addVariableAccess(Equation equation, Variable variableIndex, + AccessFunction accessFunction); + + /// Add the function pointer that computes the residual value of an + /// equation. + void setResidualFunction(Equation equationIndex, + ResidualFunction residualFunction); + + /// Add the function pointer that computes a partial derivative of an + /// equation. + void addJacobianFunction(Equation equationIndex, Variable variableIndex, + JacobianFunction jacobianFunction); + + /// Instantiate and initialize all the classes needed by IDA in order to + /// solve the given system of equations. It also sets optional simulation + /// parameters for IDA. It must be called before the first usage of + /// idaStep() and after a call to idaAllocData(). It may fail in case of + /// malformed model. + bool initialize(); + + /// Invoke IDA to perform the computation of the initial values of the + /// variables. Returns true if the computation was successful, false + /// otherwise. + bool calcIC(); + + /// Invoke IDA to perform one step of the computation. 
If a time step is + /// given, the output will show the variables in an equidistant time grid + /// based on the step time parameter. Otherwise, the output will show the + /// variables at every step of the computation. Returns true if the + /// computation was successful, false otherwise. + bool step(); + + /// Returns the time reached by the solver after the last step. + realtype getCurrentTime() const; + + /// Prints statistics regarding the computation of the system. + void printStatistics() const; + + /// IDAResFn user-defined residual function, passed to IDA through + /// IDAInit. It contains how to compute the Residual Function of the + /// system, starting from the provided UserData struct, iterating through + /// every equation. + static int residualFunction(realtype time, N_Vector variables, + N_Vector derivatives, N_Vector residuals, + void *userData); + + /// IDALsJacFn user-defined Jacobian approximation function, passed to + /// IDA through IDASetJacFn. It contains how to compute the Jacobian + /// Matrix of the system, starting from the provided UserData struct, + /// iterating through every equation and variable. The matrix is + /// represented in CSR format. 
+ static int jacobianMatrix(realtype time, realtype alpha, N_Vector variables, + N_Vector derivatives, N_Vector residuals, + SUNMatrix jacobianMatrix, void *userData, + N_Vector tempv1, N_Vector tempv2, N_Vector tempv3); + +private: + [[nodiscard]] uint64_t getNumOfArrayVariables() const; + + [[nodiscard]] uint64_t getNumOfScalarVariables() const; + + [[nodiscard]] VariableKind getVariableKind(Variable variable) const; + + [[nodiscard]] uint64_t getVariableFlatSize(Variable variable) const; + + [[nodiscard]] uint64_t getNumOfVectorizedEquations() const; + + [[nodiscard]] uint64_t getNumOfScalarEquations() const; + + [[nodiscard]] uint64_t getEquationRank(Equation equation) const; + + [[nodiscard]] uint64_t getEquationFlatSize(Equation equation) const; + + [[nodiscard]] Variable getWrittenVariable(Equation equation) const; + + [[nodiscard]] AccessFunction getWriteAccessFunction(Equation equation) const; + + [[nodiscard]] uint64_t getVariableRank(Variable variable) const; + + std::vector + computeJacobianColumns(Equation eq, const int64_t *equationIndices) const; + + void computeNNZ(); + + void computeResidualThreadChunks(); + + void copyVariablesFromMARCO(N_Vector algebraicAndStateVariablesVector, + N_Vector derivativeVariablesVector); + + void copyVariablesIntoMARCO(N_Vector algebraicAndStateVariablesVector, + N_Vector derivativeVariablesVector); + + void residualsParallelIteration( + std::function &equationIndices)> + processFn); + + void getVariableBeginIndices(Variable variable, + std::vector &indices) const; + + void getVariableEndIndices(Variable variable, + std::vector &indices) const; + + void getEquationBeginIndices(Equation equation, + std::vector &indices) const; + + void getEquationEndIndices(Equation equation, + std::vector &indices) const; + +private: + /// @name Forwarded methods + /// { + + bool idaInit(); + bool idaSVTolerances(); + bool idaSetLinearSolver(); + bool idaSetUserData(); + bool idaSetMaxNumSteps(); + bool idaSetInitialStepSize(); + bool 
idaSetMinStepSize(); + bool idaSetMaxStepSize(); + bool idaSetStopTime(); + bool idaSetMaxErrTestFails(); + bool idaSetSuppressAlg(); + bool idaSetId(); + bool idaSetJacobianFunction(); + bool idaSetMaxNonlinIters(); + bool idaSetMaxConvFails(); + bool idaSetNonlinConvCoef(); + bool idaSetNonlinConvCoefIC(); + bool idaSetMaxNumStepsIC(); + bool idaSetMaxNumJacsIC(); + bool idaSetMaxNumItersIC(); + bool idaSetLineSearchOffIC(); - /// } - /// @name Utility functions - /// { + /// } + /// @name Utility functions + /// { - /// Get the scalar equation writing to a certain scalar variable. - /// Warning: extremely slow, to be used only for debug purposes. - void getWritingEquation( - Variable variable, - const std::vector& variableIndices, - Equation& equation, - std::vector& equationIndices) const; + /// Get the scalar equation writing to a certain scalar variable. + /// Warning: extremely slow, to be used only for debug purposes. + void getWritingEquation(Variable variable, + const std::vector &variableIndices, + Equation &equation, + std::vector &equationIndices) const; - /// } - /// @name Debug functions - /// { - void printVariablesVector(N_Vector variables) const; + /// } + /// @name Debug functions + /// { + void printVariablesVector(N_Vector variables) const; - void printDerivativesVector(N_Vector derivatives) const; + void printDerivativesVector(N_Vector derivatives) const; - void printResidualsVector(N_Vector residuals) const; + void printResidualsVector(N_Vector residuals) const; - void printJacobianMatrix(SUNMatrix jacobianMatrix) const; + void printJacobianMatrix(SUNMatrix jacobianMatrix) const; - /// } + /// } - private: +private: #if SUNDIALS_VERSION_MAJOR >= 6 - // SUNDIALS context. - SUNContext ctx{nullptr}; + // SUNDIALS context. + SUNContext ctx{nullptr}; #endif - // Whether the instance has been inizialized or not. - bool initialized{false}; - - // Model size. 
- uint64_t scalarVariablesNumber{0}; - uint64_t scalarEquationsNumber{0}; - uint64_t nonZeroValuesNumber{0}; - - // The iteration ranges of the vectorized equations. - std::vector equationRanges; - - // The array variables written by the equations. - // The i-th position contains the information about the variable written - // by the i-th equation: the first element is the index of the IDA - // variable, while the second represents the ranges of the scalar - // variable. - std::vector> writeAccesses; - - // The order in which the equations must be processed when computing - // residuals and partial derivatives. - std::vector equationsProcessingOrder; - - // The residual functions associated with the equations. - // The i-th position contains the pointer to the residual function of the - // i-th equation. - std::vector residualFunctions; - - // The jacobian functions associated with the equations. - // The i-th position contains the list of partial derivative functions of - // the i-th equation. The j-th function represents the function to - // compute the derivative with respect to the j-th variable. - std::vector> jacobianFunctions; - - // Whether the IDA instance is informed about the accesses to the - // variables. - bool precomputedAccesses{false}; - - std::vector variableAccesses; - - // The offset of each array variable inside the flattened variables - // vector. - std::vector variableOffsets; - - // The dimensions list of each array variable. - std::vector variablesDimensions; - - // Simulation times. - realtype startTime; - realtype endTime; - realtype timeStep; - realtype currentTime = 0; - - // Variables vectors and values. - N_Vector variablesVector; - N_Vector derivativesVector; - - // The vector stores whether each scalar variable is an algebraic or a - // state one. - // 0 = algebraic - // 1 = state - N_Vector idVector; - - // The tolerance for each scalar variable. - N_Vector tolerancesVector; - - // IDA classes. 
- void* idaMemory; - - SUNMatrix sparseMatrix; - - // Support structure for the computation of the jacobian matrix. - // The outer vector has a number of elements equal to the scalar number - // of equations. Each of them represents a row of the matrix and consists - // in a vector of paired elements. The first element of each pair - // represents the index of the column (that is, the independent scalar - // variable for the partial derivative) while the second one is the - // value of the partial derivative. - std::vector>> jacobianMatrixData; - - SUNLinearSolver linearSolver; - - std::vector algebraicAndStateVariablesGetters; - std::vector algebraicAndStateVariablesSetters; - - std::vector derivativeVariablesGetters; - std::vector derivativeVariablesSetters; - - // Mapping from the IDA variable position to state variables position. - std::map stateVariablesMapping; - - // Thread pool. - ThreadPool threadPool; - - // A chunk of equations to be processed by a thread while computing the - // residual values. - // A chunk is composed of: - // - the identifier (position) of the equation. - // - the begin indices (included) - // - the end indices (excluded) - using ResidualThreadEquationsChunk = std::tuple< - Equation, std::vector, std::vector>; - - // The list of chunks the threads will process. Each thread elaborates - // one chunk at a time. - // The information is computed only once during the initialization to - // save time during the actual simulation. - std::vector residualThreadEquationsChunks; - }; -} + // Whether the instance has been inizialized or not. + bool initialized{false}; + + // Model size. + uint64_t scalarVariablesNumber{0}; + uint64_t scalarEquationsNumber{0}; + uint64_t nonZeroValuesNumber{0}; + + // The iteration ranges of the vectorized equations. + std::vector equationRanges; + + // The array variables written by the equations. 
+ // The i-th position contains the information about the variable written + // by the i-th equation: the first element is the index of the IDA + // variable, while the second represents the ranges of the scalar + // variable. + std::vector> writeAccesses; + + // The order in which the equations must be processed when computing + // residuals and partial derivatives. + std::vector equationsProcessingOrder; + + // The residual functions associated with the equations. + // The i-th position contains the pointer to the residual function of the + // i-th equation. + std::vector residualFunctions; + + // The jacobian functions associated with the equations. + // The i-th position contains the list of partial derivative functions of + // the i-th equation. The j-th function represents the function to + // compute the derivative with respect to the j-th variable. + std::vector> jacobianFunctions; + + // Whether the IDA instance is informed about the accesses to the + // variables. + bool precomputedAccesses{false}; + + std::vector variableAccesses; + + // The offset of each array variable inside the flattened variables + // vector. + std::vector variableOffsets; + + // The dimensions list of each array variable. + std::vector variablesDimensions; + + // Simulation times. + realtype startTime; + realtype endTime; + realtype timeStep; + realtype currentTime = 0; + + // Variables vectors and values. + N_Vector variablesVector; + N_Vector derivativesVector; + + // The vector stores whether each scalar variable is an algebraic or a + // state one. + // 0 = algebraic + // 1 = state + N_Vector idVector; + + // The tolerance for each scalar variable. + N_Vector tolerancesVector; + + // IDA classes. + void *idaMemory; + + SUNMatrix sparseMatrix; + + // Support structure for the computation of the jacobian matrix. + // The outer vector has a number of elements equal to the scalar number + // of equations. 
Each of them represents a row of the matrix and consists + // in a vector of paired elements. The first element of each pair + // represents the index of the column (that is, the independent scalar + // variable for the partial derivative) while the second one is the + // value of the partial derivative. + std::vector>> jacobianMatrixData; + + SUNLinearSolver linearSolver; + + std::vector algebraicAndStateVariablesGetters; + std::vector algebraicAndStateVariablesSetters; + + std::vector derivativeVariablesGetters; + std::vector derivativeVariablesSetters; + + // Mapping from the IDA variable position to state variables position. + std::map stateVariablesMapping; + + // Thread pool. + ThreadPool threadPool; + + // A chunk of equations to be processed by a thread while computing the + // residual values. + // A chunk is composed of: + // - the identifier (position) of the equation. + // - the begin indices (included) + // - the end indices (excluded) + using ResidualThreadEquationsChunk = + std::tuple, std::vector>; + + // The list of chunks the threads will process. Each thread elaborates + // one chunk at a time. + // The information is computed only once during the initialization to + // save time during the actual simulation. 
+ std::vector residualThreadEquationsChunks; +}; +} // namespace marco::runtime::sundials::ida //===---------------------------------------------------------------------===// // Exported functions @@ -383,17 +355,23 @@ RUNTIME_FUNC_DECL(idaSetTimeStep, void, PTR(void), double) RUNTIME_FUNC_DECL(idaGetCurrentTime, double, PTR(void)) -RUNTIME_FUNC_DECL(idaAddAlgebraicVariable, uint64_t, PTR(void), uint64_t, PTR(uint64_t), PTR(void), PTR(void), PTR(void)) +RUNTIME_FUNC_DECL(idaAddAlgebraicVariable, uint64_t, PTR(void), uint64_t, + PTR(uint64_t), PTR(void), PTR(void), PTR(void)) -RUNTIME_FUNC_DECL(idaAddStateVariable, uint64_t, PTR(void), uint64_t, PTR(uint64_t), PTR(void), PTR(void), PTR(void), PTR(void), PTR(void)) +RUNTIME_FUNC_DECL(idaAddStateVariable, uint64_t, PTR(void), uint64_t, + PTR(uint64_t), PTR(void), PTR(void), PTR(void), PTR(void), + PTR(void)) -RUNTIME_FUNC_DECL(idaAddVariableAccess, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DECL(idaAddVariableAccess, void, PTR(void), uint64_t, uint64_t, + PTR(void)) -RUNTIME_FUNC_DECL(idaAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, uint64_t, PTR(void), PTR(void)) +RUNTIME_FUNC_DECL(idaAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, + uint64_t, PTR(void), PTR(void)) RUNTIME_FUNC_DECL(idaSetResidual, void, PTR(void), uint64_t, PTR(void)) -RUNTIME_FUNC_DECL(idaAddJacobian, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DECL(idaAddJacobian, void, PTR(void), uint64_t, uint64_t, + PTR(void)) RUNTIME_FUNC_DECL(printStatistics, void, PTR(void)) diff --git a/include/marco/Runtime/Solvers/KINSOL/Instance.h b/include/marco/Runtime/Solvers/KINSOL/Instance.h index 684911993..43263f4f4 100644 --- a/include/marco/Runtime/Solvers/KINSOL/Instance.h +++ b/include/marco/Runtime/Solvers/KINSOL/Instance.h @@ -3,8 +3,8 @@ #ifdef SUNDIALS_ENABLE -#include "marco/Runtime/Solvers/SUNDIALS/Instance.h" #include "kinsol/kinsol.h" +#include "marco/Runtime/Solvers/SUNDIALS/Instance.h" 
#include "nvector/nvector_serial.h" #include "sundials/sundials_config.h" #include "sundials/sundials_types.h" @@ -13,268 +13,249 @@ #include #include -namespace marco::runtime::sundials::kinsol -{ - /// Signature of residual functions. - /// The 1st argument is a pointer to the list of equation indices. - /// The result is the residual value. - using ResidualFunction = double(*)(const int64_t*); +namespace marco::runtime::sundials::kinsol { +/// Signature of residual functions. +/// The 1st argument is a pointer to the list of equation indices. +/// The result is the residual value. +using ResidualFunction = double (*)(const int64_t *); - /// Signature of Jacobian functions. - /// The 1st argument is a pointer to the list of equation indices. - /// The 2nd argument is a pointer to the list of variable indices. - /// The result is the Jacobian value. - using JacobianFunction = double(*)(const int64_t*, const uint64_t*); +/// Signature of Jacobian functions. +/// The 1st argument is a pointer to the list of equation indices. +/// The 2nd argument is a pointer to the list of variable indices. +/// The result is the Jacobian value. +using JacobianFunction = double (*)(const int64_t *, const uint64_t *); - class KINSOLInstance - { - public: - KINSOLInstance(); +class KINSOLInstance { +public: + KINSOLInstance(); - ~KINSOLInstance(); + ~KINSOLInstance(); - Variable addVariable( - uint64_t rank, - const uint64_t* dimensions, - VariableGetter getterFunction, - VariableSetter setterFunction, - const char* name); + Variable addVariable(uint64_t rank, const uint64_t *dimensions, + VariableGetter getterFunction, + VariableSetter setterFunction, const char *name); - /// Add the information about an equation that is handled by KINSOL. - Equation addEquation( - const int64_t* ranges, - uint64_t rank, - Variable writtenVariable, - AccessFunction writeAccessFunction, - const char* stringRepresentation); + /// Add the information about an equation that is handled by KINSOL. 
+ Equation addEquation(const int64_t *ranges, uint64_t rank, + Variable writtenVariable, + AccessFunction writeAccessFunction, + const char *stringRepresentation); - void addVariableAccess( - Equation equation, - Variable variableIndex, - AccessFunction accessFunction); + void addVariableAccess(Equation equation, Variable variableIndex, + AccessFunction accessFunction); - /// Add the function pointer that computes the residual value of an - /// equation. - void setResidualFunction( - Equation equationIndex, - ResidualFunction residualFunction); + /// Add the function pointer that computes the residual value of an + /// equation. + void setResidualFunction(Equation equationIndex, + ResidualFunction residualFunction); - /// Add the function pointer that computes a partial derivative of an - /// equation. - void addJacobianFunction( - Equation equationIndex, - Variable variableIndex, - JacobianFunction jacobianFunction); + /// Add the function pointer that computes a partial derivative of an + /// equation. + void addJacobianFunction(Equation equationIndex, Variable variableIndex, + JacobianFunction jacobianFunction); - /// Instantiate and initialize all the classes needed by KINSOL in order to - /// solve the given system of equations. It also sets optional simulation - /// parameters for KINSOL. - bool initialize(); + /// Instantiate and initialize all the classes needed by KINSOL in order to + /// solve the given system of equations. It also sets optional simulation + /// parameters for KINSOL. + bool initialize(); - bool solve(); + bool solve(); - /// KINSOLResFn user-defined residual function, passed to KINSOL through KINSOLInit. - /// It contains how to compute the Residual Function of the system, starting - /// from the provided UserData struct, iterating through every equation. - static int residualFunction( - N_Vector variables, N_Vector residuals, void* userData); + /// KINSOLResFn user-defined residual function, passed to KINSOL through + /// KINSOLInit. 
It contains how to compute the Residual Function of the + /// system, starting from the provided UserData struct, iterating through + /// every equation. + static int residualFunction(N_Vector variables, N_Vector residuals, + void *userData); - static int residualFunction( - realtype time, - N_Vector variables, - N_Vector residuals, - void* userData); + static int residualFunction(realtype time, N_Vector variables, + N_Vector residuals, void *userData); - static int jacobianMatrix( - N_Vector variables, - N_Vector residuals, - SUNMatrix jacobianMatrix, - void* userData, - N_Vector tempv1, - N_Vector tempv2); + static int jacobianMatrix(N_Vector variables, N_Vector residuals, + SUNMatrix jacobianMatrix, void *userData, + N_Vector tempv1, N_Vector tempv2); - private: - [[nodiscard]] uint64_t getNumOfArrayVariables() const; +private: + [[nodiscard]] uint64_t getNumOfArrayVariables() const; - [[nodiscard]] uint64_t getNumOfScalarVariables() const; + [[nodiscard]] uint64_t getNumOfScalarVariables() const; - [[nodiscard]] uint64_t getVariableFlatSize(Variable variable) const; + [[nodiscard]] uint64_t getVariableFlatSize(Variable variable) const; - [[nodiscard]] uint64_t getNumOfVectorizedEquations() const; + [[nodiscard]] uint64_t getNumOfVectorizedEquations() const; - [[nodiscard]] uint64_t getNumOfScalarEquations() const; + [[nodiscard]] uint64_t getNumOfScalarEquations() const; - [[nodiscard]] uint64_t getEquationRank(Equation equation) const; + [[nodiscard]] uint64_t getEquationRank(Equation equation) const; - [[nodiscard]] uint64_t getEquationFlatSize(Equation equation) const; + [[nodiscard]] uint64_t getEquationFlatSize(Equation equation) const; - [[nodiscard]] Variable getWrittenVariable(Equation equation) const; + [[nodiscard]] Variable getWrittenVariable(Equation equation) const; - [[nodiscard]] AccessFunction getWriteAccessFunction(Equation equation) const; + [[nodiscard]] AccessFunction getWriteAccessFunction(Equation equation) const; - [[nodiscard]] uint64_t 
getVariableRank(Variable variable) const; + [[nodiscard]] uint64_t getVariableRank(Variable variable) const; - std::vector computeJacobianColumns( - Equation eq, const int64_t* equationIndices) const; + std::vector + computeJacobianColumns(Equation eq, const int64_t *equationIndices) const; - void computeNNZ(); + void computeNNZ(); - void computeResidualThreadChunks(); + void computeResidualThreadChunks(); - void copyVariablesFromMARCO(N_Vector variables); + void copyVariablesFromMARCO(N_Vector variables); - void copyVariablesIntoMARCO(N_Vector variables); + void copyVariablesIntoMARCO(N_Vector variables); - void residualsParallelIteration( - std::function& equationIndices)> processFn); + void residualsParallelIteration( + std::function &equationIndices)> + processFn); - void getVariableBeginIndices( - Variable variable, std::vector& indices) const; + void getVariableBeginIndices(Variable variable, + std::vector &indices) const; - void getVariableEndIndices( - Variable variable, std::vector& indices) const; + void getVariableEndIndices(Variable variable, + std::vector &indices) const; - void getEquationBeginIndices( - Equation equation, std::vector& indices) const; + void getEquationBeginIndices(Equation equation, + std::vector &indices) const; - void getEquationEndIndices( - Equation equation, std::vector& indices) const; + void getEquationEndIndices(Equation equation, + std::vector &indices) const; - private: - /// @name Forwarded methods - /// { +private: + /// @name Forwarded methods + /// { - bool kinsolInit(); - bool kinsolFNTolerance(); - bool kinsolSSTolerance(); - bool kinsolSetLinearSolver(); - bool kinsolSetUserData(); - bool kinsolSetJacobianFunction(); + bool kinsolInit(); + bool kinsolFNTolerance(); + bool kinsolSSTolerance(); + bool kinsolSetLinearSolver(); + bool kinsolSetUserData(); + bool kinsolSetJacobianFunction(); - /// } - /// @name Utility functions - /// { + /// } + /// @name Utility functions + /// { - /// Get the scalar equation writing to 
a certain scalar variable. - /// Warning: extremely slow, to be used only for debug purposes. - void getWritingEquation( - Variable variable, - const std::vector& variableIndices, - Equation& equation, - std::vector& equationIndices) const; + /// Get the scalar equation writing to a certain scalar variable. + /// Warning: extremely slow, to be used only for debug purposes. + void getWritingEquation(Variable variable, + const std::vector &variableIndices, + Equation &equation, + std::vector &equationIndices) const; - /// } - /// @name Debug functions - /// { - void printVariablesVector(N_Vector variables) const; + /// } + /// @name Debug functions + /// { + void printVariablesVector(N_Vector variables) const; - void printResidualsVector(N_Vector residuals) const; + void printResidualsVector(N_Vector residuals) const; - void printJacobianMatrix(SUNMatrix jacobianMatrix) const; + void printJacobianMatrix(SUNMatrix jacobianMatrix) const; - /// } + /// } - private: +private: #if SUNDIALS_VERSION_MAJOR >= 6 - // SUNDIALS context. - SUNContext ctx{nullptr}; + // SUNDIALS context. + SUNContext ctx{nullptr}; #endif - // Whether the instance has been inizialized or not. - bool initialized{false}; + // Whether the instance has been inizialized or not. + bool initialized{false}; - // Model size. - uint64_t scalarVariablesNumber{0}; - uint64_t scalarEquationsNumber{0}; - uint64_t nonZeroValuesNumber{0}; + // Model size. + uint64_t scalarVariablesNumber{0}; + uint64_t scalarEquationsNumber{0}; + uint64_t nonZeroValuesNumber{0}; - // The iteration ranges of the vectorized equations. - std::vector equationRanges; + // The iteration ranges of the vectorized equations. + std::vector equationRanges; - // The array variables written by the equations. - // The i-th position contains the information about the variable written - // by the i-th equation: the first element is the index of the IDA - // variable, while the second represents the ranges of the scalar - // variable. 
- std::vector> writeAccesses; + // The array variables written by the equations. + // The i-th position contains the information about the variable written + // by the i-th equation: the first element is the index of the IDA + // variable, while the second represents the ranges of the scalar + // variable. + std::vector> writeAccesses; - // The order in which the equations must be processed when computing - // residuals and partial derivatives. - std::vector equationsProcessingOrder; + // The order in which the equations must be processed when computing + // residuals and partial derivatives. + std::vector equationsProcessingOrder; - // The residual functions associated with the equations. - // The i-th position contains the pointer to the residual function of the - // i-th equation. - std::vector residualFunctions; + // The residual functions associated with the equations. + // The i-th position contains the pointer to the residual function of the + // i-th equation. + std::vector residualFunctions; - // The jacobian functions associated with the equations. - // The i-th position contains the list of partial derivative functions of - // the i-th equation. The j-th function represents the function to - // compute the derivative with respect to the j-th variable. - std::vector> jacobianFunctions; + // The jacobian functions associated with the equations. + // The i-th position contains the list of partial derivative functions of + // the i-th equation. The j-th function represents the function to + // compute the derivative with respect to the j-th variable. + std::vector> jacobianFunctions; - // Whether the IDA instance is informed about the accesses to the - // variables. - bool precomputedAccesses{false}; + // Whether the IDA instance is informed about the accesses to the + // variables. 
+ bool precomputedAccesses{false}; - std::vector variableAccesses; + std::vector variableAccesses; - // The offset of each array variable inside the flattened variables - // vector. - std::vector variableOffsets; + // The offset of each array variable inside the flattened variables + // vector. + std::vector variableOffsets; - // The dimensions list of each array variable. - std::vector variablesDimensions; + // The dimensions list of each array variable. + std::vector variablesDimensions; - // Variables vectors and values. - N_Vector variablesVector; + // Variables vectors and values. + N_Vector variablesVector; - // The tolerance for each scalar variable. - N_Vector tolerancesVector; + // The tolerance for each scalar variable. + N_Vector tolerancesVector; - N_Vector variableScaleVector; - N_Vector residualScaleVector; + N_Vector variableScaleVector; + N_Vector residualScaleVector; - // KINSOL classes. - void* kinsolMemory; + // KINSOL classes. + void *kinsolMemory; - SUNMatrix sparseMatrix; + SUNMatrix sparseMatrix; - // Support structure for the computation of the jacobian matrix. - // The outer vector has a number of elements equal to the scalar number - // of equations. Each of them represents a row of the matrix and consists - // in a vector of paired elements. The first element of each pair - // represents the index of the column (that is, the independent scalar - // variable for the partial derivative) while the second one is the - // value of the partial derivative. - std::vector>> jacobianMatrixData; + // Support structure for the computation of the jacobian matrix. + // The outer vector has a number of elements equal to the scalar number + // of equations. Each of them represents a row of the matrix and consists + // in a vector of paired elements. 
The first element of each pair + // represents the index of the column (that is, the independent scalar + // variable for the partial derivative) while the second one is the + // value of the partial derivative. + std::vector>> jacobianMatrixData; - SUNLinearSolver linearSolver; + SUNLinearSolver linearSolver; - std::vector variableGetters; - std::vector variableSetters; + std::vector variableGetters; + std::vector variableSetters; - // Thread pool. - ThreadPool threadPool; + // Thread pool. + ThreadPool threadPool; - // A chunk of equations to be processed by a thread while computing the - // residual values. - // A chunk is composed of: - // - the identifier (position) of the equation. - // - the begin indices (included) - // - the end indices (exluded) - using ResidualThreadEquationsChunk = std::tuple< - Equation, std::vector, std::vector>; + // A chunk of equations to be processed by a thread while computing the + // residual values. + // A chunk is composed of: + // - the identifier (position) of the equation. + // - the begin indices (included) + // - the end indices (exluded) + using ResidualThreadEquationsChunk = + std::tuple, std::vector>; - // The list of chunks the threads will process. Each thread elaborates - // one chunk at a time. - // The information is computed only once during the initialization to - // save time during the actual simulation. - std::vector residualThreadEquationsChunks; - }; -} + // The list of chunks the threads will process. Each thread elaborates + // one chunk at a time. + // The information is computed only once during the initialization to + // save time during the actual simulation. 
+ std::vector residualThreadEquationsChunks; +}; +} // namespace marco::runtime::sundials::kinsol //===---------------------------------------------------------------------===// // Exported functions @@ -286,15 +267,19 @@ RUNTIME_FUNC_DECL(kinsolSolve, void, PTR(void)) RUNTIME_FUNC_DECL(kinsolFree, void, PTR(void)) -RUNTIME_FUNC_DECL(kinsolAddVariable, uint64_t, PTR(void), uint64_t, PTR(uint64_t), PTR(void), PTR(void), PTR(void)) +RUNTIME_FUNC_DECL(kinsolAddVariable, uint64_t, PTR(void), uint64_t, + PTR(uint64_t), PTR(void), PTR(void), PTR(void)) -RUNTIME_FUNC_DECL(kinsolAddVariableAccess, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DECL(kinsolAddVariableAccess, void, PTR(void), uint64_t, uint64_t, + PTR(void)) -RUNTIME_FUNC_DECL(kinsolAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, uint64_t, PTR(void), PTR(void)) +RUNTIME_FUNC_DECL(kinsolAddEquation, uint64_t, PTR(void), PTR(int64_t), + uint64_t, uint64_t, PTR(void), PTR(void)) RUNTIME_FUNC_DECL(kinsolSetResidual, void, PTR(void), uint64_t, PTR(void)) -RUNTIME_FUNC_DECL(kinsolAddJacobian, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DECL(kinsolAddJacobian, void, PTR(void), uint64_t, uint64_t, + PTR(void)) #endif // SUNDIALS_ENABLE diff --git a/lib/Solvers/KINSOL/Instance.cpp b/lib/Solvers/KINSOL/Instance.cpp index 9ff592643..5233eb17c 100644 --- a/lib/Solvers/KINSOL/Instance.cpp +++ b/lib/Solvers/KINSOL/Instance.cpp @@ -1,10 +1,10 @@ #ifdef SUNDIALS_ENABLE #include "marco/Runtime/Solvers/KINSOL/Instance.h" +#include "kinsol/kinsol.h" +#include "marco/Runtime/Simulation/Options.h" #include "marco/Runtime/Solvers/KINSOL/Options.h" #include "marco/Runtime/Solvers/KINSOL/Profiler.h" -#include "marco/Runtime/Simulation/Options.h" -#include "kinsol/kinsol.h" #include #include #include @@ -20,1412 +20,1357 @@ using namespace marco::runtime::sundials::kinsol; // Solver //===---------------------------------------------------------------------===// -namespace 
marco::runtime::sundials::kinsol -{ - KINSOLInstance::KINSOLInstance() - { - // Initially there is no variable in the instance. - variableOffsets.push_back(0); +namespace marco::runtime::sundials::kinsol { +KINSOLInstance::KINSOLInstance() { + // Initially there is no variable in the instance. + variableOffsets.push_back(0); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Instance created" << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Instance created" << std::endl; } +} - KINSOLInstance::~KINSOLInstance() - { - if (getNumOfScalarEquations() != 0) { - N_VDestroy(variablesVector); - N_VDestroy(tolerancesVector); +KINSOLInstance::~KINSOLInstance() { + if (getNumOfScalarEquations() != 0) { + N_VDestroy(variablesVector); + N_VDestroy(tolerancesVector); - KINFree(&kinsolMemory); - SUNLinSolFree(linearSolver); - SUNMatDestroy(sparseMatrix); - } + KINFree(&kinsolMemory); + SUNLinSolFree(linearSolver); + SUNMatDestroy(sparseMatrix); + } - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Instance destroyed" << std::endl; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Instance destroyed" << std::endl; } +} - Variable KINSOLInstance::addVariable( - uint64_t rank, - const uint64_t* dimensions, - VariableGetter getterFunction, - VariableSetter setterFunction, - const char* name) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Adding algebraic variable"; +Variable KINSOLInstance::addVariable(uint64_t rank, const uint64_t *dimensions, + VariableGetter getterFunction, + VariableSetter setterFunction, + const char *name) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Adding algebraic variable"; - if (name != nullptr) { - std::cerr << " \"" << name << "\""; - } - - std::cerr << std::endl; + if (name != nullptr) { + std::cerr << " \"" << name << "\""; } - // 
Add variable offset and dimensions. - assert(variableOffsets.size() == variablesDimensions.size() + 1); + std::cerr << std::endl; + } - VariableDimensions varDimension(rank); - uint64_t flatSize = 1; + // Add variable offset and dimensions. + assert(variableOffsets.size() == variablesDimensions.size() + 1); - for (uint64_t i = 0; i < rank; ++i) { - flatSize *= dimensions[i]; - varDimension[i] = dimensions[i]; - } + VariableDimensions varDimension(rank); + uint64_t flatSize = 1; - variablesDimensions.push_back(std::move(varDimension)); + for (uint64_t i = 0; i < rank; ++i) { + flatSize *= dimensions[i]; + varDimension[i] = dimensions[i]; + } - size_t offset = variableOffsets.back(); - variableOffsets.push_back(offset + flatSize); + variablesDimensions.push_back(std::move(varDimension)); - // Store the getter and setter functions. - variableGetters.push_back(getterFunction); - variableSetters.push_back(setterFunction); + size_t offset = variableOffsets.back(); + variableOffsets.push_back(offset + flatSize); - // Return the index of the variable. - Variable id = getNumOfArrayVariables() - 1; + // Store the getter and setter functions. + variableGetters.push_back(getterFunction); + variableSetters.push_back(setterFunction); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - ID: " << id << std::endl; - std::cerr << " - Rank: " << rank << std::endl; - std::cerr << " - Dimensions: ["; + // Return the index of the variable. 
+ Variable id = getNumOfArrayVariables() - 1; - for (uint64_t i = 0; i < rank; ++i) { - if (i != 0) { - std::cerr << ","; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - ID: " << id << std::endl; + std::cerr << " - Rank: " << rank << std::endl; + std::cerr << " - Dimensions: ["; - std::cerr << dimensions[i]; + for (uint64_t i = 0; i < rank; ++i) { + if (i != 0) { + std::cerr << ","; } - std::cerr << "]" << std::endl; - std::cerr << " - Getter function address: " - << reinterpret_cast(getterFunction) << std::endl; - std::cerr << " - Setter function address: " - << reinterpret_cast(setterFunction) << std::endl; + std::cerr << dimensions[i]; } - return id; + std::cerr << "]" << std::endl; + std::cerr << " - Getter function address: " + << reinterpret_cast(getterFunction) << std::endl; + std::cerr << " - Setter function address: " + << reinterpret_cast(setterFunction) << std::endl; } - Equation KINSOLInstance::addEquation( - const int64_t* ranges, - uint64_t equationRank, - Variable writtenVariable, - AccessFunction writeAccess, - const char* stringRepresentation) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Adding equation"; + return id; +} - if (stringRepresentation != nullptr) { - std::cerr << " \"" << stringRepresentation << "\""; - } +Equation KINSOLInstance::addEquation(const int64_t *ranges, + uint64_t equationRank, + Variable writtenVariable, + AccessFunction writeAccess, + const char *stringRepresentation) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Adding equation"; - std::cerr << std::endl; + if (stringRepresentation != nullptr) { + std::cerr << " \"" << stringRepresentation << "\""; } - // Add the start and end dimensions of the current equation. 
- MultidimensionalRange eqRanges = {}; + std::cerr << std::endl; + } - for (size_t i = 0, e = equationRank * 2; i < e; i += 2) { - int64_t begin = ranges[i]; - int64_t end = ranges[i + 1]; - eqRanges.push_back({ begin, end }); - } + // Add the start and end dimensions of the current equation. + MultidimensionalRange eqRanges = {}; - equationRanges.push_back(eqRanges); + for (size_t i = 0, e = equationRank * 2; i < e; i += 2) { + int64_t begin = ranges[i]; + int64_t end = ranges[i + 1]; + eqRanges.push_back({begin, end}); + } - // Add the write access. - writeAccesses.emplace_back(writtenVariable, writeAccess); + equationRanges.push_back(eqRanges); - // Return the index of the equation. - Equation id = getNumOfVectorizedEquations() - 1; + // Add the write access. + writeAccesses.emplace_back(writtenVariable, writeAccess); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - ID: " << id << std::endl; - std::cerr << " - Rank: " << equationRank << std::endl; - std::cerr << " - Ranges: ["; + // Return the index of the equation. 
+ Equation id = getNumOfVectorizedEquations() - 1; - for (uint64_t i = 0; i < equationRank; ++i) { - if (i != 0) { - std::cerr << ","; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - ID: " << id << std::endl; + std::cerr << " - Rank: " << equationRank << std::endl; + std::cerr << " - Ranges: ["; - std::cerr << "[" << ranges[i * 2] << "," << (ranges[i * 2 + 1] - 1) << "]"; + for (uint64_t i = 0; i < equationRank; ++i) { + if (i != 0) { + std::cerr << ","; } - std::cerr << "]" << std::endl; - std::cerr << " - Written variable ID: " << writtenVariable << std::endl; - std::cerr << " - Write access function address: " - << reinterpret_cast(writeAccess) << std::endl; + std::cerr << "[" << ranges[i * 2] << "," << (ranges[i * 2 + 1] - 1) + << "]"; } - return id; + std::cerr << "]" << std::endl; + std::cerr << " - Written variable ID: " << writtenVariable << std::endl; + std::cerr << " - Write access function address: " + << reinterpret_cast(writeAccess) << std::endl; } - void KINSOLInstance::addVariableAccess( - Equation equation, - Variable variable, - AccessFunction accessFunction) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Adding access information" << std::endl; - std::cerr << " - Equation: " << equation << std::endl; - std::cerr << " - Variable: " << variable << std::endl; - std::cerr << " - Access function address: " - << reinterpret_cast(accessFunction) << std::endl; - } + return id; +} - assert(equation < getNumOfVectorizedEquations()); - assert(variable < getNumOfArrayVariables()); +void KINSOLInstance::addVariableAccess(Equation equation, Variable variable, + AccessFunction accessFunction) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Adding access information" << std::endl; + std::cerr << " - Equation: " << equation << std::endl; + std::cerr << " - Variable: " << variable << std::endl; + std::cerr << " - Access function address: " + << 
reinterpret_cast(accessFunction) << std::endl; + } - precomputedAccesses = true; + assert(equation < getNumOfVectorizedEquations()); + assert(variable < getNumOfArrayVariables()); - if (variableAccesses.size() <= (size_t) equation) { - variableAccesses.resize(equation + 1); - } + precomputedAccesses = true; - auto& varAccessList = variableAccesses[equation]; - varAccessList.emplace_back(variable, accessFunction); + if (variableAccesses.size() <= (size_t)equation) { + variableAccesses.resize(equation + 1); } - void KINSOLInstance::setResidualFunction( - Equation equation, - ResidualFunction residualFunction) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Setting residual function for equation " << equation - << ". Address: " << reinterpret_cast(residualFunction) - << std::endl; - } + auto &varAccessList = variableAccesses[equation]; + varAccessList.emplace_back(variable, accessFunction); +} - if (residualFunctions.size() <= equation) { - residualFunctions.resize(equation + 1, nullptr); - } +void KINSOLInstance::setResidualFunction(Equation equation, + ResidualFunction residualFunction) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Setting residual function for equation " << equation + << ". Address: " << reinterpret_cast(residualFunction) + << std::endl; + } - residualFunctions[equation] = residualFunction; + if (residualFunctions.size() <= equation) { + residualFunctions.resize(equation + 1, nullptr); } - void KINSOLInstance::addJacobianFunction( - Equation equation, - Variable variable, - JacobianFunction jacobianFunction) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Setting jacobian function for equation " << equation - << " and variable " << variable << ". 
Address: " - << reinterpret_cast(jacobianFunction) << std::endl; - } + residualFunctions[equation] = residualFunction; +} - if (jacobianFunctions.size() <= equation) { - jacobianFunctions.resize(equation + 1, {}); - } +void KINSOLInstance::addJacobianFunction(Equation equation, Variable variable, + JacobianFunction jacobianFunction) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Setting jacobian function for equation " << equation + << " and variable " << variable + << ". Address: " << reinterpret_cast(jacobianFunction) + << std::endl; + } - if (jacobianFunctions[equation].size() <= variable) { - jacobianFunctions[equation].resize(variable + 1, nullptr); - } + if (jacobianFunctions.size() <= equation) { + jacobianFunctions.resize(equation + 1, {}); + } - jacobianFunctions[equation][variable] = jacobianFunction; + if (jacobianFunctions[equation].size() <= variable) { + jacobianFunctions[equation].resize(variable + 1, nullptr); } - bool KINSOLInstance::initialize() - { - assert(!initialized && "KINSOL instance has already been initialized"); + jacobianFunctions[equation][variable] = jacobianFunction; +} - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Performing initialization" << std::endl; - } +bool KINSOLInstance::initialize() { + assert(!initialized && "KINSOL instance has already been initialized"); - // Compute the number of scalar variables. - scalarVariablesNumber = 0; + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Performing initialization" << std::endl; + } - for (Variable var = 0, e = getNumOfArrayVariables(); var < e; ++var) { - scalarVariablesNumber += getVariableFlatSize(var); - } + // Compute the number of scalar variables. + scalarVariablesNumber = 0; - // Compute the number of scalar equations. 
- scalarEquationsNumber = 0; + for (Variable var = 0, e = getNumOfArrayVariables(); var < e; ++var) { + scalarVariablesNumber += getVariableFlatSize(var); + } - for (Equation eq = 0, e = getNumOfVectorizedEquations(); eq < e; ++eq) { - scalarEquationsNumber += getEquationFlatSize(eq); - } + // Compute the number of scalar equations. + scalarEquationsNumber = 0; - assert(getNumOfScalarVariables() == getNumOfScalarEquations() && - "Unbalanced system"); + for (Equation eq = 0, e = getNumOfVectorizedEquations(); eq < e; ++eq) { + scalarEquationsNumber += getEquationFlatSize(eq); + } - if (scalarEquationsNumber == 0) { - // KINSOL has nothing to solve. - initialized = true; - return true; - } + assert(getNumOfScalarVariables() == getNumOfScalarEquations() && + "Unbalanced system"); + + if (scalarEquationsNumber == 0) { + // KINSOL has nothing to solve. + initialized = true; + return true; + } #if SUNDIALS_VERSION_MAJOR >= 6 - // Create the SUNDIALS context. - if (SUNContext_Create(nullptr, &ctx) != 0) { - return false; - } + // Create the SUNDIALS context. + if (SUNContext_Create(nullptr, &ctx) != 0) { + return false; + } #endif - // Create and initialize the variables vector. + // Create and initialize the variables vector. 
#if SUNDIALS_VERSION_MAJOR >= 6 - variablesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + variablesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - variablesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + variablesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - assert(checkAllocation( - static_cast(variablesVector), "N_VNew_Serial")); + assert( + checkAllocation(static_cast(variablesVector), "N_VNew_Serial")); - for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { - N_VGetArrayPointer(variablesVector)[i] = 0; - } + for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { + N_VGetArrayPointer(variablesVector)[i] = 0; + } - // Create and initialize the tolerances vector. + // Create and initialize the tolerances vector. #if SUNDIALS_VERSION_MAJOR >= 6 - tolerancesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + tolerancesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - tolerancesVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + tolerancesVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - assert(checkAllocation( - static_cast(tolerancesVector), "N_VNew_Serial")); + assert( + checkAllocation(static_cast(tolerancesVector), "N_VNew_Serial")); - for (Variable var = 0; var < getNumOfArrayVariables(); ++var) { - uint64_t arrayOffset = variableOffsets[var]; - uint64_t flatSize = getVariableFlatSize(var); + for (Variable var = 0; var < getNumOfArrayVariables(); ++var) { + uint64_t arrayOffset = variableOffsets[var]; + uint64_t flatSize = getVariableFlatSize(var); - for (uint64_t scalarOffset = 0; scalarOffset < flatSize; ++scalarOffset) { - uint64_t offset = arrayOffset + scalarOffset; + for (uint64_t scalarOffset = 0; scalarOffset < flatSize; ++scalarOffset) { + uint64_t offset = arrayOffset + scalarOffset; - N_VGetArrayPointer(tolerancesVector)[offset] = std::min( - 
getOptions().maxAlgebraicAbsoluteTolerance, - getOptions().absoluteTolerance); - } + N_VGetArrayPointer(tolerancesVector)[offset] = + std::min(getOptions().maxAlgebraicAbsoluteTolerance, + getOptions().absoluteTolerance); } + } #if SUNDIALS_VERSION_MAJOR >= 6 - variableScaleVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + variableScaleVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - variableScaleVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + variableScaleVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif #if SUNDIALS_VERSION_MAJOR >= 6 - residualScaleVector = N_VNew_Serial( - static_cast(scalarVariablesNumber), ctx); + residualScaleVector = + N_VNew_Serial(static_cast(scalarVariablesNumber), ctx); #else - residualScaleVector = N_VNew_Serial( - static_cast(scalarVariablesNumber)); + residualScaleVector = + N_VNew_Serial(static_cast(scalarVariablesNumber)); #endif - for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { - N_VGetArrayPointer(variableScaleVector)[i] = 1; - N_VGetArrayPointer(residualScaleVector)[i] = 1; - } - - // Determine the order in which the equations must be processed when - // computing residuals and jacobians. - assert(getNumOfVectorizedEquations() == writeAccesses.size()); - equationsProcessingOrder.resize(getNumOfVectorizedEquations()); + for (uint64_t i = 0; i < scalarVariablesNumber; ++i) { + N_VGetArrayPointer(variableScaleVector)[i] = 1; + N_VGetArrayPointer(residualScaleVector)[i] = 1; + } - for (size_t i = 0, e = getNumOfVectorizedEquations(); i < e; ++i) { - equationsProcessingOrder[i] = i; - } + // Determine the order in which the equations must be processed when + // computing residuals and jacobians. 
+ assert(getNumOfVectorizedEquations() == writeAccesses.size()); + equationsProcessingOrder.resize(getNumOfVectorizedEquations()); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Equations processing order: ["; + for (size_t i = 0, e = getNumOfVectorizedEquations(); i < e; ++i) { + equationsProcessingOrder[i] = i; + } - for (size_t i = 0, e = equationsProcessingOrder.size(); i < e; ++i) { - if (i != 0) { - std::cerr << ", "; - } + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Equations processing order: ["; - std::cerr << equationsProcessingOrder[i]; + for (size_t i = 0, e = equationsProcessingOrder.size(); i < e; ++i) { + if (i != 0) { + std::cerr << ", "; } - std::cerr << "]" << std::endl; + std::cerr << equationsProcessingOrder[i]; } - // Check that all the residual functions have been set. - assert(residualFunctions.size() == getNumOfVectorizedEquations()); - - assert(std::all_of( - residualFunctions.begin(), residualFunctions.end(), - [](const ResidualFunction& function) { - return function != nullptr; - })); - - // Check if the KINSOL instance is not informed about the accesses that all - // the jacobian functions have been set. - assert(precomputedAccesses || - jacobianFunctions.size() == getNumOfVectorizedEquations()); - - assert(precomputedAccesses || - std::all_of( - jacobianFunctions.begin(), jacobianFunctions.end(), - [&](std::vector functions) { - if (functions.size() != variableGetters.size()) { - return false; - } - - return std::all_of( - functions.begin(), functions.end(), - [](const JacobianFunction& function) { - return function != nullptr; - }); - })); - - // Check that all the getters and setters have been set. 
- assert(std::none_of( - variableGetters.begin(), variableGetters.end(), - [](VariableGetter getter) { - return getter == nullptr; - }) && "Not all the variable getters have been set"); - - assert(std::none_of( - variableSetters.begin(), variableSetters.end(), - [](VariableSetter setter) { - return setter == nullptr; - }) && "Not all the variable setters have been set"); - - // Reserve the space for data of the jacobian matrix. - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Reserving space for the data of the Jacobian matrix" - << std::endl; - } + std::cerr << "]" << std::endl; + } - jacobianMatrixData.resize(scalarEquationsNumber); + // Check that all the residual functions have been set. + assert(residualFunctions.size() == getNumOfVectorizedEquations()); + + assert(std::all_of( + residualFunctions.begin(), residualFunctions.end(), + [](const ResidualFunction &function) { return function != nullptr; })); + + // Check if the KINSOL instance is not informed about the accesses that all + // the jacobian functions have been set. + assert(precomputedAccesses || + jacobianFunctions.size() == getNumOfVectorizedEquations()); + + assert(precomputedAccesses || + std::all_of(jacobianFunctions.begin(), jacobianFunctions.end(), + [&](std::vector functions) { + if (functions.size() != variableGetters.size()) { + return false; + } + + return std::all_of(functions.begin(), functions.end(), + [](const JacobianFunction &function) { + return function != nullptr; + }); + })); + + // Check that all the getters and setters have been set. 
+ assert( + std::none_of(variableGetters.begin(), variableGetters.end(), + [](VariableGetter getter) { return getter == nullptr; }) && + "Not all the variable getters have been set"); + + assert( + std::none_of(variableSetters.begin(), variableSetters.end(), + [](VariableSetter setter) { return setter == nullptr; }) && + "Not all the variable setters have been set"); + + // Reserve the space for data of the jacobian matrix. + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Reserving space for the data of the Jacobian matrix" + << std::endl; + } - for (Equation eq : equationsProcessingOrder) { - std::vector equationIndices; - getEquationBeginIndices(eq, equationIndices); + jacobianMatrixData.resize(scalarEquationsNumber); - Variable writtenVariable = getWrittenVariable(eq); + for (Equation eq : equationsProcessingOrder) { + std::vector equationIndices; + getEquationBeginIndices(eq, equationIndices); - uint64_t writtenVariableRank = getVariableRank(writtenVariable); - uint64_t writtenVariableArrayOffset = variableOffsets[writtenVariable]; + Variable writtenVariable = getWrittenVariable(eq); - do { - std::vector writtenVariableIndices; - writtenVariableIndices.resize(writtenVariableRank, 0); + uint64_t writtenVariableRank = getVariableRank(writtenVariable); + uint64_t writtenVariableArrayOffset = variableOffsets[writtenVariable]; - AccessFunction writeAccessFunction = getWriteAccessFunction(eq); + do { + std::vector writtenVariableIndices; + writtenVariableIndices.resize(writtenVariableRank, 0); - writeAccessFunction( - equationIndices.data(), - writtenVariableIndices.data()); + AccessFunction writeAccessFunction = getWriteAccessFunction(eq); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " Variable indices: "; - printIndices(writtenVariableIndices); - std::cerr << std::endl; - } + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); + + if (marco::runtime::simulation::getOptions().debug) 
{ + std::cerr << " Variable indices: "; + printIndices(writtenVariableIndices); + std::cerr << std::endl; + } - uint64_t equationScalarVariableOffset = getVariableFlatIndex( - variablesDimensions[writtenVariable], - writtenVariableIndices); + uint64_t equationScalarVariableOffset = getVariableFlatIndex( + variablesDimensions[writtenVariable], writtenVariableIndices); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + equationScalarVariableOffset; + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + equationScalarVariableOffset; - // Compute the column indexes that may be non-zeros. - std::vector jacobianColumns = - computeJacobianColumns(eq, equationIndices.data()); + // Compute the column indexes that may be non-zeros. + std::vector jacobianColumns = + computeJacobianColumns(eq, equationIndices.data()); - jacobianMatrixData[scalarEquationIndex].resize(jacobianColumns.size()); + jacobianMatrixData[scalarEquationIndex].resize(jacobianColumns.size()); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << " - Equation " << eq << std::endl; - std::cerr << " Equation indices: "; - printIndices(equationIndices); - std::cerr << std::endl; + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << " - Equation " << eq << std::endl; + std::cerr << " Equation indices: "; + printIndices(equationIndices); + std::cerr << std::endl; - std::cerr << " Variable indices: "; - printIndices(writtenVariableIndices); - std::cerr << std::endl; + std::cerr << " Variable indices: "; + printIndices(writtenVariableIndices); + std::cerr << std::endl; - std::cerr << " Scalar equation index: " << scalarEquationIndex - << std::endl; + std::cerr << " Scalar equation index: " << scalarEquationIndex + << std::endl; - std::cerr << " Number of possibly non-zero columns: " - << jacobianColumns.size() << std::endl; - } - } while (advanceEquationIndices(equationIndices, equationRanges[eq])); - } + std::cerr << " Number of possibly non-zero columns: " 
+ << jacobianColumns.size() << std::endl; + } + } while (advanceEquationIndices(equationIndices, equationRanges[eq])); + } - // Compute the total amount of non-zero values in the Jacobian Matrix. - computeNNZ(); + // Compute the total amount of non-zero values in the Jacobian Matrix. + computeNNZ(); - // Compute the equation chunks for each thread. - computeResidualThreadChunks(); + // Compute the equation chunks for each thread. + computeResidualThreadChunks(); - // Create and initialize the memory for KINSOL. + // Create and initialize the memory for KINSOL. #if SUNDIALS_VERSION_MAJOR >= 6 - kinsolMemory = KINCreate(ctx); + kinsolMemory = KINCreate(ctx); #else - kinsolMemory = KINCreate(); + kinsolMemory = KINCreate(); #endif - if (!checkAllocation(kinsolMemory, "KINCreate")) { - return false; - } + if (!checkAllocation(kinsolMemory, "KINCreate")) { + return false; + } - if (!kinsolInit()) { - return false; - } + if (!kinsolInit()) { + return false; + } - // Create sparse SUNMatrix for use in linear solver. + // Create sparse SUNMatrix for use in linear solver. 
#if SUNDIALS_VERSION_MAJOR >= 6 - sparseMatrix = SUNSparseMatrix( - static_cast(scalarEquationsNumber), - static_cast(scalarEquationsNumber), - static_cast(nonZeroValuesNumber), - CSR_MAT, - ctx); + sparseMatrix = SUNSparseMatrix( + static_cast(scalarEquationsNumber), + static_cast(scalarEquationsNumber), + static_cast(nonZeroValuesNumber), CSR_MAT, ctx); #else - sparseMatrix = SUNSparseMatrix( - static_cast(scalarEquationsNumber), - static_cast(scalarEquationsNumber), - static_cast(nonZeroValuesNumber), - CSR_MAT); + sparseMatrix = + SUNSparseMatrix(static_cast(scalarEquationsNumber), + static_cast(scalarEquationsNumber), + static_cast(nonZeroValuesNumber), CSR_MAT); #endif - if (!checkAllocation( - static_cast(sparseMatrix), "SUNSparseMatrix")) { - return false; - } + if (!checkAllocation(static_cast(sparseMatrix), "SUNSparseMatrix")) { + return false; + } - // Create and attach a KLU SUNLinearSolver object. + // Create and attach a KLU SUNLinearSolver object. #if SUNDIALS_VERSION_MAJOR >= 6 - linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix, ctx); + linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix, ctx); #else - linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix); + linearSolver = SUNLinSol_KLU(variablesVector, sparseMatrix); #endif - if (!checkAllocation(static_cast(linearSolver), "SUNLinSol_KLU")) { - return false; - } - - if (!kinsolSetLinearSolver()) { - return false; - } - - if (!kinsolSetUserData() || - !kinsolSetJacobianFunction()) { - return false; - } - - initialized = true; - - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Initialization completed" << std::endl; - } - - return true; + if (!checkAllocation(static_cast(linearSolver), "SUNLinSol_KLU")) { + return false; } - bool KINSOLInstance::solve() - { - if (!initialized) { - if (!initialize()) { - return false; - } - } + if (!kinsolSetLinearSolver()) { + return false; + } - if (marco::runtime::simulation::getOptions().debug) { - std::cerr 
<< "[KINSOL] Computing solution" << std::endl; - } + if (!kinsolSetUserData() || !kinsolSetJacobianFunction()) { + return false; + } - if (getNumOfScalarEquations() == 0) { - // KINSOL has nothing to solve. - return true; - } + initialized = true; - // Update the values of the variables living inside KINSOL. - copyVariablesFromMARCO(variablesVector); + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Initialization completed" << std::endl; + } - auto solveRetVal = KINSol( - kinsolMemory, variablesVector, KIN_LINESEARCH, - variableScaleVector, residualScaleVector); + return true; +} - if (solveRetVal != KIN_SUCCESS) { - // TODO handle errors +bool KINSOLInstance::solve() { + if (!initialized) { + if (!initialize()) { return false; } + } - copyVariablesIntoMARCO(variablesVector); + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Computing solution" << std::endl; + } + if (getNumOfScalarEquations() == 0) { + // KINSOL has nothing to solve. return true; } - int KINSOLInstance::residualFunction( - N_Vector variables, - N_Vector residuals, - void* userData) - { - KINSOL_PROFILER_RESIDUALS_CALL_COUNTER_INCREMENT; + // Update the values of the variables living inside KINSOL. + copyVariablesFromMARCO(variablesVector); - realtype* rval = N_VGetArrayPointer(residuals); - auto* instance = static_cast(userData); + auto solveRetVal = KINSol(kinsolMemory, variablesVector, KIN_LINESEARCH, + variableScaleVector, residualScaleVector); - // Copy the values of the variables and derivatives provided by KINSOL into - // the variables owned by MARCO, so that the residual functions operate on - // the current iteration values. - instance->copyVariablesIntoMARCO(variables); + if (solveRetVal != KIN_SUCCESS) { + // TODO handle errors + return false; + } - // For every vectorized equation, set the residual values of the variables - // it writes into. 
- KINSOL_PROFILER_RESIDUALS_START; + copyVariablesIntoMARCO(variablesVector); - instance->residualsParallelIteration( - [&](Equation eq, const std::vector& equationIndices) { - uint64_t equationRank = instance->getEquationRank(eq); - assert(equationIndices.size() == equationRank); + return true; +} - Variable writtenVariable = instance->getWrittenVariable(eq); +int KINSOLInstance::residualFunction(N_Vector variables, N_Vector residuals, + void *userData) { + KINSOL_PROFILER_RESIDUALS_CALL_COUNTER_INCREMENT; - uint64_t writtenVariableArrayOffset = - instance->variableOffsets[writtenVariable]; + realtype *rval = N_VGetArrayPointer(residuals); + auto *instance = static_cast(userData); - uint64_t writtenVariableRank = - instance->getVariableRank(writtenVariable); + // Copy the values of the variables and derivatives provided by KINSOL into + // the variables owned by MARCO, so that the residual functions operate on + // the current iteration values. + instance->copyVariablesIntoMARCO(variables); - std::vector writtenVariableIndices(writtenVariableRank, 0); + // For every vectorized equation, set the residual values of the variables + // it writes into. 
+ KINSOL_PROFILER_RESIDUALS_START; - AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(eq); + instance->residualsParallelIteration( + [&](Equation eq, const std::vector &equationIndices) { + uint64_t equationRank = instance->getEquationRank(eq); + assert(equationIndices.size() == equationRank); - writeAccessFunction( - equationIndices.data(), - writtenVariableIndices.data()); + Variable writtenVariable = instance->getWrittenVariable(eq); - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + uint64_t writtenVariableArrayOffset = + instance->variableOffsets[writtenVariable]; - uint64_t offset = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t writtenVariableRank = + instance->getVariableRank(writtenVariable); - auto residualFn = instance->residualFunctions[eq]; - auto* eqIndicesPtr = equationIndices.data(); + std::vector writtenVariableIndices(writtenVariableRank, 0); - auto residualFunctionResult = residualFn(eqIndicesPtr); - *(rval + offset) = residualFunctionResult; - }); + AccessFunction writeAccessFunction = + instance->getWriteAccessFunction(eq); - KINSOL_PROFILER_RESIDUALS_STOP; + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Residuals function called" << std::endl; - std::cerr << "Variables:" << std::endl; - instance->printVariablesVector(variables); - std::cerr << "Residuals vector:" << std::endl; - instance->printResidualsVector(residuals); - } + uint64_t writtenVariableScalarOffset = + getVariableFlatIndex(instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - return KIN_SUCCESS; - } + uint64_t offset = + writtenVariableArrayOffset + writtenVariableScalarOffset; - int KINSOLInstance::jacobianMatrix( - N_Vector variables, N_Vector residuals, - SUNMatrix jacobianMatrix, - void* userData, - 
N_Vector tempv1, N_Vector tempv2) - { - KINSOL_PROFILER_PARTIAL_DERIVATIVES_CALL_COUNTER_INCREMENT; + auto residualFn = instance->residualFunctions[eq]; + auto *eqIndicesPtr = equationIndices.data(); - realtype* jacobian = SUNSparseMatrix_Data(jacobianMatrix); - auto* instance = static_cast(userData); + auto residualFunctionResult = residualFn(eqIndicesPtr); + *(rval + offset) = residualFunctionResult; + }); - // Copy the values of the variables and derivatives provided by KINSOL into - // the variables owned by MARCO, so that the jacobian functions operate on - // the current iteration values. - instance->copyVariablesIntoMARCO(variables); + KINSOL_PROFILER_RESIDUALS_STOP; - KINSOL_PROFILER_PARTIAL_DERIVATIVES_START; + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Residuals function called" << std::endl; + std::cerr << "Variables:" << std::endl; + instance->printVariablesVector(variables); + std::cerr << "Residuals vector:" << std::endl; + instance->printResidualsVector(residuals); + } - unsigned int numOfThreads = instance->threadPool.getNumOfThreads(); + return KIN_SUCCESS; +} - std::atomic_size_t currentEquation = 0; - uint64_t numOfVectorizedEquations = instance->getNumOfVectorizedEquations(); +int KINSOLInstance::jacobianMatrix(N_Vector variables, N_Vector residuals, + SUNMatrix jacobianMatrix, void *userData, + N_Vector tempv1, N_Vector tempv2) { + KINSOL_PROFILER_PARTIAL_DERIVATIVES_CALL_COUNTER_INCREMENT; - for (unsigned int thread = 0; thread < numOfThreads; ++thread) { - instance->threadPool.async([&]() { - size_t equationIndex = 0; - Equation equation; - std::vector equationIndices; + realtype *jacobian = SUNSparseMatrix_Data(jacobianMatrix); + auto *instance = static_cast(userData); - while ((equationIndex = currentEquation++) < numOfVectorizedEquations) { - equation = instance->equationsProcessingOrder[equationIndex]; - instance->getEquationBeginIndices(equation, equationIndices); - Variable writtenVariable = 
instance->getWrittenVariable(equation); + // Copy the values of the variables and derivatives provided by KINSOL into + // the variables owned by MARCO, so that the jacobian functions operate on + // the current iteration values. + instance->copyVariablesIntoMARCO(variables); - uint64_t writtenVariableArrayOffset = - instance->variableOffsets[writtenVariable]; + KINSOL_PROFILER_PARTIAL_DERIVATIVES_START; - uint64_t writtenVariableRank = - instance->getVariableRank(writtenVariable); + unsigned int numOfThreads = instance->threadPool.getNumOfThreads(); - std::vector writtenVariableIndices; - writtenVariableIndices.resize(writtenVariableRank, 0); + std::atomic_size_t currentEquation = 0; + uint64_t numOfVectorizedEquations = instance->getNumOfVectorizedEquations(); - AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(equation); + for (unsigned int thread = 0; thread < numOfThreads; ++thread) { + instance->threadPool.async([&]() { + size_t equationIndex = 0; + Equation equation; + std::vector equationIndices; - do { - writeAccessFunction(equationIndices.data(), - writtenVariableIndices.data()); + while ((equationIndex = currentEquation++) < numOfVectorizedEquations) { + equation = instance->equationsProcessingOrder[equationIndex]; + instance->getEquationBeginIndices(equation, equationIndices); + Variable writtenVariable = instance->getWrittenVariable(equation); - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + uint64_t writtenVariableArrayOffset = + instance->variableOffsets[writtenVariable]; - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t writtenVariableRank = + instance->getVariableRank(writtenVariable); - assert(scalarEquationIndex < instance->getNumOfScalarEquations()); + std::vector writtenVariableIndices; + writtenVariableIndices.resize(writtenVariableRank, 0); - // Compute the column indices that 
may be non-zero. - std::vector jacobianColumns = - instance->computeJacobianColumns(equation, - equationIndices.data()); + AccessFunction writeAccessFunction = + instance->getWriteAccessFunction(equation); - // For every scalar variable with respect to which the equation must - // be partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn &column = jacobianColumns[i]; - Variable variable = column.first; - const auto &variableIndices = column.second; + do { + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - uint64_t variableArrayOffset = instance->variableOffsets[variable]; + uint64_t writtenVariableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - uint64_t variableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[variable], column.second); + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; - assert(instance->jacobianFunctions[equation][variable] != nullptr); + assert(scalarEquationIndex < instance->getNumOfScalarEquations()); - auto jacobianFunctionResult = - instance->jacobianFunctions[equation][variable]( - equationIndices.data(), variableIndices.data()); + // Compute the column indices that may be non-zero. + std::vector jacobianColumns = + instance->computeJacobianColumns(equation, + equationIndices.data()); - instance->jacobianMatrixData[scalarEquationIndex][i].second = - jacobianFunctionResult; + // For every scalar variable with respect to which the equation must + // be partially differentiated. 
+ for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; - auto index = static_cast(variableArrayOffset + - variableScalarOffset); + uint64_t variableArrayOffset = instance->variableOffsets[variable]; - instance->jacobianMatrixData[scalarEquationIndex][i].first = index; - } - } while (advanceEquationIndices(equationIndices, - instance->equationRanges[equation])); - } - }); - } + uint64_t variableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[variable], column.second); - instance->threadPool.wait(); + assert(instance->jacobianFunctions[equation][variable] != nullptr); - sunindextype* rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); - sunindextype* columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); + auto jacobianFunctionResult = + instance->jacobianFunctions[equation][variable]( + equationIndices.data(), variableIndices.data()); - sunindextype offset = 0; - *rowPtrs++ = offset; + instance->jacobianMatrixData[scalarEquationIndex][i].second = + jacobianFunctionResult; - for (const auto& row : instance->jacobianMatrixData) { - offset += static_cast(row.size()); - *rowPtrs++ = offset; + auto index = static_cast(variableArrayOffset + + variableScalarOffset); - for (const auto& column : row) { - *columnIndices++ = column.first; - *jacobian++ = column.second; + instance->jacobianMatrixData[scalarEquationIndex][i].first = index; + } + } while (advanceEquationIndices(equationIndices, + instance->equationRanges[equation])); } - } + }); + } - assert(rowPtrs == SUNSparseMatrix_IndexPointers(jacobianMatrix) + instance->getNumOfScalarEquations() + 1); - assert(columnIndices == SUNSparseMatrix_IndexValues(jacobianMatrix) + instance->nonZeroValuesNumber); - assert(jacobian == SUNSparseMatrix_Data(jacobianMatrix) + instance->nonZeroValuesNumber); + instance->threadPool.wait(); - 
KINSOL_PROFILER_PARTIAL_DERIVATIVES_STOP; + sunindextype *rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); + sunindextype *columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Jacobian matrix function called" << std::endl; - std::cerr << "Variables:" << std::endl; - instance->printVariablesVector(variables); - std::cerr << "Residuals vector:" << std::endl; - instance->printResidualsVector(residuals); - std::cerr << "Jacobian matrix:" << std::endl; - instance->printJacobianMatrix(jacobianMatrix); - } + sunindextype offset = 0; + *rowPtrs++ = offset; - return KIN_SUCCESS; - } + for (const auto &row : instance->jacobianMatrixData) { + offset += static_cast(row.size()); + *rowPtrs++ = offset; - uint64_t KINSOLInstance::getNumOfArrayVariables() const - { - return variablesDimensions.size(); + for (const auto &column : row) { + *columnIndices++ = column.first; + *jacobian++ = column.second; + } } - uint64_t KINSOLInstance::getNumOfScalarVariables() const - { - return scalarVariablesNumber; + assert(rowPtrs == SUNSparseMatrix_IndexPointers(jacobianMatrix) + + instance->getNumOfScalarEquations() + 1); + assert(columnIndices == SUNSparseMatrix_IndexValues(jacobianMatrix) + + instance->nonZeroValuesNumber); + assert(jacobian == + SUNSparseMatrix_Data(jacobianMatrix) + instance->nonZeroValuesNumber); + + KINSOL_PROFILER_PARTIAL_DERIVATIVES_STOP; + + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Jacobian matrix function called" << std::endl; + std::cerr << "Variables:" << std::endl; + instance->printVariablesVector(variables); + std::cerr << "Residuals vector:" << std::endl; + instance->printResidualsVector(residuals); + std::cerr << "Jacobian matrix:" << std::endl; + instance->printJacobianMatrix(jacobianMatrix); } - uint64_t KINSOLInstance::getVariableFlatSize(Variable variable) const - { - uint64_t result = 1; + return KIN_SUCCESS; +} - for 
(uint64_t dimension : variablesDimensions[variable]) { - result *= dimension; - } +uint64_t KINSOLInstance::getNumOfArrayVariables() const { + return variablesDimensions.size(); +} - return result; - } +uint64_t KINSOLInstance::getNumOfScalarVariables() const { + return scalarVariablesNumber; +} - uint64_t KINSOLInstance::getNumOfVectorizedEquations() const - { - return equationRanges.size(); - } +uint64_t KINSOLInstance::getVariableFlatSize(Variable variable) const { + uint64_t result = 1; - uint64_t KINSOLInstance::getNumOfScalarEquations() const - { - return scalarEquationsNumber; + for (uint64_t dimension : variablesDimensions[variable]) { + result *= dimension; } - uint64_t KINSOLInstance::getEquationRank(Equation equation) const - { - return equationRanges[equation].size(); - } + return result; +} - uint64_t KINSOLInstance::getEquationFlatSize(Equation equation) const - { - assert(equation < getNumOfVectorizedEquations()); - uint64_t result = 1; +uint64_t KINSOLInstance::getNumOfVectorizedEquations() const { + return equationRanges.size(); +} - for (const Range& range : equationRanges[equation]) { - result *= range.end - range.begin; - } +uint64_t KINSOLInstance::getNumOfScalarEquations() const { + return scalarEquationsNumber; +} - return result; - } +uint64_t KINSOLInstance::getEquationRank(Equation equation) const { + return equationRanges[equation].size(); +} - Variable KINSOLInstance::getWrittenVariable(Equation equation) const - { - return writeAccesses[equation].first; - } +uint64_t KINSOLInstance::getEquationFlatSize(Equation equation) const { + assert(equation < getNumOfVectorizedEquations()); + uint64_t result = 1; - AccessFunction KINSOLInstance::getWriteAccessFunction(Equation equation) const - { - return writeAccesses[equation].second; + for (const Range &range : equationRanges[equation]) { + result *= range.end - range.begin; } - uint64_t KINSOLInstance::getVariableRank(Variable variable) const - { - return variablesDimensions[variable].rank(); 
- } + return result; +} - /// Determine which of the columns of the current Jacobian row has to be - /// populated, and with respect to which variable the partial derivative has - /// to be performed. The row is determined by the indices of the equation. - std::vector KINSOLInstance::computeJacobianColumns( - Equation eq, const int64_t* equationIndices) const - { - std::set uniqueColumns; +Variable KINSOLInstance::getWrittenVariable(Equation equation) const { + return writeAccesses[equation].first; +} - if (precomputedAccesses) { - for (const auto& access : variableAccesses[eq]) { - Variable variable = access.first; - AccessFunction accessFunction = access.second; +AccessFunction KINSOLInstance::getWriteAccessFunction(Equation equation) const { + return writeAccesses[equation].second; +} - uint64_t variableRank = getVariableRank(variable); +uint64_t KINSOLInstance::getVariableRank(Variable variable) const { + return variablesDimensions[variable].rank(); +} - std::vector variableIndices; - variableIndices.resize(variableRank, 0); - accessFunction(equationIndices, variableIndices.data()); +/// Determine which of the columns of the current Jacobian row has to be +/// populated, and with respect to which variable the partial derivative has +/// to be performed. The row is determined by the indices of the equation. 
+std::vector +KINSOLInstance::computeJacobianColumns(Equation eq, + const int64_t *equationIndices) const { + std::set uniqueColumns; - assert([&]() -> bool { - for (uint64_t i = 0; i < variableRank; ++i) { - if (variableIndices[i] >= variablesDimensions[variable][i]) { - return false; - } - } + if (precomputedAccesses) { + for (const auto &access : variableAccesses[eq]) { + Variable variable = access.first; + AccessFunction accessFunction = access.second; - return true; - }() && "Access out of bounds"); + uint64_t variableRank = getVariableRank(variable); - uniqueColumns.insert({variable, variableIndices}); - } - } else { - for (size_t variableIndex = 0, e = getNumOfArrayVariables(); - variableIndex < e; ++variableIndex) { - const auto& dimensions = variablesDimensions[variableIndex]; - - for (auto indices = dimensions.indicesBegin(), - end = dimensions.indicesEnd(); - indices != end; ++indices) { - JacobianColumn column(variableIndex, {}); - - for (size_t dim = 0; dim < dimensions.rank(); ++dim) { - column.second.push_back((*indices)[dim]); - } + std::vector variableIndices; + variableIndices.resize(variableRank, 0); + accessFunction(equationIndices, variableIndices.data()); - uniqueColumns.insert(std::move(column)); + assert([&]() -> bool { + for (uint64_t i = 0; i < variableRank; ++i) { + if (variableIndices[i] >= variablesDimensions[variable][i]) { + return false; + } } - } - } - std::vector orderedColumns; + return true; + }() && "Access out of bounds"); - for (const JacobianColumn& column : uniqueColumns) { - orderedColumns.push_back(column); + uniqueColumns.insert({variable, variableIndices}); } + } else { + for (size_t variableIndex = 0, e = getNumOfArrayVariables(); + variableIndex < e; ++variableIndex) { + const auto &dimensions = variablesDimensions[variableIndex]; - std::sort(orderedColumns.begin(), orderedColumns.end(), - [](const JacobianColumn& first, const JacobianColumn& second) { - if (first.first != second.first) { - return first.first < 
second.first; - } + for (auto indices = dimensions.indicesBegin(), + end = dimensions.indicesEnd(); + indices != end; ++indices) { + JacobianColumn column(variableIndex, {}); - assert(first.second.size() == second.second.size()); + for (size_t dim = 0; dim < dimensions.rank(); ++dim) { + column.second.push_back((*indices)[dim]); + } - for (size_t i = 0, e = first.second.size(); i < e; ++i) { - if (first.second[i] < second.second[i]) { - return true; - } - } + uniqueColumns.insert(std::move(column)); + } + } + } - return false; - }); + std::vector orderedColumns; - return orderedColumns; + for (const JacobianColumn &column : uniqueColumns) { + orderedColumns.push_back(column); } - /// Compute the number of non-zero values in the Jacobian Matrix. Also - /// compute the column indexes of all non-zero values in the Jacobian Matrix. - /// This allows to avoid the recomputation of such indexes during the - /// Jacobian evaluation. - void KINSOLInstance::computeNNZ() - { - nonZeroValuesNumber = 0; - std::vector equationIndices; + std::sort(orderedColumns.begin(), orderedColumns.end(), + [](const JacobianColumn &first, const JacobianColumn &second) { + if (first.first != second.first) { + return first.first < second.first; + } - for (size_t eq = 0; eq < getNumOfVectorizedEquations(); ++eq) { - // Initialize the multidimensional interval of the vector equation. - uint64_t equationRank = equationRanges[eq].size(); - equationIndices.resize(equationRank); + assert(first.second.size() == second.second.size()); - for (size_t i = 0; i < equationRank; ++i) { - const auto& iterationRange = equationRanges[eq][i]; - int64_t beginIndex = iterationRange.begin; - equationIndices[i] = beginIndex; - } + for (size_t i = 0, e = first.second.size(); i < e; ++i) { + if (first.second[i] < second.second[i]) { + return true; + } + } - // For every scalar equation in the vector equation. 
- do { - // Compute the column indexes that may be non-zeros - nonZeroValuesNumber += - computeJacobianColumns(eq, equationIndices.data()).size(); + return false; + }); + + return orderedColumns; +} + +/// Compute the number of non-zero values in the Jacobian Matrix. Also +/// compute the column indexes of all non-zero values in the Jacobian Matrix. +/// This allows to avoid the recomputation of such indexes during the +/// Jacobian evaluation. +void KINSOLInstance::computeNNZ() { + nonZeroValuesNumber = 0; + std::vector equationIndices; + + for (size_t eq = 0; eq < getNumOfVectorizedEquations(); ++eq) { + // Initialize the multidimensional interval of the vector equation. + uint64_t equationRank = equationRanges[eq].size(); + equationIndices.resize(equationRank); - } while (advanceEquationIndices(equationIndices, equationRanges[eq])); + for (size_t i = 0; i < equationRank; ++i) { + const auto &iterationRange = equationRanges[eq][i]; + int64_t beginIndex = iterationRange.begin; + equationIndices[i] = beginIndex; } - } - void KINSOLInstance::computeResidualThreadChunks() - { - unsigned int numOfThreads = threadPool.getNumOfThreads(); + // For every scalar equation in the vector equation. 
+ do { + // Compute the column indexes that may be non-zeros + nonZeroValuesNumber += + computeJacobianColumns(eq, equationIndices.data()).size(); - int64_t chunksFactor = getOptions().equationsChunksFactor; - int64_t numOfChunks = numOfThreads * chunksFactor; + } while (advanceEquationIndices(equationIndices, equationRanges[eq])); + } +} - uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); - uint64_t numOfScalarEquations = getNumOfScalarEquations(); +void KINSOLInstance::computeResidualThreadChunks() { + unsigned int numOfThreads = threadPool.getNumOfThreads(); - size_t chunkSize = - (numOfScalarEquations + numOfChunks - 1) / numOfChunks; + int64_t chunksFactor = getOptions().equationsChunksFactor; + int64_t numOfChunks = numOfThreads * chunksFactor; - // The number of vectorized equations whose indices have been completely - // assigned. - uint64_t processedEquations = 0; + uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); + uint64_t numOfScalarEquations = getNumOfScalarEquations(); - while (processedEquations < numOfVectorizedEquations) { - Equation equation = equationsProcessingOrder[processedEquations]; - uint64_t equationFlatSize = getEquationFlatSize(equation); - uint64_t equationFlatIndex = 0; + size_t chunkSize = (numOfScalarEquations + numOfChunks - 1) / numOfChunks; - // Divide the ranges into chunks. - while (equationFlatIndex < equationFlatSize) { - uint64_t beginFlatIndex = equationFlatIndex; + // The number of vectorized equations whose indices have been completely + // assigned. + uint64_t processedEquations = 0; - uint64_t endFlatIndex = std::min( - beginFlatIndex + static_cast(chunkSize), - equationFlatSize); + while (processedEquations < numOfVectorizedEquations) { + Equation equation = equationsProcessingOrder[processedEquations]; + uint64_t equationFlatSize = getEquationFlatSize(equation); + uint64_t equationFlatIndex = 0; - std::vector beginIndices; - std::vector endIndices; + // Divide the ranges into chunks. 
+ while (equationFlatIndex < equationFlatSize) { + uint64_t beginFlatIndex = equationFlatIndex; - getEquationIndicesFromFlatIndex( - beginFlatIndex, beginIndices, equationRanges[equation]); + uint64_t endFlatIndex = std::min( + beginFlatIndex + static_cast(chunkSize), equationFlatSize); - if (endFlatIndex == equationFlatSize) { - getEquationEndIndices(equation, endIndices); - } else { - getEquationIndicesFromFlatIndex( - endFlatIndex, endIndices, equationRanges[equation]); - } + std::vector beginIndices; + std::vector endIndices; - residualThreadEquationsChunks.emplace_back( - equation, std::move(beginIndices), std::move(endIndices)); + getEquationIndicesFromFlatIndex(beginFlatIndex, beginIndices, + equationRanges[equation]); - // Move to the next chunk. - equationFlatIndex = endFlatIndex; + if (endFlatIndex == equationFlatSize) { + getEquationEndIndices(equation, endIndices); + } else { + getEquationIndicesFromFlatIndex(endFlatIndex, endIndices, + equationRanges[equation]); } - // Move to the next vectorized equation. - ++processedEquations; + residualThreadEquationsChunks.emplace_back( + equation, std::move(beginIndices), std::move(endIndices)); + + // Move to the next chunk. + equationFlatIndex = endFlatIndex; } + + // Move to the next vectorized equation. 
+ ++processedEquations; } +} - void KINSOLInstance::copyVariablesFromMARCO(N_Vector variables) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Copying variables from MARCO" << std::endl; - } +void KINSOLInstance::copyVariablesFromMARCO(N_Vector variables) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Copying variables from MARCO" << std::endl; + } - KINSOL_PROFILER_COPY_VARS_FROM_MARCO_START; + KINSOL_PROFILER_COPY_VARS_FROM_MARCO_START; - realtype* varsPtr = N_VGetArrayPointer(variables); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); + realtype *varsPtr = N_VGetArrayPointer(variables); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - for (Variable var = 0; var < numOfArrayVariables; ++var) { - uint64_t variableArrayOffset = variableOffsets[var]; - const auto& dimensions = variablesDimensions[var]; + for (Variable var = 0; var < numOfArrayVariables; ++var) { + uint64_t variableArrayOffset = variableOffsets[var]; + const auto &dimensions = variablesDimensions[var]; - std::vector varIndices; - getVariableBeginIndices(var, varIndices); + std::vector varIndices; + getVariableBeginIndices(var, varIndices); - do { - uint64_t variableScalarOffset = - getVariableFlatIndex(dimensions, varIndices.data()); + do { + uint64_t variableScalarOffset = + getVariableFlatIndex(dimensions, varIndices.data()); - uint64_t offset = variableArrayOffset + variableScalarOffset; + uint64_t offset = variableArrayOffset + variableScalarOffset; - // Get the variable. 
- auto getterFn = variableGetters[var]; - auto value = static_cast(getterFn(varIndices.data())); - varsPtr[offset] = value; - - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "Got var " << var << " "; - printIndices(varIndices); - std::cerr << " with value " << std::fixed << std::setprecision(9) - << value << std::endl; - } - } while (advanceVariableIndices(varIndices, variablesDimensions[var])); - } + // Get the variable. + auto getterFn = variableGetters[var]; + auto value = static_cast(getterFn(varIndices.data())); + varsPtr[offset] = value; - KINSOL_PROFILER_COPY_VARS_FROM_MARCO_STOP; + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "Got var " << var << " "; + printIndices(varIndices); + std::cerr << " with value " << std::fixed << std::setprecision(9) + << value << std::endl; + } + } while (advanceVariableIndices(varIndices, variablesDimensions[var])); } - void KINSOLInstance::copyVariablesIntoMARCO(N_Vector variables) - { - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "[KINSOL] Copying variables into MARCO" << std::endl; - } + KINSOL_PROFILER_COPY_VARS_FROM_MARCO_STOP; +} - KINSOL_PROFILER_COPY_VARS_INTO_MARCO_START; +void KINSOLInstance::copyVariablesIntoMARCO(N_Vector variables) { + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "[KINSOL] Copying variables into MARCO" << std::endl; + } - realtype* varsPtr = N_VGetArrayPointer(variables); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); + KINSOL_PROFILER_COPY_VARS_INTO_MARCO_START; - for (Variable var = 0; var < numOfArrayVariables; ++var) { - uint64_t variableArrayOffset = variableOffsets[var]; - const auto& dimensions = variablesDimensions[var]; + realtype *varsPtr = N_VGetArrayPointer(variables); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - std::vector varIndices; - getVariableBeginIndices(var, varIndices); + for (Variable var = 0; var < numOfArrayVariables; ++var) { + uint64_t 
variableArrayOffset = variableOffsets[var]; + const auto &dimensions = variablesDimensions[var]; - do { - uint64_t variableScalarOffset = - getVariableFlatIndex(dimensions, varIndices.data()); + std::vector varIndices; + getVariableBeginIndices(var, varIndices); - uint64_t offset = variableArrayOffset + variableScalarOffset; + do { + uint64_t variableScalarOffset = + getVariableFlatIndex(dimensions, varIndices.data()); - // Set the variable. - auto setterFn = variableSetters[var]; - auto value = static_cast(varsPtr[offset]); + uint64_t offset = variableArrayOffset + variableScalarOffset; - if (marco::runtime::simulation::getOptions().debug) { - std::cerr << "Setting var " << var << " "; - printIndices(varIndices); - std::cerr << " to " << value << std::endl; - } + // Set the variable. + auto setterFn = variableSetters[var]; + auto value = static_cast(varsPtr[offset]); - setterFn(value, varIndices.data()); + if (marco::runtime::simulation::getOptions().debug) { + std::cerr << "Setting var " << var << " "; + printIndices(varIndices); + std::cerr << " to " << value << std::endl; + } - assert([&]() -> bool { - auto getterFn = variableGetters[var]; - return getterFn(varIndices.data()) == value; - }() && "Variable value not set correctly"); - } while (advanceVariableIndices(varIndices, variablesDimensions[var])); - } + setterFn(value, varIndices.data()); - KINSOL_PROFILER_COPY_VARS_INTO_MARCO_STOP; + assert([&]() -> bool { + auto getterFn = variableGetters[var]; + return getterFn(varIndices.data()) == value; + }() && "Variable value not set correctly"); + } while (advanceVariableIndices(varIndices, variablesDimensions[var])); } - void KINSOLInstance::residualsParallelIteration( - std::function &equationIndices)> - processFn) { - // Shard the work among multiple threads. 
- unsigned int numOfThreads = threadPool.getNumOfThreads(); - std::atomic_size_t chunkIndex = 0; + KINSOL_PROFILER_COPY_VARS_INTO_MARCO_STOP; +} - for (unsigned int thread = 0; thread < numOfThreads; ++thread) { - threadPool.async([&]() { - size_t assignedChunk; +void KINSOLInstance::residualsParallelIteration( + std::function &equationIndices)> + processFn) { + // Shard the work among multiple threads. + unsigned int numOfThreads = threadPool.getNumOfThreads(); + std::atomic_size_t chunkIndex = 0; - while ((assignedChunk = chunkIndex++) < - residualThreadEquationsChunks.size()) { - const ResidualThreadEquationsChunk &chunk = - residualThreadEquationsChunks[assignedChunk]; + for (unsigned int thread = 0; thread < numOfThreads; ++thread) { + threadPool.async([&]() { + size_t assignedChunk; - Equation equation = std::get<0>(chunk); - std::vector equationIndices = std::get<1>(chunk); + while ((assignedChunk = chunkIndex++) < + residualThreadEquationsChunks.size()) { + const ResidualThreadEquationsChunk &chunk = + residualThreadEquationsChunks[assignedChunk]; - do { - assert([&]() -> bool { - if (equationIndices.size() != equationRanges[equation].size()) { - return false; - } + Equation equation = std::get<0>(chunk); + std::vector equationIndices = std::get<1>(chunk); - for (size_t i = 0, rank = equationIndices.size(); i < rank; ++i) { - if (equationIndices[i] < equationRanges[equation][i].begin || - equationIndices[i] >= equationRanges[equation][i].end) { - return false; - } - } + do { + assert([&]() -> bool { + if (equationIndices.size() != equationRanges[equation].size()) { + return false; + } - return true; - }() && "Invalid equation indices"); + for (size_t i = 0, rank = equationIndices.size(); i < rank; ++i) { + if (equationIndices[i] < equationRanges[equation][i].begin || + equationIndices[i] >= equationRanges[equation][i].end) { + return false; + } + } - processFn(equation, equationIndices); - } while (advanceEquationIndicesUntil( - equationIndices, 
equationRanges[equation], std::get<2>(chunk))); - } - }); - } + return true; + }() && "Invalid equation indices"); - threadPool.wait(); + processFn(equation, equationIndices); + } while (advanceEquationIndicesUntil( + equationIndices, equationRanges[equation], std::get<2>(chunk))); + } + }); } - void KINSOLInstance::getVariableBeginIndices( - Variable variable, std::vector& indices) const - { - uint64_t variableRank = getVariableRank(variable); - indices.resize(variableRank); - - for (uint64_t i = 0; i < variableRank; ++i) { - indices[i] = 0; - } - } + threadPool.wait(); +} - void KINSOLInstance::getVariableEndIndices( - Variable variable, std::vector& indices) const - { - uint64_t variableRank = getVariableRank(variable); - indices.resize(variableRank); +void KINSOLInstance::getVariableBeginIndices( + Variable variable, std::vector &indices) const { + uint64_t variableRank = getVariableRank(variable); + indices.resize(variableRank); - for (uint64_t i = 0; i < variableRank; ++i) { - indices[i] = variablesDimensions[variable][i]; - } + for (uint64_t i = 0; i < variableRank; ++i) { + indices[i] = 0; } +} - void KINSOLInstance::getEquationBeginIndices( - Equation equation, std::vector& indices) const - { - uint64_t equationRank = getEquationRank(equation); - indices.resize(equationRank); +void KINSOLInstance::getVariableEndIndices( + Variable variable, std::vector &indices) const { + uint64_t variableRank = getVariableRank(variable); + indices.resize(variableRank); - for (uint64_t i = 0; i < equationRank; ++i) { - indices[i] = equationRanges[equation][i].begin; - } + for (uint64_t i = 0; i < variableRank; ++i) { + indices[i] = variablesDimensions[variable][i]; } +} - void KINSOLInstance::getEquationEndIndices( - Equation equation, std::vector& indices) const - { - uint64_t equationRank = getEquationRank(equation); - indices.resize(equationRank); +void KINSOLInstance::getEquationBeginIndices( + Equation equation, std::vector &indices) const { + uint64_t equationRank = 
getEquationRank(equation); + indices.resize(equationRank); - for (uint64_t i = 0; i < equationRank; ++i) { - indices[i] = equationRanges[equation][i].end; - } + for (uint64_t i = 0; i < equationRank; ++i) { + indices[i] = equationRanges[equation][i].begin; } +} +void KINSOLInstance::getEquationEndIndices( + Equation equation, std::vector &indices) const { + uint64_t equationRank = getEquationRank(equation); + indices.resize(equationRank); - bool KINSOLInstance::kinsolInit() - { - auto retVal = KINInit(kinsolMemory, residualFunction, variablesVector); + for (uint64_t i = 0; i < equationRank; ++i) { + indices[i] = equationRanges[equation][i].end; + } +} - if (retVal == KIN_MEM_NULL) { - std::cerr << "KINInit - The kinsol_mem pointer is NULL" << std::endl; - return false; - } +bool KINSOLInstance::kinsolInit() { + auto retVal = KINInit(kinsolMemory, residualFunction, variablesVector); - if (retVal == KIN_MEM_FAIL) { - std::cerr << "KINInit - A memory allocation request has failed" << std::endl; - return false; - } + if (retVal == KIN_MEM_NULL) { + std::cerr << "KINInit - The kinsol_mem pointer is NULL" << std::endl; + return false; + } - if (retVal == KIN_ILL_INPUT) { - std::cerr << "KINInit - An input argument to KINInit has an illegal value" << std::endl; - return false; - } + if (retVal == KIN_MEM_FAIL) { + std::cerr << "KINInit - A memory allocation request has failed" + << std::endl; + return false; + } - return retVal == KIN_SUCCESS; + if (retVal == KIN_ILL_INPUT) { + std::cerr << "KINInit - An input argument to KINInit has an illegal value" + << std::endl; + return false; } - bool KINSOLInstance::kinsolFNTolerance() - { - auto retVal = KINSetFuncNormTol(kinsolMemory, getOptions().fnormtol); + return retVal == KIN_SUCCESS; +} - if (retVal == KIN_MEM_NULL) { - std::cerr << "KINSVtolerances - The kinsol_mem pointer is NULL" << std::endl; - return false; - } +bool KINSOLInstance::kinsolFNTolerance() { + auto retVal = KINSetFuncNormTol(kinsolMemory, 
getOptions().fnormtol); - if (retVal == KIN_ILL_INPUT) { - std::cerr << "KINSVtolerances - The relative error tolerance was negative or the absolute tolerance vector had a negative component" << std::endl; - return false; - } + if (retVal == KIN_MEM_NULL) { + std::cerr << "KINSVtolerances - The kinsol_mem pointer is NULL" + << std::endl; + return false; + } - return retVal == KIN_SUCCESS; + if (retVal == KIN_ILL_INPUT) { + std::cerr << "KINSVtolerances - The relative error tolerance was negative " + "or the absolute tolerance vector had a negative component" + << std::endl; + return false; } - bool KINSOLInstance::kinsolSSTolerance() - { - auto retVal = KINSetScaledStepTol(kinsolMemory, getOptions().scsteptol); + return retVal == KIN_SUCCESS; +} - if (retVal == KIN_MEM_NULL) { - std::cerr << "KINSVtolerances - The kinsol_mem pointer is NULL" << std::endl; - return false; - } +bool KINSOLInstance::kinsolSSTolerance() { + auto retVal = KINSetScaledStepTol(kinsolMemory, getOptions().scsteptol); - if (retVal == KIN_ILL_INPUT) { - std::cerr << "KINSVtolerances - The relative error tolerance was negative or the absolute tolerance vector had a negative component" << std::endl; - return false; - } + if (retVal == KIN_MEM_NULL) { + std::cerr << "KINSVtolerances - The kinsol_mem pointer is NULL" + << std::endl; + return false; + } - return retVal == KIN_SUCCESS; + if (retVal == KIN_ILL_INPUT) { + std::cerr << "KINSVtolerances - The relative error tolerance was negative " + "or the absolute tolerance vector had a negative component" + << std::endl; + return false; } - bool KINSOLInstance::kinsolSetLinearSolver() - { - auto retVal = KINSetLinearSolver(kinsolMemory, linearSolver, sparseMatrix); + return retVal == KIN_SUCCESS; +} - if (retVal == KINLS_MEM_NULL) { - std::cerr << "KINSetLinearSolver - The kinsol_mem pointer is NULL" << std::endl; - return false; - } +bool KINSOLInstance::kinsolSetLinearSolver() { + auto retVal = KINSetLinearSolver(kinsolMemory, linearSolver, 
sparseMatrix); - if (retVal == KINLS_ILL_INPUT) { - std::cerr << "KINSetLinearSolver - The KINLS interface is not compatible with the LS or J input objects or is incompatible with the N_Vector object passed to KINInit" << std::endl; - return false; - } + if (retVal == KINLS_MEM_NULL) { + std::cerr << "KINSetLinearSolver - The kinsol_mem pointer is NULL" + << std::endl; + return false; + } - if (retVal == KINLS_SUNLS_FAIL) { - std::cerr << "KINSetLinearSolver - A call to the LS object failed" << std::endl; - return false; - } + if (retVal == KINLS_ILL_INPUT) { + std::cerr << "KINSetLinearSolver - The KINLS interface is not compatible " + "with the LS or J input objects or is incompatible with the " + "N_Vector object passed to KINInit" + << std::endl; + return false; + } - if (retVal == KINLS_MEM_FAIL) { - std::cerr << "KINSetLinearSolver - A memory allocation request failed" << std::endl; - return false; - } + if (retVal == KINLS_SUNLS_FAIL) { + std::cerr << "KINSetLinearSolver - A call to the LS object failed" + << std::endl; + return false; + } - return retVal == KINLS_SUCCESS; + if (retVal == KINLS_MEM_FAIL) { + std::cerr << "KINSetLinearSolver - A memory allocation request failed" + << std::endl; + return false; } - bool KINSOLInstance::kinsolSetUserData() - { - auto retVal = KINSetUserData(kinsolMemory, this); + return retVal == KINLS_SUCCESS; +} - if (retVal == KIN_MEM_NULL) { - std::cerr << "KINSetUserData - The kinsol_mem pointer is NULL" << std::endl; - return false; - } +bool KINSOLInstance::kinsolSetUserData() { + auto retVal = KINSetUserData(kinsolMemory, this); - return retVal == KIN_SUCCESS; + if (retVal == KIN_MEM_NULL) { + std::cerr << "KINSetUserData - The kinsol_mem pointer is NULL" << std::endl; + return false; } - bool KINSOLInstance::kinsolSetJacobianFunction() - { - auto retVal = KINSetJacFn(kinsolMemory, jacobianMatrix); + return retVal == KIN_SUCCESS; +} - if (retVal == KIN_MEM_NULL) { - std::cerr << "KINSetJacFn - The kinsol_mem pointer is 
NULL" << std::endl; - return false; - } +bool KINSOLInstance::kinsolSetJacobianFunction() { + auto retVal = KINSetJacFn(kinsolMemory, jacobianMatrix); - if (retVal == KINLS_LMEM_NULL) { - std::cerr << "KINSetJacFn - The KINLS linear solver interface has not been initialized" << std::endl; - return false; - } + if (retVal == KIN_MEM_NULL) { + std::cerr << "KINSetJacFn - The kinsol_mem pointer is NULL" << std::endl; + return false; + } - return retVal == KIN_SUCCESS; + if (retVal == KINLS_LMEM_NULL) { + std::cerr << "KINSetJacFn - The KINLS linear solver interface has not been " + "initialized" + << std::endl; + return false; } - void KINSOLInstance::getWritingEquation( - Variable variable, - const std::vector& variableIndices, - Equation& equation, - std::vector& equationIndices) const - { - bool found = false; - uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); + return retVal == KIN_SUCCESS; +} - for (Equation eq = 0; eq < numOfVectorizedEquations; ++eq) { - Variable writtenVariable = getWrittenVariable(eq); +void KINSOLInstance::getWritingEquation( + Variable variable, const std::vector &variableIndices, + Equation &equation, std::vector &equationIndices) const { + bool found = false; + uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); - if (writtenVariable == variable) { - std::vector writingEquationIndices; - getEquationBeginIndices(eq, writingEquationIndices); + for (Equation eq = 0; eq < numOfVectorizedEquations; ++eq) { + Variable writtenVariable = getWrittenVariable(eq); - std::vector writtenVariableIndices( - getVariableRank(writtenVariable)); + if (writtenVariable == variable) { + std::vector writingEquationIndices; + getEquationBeginIndices(eq, writingEquationIndices); - AccessFunction writeAccessFunction = getWriteAccessFunction(eq); + std::vector writtenVariableIndices( + getVariableRank(writtenVariable)); - do { - writeAccessFunction(writingEquationIndices.data(), - writtenVariableIndices.data()); + AccessFunction 
writeAccessFunction = getWriteAccessFunction(eq); - if (writtenVariableIndices == variableIndices) { - assert(!found && - "Multiple equations writing to the same variable"); - found = true; - equation = eq; - equationIndices = writingEquationIndices; - } - } while (advanceEquationIndices( - writingEquationIndices, equationRanges[eq])); - } + do { + writeAccessFunction(writingEquationIndices.data(), + writtenVariableIndices.data()); + + if (writtenVariableIndices == variableIndices) { + assert(!found && "Multiple equations writing to the same variable"); + found = true; + equation = eq; + equationIndices = writingEquationIndices; + } + } while ( + advanceEquationIndices(writingEquationIndices, equationRanges[eq])); } - - assert(found && "Writing equation not found"); } - void KINSOLInstance::printVariablesVector(N_Vector variables) const - { - realtype* data = N_VGetArrayPointer(variables); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); + assert(found && "Writing equation not found"); +} - for (Variable var = 0; var < numOfArrayVariables; ++var) { - std::vector indices; - getVariableBeginIndices(var, indices); +void KINSOLInstance::printVariablesVector(N_Vector variables) const { + realtype *data = N_VGetArrayPointer(variables); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - do { - std::cerr << "var " << var << " "; - printIndices(indices); - std::cerr << "\t" << std::fixed << std::setprecision(9) - << *data << std::endl; - ++data; - } while (advanceVariableIndices(indices, variablesDimensions[var])); - } - } + for (Variable var = 0; var < numOfArrayVariables; ++var) { + std::vector indices; + getVariableBeginIndices(var, indices); - void KINSOLInstance::printResidualsVector(N_Vector residuals) const - { - realtype* data = N_VGetArrayPointer(residuals); - uint64_t numOfArrayVariables = getNumOfArrayVariables(); + do { + std::cerr << "var " << var << " "; + printIndices(indices); + std::cerr << "\t" << std::fixed << std::setprecision(9) << 
*data + << std::endl; + ++data; + } while (advanceVariableIndices(indices, variablesDimensions[var])); + } +} - for (Variable var = 0; var < numOfArrayVariables; ++var) { - std::vector variableIndices; - getVariableBeginIndices(var, variableIndices); +void KINSOLInstance::printResidualsVector(N_Vector residuals) const { + realtype *data = N_VGetArrayPointer(residuals); + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - do { - Equation eq; - std::vector equationIndices; - getWritingEquation(var, variableIndices, eq, equationIndices); + for (Variable var = 0; var < numOfArrayVariables; ++var) { + std::vector variableIndices; + getVariableBeginIndices(var, variableIndices); - std::cerr << "eq " << eq << " "; - printIndices(equationIndices); - std::cerr << " (writing to var " << var; - printIndices(variableIndices); - std::cerr << ")" << "\t" << std::fixed << std::setprecision(9) - << *data << "\n"; - ++data; - } while (advanceVariableIndices( - variableIndices, variablesDimensions[var])); - } + do { + Equation eq; + std::vector equationIndices; + getWritingEquation(var, variableIndices, eq, equationIndices); + + std::cerr << "eq " << eq << " "; + printIndices(equationIndices); + std::cerr << " (writing to var " << var; + printIndices(variableIndices); + std::cerr << ")" << "\t" << std::fixed << std::setprecision(9) << *data + << "\n"; + ++data; + } while (advanceVariableIndices(variableIndices, variablesDimensions[var])); } +} - void KINSOLInstance::printJacobianMatrix(SUNMatrix jacobianMatrix) const - { - uint64_t numOfArrayVariables = getNumOfArrayVariables(); +void KINSOLInstance::printJacobianMatrix(SUNMatrix jacobianMatrix) const { + uint64_t numOfArrayVariables = getNumOfArrayVariables(); - // Print the heading row. - for (Variable var = 0; var < numOfArrayVariables; ++var) { - std::vector variableIndices; - getVariableBeginIndices(var, variableIndices); + // Print the heading row. 
+ for (Variable var = 0; var < numOfArrayVariables; ++var) { + std::vector variableIndices; + getVariableBeginIndices(var, variableIndices); - do { - std::cerr << "\tvar " << var << " "; - printIndices(variableIndices); - } while (advanceVariableIndices( - variableIndices, variablesDimensions[var])); - } + do { + std::cerr << "\tvar " << var << " "; + printIndices(variableIndices); + } while (advanceVariableIndices(variableIndices, variablesDimensions[var])); + } - std::cerr << std::endl; + std::cerr << std::endl; - // Print the rows containing the values. - uint64_t rowFlatIndex = 0; + // Print the rows containing the values. + uint64_t rowFlatIndex = 0; - for (Variable eqVar = 0; eqVar < numOfArrayVariables; ++eqVar) { - std::vector eqVarIndices; - getVariableBeginIndices(eqVar, eqVarIndices); + for (Variable eqVar = 0; eqVar < numOfArrayVariables; ++eqVar) { + std::vector eqVarIndices; + getVariableBeginIndices(eqVar, eqVarIndices); - do { - Equation eq; - std::vector equationIndices; - getWritingEquation(eqVar, eqVarIndices, eq, equationIndices); + do { + Equation eq; + std::vector equationIndices; + getWritingEquation(eqVar, eqVarIndices, eq, equationIndices); - std::cerr << "eq " << eq << " "; - printIndices(equationIndices); - std::cerr << " (writing to var " << eqVar << " "; - printIndices(eqVarIndices); - std::cerr << ")"; + std::cerr << "eq " << eq << " "; + printIndices(equationIndices); + std::cerr << " (writing to var " << eqVar << " "; + printIndices(eqVarIndices); + std::cerr << ")"; - uint64_t columnFlatIndex = 0; + uint64_t columnFlatIndex = 0; - for (Variable indVar = 0; indVar < numOfArrayVariables; ++indVar) { - std::vector indVarIndices; - getVariableBeginIndices(indVar, indVarIndices); + for (Variable indVar = 0; indVar < numOfArrayVariables; ++indVar) { + std::vector indVarIndices; + getVariableBeginIndices(indVar, indVarIndices); - do { - auto value = getCellFromSparseMatrix( - jacobianMatrix, rowFlatIndex, columnFlatIndex); + do { + auto 
value = getCellFromSparseMatrix(jacobianMatrix, rowFlatIndex, + columnFlatIndex); - std::cerr << "\t" << std::fixed << std::setprecision(9) << value; - columnFlatIndex++; - } while (advanceVariableIndices( - indVarIndices, variablesDimensions[indVar])); - } + std::cerr << "\t" << std::fixed << std::setprecision(9) << value; + columnFlatIndex++; + } while ( + advanceVariableIndices(indVarIndices, variablesDimensions[indVar])); + } - std::cerr << std::endl; - rowFlatIndex++; - } while (advanceVariableIndices( - eqVarIndices, variablesDimensions[eqVar])); - } + std::cerr << std::endl; + rowFlatIndex++; + } while (advanceVariableIndices(eqVarIndices, variablesDimensions[eqVar])); } } +} // namespace marco::runtime::sundials::kinsol //===---------------------------------------------------------------------===// // Exported functions @@ -1434,10 +1379,9 @@ namespace marco::runtime::sundials::kinsol //===---------------------------------------------------------------------===// // kinsolCreate -static void* kinsolCreate_pvoid() -{ - auto* instance = new KINSOLInstance(); - return static_cast(instance); +static void *kinsolCreate_pvoid() { + auto *instance = new KINSOLInstance(); + return static_cast(instance); } RUNTIME_FUNC_DEF(kinsolCreate, PTR(void)) @@ -1445,9 +1389,9 @@ RUNTIME_FUNC_DEF(kinsolCreate, PTR(void)) //===---------------------------------------------------------------------===// // kinsolSolve -static void kinsolSolve_void(void* instance) -{ - [[maybe_unused]] bool result = static_cast(instance)->solve(); +static void kinsolSolve_void(void *instance) { + [[maybe_unused]] bool result = + static_cast(instance)->solve(); assert(result && "KINSOL solve failed"); } @@ -1456,9 +1400,8 @@ RUNTIME_FUNC_DEF(kinsolSolve, void, PTR(void)) //===---------------------------------------------------------------------===// // kinsolFree -static void kinsolFree_void(void* instance) -{ - delete static_cast(instance); +static void kinsolFree_void(void *instance) { + delete 
static_cast(instance); } RUNTIME_FUNC_DEF(kinsolFree, void, PTR(void)) @@ -1466,69 +1409,55 @@ RUNTIME_FUNC_DEF(kinsolFree, void, PTR(void)) //===---------------------------------------------------------------------===// // kinsolAddVariable -static uint64_t kinsolAddVariable_i64( - void* instance, - uint64_t rank, - uint64_t* dimensions, - void* getter, - void* setter, - void* name) -{ - return static_cast(instance)->addVariable( - rank, dimensions, - reinterpret_cast(getter), +static uint64_t kinsolAddVariable_i64(void *instance, uint64_t rank, + uint64_t *dimensions, void *getter, + void *setter, void *name) { + return static_cast(instance)->addVariable( + rank, dimensions, reinterpret_cast(getter), reinterpret_cast(setter), - static_cast(name)); + static_cast(name)); } -RUNTIME_FUNC_DEF(kinsolAddVariable, uint64_t, PTR(void), uint64_t, PTR(uint64_t), PTR(void), PTR(void), PTR(void)) +RUNTIME_FUNC_DEF(kinsolAddVariable, uint64_t, PTR(void), uint64_t, + PTR(uint64_t), PTR(void), PTR(void), PTR(void)) //===---------------------------------------------------------------------===// // kinsolAddVariableAccess -static void kinsolAddVariableAccess_void( - void* instance, - uint64_t equationIndex, - uint64_t variableIndex, - void* accessFunction) -{ - static_cast(instance)->addVariableAccess( +static void kinsolAddVariableAccess_void(void *instance, uint64_t equationIndex, + uint64_t variableIndex, + void *accessFunction) { + static_cast(instance)->addVariableAccess( equationIndex, variableIndex, reinterpret_cast(accessFunction)); } -RUNTIME_FUNC_DEF(kinsolAddVariableAccess, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DEF(kinsolAddVariableAccess, void, PTR(void), uint64_t, uint64_t, + PTR(void)) //===---------------------------------------------------------------------===// // kinsolAddEquation -static uint64_t kinsolAddEquation_i64( - void* instance, - int64_t* ranges, - uint64_t rank, - uint64_t writtenVariable, - void* writeAccessFunction, - void* 
stringRepresentation) -{ - return static_cast(instance)->addEquation( +static uint64_t kinsolAddEquation_i64(void *instance, int64_t *ranges, + uint64_t rank, uint64_t writtenVariable, + void *writeAccessFunction, + void *stringRepresentation) { + return static_cast(instance)->addEquation( ranges, rank, writtenVariable, reinterpret_cast(writeAccessFunction), - static_cast(stringRepresentation)); + static_cast(stringRepresentation)); } -RUNTIME_FUNC_DEF(kinsolAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, uint64_t, PTR(void), PTR(void)) +RUNTIME_FUNC_DEF(kinsolAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, + uint64_t, PTR(void), PTR(void)) //===---------------------------------------------------------------------===// // kinsolSetResidual -static void kinsolSetResidual_void( - void* instance, - uint64_t equationIndex, - void* residualFunction) -{ - static_cast(instance)->setResidualFunction( - equationIndex, - reinterpret_cast(residualFunction)); +static void kinsolSetResidual_void(void *instance, uint64_t equationIndex, + void *residualFunction) { + static_cast(instance)->setResidualFunction( + equationIndex, reinterpret_cast(residualFunction)); } RUNTIME_FUNC_DEF(kinsolSetResidual, void, PTR(void), uint64_t, PTR(void)) @@ -1536,17 +1465,15 @@ RUNTIME_FUNC_DEF(kinsolSetResidual, void, PTR(void), uint64_t, PTR(void)) //===---------------------------------------------------------------------===// // kinsolAddJacobian -static void kinsolAddJacobian_void( - void* instance, - uint64_t equationIndex, - uint64_t variableIndex, - void* jacobianFunction) -{ - static_cast(instance)->addJacobianFunction( +static void kinsolAddJacobian_void(void *instance, uint64_t equationIndex, + uint64_t variableIndex, + void *jacobianFunction) { + static_cast(instance)->addJacobianFunction( equationIndex, variableIndex, reinterpret_cast(jacobianFunction)); } -RUNTIME_FUNC_DEF(kinsolAddJacobian, void, PTR(void), uint64_t, uint64_t, PTR(void)) 
+RUNTIME_FUNC_DEF(kinsolAddJacobian, void, PTR(void), uint64_t, uint64_t, + PTR(void)) #endif // SUNDIALS_ENABLE From 53db038c41525961107823a2deb95d0fbdc22ac1 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Tue, 8 Oct 2024 18:54:07 +0200 Subject: [PATCH 07/14] Deallocate SUNDIALS context on solver instance destruction --- lib/Solvers/IDA/Instance.cpp | 4 ++++ lib/Solvers/KINSOL/Instance.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index 59f8a0217..e1539e0d6 100644 --- a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -46,6 +46,10 @@ IDAInstance::~IDAInstance() { SUNMatDestroy(sparseMatrix); } + if (ctx != nullptr) { + SUNContext_Free(&ctx); + } + if (marco::runtime::simulation::getOptions().debug) { std::cerr << "[IDA] Instance destroyed" << std::endl; } diff --git a/lib/Solvers/KINSOL/Instance.cpp b/lib/Solvers/KINSOL/Instance.cpp index 5233eb17c..3eb456397 100644 --- a/lib/Solvers/KINSOL/Instance.cpp +++ b/lib/Solvers/KINSOL/Instance.cpp @@ -40,6 +40,10 @@ KINSOLInstance::~KINSOLInstance() { SUNMatDestroy(sparseMatrix); } + if (ctx != nullptr) { + SUNContext_Free(&ctx); + } + if (marco::runtime::simulation::getOptions().debug) { std::cerr << "[KINSOL] Instance destroyed" << std::endl; } From ebe8173917a0b80d70e7ab1f0b8519e23c17cd3b Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 18:36:06 +0200 Subject: [PATCH 08/14] Add memory pool --- .../marco/Runtime/Support/MemoryManagement.h | 55 +++++++++++++++++++ lib/Support/MemoryManagement.cpp | 53 ++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/include/marco/Runtime/Support/MemoryManagement.h b/include/marco/Runtime/Support/MemoryManagement.h index fda4588f2..e7d31d737 100644 --- a/include/marco/Runtime/Support/MemoryManagement.h +++ b/include/marco/Runtime/Support/MemoryManagement.h @@ -1,7 +1,11 @@ #ifndef MARCO_RUNTIME_MEMORYMANAGEMENT_H #define MARCO_RUNTIME_MEMORYMANAGEMENT_H 
+#include "marco/Runtime/Support/Mangling.h" #include +#include +#include +#include extern "C" { @@ -10,4 +14,55 @@ extern "C" void marco_free(void* ptr); }; +namespace marco::runtime { +class MemoryPool { +public: + MemoryPool() = default; + + MemoryPool(const MemoryPool& other) = delete; + + MemoryPool(MemoryPool&& other) = default; + + ~MemoryPool(); + + MemoryPool& operator=(const MemoryPool& other) = delete; + + MemoryPool& operator=(MemoryPool&& other) = default; + + double* get(uint64_t id) const; + + uint64_t create(size_t numOfElements); + +private: + std::vector buffers; +}; + +class MemoryPoolManager { +private: + MemoryPoolManager() = default; + +public: + static MemoryPoolManager& getInstance(); + + MemoryPoolManager(const MemoryPoolManager& other) = delete; + + MemoryPoolManager(MemoryPoolManager&& other) = default; + + ~MemoryPoolManager() = default; + + MemoryPoolManager& operator=(const MemoryPoolManager& other) = delete; + + MemoryPoolManager& operator=(MemoryPoolManager&& other) = default; + + MemoryPool& get(uint64_t pool) const; + + uint64_t create(); + +private: + std::vector> pools{}; +}; +} + +RUNTIME_FUNC_DECL(memoryPoolGet, PTR(void), int64_t, int64_t) + #endif // MARCO_RUNTIME_MEMORYMANAGEMENT_H diff --git a/lib/Support/MemoryManagement.cpp b/lib/Support/MemoryManagement.cpp index 273135bb9..8c1c69e8c 100644 --- a/lib/Support/MemoryManagement.cpp +++ b/lib/Support/MemoryManagement.cpp @@ -1,5 +1,10 @@ #include "marco/Runtime/Support/MemoryManagement.h" +#include #include +#include +#include + +using namespace ::marco::runtime; #ifdef MARCO_PROFILING @@ -186,3 +191,51 @@ void marco_free(void* ptr) ::profiler().stopTimer(); #endif } +#include +namespace marco::runtime +{ + +MemoryPool::~MemoryPool() { + for (double* buffer : buffers) { + if (buffer != nullptr) { + std::free(buffer); + } + } +} + +double* MemoryPool::get(uint64_t id) const { + assert(id < buffers.size()); + return buffers[id]; +} + +uint64_t MemoryPool::create(size_t 
numOfElements) { + uint64_t id = buffers.size(); + buffers.push_back(static_cast(std::malloc(sizeof(double) * numOfElements))); + return id; +} + +MemoryPoolManager& MemoryPoolManager::getInstance() { + static MemoryPoolManager instance; + return instance; +} + +MemoryPool& MemoryPoolManager::get(uint64_t pool) const { + assert(pool < pools.size()); + return *pools[pool]; +} + +uint64_t MemoryPoolManager::create() { + uint64_t id = pools.size(); + pools.push_back(std::make_unique()); + return id; +} +} + +namespace { +void* memoryPoolGet_pvoid(uint64_t pool, uint64_t buffer) { + MemoryPoolManager& manager = MemoryPoolManager::getInstance(); + return static_cast(manager.get(pool).get(buffer)); +} +} + +RUNTIME_FUNC_DEF(memoryPoolGet, PTR(void), int64_t, int64_t) From 0ed73b87dcce85f2826def93d1cab51a3fb66c52 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 18:39:50 +0200 Subject: [PATCH 09/14] Use memory pool for Jacobian matrix computation with IDA --- include/marco/Runtime/Solvers/IDA/Instance.h | 52 +++- lib/Solvers/IDA/CMakeLists.txt | 3 +- lib/Solvers/IDA/Instance.cpp | 297 +++++++++++++------ 3 files changed, 250 insertions(+), 102 deletions(-) diff --git a/include/marco/Runtime/Solvers/IDA/Instance.h b/include/marco/Runtime/Solvers/IDA/Instance.h index 3857456b2..8c2181c05 100644 --- a/include/marco/Runtime/Solvers/IDA/Instance.h +++ b/include/marco/Runtime/Solvers/IDA/Instance.h @@ -28,9 +28,31 @@ using ResidualFunction = double (*)(double, const int64_t *); /// The 2nd argument is a pointer to the list of equation indices. /// The 3rd argument is a pointer to the list of variable indices. /// The 4th argument is the 'alpha' value. +/// The 5th argument is the identifier of the memory pool owning the AD seeds. +/// The 6th argument is a pointer to the list of AD seed identifiers /// The result is the Jacobian value. 
using JacobianFunction = double (*)(double, const int64_t *, const uint64_t *, - double); + double, uint64_t, const uint64_t*); + +/// A descriptor of a Jacobian function is a pair of value consisting in: +/// - the function pointer +/// - the number of elements of each AD seed +using JacobianFunctionDescriptor = std::pair>; + +/// A map indicating the IDs of the buffers living inside the memory pool to +/// be used as AD seeds for each Jacobian function. +using JacobianSeedsMap = std::map>; + +/// A chunk of equations to be processed by a thread while computing the +/// residual values or partial derivatives. +/// A chunk is composed of: +/// - the identifier (position) of the equation. +/// - the begin indices (included) +/// - the end indices (excluded) +/// - the map indicating the buffer IDs to be used when computing the +/// partial derivatives +using ThreadEquationsChunk = + std::tuple, std::vector, JacobianSeedsMap>; class IDAInstance { public: @@ -71,7 +93,8 @@ class IDAInstance { /// Add the function pointer that computes a partial derivative of an /// equation. void addJacobianFunction(Equation equationIndex, Variable variableIndex, - JacobianFunction jacobianFunction); + JacobianFunction jacobianFunction, + uint64_t numOfSeeds, uint64_t* seedSizes); /// Instantiate and initialize all the classes needed by IDA in order to /// solve the given system of equations. 
It also sets optional simulation @@ -139,12 +162,14 @@ class IDAInstance { [[nodiscard]] uint64_t getVariableRank(Variable variable) const; + void iterateAccessedArrayVariables(Equation equation, std::function callback) const; + std::vector computeJacobianColumns(Equation eq, const int64_t *equationIndices) const; void computeNNZ(); - void computeResidualThreadChunks(); + void computeThreadChunks(); void copyVariablesFromMARCO(N_Vector algebraicAndStateVariablesVector, N_Vector derivativeVariablesVector); @@ -152,9 +177,10 @@ class IDAInstance { void copyVariablesIntoMARCO(N_Vector algebraicAndStateVariablesVector, N_Vector derivativeVariablesVector); - void residualsParallelIteration( + void equationsParallelIteration( std::function &equationIndices)> + const std::vector &equationIndices, + const JacobianSeedsMap& jacobianSeedsMap)> processFn); void getVariableBeginIndices(Variable variable, @@ -256,7 +282,7 @@ class IDAInstance { // The i-th position contains the list of partial derivative functions of // the i-th equation. The j-th function represents the function to // compute the derivative with respect to the j-th variable. - std::vector> jacobianFunctions; + std::vector> jacobianFunctions; // Whether the IDA instance is informed about the accesses to the // variables. @@ -318,20 +344,14 @@ class IDAInstance { // Thread pool. ThreadPool threadPool; - // A chunk of equations to be processed by a thread while computing the - // residual values. - // A chunk is composed of: - // - the identifier (position) of the equation. - // - the begin indices (included) - // - the end indices (excluded) - using ResidualThreadEquationsChunk = - std::tuple, std::vector>; + // Memory pool ID. + uint64_t memoryPoolId; // The list of chunks the threads will process. Each thread elaborates // one chunk at a time. // The information is computed only once during the initialization to // save time during the actual simulation. 
- std::vector residualThreadEquationsChunks; + std::vector threadEquationsChunks; }; } // namespace marco::runtime::sundials::ida @@ -371,7 +391,7 @@ RUNTIME_FUNC_DECL(idaAddEquation, uint64_t, PTR(void), PTR(int64_t), uint64_t, RUNTIME_FUNC_DECL(idaSetResidual, void, PTR(void), uint64_t, PTR(void)) RUNTIME_FUNC_DECL(idaAddJacobian, void, PTR(void), uint64_t, uint64_t, - PTR(void)) + PTR(void), uint64_t, PTR(uint64_t)) RUNTIME_FUNC_DECL(printStatistics, void, PTR(void)) diff --git a/lib/Solvers/IDA/CMakeLists.txt b/lib/Solvers/IDA/CMakeLists.txt index 75fe607b7..b3ae43718 100644 --- a/lib/Solvers/IDA/CMakeLists.txt +++ b/lib/Solvers/IDA/CMakeLists.txt @@ -17,4 +17,5 @@ endif() target_link_libraries(SolverIDA PUBLIC SolverSUNDIALS - Profiling) + Profiling + Support) diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index e1539e0d6..107de5af2 100644 --- a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -4,6 +4,7 @@ #include "marco/Runtime/Simulation/Options.h" #include "marco/Runtime/Solvers/IDA/Options.h" #include "marco/Runtime/Solvers/IDA/Profiler.h" +#include "marco/Runtime/Support/MemoryManagement.h" #include #include #include @@ -309,7 +310,9 @@ void IDAInstance::setResidualFunction(Equation equation, } void IDAInstance::addJacobianFunction(Equation equation, Variable variable, - JacobianFunction jacobianFunction) { + JacobianFunction jacobianFunction, + uint64_t numOfSeeds, + uint64_t *seedSizes) { if (marco::runtime::simulation::getOptions().debug) { std::cerr << "[IDA] Setting jacobian function for equation " << equation << " and variable " << variable @@ -322,10 +325,17 @@ void IDAInstance::addJacobianFunction(Equation equation, Variable variable, } if (jacobianFunctions[equation].size() <= variable) { - jacobianFunctions[equation].resize(variable + 1, nullptr); + jacobianFunctions[equation].resize( + variable + 1, std::make_pair(nullptr, std::vector{})); } - jacobianFunctions[equation][variable] = jacobianFunction; 
+ jacobianFunctions[equation][variable].first = jacobianFunction; + jacobianFunctions[equation][variable].second.resize(numOfSeeds); + + for (uint64_t i = 0; i < numOfSeeds; ++i) { + assert(seedSizes[i] != 0); + jacobianFunctions[equation][variable].second[i] = seedSizes[i]; + } } bool IDAInstance::initialize() { @@ -335,6 +345,7 @@ bool IDAInstance::initialize() { std::cerr << "[IDA] Performing initialization" << std::endl; } + memoryPoolId = MemoryPoolManager::getInstance().create(); currentTime = startTime; // Compute the number of scalar variables. @@ -493,16 +504,17 @@ bool IDAInstance::initialize() { assert(precomputedAccesses || std::all_of(jacobianFunctions.begin(), jacobianFunctions.end(), - [&](std::vector functions) { + [&](std::vector functions) { if (functions.size() != algebraicAndStateVariablesGetters.size()) { return false; } - return std::all_of(functions.begin(), functions.end(), - [](const JacobianFunction &function) { - return function != nullptr; - }); + return std::all_of( + functions.begin(), functions.end(), + [](const JacobianFunctionDescriptor &function) { + return function.first != nullptr; + }); })); // Check that all the getters and setters have been set. @@ -597,7 +609,7 @@ bool IDAInstance::initialize() { computeNNZ(); // Compute the workload for each thread. - computeResidualThreadChunks(); + computeThreadChunks(); // Initialize the values of the variables living inside IDA. copyVariablesFromMARCO(variablesVector, derivativesVector); @@ -891,8 +903,9 @@ int IDAInstance::residualFunction(realtype time, N_Vector variables, // it writes into. 
IDA_PROFILER_RESIDUALS_START; - instance->residualsParallelIteration( - [&](Equation eq, const std::vector &equationIndices) { + instance->equationsParallelIteration( + [&](Equation eq, const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap) { uint64_t equationRank = instance->getEquationRank(eq); assert(equationIndices.size() == equationRank); @@ -956,23 +969,14 @@ int IDAInstance::jacobianMatrix(realtype time, realtype alpha, // the current iteration values. instance->copyVariablesIntoMARCO(variables, derivatives); + // For every vectorized equation, compute its row within the Jacobian + // matrix. IDA_PROFILER_PARTIAL_DERIVATIVES_START; - unsigned int numOfThreads = instance->threadPool.getNumOfThreads(); - - std::atomic_size_t currentEquation = 0; - uint64_t numOfVectorizedEquations = instance->getNumOfVectorizedEquations(); - - for (unsigned int thread = 0; thread < numOfThreads; ++thread) { - instance->threadPool.async([&]() { - size_t equationIndex = 0; - Equation equation; - std::vector equationIndices; - - while ((equationIndex = currentEquation++) < numOfVectorizedEquations) { - equation = instance->equationsProcessingOrder[equationIndex]; - instance->getEquationBeginIndices(equation, equationIndices); - Variable writtenVariable = instance->getWrittenVariable(equation); + instance->equationsParallelIteration( + [&](Equation eq, const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap) { + Variable writtenVariable = instance->getWrittenVariable(eq); uint64_t writtenVariableArrayOffset = instance->variableOffsets[writtenVariable]; @@ -984,60 +988,55 @@ int IDAInstance::jacobianMatrix(realtype time, realtype alpha, writtenVariableIndices.resize(writtenVariableRank, 0); AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(equation); - - do { - writeAccessFunction(equationIndices.data(), - writtenVariableIndices.data()); + instance->getWriteAccessFunction(eq); - uint64_t 
writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t writtenVariableScalarOffset = + getVariableFlatIndex(instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - assert(scalarEquationIndex < instance->getNumOfScalarEquations()); + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; - // Compute the column indices that may be non-zero. - std::vector jacobianColumns = - instance->computeJacobianColumns(equation, - equationIndices.data()); + assert(scalarEquationIndex < instance->getNumOfScalarEquations()); - // For every scalar variable with respect to which the equation must - // be partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn &column = jacobianColumns[i]; - Variable variable = column.first; - const auto &variableIndices = column.second; + // Compute the column indexes that may be non-zeros. + std::vector jacobianColumns = + instance->computeJacobianColumns(eq, equationIndices.data()); - uint64_t variableArrayOffset = instance->variableOffsets[variable]; + // For every scalar variable with respect to which the equation must be + // partially differentiated. 
+ for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; - uint64_t variableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[variable], column.second); + uint64_t variableArrayOffset = instance->variableOffsets[variable]; - assert(instance->jacobianFunctions[equation][variable] != nullptr); + uint64_t variableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[variable], column.second); - auto jacobianFunctionResult = - instance->jacobianFunctions[equation][variable]( - time, equationIndices.data(), variableIndices.data(), - alpha); + auto jacobianFunction = + instance->jacobianFunctions[eq][variable].first; + auto seedsMapIt = jacobianSeedsMap.find(jacobianFunction); + assert(seedsMapIt != jacobianSeedsMap.end()); + assert(jacobianFunction != nullptr); - instance->jacobianMatrixData[scalarEquationIndex][i].second = - jacobianFunctionResult; + auto jacobianFunctionResult = jacobianFunction( + time, equationIndices.data(), variableIndices.data(), alpha, + instance->memoryPoolId, seedsMapIt->second.data()); - auto index = static_cast(variableArrayOffset + - variableScalarOffset); + instance->jacobianMatrixData[scalarEquationIndex][i].second = + jacobianFunctionResult; - instance->jacobianMatrixData[scalarEquationIndex][i].first = index; - } - } while (advanceEquationIndices(equationIndices, - instance->equationRanges[equation])); - } - }); - } + auto index = static_cast(variableArrayOffset + + variableScalarOffset); - instance->threadPool.wait(); + instance->jacobianMatrixData[scalarEquationIndex][i].first = index; + } + }); // Move the partial derivatives into the SUNDIALS sparse matrix. 
sunindextype *rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); @@ -1141,6 +1140,21 @@ uint64_t IDAInstance::getVariableRank(Variable variable) const { return variablesDimensions[variable].rank(); } +void IDAInstance::iterateAccessedArrayVariables( + Equation equation, std::function callback) const { + if (precomputedAccesses) { + for (const auto &access : variableAccesses[equation]) { + callback(access.first); + } + } else { + uint64_t numOfArrayVariables = getNumOfArrayVariables(); + + for (Variable variable = 0; variable < numOfArrayVariables; ++variable) { + callback(variable); + } + } +} + /// Determine which of the columns of the current Jacobian row has to be /// populated, and with respect to which variable the partial derivative has /// to be performed. The row is determined by the indices of the equation. @@ -1173,14 +1187,15 @@ IDAInstance::computeJacobianColumns(Equation eq, uniqueColumns.insert({variable, variableIndices}); } } else { - for (size_t variableIndex = 0, e = getNumOfArrayVariables(); - variableIndex < e; ++variableIndex) { - const auto &dimensions = variablesDimensions[variableIndex]; + uint64_t numOfArrayVariables = getNumOfArrayVariables(); + + for (Variable variable = 0; variable < numOfArrayVariables; ++variable) { + const auto &dimensions = variablesDimensions[variable]; for (auto indices = dimensions.indicesBegin(), end = dimensions.indicesEnd(); indices != end; ++indices) { - JacobianColumn column(variableIndex, {}); + JacobianColumn column(variable, {}); for (size_t dim = 0; dim < dimensions.rank(); ++dim) { column.second.push_back((*indices)[dim]); @@ -1246,7 +1261,7 @@ void IDAInstance::computeNNZ() { } } -void IDAInstance::computeResidualThreadChunks() { +void IDAInstance::computeThreadChunks() { unsigned int numOfThreads = threadPool.getNumOfThreads(); int64_t chunksFactor = getOptions().equationsChunksFactor; @@ -1286,8 +1301,23 @@ void IDAInstance::computeResidualThreadChunks() { equationRanges[equation]); } - 
residualThreadEquationsChunks.emplace_back( - equation, std::move(beginIndices), std::move(endIndices)); + JacobianSeedsMap jacobianSeedsMap; + + iterateAccessedArrayVariables(equation, [&](Variable variable) { + auto jacobianFunction = jacobianFunctions[equation][variable].first; + const auto &seedSizes = jacobianFunctions[equation][variable].second; + + for (const auto &seedSize : seedSizes) { + MemoryPool &memoryPool = + MemoryPoolManager::getInstance().get(memoryPoolId); + uint64_t seedId = memoryPool.create(seedSize); + jacobianSeedsMap[jacobianFunction].push_back(seedId); + } + }); + + threadEquationsChunks.emplace_back(equation, std::move(beginIndices), + std::move(endIndices), + std::move(jacobianSeedsMap)); // Move to the next chunk. equationFlatIndex = endFlatIndex; @@ -1298,6 +1328,101 @@ void IDAInstance::computeResidualThreadChunks() { } } +/* +void IDAInstance::computeJacobianThreadChunks() { + unsigned int numOfThreads = threadPool.getNumOfThreads(); + + int64_t chunksFactor = getOptions().equationsChunksFactor; + int64_t numOfChunks = numOfThreads * chunksFactor; + size_t idealChunkSize = (nonZeroValuesNumber + numOfChunks - 1) / numOfChunks; + size_t chunkSize = 0; + + uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); + + // The number of vectorized equations whose indices have been completely + // assigned. 
+ uint64_t processedEquations = 0; + + while (processedEquations < numOfVectorizedEquations) { + Equation equation = equationsProcessingOrder[processedEquations]; + + Variable writtenVariable = getWrittenVariable(equation); + + uint64_t writtenVariableArrayOffset = variableOffsets[writtenVariable]; + uint64_t writtenVariableRank = getVariableRank(writtenVariable); + + std::vector writtenVariableIndices; + writtenVariableIndices.resize(writtenVariableRank, 0); + + AccessFunction writeAccessFunction = getWriteAccessFunction(equation); + + std::vector equationIndices; + getEquationBeginIndices(equation, equationIndices); + + do { + JacobianSeedsMap jacobianSeedsMap; + std::vector equationBeginIndices = equationIndices; + + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); + + uint64_t writtenVariableScalarOffset = getVariableFlatIndex( + variablesDimensions[writtenVariable], + writtenVariableIndices); + + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; + + assert(scalarEquationIndex < getNumOfScalarEquations()); + + + + + + // Compute the column indices that may be non-zero. + std::vector jacobianColumns = + computeJacobianColumns(equation, equationIndices.data()); + + // For every scalar variable with respect to which the equation must + // be partially differentiated. + for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; + + uint64_t variableArrayOffset = variableOffsets[variable]; + + uint64_t variableScalarOffset = getVariableFlatIndex( + variablesDimensions[variable], column.second); + + assert(jacobianFunctions[equation][variable] != nullptr); + auto jacobianFunction = jacobianFunctions[equation][variable]; + + + } + + if (++chunkSize >= idealChunkSize) { + // Add the chunk. 
+ std::vector equationEndIndices = equationIndices; + uint64_t equationFlatEndIndex = getEquationFlatIndex(equationEndIndices, +equationRanges[equation]); + ++equationFlatEndIndex; + getEquationIndicesFromFlatIndex(equationFlatEndIndex, +equationEndIndices, equationRanges[equation]); + + jacobianThreadEquationsChunks.emplace_back(equation, +equationBeginIndices, equationEndIndices, jacobianSeedsMap); + + chunkSize = 0; + } + } while (advanceEquationIndices(equationIndices, equationRanges[equation])); + + // Move to the next vectorized equation. + ++processedEquations; + } +} +*/ + void IDAInstance::copyVariablesFromMARCO( N_Vector algebraicAndStateVariablesVector, N_Vector derivativeVariablesVector) { @@ -1433,9 +1558,10 @@ void IDAInstance::copyVariablesIntoMARCO( IDA_PROFILER_COPY_VARS_INTO_MARCO_STOP; } -void IDAInstance::residualsParallelIteration( +void IDAInstance::equationsParallelIteration( std::function &equationIndices)> + const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap)> processFn) { // Shard the work among multiple threads. 
unsigned int numOfThreads = threadPool.getNumOfThreads(); @@ -1445,10 +1571,9 @@ void IDAInstance::residualsParallelIteration( threadPool.async([&]() { size_t assignedChunk; - while ((assignedChunk = chunkIndex++) < - residualThreadEquationsChunks.size()) { - const ResidualThreadEquationsChunk &chunk = - residualThreadEquationsChunks[assignedChunk]; + while ((assignedChunk = chunkIndex++) < threadEquationsChunks.size()) { + const ThreadEquationsChunk &chunk = + threadEquationsChunks[assignedChunk]; Equation equation = std::get<0>(chunk); std::vector equationIndices = std::get<1>(chunk); @@ -1469,7 +1594,7 @@ void IDAInstance::residualsParallelIteration( return true; }() && "Invalid equation indices"); - processFn(equation, equationIndices); + processFn(equation, equationIndices, std::get<3>(chunk)); } while (advanceEquationIndicesUntil( equationIndices, equationRanges[equation], std::get<2>(chunk))); } @@ -2257,14 +2382,16 @@ RUNTIME_FUNC_DEF(idaSetResidual, void, PTR(void), uint64_t, PTR(void)) // idaAddJacobian static void idaAddJacobian_void(void *instance, uint64_t equationIndex, - uint64_t variableIndex, - void *jacobianFunction) { + uint64_t variableIndex, void *jacobianFunction, + uint64_t numOfSeeds, uint64_t *seedSizes) { static_cast(instance)->addJacobianFunction( equationIndex, variableIndex, - reinterpret_cast(jacobianFunction)); + reinterpret_cast(jacobianFunction), numOfSeeds, + seedSizes); } -RUNTIME_FUNC_DEF(idaAddJacobian, void, PTR(void), uint64_t, uint64_t, PTR(void)) +RUNTIME_FUNC_DEF(idaAddJacobian, void, PTR(void), uint64_t, uint64_t, PTR(void), + uint64_t, PTR(uint64_t)) //===---------------------------------------------------------------------===// // idaPrintStatistics From 47371d08115bc7a07c2fbd68808c9321a568def8 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 19:15:02 +0200 Subject: [PATCH 10/14] Add missing dependencies --- lib/Solvers/KINSOL/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/lib/Solvers/KINSOL/CMakeLists.txt b/lib/Solvers/KINSOL/CMakeLists.txt index adeffba45..4a3d89881 100644 --- a/lib/Solvers/KINSOL/CMakeLists.txt +++ b/lib/Solvers/KINSOL/CMakeLists.txt @@ -12,3 +12,8 @@ if (MARCO_ENABLE_SUNDIALS) SUNDIALS::nvecserial SUNDIALS::sunlinsolklu) endif() + +target_link_libraries(SolverKINSOL + PUBLIC + SolverSUNDIALS + Profiling) From a43b8b72b5f7c8f8dcb3aa5703fa8079dd64e041 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 19:35:43 +0200 Subject: [PATCH 11/14] Clean-up --- lib/Solvers/IDA/Instance.cpp | 95 ------------------------------------ 1 file changed, 95 deletions(-) diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index 107de5af2..d5ccb1b1a 100644 --- a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -1328,101 +1328,6 @@ void IDAInstance::computeThreadChunks() { } } -/* -void IDAInstance::computeJacobianThreadChunks() { - unsigned int numOfThreads = threadPool.getNumOfThreads(); - - int64_t chunksFactor = getOptions().equationsChunksFactor; - int64_t numOfChunks = numOfThreads * chunksFactor; - size_t idealChunkSize = (nonZeroValuesNumber + numOfChunks - 1) / numOfChunks; - size_t chunkSize = 0; - - uint64_t numOfVectorizedEquations = getNumOfVectorizedEquations(); - - // The number of vectorized equations whose indices have been completely - // assigned. 
- uint64_t processedEquations = 0; - - while (processedEquations < numOfVectorizedEquations) { - Equation equation = equationsProcessingOrder[processedEquations]; - - Variable writtenVariable = getWrittenVariable(equation); - - uint64_t writtenVariableArrayOffset = variableOffsets[writtenVariable]; - uint64_t writtenVariableRank = getVariableRank(writtenVariable); - - std::vector writtenVariableIndices; - writtenVariableIndices.resize(writtenVariableRank, 0); - - AccessFunction writeAccessFunction = getWriteAccessFunction(equation); - - std::vector equationIndices; - getEquationBeginIndices(equation, equationIndices); - - do { - JacobianSeedsMap jacobianSeedsMap; - std::vector equationBeginIndices = equationIndices; - - writeAccessFunction(equationIndices.data(), - writtenVariableIndices.data()); - - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - variablesDimensions[writtenVariable], - writtenVariableIndices); - - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; - - assert(scalarEquationIndex < getNumOfScalarEquations()); - - - - - - // Compute the column indices that may be non-zero. - std::vector jacobianColumns = - computeJacobianColumns(equation, equationIndices.data()); - - // For every scalar variable with respect to which the equation must - // be partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn &column = jacobianColumns[i]; - Variable variable = column.first; - const auto &variableIndices = column.second; - - uint64_t variableArrayOffset = variableOffsets[variable]; - - uint64_t variableScalarOffset = getVariableFlatIndex( - variablesDimensions[variable], column.second); - - assert(jacobianFunctions[equation][variable] != nullptr); - auto jacobianFunction = jacobianFunctions[equation][variable]; - - - } - - if (++chunkSize >= idealChunkSize) { - // Add the chunk. 
- std::vector equationEndIndices = equationIndices; - uint64_t equationFlatEndIndex = getEquationFlatIndex(equationEndIndices, -equationRanges[equation]); - ++equationFlatEndIndex; - getEquationIndicesFromFlatIndex(equationFlatEndIndex, -equationEndIndices, equationRanges[equation]); - - jacobianThreadEquationsChunks.emplace_back(equation, -equationBeginIndices, equationEndIndices, jacobianSeedsMap); - - chunkSize = 0; - } - } while (advanceEquationIndices(equationIndices, equationRanges[equation])); - - // Move to the next vectorized equation. - ++processedEquations; - } -} -*/ - void IDAInstance::copyVariablesFromMARCO( N_Vector algebraicAndStateVariablesVector, N_Vector derivativeVariablesVector) { From d4065b24c8a8e2706aa48b501ebb30129202bd06 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 21:30:03 +0200 Subject: [PATCH 12/14] Check SUNDIALS version for context deallocation --- lib/Solvers/IDA/Instance.cpp | 2 ++ lib/Solvers/KINSOL/Instance.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lib/Solvers/IDA/Instance.cpp b/lib/Solvers/IDA/Instance.cpp index d5ccb1b1a..f26c2681e 100644 --- a/lib/Solvers/IDA/Instance.cpp +++ b/lib/Solvers/IDA/Instance.cpp @@ -47,9 +47,11 @@ IDAInstance::~IDAInstance() { SUNMatDestroy(sparseMatrix); } +#if SUNDIALS_VERSION_MAJOR >= 6 if (ctx != nullptr) { SUNContext_Free(&ctx); } +#endif if (marco::runtime::simulation::getOptions().debug) { std::cerr << "[IDA] Instance destroyed" << std::endl; diff --git a/lib/Solvers/KINSOL/Instance.cpp b/lib/Solvers/KINSOL/Instance.cpp index 3eb456397..edd30263a 100644 --- a/lib/Solvers/KINSOL/Instance.cpp +++ b/lib/Solvers/KINSOL/Instance.cpp @@ -40,9 +40,11 @@ KINSOLInstance::~KINSOLInstance() { SUNMatDestroy(sparseMatrix); } +#if SUNDIALS_VERSION_MAJOR >= 6 if (ctx != nullptr) { SUNContext_Free(&ctx); } +#endif if (marco::runtime::simulation::getOptions().debug) { std::cerr << "[KINSOL] Instance destroyed" << std::endl; From c310f53ce3f38e8afcef381412e9713f09f31807 
Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 21:31:45 +0200 Subject: [PATCH 13/14] Use memory pool for Jacobian matrix computation with KINSOL --- .../marco/Runtime/Solvers/KINSOL/Instance.h | 58 ++++-- lib/Solvers/KINSOL/CMakeLists.txt | 3 +- lib/Solvers/KINSOL/Instance.cpp | 189 ++++++++++-------- 3 files changed, 155 insertions(+), 95 deletions(-) diff --git a/include/marco/Runtime/Solvers/KINSOL/Instance.h b/include/marco/Runtime/Solvers/KINSOL/Instance.h index 43263f4f4..313b36430 100644 --- a/include/marco/Runtime/Solvers/KINSOL/Instance.h +++ b/include/marco/Runtime/Solvers/KINSOL/Instance.h @@ -10,6 +10,7 @@ #include "sundials/sundials_types.h" #include "sunlinsol/sunlinsol_klu.h" #include "sunmatrix/sunmatrix_sparse.h" +#include #include #include @@ -23,7 +24,32 @@ using ResidualFunction = double (*)(const int64_t *); /// The 1st argument is a pointer to the list of equation indices. /// The 2nd argument is a pointer to the list of variable indices. /// The result is the Jacobian value. -using JacobianFunction = double (*)(const int64_t *, const uint64_t *); +/// The 3rd argument is the identifier of the memory pool owning the AD seeds. +/// The 4th argument is a pointer to the list of AD seed identifiers +/// The result is the Jacobian value. +using JacobianFunction = double (*)(const int64_t *, const uint64_t *, uint64_t, + const uint64_t *); + +/// A descriptor of a Jacobian function is a pair of value consisting in: +/// - the function pointer +/// - the number of elements of each AD seed +using JacobianFunctionDescriptor = + std::pair>; + +/// A map indicating the IDs of the buffers living inside the memory pool to +/// be used as AD seeds for each Jacobian function. +using JacobianSeedsMap = std::map>; + +/// A chunk of equations to be processed by a thread while computing the +/// residual values or partial derivatives. +/// A chunk is composed of: +/// - the identifier (position) of the equation. 
+/// - the begin indices (included) +/// - the end indices (excluded) +/// - the map indicating the buffer IDs to be used when computing the +/// partial derivatives +using ThreadEquationsChunk = std::tuple, + std::vector, JacobianSeedsMap>; class KINSOLInstance { public: @@ -52,7 +78,8 @@ class KINSOLInstance { /// Add the function pointer that computes a partial derivative of an /// equation. void addJacobianFunction(Equation equationIndex, Variable variableIndex, - JacobianFunction jacobianFunction); + JacobianFunction jacobianFunction, + uint64_t numOfSeeds, uint64_t *seedSizes); /// Instantiate and initialize all the classes needed by KINSOL in order to /// solve the given system of equations. It also sets optional simulation @@ -96,20 +123,25 @@ class KINSOLInstance { [[nodiscard]] uint64_t getVariableRank(Variable variable) const; + void + iterateAccessedArrayVariables(Equation equation, + std::function callback) const; + std::vector computeJacobianColumns(Equation eq, const int64_t *equationIndices) const; void computeNNZ(); - void computeResidualThreadChunks(); + void computeThreadChunks(); void copyVariablesFromMARCO(N_Vector variables); void copyVariablesIntoMARCO(N_Vector variables); - void residualsParallelIteration( + void equationsParallelIteration( std::function &equationIndices)> + const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap)> processFn); void getVariableBeginIndices(Variable variable, @@ -194,7 +226,7 @@ class KINSOLInstance { // The i-th position contains the list of partial derivative functions of // the i-th equation. The j-th function represents the function to // compute the derivative with respect to the j-th variable. - std::vector> jacobianFunctions; + std::vector> jacobianFunctions; // Whether the IDA instance is informed about the accesses to the // variables. @@ -240,20 +272,14 @@ class KINSOLInstance { // Thread pool. 
ThreadPool threadPool; - // A chunk of equations to be processed by a thread while computing the - // residual values. - // A chunk is composed of: - // - the identifier (position) of the equation. - // - the begin indices (included) - // - the end indices (exluded) - using ResidualThreadEquationsChunk = - std::tuple, std::vector>; + // Memory pool ID. + uint64_t memoryPoolId; // The list of chunks the threads will process. Each thread elaborates // one chunk at a time. // The information is computed only once during the initialization to // save time during the actual simulation. - std::vector residualThreadEquationsChunks; + std::vector threadEquationsChunks; }; } // namespace marco::runtime::sundials::kinsol @@ -279,7 +305,7 @@ RUNTIME_FUNC_DECL(kinsolAddEquation, uint64_t, PTR(void), PTR(int64_t), RUNTIME_FUNC_DECL(kinsolSetResidual, void, PTR(void), uint64_t, PTR(void)) RUNTIME_FUNC_DECL(kinsolAddJacobian, void, PTR(void), uint64_t, uint64_t, - PTR(void)) + PTR(void), uint64_t, PTR(uint64_t)) #endif // SUNDIALS_ENABLE diff --git a/lib/Solvers/KINSOL/CMakeLists.txt b/lib/Solvers/KINSOL/CMakeLists.txt index 4a3d89881..21807667b 100644 --- a/lib/Solvers/KINSOL/CMakeLists.txt +++ b/lib/Solvers/KINSOL/CMakeLists.txt @@ -16,4 +16,5 @@ endif() target_link_libraries(SolverKINSOL PUBLIC SolverSUNDIALS - Profiling) + Profiling + Support) diff --git a/lib/Solvers/KINSOL/Instance.cpp b/lib/Solvers/KINSOL/Instance.cpp index edd30263a..5f33d3ef4 100644 --- a/lib/Solvers/KINSOL/Instance.cpp +++ b/lib/Solvers/KINSOL/Instance.cpp @@ -5,6 +5,7 @@ #include "marco/Runtime/Simulation/Options.h" #include "marco/Runtime/Solvers/KINSOL/Options.h" #include "marco/Runtime/Solvers/KINSOL/Profiler.h" +#include "marco/Runtime/Support/MemoryManagement.h" #include #include #include @@ -205,7 +206,9 @@ void KINSOLInstance::setResidualFunction(Equation equation, } void KINSOLInstance::addJacobianFunction(Equation equation, Variable variable, - JacobianFunction jacobianFunction) { + 
JacobianFunction jacobianFunction, + uint64_t numOfSeeds, + uint64_t *seedSizes) { if (marco::runtime::simulation::getOptions().debug) { std::cerr << "[KINSOL] Setting jacobian function for equation " << equation << " and variable " << variable @@ -218,10 +221,17 @@ void KINSOLInstance::addJacobianFunction(Equation equation, Variable variable, } if (jacobianFunctions[equation].size() <= variable) { - jacobianFunctions[equation].resize(variable + 1, nullptr); + jacobianFunctions[equation].resize( + variable + 1, std::make_pair(nullptr, std::vector{})); } - jacobianFunctions[equation][variable] = jacobianFunction; + jacobianFunctions[equation][variable].first = jacobianFunction; + jacobianFunctions[equation][variable].second.resize(numOfSeeds); + + for (uint64_t i = 0; i < numOfSeeds; ++i) { + assert(seedSizes[i] != 0); + jacobianFunctions[equation][variable].second[i] = seedSizes[i]; + } } bool KINSOLInstance::initialize() { @@ -231,6 +241,8 @@ bool KINSOLInstance::initialize() { std::cerr << "[KINSOL] Performing initialization" << std::endl; } + memoryPoolId = MemoryPoolManager::getInstance().create(); + // Compute the number of scalar variables. scalarVariablesNumber = 0; @@ -360,15 +372,16 @@ bool KINSOLInstance::initialize() { assert(precomputedAccesses || std::all_of(jacobianFunctions.begin(), jacobianFunctions.end(), - [&](std::vector functions) { + [&](std::vector functions) { if (functions.size() != variableGetters.size()) { return false; } - return std::all_of(functions.begin(), functions.end(), - [](const JacobianFunction &function) { - return function != nullptr; - }); + return std::all_of( + functions.begin(), functions.end(), + [](const JacobianFunctionDescriptor &function) { + return function.first != nullptr; + }); })); // Check that all the getters and setters have been set. @@ -449,7 +462,7 @@ bool KINSOLInstance::initialize() { computeNNZ(); // Compute the equation chunks for each thread. 
- computeResidualThreadChunks(); + computeThreadChunks(); // Create and initialize the memory for KINSOL. #if SUNDIALS_VERSION_MAJOR >= 6 @@ -559,8 +572,9 @@ int KINSOLInstance::residualFunction(N_Vector variables, N_Vector residuals, // it writes into. KINSOL_PROFILER_RESIDUALS_START; - instance->residualsParallelIteration( - [&](Equation eq, const std::vector &equationIndices) { + instance->equationsParallelIteration( + [&](Equation eq, const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap) { uint64_t equationRank = instance->getEquationRank(eq); assert(equationIndices.size() == equationRank); @@ -622,21 +636,10 @@ int KINSOLInstance::jacobianMatrix(N_Vector variables, N_Vector residuals, KINSOL_PROFILER_PARTIAL_DERIVATIVES_START; - unsigned int numOfThreads = instance->threadPool.getNumOfThreads(); - - std::atomic_size_t currentEquation = 0; - uint64_t numOfVectorizedEquations = instance->getNumOfVectorizedEquations(); - - for (unsigned int thread = 0; thread < numOfThreads; ++thread) { - instance->threadPool.async([&]() { - size_t equationIndex = 0; - Equation equation; - std::vector equationIndices; - - while ((equationIndex = currentEquation++) < numOfVectorizedEquations) { - equation = instance->equationsProcessingOrder[equationIndex]; - instance->getEquationBeginIndices(equation, equationIndices); - Variable writtenVariable = instance->getWrittenVariable(equation); + instance->equationsParallelIteration( + [&](Equation eq, const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap) { + Variable writtenVariable = instance->getWrittenVariable(eq); uint64_t writtenVariableArrayOffset = instance->variableOffsets[writtenVariable]; @@ -648,60 +651,58 @@ int KINSOLInstance::jacobianMatrix(N_Vector variables, N_Vector residuals, writtenVariableIndices.resize(writtenVariableRank, 0); AccessFunction writeAccessFunction = - instance->getWriteAccessFunction(equation); + instance->getWriteAccessFunction(eq); - do { - 
writeAccessFunction(equationIndices.data(), - writtenVariableIndices.data()); + writeAccessFunction(equationIndices.data(), + writtenVariableIndices.data()); - uint64_t writtenVariableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[writtenVariable], - writtenVariableIndices); + uint64_t writtenVariableScalarOffset = + getVariableFlatIndex(instance->variablesDimensions[writtenVariable], + writtenVariableIndices); - uint64_t scalarEquationIndex = - writtenVariableArrayOffset + writtenVariableScalarOffset; + uint64_t scalarEquationIndex = + writtenVariableArrayOffset + writtenVariableScalarOffset; - assert(scalarEquationIndex < instance->getNumOfScalarEquations()); + assert(scalarEquationIndex < instance->getNumOfScalarEquations()); - // Compute the column indices that may be non-zero. - std::vector jacobianColumns = - instance->computeJacobianColumns(equation, - equationIndices.data()); + // Compute the column indexes that may be non-zeros. + std::vector jacobianColumns = + instance->computeJacobianColumns(eq, equationIndices.data()); - // For every scalar variable with respect to which the equation must - // be partially differentiated. - for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { - const JacobianColumn &column = jacobianColumns[i]; - Variable variable = column.first; - const auto &variableIndices = column.second; + // For every scalar variable with respect to which the equation must be + // partially differentiated. 
+ for (size_t i = 0, e = jacobianColumns.size(); i < e; ++i) { + const JacobianColumn &column = jacobianColumns[i]; + Variable variable = column.first; + const auto &variableIndices = column.second; - uint64_t variableArrayOffset = instance->variableOffsets[variable]; + uint64_t variableArrayOffset = instance->variableOffsets[variable]; - uint64_t variableScalarOffset = getVariableFlatIndex( - instance->variablesDimensions[variable], column.second); + uint64_t variableScalarOffset = getVariableFlatIndex( + instance->variablesDimensions[variable], column.second); - assert(instance->jacobianFunctions[equation][variable] != nullptr); + auto jacobianFunction = + instance->jacobianFunctions[eq][variable].first; - auto jacobianFunctionResult = - instance->jacobianFunctions[equation][variable]( - equationIndices.data(), variableIndices.data()); + auto seedsMapIt = jacobianSeedsMap.find(jacobianFunction); + assert(seedsMapIt != jacobianSeedsMap.end()); + assert(jacobianFunction != nullptr); - instance->jacobianMatrixData[scalarEquationIndex][i].second = - jacobianFunctionResult; + auto jacobianFunctionResult = jacobianFunction( + equationIndices.data(), variableIndices.data(), + instance->memoryPoolId, seedsMapIt->second.data()); - auto index = static_cast(variableArrayOffset + - variableScalarOffset); + instance->jacobianMatrixData[scalarEquationIndex][i].second = + jacobianFunctionResult; - instance->jacobianMatrixData[scalarEquationIndex][i].first = index; - } - } while (advanceEquationIndices(equationIndices, - instance->equationRanges[equation])); - } - }); - } + auto index = static_cast(variableArrayOffset + + variableScalarOffset); - instance->threadPool.wait(); + instance->jacobianMatrixData[scalarEquationIndex][i].first = index; + } + }); + // Move the partial derivatives into the SUNDIALS sparse matrix. 
sunindextype *rowPtrs = SUNSparseMatrix_IndexPointers(jacobianMatrix); sunindextype *columnIndices = SUNSparseMatrix_IndexValues(jacobianMatrix); @@ -793,6 +794,21 @@ uint64_t KINSOLInstance::getVariableRank(Variable variable) const { return variablesDimensions[variable].rank(); } +void KINSOLInstance::iterateAccessedArrayVariables( + Equation equation, std::function callback) const { + if (precomputedAccesses) { + for (const auto &access : variableAccesses[equation]) { + callback(access.first); + } + } else { + uint64_t numOfArrayVariables = getNumOfArrayVariables(); + + for (Variable variable = 0; variable < numOfArrayVariables; ++variable) { + callback(variable); + } + } +} + /// Determine which of the columns of the current Jacobian row has to be /// populated, and with respect to which variable the partial derivative has /// to be performed. The row is determined by the indices of the equation. @@ -898,7 +914,7 @@ void KINSOLInstance::computeNNZ() { } } -void KINSOLInstance::computeResidualThreadChunks() { +void KINSOLInstance::computeThreadChunks() { unsigned int numOfThreads = threadPool.getNumOfThreads(); int64_t chunksFactor = getOptions().equationsChunksFactor; @@ -938,8 +954,23 @@ void KINSOLInstance::computeResidualThreadChunks() { equationRanges[equation]); } - residualThreadEquationsChunks.emplace_back( - equation, std::move(beginIndices), std::move(endIndices)); + JacobianSeedsMap jacobianSeedsMap; + + iterateAccessedArrayVariables(equation, [&](Variable variable) { + auto jacobianFunction = jacobianFunctions[equation][variable].first; + const auto &seedSizes = jacobianFunctions[equation][variable].second; + + for (const auto &seedSize : seedSizes) { + MemoryPool &memoryPool = + MemoryPoolManager::getInstance().get(memoryPoolId); + uint64_t seedId = memoryPool.create(seedSize); + jacobianSeedsMap[jacobianFunction].push_back(seedId); + } + }); + + threadEquationsChunks.emplace_back(equation, std::move(beginIndices), + std::move(endIndices), + 
std::move(jacobianSeedsMap)); // Move to the next chunk. equationFlatIndex = endFlatIndex; @@ -1035,9 +1066,10 @@ void KINSOLInstance::copyVariablesIntoMARCO(N_Vector variables) { KINSOL_PROFILER_COPY_VARS_INTO_MARCO_STOP; } -void KINSOLInstance::residualsParallelIteration( +void KINSOLInstance::equationsParallelIteration( std::function &equationIndices)> + const std::vector &equationIndices, + const JacobianSeedsMap &jacobianSeedsMap)> processFn) { // Shard the work among multiple threads. unsigned int numOfThreads = threadPool.getNumOfThreads(); @@ -1047,10 +1079,9 @@ void KINSOLInstance::residualsParallelIteration( threadPool.async([&]() { size_t assignedChunk; - while ((assignedChunk = chunkIndex++) < - residualThreadEquationsChunks.size()) { - const ResidualThreadEquationsChunk &chunk = - residualThreadEquationsChunks[assignedChunk]; + while ((assignedChunk = chunkIndex++) < threadEquationsChunks.size()) { + const ThreadEquationsChunk &chunk = + threadEquationsChunks[assignedChunk]; Equation equation = std::get<0>(chunk); std::vector equationIndices = std::get<1>(chunk); @@ -1071,7 +1102,7 @@ void KINSOLInstance::residualsParallelIteration( return true; }() && "Invalid equation indices"); - processFn(equation, equationIndices); + processFn(equation, equationIndices, std::get<3>(chunk)); } while (advanceEquationIndicesUntil( equationIndices, equationRanges[equation], std::get<2>(chunk))); } @@ -1473,13 +1504,15 @@ RUNTIME_FUNC_DEF(kinsolSetResidual, void, PTR(void), uint64_t, PTR(void)) static void kinsolAddJacobian_void(void *instance, uint64_t equationIndex, uint64_t variableIndex, - void *jacobianFunction) { + void *jacobianFunction, uint64_t numOfSeeds, + uint64_t *seedSizes) { static_cast(instance)->addJacobianFunction( equationIndex, variableIndex, - reinterpret_cast(jacobianFunction)); + reinterpret_cast(jacobianFunction), numOfSeeds, + seedSizes); } RUNTIME_FUNC_DEF(kinsolAddJacobian, void, PTR(void), uint64_t, uint64_t, - PTR(void)) + PTR(void), 
uint64_t, PTR(uint64_t)) #endif // SUNDIALS_ENABLE From 4b256af81738d2a2c288d180437820260b1cb715 Mon Sep 17 00:00:00 2001 From: Michele Scuttari Date: Wed, 9 Oct 2024 23:15:16 +0200 Subject: [PATCH 14/14] Reformat code --- include/marco/Runtime/Solvers/IDA/Instance.h | 17 +- .../marco/Runtime/Support/MemoryManagement.h | 35 ++- lib/Support/MemoryManagement.cpp | 269 ++++++++---------- 3 files changed, 152 insertions(+), 169 deletions(-) diff --git a/include/marco/Runtime/Solvers/IDA/Instance.h b/include/marco/Runtime/Solvers/IDA/Instance.h index 8c2181c05..e756726c8 100644 --- a/include/marco/Runtime/Solvers/IDA/Instance.h +++ b/include/marco/Runtime/Solvers/IDA/Instance.h @@ -32,12 +32,13 @@ using ResidualFunction = double (*)(double, const int64_t *); /// The 6th argument is a pointer to the list of AD seed identifiers /// The result is the Jacobian value. using JacobianFunction = double (*)(double, const int64_t *, const uint64_t *, - double, uint64_t, const uint64_t*); + double, uint64_t, const uint64_t *); /// A descriptor of a Jacobian function is a pair of value consisting in: /// - the function pointer /// - the number of elements of each AD seed -using JacobianFunctionDescriptor = std::pair>; +using JacobianFunctionDescriptor = + std::pair>; /// A map indicating the IDs of the buffers living inside the memory pool to /// be used as AD seeds for each Jacobian function. @@ -51,8 +52,8 @@ using JacobianSeedsMap = std::map>; /// - the end indices (excluded) /// - the map indicating the buffer IDs to be used when computing the /// partial derivatives -using ThreadEquationsChunk = - std::tuple, std::vector, JacobianSeedsMap>; +using ThreadEquationsChunk = std::tuple, + std::vector, JacobianSeedsMap>; class IDAInstance { public: @@ -94,7 +95,7 @@ class IDAInstance { /// equation. 
void addJacobianFunction(Equation equationIndex, Variable variableIndex, JacobianFunction jacobianFunction, - uint64_t numOfSeeds, uint64_t* seedSizes); + uint64_t numOfSeeds, uint64_t *seedSizes); /// Instantiate and initialize all the classes needed by IDA in order to /// solve the given system of equations. It also sets optional simulation @@ -162,7 +163,9 @@ class IDAInstance { [[nodiscard]] uint64_t getVariableRank(Variable variable) const; - void iterateAccessedArrayVariables(Equation equation, std::function callback) const; + void + iterateAccessedArrayVariables(Equation equation, + std::function callback) const; std::vector computeJacobianColumns(Equation eq, const int64_t *equationIndices) const; @@ -180,7 +183,7 @@ class IDAInstance { void equationsParallelIteration( std::function &equationIndices, - const JacobianSeedsMap& jacobianSeedsMap)> + const JacobianSeedsMap &jacobianSeedsMap)> processFn); void getVariableBeginIndices(Variable variable, diff --git a/include/marco/Runtime/Support/MemoryManagement.h b/include/marco/Runtime/Support/MemoryManagement.h index e7d31d737..4f9d4d183 100644 --- a/include/marco/Runtime/Support/MemoryManagement.h +++ b/include/marco/Runtime/Support/MemoryManagement.h @@ -7,11 +7,10 @@ #include #include -extern "C" -{ - void* marco_malloc(int64_t size); - void* marco_realloc(void* ptr, int64_t size); - void marco_free(void* ptr); +extern "C" { +void *marco_malloc(int64_t size); +void *marco_realloc(void *ptr, int64_t size); +void marco_free(void *ptr); }; namespace marco::runtime { @@ -19,22 +18,22 @@ class MemoryPool { public: MemoryPool() = default; - MemoryPool(const MemoryPool& other) = delete; + MemoryPool(const MemoryPool &other) = delete; - MemoryPool(MemoryPool&& other) = default; + MemoryPool(MemoryPool &&other) = default; ~MemoryPool(); - MemoryPool& operator=(const MemoryPool& other) = delete; + MemoryPool &operator=(const MemoryPool &other) = delete; - MemoryPool& operator=(MemoryPool&& other) = default; + 
MemoryPool &operator=(MemoryPool &&other) = default; - double* get(uint64_t id) const; + double *get(uint64_t id) const; uint64_t create(size_t numOfElements); private: - std::vector buffers; + std::vector buffers; }; class MemoryPoolManager { @@ -42,26 +41,26 @@ class MemoryPoolManager { MemoryPoolManager() = default; public: - static MemoryPoolManager& getInstance(); + static MemoryPoolManager &getInstance(); - MemoryPoolManager(const MemoryPoolManager& other) = delete; + MemoryPoolManager(const MemoryPoolManager &other) = delete; - MemoryPoolManager(MemoryPoolManager&& other) = default; + MemoryPoolManager(MemoryPoolManager &&other) = default; ~MemoryPoolManager() = default; - MemoryPoolManager& operator=(const MemoryPoolManager& other) = delete; + MemoryPoolManager &operator=(const MemoryPoolManager &other) = delete; - MemoryPoolManager& operator=(MemoryPoolManager&& other) = default; + MemoryPoolManager &operator=(MemoryPoolManager &&other) = default; - MemoryPool& get(uint64_t pool) const; + MemoryPool &get(uint64_t pool) const; uint64_t create(); private: std::vector> pools{}; }; -} +} // namespace marco::runtime RUNTIME_FUNC_DECL(memoryPoolGet, PTR(void), int64_t, int64_t) diff --git a/lib/Support/MemoryManagement.cpp b/lib/Support/MemoryManagement.cpp index 8c1c69e8c..191dba6c9 100644 --- a/lib/Support/MemoryManagement.cpp +++ b/lib/Support/MemoryManagement.cpp @@ -14,143 +14,126 @@ using namespace ::marco::runtime; #include #include -namespace marco::runtime::profiling -{ - class MemoryProfiler : public Profiler - { - public: - MemoryProfiler() : Profiler("Memory management") - { - registerProfiler(*this); - } - - void reset() override - { - std::lock_guard lockGuard(mutex); - - mallocCalls = 0; - reallocCalls = 0; - freeCalls = 0; - totalHeapMemory = 0; - currentHeapMemory = 0; - peakHeapMemory = 0; - timer.reset(); - } - - void print() const override - { - std::lock_guard lockGuard(mutex); - - std::cerr << "Number of 'malloc' invocations: " << 
mallocCalls << "\n"; - std::cerr << "Number of 'realloc' invocations: " << reallocCalls << "\n"; - std::cerr << "Number of 'free' invocations: " << freeCalls << "\n"; - - if (mallocCalls > reallocCalls + freeCalls) { - std::cerr << "[Warning] Possible memory leak detected\n"; - } else if (mallocCalls + reallocCalls < freeCalls) { - std::cerr << "[Warning] Possible double 'free' detected\n"; - } - - std::cerr << "Total amount of heap allocated memory: " << totalHeapMemory << " bytes\n"; - std::cerr << "Peak of heap memory usage: " << peakHeapMemory << " bytes\n"; - std::cerr << "Time spent on heap memory management: " << time() << " ms\n"; - } - - void malloc(void* address, int64_t bytes) - { - std::lock_guard lockGuard(mutex); - - ++mallocCalls; - - totalHeapMemory += bytes; - currentHeapMemory += bytes; - sizes[address] = bytes; - - if (currentHeapMemory > peakHeapMemory) { - peakHeapMemory = currentHeapMemory; - } - } - - void realloc(void* previous, void* current, int64_t bytes) - { - std::lock_guard lockGuard(mutex); - - ++reallocCalls; - - totalHeapMemory -= sizes[previous]; - currentHeapMemory -= sizes[previous]; - - totalHeapMemory += bytes; - currentHeapMemory += bytes; - sizes[current] = bytes; - - if (currentHeapMemory > peakHeapMemory) { - peakHeapMemory = currentHeapMemory; - } - } - - void free(void* address) - { - std::lock_guard lockGuard(mutex); - - ++freeCalls; - - if (auto it = sizes.find(address); it != sizes.end()) { - currentHeapMemory -= it->second; - sizes.erase(it); - } - } - - void startTimer() - { - std::lock_guard lockGuard(mutex); - timer.start(); - } - - void stopTimer() - { - std::lock_guard lockGuard(mutex); - timer.stop(); - } - - private: - double time() const - { - return timer.totalElapsedTime(); - } - - private: - size_t mallocCalls; - size_t reallocCalls; - size_t freeCalls; - int64_t totalHeapMemory; - int64_t currentHeapMemory; - int64_t peakHeapMemory; - std::map sizes; - Timer timer; - - mutable std::mutex mutex; - }; -} 
+namespace marco::runtime::profiling { +class MemoryProfiler : public Profiler { +public: + MemoryProfiler() : Profiler("Memory management") { registerProfiler(*this); } + + void reset() override { + std::lock_guard lockGuard(mutex); + + mallocCalls = 0; + reallocCalls = 0; + freeCalls = 0; + totalHeapMemory = 0; + currentHeapMemory = 0; + peakHeapMemory = 0; + timer.reset(); + } + + void print() const override { + std::lock_guard lockGuard(mutex); + + std::cerr << "Number of 'malloc' invocations: " << mallocCalls << "\n"; + std::cerr << "Number of 'realloc' invocations: " << reallocCalls << "\n"; + std::cerr << "Number of 'free' invocations: " << freeCalls << "\n"; + + if (mallocCalls > reallocCalls + freeCalls) { + std::cerr << "[Warning] Possible memory leak detected\n"; + } else if (mallocCalls + reallocCalls < freeCalls) { + std::cerr << "[Warning] Possible double 'free' detected\n"; + } + + std::cerr << "Total amount of heap allocated memory: " << totalHeapMemory + << " bytes\n"; + std::cerr << "Peak of heap memory usage: " << peakHeapMemory << " bytes\n"; + std::cerr << "Time spent on heap memory management: " << time() << " ms\n"; + } + + void malloc(void *address, int64_t bytes) { + std::lock_guard lockGuard(mutex); -namespace -{ - marco::runtime::profiling::MemoryProfiler& profiler() - { - static marco::runtime::profiling::MemoryProfiler obj; - return obj; + ++mallocCalls; + + totalHeapMemory += bytes; + currentHeapMemory += bytes; + sizes[address] = bytes; + + if (currentHeapMemory > peakHeapMemory) { + peakHeapMemory = currentHeapMemory; + } } + + void realloc(void *previous, void *current, int64_t bytes) { + std::lock_guard lockGuard(mutex); + + ++reallocCalls; + + totalHeapMemory -= sizes[previous]; + currentHeapMemory -= sizes[previous]; + + totalHeapMemory += bytes; + currentHeapMemory += bytes; + sizes[current] = bytes; + + if (currentHeapMemory > peakHeapMemory) { + peakHeapMemory = currentHeapMemory; + } + } + + void free(void *address) { + 
std::lock_guard lockGuard(mutex); + + ++freeCalls; + + if (auto it = sizes.find(address); it != sizes.end()) { + currentHeapMemory -= it->second; + sizes.erase(it); + } + } + + void startTimer() { + std::lock_guard lockGuard(mutex); + timer.start(); + } + + void stopTimer() { + std::lock_guard lockGuard(mutex); + timer.stop(); + } + +private: + double time() const { return timer.totalElapsedTime(); } + +private: + size_t mallocCalls; + size_t reallocCalls; + size_t freeCalls; + int64_t totalHeapMemory; + int64_t currentHeapMemory; + int64_t peakHeapMemory; + std::map sizes; + Timer timer; + + mutable std::mutex mutex; +}; +} // namespace marco::runtime::profiling + +namespace { +marco::runtime::profiling::MemoryProfiler &profiler() { + static marco::runtime::profiling::MemoryProfiler obj; + return obj; } +} // namespace #endif -void* marco_malloc(int64_t sizeInBytes) -{ +void *marco_malloc(int64_t sizeInBytes) { #ifdef MARCO_PROFILING ::profiler().startTimer(); #endif - void* result = sizeInBytes == 0 ? nullptr : std::malloc(sizeInBytes); + void *result = sizeInBytes == 0 ? nullptr : std::malloc(sizeInBytes); #ifdef MARCO_PROFILING ::profiler().stopTimer(); @@ -160,13 +143,12 @@ void* marco_malloc(int64_t sizeInBytes) return result; } -void* marco_realloc(void* ptr, int64_t sizeInBytes) -{ +void *marco_realloc(void *ptr, int64_t sizeInBytes) { #ifdef MARCO_PROFILING ::profiler().startTimer(); #endif - void* result = sizeInBytes == 0 ? nullptr : std::realloc(ptr, sizeInBytes); + void *result = sizeInBytes == 0 ? 
nullptr : std::realloc(ptr, sizeInBytes); #ifdef MARCO_PROFILING ::profiler().stopTimer(); @@ -176,8 +158,7 @@ void* marco_realloc(void* ptr, int64_t sizeInBytes) return result; } -void marco_free(void* ptr) -{ +void marco_free(void *ptr) { #ifdef MARCO_PROFILING ::profiler().free(ptr); ::profiler().startTimer(); @@ -192,34 +173,34 @@ void marco_free(void* ptr) #endif } #include -namespace marco::runtime -{ +namespace marco::runtime { MemoryPool::~MemoryPool() { - for (double* buffer : buffers) { + for (double *buffer : buffers) { if (buffer != nullptr) { std::free(buffer); } } } -double* MemoryPool::get(uint64_t id) const { +double *MemoryPool::get(uint64_t id) const { assert(id < buffers.size()); return buffers[id]; } uint64_t MemoryPool::create(size_t numOfElements) { uint64_t id = buffers.size(); - buffers.push_back(static_cast(std::malloc(sizeof(double) * numOfElements))); + buffers.push_back( + static_cast(std::malloc(sizeof(double) * numOfElements))); return id; } -MemoryPoolManager& MemoryPoolManager::getInstance() { +MemoryPoolManager &MemoryPoolManager::getInstance() { static MemoryPoolManager instance; return instance; } -MemoryPool& MemoryPoolManager::get(uint64_t pool) const { +MemoryPool &MemoryPoolManager::get(uint64_t pool) const { assert(pool < pools.size()); return *pools[pool]; } @@ -229,13 +210,13 @@ uint64_t MemoryPoolManager::create() { pools.push_back(std::make_unique()); return id; } -} +} // namespace marco::runtime namespace { -void* memoryPoolGet_pvoid(uint64_t pool, uint64_t buffer) { - MemoryPoolManager& manager = MemoryPoolManager::getInstance(); - return static_cast(manager.get(pool).get(buffer)); -} +void *memoryPoolGet_pvoid(uint64_t pool, uint64_t buffer) { + MemoryPoolManager &manager = MemoryPoolManager::getInstance(); + return static_cast(manager.get(pool).get(buffer)); } +} // namespace RUNTIME_FUNC_DEF(memoryPoolGet, PTR(void), int64_t, int64_t)