diff --git a/ode/impl/KokkosODE_BDF_impl.hpp b/ode/impl/KokkosODE_BDF_impl.hpp index 3119ff0e3a..3e817563da 100644 --- a/ode/impl/KokkosODE_BDF_impl.hpp +++ b/ode/impl/KokkosODE_BDF_impl.hpp @@ -142,7 +142,7 @@ struct BDF_system_wrapper2 { if (compute_jac) { mySys.evaluate_jacobian(t, dt, y, jac); - // J = I - dt*(dy/dy) + // J = I - dt*(df/dy) for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { for (int colIdx = 0; colIdx < neqs; ++colIdx) { jac(rowIdx, colIdx) = -dt * jac(rowIdx, colIdx); diff --git a/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp b/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp index 6a0770d1a7..f5b844a358 100644 --- a/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp +++ b/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp @@ -257,6 +257,59 @@ struct ButcherTableau<4, 6> // Referred to as DOPRI5 or RKDP 11.0 / 84.0 - 187.0 / 2100.0, -1.0 / 40.0}}; }; +// Coefficients obtained from: +// J. H. Verner +// "Explicit Runge-Kutta methods with estimates of the local truncation error", +// Journal of Numerical Analysis, Volume 15, Issue 4, 1978, +// https://doi.org/10.1137/0715051. +template <> +struct ButcherTableau<5, 7> // Referred to as Verner 5-6 or VER56 +{ + static constexpr int order = 6; + static constexpr int nstages = 8; + Kokkos::Array a{{0.0, + 1.0 / 6.0, + 0.0, + 4.0 / 75.0, + 16.0 / 75.0, + 0.0, + 5.0 / 6.0, + -8.0 / 3.0, + 5.0 / 2.0, + 0.0, + -165.0 / 64.0, + 55.0 / 6.0, + -425.0 / 64.0, + 85.0 / 96.0, + 0.0, + 12.0 / 5.0, + -8.0, + 4015.0 / 612.0, + -11.0 / 36.0, + 88.0 / 255.0, + 0.0, + -8263.0 / 15000.0, + 124.0 / 75.0, + -643.0 / 680.0, + -81.0 / 250.0, + 2484.0 / 10625.0, + 0.0, + 0.0, + 3501.0 / 1720.0, + -300.0 / 43.0, + 297275.0 / 52632.0, + -319.0 / 2322.0, + 24068.0 / 84065.0, + 3850.0 / 26703.0, + 0.0}}; + Kokkos::Array b{ + {3.0 / 4.0, 0.0, 875.0 / 2244.0, 23.0 / 72.0, 264.0 / 1955.0, 0.0, 125.0 / 11592.0, 43.0 / 616.0}}; + Kokkos::Array c{{0.0, 1.0 / 6.0, 4.0 / 15.0, 2.0 / 3.0, 5.0 / 6.0, 1.0, 1.0 / 15.0, 1.0}}; + Kokkos::Array e{{3.0 / 4.0 - 13.0 / 160.0, 0.0, 875.0 / 2244.0 - 2375.0 / 5984.0, + 23.0 / 72.0 - 5.0 / 16.0, 264.0 / 1955.0 - 12.0 / 85.0, -3.0 / 44.0, + 125.0 / 11592.0, 43.0 / 616.0}}; +}; + } // namespace Impl } // namespace KokkosODE diff --git a/ode/impl/KokkosODE_RungeKutta_impl.hpp b/ode/impl/KokkosODE_RungeKutta_impl.hpp index 83ab76758f..533914477d 100644 --- a/ode/impl/KokkosODE_RungeKutta_impl.hpp +++ b/ode/impl/KokkosODE_RungeKutta_impl.hpp @@ -23,19 +23,84 @@ #include "KokkosODE_RungeKuttaTables_impl.hpp" #include "KokkosODE_Types.hpp" +#include "iostream" + namespace KokkosODE { namespace Impl { +// This algorithm is mostly derived from +// E. Hairer, S. P. Norsett G. Wanner, +// "Solving Ordinary Differential Equations I: +// Nonstiff Problems", Sec. II.4. +// Note that all floating point values below +// have been heuristically selected for +// convergence performance. +template +KOKKOS_FUNCTION void first_step_size(const ode_type ode, const int order, const scalar_type t0, const scalar_type atol, + const scalar_type rtol, const vec_type& y0, const res_type& f0, const vec_type y1, + const mat_type temp, scalar_type& dt_ini) { + using KAT = Kokkos::ArithTraits; + + // Extract subviews to store intermediate data + auto f1 = Kokkos::subview(temp, 1, Kokkos::ALL()); + + // Compute norms for y0 and f0 + double n0 = KAT::zero(), n1 = KAT::zero(), dt0, scale; + for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { + scale = atol + rtol * Kokkos::abs(y0(eqIdx)); + n0 += Kokkos::pow(y0(eqIdx) / scale, 2); + n1 += Kokkos::pow(f0(eqIdx) / scale, 2); + } + n0 = Kokkos::sqrt(n0) / Kokkos::sqrt(ode.neqs); + n1 = Kokkos::sqrt(n1) / Kokkos::sqrt(ode.neqs); + + // Select dt0 + if ((n0 < 1e-5) || (n1 < 1e-5)) { + dt0 = 1e-6; + } else { + dt0 = 0.01 * n0 / n1; + } + + // Estimate y at t0 + dt0 + for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { + y1(eqIdx) = y0(eqIdx) + dt0 * f0(eqIdx); + } + + // Compute f at t0+dt0 and y1, + // then compute the norm of f(t0+dt0, y1) - f(t0, y0) + scalar_type n2 = KAT::zero(); + ode.evaluate_function(t0 + dt0, dt0, y1, f1); + for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { + n2 += Kokkos::pow((f1(eqIdx) - f0(eqIdx)) / (atol + rtol * Kokkos::abs(y0(eqIdx))), 2); + } + n2 = Kokkos::sqrt(n2) / (dt0 * Kokkos::sqrt(ode.neqs)); + + // Finally select initial time step dt_ini + if ((n1 <= 1e-15) && (n2 <= 1e-15)) { + dt_ini = Kokkos::max(1e-6, dt0 * 1e-3); + } else { + dt_ini = Kokkos::pow(0.01 / Kokkos::max(n1, n2), KAT::one() / order); + } + + dt_ini = Kokkos::min(100 * dt0, dt_ini); + + // Zero out temp variables just to be safe... + for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { + f0(eqIdx) = 0.0; + y1(eqIdx) = 0.0; + f1(eqIdx) = 0.0; + } +} // first_step_size + // y_new = y_old + dt*sum(b_i*k_i) i in [1, nstages] // k_i = f(t+c_i*dt, y_old+sum(a_{ij}*k_i)) j in [1, i-1] // we need to compute the k_i and store them as we go // to use them for k_{i+1} computation. template -KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool adaptivity, scalar_type t, - scalar_type dt, const vec_type& y_old, const vec_type& y_new, const vec_type& temp, - const mv_type& k_vecs) { - const int neqs = ode.neqs; - const int nstages = table.nstages; +KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, scalar_type t, scalar_type dt, + const vec_type& y_old, const vec_type& y_new, const vec_type& temp, const mv_type& k_vecs) { + const int neqs = ode.neqs; + constexpr int nstages = table_type::nstages; // first set y_new = y_old for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { @@ -72,34 +137,42 @@ KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool a y_new(eqIdx) += dt * table.b[stageIdx] * k(eqIdx); } } - - // Compute estimation of the error using k_vecs and table.e - if (adaptivity == true) { - for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) { - temp(eqIdx) = 0; - for (int stageIdx = 0; stageIdx < nstages; ++stageIdx) { - temp(eqIdx) += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx); - } - } - } } // RKStep +// Note that the control values for +// time step increase/decrease are +// heuristically chosen based on +// L. F. Shampine and M. W. Reichelt +// "The Matlab ODE suite" SIAM J. Sci. +// Comput. Vol. 18, No. 1, pp. 1-22 +// Jan. 1997 template KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, const table_type& table, const KokkosODE::Experimental::ODE_params& params, const scalar_type t_start, const scalar_type t_end, const vec_type& y0, const vec_type& y, const vec_type& temp, - const mv_type& k_vecs) { + const mv_type& k_vecs, int* const step_count) { constexpr scalar_type error_threshold = 1; - bool adapt = params.adaptivity; + scalar_type error_n; + bool adapt = params.adaptivity; bool dt_was_reduced; - if (std::is_same_v>) { + if constexpr (std::is_same_v>) { adapt = false; } // Set current time and initial time step - scalar_type t_now = t_start; - scalar_type dt = (t_end - t_start) / params.max_steps; + scalar_type t_now = t_start, dt = 0.0; + if (adapt == true) { + ode.evaluate_function(t_start, 0, y0, temp); + first_step_size(ode, table_type::order, t_start, params.abs_tol, params.rel_tol, y0, temp, y, k_vecs, dt); + if (dt < params.min_step_size) { + dt = params.min_step_size; + } + } else { + dt = (t_end - t_start) / params.num_steps; + } + + *step_count = 0; // Loop over time steps to integrate ODE for (int stepIdx = 0; (stepIdx < params.max_steps) && (t_now <= t_end); ++stepIdx) { @@ -119,28 +192,33 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con // Take tentative steps until the requested error // is met. This of course only works for adaptive // solvers, for fix time steps we simply do not - // compute and check what error of the current step + // compute and check the error of the current step while (error_threshold < error) { // Take a step of Runge-Kutta integrator - RKStep(ode, table, adapt, t_now, dt, y0, y, temp, k_vecs); + RKStep(ode, table, t_now, dt, y0, y, temp, k_vecs); // Compute the largest error and decide on // the size of the next time step to take. error = 0; - if (adapt) { + + // Compute estimation of the error using k_vecs and table.e + if (adapt == true) { // Compute the error for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { - error = Kokkos::max(error, Kokkos::abs(temp(eqIdx))); - tol = Kokkos::max( - tol, params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx)))); + tol = params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx))); + error_n = 0; + for (int stageIdx = 0; stageIdx < table.nstages; ++stageIdx) { + error_n += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx); + } + error += (error_n * error_n) / (tol * tol); } - error = error / tol; + error = Kokkos::sqrt(error / ode.neqs); // Reduce the time step if error // is too large and current step // is rejected. if (error > 1) { - dt = dt * Kokkos::max(0.2, 0.8 / Kokkos::pow(error, 1 / table.order)); + dt = dt * Kokkos::max(0.2, 0.8 * Kokkos::pow(error, -1.0 / table.order)); dt_was_reduced = true; } @@ -150,6 +228,7 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con // Update time and initial condition for next time step t_now += dt; + *step_count += 1; for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { y0(eqIdx) = y(eqIdx); } @@ -157,7 +236,7 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con if (t_now < t_end) { if (adapt && !dt_was_reduced && error < 0.5) { // Compute new time increment - dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, 1 / table.order))); + dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, -1.0 / table.order))); } } else { return Experimental::ode_solver_status::SUCCESS; diff --git a/ode/src/KokkosODE_BDF.hpp b/ode/src/KokkosODE_BDF.hpp index 419316ba45..2afb6e46e2 100644 --- a/ode/src/KokkosODE_BDF.hpp +++ b/ode/src/KokkosODE_BDF.hpp @@ -93,6 +93,7 @@ struct BDF { const double dt = (t_end - t_start) / num_steps; double t = t_start; + int count = 0; // Load y0 into y_vecs(:, 0) for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { @@ -107,7 +108,7 @@ struct BDF { } KokkosODE::Experimental::ODE_params params(table.order - 1); for (int stepIdx = 0; stepIdx < init_steps; ++stepIdx) { - KokkosODE::Experimental::RungeKutta::Solve(ode, params, t, t + dt, y0, y, update, kstack); + KokkosODE::Experimental::RungeKutta::Solve(ode, params, t, t + dt, y0, y, update, kstack, &count); for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) { y_vecs(eqIdx, stepIdx + 1) = y(eqIdx); diff --git a/ode/src/KokkosODE_RungeKutta.hpp b/ode/src/KokkosODE_RungeKutta.hpp index 2d298a6568..39c1800b6c 100644 --- a/ode/src/KokkosODE_RungeKutta.hpp +++ b/ode/src/KokkosODE_RungeKutta.hpp @@ -38,7 +38,8 @@ enum RK_type : int { RK4 = 4, ///< Runge-Kutta classic order 4 method RKF45 = 5, ///< Fehlberg order 5 method RKCK = 6, ///< Cash-Karp method - RKDP = 7 ///< Dormand-Prince method + RKDP = 7, ///< Dormand-Prince method + VER56 = 8 ///< Verner order 6 method }; template @@ -86,6 +87,11 @@ struct RK_Tableau_helper { using table_type = KokkosODE::Impl::ButcherTableau<4, 6>; }; +template <> +struct RK_Tableau_helper { + using table_type = KokkosODE::Impl::ButcherTableau<5, 7>; +}; + /// \brief Unspecialized version of the RungeKutta solvers /// /// \tparam RK_type an RK_type enum value used to specify @@ -128,9 +134,10 @@ struct RungeKutta { template KOKKOS_FUNCTION static ode_solver_status Solve(const ode_type& ode, const KokkosODE::Experimental::ODE_params& params, const scalar_type t_start, const scalar_type t_end, const vec_type& y0, - const vec_type& y, const vec_type& temp, const mv_type& k_vecs) { + const vec_type& y, const vec_type& temp, const mv_type& k_vecs, + int* const count) { table_type table; - return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs); + return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs, count); } }; diff --git a/ode/src/KokkosODE_Types.hpp b/ode/src/KokkosODE_Types.hpp index 2145afb718..e28bcb82f0 100644 --- a/ode/src/KokkosODE_Types.hpp +++ b/ode/src/KokkosODE_Types.hpp @@ -27,12 +27,21 @@ struct ODE_params { int num_steps, max_steps; double abs_tol, rel_tol, min_step_size; + KOKKOS_FUNCTION + ODE_params() + : adaptivity(true), num_steps(100), max_steps(10000), abs_tol(1e-12), rel_tol(1e-6), min_step_size(1e-9) {} + // Constructor that only specify the desired number of steps. // In this case no adaptivity is provided, the time step will // be constant such that dt = (tend - tstart) / num_steps; KOKKOS_FUNCTION ODE_params(const int num_steps_) - : adaptivity(false), num_steps(num_steps_), max_steps(num_steps_), abs_tol(0), rel_tol(0), min_step_size(0) {} + : adaptivity(false), + num_steps(num_steps_), + max_steps(num_steps_ + 1), + abs_tol(1e-12), + rel_tol(1e-6), + min_step_size(0) {} /// ODE_parms construtor for adaptive time stepping. KOKKOS_FUNCTION diff --git a/ode/unit_test/Test_ODE.hpp b/ode/unit_test/Test_ODE.hpp index 1b55171581..f30ff39bd2 100644 --- a/ode/unit_test/Test_ODE.hpp +++ b/ode/unit_test/Test_ODE.hpp @@ -19,6 +19,7 @@ // Explicit integrators #include "Test_ODE_RK.hpp" #include "Test_ODE_RK_chem.hpp" +#include "Test_ODE_RK_counts.hpp" // Implicit integrators #include "Test_ODE_Newton.hpp" diff --git a/ode/unit_test/Test_ODE_RK.hpp b/ode/unit_test/Test_ODE_RK.hpp index 90bec0e184..6d6f7877fe 100644 --- a/ode/unit_test/Test_ODE_RK.hpp +++ b/ode/unit_test/Test_ODE_RK.hpp @@ -21,12 +21,14 @@ namespace Test { -// damped harmonic undriven oscillator +// damped undriven harmonic oscillator // m y'' + c y' + k y = 0 -// solution: y=A * exp(-xi * omega_0 * t) * sin(sqrt(1-xi^2) * omega_0 * t + -// phi) omega_0 = sqrt(k/m); xi = c / sqrt(4*m*k) A and phi depend on y(0) and -// y'(0); Change of variables: x(t) = y(t)*exp(-c/(2m)*t) = y(t)*exp(-xi * -// omega_0 * t) Change of variables: X = [x ] +// solution: y=A * exp(-xi * omega_0 * t) * sin(sqrt(1-xi^2) * omega_0 * t + phi) +// omega_0 = sqrt(k/m) +// xi = c / sqrt(4*m*k) +// A and phi depend on y(0) and y'(0); +// Change of variables: x(t) = y(t)*exp(-c/(2m)*t) = y(t)*exp(-xi * omega_0 * t) +// Change of variables: X = [x ] // [x'] // Leads to X' = A*X with A = [ 0 1] // [-d 0] @@ -74,7 +76,8 @@ struct solution_wrapper { void operator()(const int /*idx*/) const { ode.solution(t, y_old, y_ref); } }; -template +template struct RKSolve_wrapper { using ode_params = KokkosODE::Experimental::ODE_params; @@ -84,10 +87,11 @@ struct RKSolve_wrapper { int max_steps; vec_type y_old, y_new, tmp; mv_type kstack; + count_type count; RKSolve_wrapper(const ode_type& my_ode_, const ode_params& params_, const scalar_type tstart_, const scalar_type tend_, const vec_type& y_old_, const vec_type& y_new_, const vec_type& tmp_, - const mv_type& kstack_) + const mv_type& kstack_, const count_type& count_) : my_ode(my_ode_), params(params_), tstart(tstart_), @@ -95,11 +99,13 @@ struct RKSolve_wrapper { y_old(y_old_), y_new(y_new_), tmp(tmp_), - kstack(kstack_) {} + kstack(kstack_), + count(count_) {} KOKKOS_FUNCTION void operator()(const int /*idx*/) const { - KokkosODE::Experimental::RungeKutta::Solve(my_ode, params, tstart, tend, y_old, y_new, tmp, kstack); + KokkosODE::Experimental::RungeKutta::Solve(my_ode, params, tstart, tend, y_old, y_new, tmp, kstack, + count.data()); } }; @@ -110,14 +116,16 @@ void test_method(const std::string label, ode_type& my_ode, const scalar_type& t const Kokkos::View& sol, typename vec_type::HostMirror y_ref_h) { using execution_space = typename vec_type::execution_space; using solver_type = KokkosODE::Experimental::RungeKutta; + using count_type = Kokkos::View; KokkosODE::Experimental::ODE_params params(num_steps); vec_type tmp("tmp vector", my_ode.neqs); mv_type kstack("k stack", solver_type::num_stages(), my_ode.neqs); + count_type count("time step count", 1); Kokkos::RangePolicy my_policy(0, 1); - RKSolve_wrapper solve_wrapper(my_ode, params, tstart, tend, y_old, - y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + my_ode, params, tstart, tend, y_old, y_new, tmp, kstack, count); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror_view(y_new); @@ -284,7 +292,9 @@ void test_rate(ode_type& my_ode, const scalar_type& tstart, const scalar_type& t typename vec_type::HostMirror& y_ref_h, typename vec_type::HostMirror& error) { using execution_space = typename vec_type::execution_space; using solver_type = KokkosODE::Experimental::RungeKutta; + using count_type = Kokkos::View; + count_type count("time step count", 1); vec_type tmp("tmp vector", my_ode.neqs); mv_type kstack("k stack", solver_type::num_stages(), my_ode.neqs); @@ -295,10 +305,11 @@ void test_rate(ode_type& my_ode, const scalar_type& tstart, const scalar_type& t Kokkos::RangePolicy my_policy(0, 1); for (int idx = 0; idx < num_steps.extent_int(0); ++idx) { KokkosODE::Experimental::ODE_params params(num_steps(idx)); + params.adaptivity = false; Kokkos::deep_copy(y_old, y_old_h); Kokkos::deep_copy(y_new, y_old_h); - RKSolve_wrapper solve_wrapper(my_ode, params, tstart, tend, - y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + my_ode, params, tstart, tend, y_old, y_new, tmp, kstack, count); Kokkos::parallel_for(my_policy, solve_wrapper); Kokkos::deep_copy(y_new_h, y_new); @@ -306,8 +317,8 @@ void test_rate(ode_type& my_ode, const scalar_type& tstart, const scalar_type& t #if defined(HAVE_KOKKOSKERNELS_DEBUG) scalar_type dt = (tend - tstart) / num_steps(idx); - std::cout << "dt=" << dt << ", error=" << error(idx) << ", solution: {" << y_new_h(0) << ", " << y_new_h(1) << "}" - << std::endl; + std::cout << "count=" << count(0) << ", dt=" << dt << ", error=" << error(idx) << ", solution: {" << y_new_h(0) + << ", " << y_new_h(1) << "}" << std::endl; #endif } @@ -424,9 +435,11 @@ void test_adaptivity() { using RK_type = KokkosODE::Experimental::RK_type; using vec_type = Kokkos::View; using mv_type = Kokkos::View; + using count_type = Kokkos::View; duho my_oscillator(1, 1, 4); const int neqs = my_oscillator.neqs; + count_type count("time step count", 1); vec_type y("solution", neqs), f("function", neqs); auto y_h = Kokkos::create_mirror(y); @@ -471,8 +484,8 @@ void test_adaptivity() { KokkosODE::Experimental::ODE_params params(numSteps, maxSteps, absTol, relTol, minStepSize); Kokkos::deep_copy(y_old, y_old_h); Kokkos::deep_copy(y_new, y_old_h); - RKSolve_wrapper solve_wrapper(my_oscillator, params, tstart, tend, - y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + my_oscillator, params, tstart, tend, y_old, y_new, tmp, kstack, count); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror(y_new); diff --git a/ode/unit_test/Test_ODE_RK_chem.hpp b/ode/unit_test/Test_ODE_RK_chem.hpp index 690e271c84..aabdcbb490 100644 --- a/ode/unit_test/Test_ODE_RK_chem.hpp +++ b/ode/unit_test/Test_ODE_RK_chem.hpp @@ -92,6 +92,7 @@ void test_chem() { using mv_type = Kokkos::View; using RK_type = KokkosODE::Experimental::RK_type; using solver_type = KokkosODE::Experimental::RungeKutta; + using count_type = Kokkos::View; { chem_model_1 chem_model; @@ -101,6 +102,7 @@ void test_chem() { KokkosODE::Experimental::ODE_params params(num_steps); vec_type tmp("tmp vector", neqs); mv_type kstack("k stack", solver_type::num_stages(), neqs); + count_type count("time steps count", 1); // Set initial conditions vec_type y_new("solution", neqs); @@ -112,8 +114,8 @@ void test_chem() { Kokkos::deep_copy(y_new, y_old_h); Kokkos::RangePolicy my_policy(0, 1); - RKSolve_wrapper solve_wrapper( - chem_model, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + chem_model, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, kstack, count); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror(y_new); @@ -137,6 +139,7 @@ void test_chem() { KokkosODE::Experimental::ODE_params params(num_steps); vec_type tmp("tmp vector", neqs); mv_type kstack("k stack", solver_type::num_stages(), neqs); + count_type count("time steps count", 1); // Set initial conditions vec_type y_new("solution", neqs); @@ -153,8 +156,8 @@ void test_chem() { Kokkos::deep_copy(y_new, y_old_h); Kokkos::RangePolicy my_policy(0, 1); - RKSolve_wrapper solve_wrapper( - chem_model, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, kstack); + RKSolve_wrapper solve_wrapper( + chem_model, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, kstack, count); Kokkos::parallel_for(my_policy, solve_wrapper); auto y_new_h = Kokkos::create_mirror(y_new); diff --git a/ode/unit_test/Test_ODE_RK_counts.hpp b/ode/unit_test/Test_ODE_RK_counts.hpp new file mode 100644 index 0000000000..a59a9a1754 --- /dev/null +++ b/ode/unit_test/Test_ODE_RK_counts.hpp @@ -0,0 +1,157 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include "KokkosKernels_TestUtils.hpp" + +#include "KokkosODE_RungeKutta.hpp" +#include "Test_ODE_TestProblems.hpp" + +namespace Test { + +std::string RK_type_to_name(const KokkosODE::Experimental::RK_type RK) { + std::string name; + + switch (RK) { + case KokkosODE::Experimental::RK_type::RKFE: name = "Forward-Euler"; break; + case KokkosODE::Experimental::RK_type::RKEH: name = "Euler-Heun"; break; + case KokkosODE::Experimental::RK_type::RKF12: name = "Fehlberg 1-2"; break; + case KokkosODE::Experimental::RK_type::RKBS: name = "Bogacki-Shampine"; break; + case KokkosODE::Experimental::RK_type::RK4: name = "Classic RK order 4"; break; + case KokkosODE::Experimental::RK_type::RKF45: name = "Fehlberg 4-5"; break; + case KokkosODE::Experimental::RK_type::RKCK: name = "Cash-Karp"; break; + case KokkosODE::Experimental::RK_type::RKDP: name = "Dormand-Prince"; break; + default: name = "Unknown Runge-Kutta method"; + } + + return name; +} + +template +void RK_Count(const Device, const OdeType myODE, const double relTol, const double absTol, + const int /*expected_count*/) { + using execution_space = typename Device::execution_space; + using vec_type = Kokkos::View; + using mv_type = Kokkos::View; + using count_type = Kokkos::View; + + constexpr int neqs = myODE.neqs; + + constexpr double tstart = myODE.tstart(), tend = myODE.tend(); + constexpr int num_steps = myODE.numsteps(); + constexpr int maxSteps = 1e6; + constexpr double minStepSize = (tend - tstart) / (100 * maxSteps); + KokkosODE::Experimental::ODE_params params( + num_steps, maxSteps, 1.0e-12, (RK == KokkosODE::Experimental::RK_type::RKF12) ? 1.0e-8 : 1.0e-6, minStepSize); + + vec_type y("solution", neqs), f("function", neqs); + vec_type y_new("y new", neqs), y_old("y old", neqs); + count_type count("time step count", 1); + + auto y_h = Kokkos::create_mirror_view(y); + typename vec_type::HostMirror y_old_h = Kokkos::create_mirror(y_old); + auto y_ref_h = Kokkos::create_mirror(y); + for (int dofIdx = 0; dofIdx < neqs; ++dofIdx) { + y_h(dofIdx) = myODE.expected_val(tstart, dofIdx); + y_old_h(dofIdx) = y_h(dofIdx); + y_ref_h(dofIdx) = myODE.expected_val(tend, dofIdx); + } + Kokkos::deep_copy(y, y_h); + + vec_type tmp("tmp vector", neqs); + mv_type kstack("k stack", KokkosODE::Experimental::RungeKutta::num_stages(), neqs); + + Kokkos::RangePolicy my_policy(0, 1); + Kokkos::deep_copy(y_old, y_old_h); + Kokkos::deep_copy(y_new, y_old_h); + RKSolve_wrapper solve_wrapper(myODE, params, tstart, tend, y_old, + y_new, tmp, kstack, count); + Kokkos::parallel_for(my_policy, solve_wrapper); + + auto y_new_h = Kokkos::create_mirror(y_new); + Kokkos::deep_copy(y_new_h, y_new); + + typename count_type::HostMirror count_h = Kokkos::create_mirror_view(count); + Kokkos::deep_copy(count_h, count); + + double error = 0.0; + for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) { + error += Kokkos::pow(y_ref_h(eqIdx) - y_new_h(eqIdx), 2.0) / + Kokkos::pow(absTol + relTol * Kokkos::abs(y_new_h(eqIdx)), 2.0); + } + error = Kokkos::sqrt(error / neqs); + + std::string msg = std::string(OdeType::name) + ", " + RK_type_to_name(RK); + EXPECT_LE(error, 1.0) << msg.c_str(); + // EXPECT_LE(count_h(0), expected_count); +} // RK_Count + +} // namespace Test + +template +void test_RK_count() { + // RK_Count (Device, OdeType, relTol, absTol, /*expected_count*/) + Test::RK_Count(TestDevice(), TestProblem::DegreeOnePoly(), 1.0e-6, 1e-12, 2); + Test::RK_Count(TestDevice(), TestProblem::DegreeTwoPoly(), 1.0e-6, 1e-12, 2); + Test::RK_Count(TestDevice(), TestProblem::DegreeThreePoly(), 1.0e-6, 1e-12, 2); + Test::RK_Count(TestDevice(), TestProblem::DegreeFivePoly(), 1.0e-6, 1e-12, 5); + Test::RK_Count(TestDevice(), TestProblem::Exponential(0.7), 2.0e-6, 1e-12, 4); + Test::RK_Count(TestDevice(), TestProblem::SpringMassDamper(1001., 1000.), 1.0e-4, 0.0, 272); + Test::RK_Count(TestDevice(), TestProblem::CosExp(-10., 2., 1.), 5.3e-5, 0.0, 25); + if constexpr (RK == KokkosODE::Experimental::RK_type::RKF12) { + Test::RK_Count(TestDevice(), TestProblem::StiffChemicalDecayProcess(1e4, 1.), 4e-9, 1e-9, 2786); + } else { + Test::RK_Count(TestDevice(), TestProblem::StiffChemicalDecayProcess(1e4, 1.), 4e-9, 1.8e-10, 2786); + } + Test::RK_Count(TestDevice(), TestProblem::Tracer(10.0), 0.0, 1e-3, 10); + Test::RK_Count(TestDevice(), TestProblem::EnrightB5(), 1.3e-2, 0.0, 90); + if constexpr (RK == KokkosODE::Experimental::RK_type::RKF12) { + Test::RK_Count(TestDevice(), TestProblem::EnrightC1(), 1.e-4, 1e-14, 90); + } else { + Test::RK_Count(TestDevice(), TestProblem::EnrightC1(), 1.e-5, 1e-14, 90); + } + if constexpr (RK == KokkosODE::Experimental::RK_type::RKF12) { + Test::RK_Count(TestDevice(), TestProblem::EnrightC5(), 1.e-4, 1e-14, 97); + } else { + Test::RK_Count(TestDevice(), TestProblem::EnrightC5(), 1.e-5, 1e-14, 97); + } + if constexpr (RK == KokkosODE::Experimental::RK_type::RKF12) { + Test::RK_Count(TestDevice(), TestProblem::EnrightD2(), 2.e-4, 0.0, 590); + } else { + Test::RK_Count(TestDevice(), TestProblem::EnrightD2(), 1.e-5, 0.0, 590); + } + Test::RK_Count(TestDevice(), TestProblem::EnrightD4(), 1.e-5, 1.e-9, 932); + if constexpr (RK != KokkosODE::Experimental::RK_type::RKF12) { + Test::RK_Count(TestDevice(), TestProblem::KKStiffChemistry(), 1e-5, 0.0, 1); + } +} + +void test_count() { + using RK_type = KokkosODE::Experimental::RK_type; + + // test_RK_count(); + test_RK_count(); + test_RK_count(); + test_RK_count(); + test_RK_count(); + test_RK_count(); + test_RK_count(); + // test_RK_count(); +} + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, RK_Count) { test_count(); } +#endif diff --git a/ode/unit_test/Test_ODE_TestProblems.hpp b/ode/unit_test/Test_ODE_TestProblems.hpp new file mode 100644 index 0000000000..2b66ec682d --- /dev/null +++ b/ode/unit_test/Test_ODE_TestProblems.hpp @@ -0,0 +1,649 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef TEST_ODE_TESTPROBLEMS_HPP +#define TEST_ODE_TESTPROBLEMS_HPP + +namespace TestProblem { + +struct DegreeOnePoly { + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& /*y*/, View2& dydt) const { + for (int dofIdx = 0; dofIdx < neqs; ++dofIdx) { + dydt(dofIdx) = 1; + } + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0; + } + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 1.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int /*n*/) const { return t + 1.0; } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 1; + static constexpr char name[] = "DegreeOnePoly"; +}; + +struct DegreeTwoPoly { + template + KOKKOS_FUNCTION void evaluate_function(double t, double /*dt*/, View1& /*y*/, View2& dydt) const { + for (int dofIdx = 0; dofIdx < neqs; ++dofIdx) { + dydt(dofIdx) = t + 1; + } + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0; + } + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 1.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int /*n*/) const { return 0.5 * t * t + t + 1.0; } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 1; + static constexpr char name[] = "DegreeTwoPoly"; +}; + +struct DegreeThreePoly { + template + KOKKOS_FUNCTION void evaluate_function(double t, double /*dt*/, View1& /*y*/, View2& dydt) const { + for (int dofIdx = 0; dofIdx < neqs; ++dofIdx) { + dydt(dofIdx) = (t * t) + t + 1; + } + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0; + } + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 1.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int /*n*/) const { + return (1. / 3) * (t * t * t) + (1. / 2) * (t * t) + t + 1; + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 1; + static constexpr char name[] = "DegreeThreePoly"; +}; + +struct DegreeFivePoly { + template + KOKKOS_FUNCTION void evaluate_function(double t, double /*dt*/, View1& /*y*/, View2& dydt) const { + for (int dofIdx = 0; dofIdx < neqs; ++dofIdx) { + dydt(dofIdx) = (t * t * t * t) + (t * t * t) + (t * t) + t + 1; + } + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0; + } + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 1.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int /*n*/) const { + return (1. / 5) * (t * t * t * t * t) + (1. / 4) * (t * t * t * t) + (1. / 3) * (t * t * t) + (1. / 2) * (t * t) + + t + 1; + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 1; + static constexpr char name[] = "DegreeFivePoly"; +}; + +struct Exponential { + Exponential(double rate_) : rate(rate_) {} + + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + for (int dofIdx = 0; dofIdx < neqs; ++dofIdx) { + dydt(dofIdx) = rate * y(dofIdx); + } + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0; + } + } + + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + jac(rowIdx, rowIdx) = rate; + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 1.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int /*n*/) const { return Kokkos::exp(rate * t); } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 1; + const double rate; + static constexpr char name[] = "Exponential"; +}; + +struct SpringMassDamper { + SpringMassDamper(double c_, double k_) + : c(c_), + k(k_), + lambda1((-c + Kokkos::pow(c * c - 4. * k, 0.5)) / 2.), + lambda2((-c - Kokkos::pow(c * c - 4. * k, 0.5)) / 2.) {} + + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = y[1]; + dydt[1] = -k * y[0] - c * y[1]; + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + jac(0, 0) = 0.; + jac(0, 1) = 1.; + jac(1, 0) = -k; + jac(1, 1) = -c; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 1.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + using Kokkos::exp; + + const double det = lambda1 - lambda2; + double val = 0; + + if (n == 0) { + val = -(lambda2 / det) * exp(lambda1 * t) + (lambda1 / det) * exp(lambda2 * t); + } else { + val = -(lambda2 * lambda1 / det) * exp(lambda1 * t) + (lambda1 * lambda2 / det) * exp(lambda2 * t); + } + + return val; + } + + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + + static constexpr int neqs = 2; + const double c; + const double k; + const double lambda1; + const double lambda2; + static constexpr char name[] = "SpringMassDamper"; +}; + +// Example 8.1 from Leveque + +struct CosExp { + CosExp(double lambda_, double t0_, double eta_) : lambda(lambda_), t0(t0_), eta(eta_) {} + + template + KOKKOS_FUNCTION void evaluate_function(double t, double /*dt*/, View1& y, View2& dydt) const { + for (int i = 0; i < neqs; i++) { + dydt(i) = lambda * (y(i) - Kokkos::cos(t)) - Kokkos::sin(t); + } + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + jac(0, 0) = 0.0; + + for (int i = 0; i < neqs; ++i) { + jac(i, i) = lambda; + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 10.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int /*n*/) const { + return Kokkos::exp(lambda * (t - t0)) * (eta - Kokkos::cos(t0)) + Kokkos::cos(t); + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + + static constexpr int neqs = 1; + const double lambda; + const double t0; + const double eta; + static constexpr char name[] = "CosExp"; +}; + +// Example 7.9 in LeVeque + +struct StiffChemicalDecayProcess { + StiffChemicalDecayProcess(double K1_, double K2_) : K1(K1_), K2(K2_) {} + + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = -K1 * y[0]; + dydt[1] = K1 * y[0] - K2 * y[1]; + dydt[2] = K2 * y[1]; + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + jac(0, 0) = -K1; + jac(0, 1) = 0.; + jac(0, 2) = 0.; + + jac(1, 0) = K1; + jac(1, 1) = -K2; + jac(1, 2) = 0.; + + jac(2, 0) = 0.; + jac(2, 1) = K2; + jac(2, 2) = 0.; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 0.2; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + using Kokkos::exp; + + const double C21 = y1_init * K1 / (K2 - K1); + const double C22 = y2_init - C21; + + double val = 0.0; + + if (n == 0) { + val = y1_init * exp(-K1 * t); + } else if (n == 1) { + val = C21 * exp(-K1 * t) + C22 * exp(-K2 * t); + } else { + const double C31 = -K2 * C21 / K1; + const double C32 = -C22; + const double C33 = y1_init + y2_init + y3_init; + + val = C31 * exp(-K1 * t) + C32 * exp(-K2 * t) + C33; + } + + return val; + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + + static constexpr int neqs = 3; + const double y1_init = 3.0; + const double y2_init = 4.0; + const double y3_init = 2.0; + const double K1; + const double K2; + static constexpr char name[] = "StiffChemicalDecay"; +}; + +struct Tracer { + Tracer(double rate_) : rate(rate_) {} + + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + for (int i = 0; i < neqs; i += 2) { + // const double R = Kokkos::sqrt(y[i] * y[i] + y[i + 1] * y[i + 1]); + // dydt[i] = -rate * y[i + 1] / R; + // dydt[i + 1] = rate * y[i] / R; + dydt[i] = -rate * y[i + 1]; + dydt[i + 1] = rate * y[i]; + } + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 2.0 * pi; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + const double theta = rate * t; + double val = 0.0; + + if (n % 2 == 0) { + val = Kokkos::cos(theta); + } else { + val = Kokkos::sin(theta); + } + return val; + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + + static constexpr int neqs = 2; + static constexpr double pi = 3.14159265358979323846; + const double rate; + static constexpr char name[] = "Tracer"; +}; + +struct EnrightB5 { + EnrightB5(double alpha_ = 100.0) : alpha(alpha_) {} + + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = -10. * y[0] + alpha * y[1]; + dydt[1] = -alpha * y[0] - 10. * y[1]; + dydt[2] = -4. * y[2]; + dydt[3] = -y[3]; + dydt[4] = -0.5 * y[4]; + dydt[5] = -0.1 * y[5]; + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& /*y*/, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0.0; + } + } + + jac(0, 0) = -10.; + jac(0, 1) = alpha; + jac(1, 0) = -alpha; + jac(1, 1) = -10.; + jac(2, 2) = -4.; + jac(3, 3) = -1.; + jac(4, 4) = -0.5; + jac(5, 5) = -0.1; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 0.2; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + using Kokkos::cos; + using Kokkos::exp; + using Kokkos::sin; + + double val = 0.0; + + const double c1 = 1.0; + const double c2 = -1.0; + + const double a[2] = {0, 1}; + const double b[2] = {-1, 0}; + + if (n < 2) { + val = exp(-10. * t) * (c1 * (a[n] * cos(alpha * t) - b[n] * sin(alpha * t)) + + c2 * (a[n] * sin(alpha * t) + b[n] * cos(alpha * t))); + } else if (n == 2) { + val = exp(-4. * t); + } else if (n == 3) { + val = exp(-t); + } else if (n == 4) { + val = exp(-0.5 * t); + } else { + val = exp(-0.1 * t); + } + + return val; + } + + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 6; + const double alpha; + static constexpr char name[] = "EnrightB5"; +}; // EnrightB5 + +struct EnrightC1 { + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = -y[0] + y[1] * y[1] + y[2] * y[2] + y[3] * y[3]; + dydt[1] = -10. * y[1] + 10. * (y[2] * y[2] + y[3] * y[3]); + dydt[2] = -40. * y[2] + 40. * y[3] * y[3]; + dydt[3] = -100.0 * y[3] + 2.; + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& y, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0.0; + } + } + + jac(0, 0) = -1.; + jac(0, 1) = 2. * y[1]; + jac(0, 2) = 2. * y[2]; + jac(0, 3) = 2. * y[3]; + + jac(1, 1) = -10.; + jac(1, 2) = 20. * y[2]; + jac(1, 3) = 20. * y[3]; + + jac(2, 2) = -40.; + jac(2, 3) = 80. * y[3]; + + jac(3, 3) = -100.; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 20.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + if (t == 0) { + return 1.; + } else { + // cvode sol + constexpr Kokkos::Array y = {4.003235e-04, 4.001600e-04, 4.000000e-04, 2.000000e-02}; + return y[n]; + } + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 4; + static constexpr char name[] = "EnrightC1"; +}; + +struct EnrightC5 { + EnrightC5(const double beta_ = 20.0) : beta(beta_) {} + + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = -y[0] + 2.; + dydt[1] = -10. * y[1] + beta * y[0] * y[0]; + dydt[2] = -40. * y[2] + 4. * beta * (y[0] * y[0] + y[1] * y[1]); + dydt[3] = -100.0 * y[3] + 10. * beta * (y[0] * y[0] + y[1] * y[1] + y[2] * y[2]); + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& y, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0.0; + } + } + + jac(0, 0) = -1.; + + jac(1, 0) = 2 * beta * y[0]; + jac(1, 1) = -10.; + + jac(2, 0) = 8. * beta * y[0]; + jac(2, 1) = 8. * beta * y[1]; + jac(2, 2) = -40.; + + jac(3, 0) = 20. * beta * y[0]; + jac(3, 1) = 20. * beta * y[1]; + jac(3, 2) = 20. * beta * y[2]; + jac(3, 3) = -100.; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 20.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + if (t == 0) { + return 1.; + } else { + // cvode sol + constexpr Kokkos::Array y = {2.000000e+00, 8.000000e+00, 1.360000e+02, 3.712800e+04}; + return y[n]; + } + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 4; + const double beta; + static constexpr char name[] = "EnrightC5"; +}; + +struct EnrightD2 { + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = -0.04 * y[0] + 0.01 * y[1] * y[2]; + dydt[1] = 400.0 * y[0] - 100.0 * y[1] * y[2] - 3000. * y[1] * y[1]; + dydt[2] = 30. * y[1] * y[1]; + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& y, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0.0; + } + } + + jac(0, 0) = -0.04; + jac(0, 1) = 0.01 * y[2]; + jac(0, 2) = 0.01 * y[1]; + + jac(1, 0) = 400.; + jac(1, 1) = -100. * y[2] - 6000. * y[1]; + jac(1, 2) = -100. * y[1]; + + jac(2, 1) = 60. * y[1]; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 40.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 100; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + if (t == 0.) { + constexpr Kokkos::Array y = {1., 0., 0.}; + return y[n]; + } else { + // cvode solution + constexpr Kokkos::Array y = {7.158278e-01, 9.185559e-02, 2.841630e+01}; + return y[n]; + } + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 3; + static constexpr char name[] = "EnrightD2"; +}; + +struct EnrightD4 { + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt[0] = -0.013 * y[0] - 1000. * y[0] * y[2]; + dydt[1] = -2500. * y[1] * y[2]; + dydt[2] = dydt[0] + dydt[1]; + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& y, View2& jac) const { + for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) { + for (int colIdx = 0; colIdx < neqs; ++colIdx) { + jac(rowIdx, colIdx) = 0.0; + } + } + + jac(0, 0) = -0.013 - 1000. * y[2]; + jac(0, 2) = -1000. * y[0]; + + jac(1, 1) = -2500. * y[2]; + jac(1, 2) = -2500. * y[1]; + + jac(2, 0) = jac(0, 0); + jac(2, 1) = jac(1, 1); + jac(2, 2) = jac(0, 2) + jac(1, 2); + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 50.0; } // 50.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 10; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + if (t == 0.) { + constexpr Kokkos::Array y = {1., 1., 0}; + return y[n]; + } else { + // cvode solution at tend + constexpr Kokkos::Array y = {5.976506e-01, 1.402347e+00, -1.893371e-06}; + return y[n]; + } + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 3; + static constexpr char name[] = "EnrightD4"; +}; + +// Robertson Autocatalytic reaction +struct KKStiffChemistry { + template + KOKKOS_FUNCTION void evaluate_function(double /*t*/, double /*dt*/, View1& y, View2& dydt) const { + dydt(0) = -0.04 * y(0) + 1.e4 * y(1) * y(2); + dydt(1) = 0.04 * y(0) - 1.e4 * y(1) * y(2) - 3.e7 * y(1) * y(1); + dydt(2) = 3.e7 * y(1) * y(1); + } + + template + KOKKOS_FUNCTION void evaluate_jacobian(double /*t*/, double /*dt*/, View1& y, View2& jac) const { + jac(0, 0) = -0.04; + jac(0, 1) = 1.e4 * y(2); + jac(0, 2) = 1.e4 * y(1); + jac(1, 0) = 0.04; + jac(1, 1) = -1.e4 * y(2) - 3.e7 * 2.0 * y(1); + jac(1, 2) = -1.e4 * y(1); + jac(2, 0) = 0.0; + jac(2, 1) = 3.e7 * 2.0 * y(1); + jac(2, 2) = 0.0; + } + + KOKKOS_FUNCTION constexpr double tstart() const { return 0.0; } + KOKKOS_FUNCTION constexpr double tend() const { return 500.0; } + KOKKOS_FUNCTION constexpr int numsteps() const { return 1000; } + KOKKOS_FUNCTION double expected_val(const double t, const int n) const { + if (t == 0) { + return n == 0 ? 1. : 0.; + } else { + // cvode solution + constexpr Kokkos::Array y = {4.226713e-01, 2.885221e-06, 5.773258e-01}; + return y[n]; + } + } + KOKKOS_FUNCTION static constexpr int num_equations() { return neqs; } + static constexpr int neqs = 3; + static constexpr char name[] = "Robertson Autocatalytic"; +}; + +} // namespace TestProblem + +#endif // TEST_ODE_TESTPROBLEMS_HPP diff --git a/perf_test/ode/KokkosODE_RK.cpp b/perf_test/ode/KokkosODE_RK.cpp index 2c52cfbb5d..7a5fd33999 100644 --- a/perf_test/ode/KokkosODE_RK.cpp +++ b/perf_test/ode/KokkosODE_RK.cpp @@ -92,7 +92,7 @@ struct chem_model_2 { } }; -template +template struct RKSolve_wrapper { using ode_params = KokkosODE::Experimental::ODE_params; @@ -103,10 +103,11 @@ struct RKSolve_wrapper { scalar_type tstart, tend; vec_type y_old, y_new, tmp; mv_type kstack; + count_type count; RKSolve_wrapper(const ode_type& my_ode_, const table_type& table_, const ode_params& params_, const scalar_type tstart_, const scalar_type tend_, const vec_type& y_old_, const vec_type& y_new_, - const vec_type& tmp_, const mv_type& kstack_) + const vec_type& tmp_, const mv_type& kstack_, const count_type& count_) : my_ode(my_ode_), table(table_), params(params_), @@ -115,7 +116,8 @@ struct RKSolve_wrapper { y_old(y_old_), y_new(y_new_), tmp(tmp_), - kstack(kstack_) {} + kstack(kstack_), + count(count_) {} KOKKOS_FUNCTION void operator()(const int idx) const { @@ -124,10 +126,12 @@ struct RKSolve_wrapper { auto local_y_new = Kokkos::subview(y_new, Kokkos::pair(2 * idx, 2 * idx + 1)); auto local_tmp = Kokkos::subview(tmp, Kokkos::pair(2 * idx, 2 * idx + 1)); auto local_kstack = Kokkos::subview(kstack, Kokkos::ALL(), Kokkos::pair(2 * idx, 2 * idx + 1)); + auto local_count = Kokkos::subview(count, idx, Kokkos::ALL()); // Run Runge-Kutta time integrator - KokkosODE::Impl::RKSolve( - my_ode, table, params, tstart, tend, local_y_old, local_y_new, local_tmp, local_kstack); + // This should be replaced by a call to the public interface! + KokkosODE::Impl::RKSolve(my_ode, table, params, tstart, tend, local_y_old, local_y_new, local_tmp, local_kstack, + local_count.data()); } }; @@ -149,6 +153,7 @@ void run_ode_chem(benchmark::State& state, const rk_input_parameters& inputs) { using mv_type = Kokkos::View; using table_type = KokkosODE::Impl::ButcherTableau<4, 5, 1>; using ode_params = KokkosODE::Experimental::ODE_params; + using count_type = Kokkos::View; const int num_odes = inputs.num_odes; const int model = inputs.model; @@ -159,6 +164,7 @@ void run_ode_chem(benchmark::State& state, const rk_input_parameters& inputs) { const int neqs = chem_model.neqs; const int num_steps = 15000; const double dt = 0.1; + count_type count("time steps count", num_odes, 1); table_type table; ode_params params(num_steps); @@ -176,7 +182,7 @@ void run_ode_chem(benchmark::State& state, const rk_input_parameters& inputs) { Kokkos::RangePolicy my_policy(0, num_odes); RKSolve_wrapper solve_wrapper(chem_model, table, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, - kstack); + kstack, count); Kokkos::Timer time; time.reset(); @@ -206,6 +212,7 @@ void run_ode_chem(benchmark::State& state, const rk_input_parameters& inputs) { const int neqs = chem_model.neqs; const int num_steps = 15000; const double dt = 0.1; + count_type count("time steps count", num_odes, 1); table_type table; ode_params params(num_steps); @@ -228,7 +235,7 @@ void run_ode_chem(benchmark::State& state, const rk_input_parameters& inputs) { Kokkos::RangePolicy my_policy(0, num_odes); RKSolve_wrapper solve_wrapper(chem_model, table, params, chem_model.tstart, chem_model.tend, y_old, y_new, tmp, - kstack); + kstack, count); Kokkos::Timer time; time.reset();