From 5da9e5ca60a7b417929a3172915402aad7121291 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 30 Mar 2022 18:06:08 +0200 Subject: [PATCH] update TODO notes --- docs/src/reference-trixi.md | 2 +- src/auxiliary/mpi_arrays.jl | 274 ++++++++++++++++++------------------ src/solvers/dg.jl | 16 ++- 3 files changed, 150 insertions(+), 142 deletions(-) diff --git a/docs/src/reference-trixi.md b/docs/src/reference-trixi.md index 940051115d5..73ef8fe27e8 100644 --- a/docs/src/reference-trixi.md +++ b/docs/src/reference-trixi.md @@ -5,5 +5,5 @@ CurrentModule = Trixi ``` ```@autodocs -Modules = [Trixi] +Modules = [Trixi, Trixi.TrixiMPIArrays] ``` diff --git a/src/auxiliary/mpi_arrays.jl b/src/auxiliary/mpi_arrays.jl index fd272946b1d..cd04e3527f6 100644 --- a/src/auxiliary/mpi_arrays.jl +++ b/src/auxiliary/mpi_arrays.jl @@ -37,24 +37,20 @@ struct TrixiMPIArray{T, N, Parent<:AbstractArray{T, N}} <: AbstractArray{T, N} # mpi_size::Int # mpi_isroot::Bool # mpi_isparallel::Bool - - function TrixiMPIArray{T, N, Parent}(u_local::Parent) where {T, N, Parent<:AbstractArray{T, N}} - # TODO: MPI. Hard-coded to MPI.COMM_WORLD for now - mpi_comm = MPI.COMM_WORLD - mpi_rank = MPI.Comm_rank(MPI.COMM_WORLD) - return new{T, N, Parent}(u_local, mpi_comm, mpi_rank) - end end function TrixiMPIArray(u_local::AbstractArray{T, N}) where {T, N} - TrixiMPIArray{T, N, typeof(u_local)}(u_local) + # TODO: MPI. Hard-coded to MPI.COMM_WORLD for now + mpi_comm = MPI.COMM_WORLD + mpi_rank = MPI.Comm_rank(MPI.COMM_WORLD) + TrixiMPIArray{T, N, typeof(u_local)}(u_local, mpi_comm, mpi_rank) end # TODO: MPI. Adapt -# - wrap_array -# - wrap_array_native -# - return type of initialization stuff when setting an IC +# - wrap_array - done +# - wrap_array_native - should not be changed since it should return a plain `Array` +# - return type of initialization stuff when setting an IC with MPI # - dispatch on this array type instead of parallel trees etc. and use # `parent(u)` to get local versions instead of `invoke` @@ -86,10 +82,15 @@ Base.unsafe_convert(::Type{Ptr{T}}, u::TrixiMPIArray{T}) where {T} = Base.unsafe Base.elsize(::Type{TrixiMPIArray{T, N, Parent}}) where {T, N, Parent} = elsize(Parent) -# TODO: MPI. Do we need customized broadcasting? +# TODO: MPI. Do we need customized broadcasting? What about FastBroadcast.jl and +# threaded execution with `@.. thread=true`? # See https://docs.julialang.org/en/v1/manual/interfaces/#man-interfaces-broadcasting +# TODO: MPI. How shall we handle specializations such as `split_form_kernel!(_du::PtrArray, u_cons::PtrArray,` +# for `flux_ranocha_turbo` and `flux_shima_etal_turbo`? + + # Implementation of methods from ArrayInterface.jl for use with # LoopVectorization.jl etc. # See https://juliaarrays.github.io/ArrayInterface.jl/stable/ @@ -137,98 +138,100 @@ using .TrixiMPIArrays julia> trixi_include("examples/tree_2d_dgsem/elixir_euler_ec.jl", tspan=(0.0, 10.0)) -julia> sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), dt=1.0, ave_everystep=false, callback=callbacks); summary_callback() +julia> sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), dt=1.0, save_everystep=false, callback=callbacks); summary_callback() ──────────────────────────────────────────────────────────────────────────────────── Trixi.jl Time Allocations ─────────────────────── ──────────────────────── - Tot / % measured: 5.42s / 90.8% 864MiB / 1.9% + Tot / % measured: 5.22s / 95.2% 17.4MiB / 96.3% Section ncalls time %tot avg alloc %tot avg ──────────────────────────────────────────────────────────────────────────────────── - rhs! 4.24k 4.82s 97.8% 1.14ms 1.50MiB 8.9% 370B - volume integral 4.24k 3.96s 80.3% 934μs 0.00B 0.0% 0.00B - interface flux 4.24k 540ms 11.0% 127μs 0.00B 0.0% 0.00B - surface integral 4.24k 111ms 2.2% 26.1μs 0.00B 0.0% 0.00B - prolong2interfaces 4.24k 109ms 2.2% 25.7μs 0.00B 0.0% 0.00B - Jacobian 4.24k 45.6ms 0.9% 10.8μs 0.00B 0.0% 0.00B + rhs! 4.24k 4.86s 97.8% 1.15ms 1.50MiB 8.9% 370B + volume integral 4.24k 3.97s 80.0% 938μs 0.00B 0.0% 0.00B + interface flux 4.24k 561ms 11.3% 132μs 0.00B 0.0% 0.00B + prolong2interfaces 4.24k 115ms 2.3% 27.1μs 0.00B 0.0% 0.00B + surface integral 4.24k 110ms 2.2% 25.9μs 0.00B 0.0% 0.00B reset ∂u/∂t 4.24k 45.6ms 0.9% 10.8μs 0.00B 0.0% 0.00B - ~rhs!~ 4.24k 8.19ms 0.2% 1.93μs 1.50MiB 8.9% 370B - prolong2mortars 4.24k 282μs 0.0% 66.7ns 0.00B 0.0% 0.00B - prolong2boundaries 4.24k 279μs 0.0% 66.0ns 0.00B 0.0% 0.00B - mortar flux 4.24k 174μs 0.0% 41.1ns 0.00B 0.0% 0.00B - boundary flux 4.24k 105μs 0.0% 24.8ns 0.00B 0.0% 0.00B - source terms 4.24k 102μs 0.0% 24.1ns 0.00B 0.0% 0.00B - calculate dt 848 51.6ms 1.0% 60.9μs 0.00B 0.0% 0.00B - analyze solution 10 37.5ms 0.8% 3.75ms 160KiB 0.9% 16.0KiB - I/O 11 19.9ms 0.4% 1.81ms 15.1MiB 90.2% 1.38MiB - save solution 10 19.8ms 0.4% 1.98ms 15.1MiB 90.0% 1.51MiB - get element variables 10 83.5μs 0.0% 8.35μs 20.6KiB 0.1% 2.06KiB - ~I/O~ 11 35.6μs 0.0% 3.24μs 7.20KiB 0.0% 671B - save mesh 10 912ns 0.0% 91.2ns 0.00B 0.0% 0.00B + Jacobian 4.24k 45.2ms 0.9% 10.7μs 0.00B 0.0% 0.00B + ~rhs!~ 4.24k 8.63ms 0.2% 2.04μs 1.50MiB 8.9% 370B + prolong2boundaries 4.24k 343μs 0.0% 80.9ns 0.00B 0.0% 0.00B + mortar flux 4.24k 251μs 0.0% 59.2ns 0.00B 0.0% 0.00B + prolong2mortars 4.24k 213μs 0.0% 50.3ns 0.00B 0.0% 0.00B + boundary flux 4.24k 88.7μs 0.0% 20.9ns 0.00B 0.0% 0.00B + source terms 4.24k 74.8μs 0.0% 17.7ns 0.00B 0.0% 0.00B + calculate dt 848 52.9ms 1.1% 62.4μs 0.00B 0.0% 0.00B + analyze solution 10 38.5ms 0.8% 3.85ms 159KiB 0.9% 15.9KiB + I/O 11 18.8ms 0.4% 1.71ms 15.1MiB 90.2% 1.38MiB + save solution 10 18.6ms 0.4% 1.86ms 15.1MiB 90.0% 1.51MiB + get element variables 10 170μs 0.0% 17.0μs 20.6KiB 0.1% 2.06KiB + ~I/O~ 11 35.9μs 0.0% 3.26μs 7.20KiB 0.0% 671B + save mesh 10 554ns 0.0% 55.4ns 0.00B 0.0% 0.00B ──────────────────────────────────────────────────────────────────────────────────── + julia> mpi_ode = remake(ode, u0=Trixi.TrixiMPIArray(copy(ode.u0))); -julia> mpi_sol = solve(mpi_ode, CarpenterKennedy2N54(williamson_condition=false), dt=1.0, ave_everystep=false, callback=callbacks); summary_callback() +julia> mpi_sol = solve(mpi_ode, CarpenterKennedy2N54(williamson_condition=false), dt=1.0, save_everystep=false, callback=callbacks); summary_callback() ──────────────────────────────────────────────────────────────────────────────────── Trixi.jl Time Allocations ─────────────────────── ──────────────────────── - Tot / % measured: 5.42s / 90.6% 863MiB / 1.8% + Tot / % measured: 5.17s / 95.3% 17.5MiB / 96.3% Section ncalls time %tot avg alloc %tot avg ──────────────────────────────────────────────────────────────────────────────────── - rhs! 4.24k 4.81s 97.9% 1.13ms 9.33KiB 0.1% 2.25B - volume integral 4.24k 3.96s 80.6% 935μs 0.00B 0.0% 0.00B - interface flux 4.24k 542ms 11.0% 128μs 0.00B 0.0% 0.00B - prolong2interfaces 4.24k 106ms 2.2% 25.0μs 0.00B 0.0% 0.00B - surface integral 4.24k 104ms 2.1% 24.7μs 0.00B 0.0% 0.00B - reset ∂u/∂t 4.24k 44.6ms 0.9% 10.5μs 0.00B 0.0% 0.00B - Jacobian 4.24k 43.4ms 0.9% 10.2μs 0.00B 0.0% 0.00B - ~rhs!~ 4.24k 7.10ms 0.1% 1.68μs 9.33KiB 0.1% 2.25B - prolong2boundaries 4.24k 271μs 0.0% 64.0ns 0.00B 0.0% 0.00B - prolong2mortars 4.24k 242μs 0.0% 57.1ns 0.00B 0.0% 0.00B - mortar flux 4.24k 188μs 0.0% 44.4ns 0.00B 0.0% 0.00B - source terms 4.24k 83.7μs 0.0% 19.8ns 0.00B 0.0% 0.00B - boundary flux 4.24k 74.8μs 0.0% 17.7ns 0.00B 0.0% 0.00B - calculate dt 848 50.0ms 1.0% 58.9μs 0.00B 0.0% 0.00B - analyze solution 10 36.7ms 0.7% 3.67ms 155KiB 1.0% 15.5KiB - I/O 11 18.6ms 0.4% 1.69ms 15.1MiB 98.9% 1.38MiB - save solution 10 18.4ms 0.4% 1.84ms 15.1MiB 98.8% 1.51MiB - get element variables 10 101μs 0.0% 10.1μs 22.2KiB 0.1% 2.22KiB - ~I/O~ 11 22.6μs 0.0% 2.05μs 7.20KiB 0.0% 671B - save mesh 10 876ns 0.0% 87.6ns 0.00B 0.0% 0.00B + rhs! 4.24k 4.82s 97.8% 1.14ms 1.63MiB 9.6% 402B + volume integral 4.24k 3.94s 80.0% 931μs 0.00B 0.0% 0.00B + interface flux 4.24k 563ms 11.4% 133μs 0.00B 0.0% 0.00B + surface integral 4.24k 108ms 2.2% 25.5μs 0.00B 0.0% 0.00B + prolong2interfaces 4.24k 107ms 2.2% 25.2μs 0.00B 0.0% 0.00B + reset ∂u/∂t 4.24k 45.5ms 0.9% 10.8μs 0.00B 0.0% 0.00B + Jacobian 4.24k 43.7ms 0.9% 10.3μs 0.00B 0.0% 0.00B + ~rhs!~ 4.24k 7.45ms 0.2% 1.76μs 1.63MiB 9.6% 402B + prolong2mortars 4.24k 341μs 0.0% 80.6ns 0.00B 0.0% 0.00B + prolong2boundaries 4.24k 311μs 0.0% 73.5ns 0.00B 0.0% 0.00B + mortar flux 4.24k 228μs 0.0% 53.9ns 0.00B 0.0% 0.00B + source terms 4.24k 89.5μs 0.0% 21.1ns 0.00B 0.0% 0.00B + boundary flux 4.24k 88.1μs 0.0% 20.8ns 0.00B 0.0% 0.00B + calculate dt 848 52.0ms 1.1% 61.3μs 0.00B 0.0% 0.00B + analyze solution 10 37.0ms 0.8% 3.70ms 158KiB 0.9% 15.8KiB + I/O 11 19.6ms 0.4% 1.78ms 15.1MiB 89.5% 1.38MiB + save solution 10 19.4ms 0.4% 1.94ms 15.1MiB 89.3% 1.51MiB + get element variables 10 227μs 0.0% 22.7μs 22.2KiB 0.1% 2.22KiB + ~I/O~ 11 23.8μs 0.0% 2.16μs 7.20KiB 0.0% 671B + save mesh 10 795ns 0.0% 79.5ns 0.00B 0.0% 0.00B ──────────────────────────────────────────────────────────────────────────────────── + julia> sol = solve(ode, RDPK3SpFSAL35(), abstol=1.0e-4, reltol=1.0e-4, save_everystep=false, callback=callbacks); summary_callback() ──────────────────────────────────────────────────────────────────────────────────── Trixi.jl Time Allocations ─────────────────────── ──────────────────────── - Tot / % measured: 2.84s / 91.6% 12.1MiB / 82.8% + Tot / % measured: 2.70s / 90.2% 12.1MiB / 82.8% Section ncalls time %tot avg alloc %tot avg ──────────────────────────────────────────────────────────────────────────────────── - rhs! 2.35k 2.34s 89.9% 995μs 853KiB 8.3% 372B - volume integral 2.35k 1.90s 73.1% 808μs 0.00B 0.0% 0.00B - interface flux 2.35k 268ms 10.3% 114μs 0.00B 0.0% 0.00B - prolong2interfaces 2.35k 59.3ms 2.3% 25.3μs 0.00B 0.0% 0.00B - surface integral 2.35k 57.7ms 2.2% 24.6μs 0.00B 0.0% 0.00B - Jacobian 2.35k 24.2ms 0.9% 10.3μs 0.00B 0.0% 0.00B - reset ∂u/∂t 2.35k 23.5ms 0.9% 10.0μs 0.00B 0.0% 0.00B - ~rhs!~ 2.35k 4.44ms 0.2% 1.89μs 853KiB 8.3% 372B - prolong2mortars 2.35k 162μs 0.0% 69.0ns 0.00B 0.0% 0.00B - prolong2boundaries 2.35k 159μs 0.0% 67.6ns 0.00B 0.0% 0.00B - mortar flux 2.35k 102μs 0.0% 43.4ns 0.00B 0.0% 0.00B - source terms 2.35k 46.2μs 0.0% 19.7ns 0.00B 0.0% 0.00B - boundary flux 2.35k 42.5μs 0.0% 18.1ns 0.00B 0.0% 0.00B - I/O 7 240ms 9.2% 34.3ms 9.08MiB 90.7% 1.30MiB - save solution 6 240ms 9.2% 40.0ms 9.06MiB 90.6% 1.51MiB - get element variables 6 61.1μs 0.0% 10.2μs 12.4KiB 0.1% 2.06KiB - ~I/O~ 7 15.1μs 0.0% 2.15μs 5.20KiB 0.1% 761B - save mesh 6 499ns 0.0% 83.2ns 0.00B 0.0% 0.00B - analyze solution 6 22.1ms 0.9% 3.68ms 96.3KiB 0.9% 16.0KiB + rhs! 2.35k 2.41s 98.6% 1.02ms 853KiB 8.3% 372B + volume integral 2.35k 1.94s 79.5% 827μs 0.00B 0.0% 0.00B + interface flux 2.35k 280ms 11.5% 119μs 0.00B 0.0% 0.00B + prolong2interfaces 2.35k 65.3ms 2.7% 27.8μs 0.00B 0.0% 0.00B + surface integral 2.35k 63.8ms 2.6% 27.2μs 0.00B 0.0% 0.00B + Jacobian 2.35k 25.6ms 1.0% 10.9μs 0.00B 0.0% 0.00B + reset ∂u/∂t 2.35k 24.8ms 1.0% 10.6μs 0.00B 0.0% 0.00B + ~rhs!~ 2.35k 5.16ms 0.2% 2.20μs 853KiB 8.3% 372B + prolong2boundaries 2.35k 179μs 0.0% 76.4ns 0.00B 0.0% 0.00B + prolong2mortars 2.35k 156μs 0.0% 66.4ns 0.00B 0.0% 0.00B + mortar flux 2.35k 145μs 0.0% 61.9ns 0.00B 0.0% 0.00B + source terms 2.35k 46.3μs 0.0% 19.7ns 0.00B 0.0% 0.00B + boundary flux 2.35k 45.8μs 0.0% 19.5ns 0.00B 0.0% 0.00B + analyze solution 6 21.4ms 0.9% 3.57ms 94.7KiB 0.9% 15.8KiB + I/O 7 12.2ms 0.5% 1.75ms 9.08MiB 90.7% 1.30MiB + save solution 6 12.1ms 0.5% 2.01ms 9.06MiB 90.6% 1.51MiB + get element variables 6 82.2μs 0.0% 13.7μs 12.4KiB 0.1% 2.06KiB + ~I/O~ 7 64.2μs 0.0% 9.18μs 5.20KiB 0.1% 761B + save mesh 6 516ns 0.0% 86.0ns 0.00B 0.0% 0.00B ──────────────────────────────────────────────────────────────────────────────────── julia> mpi_sol = solve(mpi_ode, RDPK3SpFSAL35(), abstol=1.0e-4, reltol=1.0e-4, save_everystep=false, callback=callbacks); summary_callback() @@ -236,29 +239,29 @@ julia> mpi_sol = solve(mpi_ode, RDPK3SpFSAL35(), abstol=1.0e-4, reltol=1.0e-4, s ──────────────────────────────────────────────────────────────────────────────────── Trixi.jl Time Allocations ─────────────────────── ──────────────────────── - Tot / % measured: 2.88s / 89.3% 11.4MiB / 80.7% + Tot / % measured: 2.71s / 88.7% 12.3MiB / 82.1% Section ncalls time %tot avg alloc %tot avg ──────────────────────────────────────────────────────────────────────────────────── - rhs! 2.35k 2.54s 98.7% 1.08ms 9.33KiB 0.1% 4.07B - volume integral 2.35k 2.06s 80.3% 878μs 0.00B 0.0% 0.00B - interface flux 2.35k 289ms 11.3% 123μs 0.00B 0.0% 0.00B - surface integral 2.35k 64.4ms 2.5% 27.4μs 0.00B 0.0% 0.00B - prolong2interfaces 2.35k 63.1ms 2.5% 26.9μs 0.00B 0.0% 0.00B - Jacobian 2.35k 26.1ms 1.0% 11.1μs 0.00B 0.0% 0.00B - reset ∂u/∂t 2.35k 25.6ms 1.0% 10.9μs 0.00B 0.0% 0.00B - ~rhs!~ 2.35k 4.74ms 0.2% 2.02μs 9.33KiB 0.1% 4.07B - prolong2mortars 2.35k 166μs 0.0% 70.6ns 0.00B 0.0% 0.00B - prolong2boundaries 2.35k 151μs 0.0% 64.2ns 0.00B 0.0% 0.00B - mortar flux 2.35k 101μs 0.0% 43.0ns 0.00B 0.0% 0.00B - source terms 2.35k 49.5μs 0.0% 21.1ns 0.00B 0.0% 0.00B - boundary flux 2.35k 45.3μs 0.0% 19.3ns 0.00B 0.0% 0.00B - analyze solution 6 21.7ms 0.8% 3.62ms 94.1KiB 1.0% 15.7KiB - I/O 7 11.1ms 0.4% 1.58ms 9.08MiB 98.9% 1.30MiB - save solution 6 10.9ms 0.4% 1.82ms 9.06MiB 98.7% 1.51MiB - get element variables 6 117μs 0.0% 19.6μs 13.3KiB 0.1% 2.22KiB - ~I/O~ 7 17.8μs 0.0% 2.54μs 5.20KiB 0.1% 761B - save mesh 6 826ns 0.0% 138ns 0.00B 0.0% 0.00B + rhs! 2.35k 2.35s 97.9% 1.00ms 927KiB 9.0% 404B + volume integral 2.35k 1.91s 79.2% 811μs 0.00B 0.0% 0.00B + interface flux 2.35k 273ms 11.4% 116μs 0.00B 0.0% 0.00B + prolong2interfaces 2.35k 60.7ms 2.5% 25.8μs 0.00B 0.0% 0.00B + surface integral 2.35k 59.3ms 2.5% 25.3μs 0.00B 0.0% 0.00B + reset ∂u/∂t 2.35k 25.3ms 1.1% 10.8μs 0.00B 0.0% 0.00B + Jacobian 2.35k 25.0ms 1.0% 10.7μs 0.00B 0.0% 0.00B + ~rhs!~ 2.35k 4.88ms 0.2% 2.08μs 927KiB 9.0% 404B + prolong2mortars 2.35k 187μs 0.0% 79.8ns 0.00B 0.0% 0.00B + prolong2boundaries 2.35k 152μs 0.0% 64.5ns 0.00B 0.0% 0.00B + mortar flux 2.35k 122μs 0.0% 51.9ns 0.00B 0.0% 0.00B + source terms 2.35k 48.0μs 0.0% 20.5ns 0.00B 0.0% 0.00B + boundary flux 2.35k 45.0μs 0.0% 19.2ns 0.00B 0.0% 0.00B + analyze solution 6 33.7ms 1.4% 5.61ms 96.5KiB 0.9% 16.1KiB + I/O 7 16.3ms 0.7% 2.33ms 9.08MiB 90.1% 1.30MiB + save solution 6 16.2ms 0.7% 2.69ms 9.06MiB 89.9% 1.51MiB + get element variables 6 142μs 0.0% 23.6μs 13.3KiB 0.1% 2.22KiB + ~I/O~ 7 31.4μs 0.0% 4.49μs 5.20KiB 0.1% 761B + save mesh 6 552ns 0.0% 92.0ns 0.00B 0.0% 0.00B ──────────────────────────────────────────────────────────────────────────────────── @@ -271,29 +274,29 @@ julia> sol = solve(ode, RDPK3SpFSAL35(), abstol=1.0e-4, reltol=1.0e-4, save_ever ──────────────────────────────────────────────────────────────────────────────────── Trixi.jl Time Allocations ─────────────────────── ──────────────────────── - Tot / % measured: 1.40s / 82.3% 12.1MiB / 82.8% + Tot / % measured: 1.46s / 82.7% 12.1MiB / 82.8% Section ncalls time %tot avg alloc %tot avg ──────────────────────────────────────────────────────────────────────────────────── - rhs! 2.35k 1.12s 97.3% 478μs 853KiB 8.3% 372B - volume integral 2.35k 665ms 57.6% 283μs 0.00B 0.0% 0.00B - interface flux 2.35k 278ms 24.1% 119μs 0.00B 0.0% 0.00B - prolong2interfaces 2.35k 63.3ms 5.5% 27.0μs 0.00B 0.0% 0.00B - surface integral 2.35k 61.3ms 5.3% 26.1μs 0.00B 0.0% 0.00B - Jacobian 2.35k 24.8ms 2.1% 10.6μs 0.00B 0.0% 0.00B - reset ∂u/∂t 2.35k 24.7ms 2.1% 10.5μs 0.00B 0.0% 0.00B - ~rhs!~ 2.35k 4.52ms 0.4% 1.92μs 853KiB 8.3% 372B - prolong2boundaries 2.35k 128μs 0.0% 54.6ns 0.00B 0.0% 0.00B - prolong2mortars 2.35k 111μs 0.0% 47.5ns 0.00B 0.0% 0.00B - mortar flux 2.35k 67.0μs 0.0% 28.5ns 0.00B 0.0% 0.00B - source terms 2.35k 53.0μs 0.0% 22.6ns 0.00B 0.0% 0.00B - boundary flux 2.35k 47.3μs 0.0% 20.1ns 0.00B 0.0% 0.00B - analyze solution 6 20.1ms 1.7% 3.35ms 96.3KiB 0.9% 16.0KiB - I/O 7 11.6ms 1.0% 1.65ms 9.08MiB 90.7% 1.30MiB - save solution 6 11.4ms 1.0% 1.91ms 9.06MiB 90.6% 1.51MiB - get element variables 6 95.8μs 0.0% 16.0μs 12.4KiB 0.1% 2.06KiB - ~I/O~ 7 18.3μs 0.0% 2.61μs 5.20KiB 0.1% 761B - save mesh 6 816ns 0.0% 136ns 0.00B 0.0% 0.00B + rhs! 2.35k 1.18s 97.5% 500μs 853KiB 8.3% 372B + volume integral 2.35k 699ms 58.0% 298μs 0.00B 0.0% 0.00B + interface flux 2.35k 290ms 24.0% 124μs 0.00B 0.0% 0.00B + surface integral 2.35k 64.3ms 5.3% 27.4μs 0.00B 0.0% 0.00B + prolong2interfaces 2.35k 64.0ms 5.3% 27.3μs 0.00B 0.0% 0.00B + Jacobian 2.35k 26.0ms 2.2% 11.1μs 0.00B 0.0% 0.00B + reset ∂u/∂t 2.35k 25.9ms 2.1% 11.0μs 0.00B 0.0% 0.00B + ~rhs!~ 2.35k 5.04ms 0.4% 2.15μs 853KiB 8.3% 372B + prolong2boundaries 2.35k 184μs 0.0% 78.5ns 0.00B 0.0% 0.00B + prolong2mortars 2.35k 114μs 0.0% 48.6ns 0.00B 0.0% 0.00B + mortar flux 2.35k 75.0μs 0.0% 31.9ns 0.00B 0.0% 0.00B + boundary flux 2.35k 63.2μs 0.0% 26.9ns 0.00B 0.0% 0.00B + source terms 2.35k 56.8μs 0.0% 24.2ns 0.00B 0.0% 0.00B + analyze solution 6 18.8ms 1.6% 3.14ms 96.3KiB 0.9% 16.0KiB + I/O 7 11.9ms 1.0% 1.70ms 9.08MiB 90.7% 1.30MiB + save solution 6 11.8ms 1.0% 1.96ms 9.06MiB 90.6% 1.51MiB + get element variables 6 103μs 0.0% 17.1μs 12.4KiB 0.1% 2.06KiB + ~I/O~ 7 16.0μs 0.0% 2.28μs 5.20KiB 0.1% 761B + save mesh 6 420ns 0.0% 70.0ns 0.00B 0.0% 0.00B ──────────────────────────────────────────────────────────────────────────────────── julia> mpi_sol = solve(mpi_ode, RDPK3SpFSAL35(), abstol=1.0e-4, reltol=1.0e-4, save_everystep=false, callback=callbacks); summary_callback() @@ -301,30 +304,29 @@ julia> mpi_sol = solve(mpi_ode, RDPK3SpFSAL35(), abstol=1.0e-4, reltol=1.0e-4, s ──────────────────────────────────────────────────────────────────────────────────── Trixi.jl Time Allocations ─────────────────────── ──────────────────────── - Tot / % measured: 1.51s / 78.9% 11.4MiB / 80.7% + Tot / % measured: 2.66s / 89.3% 12.3MiB / 82.1% Section ncalls time %tot avg alloc %tot avg ──────────────────────────────────────────────────────────────────────────────────── - rhs! 2.35k 1.15s 96.5% 492μs 9.33KiB 0.1% 4.07B - volume integral 2.35k 693ms 58.0% 295μs 0.00B 0.0% 0.00B - interface flux 2.35k 280ms 23.4% 119μs 0.00B 0.0% 0.00B - prolong2interfaces 2.35k 64.0ms 5.4% 27.2μs 0.00B 0.0% 0.00B - surface integral 2.35k 62.3ms 5.2% 26.5μs 0.00B 0.0% 0.00B - Jacobian 2.35k 25.1ms 2.1% 10.7μs 0.00B 0.0% 0.00B - reset ∂u/∂t 2.35k 24.6ms 2.1% 10.5μs 0.00B 0.0% 0.00B - ~rhs!~ 2.35k 4.55ms 0.4% 1.94μs 9.33KiB 0.1% 4.07B - prolong2mortars 2.35k 173μs 0.0% 73.7ns 0.00B 0.0% 0.00B - prolong2boundaries 2.35k 142μs 0.0% 60.3ns 0.00B 0.0% 0.00B - mortar flux 2.35k 73.5μs 0.0% 31.3ns 0.00B 0.0% 0.00B - boundary flux 2.35k 46.4μs 0.0% 19.8ns 0.00B 0.0% 0.00B - source terms 2.35k 43.8μs 0.0% 18.7ns 0.00B 0.0% 0.00B - analyze solution 6 25.1ms 2.1% 4.19ms 94.1KiB 1.0% 15.7KiB - I/O 7 16.2ms 1.4% 2.32ms 9.08MiB 98.9% 1.30MiB - save solution 6 15.7ms 1.3% 2.62ms 9.06MiB 98.7% 1.51MiB - get element variables 6 444μs 0.0% 74.1μs 13.3KiB 0.1% 2.22KiB - ~I/O~ 7 22.7μs 0.0% 3.25μs 5.20KiB 0.1% 761B - save mesh 6 536ns 0.0% 89.3ns 0.00B 0.0% 0.00B + rhs! 2.35k 2.34s 98.7% 997μs 927KiB 9.0% 404B + volume integral 2.35k 1.90s 80.2% 810μs 0.00B 0.0% 0.00B + interface flux 2.35k 269ms 11.3% 115μs 0.00B 0.0% 0.00B + prolong2interfaces 2.35k 59.0ms 2.5% 25.1μs 0.00B 0.0% 0.00B + surface integral 2.35k 57.3ms 2.4% 24.4μs 0.00B 0.0% 0.00B + Jacobian 2.35k 23.9ms 1.0% 10.2μs 0.00B 0.0% 0.00B + reset ∂u/∂t 2.35k 23.8ms 1.0% 10.1μs 0.00B 0.0% 0.00B + ~rhs!~ 2.35k 4.47ms 0.2% 1.90μs 927KiB 9.0% 404B + prolong2boundaries 2.35k 158μs 0.0% 67.1ns 0.00B 0.0% 0.00B + prolong2mortars 2.35k 104μs 0.0% 44.4ns 0.00B 0.0% 0.00B + mortar flux 2.35k 93.3μs 0.0% 39.7ns 0.00B 0.0% 0.00B + source terms 2.35k 62.5μs 0.0% 26.6ns 0.00B 0.0% 0.00B + boundary flux 2.35k 45.3μs 0.0% 19.3ns 0.00B 0.0% 0.00B + analyze solution 6 21.2ms 0.9% 3.53ms 96.5KiB 0.9% 16.1KiB + I/O 7 10.5ms 0.4% 1.49ms 9.08MiB 90.1% 1.30MiB + save solution 6 10.3ms 0.4% 1.72ms 9.06MiB 89.9% 1.51MiB + get element variables 6 104μs 0.0% 17.3μs 13.3KiB 0.1% 2.22KiB + ~I/O~ 7 13.9μs 0.0% 1.98μs 5.20KiB 0.1% 761B + save mesh 6 482ns 0.0% 80.3ns 0.00B 0.0% 0.00B ──────────────────────────────────────────────────────────────────────────────────── - =# diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 86cbac1ee68..14aeda7e0b0 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -405,13 +405,19 @@ end # `@batch` from Polyester.jl or something similar. Using Polyester.jl # is probably the best option since everything will be handed over to # Chris Elrod, one of the best performance software engineers for Julia. - PtrArray(pointer(u_ode), - (StaticInt(nvariables(equations)), ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., nelements(dg, cache))) - # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + u = PtrArray(pointer(u_ode), + (StaticInt(nvariables(equations)), ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., nelements(dg, cache))) + # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) else # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. - unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode), - (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + u = unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode), + (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + end + + if u_ode isa TrixiMPIArray + return TrixiMPIArray(u, u_ode.mpi_comm, u_ode.mpi_rank) + else + return u end end