From 26759a82a3382bef4929765f1413058d5f71e109 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 17 Dec 2024 17:36:16 +0100 Subject: [PATCH 001/158] Use Adapt.jl to change storage and element type In order to eventually support GPU computation we need to use Adapt.jl to allow GPU backend packages to swap out host-array types like `CuArray` with device-side types like `CuDeviceArray`. Additionally this will allow us to change the element type of a simulation by using `adapt(Array{Float32}`. Co-authored-by: Lars Christmann Co-authored-by: Benedict Geihe --- Project.toml | 2 + src/Trixi.jl | 2 + src/auxiliary/containers.jl | 84 +++++ src/auxiliary/vector_of_arrays.jl | 31 ++ .../semidiscretization_hyperbolic.jl | 27 +- src/solvers/dg.jl | 3 + src/solvers/dgsem/basis_lobatto_legendre.jl | 37 +++ src/solvers/dgsem_p4est/containers.jl | 314 ++++++++++++++---- .../dgsem_p4est/containers_parallel.jl | 114 +++++-- src/solvers/dgsem_p4est/dg_parallel.jl | 60 ++-- .../sort_boundary_conditions.jl | 17 +- test/Project.toml | 1 + test/test_p4est_2d.jl | 6 + test/test_unstructured_2d.jl | 7 + 14 files changed, 567 insertions(+), 138 deletions(-) create mode 100644 src/auxiliary/vector_of_arrays.jl diff --git a/Project.toml b/Project.toml index b53431fd171..204c4088f2f 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Michael Schlottke-Lakemper ", " version = "0.11.16-DEV" [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" @@ -63,6 +64,7 @@ TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" [compat] +Adapt = "4" Accessors = "0.1.36" CodeTracking = "1.0.5" ConstructionBase = "1.5" diff --git a/src/Trixi.jl b/src/Trixi.jl index 8f13835dbae..3844746b777 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -44,6 +44,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!, using 
DelimitedFiles: readdlm using Downloads: Downloads +using Adapt: Adapt, adapt using CodeTracking: CodeTracking using ConstructionBase: ConstructionBase using DiffEqBase: DiffEqBase, get_tstops, get_tstops_array @@ -125,6 +126,7 @@ include("basic_types.jl") # Include all top-level source files include("auxiliary/auxiliary.jl") +include("auxiliary/vector_of_arrays.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") include("auxiliary/t8code.jl") diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 90650f6abcf..5738467ec6b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -314,4 +314,88 @@ end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) raw_copy!(c, c, from, from, destination) end + +# Trixi storage types must implement these two Adapt.jl methods +function Adapt.adapt_structure(to, c::AbstractContainer) + error("Interface: Must implement Adapt.adapt_structure(to, ::$(typeof(c)))") +end + +function Adapt.parent_type(C::Type{<:AbstractContainer}) + error("Interface: Must implement Adapt.parent_type(::Type{$C}") +end + +function Adapt.unwrap_type(C::Type{<:AbstractContainer}) + return Adapt.unwrap_type(Adapt.parent_type(C)) +end + +# TODO: Upstream to Adapt +function storage_type(x) + return storage_type(typeof(x)) +end + +function storage_type(T::Type) + error("Interface: Must implement storage_type(::Type{$T}") +end + +function storage_type(::Type{<:Array}) + Array +end + +function storage_type(C::Type{<:AbstractContainer}) + return storage_type(Adapt.unwrap_type(C)) +end + +# For some storage backends like CUDA.jl, empty arrays do seem to simply be +# null pointers which can cause `unsafe_wrap` to fail when calling +# Adapt.adapt (ArgumentError, see +# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229). +# To circumvent this, on length zero arrays this allocates +# a separate empty array instead of wrapping. 
+# However, since zero length arrays are not used in calculations, +# it should be okay if the underlying storage vectors and wrapped arrays +# are not the same as long as they are properly wrapped when `resize!`d etc. +function unsafe_wrap_or_alloc(to, vector, size) + if length(vector) == 0 + return similar(vector, size) + else + return unsafe_wrap(to, pointer(vector), size) + end +end + +struct TrixiAdaptor{Storage, Real} end + +function trixi_adapt(storage, real, x) + adapt(TrixiAdaptor{storage, real}(), x) +end + +# Custom rules +# 1. handling of StaticArrays +function Adapt.adapt_storage(::TrixiAdaptor{<:Any, Real}, + x::StaticArrays.StaticArray{S, T, N}) where {Real, S, T, N} + StaticArrays.similar_type(x, Real)(x) +end + +# 2. Handling of Arrays +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: AbstractFloat} + adapt(Storage{Real}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: StaticArrays.StaticArray} + adapt(Storage{StaticArrays.similar_type(T, Real)}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray) where {Storage, Real} + adapt(Storage, x) +end + +# 3. TODO: Should we have a fallback? But that would imply implementing things for NamedTuple again + +function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} + return unsafe_wrap_or_alloc(Storage, vec, size) +end end # @muladd diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl new file mode 100644 index 00000000000..0fa8dd7f1ec --- /dev/null +++ b/src/auxiliary/vector_of_arrays.jl @@ -0,0 +1,31 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! 
format: noindent + +# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector. +# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is +# not possible for a plain Vector of Arrays. +struct VecOfArrays{T <: AbstractArray} + arrays::Vector{T} +end +Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i) +Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays) +Base.size(v::VecOfArrays) = Base.size(v.arrays) +Base.length(v::VecOfArrays) = Base.length(v.arrays) +Base.eltype(v::VecOfArrays{T}) where {T} = T +function Adapt.adapt_structure(to, v::VecOfArrays) + return VecOfArrays([Adapt.adapt(to, arr) for arr in v.arrays]) +end +function Adapt.parent_type(::Type{<:VecOfArrays{T}}) where {T} + return T +end +function Adapt.unwrap_type(A::Type{<:VecOfArrays}) + Adapt.unwrap_type(Adapt.parent_type(A)) +end +function Base.convert(::Type{<:VecOfArrays}, v::Vector{<:AbstractArray}) + VecOfArrays(v) +end +end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index c909196b5db..f86be5dc069 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -27,25 +27,6 @@ mutable struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, solver::Solver cache::Cache performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, - BoundaryConditions, SourceTerms, Solver, - Cache}(mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, - cache::Cache) where {Mesh, Equations, - InitialCondition, - BoundaryConditions, - SourceTerms, - Solver, - Cache} - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, - solver, cache, performance_counter) - 
end end """ @@ -74,6 +55,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) + performance_counter = PerformanceCounter() + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), @@ -81,9 +64,13 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver initial_condition, _boundary_conditions, source_terms, solver, - cache) + cache, + performance_counter) end +# @eval due to @muladd +@eval Adapt.@adapt_structure(SemidiscretizationHyperbolic) + # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 20b989da334..28774e0029a 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -400,6 +400,9 @@ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} volume_integral::VolumeIntegral end +# @eval due to @muladd +@eval Adapt.@adapt_structure(DG) + function Base.show(io::IO, dg::DG) @nospecialize dg # reduce precompilation time diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 777348aa8ce..9647f172e20 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -34,6 +34,32 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES, # negative adjoint wrt the SBP dot product end +function Adapt.adapt_structure(to, basis::LobattoLegendreBasis) + inverse_vandermonde_legendre = adapt(to, basis.inverse_vandermonde_legendre) + RealT = eltype(inverse_vandermonde_legendre) + + nodes = SVector{<:Any, RealT}(basis.nodes) + weights = SVector{<:Any, RealT}(basis.weights) + inverse_weights = SVector{<:Any, 
RealT}(basis.inverse_weights) + boundary_interpolation = adapt(to, basis.boundary_interpolation) + derivative_matrix = adapt(to, basis.derivative_matrix) + derivative_split = adapt(to, basis.derivative_split) + derivative_split_transpose = adapt(to, basis.derivative_split_transpose) + derivative_dhat = adapt(to, basis.derivative_dhat) + return LobattoLegendreBasis{RealT, nnodes(basis), typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, + weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + function LobattoLegendreBasis(RealT, polydeg::Integer) nnodes_ = polydeg + 1 @@ -155,6 +181,17 @@ struct LobattoLegendreMortarL2{RealT <: Real, NNODES, reverse_lower::ReverseMatrix end +function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2) + forward_upper = adapt(to, mortar.forward_upper) + forward_lower = adapt(to, mortar.forward_lower) + reverse_upper = adapt(to, mortar.reverse_upper) + reverse_lower = adapt(to, mortar.reverse_lower) + return LobattoLegendreMortarL2{eltype(forward_upper), nnodes(mortar), + typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end + function MortarL2(basis::LobattoLegendreBasis) RealT = real(basis) nnodes_ = nnodes(basis) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index a070db6b701..68e5b3d758b 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,31 @@ #! 
format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, + ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1}, + ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2}, + ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3}, + VectorRealT <: DenseVector{RealT}, + VectoruEltype <: DenseVector{uEltype}} <: + AbstractContainer # Physical coordinates at each node - node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + node_coordinates::ArrayNDIMSP2 # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix::Array{RealT, NDIMSP3} + jacobian_matrix::ArrayNDIMSP3 # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::ArrayNDIMSP3 # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::ArrayNDIMSP1 # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element] # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _node_coordinates::VectorRealT + _jacobian_matrix::VectorRealT + _contravariant_vectors::VectorRealT + _inverse_jacobian::VectorRealT + _surface_flux_values::VectoruEltype end @inline function 
nelements(elements::P4estElementContainer) @@ -36,7 +42,7 @@ end RealT, uEltype } - uEltype + return uEltype end # Only one-dimensional `Array`s are `resize!`able in Julia. @@ -51,28 +57,30 @@ function Base.resize!(elements::P4estElementContainer, capacity) n_dims = ndims(elements) n_nodes = size(elements.node_coordinates, 2) n_variables = size(elements.surface_flux_values, 1) + ArrayType = storage_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates), (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix), (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + elements.contravariant_vectors = unsafe_wrap(ArrayType, + pointer(_contravariant_vectors), size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian), (ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., n_dims * 2, capacity)) @@ -117,33 +125,104 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, NDIMS, RealT}, NDIMS * 2, nelements)) elements = P4estElementContainer{NDIMS, RealT, 
uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, - contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, _jacobian_matrix, - _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) + NDIMS + 3, Array{RealT, NDIMS + 1}, + Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3}, + Vector{RealT}, Vector{uEltype}}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) init_elements!(elements, mesh, basis) return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +function Adapt.parent_type(::Type{<:P4estElementContainer{<:Any, <:Any, <:Any, <:Any, + <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, + elements::P4estElementContainer{NDIMS}) where {NDIMS} + # Adapt underlying storage + _node_coordinates = adapt(to, elements._node_coordinates) + _jacobian_matrix = adapt(to, elements._jacobian_matrix) + _contravariant_vectors = adapt(to, elements._contravariant_vectors) + _inverse_jacobian = adapt(to, elements._inverse_jacobian) + _surface_flux_values = adapt(to, elements._surface_flux_values) + + RealT = eltype(_inverse_jacobian) + uEltype = eltype(_surface_flux_values) + + # Wrap arrays again + node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates, + size(elements.node_coordinates)) + jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix, + size(elements.jacobian_matrix)) + contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors, + size(jacobian_matrix)) + inverse_jacobian = unsafe_wrap_or_alloc(to, _inverse_jacobian, + size(elements.inverse_jacobian)) + surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values, + size(elements.surface_flux_values)) + + new_type_params = 
(NDIMS, + RealT, + uEltype, + NDIMS + 1, + NDIMS + 2, + NDIMS + 3, + typeof(inverse_jacobian), # ArrayNDIMSP1 + typeof(node_coordinates), # ArrayNDIMSP2 + typeof(jacobian_matrix), # ArrayNDIMSP3 + typeof(_node_coordinates), # VectorRealT + typeof(_surface_flux_values)) # VectoruEltype + return P4estElementContainer{new_type_params...}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) +end + +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::IdsMatrix # [primary/secondary, interface] + node_indices::IndicesMatrix # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline function ninterfaces(interfaces::P4estInterfaceContainer) size(interfaces.neighbor_ids, 2) end @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) @@ -152,17 +231,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) n_dims = ndims(interfaces) n_nodes = size(interfaces.u, 3) n_variables = size(interfaces.u, 2) + ArrayType = storage_type(interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), + interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), + (2, capacity)) resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), + (2, capacity)) return nothing end @@ -189,10 +271,15 @@ function init_interfaces(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, - node_indices, - _u, _neighbor_ids, - _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(neighbor_ids), + typeof(node_indices), typeof(_u), + typeof(_neighbor_ids), typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) init_interfaces!(interfaces, mesh) @@ -205,21 +292,58 @@ function init_interfaces!(interfaces, mesh::Union{P4estMesh, P4estMeshView}) return interfaces end -mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: +function Adapt.parent_type(::Type{<:P4estInterfaceContainer{<:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT 
+end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer) + # Adapt underlying storage + _u = adapt(to, interfaces._u) + _neighbor_ids = adapt(to, interfaces._neighbor_ids) + _node_indices = adapt(to, interfaces._node_indices) + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, + size(interfaces.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, + size(interfaces.node_indices)) + + NDIMS = ndims(interfaces) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 2, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1, + uArray <: DenseArray{uEltype, NDIMSP1}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + u::uArray # [variables, i, j, boundary] + neighbor_ids::IdsVector # [boundary] + node_indices::IndicesVector # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nboundaries(boundaries::P4estBoundaryContainer) length(boundaries.neighbor_ids) end @inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) @@ -228,9 +352,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity) n_dims = ndims(boundaries) n_nodes = size(boundaries.u, 2) n_variables = size(boundaries.u, 1) + ArrayType = storage_type(boundaries) resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), + boundaries.u = unsafe_wrap(ArrayType, pointer(_u), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -263,9 +388,11 @@ function init_boundaries(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, - node_indices, names, - _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, + node_indices, names, + _u) if n_boundaries > 0 init_boundaries!(boundaries, mesh) @@ -312,6 +439,25 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) return nothing end +function Adapt.parent_type(::Type{<:P4estBoundaryContainer{<:Any, <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer) + _u = adapt(to, boundaries._u) + u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) + neighbor_ids = adapt(to, boundaries.neighbor_ids) + node_indices = adapt(to, boundaries.node_indices) + name = boundaries.name + + NDIMS = ndims(boundaries) + return P4estBoundaryContainer{NDIMS, eltype(_u), NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, node_indices, + name, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in 
`neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -337,20 +483,32 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + u::uArray # [small/large side, variable, position, i, j, mortar] + neighbor_ids::IdsMatrix # [position, mortar] + node_indices::IndicesMatrix # [small/large, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) @@ -359,18 +517,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity) n_dims = ndims(mortars) n_nodes = size(mortars.u, 4) n_variables = size(mortars.u, 2) + ArrayType = storage_type(mortars) resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), + mortars.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, 2^(n_dims - 1), ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), (2^(n_dims - 1) + 1, capacity)) resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity)) return nothing end @@ -398,12 +557,15 @@ function init_mortars(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equatio _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, - neighbor_ids, - node_indices, - _u, - _neighbor_ids, - _node_indices) + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), + typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) if n_mortars > 0 init_mortars!(mortars, mesh) @@ -418,6 +580,34 @@ function init_mortars!(mortars, mesh::Union{P4estMesh, P4estMeshView}) return mortars end +function Adapt.parent_type(::Type{<:P4estMortarContainer{<:Any, <:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing 
memory +function Adapt.adapt_structure(to, mortars::P4estMortarContainer) + # Adapt underlying storage + _u = adapt(to, mortars._u) + _neighbor_ids = adapt(to, mortars._neighbor_ids) + _node_indices = adapt(to, mortars._node_indices) + + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(mortars.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices)) + + NDIMS = ndims(mortars) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 1, + NDIMS + 3, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) # Re-initialize elements container @unpack elements = cache diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 676b37efff3..cb9cd1ffc95 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -5,15 +5,19 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + VecInt <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides::Vector{Int} # [interface] - + u::uArray # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::VecInt # [interface] + node_indices::IndicesVector # [interface] + local_sides::VecInt # [interface] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) @@ -27,9 +31,10 @@ function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) n_dims = ndims(mpi_interfaces) n_nodes = size(mpi_interfaces.u, 3) n_variables = size(mpi_interfaces.u, 2) + ArrayType = storage_type(mpi_interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -64,11 +69,13 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, - local_neighbor_ids, - node_indices, - local_sides, - _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) init_mpi_interfaces!(mpi_interfaces, mesh) @@ -81,6 +88,32 @@ function 
init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) return mpi_interfaces end +function Adapt.parent_type(::Type{<:Trixi.P4estMPIInterfaceContainer{<:Any, <:Any, + <:Any, A}}) where {A} + return A +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer) + # Adapt Vectors and underlying storage + _u = adapt(to, mpi_interfaces._u) + local_neighbor_ids = adapt(to, mpi_interfaces.local_neighbor_ids) + node_indices = adapt(to, mpi_interfaces.node_indices) + local_sides = adapt(to, mpi_interfaces.local_sides) + + # Wrap array again + u = unsafe_wrap_or_alloc(to, _u, size(mpi_interfaces.u)) + + NDIMS = ndims(mpi_interfaces) + return P4estMPIInterfaceContainer{NDIMS, eltype(u), + NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into @@ -88,14 +121,17 @@ end # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. 
mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] - local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + NDIMSP2, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + uVector <: DenseVector{uEltype}} <: + AbstractContainer + u::uArray # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] + local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector _node_indices::Vector{NTuple{NDIMS, Symbol}} _normal_directions::Vector{RealT} end @@ -164,11 +200,12 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq 2^(NDIMS - 1), n_mpi_mortars)) mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(u, local_neighbor_ids, - local_neighbor_positions, - node_indices, normal_directions, - _u, _node_indices, - _normal_directions) + NDIMS + 3, typeof(u), + typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) if n_mpi_mortars > 0 init_mpi_mortars!(mpi_mortars, mesh, basis, elements) @@ -184,6 +221,33 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements return mpi_mortars end +function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) + # TODO: Vector of Vector type data structure does not work on GPUs, + # must be 
redesigned. This skeleton implementation here just exists just + # for compatibility with the rest of the KA.jl solver code + + _u = adapt(to, mpi_mortars._u) + _node_indices = mpi_mortars._node_indices + _normal_directions = mpi_mortars._normal_directions + + u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u)) + local_neighbor_ids = mpi_mortars.local_neighbor_ids + local_neighbor_positions = mpi_mortars.local_neighbor_positions + node_indices = mpi_mortars.node_indices + normal_directions = mpi_mortars.normal_directions + + NDIMS = ndims(mpi_mortars) + return P4estMPIMortarContainer{NDIMS, eltype(_u), + eltype(_normal_directions), + NDIMS + 1, NDIMS + 2, NDIMS + 3, + typeof(u), typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, + normal_directions, _u, + _node_indices, + _normal_directions) +end + # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes function init_interfaces!(interfaces, mesh::ParallelP4estMesh) diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 2cc201dd1f0..7acddf07b4b 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -5,12 +5,13 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPICache{uEltype} +mutable struct P4estMPICache{BufferType <: DenseVector, + VecInt <: DenseVector{<:Integer}} mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_neighbor_interfaces::VecOfArrays{VecInt} + mpi_neighbor_mortars::VecOfArrays{VecInt} + mpi_send_buffers::VecOfArrays{BufferType} + mpi_recv_buffers::VecOfArrays{BufferType} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} @@ -25,25 +26,29 @@ function P4estMPICache(uEltype) end mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays mpi_send_requests = Vector{MPI.Request}(undef, 0) mpi_recv_requests = Vector{MPI.Request}(undef, 0) n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) n_elements_global = 0 first_element_global_id = 0 - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, - mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks, + mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end 
-@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype +@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType) + +# @eval due to @muladd +@eval Adapt.@adapt_structure(P4estMPICache) ## # Note that the code in `start_mpi_send`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10. @@ -265,16 +270,16 @@ end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, - mpi_mortars, - mesh) + mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, - mpi_neighbor_mortars, - ndims(mesh), - nvars, - n_nodes, - uEltype) + _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces, + _mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) # Determine local and total number of elements n_elements_global = Int(mesh.p4est.global_num_quadrants[]) @@ -286,6 +291,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces) + mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars) + mpi_send_buffers = VecOfArrays(_mpi_send_buffers) + mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers) + # TODO reuse existing structures @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 0cb3bd7f409..d6cf6e1ce6d 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}, + Vec <: AbstractVector{<:Integer}} boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier end @@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) boundary_symbol_indices = Dict{Symbol, Vector{Int}}() container = UnstructuredSortedBoundaryTypes{n_boundary_types, - typeof(boundary_condition_types)}(boundary_condition_types, - boundary_indices, - boundary_conditions, - boundary_symbol_indices) + typeof(boundary_condition_types), + Vector{Int}}(boundary_condition_types, + boundary_indices, + boundary_conditions, + boundary_symbol_indices) initialize!(container, cache) end @@ -119,4 +121,7 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N return 
boundary_types_container end + +# @eval due to @muladd +@eval Adapt.@adapt_structure(UnstructuredSortedBoundaryTypes) end # @muladd diff --git a/test/Project.toml b/test/Project.toml index ec1a13a4bd1..c399dd967bf 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index 4b1c7f5caca..b1472cb99cf 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -27,6 +27,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_advection_nonconforming_flag.jl" begin diff --git a/test/test_unstructured_2d.jl b/test/test_unstructured_2d.jl index 259eb39c545..c3291c3ba9d 100644 --- a/test/test_unstructured_2d.jl +++ b/test/test_unstructured_2d.jl @@ -2,6 +2,7 @@ module TestExamplesUnstructuredMesh2D using Test using Trixi +using Adapt include("test_trixi.jl") @@ -32,6 +33,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_euler_free_stream.jl" begin From fc610f9c7a0bcee83150ad984777c23d16665122 Mon Sep 17 
00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Apr 2025 18:37:41 +0200 Subject: [PATCH 002/158] add docs and CUDAExt --- Project.toml | 7 +++- docs/make.jl | 3 +- docs/src/heterogeneous.md | 82 +++++++++++++++++++++++++++++++++++++++ ext/TrixiCUDAExt.jl | 11 ++++++ 4 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 docs/src/heterogeneous.md create mode 100644 ext/TrixiCUDAExt.jl diff --git a/Project.toml b/Project.toml index 204c4088f2f..5afb3d64225 100644 --- a/Project.toml +++ b/Project.toml @@ -4,8 +4,8 @@ authors = ["Michael Schlottke-Lakemper ", " version = "0.11.16-DEV" [deps] -Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -57,15 +57,18 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [extensions] TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" +TrixiCUDAExt = "CUDA" [compat] -Adapt = "4" Accessors = "0.1.36" +Adapt = "4" +CUDA = "5" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/docs/make.jl b/docs/make.jl index 60c11c5d2d1..a115294cc90 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -163,7 +163,8 @@ makedocs( "Style guide" => "styleguide.md", "Testing" => "testing.md", "Performance" => "performance.md", - "Parallelization" => "parallelization.md" + "Parallelization" => "parallelization.md", + "Heterogeneous" => "heterogeneous.md" ], "Troubleshooting and FAQ" => "troubleshooting.md", "Reference" => [ diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md new file mode 100644 index 
00000000000..60bda029a40 --- /dev/null +++ b/docs/src/heterogeneous.md @@ -0,0 +1,82 @@ +# Heterogeneous computing + +Support for heterogeneous computing is currently being worked on. + +## The use of Adapt.jl + +[`Adapt.jl`](https://github.com/JuliaGPU/Adapt.jl) is a package in the JuliaGPU family that allows for +the translation of nested data structures. The primary goal is to allow the substitution of `Array` +at the storage leaves with a GPU array like `CuArray`. + +To facilitate this data structures must be parameterized, so instead of: + +```julia +struct Container + data::Array{Float64,2} +end +``` + +They must be written as: + +```julia +struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D +end +``` + +furthermore, we need to define a function that allows for the conversion of storage +of our types: + +```julia +function Adapt.adapt_structure(to, C::Container) + return Container(adapt(to, C.data)) +end +``` + +or simply + +```julia +Adapt.@adapt_structure(Container) +``` + +additionally, we must define `Adapt.parent_type`. + +```julia +function Adapt.parent_type(::Type{<:Container{D}}) where D + return D +end +``` + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +Array + +julia> using CUDA + +julia> GPU_C = adapt(CuArray, C) +Container{CuArray{Float64, 1, CUDA.DeviceMemory}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +CuArray +``` + +## Element-type conversion with `Trixi.trixi_adapt`. + +We can use Trixi.trixi_adapt to perform both an element-type and a storage-type adoption + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(Array, Float32, C) +Container{Vector{Float32}}(Float32[0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(CuArray, Float32, C) +Container{CuArray{Float32, 1, CUDA.DeviceMemory}}(Float32[0.0, 0.0, 0.0]) +``` + +!!! 
note + `adapt(Array{Float32}, C)` is tempting but will do the wrong thing in the presence of `StaticArrays`. \ No newline at end of file diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl new file mode 100644 index 00000000000..681d2f53a1e --- /dev/null +++ b/ext/TrixiCUDAExt.jl @@ -0,0 +1,11 @@ +# Package extension for adding CUDA-based features to Trixi.jl +module TrixiCUDAExt + +import CUDA: CuArray +import Trixi + +function Trixi.storage_type(::Type{<:CuArray}) + return CuArray +end + +end From 7b5d81b1c09653bb50c4c214f2acbde9dfe9140a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Apr 2025 21:35:04 +0200 Subject: [PATCH 003/158] Aqua set unbound_args --- test/test_aqua.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_aqua.jl b/test/test_aqua.jl index 9b3f2d67903..154088995ca 100644 --- a/test/test_aqua.jl +++ b/test/test_aqua.jl @@ -10,6 +10,7 @@ include("test_trixi.jl") @timed_testset "Aqua.jl" begin Aqua.test_all(Trixi, ambiguities = false, + unbound_args = false, # FIXME: UnstructuredSortedBoundaryTypes # exceptions necessary for adding a new method `StartUpDG.estimate_h` # in src/solvers/dgmulti/sbp.jl piracies = (treat_as_own = [Trixi.StartUpDG.RefElemData, From f730ef410e5b9450ae5f18821731799f3b1725d5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 09:26:23 +0200 Subject: [PATCH 004/158] lower bound CUDA to 5.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5afb3d64225..3ce2daf16f9 100644 --- a/Project.toml +++ b/Project.toml @@ -68,7 +68,7 @@ TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" Adapt = "4" -CUDA = "5" +CUDA = "5.2" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" From 13b7f590b2604f53b92a681a51fe21582fc5c8eb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Apr 2025 17:16:18 +0200 Subject: [PATCH 005/158] add initial CUDA pipeline --- .buildkite/pipeline.yml | 9 ++++++--- 
test/Project.toml | 1 + test/runtests.jl | 9 +++++++++ test/test_cuda.jl | 20 ++++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 test/test_cuda.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0f8ad475db8..344b8eacc3a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,3 +1,5 @@ +env: + steps: - label: "CUDA Julia {{matrix.version}}" matrix: @@ -7,12 +9,13 @@ steps: plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" - command: | - true + - JuliaCI/julia-test#v1: ~ + env: + TRIXI_TEST: "CUDA" agents: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip ci\]/ timeout_in_minutes: 60 soft_fail: - - exit_status: 3 \ No newline at end of file + - exit_status: 3 diff --git a/test/Project.toml b/test/Project.toml index c399dd967bf..206654281d9 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,6 +4,7 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" diff --git a/test/runtests.jl b/test/runtests.jl index a9dfc4cb999..d08ff018837 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -116,4 +116,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" include("test_paper_self_gravitating_gas_dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" + import CUDA + if CUDA.functional() + include("test_cuda.jl") + else + @warn "Unable to run CUDA tests on this machine" + end + end end diff --git a/test/test_cuda.jl b/test/test_cuda.jl new file mode 100644 index 00000000000..f2fd11233c6 --- /dev/null +++ b/test/test_cuda.jl @@ 
-0,0 +1,20 @@ +module TestCUDA + +using CUDA +using Test +using Trixi + +include("test_trixi.jl") + +# EXAMPLES_DIR = joinpath(examples_dir(), "dgmulti_1d") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +# TODO: + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) + +end # module From 02de7d256adcdb4d2bd72cc7a98140f24648dacd Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 10:08:37 +0200 Subject: [PATCH 006/158] add storage_type, real_type to semidiscretize --- .../p4est_2d_dgsem/elixir_advection_basic.jl | 2 +- src/semidiscretization/semidiscretization.jl | 21 ++++++++++++++++++- test/test_p4est_2d.jl | 21 +++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl index a87f1582121..33a049a3a1e 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic.jl @@ -31,7 +31,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. 
# Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0)) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index f41c7ea4a7f..91599f4d63b 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -82,9 +82,15 @@ end Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan` that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). + +The optional keyword arguments `storage_type` and `real_type` configure the underlying computational +datastructures. `storage_type` changes the fundamental array type being used, allowing the +experimental use of `CuArray` or other GPU array types. `real_type` changes the computational data type being used. """ function semidiscretize(semi::AbstractSemidiscretization, tspan; - reset_threads = true) + reset_threads = true, + storage_type = nothing, + real_type = nothing) # Optionally reset Polyester.jl threads. See # https://github.com/trixi-framework/Trixi.jl/issues/1583 # https://github.com/JuliaSIMD/Polyester.jl/issues/30 @@ -92,6 +98,19 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan; Polyester.reset_threads!() end + if !(storage_type === nothing && real_type === nothing) + if storage_type === nothing + storage_type = Array + end + if real_type === nothing + real_type = Float64 + end + semi = trixi_adapt(storage_type, real_type, semi) + if eltype(tspan) !== real_type + tspan = convert.(real_type, tspan) + end + end + u0_ode = compute_coefficients(first(tspan), semi) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. 
using # mpi_isparallel() && MPI.Barrier(mpi_comm()) diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index b1472cb99cf..f436faffaa1 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -35,6 +35,27 @@ isdir(outdir) && rm(outdir, recursive = true) @test real(semi32.mesh) == Float64 end +@trixi_testset "elixir_advection_basic.jl (Float32)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[8.311947673061856e-6], + linf=[6.627000273229378e-5], + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 +end + @trixi_testset "elixir_advection_nonconforming_flag.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_nonconforming_flag.jl"), From 671f5b16b065ba8bf2e832f2469d351083c17929 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 10:25:33 +0200 Subject: [PATCH 007/158] add GPU construction test --- .../elixir_advection_basic_gpu.jl | 60 +++++++++++++++++++ test/test_cuda.jl | 24 +++++++- 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl new file mode 100644 index 00000000000..4e26ec3df1a --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -0,0 +1,60 @@ +# The same setup as tree_2d_dgsem/elixir_advection_basic.jl +# to verify the StructuredMesh implementation against TreeMesh + +using 
OrdinaryDiffEqSSPRK, OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) +coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y)) + +trees_per_dimension = (8, 8) + +# Create P4estMesh with 8 x 8 trees and 16 x 16 elements +mesh = P4estMesh(trees_per_dimension, polydeg = 3, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span from 0.0 to 1.0 +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval = 100, + solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.6) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +# sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); +# dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback +# ode_default_options()..., callback = callbacks); diff --git a/test/test_cuda.jl b/test/test_cuda.jl index f2fd11233c6..68872266986 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -12,7 +12,29 @@ include("test_trixi.jl") outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# TODO: +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + +@trixi_testset "elixir_advection_basic.jl (Float32)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=[8.311947673061856e-6], + linf=[6.627000273229378e-5], + real_type=Float32, + storage_type=CuArray) + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 +end # Clean up afterwards: delete Trixi.jl output directory @test_nowarn isdir(outdir) && rm(outdir, recursive = true) From ecd09a59063135fb2bf981e86b3c5d21ed1fae26 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 12:08:26 +0200 Subject: [PATCH 008/158] don't adapt Array{MArray} --- src/auxiliary/containers.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 5738467ec6b..edc42db382b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -388,6 +388,13 @@ function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, adapt(Storage{StaticArrays.similar_type(T, Real)}, x) end +# Our threaded cache contains MArray, it is unlikely that we would want to adapt those +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::Array{T}) where {Storage, Real, + T <: StaticArrays.MArray} + adapt(Array{StaticArrays.similar_type(T, Real)}, x) +end + function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, x::AbstractArray) where {Storage, Real} adapt(Storage, x) From 312009af58e70430a7f00cd751ed3acaaea8def5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 13:36:22 +0200 Subject: [PATCH 009/158] add some more cuda adapt tests --- test/test_cuda.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/test_cuda.jl b/test/test_cuda.jl index 
68872266986..7a218f236d3 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -19,7 +19,7 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") # Expected errors are exactly the same as with TreeMesh! l2=[8.311947673061856e-6], linf=[6.627000273229378e-5], - real_type=Float32, + real_type=Float64, storage_type=CuArray) # # Ensure that we do not have excessive memory allocations # # (e.g., from type instabilities) @@ -34,6 +34,17 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") @test real(ode.p.solver.mortar) == Float32 # TODO: remake ignores the mesh itself as well @test real(ode.p.mesh) == Float64 + + @test_broken ode.u0 isa CuArray + @test ode.p.basis.boundary_interpolations isa CuArray + @test ode.p.basis.derivative_matrix isa CuArray + + @test ode.p.basis.forward_upper isa CuArray + + @test Trixi.storage_type(ode.p.cache.elements) === CuArray + @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray + @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray + @test Trixi.storage_type(ode.p.cache.mortrar) === CuArray end # Clean up afterwards: delete Trixi.jl output directory From 690efd1de65cbb4a34448fef15c78786c2fc4c69 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 28 Apr 2025 16:18:18 +0200 Subject: [PATCH 010/158] use sources for dev branch --- .buildkite/pipeline.yml | 2 +- test/Project.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 344b8eacc3a..fdb4a855961 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -5,7 +5,7 @@ steps: matrix: setup: version: - - "1.10" + - "1.11" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" diff --git a/test/Project.toml b/test/Project.toml index 206654281d9..77e50547a4f 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -59,3 +59,6 @@ Random = "1" StableRNGs = "1.0.2" Test = "1" TrixiTest = "0.1" + +[sources] +CUDA = {url = 
"https://github.com/JuliaGPU/CUDA.jl", rev = "vc/unsafe_wrap_symbols"} From 15a898b773573a4742baa186468962a4b6d39c7c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 8 May 2025 11:50:42 +0200 Subject: [PATCH 011/158] fixup! use sources for dev branch --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 77e50547a4f..71ad1ca24e2 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -61,4 +61,4 @@ Test = "1" TrixiTest = "0.1" [sources] -CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "vc/unsafe_wrap_symbols"} +CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "master"} From 45d344bdeb6661a04c1b8f5cd4a3e41ac844157f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 14 May 2025 10:38:54 +0200 Subject: [PATCH 012/158] use released version of CUDA --- .github/workflows/GPUCompat.yml | 86 --------------------------------- Project.toml | 2 +- test/Project.toml | 3 -- 3 files changed, 1 insertion(+), 90 deletions(-) delete mode 100644 .github/workflows/GPUCompat.yml diff --git a/.github/workflows/GPUCompat.yml b/.github/workflows/GPUCompat.yml deleted file mode 100644 index 335e1c83c4c..00000000000 --- a/.github/workflows/GPUCompat.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: GPU Package Compatibility - -on: - pull_request: - paths-ignore: - - 'AUTHORS.md' - - 'CITATION.bib' - - 'CONTRIBUTING.md' - - 'LICENSE.md' - - 'NEWS.md' - - 'README.md' - - '.zenodo.json' - - '.github/workflows/benchmark.yml' - - '.github/workflows/CompatHelper.yml' - - '.github/workflows/TagBot.yml' - - 'benchmark/**' - - 'docs/**' - - 'utils/**' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - test: - if: "!contains(github.event.head_commit.message, 'skip ci')" - name: ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - 
- version: '1.10' - os: ubuntu-latest - arch: x64 - - version: '1.10' - os: windows-latest - arch: x64 - # CUDA.jl only supports 64-bit Linux and Windows, see https://github.com/JuliaGPU/CUDA.jl?tab=readme-ov-file#requirements - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Julia - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - - name: Display version info - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)' - - - name: Cache Julia packages - uses: julia-actions/cache@v2 - - - name: Build project - uses: julia-actions/julia-buildpkg@v1 - - # Only CUDA.jl is needed for GPU compatibility test now - - name: Add CUDA.jl to environment - run: | - julia --project=. -e ' - using Pkg; - Pkg.activate(temp=true); - Pkg.develop(PackageSpec(path=pwd())); - Pkg.add("CUDA"); - Pkg.update()' - - # - name: Add Metal.jl to environment - # run: | - # julia --project=. -e ' - # using Pkg; - # Pkg.activate(temp=true); - # Pkg.develop(PackageSpec(path=pwd())); - # Pkg.add("Metal"); - # Pkg.update()' - - # - name: Add AMDGPU.jl to environment - # run: | - # julia --project=. 
-e ' - # using Pkg; - # Pkg.activate(temp=true); - # Pkg.develop(PackageSpec(path=pwd())); - # Pkg.add("AMDGPU"); - # Pkg.update()' diff --git a/Project.toml b/Project.toml index 3ce2daf16f9..f16e133231d 100644 --- a/Project.toml +++ b/Project.toml @@ -68,7 +68,7 @@ TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" Adapt = "4" -CUDA = "5.2" +CUDA = "5.8" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/test/Project.toml b/test/Project.toml index 71ad1ca24e2..206654281d9 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -59,6 +59,3 @@ Random = "1" StableRNGs = "1.0.2" Test = "1" TrixiTest = "0.1" - -[sources] -CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "master"} From 7e72effd09762722cb6a1dee9cfc9e7fa8114c77 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 14 May 2025 10:43:30 +0200 Subject: [PATCH 013/158] Update .buildkite/pipeline.yml --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index fdb4a855961..344b8eacc3a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -5,7 +5,7 @@ steps: matrix: setup: version: - - "1.11" + - "1.10" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" From 3450dddcdc19347412161d747e817cfef3124e78 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 17 Dec 2024 17:36:16 +0100 Subject: [PATCH 014/158] Use Adapt.jl to change storage and element type In order to eventually support GPU computation we need to use Adapt.jl to allow GPU backend packages to swap out host-array types like `CuArray` with device-side types like `CuDeviceArray`. Additionally this will allow us to change the element type of a simulation by using `adapt(Array{Float32}`. 
Co-authored-by: Lars Christmann Co-authored-by: Benedict Geihe --- Project.toml | 2 + src/Trixi.jl | 2 + src/auxiliary/containers.jl | 84 +++++ src/auxiliary/vector_of_arrays.jl | 31 ++ .../semidiscretization_hyperbolic.jl | 27 +- src/solvers/dg.jl | 3 + src/solvers/dgsem/basis_lobatto_legendre.jl | 37 +++ src/solvers/dgsem_p4est/containers.jl | 314 ++++++++++++++---- .../dgsem_p4est/containers_parallel.jl | 114 +++++-- src/solvers/dgsem_p4est/dg_parallel.jl | 60 ++-- .../sort_boundary_conditions.jl | 17 +- test/Project.toml | 1 + test/test_p4est_2d.jl | 6 + test/test_unstructured_2d.jl | 7 + 14 files changed, 567 insertions(+), 138 deletions(-) create mode 100644 src/auxiliary/vector_of_arrays.jl diff --git a/Project.toml b/Project.toml index 5af41465607..e10c47ff1be 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Michael Schlottke-Lakemper ", " version = "0.12.5-DEV" [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" @@ -63,6 +64,7 @@ TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" [compat] +Adapt = "4" Accessors = "0.1.36" CodeTracking = "1.0.5" ConstructionBase = "1.5" diff --git a/src/Trixi.jl b/src/Trixi.jl index a707437655e..a52dfd6d973 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -50,6 +50,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!, using DelimitedFiles: readdlm using Downloads: Downloads +using Adapt: Adapt, adapt using CodeTracking: CodeTracking using ConstructionBase: ConstructionBase using DiffEqBase: DiffEqBase, get_tstops, get_tstops_array @@ -132,6 +133,7 @@ include("basic_types.jl") # Include all top-level source files include("auxiliary/auxiliary.jl") +include("auxiliary/vector_of_arrays.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") include("auxiliary/t8code.jl") diff --git a/src/auxiliary/containers.jl 
b/src/auxiliary/containers.jl index 90650f6abcf..5738467ec6b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -314,4 +314,88 @@ end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) raw_copy!(c, c, from, from, destination) end + +# Trixi storage types must implement these two Adapt.jl methods +function Adapt.adapt_structure(to, c::AbstractContainer) + error("Interface: Must implement Adapt.adapt_structure(to, ::$(typeof(c)))") +end + +function Adapt.parent_type(C::Type{<:AbstractContainer}) + error("Interface: Must implement Adapt.parent_type(::Type{$C}") +end + +function Adapt.unwrap_type(C::Type{<:AbstractContainer}) + return Adapt.unwrap_type(Adapt.parent_type(C)) +end + +# TODO: Upstream to Adapt +function storage_type(x) + return storage_type(typeof(x)) +end + +function storage_type(T::Type) + error("Interface: Must implement storage_type(::Type{$T}") +end + +function storage_type(::Type{<:Array}) + Array +end + +function storage_type(C::Type{<:AbstractContainer}) + return storage_type(Adapt.unwrap_type(C)) +end + +# For some storage backends like CUDA.jl, empty arrays do seem to simply be +# null pointers which can cause `unsafe_wrap` to fail when calling +# Adapt.adapt (ArgumentError, see +# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229). +# To circumvent this, on length zero arrays this allocates +# a separate empty array instead of wrapping. +# However, since zero length arrays are not used in calculations, +# it should be okay if the underlying storage vectors and wrapped arrays +# are not the same as long as they are properly wrapped when `resize!`d etc. +function unsafe_wrap_or_alloc(to, vector, size) + if length(vector) == 0 + return similar(vector, size) + else + return unsafe_wrap(to, pointer(vector), size) + end +end + +struct TrixiAdaptor{Storage, Real} end + +function trixi_adapt(storage, real, x) + adapt(TrixiAdaptor{storage, real}(), x) +end + +# Custom rules +# 1. 
handling of StaticArrays +function Adapt.adapt_storage(::TrixiAdaptor{<:Any, Real}, + x::StaticArrays.StaticArray{S, T, N}) where {Real, S, T, N} + StaticArrays.similar_type(x, Real)(x) +end + +# 2. Handling of Arrays +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: AbstractFloat} + adapt(Storage{Real}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: StaticArrays.StaticArray} + adapt(Storage{StaticArrays.similar_type(T, Real)}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray) where {Storage, Real} + adapt(Storage, x) +end + +# 3. TODO: Should we have a fallback? But that would imply implementing things for NamedTuple again + +function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} + return unsafe_wrap_or_alloc(Storage, vec, size) +end end # @muladd diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl new file mode 100644 index 00000000000..0fa8dd7f1ec --- /dev/null +++ b/src/auxiliary/vector_of_arrays.jl @@ -0,0 +1,31 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector. +# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is +# not possible for a plain Vector of Arrays. 
+struct VecOfArrays{T <: AbstractArray} + arrays::Vector{T} +end +Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i) +Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays) +Base.size(v::VecOfArrays) = Base.size(v.arrays) +Base.length(v::VecOfArrays) = Base.length(v.arrays) +Base.eltype(v::VecOfArrays{T}) where {T} = T +function Adapt.adapt_structure(to, v::VecOfArrays) + return VecOfArrays([Adapt.adapt(to, arr) for arr in v.arrays]) +end +function Adapt.parent_type(::Type{<:VecOfArrays{T}}) where {T} + return T +end +function Adapt.unwrap_type(A::Type{<:VecOfArrays}) + Adapt.unwrap_type(Adapt.parent_type(A)) +end +function Base.convert(::Type{<:VecOfArrays}, v::Vector{<:AbstractArray}) + VecOfArrays(v) +end +end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 7496a345661..2a563c02229 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -27,25 +27,6 @@ mutable struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, solver::Solver cache::Cache performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, - BoundaryConditions, SourceTerms, Solver, - Cache}(mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, - cache::Cache) where {Mesh, Equations, - InitialCondition, - BoundaryConditions, - SourceTerms, - Solver, - Cache} - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, - solver, cache, performance_counter) - end end """ @@ -71,6 +52,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) + performance_counter = 
PerformanceCounter() + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), @@ -78,9 +61,13 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver initial_condition, _boundary_conditions, source_terms, solver, - cache) + cache, + performance_counter) end +# @eval due to @muladd +@eval Adapt.@adapt_structure(SemidiscretizationHyperbolic) + # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index ad211b3c003..78f3901a346 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -415,6 +415,9 @@ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} volume_integral::VolumeIntegral end +# @eval due to @muladd +@eval Adapt.@adapt_structure(DG) + function Base.show(io::IO, dg::DG) @nospecialize dg # reduce precompilation time diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 777348aa8ce..9647f172e20 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -34,6 +34,32 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES, # negative adjoint wrt the SBP dot product end +function Adapt.adapt_structure(to, basis::LobattoLegendreBasis) + inverse_vandermonde_legendre = adapt(to, basis.inverse_vandermonde_legendre) + RealT = eltype(inverse_vandermonde_legendre) + + nodes = SVector{<:Any, RealT}(basis.nodes) + weights = SVector{<:Any, RealT}(basis.weights) + inverse_weights = SVector{<:Any, RealT}(basis.inverse_weights) + boundary_interpolation = adapt(to, basis.boundary_interpolation) + derivative_matrix = adapt(to, basis.derivative_matrix) + derivative_split = adapt(to, basis.derivative_split) + 
derivative_split_transpose = adapt(to, basis.derivative_split_transpose) + derivative_dhat = adapt(to, basis.derivative_dhat) + return LobattoLegendreBasis{RealT, nnodes(basis), typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, + weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + function LobattoLegendreBasis(RealT, polydeg::Integer) nnodes_ = polydeg + 1 @@ -155,6 +181,17 @@ struct LobattoLegendreMortarL2{RealT <: Real, NNODES, reverse_lower::ReverseMatrix end +function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2) + forward_upper = adapt(to, mortar.forward_upper) + forward_lower = adapt(to, mortar.forward_lower) + reverse_upper = adapt(to, mortar.reverse_upper) + reverse_lower = adapt(to, mortar.reverse_lower) + return LobattoLegendreMortarL2{eltype(forward_upper), nnodes(mortar), + typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end + function MortarL2(basis::LobattoLegendreBasis) RealT = real(basis) nnodes_ = nnodes(basis) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index a070db6b701..68e5b3d758b 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,31 @@ #! 
format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, + ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1}, + ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2}, + ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3}, + VectorRealT <: DenseVector{RealT}, + VectoruEltype <: DenseVector{uEltype}} <: + AbstractContainer # Physical coordinates at each node - node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + node_coordinates::ArrayNDIMSP2 # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix::Array{RealT, NDIMSP3} + jacobian_matrix::ArrayNDIMSP3 # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::ArrayNDIMSP3 # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::ArrayNDIMSP1 # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element] # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _node_coordinates::VectorRealT + _jacobian_matrix::VectorRealT + _contravariant_vectors::VectorRealT + _inverse_jacobian::VectorRealT + _surface_flux_values::VectoruEltype end @inline function 
nelements(elements::P4estElementContainer) @@ -36,7 +42,7 @@ end RealT, uEltype } - uEltype + return uEltype end # Only one-dimensional `Array`s are `resize!`able in Julia. @@ -51,28 +57,30 @@ function Base.resize!(elements::P4estElementContainer, capacity) n_dims = ndims(elements) n_nodes = size(elements.node_coordinates, 2) n_variables = size(elements.surface_flux_values, 1) + ArrayType = storage_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates), (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix), (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + elements.contravariant_vectors = unsafe_wrap(ArrayType, + pointer(_contravariant_vectors), size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian), (ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., n_dims * 2, capacity)) @@ -117,33 +125,104 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, NDIMS, RealT}, NDIMS * 2, nelements)) elements = P4estElementContainer{NDIMS, RealT, 
uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, - contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, _jacobian_matrix, - _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) + NDIMS + 3, Array{RealT, NDIMS + 1}, + Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3}, + Vector{RealT}, Vector{uEltype}}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) init_elements!(elements, mesh, basis) return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +function Adapt.parent_type(::Type{<:P4estElementContainer{<:Any, <:Any, <:Any, <:Any, + <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, + elements::P4estElementContainer{NDIMS}) where {NDIMS} + # Adapt underlying storage + _node_coordinates = adapt(to, elements._node_coordinates) + _jacobian_matrix = adapt(to, elements._jacobian_matrix) + _contravariant_vectors = adapt(to, elements._contravariant_vectors) + _inverse_jacobian = adapt(to, elements._inverse_jacobian) + _surface_flux_values = adapt(to, elements._surface_flux_values) + + RealT = eltype(_inverse_jacobian) + uEltype = eltype(_surface_flux_values) + + # Wrap arrays again + node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates, + size(elements.node_coordinates)) + jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix, + size(elements.jacobian_matrix)) + contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors, + size(jacobian_matrix)) + inverse_jacobian = unsafe_wrap_or_alloc(to, _inverse_jacobian, + size(elements.inverse_jacobian)) + surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values, + size(elements.surface_flux_values)) + + new_type_params = 
(NDIMS, + RealT, + uEltype, + NDIMS + 1, + NDIMS + 2, + NDIMS + 3, + typeof(inverse_jacobian), # ArrayNDIMSP1 + typeof(node_coordinates), # ArrayNDIMSP2 + typeof(jacobian_matrix), # ArrayNDIMSP3 + typeof(_node_coordinates), # VectorRealT + typeof(_surface_flux_values)) # VectoruEltype + return P4estElementContainer{new_type_params...}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) +end + +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::IdsMatrix # [primary/secondary, interface] + node_indices::IndicesMatrix # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline function ninterfaces(interfaces::P4estInterfaceContainer) size(interfaces.neighbor_ids, 2) end @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) @@ -152,17 +231,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) n_dims = ndims(interfaces) n_nodes = size(interfaces.u, 3) n_variables = size(interfaces.u, 2) + ArrayType = storage_type(interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), + interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), + (2, capacity)) resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), + (2, capacity)) return nothing end @@ -189,10 +271,15 @@ function init_interfaces(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, - node_indices, - _u, _neighbor_ids, - _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(neighbor_ids), + typeof(node_indices), typeof(_u), + typeof(_neighbor_ids), typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) init_interfaces!(interfaces, mesh) @@ -205,21 +292,58 @@ function init_interfaces!(interfaces, mesh::Union{P4estMesh, P4estMeshView}) return interfaces end -mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: +function Adapt.parent_type(::Type{<:P4estInterfaceContainer{<:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT 
+end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer) + # Adapt underlying storage + _u = adapt(to, interfaces._u) + _neighbor_ids = adapt(to, interfaces._neighbor_ids) + _node_indices = adapt(to, interfaces._node_indices) + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, + size(interfaces.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, + size(interfaces.node_indices)) + + NDIMS = ndims(interfaces) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 2, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1, + uArray <: DenseArray{uEltype, NDIMSP1}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + u::uArray # [variables, i, j, boundary] + neighbor_ids::IdsVector # [boundary] + node_indices::IndicesVector # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nboundaries(boundaries::P4estBoundaryContainer) length(boundaries.neighbor_ids) end @inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) @@ -228,9 +352,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity) n_dims = ndims(boundaries) n_nodes = size(boundaries.u, 2) n_variables = size(boundaries.u, 1) + ArrayType = storage_type(boundaries) resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), + boundaries.u = unsafe_wrap(ArrayType, pointer(_u), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -263,9 +388,11 @@ function init_boundaries(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, - node_indices, names, - _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, + node_indices, names, + _u) if n_boundaries > 0 init_boundaries!(boundaries, mesh) @@ -312,6 +439,25 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) return nothing end +function Adapt.parent_type(::Type{<:P4estBoundaryContainer{<:Any, <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer) + _u = adapt(to, boundaries._u) + u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) + neighbor_ids = adapt(to, boundaries.neighbor_ids) + node_indices = adapt(to, boundaries.node_indices) + name = boundaries.name + + NDIMS = ndims(boundaries) + return P4estBoundaryContainer{NDIMS, eltype(_u), NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, node_indices, + name, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in 
`neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -337,20 +483,32 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + u::uArray # [small/large side, variable, position, i, j, mortar] + neighbor_ids::IdsMatrix # [position, mortar] + node_indices::IndicesMatrix # [small/large, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) @@ -359,18 +517,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity) n_dims = ndims(mortars) n_nodes = size(mortars.u, 4) n_variables = size(mortars.u, 2) + ArrayType = storage_type(mortars) resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), + mortars.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, 2^(n_dims - 1), ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), (2^(n_dims - 1) + 1, capacity)) resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity)) return nothing end @@ -398,12 +557,15 @@ function init_mortars(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equatio _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, - neighbor_ids, - node_indices, - _u, - _neighbor_ids, - _node_indices) + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), + typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) if n_mortars > 0 init_mortars!(mortars, mesh) @@ -418,6 +580,34 @@ function init_mortars!(mortars, mesh::Union{P4estMesh, P4estMeshView}) return mortars end +function Adapt.parent_type(::Type{<:P4estMortarContainer{<:Any, <:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing 
memory +function Adapt.adapt_structure(to, mortars::P4estMortarContainer) + # Adapt underlying storage + _u = adapt(to, mortars._u) + _neighbor_ids = adapt(to, mortars._neighbor_ids) + _node_indices = adapt(to, mortars._node_indices) + + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(mortars.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices)) + + NDIMS = ndims(mortars) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 1, + NDIMS + 3, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) # Re-initialize elements container @unpack elements = cache diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 676b37efff3..cb9cd1ffc95 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -5,15 +5,19 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + VecInt <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides::Vector{Int} # [interface] - + u::uArray # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::VecInt # [interface] + node_indices::IndicesVector # [interface] + local_sides::VecInt # [interface] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) @@ -27,9 +31,10 @@ function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) n_dims = ndims(mpi_interfaces) n_nodes = size(mpi_interfaces.u, 3) n_variables = size(mpi_interfaces.u, 2) + ArrayType = storage_type(mpi_interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -64,11 +69,13 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, - local_neighbor_ids, - node_indices, - local_sides, - _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) init_mpi_interfaces!(mpi_interfaces, mesh) @@ -81,6 +88,32 @@ function 
init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) return mpi_interfaces end +function Adapt.parent_type(::Type{<:Trixi.P4estMPIInterfaceContainer{<:Any, <:Any, + <:Any, A}}) where {A} + return A +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer) + # Adapt Vectors and underlying storage + _u = adapt(to, mpi_interfaces._u) + local_neighbor_ids = adapt(to, mpi_interfaces.local_neighbor_ids) + node_indices = adapt(to, mpi_interfaces.node_indices) + local_sides = adapt(to, mpi_interfaces.local_sides) + + # Wrap array again + u = unsafe_wrap_or_alloc(to, _u, size(mpi_interfaces.u)) + + NDIMS = ndims(mpi_interfaces) + return P4estMPIInterfaceContainer{NDIMS, eltype(u), + NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into @@ -88,14 +121,17 @@ end # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. 
mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] - local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + NDIMSP2, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + uVector <: DenseVector{uEltype}} <: + AbstractContainer + u::uArray # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] + local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector _node_indices::Vector{NTuple{NDIMS, Symbol}} _normal_directions::Vector{RealT} end @@ -164,11 +200,12 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq 2^(NDIMS - 1), n_mpi_mortars)) mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(u, local_neighbor_ids, - local_neighbor_positions, - node_indices, normal_directions, - _u, _node_indices, - _normal_directions) + NDIMS + 3, typeof(u), + typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) if n_mpi_mortars > 0 init_mpi_mortars!(mpi_mortars, mesh, basis, elements) @@ -184,6 +221,33 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements return mpi_mortars end +function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) + # TODO: Vector of Vector type data structure does not work on GPUs, + # must be 
redesigned. This skeleton implementation here just exists just + # for compatibility with the rest of the KA.jl solver code + + _u = adapt(to, mpi_mortars._u) + _node_indices = mpi_mortars._node_indices + _normal_directions = mpi_mortars._normal_directions + + u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u)) + local_neighbor_ids = mpi_mortars.local_neighbor_ids + local_neighbor_positions = mpi_mortars.local_neighbor_positions + node_indices = mpi_mortars.node_indices + normal_directions = mpi_mortars.normal_directions + + NDIMS = ndims(mpi_mortars) + return P4estMPIMortarContainer{NDIMS, eltype(_u), + eltype(_normal_directions), + NDIMS + 1, NDIMS + 2, NDIMS + 3, + typeof(u), typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, + normal_directions, _u, + _node_indices, + _normal_directions) +end + # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes function init_interfaces!(interfaces, mesh::ParallelP4estMesh) diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 2cc201dd1f0..7acddf07b4b 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -5,12 +5,13 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPICache{uEltype} +mutable struct P4estMPICache{BufferType <: DenseVector, + VecInt <: DenseVector{<:Integer}} mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_neighbor_interfaces::VecOfArrays{VecInt} + mpi_neighbor_mortars::VecOfArrays{VecInt} + mpi_send_buffers::VecOfArrays{BufferType} + mpi_recv_buffers::VecOfArrays{BufferType} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} @@ -25,25 +26,29 @@ function P4estMPICache(uEltype) end mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays mpi_send_requests = Vector{MPI.Request}(undef, 0) mpi_recv_requests = Vector{MPI.Request}(undef, 0) n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) n_elements_global = 0 first_element_global_id = 0 - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, - mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks, + mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end 
-@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype +@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType) + +# @eval due to @muladd +@eval Adapt.@adapt_structure(P4estMPICache) ## # Note that the code in `start_mpi_send`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10. @@ -265,16 +270,16 @@ end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, - mpi_mortars, - mesh) + mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, - mpi_neighbor_mortars, - ndims(mesh), - nvars, - n_nodes, - uEltype) + _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces, + _mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) # Determine local and total number of elements n_elements_global = Int(mesh.p4est.global_num_quadrants[]) @@ -286,6 +291,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces) + mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars) + mpi_send_buffers = VecOfArrays(_mpi_send_buffers) + mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers) + # TODO reuse existing structures @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 0cb3bd7f409..d6cf6e1ce6d 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}, + Vec <: AbstractVector{<:Integer}} boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier end @@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) boundary_symbol_indices = Dict{Symbol, Vector{Int}}() container = UnstructuredSortedBoundaryTypes{n_boundary_types, - typeof(boundary_condition_types)}(boundary_condition_types, - boundary_indices, - boundary_conditions, - boundary_symbol_indices) + typeof(boundary_condition_types), + Vector{Int}}(boundary_condition_types, + boundary_indices, + boundary_conditions, + boundary_symbol_indices) initialize!(container, cache) end @@ -119,4 +121,7 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N return 
boundary_types_container end + +# @eval due to @muladd +@eval Adapt.@adapt_structure(UnstructuredSortedBoundaryTypes) end # @muladd diff --git a/test/Project.toml b/test/Project.toml index cd1c122a18a..94683d362f5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index 577344d1a4a..7425d243111 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -27,6 +27,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_advection_nonconforming_flag.jl" begin diff --git a/test/test_unstructured_2d.jl b/test/test_unstructured_2d.jl index 07a79f883d3..0d13ecaa821 100644 --- a/test/test_unstructured_2d.jl +++ b/test/test_unstructured_2d.jl @@ -2,6 +2,7 @@ module TestExamplesUnstructuredMesh2D using Test using Trixi +using Adapt include("test_trixi.jl") @@ -32,6 +33,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_euler_free_stream.jl" begin From cf2f5905a8ac55427c14666f742e8bc9001c31c0 Mon Sep 17 
00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Apr 2025 18:37:41 +0200 Subject: [PATCH 015/158] add docs and CUDAExt --- Project.toml | 7 +++- docs/make.jl | 3 +- docs/src/heterogeneous.md | 82 +++++++++++++++++++++++++++++++++++++++ ext/TrixiCUDAExt.jl | 11 ++++++ 4 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 docs/src/heterogeneous.md create mode 100644 ext/TrixiCUDAExt.jl diff --git a/Project.toml b/Project.toml index e10c47ff1be..0c53ef69666 100644 --- a/Project.toml +++ b/Project.toml @@ -4,8 +4,8 @@ authors = ["Michael Schlottke-Lakemper ", " version = "0.12.5-DEV" [deps] -Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -57,15 +57,18 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [extensions] TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" +TrixiCUDAExt = "CUDA" [compat] -Adapt = "4" Accessors = "0.1.36" +Adapt = "4" +CUDA = "5" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/docs/make.jl b/docs/make.jl index 7111b66ab94..0301f5ba64e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -163,7 +163,8 @@ makedocs( "Style guide" => "styleguide.md", "Testing" => "testing.md", "Performance" => "performance.md", - "Parallelization" => "parallelization.md" + "Parallelization" => "parallelization.md", + "Heterogeneous" => "heterogeneous.md" ], "Troubleshooting and FAQ" => "troubleshooting.md", "Reference" => [ diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md new file mode 100644 index 
00000000000..60bda029a40 --- /dev/null +++ b/docs/src/heterogeneous.md @@ -0,0 +1,82 @@ +# Heterogeneous computing + +Support for heterogeneous computing is currently being worked on. + +## The use of Adapt.jl + +[`Adapt.jl`](https://github.com/JuliaGPU/Adapt.jl) is a package in the JuliaGPU family that allows for +the translation of nested data structures. The primary goal is to allow the substitution of `Array` +at the storage leaves with a GPU array like `CuArray`. + +To facilitate this data structures must be parameterized, so instead of: + +```julia +struct Container + data::Array{Float64,2} +end +``` + +They must be written as: + +```julia +struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D +end +``` + +furthermore, we need to define a function that allows for the conversion of storage +of our types: + +```julia +function Adapt.adapt_structure(to, C::Container) + return Container(adapt(to, C.data)) +end +``` + +or simply + +```julia +Adapt.@adapt_structure(Container) +``` + +additionally, we must define `Adapt.parent_type`. + +```julia +function Adapt.parent_type(::Type{<:Container{D}}) where D + return D +end +``` + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +Array + +julia> using CUDA + +julia> GPU_C = adapt(CuArray, C) +Container{CuArray{Float64, 1, CUDA.DeviceMemory}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +CuArray +``` + +## Element-type conversion with `Trixi.trixi_adapt`. + +We can use Trixi.trixi_adapt to perform both an element-type and a storage-type adoption + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(Array, Float32, C) +Container{Vector{Float32}}(Float32[0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(CuArray, Float32, C) +Container{CuArray{Float32, 1, CUDA.DeviceMemory}}(Float32[0.0, 0.0, 0.0]) +``` + +!!! 
note + `adapt(Array{Float32}, C)` is tempting but will do the wrong thing in the presence of `StaticArrays`. \ No newline at end of file diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl new file mode 100644 index 00000000000..681d2f53a1e --- /dev/null +++ b/ext/TrixiCUDAExt.jl @@ -0,0 +1,11 @@ +# Package extension for adding CUDA-based features to Trixi.jl +module TrixiCUDAExt + +import CUDA: CuArray +import Trixi + +function Trixi.storage_type(::Type{<:CuArray}) + return CuArray +end + +end From de96f850444b875767a114921569be25df027d1e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Apr 2025 21:35:04 +0200 Subject: [PATCH 016/158] Aqua set unbound_args --- test/test_aqua.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_aqua.jl b/test/test_aqua.jl index 9b3f2d67903..154088995ca 100644 --- a/test/test_aqua.jl +++ b/test/test_aqua.jl @@ -10,6 +10,7 @@ include("test_trixi.jl") @timed_testset "Aqua.jl" begin Aqua.test_all(Trixi, ambiguities = false, + unbound_args = false, # FIXME: UnstructuredSortedBoundaryTypes # exceptions necessary for adding a new method `StartUpDG.estimate_h` # in src/solvers/dgmulti/sbp.jl piracies = (treat_as_own = [Trixi.StartUpDG.RefElemData, From 1a7cff2673a2111ba6e143c757276ededf1e69a7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 09:26:23 +0200 Subject: [PATCH 017/158] lower bound CUDA to 5.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0c53ef69666..689f054adf0 100644 --- a/Project.toml +++ b/Project.toml @@ -68,7 +68,7 @@ TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" Adapt = "4" -CUDA = "5" +CUDA = "5.2" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" From 68edf29cc2a66669038e0b15e7bf1db19ca3a9c6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Apr 2025 17:16:18 +0200 Subject: [PATCH 018/158] add initial CUDA pipeline --- .buildkite/pipeline.yml | 9 ++++++--- 
test/Project.toml | 1 + test/runtests.jl | 9 +++++++++ test/test_cuda.jl | 20 ++++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 test/test_cuda.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0f8ad475db8..344b8eacc3a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,3 +1,5 @@ +env: + steps: - label: "CUDA Julia {{matrix.version}}" matrix: @@ -7,12 +9,13 @@ steps: plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" - command: | - true + - JuliaCI/julia-test#v1: ~ + env: + TRIXI_TEST: "CUDA" agents: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip ci\]/ timeout_in_minutes: 60 soft_fail: - - exit_status: 3 \ No newline at end of file + - exit_status: 3 diff --git a/test/Project.toml b/test/Project.toml index 94683d362f5..78b35c6b2de 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,6 +4,7 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" diff --git a/test/runtests.jl b/test/runtests.jl index db2c2e9dd88..8f35e1fb58d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -109,4 +109,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" include("test_paper_self_gravitating_gas_dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" + import CUDA + if CUDA.functional() + include("test_cuda.jl") + else + @warn "Unable to run CUDA tests on this machine" + end + end end diff --git a/test/test_cuda.jl b/test/test_cuda.jl new file mode 100644 index 00000000000..f2fd11233c6 --- /dev/null +++ b/test/test_cuda.jl @@ 
-0,0 +1,20 @@ +module TestCUDA + +using CUDA +using Test +using Trixi + +include("test_trixi.jl") + +# EXAMPLES_DIR = joinpath(examples_dir(), "dgmulti_1d") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +# TODO: + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) + +end # module From 11ff63aade34a8d3be33bc0d46da9ef8f356db83 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 10:08:37 +0200 Subject: [PATCH 019/158] add storage_type, real_type to semidiscretize --- .../p4est_2d_dgsem/elixir_advection_basic.jl | 2 +- src/semidiscretization/semidiscretization.jl | 21 ++++++++++++++++++- test/test_p4est_2d.jl | 21 +++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl index 4ff646365aa..e162e8997f2 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic.jl @@ -31,7 +31,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. 
# Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0)) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index cc3900d42da..97c50aa46a1 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -82,9 +82,15 @@ end Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan` that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). + +The optional keyword arguments `storage_type` and `real_type` configure the underlying computational +datastructures. `storage_type` changes the fundamental array type being used, allowing the +experimental use of `CuArray` or other GPU array types. `real_type` changes the computational data type being used. """ function semidiscretize(semi::AbstractSemidiscretization, tspan; - reset_threads = true) + reset_threads = true, + storage_type = nothing, + real_type = nothing) # Optionally reset Polyester.jl threads. See # https://github.com/trixi-framework/Trixi.jl/issues/1583 # https://github.com/JuliaSIMD/Polyester.jl/issues/30 @@ -92,6 +98,19 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan; Polyester.reset_threads!() end + if !(storage_type === nothing && real_type === nothing) + if storage_type === nothing + storage_type = Array + end + if real_type === nothing + real_type = Float64 + end + semi = trixi_adapt(storage_type, real_type, semi) + if eltype(tspan) !== real_type + tspan = convert.(real_type, tspan) + end + end + u0_ode = compute_coefficients(first(tspan), semi) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. 
using # mpi_isparallel() && MPI.Barrier(mpi_comm()) diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index 7425d243111..307d70683a5 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -35,6 +35,27 @@ isdir(outdir) && rm(outdir, recursive = true) @test real(semi32.mesh) == Float64 end +@trixi_testset "elixir_advection_basic.jl (Float32)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[8.311947673061856e-6], + linf=[6.627000273229378e-5], + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 +end + @trixi_testset "elixir_advection_nonconforming_flag.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_nonconforming_flag.jl"), From 4d8a31f0a1f4e08cd72262e90313d862e64f40b1 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 10:25:33 +0200 Subject: [PATCH 020/158] add GPU construction test --- .../elixir_advection_basic_gpu.jl | 60 +++++++++++++++++++ test/test_cuda.jl | 24 +++++++- 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl new file mode 100644 index 00000000000..4e26ec3df1a --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -0,0 +1,60 @@ +# The same setup as tree_2d_dgsem/elixir_advection_basic.jl +# to verify the StructuredMesh implementation against TreeMesh + +using 
OrdinaryDiffEqSSPRK, OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) +coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y)) + +trees_per_dimension = (8, 8) + +# Create P4estMesh with 8 x 8 trees and 16 x 16 elements +mesh = P4estMesh(trees_per_dimension, polydeg = 3, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span from 0.0 to 1.0 +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval = 100, + solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.6) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +# sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); +# dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback +# ode_default_options()..., callback = callbacks); diff --git a/test/test_cuda.jl b/test/test_cuda.jl index f2fd11233c6..68872266986 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -12,7 +12,29 @@ include("test_trixi.jl") outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# TODO: +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + +@trixi_testset "elixir_advection_basic.jl (Float32)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=[8.311947673061856e-6], + linf=[6.627000273229378e-5], + real_type=Float32, + storage_type=CuArray) + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 +end # Clean up afterwards: delete Trixi.jl output directory @test_nowarn isdir(outdir) && rm(outdir, recursive = true) From 6ca8c3d0359fa49efb55313ef0f63ad3cccd26a4 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 12:08:26 +0200 Subject: [PATCH 021/158] don't adapt Array{MArray} --- src/auxiliary/containers.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 5738467ec6b..edc42db382b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -388,6 +388,13 @@ function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, adapt(Storage{StaticArrays.similar_type(T, Real)}, x) end +# Our threaded cache contains MArray, it is unlikely that we would want to adapt those +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::Array{T}) where {Storage, Real, + T <: StaticArrays.MArray} + adapt(Array{StaticArrays.similar_type(T, Real)}, x) +end + function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, x::AbstractArray) where {Storage, Real} adapt(Storage, x) From 4ef2d98bd6d9ad4a3a50bec15ab82c8d6138f640 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Apr 2025 13:36:22 +0200 Subject: [PATCH 022/158] add some more cuda adapt tests --- test/test_cuda.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/test_cuda.jl b/test/test_cuda.jl index 
68872266986..7a218f236d3 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -19,7 +19,7 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") # Expected errors are exactly the same as with TreeMesh! l2=[8.311947673061856e-6], linf=[6.627000273229378e-5], - real_type=Float32, + real_type=Float64, storage_type=CuArray) # # Ensure that we do not have excessive memory allocations # # (e.g., from type instabilities) @@ -34,6 +34,17 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") @test real(ode.p.solver.mortar) == Float32 # TODO: remake ignores the mesh itself as well @test real(ode.p.mesh) == Float64 + + @test_broken ode.u0 isa CuArray + @test ode.p.basis.boundary_interpolations isa CuArray + @test ode.p.basis.derivative_matrix isa CuArray + + @test ode.p.basis.forward_upper isa CuArray + + @test Trixi.storage_type(ode.p.cache.elements) === CuArray + @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray + @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray + @test Trixi.storage_type(ode.p.cache.mortrar) === CuArray end # Clean up afterwards: delete Trixi.jl output directory From 77395f5ecf581493fd76b5112f8ca8283f5df487 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 28 Apr 2025 16:18:18 +0200 Subject: [PATCH 023/158] use sources for dev branch --- .buildkite/pipeline.yml | 2 +- test/Project.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 344b8eacc3a..fdb4a855961 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -5,7 +5,7 @@ steps: matrix: setup: version: - - "1.10" + - "1.11" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" diff --git a/test/Project.toml b/test/Project.toml index 78b35c6b2de..df66fe98966 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -59,3 +59,6 @@ Random = "1" StableRNGs = "1.0.2" Test = "1" TrixiTest = "0.1" + +[sources] +CUDA = {url = 
"https://github.com/JuliaGPU/CUDA.jl", rev = "vc/unsafe_wrap_symbols"} From 1d78f077d471f9f92fa135ce05f2edd39f0e1df9 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 8 May 2025 11:50:42 +0200 Subject: [PATCH 024/158] fixup! use sources for dev branch --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index df66fe98966..ff6de774355 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -61,4 +61,4 @@ Test = "1" TrixiTest = "0.1" [sources] -CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "vc/unsafe_wrap_symbols"} +CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "master"} From 39535eec0bdd68f1bb21bfcd565f022f44e96c3a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 14 May 2025 10:38:54 +0200 Subject: [PATCH 025/158] use released version of CUDA --- .github/workflows/GPUCompat.yml | 86 --------------------------------- Project.toml | 2 +- test/Project.toml | 3 -- 3 files changed, 1 insertion(+), 90 deletions(-) delete mode 100644 .github/workflows/GPUCompat.yml diff --git a/.github/workflows/GPUCompat.yml b/.github/workflows/GPUCompat.yml deleted file mode 100644 index 335e1c83c4c..00000000000 --- a/.github/workflows/GPUCompat.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: GPU Package Compatibility - -on: - pull_request: - paths-ignore: - - 'AUTHORS.md' - - 'CITATION.bib' - - 'CONTRIBUTING.md' - - 'LICENSE.md' - - 'NEWS.md' - - 'README.md' - - '.zenodo.json' - - '.github/workflows/benchmark.yml' - - '.github/workflows/CompatHelper.yml' - - '.github/workflows/TagBot.yml' - - 'benchmark/**' - - 'docs/**' - - 'utils/**' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - test: - if: "!contains(github.event.head_commit.message, 'skip ci')" - name: ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - 
- version: '1.10' - os: ubuntu-latest - arch: x64 - - version: '1.10' - os: windows-latest - arch: x64 - # CUDA.jl only supports 64-bit Linux and Windows, see https://github.com/JuliaGPU/CUDA.jl?tab=readme-ov-file#requirements - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Julia - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - - name: Display version info - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)' - - - name: Cache Julia packages - uses: julia-actions/cache@v2 - - - name: Build project - uses: julia-actions/julia-buildpkg@v1 - - # Only CUDA.jl is needed for GPU compatibility test now - - name: Add CUDA.jl to environment - run: | - julia --project=. -e ' - using Pkg; - Pkg.activate(temp=true); - Pkg.develop(PackageSpec(path=pwd())); - Pkg.add("CUDA"); - Pkg.update()' - - # - name: Add Metal.jl to environment - # run: | - # julia --project=. -e ' - # using Pkg; - # Pkg.activate(temp=true); - # Pkg.develop(PackageSpec(path=pwd())); - # Pkg.add("Metal"); - # Pkg.update()' - - # - name: Add AMDGPU.jl to environment - # run: | - # julia --project=. 
-e ' - # using Pkg; - # Pkg.activate(temp=true); - # Pkg.develop(PackageSpec(path=pwd())); - # Pkg.add("AMDGPU"); - # Pkg.update()' diff --git a/Project.toml b/Project.toml index 689f054adf0..ea207a63cbe 100644 --- a/Project.toml +++ b/Project.toml @@ -68,7 +68,7 @@ TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" Adapt = "4" -CUDA = "5.2" +CUDA = "5.8" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/test/Project.toml b/test/Project.toml index ff6de774355..78b35c6b2de 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -59,6 +59,3 @@ Random = "1" StableRNGs = "1.0.2" Test = "1" TrixiTest = "0.1" - -[sources] -CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "master"} From b973758daa699c84be8e1e444f0b5cab0e74e1ab Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 14 May 2025 10:43:30 +0200 Subject: [PATCH 026/158] Update .buildkite/pipeline.yml --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index fdb4a855961..344b8eacc3a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -5,7 +5,7 @@ steps: matrix: setup: version: - - "1.11" + - "1.10" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" From 7105da72985c927b12200d775413e400101854e6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 30 Jun 2025 14:01:15 +0200 Subject: [PATCH 027/158] fix test_p4est_2d --- test/test_p4est_2d.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index 307d70683a5..33d24c8d67e 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -38,8 +38,9 @@ end @trixi_testset "elixir_advection_basic.jl (Float32)" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), # Expected errors are exactly the same as with TreeMesh! 
- l2=[8.311947673061856e-6], - linf=[6.627000273229378e-5], + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], + RealT=Float32, real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) @@ -47,7 +48,7 @@ end t = sol.t[end] u_ode = sol.u[end] du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + @test_broken (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 From 1fd6fe6614ebe799da375e3cf15569634ca4fb13 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 30 Jun 2025 21:12:08 +0200 Subject: [PATCH 028/158] fix first GPU test --- test/test_cuda.jl | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/test/test_cuda.jl b/test/test_cuda.jl index 7a218f236d3..1f96d8c863e 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -1,25 +1,27 @@ module TestCUDA -using CUDA using Test using Trixi include("test_trixi.jl") -# EXAMPLES_DIR = joinpath(examples_dir(), "dgmulti_1d") - # Start with a clean environment: remove Trixi.jl output directory if it exists outdir = "out" isdir(outdir) && rm(outdir, recursive = true) EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") -@trixi_testset "elixir_advection_basic.jl (Float32)" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), +@trixi_testset "elixir_advection_basic_gpu.jl" begin + # Using CUDA inside the testset since otherwise the bindings are hiddend by the anonymous modules + using CUDA + # TODO(benegee/vchuravy): Port compute_coefficients! to KernelAbstractions.jl + CUDA.allowscalar(true) + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! 
- l2=[8.311947673061856e-6], - linf=[6.627000273229378e-5], - real_type=Float64, + l2=nothing, # [Float32(8.311947673061856e-6)], + linf=nothing, # [Float32(6.627000273229378e-5)], + RealT=Float32, + real_type=Float32, storage_type=CuArray) # # Ensure that we do not have excessive memory allocations # # (e.g., from type instabilities) @@ -36,15 +38,12 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") @test real(ode.p.mesh) == Float64 @test_broken ode.u0 isa CuArray - @test ode.p.basis.boundary_interpolations isa CuArray - @test ode.p.basis.derivative_matrix isa CuArray - - @test ode.p.basis.forward_upper isa CuArray + @test ode.p.solver.basis.derivative_matrix isa CuArray @test Trixi.storage_type(ode.p.cache.elements) === CuArray @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray - @test Trixi.storage_type(ode.p.cache.mortrar) === CuArray + @test Trixi.storage_type(ode.p.cache.mortars) === CuArray end # Clean up afterwards: delete Trixi.jl output directory From 6ceef3af12898f74a12bbfe2359ca2a805fc51dd Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 1 Jul 2025 09:18:34 +0200 Subject: [PATCH 029/158] address review comments --- src/solvers/dgsem_p4est/containers.jl | 40 +++++++++++-------- .../dgsem_p4est/containers_parallel.jl | 7 ++-- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 68e5b3d758b..3da09b5db55 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -60,30 +60,38 @@ function Base.resize!(elements::P4estElementContainer, capacity) ArrayType = storage_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates), - (n_dims, ntuple(_ -> n_nodes, n_dims)..., - capacity)) + elements.node_coordinates = unsafe_wrap_or_alloc(ArrayType, 
+ pointer(_node_coordinates), + (n_dims, + ntuple(_ -> n_nodes, n_dims)..., + capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix), - (n_dims, n_dims, - ntuple(_ -> n_nodes, n_dims)..., capacity)) + elements.jacobian_matrix = unsafe_wrap_or_alloc(ArrayType, + pointer(_jacobian_matrix), + (n_dims, n_dims, + ntuple(_ -> n_nodes, n_dims)..., + capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(ArrayType, - pointer(_contravariant_vectors), - size(elements.jacobian_matrix)) + elements.contravariant_vectors = unsafe_wrap_or_alloc(ArrayType, + pointer(_contravariant_vectors), + size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian), - (ntuple(_ -> n_nodes, n_dims)..., capacity)) + elements.inverse_jacobian = unsafe_wrap_or_alloc(ArrayType, + pointer(_inverse_jacobian), + (ntuple(_ -> n_nodes, n_dims)..., + capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values), - (n_variables, - ntuple(_ -> n_nodes, n_dims - 1)..., - n_dims * 2, capacity)) + elements.surface_flux_values = unsafe_wrap_or_alloc(ArrayType, + pointer(_surface_flux_values), + (n_variables, + ntuple(_ -> n_nodes, + n_dims - 1)..., + n_dims * 2, capacity)) return nothing end @@ -221,7 +229,7 @@ end @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS @inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, uEltype} - uEltype + return uEltype end # See explanation of Base.resize! 
for the element container diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index cb9cd1ffc95..123337d8c0a 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -222,9 +222,10 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements end function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) - # TODO: Vector of Vector type data structure does not work on GPUs, - # must be redesigned. This skeleton implementation here just exists just - # for compatibility with the rest of the KA.jl solver code + # Only parts of this container are adapted, since we currently don't + # use `local_neighbor_ids`, `local_neighbor_positions`, `normal_directions` + # on the GPU. If we do need them we need to redesign this to use the VecOfArrays + # approach. _u = adapt(to, mpi_mortars._u) _node_indices = mpi_mortars._node_indices From 7a53362dfac0d03e6dbad2fb47bd4a6839e90d3e Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 1 Jul 2025 15:08:30 +0200 Subject: [PATCH 030/158] offload compute_coefficients --- Project.toml | 2 + .../elixir_advection_basic_gpu.jl | 18 ++++--- src/Trixi.jl | 1 + src/auxiliary/containers.jl | 4 ++ src/semidiscretization/semidiscretization.jl | 3 +- src/solvers/dg.jl | 47 +++++++++++++++---- 6 files changed, 54 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index ea207a63cbe..7bea3abf0f9 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" LoopVectorization = 
"bdcacae8-1622-11e9-2a5c-532679323890" @@ -82,6 +83,7 @@ EllipsisNotation = "1.0" FillArrays = "1.9" ForwardDiff = "0.10.36, 1" HDF5 = "0.16.10, 0.17" +KernelAbstractions = "0.9" LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.171" diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 4e26ec3df1a..5f34784ddf9 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -1,8 +1,6 @@ -# The same setup as tree_2d_dgsem/elixir_advection_basic.jl -# to verify the StructuredMesh implementation against TreeMesh - -using OrdinaryDiffEqSSPRK, OrdinaryDiffEqLowStorageRK +using OrdinaryDiffEqLowStorageRK using Trixi +using CUDA ############################################################################### # semidiscretization of the linear advection equation @@ -31,7 +29,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. 
# Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = CuArray) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers @@ -48,13 +46,13 @@ save_solution = SaveSolutionCallback(interval = 100, stepsize_callback = StepsizeCallback(cfl = 1.6) # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver -callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, - stepsize_callback) +callbacks = CallbackSet(summary_callback) +# analysis_callback, save_solution, stepsize_callback) ############################################################################### # run the simulation # # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks -# sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); -# dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback -# ode_default_options()..., callback = callbacks); + sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/src/Trixi.jl b/src/Trixi.jl index a52dfd6d973..7836f1938b1 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -59,6 +59,7 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace +using KernelAbstractions using LinearMaps: LinearMap if _PREFERENCE_LOOPVECTORIZATION using LoopVectorization: LoopVectorization, @turbo, indices diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index edc42db382b..40aff873956 100644 --- 
a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -405,4 +405,8 @@ end function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} return unsafe_wrap_or_alloc(Storage, vec, size) end + +function KernelAbstractions.get_backend(semi::AbstractSemidiscretization) + KernelAbstractions.get_backend(semi.cache.elements.node_coordinates) +end end # @muladd diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index 97c50aa46a1..e214f569d13 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -176,7 +176,8 @@ Same as [`compute_coefficients`](@ref) but stores the result in `u_ode`. function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) u = wrap_array(u_ode, semi) # Call `compute_coefficients` defined by the solver - compute_coefficients!(u, func, t, mesh_equations_solver_cache(semi)...) + backend = get_backend(semi) + compute_coefficients!(backend, u, func, t, mesh_equations_solver_cache(semi)...) end """ diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 78f3901a346..273cc8f7a47 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -642,8 +642,10 @@ include("fdsbp_unstructured/fdsbp.jl") function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) # We must allocate a `Vector` in order to be able to `resize!` it (AMR). # cf. 
wrap_array - zeros(eltype(cache.elements), - nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u_ode = similar(cache.elements.node_coordinates, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + fill!(u_ode, zero(eltype(u_ode))) + return u_ode end @inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, @@ -686,7 +688,8 @@ end # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) else # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. - unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + ArrayType = Trixi.storage_type(u_ode) + unsafe_wrap(ArrayType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) end @@ -756,15 +759,39 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: end end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, +function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache) + @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) - end + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) + end +end + +function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, + equations, dg::DG, cache) + nelements(dg, cache) == 0 && return nothing + # 1 cache not as argument + # 2 mesh not + @unpack node_coordinates = cache.elements + kernel! 
= compute_coefficients_kernel!(backend) + kernel!(u, func, t, equations, dg, node_coordinates, + ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function compute_coefficients_kernel!(u, func, t, equations, + dg::DG, node_coordinates) + element = @index(Global) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) +end + +function compute_coefficients_element!(u, func, t, equations, dg::DG, + node_coordinates, element) + for j in eachnode(dg), i in eachnode(dg) + x_node = get_node_coords(node_coordinates, equations, dg, i, + j, element) + u_node = func(x_node, t, equations) + set_node_vars!(u, u_node, equations, dg, i, j, element) end end From 68eb9052d11244268e1b1929295dec6bcfe8c070 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 1 Jul 2025 15:16:07 +0200 Subject: [PATCH 031/158] fmt --- examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl | 6 +++--- src/solvers/dg.jl | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 5f34784ddf9..b5291ea2862 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -53,6 +53,6 @@ callbacks = CallbackSet(summary_callback) # run the simulation # # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks - sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); - dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback - ode_default_options()..., callback = callbacks); +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 273cc8f7a47..756036a0e55 100644 --- 
a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -643,7 +643,8 @@ function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) # We must allocate a `Vector` in order to be able to `resize!` it (AMR). # cf. wrap_array u_ode = similar(cache.elements.node_coordinates, - nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + nvariables(equations) * nnodes(dg)^ndims(mesh) * + nelements(dg, cache)) fill!(u_ode, zero(eltype(u_ode))) return u_ode end @@ -759,11 +760,13 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: end end -function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, dg::DG, +function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, + dg::DG, cache) @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, + element) end end @@ -789,7 +792,7 @@ function compute_coefficients_element!(u, func, t, equations, dg::DG, node_coordinates, element) for j in eachnode(dg), i in eachnode(dg) x_node = get_node_coords(node_coordinates, equations, dg, i, - j, element) + j, element) u_node = func(x_node, t, equations) set_node_vars!(u, u_node, equations, dg, i, j, element) end From 3d00bdfec5d4da71f13196c82c93f0fb92da24da Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 1 Jul 2025 18:50:30 +0200 Subject: [PATCH 032/158] fixup! 
address review comments --- src/solvers/dgsem_p4est/containers.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 3da09b5db55..83097f4a1ed 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -61,33 +61,33 @@ function Base.resize!(elements::P4estElementContainer, capacity) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) elements.node_coordinates = unsafe_wrap_or_alloc(ArrayType, - pointer(_node_coordinates), + _node_coordinates, (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) elements.jacobian_matrix = unsafe_wrap_or_alloc(ArrayType, - pointer(_jacobian_matrix), + _jacobian_matrix, (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) elements.contravariant_vectors = unsafe_wrap_or_alloc(ArrayType, - pointer(_contravariant_vectors), + _contravariant_vectors, size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) elements.inverse_jacobian = unsafe_wrap_or_alloc(ArrayType, - pointer(_inverse_jacobian), + _inverse_jacobian, (ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) elements.surface_flux_values = unsafe_wrap_or_alloc(ArrayType, - pointer(_surface_flux_values), + _surface_flux_values, (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., From 4b32fa0a384de43a1d6c8a3d89b5993391ec54fc Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 1 Jul 2025 18:59:47 +0200 Subject: [PATCH 033/158] add review comments --- docs/src/heterogeneous.md | 25 ++++++++++++++----- .../elixir_advection_basic_gpu.jl | 3 +++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md index 
60bda029a40..b4027abdd3a 100644 --- a/docs/src/heterogeneous.md +++ b/docs/src/heterogeneous.md @@ -4,15 +4,16 @@ Support for heterogeneous computing is currently being worked on. ## The use of Adapt.jl -[`Adapt.jl`](https://github.com/JuliaGPU/Adapt.jl) is a package in the JuliaGPU family that allows for +[Adapt.jl](https://github.com/JuliaGPU/Adapt.jl) is a package in the +[JuliaGPU](https://github.com/JuliaGPU) family that allows for the translation of nested data structures. The primary goal is to allow the substitution of `Array` -at the storage leaves with a GPU array like `CuArray`. +at the storage leaves with a GPU array like `CuArray` from [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl). To facilitate this data structures must be parameterized, so instead of: ```julia -struct Container - data::Array{Float64,2} +struct Container <: Trixi.AbstractContainer + data::Array{Float64, 2} end ``` @@ -47,7 +48,19 @@ function Adapt.parent_type(::Type{<:Container{D}}) where D end ``` -```julia-repl +All together we can use this machinery to perform conversions of a container. + +```jldoctest +julia> import Trixi, Adapt + +julia> struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D + end + +julia> Adapt.@adapt_structure(Container) + +julia> Adapt.parent_type(::Type{<:Container{D}}) where D = D + julia> C = Container(zeros(3)) Container{Vector{Float64}}([0.0, 0.0, 0.0]) @@ -65,7 +78,7 @@ CuArray ## Element-type conversion with `Trixi.trixi_adapt`. 
-We can use Trixi.trixi_adapt to perform both an element-type and a storage-type adoption +We can use [`Trixi.trixi_adapt`](@ref) to perform both an element-type and a storage-type adoption ```julia-repl julia> C = Container(zeros(3)) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 4e26ec3df1a..4c0f5744a88 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -54,6 +54,9 @@ callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, ############################################################################### # run the simulation +# TODO: Currently we can only construct the ODE problem on the GPU, but we cannot solve it on the GPU yet. +# Uncomment the calls below to discover missing functionality. + # # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks # sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); # dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback From 10f7593b3c08cbbfd69eefe893c07b7e1b8d5de7 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 1 Jul 2025 19:25:28 +0200 Subject: [PATCH 034/158] convert fstar_* cache entries to VecOfArrays --- src/solvers/dgsem_p4est/dg_3d.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index e59f502c86c..4c099c9fd3f 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -13,18 +13,18 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, fstar_primary_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays fstar_secondary_threaded = [Array{uEltype, 4}(undef, 
nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays (; fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded, u_threaded) end From c83bdbd59e401ebd2ebaf3eb5add9281cb2b62e5 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 1 Jul 2025 19:33:07 +0200 Subject: [PATCH 035/158] restore elixir --- examples/p4est_2d_dgsem/elixir_advection_basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl index e162e8997f2..4ff646365aa 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic.jl @@ -31,7 +31,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. 
# Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) +ode = semidiscretize(semi, (0.0, 1.0)) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers From d3b94fcaee421bc22f233a0e68e373093585ce1c Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 2 Jul 2025 09:11:54 +0200 Subject: [PATCH 036/158] test native version as well --- .../elixir_advection_basic_gpu.jl | 9 +++-- src/Trixi.jl | 1 + src/auxiliary/containers.jl | 8 +++++ src/semidiscretization/semidiscretization.jl | 2 +- src/solvers/dg.jl | 7 ++-- test/test_cuda.jl | 35 ++++++++++++++++--- 6 files changed, 46 insertions(+), 16 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 8fd7c31a413..61277a2734f 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -1,6 +1,5 @@ using OrdinaryDiffEqLowStorageRK using Trixi -using CUDA ############################################################################### # semidiscretization of the linear advection equation @@ -29,7 +28,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = CuArray) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers @@ -56,6 +55,6 @@ callbacks = CallbackSet(summary_callback) # Uncomment the calls below to discover missing functionality. 
# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks -sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); - dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback - ode_default_options()..., callback = callbacks); +#sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); +# dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback +# ode_default_options()..., callback = callbacks); diff --git a/src/Trixi.jl b/src/Trixi.jl index 7836f1938b1..18000e050bd 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -20,6 +20,7 @@ const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") const _PREFERENCE_POLYESTER = @load_preference("polyester", true) const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true) +const _PREFERENCE_USE_NATIVE_THREADING = @load_preference("native_threading", true) # Include other packages that are used in Trixi.jl # (standard library packages first, other packages next, all of them sorted alphabetically) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 40aff873956..ac412eb2da8 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -406,6 +406,14 @@ function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage return unsafe_wrap_or_alloc(Storage, vec, size) end +function trixi_backend(x) + backend = get_backend(x) + if _PREFERENCE_USE_NATIVE_THREADING && backend isa KernelAbstractions.CPU + backend = nothing + end + return backend +end + function KernelAbstractions.get_backend(semi::AbstractSemidiscretization) KernelAbstractions.get_backend(semi.cache.elements.node_coordinates) end diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index e214f569d13..b8f53237550 100644 --- 
a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -176,7 +176,7 @@ Same as [`compute_coefficients`](@ref) but stores the result in `u_ode`. function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) u = wrap_array(u_ode, semi) # Call `compute_coefficients` defined by the solver - backend = get_backend(semi) + backend = trixi_backend(semi) compute_coefficients!(backend, u, func, t, mesh_equations_solver_cache(semi)...) end diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 756036a0e55..9ec37647c97 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -760,9 +760,8 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: end end -function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, - dg::DG, - cache) +function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{2}, + equations, dg::DG, cache) @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, @@ -773,8 +772,6 @@ end function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache) nelements(dg, cache) == 0 && return nothing - # 1 cache not as argument - # 2 mesh not @unpack node_coordinates = cache.elements kernel! 
= compute_coefficients_kernel!(backend) kernel!(u, func, t, equations, dg, node_coordinates, diff --git a/test/test_cuda.jl b/test/test_cuda.jl index 1f96d8c863e..c6904b41a9d 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -11,16 +11,41 @@ isdir(outdir) && rm(outdir, recursive = true) EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") -@trixi_testset "elixir_advection_basic_gpu.jl" begin +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=nothing, # [Float32(8.311947673061856e-6)], + linf=nothing,) + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / CUDA" begin # Using CUDA inside the testset since otherwise the bindings are hiddend by the anonymous modules using CUDA - # TODO(benegee/vchuravy): Port compute_coefficients! to KernelAbstractions.jl - CUDA.allowscalar(true) @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! 
l2=nothing, # [Float32(8.311947673061856e-6)], linf=nothing, # [Float32(6.627000273229378e-5)], - RealT=Float32, real_type=Float32, storage_type=CuArray) # # Ensure that we do not have excessive memory allocations @@ -37,7 +62,7 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") # TODO: remake ignores the mesh itself as well @test real(ode.p.mesh) == Float64 - @test_broken ode.u0 isa CuArray + @test ode.u0 isa CuArray @test ode.p.solver.basis.derivative_matrix isa CuArray @test Trixi.storage_type(ode.p.cache.elements) === CuArray From 97e13ec876c4ec3a95c5811b7a3c2eb35f87b9ce Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 2 Jul 2025 09:34:33 +0200 Subject: [PATCH 037/158] adapt 1D and 3D version --- src/solvers/dg.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 9ec37647c97..a9ed65d7070 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -739,8 +739,8 @@ end nelements(dg, cache))) end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg::DG, - cache) +function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{1}, + equations, dg::DG, cache) @threaded for element in eachelement(dg, cache) for i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, @@ -795,8 +795,8 @@ function compute_coefficients_element!(u, func, t, equations, dg::DG, end end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG, - cache) +function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{3}, + equations, dg::DG, cache) @threaded for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, From 44f7134b3745ed9603a6d59faa1e47b0d65e271b Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 2 Jul 2025 09:34:49 +0200 Subject: [PATCH 038/158] 
Downgrade compat with Adapt --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7bea3abf0f9..f4af1d63f45 100644 --- a/Project.toml +++ b/Project.toml @@ -83,7 +83,7 @@ EllipsisNotation = "1.0" FillArrays = "1.9" ForwardDiff = "0.10.36, 1" HDF5 = "0.16.10, 0.17" -KernelAbstractions = "0.9" +KernelAbstractions = "0.9.15" LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.171" From abbcc56da5240d828e4cd0093cb530c945d9654b Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 17 Dec 2024 17:36:16 +0100 Subject: [PATCH 039/158] Use Adapt.jl to change storage and element type In order to eventually support GPU computation we need to use Adapt.jl to allow GPU backend packages to swap out host-array types like `CuArray` with device-side types like `CuDeviceArray`. Additionally this will allow us to change the element type of a simulation by using `adapt(Array{Float32}`. Co-authored-by: Lars Christmann Co-authored-by: Benedict Geihe --- .buildkite/pipeline.yml | 9 +- .github/workflows/GPUCompat.yml | 86 ----- Project.toml | 5 + docs/make.jl | 3 +- docs/src/heterogeneous.md | 95 +++++ .../p4est_2d_dgsem/elixir_advection_basic.jl | 2 +- .../elixir_advection_basic_gpu.jl | 63 ++++ ext/TrixiCUDAExt.jl | 11 + src/Trixi.jl | 2 + src/auxiliary/containers.jl | 91 +++++ src/auxiliary/vector_of_arrays.jl | 31 ++ src/semidiscretization/semidiscretization.jl | 21 +- .../semidiscretization_hyperbolic.jl | 27 +- src/solvers/dg.jl | 3 + src/solvers/dgsem/basis_lobatto_legendre.jl | 37 ++ src/solvers/dgsem_p4est/containers.jl | 340 ++++++++++++++---- .../dgsem_p4est/containers_parallel.jl | 115 ++++-- src/solvers/dgsem_p4est/dg_3d.jl | 8 +- src/solvers/dgsem_p4est/dg_parallel.jl | 60 ++-- .../sort_boundary_conditions.jl | 17 +- test/Project.toml | 2 + test/runtests.jl | 9 + test/test_aqua.jl | 1 + test/test_cuda.jl | 52 +++ test/test_p4est_2d.jl | 28 ++ test/test_unstructured_2d.jl | 7 + 26 files 
changed, 882 insertions(+), 243 deletions(-) delete mode 100644 .github/workflows/GPUCompat.yml create mode 100644 docs/src/heterogeneous.md create mode 100644 examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl create mode 100644 ext/TrixiCUDAExt.jl create mode 100644 src/auxiliary/vector_of_arrays.jl create mode 100644 test/test_cuda.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0f8ad475db8..344b8eacc3a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,3 +1,5 @@ +env: + steps: - label: "CUDA Julia {{matrix.version}}" matrix: @@ -7,12 +9,13 @@ steps: plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" - command: | - true + - JuliaCI/julia-test#v1: ~ + env: + TRIXI_TEST: "CUDA" agents: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip ci\]/ timeout_in_minutes: 60 soft_fail: - - exit_status: 3 \ No newline at end of file + - exit_status: 3 diff --git a/.github/workflows/GPUCompat.yml b/.github/workflows/GPUCompat.yml deleted file mode 100644 index 335e1c83c4c..00000000000 --- a/.github/workflows/GPUCompat.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: GPU Package Compatibility - -on: - pull_request: - paths-ignore: - - 'AUTHORS.md' - - 'CITATION.bib' - - 'CONTRIBUTING.md' - - 'LICENSE.md' - - 'NEWS.md' - - 'README.md' - - '.zenodo.json' - - '.github/workflows/benchmark.yml' - - '.github/workflows/CompatHelper.yml' - - '.github/workflows/TagBot.yml' - - 'benchmark/**' - - 'docs/**' - - 'utils/**' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - test: - if: "!contains(github.event.head_commit.message, 'skip ci')" - name: ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - - version: '1.10' - os: ubuntu-latest - arch: x64 - - version: '1.10' - os: windows-latest - arch: x64 - # CUDA.jl only supports 64-bit Linux and Windows, see 
https://github.com/JuliaGPU/CUDA.jl?tab=readme-ov-file#requirements - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Julia - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - - name: Display version info - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)' - - - name: Cache Julia packages - uses: julia-actions/cache@v2 - - - name: Build project - uses: julia-actions/julia-buildpkg@v1 - - # Only CUDA.jl is needed for GPU compatibility test now - - name: Add CUDA.jl to environment - run: | - julia --project=. -e ' - using Pkg; - Pkg.activate(temp=true); - Pkg.develop(PackageSpec(path=pwd())); - Pkg.add("CUDA"); - Pkg.update()' - - # - name: Add Metal.jl to environment - # run: | - # julia --project=. -e ' - # using Pkg; - # Pkg.activate(temp=true); - # Pkg.develop(PackageSpec(path=pwd())); - # Pkg.add("Metal"); - # Pkg.update()' - - # - name: Add AMDGPU.jl to environment - # run: | - # julia --project=. 
-e ' - # using Pkg; - # Pkg.activate(temp=true); - # Pkg.develop(PackageSpec(path=pwd())); - # Pkg.add("AMDGPU"); - # Pkg.update()' diff --git a/Project.toml b/Project.toml index 60443f419e7..875d2ae6db1 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.12.6-DEV" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -56,14 +57,18 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [extensions] TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" +TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" +Adapt = "4" +CUDA = "5.8" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/docs/make.jl b/docs/make.jl index 7111b66ab94..0301f5ba64e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -163,7 +163,8 @@ makedocs( "Style guide" => "styleguide.md", "Testing" => "testing.md", "Performance" => "performance.md", - "Parallelization" => "parallelization.md" + "Parallelization" => "parallelization.md", + "Heterogeneous" => "heterogeneous.md" ], "Troubleshooting and FAQ" => "troubleshooting.md", "Reference" => [ diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md new file mode 100644 index 00000000000..b4027abdd3a --- /dev/null +++ b/docs/src/heterogeneous.md @@ -0,0 +1,95 @@ +# Heterogeneous computing + +Support for heterogeneous computing is currently being worked on. 
+ +## The use of Adapt.jl + +[Adapt.jl](https://github.com/JuliaGPU/Adapt.jl) is a package in the +[JuliaGPU](https://github.com/JuliaGPU) family that allows for +the translation of nested data structures. The primary goal is to allow the substitution of `Array` +at the storage leaves with a GPU array like `CuArray` from [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl). + +To facilitate this data structures must be parameterized, so instead of: + +```julia +struct Container <: Trixi.AbstractContainer + data::Array{Float64, 2} +end +``` + +They must be written as: + +```julia +struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D +end +``` + +furthermore, we need to define a function that allows for the conversion of storage +of our types: + +```julia +function Adapt.adapt_structure(to, C::Container) + return Container(adapt(to, C.data)) +end +``` + +or simply + +```julia +Adapt.@adapt_structure(Container) +``` + +additionally, we must define `Adapt.parent_type`. + +```julia +function Adapt.parent_type(::Type{<:Container{D}}) where D + return D +end +``` + +All together we can use this machinery to perform conversions of a container. + +```jldoctest +julia> import Trixi, Adapt + +julia> struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D + end + +julia> Adapt.@adapt_structure(Container) + +julia> Adapt.parent_type(::Type{<:Container{D}}) where D = D + +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +Array + +julia> using CUDA + +julia> GPU_C = adapt(CuArray, C) +Container{CuArray{Float64, 1, CUDA.DeviceMemory}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +CuArray +``` + +## Element-type conversion with `Trixi.trixi_adapt`. 
+ +We can use [`Trixi.trixi_adapt`](@ref) to perform both an element-type and a storage-type adoption + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(Array, Float32, C) +Container{Vector{Float32}}(Float32[0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(CuArray, Float32, C) +Container{CuArray{Float32, 1, CUDA.DeviceMemory}}(Float32[0.0, 0.0, 0.0]) +``` + +!!! note + `adapt(Array{Float32}, C)` is tempting but will do the wrong thing in the presence of `StaticArrays`. \ No newline at end of file diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl index 4ff646365aa..e162e8997f2 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic.jl @@ -31,7 +31,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0)) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl new file mode 100644 index 00000000000..4c0f5744a88 --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -0,0 +1,63 @@ +# The same setup as tree_2d_dgsem/elixir_advection_basic.jl +# to verify the StructuredMesh implementation against TreeMesh + +using OrdinaryDiffEqSSPRK, OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +# Create DG solver with polynomial 
degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) +coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y)) + +trees_per_dimension = (8, 8) + +# Create P4estMesh with 8 x 8 trees and 16 x 16 elements +mesh = P4estMesh(trees_per_dimension, polydeg = 3, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver) + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span from 0.0 to 1.0 +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval = 100, + solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.6) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +# TODO: Currently we can only construct the ODE problem on the GPU, but we 
cannot solve it on the GPU yet. +# Uncomment the calls below to discover missing functionality. + +# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +# sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); +# dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback +# ode_default_options()..., callback = callbacks); diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl new file mode 100644 index 00000000000..681d2f53a1e --- /dev/null +++ b/ext/TrixiCUDAExt.jl @@ -0,0 +1,11 @@ +# Package extension for adding CUDA-based features to Trixi.jl +module TrixiCUDAExt + +import CUDA: CuArray +import Trixi + +function Trixi.storage_type(::Type{<:CuArray}) + return CuArray +end + +end diff --git a/src/Trixi.jl b/src/Trixi.jl index a707437655e..a52dfd6d973 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -50,6 +50,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!, using DelimitedFiles: readdlm using Downloads: Downloads +using Adapt: Adapt, adapt using CodeTracking: CodeTracking using ConstructionBase: ConstructionBase using DiffEqBase: DiffEqBase, get_tstops, get_tstops_array @@ -132,6 +133,7 @@ include("basic_types.jl") # Include all top-level source files include("auxiliary/auxiliary.jl") +include("auxiliary/vector_of_arrays.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") include("auxiliary/t8code.jl") diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 90650f6abcf..edc42db382b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -314,4 +314,95 @@ end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) raw_copy!(c, c, from, from, destination) end + +# Trixi storage types must implement these two Adapt.jl methods +function Adapt.adapt_structure(to, c::AbstractContainer) + error("Interface: Must implement Adapt.adapt_structure(to, ::$(typeof(c)))") +end + +function 
Adapt.parent_type(C::Type{<:AbstractContainer}) + error("Interface: Must implement Adapt.parent_type(::Type{$C}") +end + +function Adapt.unwrap_type(C::Type{<:AbstractContainer}) + return Adapt.unwrap_type(Adapt.parent_type(C)) +end + +# TODO: Upstream to Adapt +function storage_type(x) + return storage_type(typeof(x)) +end + +function storage_type(T::Type) + error("Interface: Must implement storage_type(::Type{$T}") +end + +function storage_type(::Type{<:Array}) + Array +end + +function storage_type(C::Type{<:AbstractContainer}) + return storage_type(Adapt.unwrap_type(C)) +end + +# For some storage backends like CUDA.jl, empty arrays do seem to simply be +# null pointers which can cause `unsafe_wrap` to fail when calling +# Adapt.adapt (ArgumentError, see +# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229). +# To circumvent this, on length zero arrays this allocates +# a separate empty array instead of wrapping. +# However, since zero length arrays are not used in calculations, +# it should be okay if the underlying storage vectors and wrapped arrays +# are not the same as long as they are properly wrapped when `resize!`d etc. +function unsafe_wrap_or_alloc(to, vector, size) + if length(vector) == 0 + return similar(vector, size) + else + return unsafe_wrap(to, pointer(vector), size) + end +end + +struct TrixiAdaptor{Storage, Real} end + +function trixi_adapt(storage, real, x) + adapt(TrixiAdaptor{storage, real}(), x) +end + +# Custom rules +# 1. handling of StaticArrays +function Adapt.adapt_storage(::TrixiAdaptor{<:Any, Real}, + x::StaticArrays.StaticArray{S, T, N}) where {Real, S, T, N} + StaticArrays.similar_type(x, Real)(x) +end + +# 2. 
Handling of Arrays +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: AbstractFloat} + adapt(Storage{Real}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: StaticArrays.StaticArray} + adapt(Storage{StaticArrays.similar_type(T, Real)}, x) +end + +# Our threaded cache contains MArray, it is unlikely that we would want to adapt those +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::Array{T}) where {Storage, Real, + T <: StaticArrays.MArray} + adapt(Array{StaticArrays.similar_type(T, Real)}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray) where {Storage, Real} + adapt(Storage, x) +end + +# 3. TODO: Should we have a fallback? But that would imply implementing things for NamedTuple again + +function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} + return unsafe_wrap_or_alloc(Storage, vec, size) +end end # @muladd diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl new file mode 100644 index 00000000000..0fa8dd7f1ec --- /dev/null +++ b/src/auxiliary/vector_of_arrays.jl @@ -0,0 +1,31 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector. +# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is +# not possible for a plain Vector of Arrays. 
+struct VecOfArrays{T <: AbstractArray} + arrays::Vector{T} +end +Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i) +Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays) +Base.size(v::VecOfArrays) = Base.size(v.arrays) +Base.length(v::VecOfArrays) = Base.length(v.arrays) +Base.eltype(v::VecOfArrays{T}) where {T} = T +function Adapt.adapt_structure(to, v::VecOfArrays) + return VecOfArrays([Adapt.adapt(to, arr) for arr in v.arrays]) +end +function Adapt.parent_type(::Type{<:VecOfArrays{T}}) where {T} + return T +end +function Adapt.unwrap_type(A::Type{<:VecOfArrays}) + Adapt.unwrap_type(Adapt.parent_type(A)) +end +function Base.convert(::Type{<:VecOfArrays}, v::Vector{<:AbstractArray}) + VecOfArrays(v) +end +end # @muladd diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index cc3900d42da..97c50aa46a1 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -82,9 +82,15 @@ end Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan` that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). + +The optional keyword arguments `storage_type` and `real_type` configure the underlying computational +datastructures. `storage_type` changes the fundamental array type being used, allowing the +experimental use of `CuArray` or other GPU array types. `real_type` changes the computational data type being used. """ function semidiscretize(semi::AbstractSemidiscretization, tspan; - reset_threads = true) + reset_threads = true, + storage_type = nothing, + real_type = nothing) # Optionally reset Polyester.jl threads. 
See # https://github.com/trixi-framework/Trixi.jl/issues/1583 # https://github.com/JuliaSIMD/Polyester.jl/issues/30 @@ -92,6 +98,19 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan; Polyester.reset_threads!() end + if !(storage_type === nothing && real_type === nothing) + if storage_type === nothing + storage_type = Array + end + if real_type === nothing + real_type = Float64 + end + semi = trixi_adapt(storage_type, real_type, semi) + if eltype(tspan) !== real_type + tspan = convert.(real_type, tspan) + end + end + u0_ode = compute_coefficients(first(tspan), semi) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using # mpi_isparallel() && MPI.Barrier(mpi_comm()) diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 7496a345661..2a563c02229 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -27,25 +27,6 @@ mutable struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, solver::Solver cache::Cache performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, - BoundaryConditions, SourceTerms, Solver, - Cache}(mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, - cache::Cache) where {Mesh, Equations, - InitialCondition, - BoundaryConditions, - SourceTerms, - Solver, - Cache} - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, - solver, cache, performance_counter) - end end """ @@ -71,6 +52,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) + performance_counter = PerformanceCounter() + 
SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), @@ -78,9 +61,13 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver initial_condition, _boundary_conditions, source_terms, solver, - cache) + cache, + performance_counter) end +# @eval due to @muladd +@eval Adapt.@adapt_structure(SemidiscretizationHyperbolic) + # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index ad211b3c003..78f3901a346 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -415,6 +415,9 @@ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} volume_integral::VolumeIntegral end +# @eval due to @muladd +@eval Adapt.@adapt_structure(DG) + function Base.show(io::IO, dg::DG) @nospecialize dg # reduce precompilation time diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 777348aa8ce..9647f172e20 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -34,6 +34,32 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES, # negative adjoint wrt the SBP dot product end +function Adapt.adapt_structure(to, basis::LobattoLegendreBasis) + inverse_vandermonde_legendre = adapt(to, basis.inverse_vandermonde_legendre) + RealT = eltype(inverse_vandermonde_legendre) + + nodes = SVector{<:Any, RealT}(basis.nodes) + weights = SVector{<:Any, RealT}(basis.weights) + inverse_weights = SVector{<:Any, RealT}(basis.inverse_weights) + boundary_interpolation = adapt(to, basis.boundary_interpolation) + derivative_matrix = adapt(to, basis.derivative_matrix) + derivative_split = adapt(to, basis.derivative_split) + derivative_split_transpose = 
adapt(to, basis.derivative_split_transpose) + derivative_dhat = adapt(to, basis.derivative_dhat) + return LobattoLegendreBasis{RealT, nnodes(basis), typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, + weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + function LobattoLegendreBasis(RealT, polydeg::Integer) nnodes_ = polydeg + 1 @@ -155,6 +181,17 @@ struct LobattoLegendreMortarL2{RealT <: Real, NNODES, reverse_lower::ReverseMatrix end +function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2) + forward_upper = adapt(to, mortar.forward_upper) + forward_lower = adapt(to, mortar.forward_lower) + reverse_upper = adapt(to, mortar.reverse_upper) + reverse_lower = adapt(to, mortar.reverse_lower) + return LobattoLegendreMortarL2{eltype(forward_upper), nnodes(mortar), + typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end + function MortarL2(basis::LobattoLegendreBasis) RealT = real(basis) nnodes_ = nnodes(basis) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index a070db6b701..83097f4a1ed 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,31 @@ #! 
format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, + ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1}, + ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2}, + ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3}, + VectorRealT <: DenseVector{RealT}, + VectoruEltype <: DenseVector{uEltype}} <: + AbstractContainer # Physical coordinates at each node - node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + node_coordinates::ArrayNDIMSP2 # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix::Array{RealT, NDIMSP3} + jacobian_matrix::ArrayNDIMSP3 # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::ArrayNDIMSP3 # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::ArrayNDIMSP1 # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element] # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _node_coordinates::VectorRealT + _jacobian_matrix::VectorRealT + _contravariant_vectors::VectorRealT + _inverse_jacobian::VectorRealT + _surface_flux_values::VectoruEltype end @inline function 
nelements(elements::P4estElementContainer) @@ -36,7 +42,7 @@ end RealT, uEltype } - uEltype + return uEltype end # Only one-dimensional `Array`s are `resize!`able in Julia. @@ -51,31 +57,41 @@ function Base.resize!(elements::P4estElementContainer, capacity) n_dims = ndims(elements) n_nodes = size(elements.node_coordinates, 2) n_variables = size(elements.surface_flux_values, 1) + ArrayType = storage_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (n_dims, ntuple(_ -> n_nodes, n_dims)..., - capacity)) + elements.node_coordinates = unsafe_wrap_or_alloc(ArrayType, + _node_coordinates, + (n_dims, + ntuple(_ -> n_nodes, n_dims)..., + capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), - (n_dims, n_dims, - ntuple(_ -> n_nodes, n_dims)..., capacity)) + elements.jacobian_matrix = unsafe_wrap_or_alloc(ArrayType, + _jacobian_matrix, + (n_dims, n_dims, + ntuple(_ -> n_nodes, n_dims)..., + capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), - size(elements.jacobian_matrix)) + elements.contravariant_vectors = unsafe_wrap_or_alloc(ArrayType, + _contravariant_vectors, + size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), - (ntuple(_ -> n_nodes, n_dims)..., capacity)) + elements.inverse_jacobian = unsafe_wrap_or_alloc(ArrayType, + _inverse_jacobian, + (ntuple(_ -> n_nodes, n_dims)..., + capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, - ntuple(_ -> n_nodes, n_dims - 1)..., - n_dims * 2, capacity)) + 
elements.surface_flux_values = unsafe_wrap_or_alloc(ArrayType, + _surface_flux_values, + (n_variables, + ntuple(_ -> n_nodes, + n_dims - 1)..., + n_dims * 2, capacity)) return nothing end @@ -117,33 +133,104 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, NDIMS, RealT}, NDIMS * 2, nelements)) elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, - contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, _jacobian_matrix, - _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) + NDIMS + 3, Array{RealT, NDIMS + 1}, + Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3}, + Vector{RealT}, Vector{uEltype}}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) init_elements!(elements, mesh, basis) return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +function Adapt.parent_type(::Type{<:P4estElementContainer{<:Any, <:Any, <:Any, <:Any, + <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, + elements::P4estElementContainer{NDIMS}) where {NDIMS} + # Adapt underlying storage + _node_coordinates = adapt(to, elements._node_coordinates) + _jacobian_matrix = adapt(to, elements._jacobian_matrix) + _contravariant_vectors = adapt(to, elements._contravariant_vectors) + _inverse_jacobian = adapt(to, elements._inverse_jacobian) + _surface_flux_values = adapt(to, elements._surface_flux_values) + + RealT = eltype(_inverse_jacobian) + uEltype = eltype(_surface_flux_values) + + # Wrap arrays again + node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates, + size(elements.node_coordinates)) + jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix, + 
size(elements.jacobian_matrix)) + contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors, + size(jacobian_matrix)) + inverse_jacobian = unsafe_wrap_or_alloc(to, _inverse_jacobian, + size(elements.inverse_jacobian)) + surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values, + size(elements.surface_flux_values)) + + new_type_params = (NDIMS, + RealT, + uEltype, + NDIMS + 1, + NDIMS + 2, + NDIMS + 3, + typeof(inverse_jacobian), # ArrayNDIMSP1 + typeof(node_coordinates), # ArrayNDIMSP2 + typeof(jacobian_matrix), # ArrayNDIMSP3 + typeof(_node_coordinates), # VectorRealT + typeof(_surface_flux_values)) # VectoruEltype + return P4estElementContainer{new_type_params...}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) +end + +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::IdsMatrix # [primary/secondary, interface] + node_indices::IndicesMatrix # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline function ninterfaces(interfaces::P4estInterfaceContainer) size(interfaces.neighbor_ids, 2) end @inline 
Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + return uEltype +end # See explanation of Base.resize! for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) @@ -152,17 +239,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) n_dims = ndims(interfaces) n_nodes = size(interfaces.u, 3) n_variables = size(interfaces.u, 2) + ArrayType = storage_type(interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), + interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), + (2, capacity)) resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), + (2, capacity)) return nothing end @@ -189,10 +279,15 @@ function init_interfaces(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, - node_indices, - _u, _neighbor_ids, - _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(neighbor_ids), + typeof(node_indices), typeof(_u), + typeof(_neighbor_ids), typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) init_interfaces!(interfaces, mesh) @@ -205,21 +300,58 @@ function init_interfaces!(interfaces, mesh::Union{P4estMesh, 
P4estMeshView}) return interfaces end -mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: +function Adapt.parent_type(::Type{<:P4estInterfaceContainer{<:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer) + # Adapt underlying storage + _u = adapt(to, interfaces._u) + _neighbor_ids = adapt(to, interfaces._neighbor_ids) + _node_indices = adapt(to, interfaces._node_indices) + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, + size(interfaces.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, + size(interfaces.node_indices)) + + NDIMS = ndims(interfaces) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 2, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1, + uArray <: DenseArray{uEltype, NDIMSP1}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + u::uArray # [variables, i, j, boundary] + neighbor_ids::IdsVector # [boundary] + node_indices::IndicesVector # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nboundaries(boundaries::P4estBoundaryContainer) length(boundaries.neighbor_ids) end @inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function 
Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) @@ -228,9 +360,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity) n_dims = ndims(boundaries) n_nodes = size(boundaries.u, 2) n_variables = size(boundaries.u, 1) + ArrayType = storage_type(boundaries) resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), + boundaries.u = unsafe_wrap(ArrayType, pointer(_u), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -263,9 +396,11 @@ function init_boundaries(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, - node_indices, names, - _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, + node_indices, names, + _u) if n_boundaries > 0 init_boundaries!(boundaries, mesh) @@ -312,6 +447,25 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) return nothing end +function Adapt.parent_type(::Type{<:P4estBoundaryContainer{<:Any, <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer) + _u = adapt(to, boundaries._u) + u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) + neighbor_ids = adapt(to, boundaries.neighbor_ids) + node_indices = adapt(to, boundaries.node_indices) + name = boundaries.name + + NDIMS = ndims(boundaries) + return P4estBoundaryContainer{NDIMS, eltype(_u), NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, 
node_indices, + name, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in `neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -337,20 +491,32 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + u::uArray # [small/large side, variable, position, i, j, mortar] + neighbor_ids::IdsMatrix # [position, mortar] + node_indices::IndicesMatrix # [small/large, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) @@ -359,18 +525,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity) n_dims = ndims(mortars) n_nodes = size(mortars.u, 4) n_variables = size(mortars.u, 2) + ArrayType = storage_type(mortars) resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), + mortars.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, 2^(n_dims - 1), ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), (2^(n_dims - 1) + 1, capacity)) resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity)) return nothing end @@ -398,12 +565,15 @@ function init_mortars(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equatio _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, - neighbor_ids, - node_indices, - _u, - _neighbor_ids, - _node_indices) + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), + typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) if n_mortars > 0 init_mortars!(mortars, mesh) @@ -418,6 +588,34 @@ function init_mortars!(mortars, mesh::Union{P4estMesh, P4estMeshView}) return mortars end +function Adapt.parent_type(::Type{<:P4estMortarContainer{<:Any, <:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing 
memory +function Adapt.adapt_structure(to, mortars::P4estMortarContainer) + # Adapt underlying storage + _u = adapt(to, mortars._u) + _neighbor_ids = adapt(to, mortars._neighbor_ids) + _node_indices = adapt(to, mortars._node_indices) + + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(mortars.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices)) + + NDIMS = ndims(mortars) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 1, + NDIMS + 3, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) # Re-initialize elements container @unpack elements = cache diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 676b37efff3..123337d8c0a 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -5,15 +5,19 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + VecInt <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides::Vector{Int} # [interface] - + u::uArray # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::VecInt # [interface] + node_indices::IndicesVector # [interface] + local_sides::VecInt # [interface] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) @@ -27,9 +31,10 @@ function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) n_dims = ndims(mpi_interfaces) n_nodes = size(mpi_interfaces.u, 3) n_variables = size(mpi_interfaces.u, 2) + ArrayType = storage_type(mpi_interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -64,11 +69,13 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, - local_neighbor_ids, - node_indices, - local_sides, - _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) init_mpi_interfaces!(mpi_interfaces, mesh) @@ -81,6 +88,32 @@ function 
init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) return mpi_interfaces end +function Adapt.parent_type(::Type{<:Trixi.P4estMPIInterfaceContainer{<:Any, <:Any, + <:Any, A}}) where {A} + return A +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer) + # Adapt Vectors and underlying storage + _u = adapt(to, mpi_interfaces._u) + local_neighbor_ids = adapt(to, mpi_interfaces.local_neighbor_ids) + node_indices = adapt(to, mpi_interfaces.node_indices) + local_sides = adapt(to, mpi_interfaces.local_sides) + + # Wrap array again + u = unsafe_wrap_or_alloc(to, _u, size(mpi_interfaces.u)) + + NDIMS = ndims(mpi_interfaces) + return P4estMPIInterfaceContainer{NDIMS, eltype(u), + NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into @@ -88,14 +121,17 @@ end # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. 
mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] - local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + NDIMSP2, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + uVector <: DenseVector{uEltype}} <: + AbstractContainer + u::uArray # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] + local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector _node_indices::Vector{NTuple{NDIMS, Symbol}} _normal_directions::Vector{RealT} end @@ -164,11 +200,12 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq 2^(NDIMS - 1), n_mpi_mortars)) mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(u, local_neighbor_ids, - local_neighbor_positions, - node_indices, normal_directions, - _u, _node_indices, - _normal_directions) + NDIMS + 3, typeof(u), + typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) if n_mpi_mortars > 0 init_mpi_mortars!(mpi_mortars, mesh, basis, elements) @@ -184,6 +221,34 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements return mpi_mortars end +function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) + # Only parts of this container are adapted, since we currently don't + # use 
`local_neighbor_ids`, `local_neighbor_positions`, `normal_directions` + # on the GPU. If we do need them we need to redesign this to use the VecOfArrays + # approach. + + _u = adapt(to, mpi_mortars._u) + _node_indices = mpi_mortars._node_indices + _normal_directions = mpi_mortars._normal_directions + + u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u)) + local_neighbor_ids = mpi_mortars.local_neighbor_ids + local_neighbor_positions = mpi_mortars.local_neighbor_positions + node_indices = mpi_mortars.node_indices + normal_directions = mpi_mortars.normal_directions + + NDIMS = ndims(mpi_mortars) + return P4estMPIMortarContainer{NDIMS, eltype(_u), + eltype(_normal_directions), + NDIMS + 1, NDIMS + 2, NDIMS + 3, + typeof(u), typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, + normal_directions, _u, + _node_indices, + _normal_directions) +end + # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! 
function for parallel p4est meshes function init_interfaces!(interfaces, mesh::ParallelP4estMesh) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index e59f502c86c..4c099c9fd3f 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -13,18 +13,18 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, fstar_primary_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays fstar_secondary_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays (; fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded, u_threaded) end diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 2cc201dd1f0..7acddf07b4b 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -5,12 +5,13 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPICache{uEltype} +mutable struct P4estMPICache{BufferType <: DenseVector, + VecInt <: DenseVector{<:Integer}} mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_neighbor_interfaces::VecOfArrays{VecInt} + mpi_neighbor_mortars::VecOfArrays{VecInt} + mpi_send_buffers::VecOfArrays{BufferType} + mpi_recv_buffers::VecOfArrays{BufferType} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} @@ -25,25 +26,29 @@ function P4estMPICache(uEltype) end mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays mpi_send_requests = Vector{MPI.Request}(undef, 0) mpi_recv_requests = Vector{MPI.Request}(undef, 0) n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) n_elements_global = 0 first_element_global_id = 0 - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, - mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks, + mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end 
-@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype +@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType) + +# @eval due to @muladd +@eval Adapt.@adapt_structure(P4estMPICache) ## # Note that the code in `start_mpi_send`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10. @@ -265,16 +270,16 @@ end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, - mpi_mortars, - mesh) + mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, - mpi_neighbor_mortars, - ndims(mesh), - nvars, - n_nodes, - uEltype) + _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces, + _mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) # Determine local and total number of elements n_elements_global = Int(mesh.p4est.global_num_quadrants[]) @@ -286,6 +291,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces) + mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars) + mpi_send_buffers = VecOfArrays(_mpi_send_buffers) + mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers) + # TODO reuse existing structures @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 0cb3bd7f409..d6cf6e1ce6d 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}, + Vec <: AbstractVector{<:Integer}} boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier end @@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) boundary_symbol_indices = Dict{Symbol, Vector{Int}}() container = UnstructuredSortedBoundaryTypes{n_boundary_types, - typeof(boundary_condition_types)}(boundary_condition_types, - boundary_indices, - boundary_conditions, - boundary_symbol_indices) + typeof(boundary_condition_types), + Vector{Int}}(boundary_condition_types, + boundary_indices, + boundary_conditions, + boundary_symbol_indices) initialize!(container, cache) end @@ -119,4 +121,7 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N return 
boundary_types_container end + +# @eval due to @muladd +@eval Adapt.@adapt_structure(UnstructuredSortedBoundaryTypes) end # @muladd diff --git a/test/Project.toml b/test/Project.toml index 3559f8cb6e2..7e40da4ceae 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,10 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" diff --git a/test/runtests.jl b/test/runtests.jl index db2c2e9dd88..8f35e1fb58d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -109,4 +109,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" include("test_paper_self_gravitating_gas_dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" + import CUDA + if CUDA.functional() + include("test_cuda.jl") + else + @warn "Unable to run CUDA tests on this machine" + end + end end diff --git a/test/test_aqua.jl b/test/test_aqua.jl index 9b3f2d67903..154088995ca 100644 --- a/test/test_aqua.jl +++ b/test/test_aqua.jl @@ -10,6 +10,7 @@ include("test_trixi.jl") @timed_testset "Aqua.jl" begin Aqua.test_all(Trixi, ambiguities = false, + unbound_args = false, # FIXME: UnstructuredSortedBoundaryTypes # exceptions necessary for adding a new method `StartUpDG.estimate_h` # in src/solvers/dgmulti/sbp.jl piracies = (treat_as_own = [Trixi.StartUpDG.RefElemData, diff --git a/test/test_cuda.jl b/test/test_cuda.jl new file mode 100644 index 00000000000..1f96d8c863e --- /dev/null +++ b/test/test_cuda.jl @@ -0,0 +1,52 @@ +module TestCUDA + +using Test +using Trixi + +include("test_trixi.jl") + +# 
Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + +@trixi_testset "elixir_advection_basic_gpu.jl" begin + # Using CUDA inside the testset since otherwise the bindings are hiddend by the anonymous modules + using CUDA + # TODO(benegee/vchuravy): Port compute_coefficients! to KernelAbstractions.jl + CUDA.allowscalar(true) + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=nothing, # [Float32(8.311947673061856e-6)], + linf=nothing, # [Float32(6.627000273229378e-5)], + RealT=Float32, + real_type=Float32, + storage_type=CuArray) + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test_broken ode.u0 isa CuArray + @test ode.p.solver.basis.derivative_matrix isa CuArray + + @test Trixi.storage_type(ode.p.cache.elements) === CuArray + @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray + @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray + @test Trixi.storage_type(ode.p.cache.mortars) === CuArray +end + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) + +end # module diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index 8f903a849d2..5d17bb1654e 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -27,6 +27,34 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, 
u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(semi32.mesh) == Float64 +end + +@trixi_testset "elixir_advection_basic.jl (Float32)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], + RealT=Float32, + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test_broken (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 end @trixi_testset "elixir_advection_nonconforming_flag.jl" begin diff --git a/test/test_unstructured_2d.jl b/test/test_unstructured_2d.jl index d16bc96fb83..758e42b7da1 100644 --- a/test/test_unstructured_2d.jl +++ b/test/test_unstructured_2d.jl @@ -2,6 +2,7 @@ module TestExamplesUnstructuredMesh2D using Test using Trixi +using Adapt include("test_trixi.jl") @@ -32,6 +33,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_euler_free_stream.jl" begin From a18e5d2f8a440e8c794d4084ea3237a981cd9ad7 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 
1 Jul 2025 19:33:07 +0200 Subject: [PATCH 040/158] restore elixir --- examples/p4est_2d_dgsem/elixir_advection_basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl index e162e8997f2..4ff646365aa 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic.jl @@ -31,7 +31,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) +ode = semidiscretize(semi, (0.0, 1.0)) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers From 5c942fe351e0a16f3d367e67d0afe0e7f53094db Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 1 Jul 2025 15:08:30 +0200 Subject: [PATCH 041/158] offload compute_coefficients --- Project.toml | 2 + .../elixir_advection_basic_gpu.jl | 18 ++++--- src/Trixi.jl | 1 + src/auxiliary/containers.jl | 4 ++ src/semidiscretization/semidiscretization.jl | 3 +- src/solvers/dg.jl | 47 +++++++++++++++---- 6 files changed, 54 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index 875d2ae6db1..27136900dc3 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" @@ -82,6 +83,7 @@ EllipsisNotation = "1.0" FillArrays = "1.9" ForwardDiff = "0.10.36, 1" HDF5 = "0.16.10, 
0.17" +KernelAbstractions = "0.9" LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.171" diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 4c0f5744a88..8a01d55f632 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -1,8 +1,6 @@ -# The same setup as tree_2d_dgsem/elixir_advection_basic.jl -# to verify the StructuredMesh implementation against TreeMesh - -using OrdinaryDiffEqSSPRK, OrdinaryDiffEqLowStorageRK +using OrdinaryDiffEqLowStorageRK using Trixi +using CUDA ############################################################################### # semidiscretization of the linear advection equation @@ -31,7 +29,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = CuArray) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers @@ -48,8 +46,8 @@ save_solution = SaveSolutionCallback(interval = 100, stepsize_callback = StepsizeCallback(cfl = 1.6) # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver -callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, - stepsize_callback) +callbacks = CallbackSet(summary_callback) +# analysis_callback, save_solution, stepsize_callback) ############################################################################### # run the simulation @@ -58,6 +56,6 @@ callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, # Uncomment the calls below to discover missing functionality. 
# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks -# sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); -# dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback -# ode_default_options()..., callback = callbacks); + sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/src/Trixi.jl b/src/Trixi.jl index a52dfd6d973..7836f1938b1 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -59,6 +59,7 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace +using KernelAbstractions using LinearMaps: LinearMap if _PREFERENCE_LOOPVECTORIZATION using LoopVectorization: LoopVectorization, @turbo, indices diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index edc42db382b..40aff873956 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -405,4 +405,8 @@ end function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} return unsafe_wrap_or_alloc(Storage, vec, size) end + +function KernelAbstractions.get_backend(semi::AbstractSemidiscretization) + KernelAbstractions.get_backend(semi.cache.elements.node_coordinates) +end end # @muladd diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index 97c50aa46a1..e214f569d13 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -176,7 +176,8 @@ Same as [`compute_coefficients`](@ref) but stores the result in `u_ode`. 
function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) u = wrap_array(u_ode, semi) # Call `compute_coefficients` defined by the solver - compute_coefficients!(u, func, t, mesh_equations_solver_cache(semi)...) + backend = get_backend(semi) + compute_coefficients!(backend, u, func, t, mesh_equations_solver_cache(semi)...) end """ diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 78f3901a346..273cc8f7a47 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -642,8 +642,10 @@ include("fdsbp_unstructured/fdsbp.jl") function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) # We must allocate a `Vector` in order to be able to `resize!` it (AMR). # cf. wrap_array - zeros(eltype(cache.elements), - nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u_ode = similar(cache.elements.node_coordinates, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + fill!(u_ode, zero(eltype(u_ode))) + return u_ode end @inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, @@ -686,7 +688,8 @@ end # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) else # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. 
- unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + ArrayType = Trixi.storage_type(u_ode) + unsafe_wrap(ArrayType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) end @@ -756,15 +759,39 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: end end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, +function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache) + @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) - end + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) + end +end + +function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, + equations, dg::DG, cache) + nelements(dg, cache) == 0 && return nothing + # 1 cache not as argument + # 2 mesh not + @unpack node_coordinates = cache.elements + kernel! 
= compute_coefficients_kernel!(backend) + kernel!(u, func, t, equations, dg, node_coordinates, + ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function compute_coefficients_kernel!(u, func, t, equations, + dg::DG, node_coordinates) + element = @index(Global) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) +end + +function compute_coefficients_element!(u, func, t, equations, dg::DG, + node_coordinates, element) + for j in eachnode(dg), i in eachnode(dg) + x_node = get_node_coords(node_coordinates, equations, dg, i, + j, element) + u_node = func(x_node, t, equations) + set_node_vars!(u, u_node, equations, dg, i, j, element) end end From 47a55f2ebea76a410e53e7a40f389587af95315f Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 1 Jul 2025 15:16:07 +0200 Subject: [PATCH 042/158] fmt --- examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl | 6 +++--- src/solvers/dg.jl | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 8a01d55f632..8fd7c31a413 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -56,6 +56,6 @@ callbacks = CallbackSet(summary_callback) # Uncomment the calls below to discover missing functionality. 
# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks - sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); - dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback - ode_default_options()..., callback = callbacks); +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 273cc8f7a47..756036a0e55 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -643,7 +643,8 @@ function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) # We must allocate a `Vector` in order to be able to `resize!` it (AMR). # cf. wrap_array u_ode = similar(cache.elements.node_coordinates, - nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + nvariables(equations) * nnodes(dg)^ndims(mesh) * + nelements(dg, cache)) fill!(u_ode, zero(eltype(u_ode))) return u_ode end @@ -759,11 +760,13 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: end end -function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, dg::DG, +function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, + dg::DG, cache) @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, + element) end end @@ -789,7 +792,7 @@ function compute_coefficients_element!(u, func, t, equations, dg::DG, node_coordinates, element) for j in eachnode(dg), i in eachnode(dg) x_node = get_node_coords(node_coordinates, equations, dg, i, - j, element) + j, element) u_node = func(x_node, t, equations) set_node_vars!(u, u_node, 
equations, dg, i, j, element) end From 36b0e4aae600e79a3168249e97994855e7bb81dc Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 2 Jul 2025 09:11:54 +0200 Subject: [PATCH 043/158] test native version as well --- .../elixir_advection_basic_gpu.jl | 9 +++-- src/Trixi.jl | 1 + src/auxiliary/containers.jl | 8 +++++ src/semidiscretization/semidiscretization.jl | 2 +- src/solvers/dg.jl | 7 ++-- test/test_cuda.jl | 35 ++++++++++++++++--- 6 files changed, 46 insertions(+), 16 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 8fd7c31a413..61277a2734f 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -1,6 +1,5 @@ using OrdinaryDiffEqLowStorageRK using Trixi -using CUDA ############################################################################### # semidiscretization of the linear advection equation @@ -29,7 +28,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = CuArray) +ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers @@ -56,6 +55,6 @@ callbacks = CallbackSet(summary_callback) # Uncomment the calls below to discover missing functionality. 
# # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks -sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); - dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback - ode_default_options()..., callback = callbacks); +#sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); +# dt = 1e-2, # solve needs some value here but it will be overwritten by the stepsize_callback +# ode_default_options()..., callback = callbacks); diff --git a/src/Trixi.jl b/src/Trixi.jl index 7836f1938b1..18000e050bd 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -20,6 +20,7 @@ const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") const _PREFERENCE_POLYESTER = @load_preference("polyester", true) const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true) +const _PREFERENCE_USE_NATIVE_THREADING = @load_preference("native_threading", true) # Include other packages that are used in Trixi.jl # (standard library packages first, other packages next, all of them sorted alphabetically) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 40aff873956..ac412eb2da8 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -406,6 +406,14 @@ function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage return unsafe_wrap_or_alloc(Storage, vec, size) end +function trixi_backend(x) + backend = get_backend(x) + if _PREFERENCE_USE_NATIVE_THREADING && backend isa KernelAbstractions.CPU + backend = nothing + end + return backend +end + function KernelAbstractions.get_backend(semi::AbstractSemidiscretization) KernelAbstractions.get_backend(semi.cache.elements.node_coordinates) end diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index e214f569d13..b8f53237550 100644 --- 
a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -176,7 +176,7 @@ Same as [`compute_coefficients`](@ref) but stores the result in `u_ode`. function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) u = wrap_array(u_ode, semi) # Call `compute_coefficients` defined by the solver - backend = get_backend(semi) + backend = trixi_backend(semi) compute_coefficients!(backend, u, func, t, mesh_equations_solver_cache(semi)...) end diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 756036a0e55..9ec37647c97 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -760,9 +760,8 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: end end -function compute_coefficients!(backend::Any, func, t, mesh::AbstractMesh{2}, equations, - dg::DG, - cache) +function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{2}, + equations, dg::DG, cache) @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, @@ -773,8 +772,6 @@ end function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache) nelements(dg, cache) == 0 && return nothing - # 1 cache not as argument - # 2 mesh not @unpack node_coordinates = cache.elements kernel! 
= compute_coefficients_kernel!(backend) kernel!(u, func, t, equations, dg, node_coordinates, diff --git a/test/test_cuda.jl b/test/test_cuda.jl index 1f96d8c863e..c6904b41a9d 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda.jl @@ -11,16 +11,41 @@ isdir(outdir) && rm(outdir, recursive = true) EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") -@trixi_testset "elixir_advection_basic_gpu.jl" begin +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=nothing, # [Float32(8.311947673061856e-6)], + linf=nothing,) + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / CUDA" begin # Using CUDA inside the testset since otherwise the bindings are hiddend by the anonymous modules using CUDA - # TODO(benegee/vchuravy): Port compute_coefficients! to KernelAbstractions.jl - CUDA.allowscalar(true) @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! 
l2=nothing, # [Float32(8.311947673061856e-6)], linf=nothing, # [Float32(6.627000273229378e-5)], - RealT=Float32, real_type=Float32, storage_type=CuArray) # # Ensure that we do not have excessive memory allocations @@ -37,7 +62,7 @@ EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") # TODO: remake ignores the mesh itself as well @test real(ode.p.mesh) == Float64 - @test_broken ode.u0 isa CuArray + @test ode.u0 isa CuArray @test ode.p.solver.basis.derivative_matrix isa CuArray @test Trixi.storage_type(ode.p.cache.elements) === CuArray From 153d8289418e33574425eafcdc443aeae52b5441 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 2 Jul 2025 09:34:33 +0200 Subject: [PATCH 044/158] adapt 1D and 3D version --- src/solvers/dg.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 9ec37647c97..a9ed65d7070 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -739,8 +739,8 @@ end nelements(dg, cache))) end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg::DG, - cache) +function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{1}, + equations, dg::DG, cache) @threaded for element in eachelement(dg, cache) for i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, @@ -795,8 +795,8 @@ function compute_coefficients_element!(u, func, t, equations, dg::DG, end end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG, - cache) +function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{3}, + equations, dg::DG, cache) @threaded for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, From 819ba7525c534568c3a127a6e371e2995e6e92bf Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 2 Jul 2025 09:34:49 +0200 Subject: [PATCH 045/158] 
Downgrade compat with Adapt --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 27136900dc3..51614052357 100644 --- a/Project.toml +++ b/Project.toml @@ -83,7 +83,7 @@ EllipsisNotation = "1.0" FillArrays = "1.9" ForwardDiff = "0.10.36, 1" HDF5 = "0.16.10, 0.17" -KernelAbstractions = "0.9" +KernelAbstractions = "0.9.15" LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.171" From e75cac7dbaf1eec9d45776a90125c541e57762f5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 2 Jul 2025 10:41:15 +0200 Subject: [PATCH 046/158] update requires to 1.3 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 51614052357..fa88a560ed2 100644 --- a/Project.toml +++ b/Project.toml @@ -101,7 +101,7 @@ Printf = "1" RecipesBase = "1.3.4" RecursiveArrayTools = "3.31.1" Reexport = "1.2" -Requires = "1.1" +Requires = "1.3" SciMLBase = "2.67.0" SimpleUnPack = "1.1" SparseArrays = "1" From e7cde27d80f50658d9061372ecd17e1980de9440 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 16 Sep 2025 11:04:49 +0200 Subject: [PATCH 047/158] missed during merge --- src/solvers/dgsem_p4est/containers.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index c8db5388e77..3f74f699f19 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -223,13 +223,8 @@ mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, IndicesVector <: DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer -<<<<<<< HEAD - u::uArray # [primary/secondary, variable, i, j, interface] - neighbor_ids::IdsMatrix # [primary/secondary, interface] -======= u::uArray # [primary/secondary, variable, i, j, interface] neighbor_ids::IdsMatrix # [primary/secondary, interface] ->>>>>>> main node_indices::IndicesMatrix # [primary/secondary, 
interface] # internal `resize!`able storage From b174d6d9e5c0d66afd05bea3885952e069e2d5e4 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 16 Sep 2025 13:19:28 +0200 Subject: [PATCH 048/158] mistakes during merge --- src/Trixi.jl | 1 - src/semidiscretization/semidiscretization.jl | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index d98920bcf0b..9412c33db6f 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -20,7 +20,6 @@ const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") const _PREFERENCE_THREADING = Symbol(@load_preference("backend", "polyester")) const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true) -const _PREFERENCE_USE_NATIVE_THREADING = @load_preference("native_threading", true) # Include other packages that are used in Trixi.jl # (standard library packages first, other packages next, all of them sorted alphabetically) diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index ef2847ced6a..a629ff64f0d 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -230,7 +230,6 @@ function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) backend = trixi_backend(u_ode) u = wrap_array(u_ode, semi) # Call `compute_coefficients` defined by the solver - backend = trixi_backend(semi) compute_coefficients!(backend, u, func, t, mesh_equations_solver_cache(semi)...) 
end From 489bb24933d57c68799b15ea8bf6efcbf09f597e Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 18 Sep 2025 12:02:12 +0200 Subject: [PATCH 049/158] cleanup --- Project.toml | 2 -- src/auxiliary/containers.jl | 4 ---- 2 files changed, 6 deletions(-) diff --git a/Project.toml b/Project.toml index 8eb7aa80e5b..e898cdf144b 100644 --- a/Project.toml +++ b/Project.toml @@ -59,7 +59,6 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" [extensions] @@ -67,7 +66,6 @@ TrixiCUDAExt = "CUDA" TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" -TrixiCUDAExt = "CUDA" TrixiSparseConnectivityTracerExt = "SparseConnectivityTracer" [compat] diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 874b238f1cf..5036863ff4b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -380,10 +380,6 @@ function trixi_backend(x::VectorOfArray) return get_backend(u[1]) end -function KernelAbstractions.get_backend(semi::AbstractSemidiscretization) - KernelAbstractions.get_backend(semi.cache.elements.node_coordinates) -end - # For some storage backends like CUDA.jl, empty arrays do seem to simply be # null pointers which can cause `unsafe_wrap` to fail when calling # Adapt.adapt (ArgumentError, see From b4d15354e80eb796bf4f17f2769444afc9faabdc Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 18 Sep 2025 12:05:02 +0200 Subject: [PATCH 050/158] Basis kernels for 3D P4est - prolong2interfaces - calc_interface_flux - calc_surface_integral - calc_volume_integral (weak_form_kernel) - apply_jacobian --- .../semidiscretization_hyperbolic.jl | 3 +- src/solvers/dg.jl | 53 +- src/solvers/dgsem_p4est/dg_3d.jl | 455 +++++++++++------- 
src/solvers/dgsem_p4est/dg_3d_parallel.jl | 2 +- src/solvers/dgsem_structured/dg_1d.jl | 2 +- src/solvers/dgsem_structured/dg_2d.jl | 2 +- src/solvers/dgsem_structured/dg_3d.jl | 57 ++- src/solvers/dgsem_tree/dg_1d.jl | 2 +- src/solvers/dgsem_tree/dg_2d.jl | 2 +- src/solvers/dgsem_tree/dg_2d_parallel.jl | 2 +- src/solvers/dgsem_tree/dg_3d.jl | 45 +- src/solvers/dgsem_unstructured/dg_2d.jl | 2 +- 12 files changed, 396 insertions(+), 231 deletions(-) diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 2a563c02229..b49c18cbd37 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -399,10 +399,11 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 509c12dab95..f402aad2ebd 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -610,6 +610,13 @@ end return u_ll, u_rr end +# As above but dispatches on an type argument +@inline function get_surface_node_vars(u, equations, ::Type{<:DG}, indices...) + u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(nvariables(equations)))) + return u_ll, u_rr +end + @inline function set_node_vars!(u, u_node, equations, solver::DG, indices...) for v in eachvariable(equations) u[v, indices...] 
= u_node[v] @@ -774,54 +781,46 @@ function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{ return nothing end -function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{2}, +function compute_coefficients!(backend::Nothing, u, func, t, + mesh::Union{AbstractMesh{2}, AbstractMesh{3}}, equations, dg::DG, cache) @unpack node_coordinates = cache.elements + node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) @threaded for element in eachelement(dg, cache) compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, - element) + element, node_indices) end return nothing end -function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, +function compute_coefficients!(backend::Backend, u, func, t, + mesh::Union{AbstractMesh{2}, AbstractMesh{3}}, equations, dg::DG, cache) nelements(dg, cache) == 0 && return nothing + @unpack node_coordinates = cache.elements - kernel! = compute_coefficients_kernel!(backend) - kernel!(u, func, t, equations, dg, node_coordinates, + node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) + + kernel! 
= compute_coefficients_KAkernel!(backend) + kernel!(u, func, t, equations, dg, node_coordinates, node_indices, ndrange = nelements(dg, cache)) return nothing end -@kernel function compute_coefficients_kernel!(u, func, t, equations, - dg::DG, node_coordinates) +@kernel function compute_coefficients_KAkernel!(u, func, t, equations, + dg::DG, node_coordinates, node_indices) element = @index(Global) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element, + node_indices) end function compute_coefficients_element!(u, func, t, equations, dg::DG, - node_coordinates, element) - for j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(node_coordinates, equations, dg, i, - j, element) + node_coordinates, element, node_indices) + for indices in node_indices + x_node = get_node_coords(node_coordinates, equations, dg, indices, element) u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) - end - - return nothing -end - -function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{3}, - equations, dg::DG, cache) - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, k, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, k, element) - end + set_node_vars!(u, u_node, equations, dg, indices, element) end return nothing diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 63cf78ddd94..510f4d3c717 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -91,85 +91,116 @@ end return (i1, i2) end -function prolong2interfaces!(cache, u, +function prolong2interfaces!(backend::Nothing, cache, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG) 
@unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the indices of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, - i_primary, j_primary, - k_primary, - primary_element] - end - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i + prolong2interfaces_interface!(interfaces.u, u, typeof(mesh), equations, + neighbor_ids, node_indices, index_range, + interface) + end + return nothing +end + +function prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG) + @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces + index_range = eachnode(dg) + + kernel! 
= prolong2interfaces_KAkernel!(backend) + kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, + index_range, + ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_KAkernel!(interface_u, u, meshT, equations, + neighbor_ids, node_indices, index_range) + interface = @index(Global) + prolong2interfaces_interface!(interface_u, u, meshT, equations, neighbor_ids, + node_indices, index_range, interface) +end + +function prolong2interfaces_interface!(u_interface, u, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, + equations, neighbor_ids, node_indices, + index_range, interface) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in index_range + for i in index_range + for v in eachvariable(equations) + u_interface[1, v, i, j, interface] = u[v, + i_primary, j_primary, + k_primary, + primary_element] end - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + end - # Copy 
solution data from the secondary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], - index_range) - k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], - index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - k_secondary = k_secondary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, j, interface] = u[v, - i_secondary, j_secondary, - k_secondary, - secondary_element] - end - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - k_secondary += k_secondary_step_i + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. 
+ secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + index_range) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in index_range + for i in index_range + for v in eachvariable(equations) + u_interface[2, v, i, j, interface] = u[v, + i_secondary, j_secondary, + k_secondary, + secondary_element] end - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j - k_secondary += k_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j end - return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, nonconservative_terms, equations, surface_integral, dg::DG, cache) @@ -178,92 +209,139 @@ function calc_interface_flux!(surface_flux_values, index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Get element and side information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = 
index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - - # Get element and side information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - secondary_surface_indices = surface_indices(secondary_indices) - - # Get the surface indexing on the secondary element. - # Note that the indices of the primary side will always run forward but - # the secondary indices might need to run backwards for flipped sides. - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], - index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start + calc_interface_flux_interface!(surface_flux_values, + typeof(mesh), + nonconservative_terms, + equations, surface_integral, typeof(dg), + cache.interfaces.u, neighbor_ids, node_indices, + contravariant_vectors, index_range, interface) + end + return nothing +end + +function calc_interface_flux!(backend::Backend, surface_flux_values, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + nonconservative_terms, + equations, surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + kernel! 
= calc_interface_flux_KAkernel!(backend) + kernel!(surface_flux_values, typeof(mesh), nonconservative_terms, equations, + surface_integral, typeof(dg), cache.interfaces.u, + neighbor_ids, node_indices, contravariant_vectors, index_range, + ndrange = ninterfaces(cache.interfaces)) + return nothing +end + +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, meshT, + nonconservative_terms, equations, + surface_integral, solverT, u_inferface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + interface = @index(Global) + calc_interface_flux_interface!(surface_flux_values, + meshT, + nonconservative_terms, + equations, surface_integral, solverT, u_inferface, + neighbor_ids, node_indices, contravariant_vectors, + index_range, interface) +end + +function calc_interface_flux_interface!(surface_flux_values, + meshT::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + nonconservative_terms, + equations, surface_integral, + solverT::Type{<:DG}, u_interface, neighbor_ids, + node_indices, contravariant_vectors, + index_range, interface) + # Get element and side information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices = 
surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + index_range) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in index_range + for i in index_range + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, meshT, nonconservative_terms, + equations, + surface_integral, solverT, u_interface, + interface, normal_direction, + i, j, primary_direction, primary_element, + i_secondary, j_secondary, secondary_direction, + secondary_element) - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction from the primary element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, k_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, primary_direction, primary_element, - i_secondary, j_secondary, secondary_direction, - secondary_element) - - # Increment the primary element indices - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - # Increment the secondary element surface indices - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - end # Increment the primary element indices - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i # Increment the secondary element surface indices - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j end - return nothing end # Inlined function for interface flux computation for conservative flux terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = 
cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -813,7 +891,7 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, surface_integral::SurfaceIntegralWeakForm, @@ -821,51 +899,86 @@ function calc_surface_integral!(du, u, @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements + @threaded for element in eachelement(dg, cache) + calc_surface_integral_element!(du, typeof(mesh), + equations, + surface_integral, dg, surface_flux_values, + element) + end + return nothing +end + +function calc_surface_integral!(backend::Backend, du, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + kernel! = calc_surface_integral_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, surface_integral, dg, surface_flux_values, + ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function calc_surface_integral_KAkernel!(du, meshT, equations, + surface_integral, dg, + surface_flux_values) + element = @index(Global) + calc_surface_integral_element!(du, meshT, + equations, + surface_integral, dg, surface_flux_values, element) +end + +function calc_surface_integral_element!(du, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, surface_flux_values, element) # Note that all fluxes have been computed with outward-pointing normal vectors. 
# Access the factors only once before beginning the loop to increase performance. # We also use explicit assignments instead of `+=` to let `@muladd` turn these # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = (du[v, 1, l, m, element] + - surface_flux_values[v, l, m, 1, element] * - factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + - surface_flux_values[v, l, m, 2, - element] * - factor_2) - - # surface at -y - du[v, l, 1, m, element] = (du[v, l, 1, m, element] + - surface_flux_values[v, l, m, 3, element] * - factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + - surface_flux_values[v, l, m, 4, - element] * - factor_2) - - # surface at -z - du[v, l, m, 1, element] = (du[v, l, m, 1, element] + - surface_flux_values[v, l, m, 5, element] * - factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + - surface_flux_values[v, l, m, 6, - element] * - factor_2) - end + # TODO GPU: dg is adapted, accessing scalars outside of kernel is therefor not useful + factor_1 = dg.basis.boundary_interpolation[1, 1] + factor_2 = dg.basis.boundary_interpolation[nnodes(dg), 2] + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y 
+ du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index 520bc1c0599..276ddd9fb56 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index ee2832e66a8..d85e4bab7a9 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::StructuredMesh{1}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index a02a44bf4dd..2979bf1b254 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -5,7 +5,7 @@ @muladd begin #! 
format: noindent -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index aba79f3a5a5..0ad3fca68b8 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::StructuredMesh{3}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -56,16 +56,17 @@ see `flux_differencing_kernel!`. This treatment is required to achieve, e.g., entropy-stability or well-balancedness. See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# -@inline function weak_form_kernel!(du, u, - element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, - nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha = true) +@inline function weak_form_kernel_element!(du, u, + element, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + nonconservative_terms::False, equations, + dg::DGSEM, contravariant_vectors, + alpha = true) # true * [some floating point value] == [exactly the same floating point value] # This can (hopefully) be optimized away due to constant propagation. 
@unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, k, element) @@ -800,19 +801,45 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, +function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - factor = -cache.elements.inverse_jacobian[i, j, k, element] + apply_jacobian_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + element) + end + return nothing +end - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end +function apply_jacobian!(backend::Backend, du, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + + kernel! 
= apply_jacobian_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, + ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function apply_jacobian_KAkernel!(du, meshT, equations, dg::DG, + inverse_jacobian) + element = @index(Global) + apply_jacobian_element!(du, meshT, equations, dg, inverse_jacobian, element) +end + +function apply_jacobian_element!(du, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, + equations, dg, inverse_jacobian, element) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + factor = -inverse_jacobian[i, j, k, element] + + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 659a3babdcc..b0528a341ef 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -67,7 +67,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::TreeMesh{1}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 8b30219d29b..e7ca6b19dcb 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -112,7 +112,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
-function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, boundary_conditions, source_terms::Source, diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index cb522aa3eaa..ef8b57c93d8 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -447,7 +447,7 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}, ParallelT8codeMesh{2}}, equations, boundary_conditions, source_terms::Source, diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 7c8f5e0749c..f6147eb5056 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -159,7 +159,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
-function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -168,19 +168,19 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -212,12 +212,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -227,21 +228,45 @@ function rhs!(du, u, t, return nothing end -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @unpack contravariant_vectors = cache.elements @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, 
element, mesh, - nonconservative_terms, equations, - dg, cache) + weak_form_kernel_element!(du, u, element, typeof(mesh), + nonconservative_terms, equations, + dg, contravariant_vectors) end + return nothing +end + +function calc_volume_integral!(backend::Backend, du, u, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}, + nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + @unpack contravariant_vectors = cache.elements + kernel! = weak_form_KAkernel!(backend) + kernel!(du, u, typeof(mesh), nonconservative_terms, equations, dg, + contravariant_vectors, + ndrange = nelements(dg, cache)) return nothing end +@kernel function weak_form_KAkernel!(du, u, meshT, nonconservative_terms, equations, + dg::DGSEM, contravariant_vectors) + element = @index(Global) + weak_form_kernel_element!(du, u, element, meshT, + nonconservative_terms, equations, + dg, contravariant_vectors) +end + #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 4f90ba11a46..27554ffd320 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -34,7 +34,7 @@ function create_cache(mesh::UnstructuredMesh2D, equations, return cache end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::UnstructuredMesh2D, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} From 2443cf85193ff8ef418fce7a969ba5f1c9c26bf1 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 18 Sep 2025 12:06:14 +0200 Subject: [PATCH 051/158] port stepsize computation --- src/callbacks_step/stepsize.jl | 6 +- src/callbacks_step/stepsize_dg1d.jl | 8 +-- src/callbacks_step/stepsize_dg2d.jl | 20 +++--- 
src/callbacks_step/stepsize_dg3d.jl | 108 +++++++++++++++++++--------- src/solvers/dgmulti/dg.jl | 4 +- 5 files changed, 96 insertions(+), 50 deletions(-) diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index eac6f54261c..d643e91bd8d 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -118,8 +118,9 @@ end function calculate_dt(u_ode, t, cfl_number::Real, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - dt = cfl_number * max_dt(u, t, mesh, + dt = cfl_number * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -127,8 +128,9 @@ end function calculate_dt(u_ode, t, cfl_number, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - dt = cfl_number(t) * max_dt(u, t, mesh, + dt = cfl_number(t) * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) end diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index 7be0f074135..cfaa3adff2d 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend, u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -25,7 +25,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend, u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection @@ -41,7 +41,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::StructuredMesh{1}, +function max_dt(backend, u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -65,7 +65,7 @@ function max_dt(u, t, mesh::StructuredMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::StructuredMesh{1}, +function max_dt(backend, u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index a7c0dd2a0af..0d3e798b28f 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend, u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -27,7 +27,7 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend, u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection @@ -44,7 +44,7 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::ParallelTreeMesh{2}, +function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -61,7 +61,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2}, return dt end -function max_dt(u, t, mesh::ParallelTreeMesh{2}, +function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -78,7 +78,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2}, return dt end -function max_dt(u, t, +function max_dt(backend, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::False, equations, dg::DG, cache) @@ -114,7 +114,7 @@ function max_dt(u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +function max_dt(backend, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::True, equations, dg::DG, cache) @@ -146,7 +146,7 @@ function max_dt(u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::ParallelP4estMesh{2}, +function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -163,7 +163,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2}, return dt end -function max_dt(u, t, mesh::ParallelP4estMesh{2}, +function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, constant_speed::True, equations, 
dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -180,7 +180,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2}, return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{2}, +function max_dt(backend, u, t, mesh::ParallelT8codeMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -197,7 +197,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2}, return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{2}, +function max_dt(backend, u, t, mesh::ParallelT8codeMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 897f7d8b22b..159dca720d6 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend, u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -28,7 +28,7 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend, u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection @@ -45,51 +45,95 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, +function max_dt(backend::Nothing, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, constant_speed::False, equations, dg::DG, cache) + # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors = cache.elements + @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + max_lambda = max_scaled_speed_element(u, typeof(mesh), equations, dg, + contravariant_vectors, inverse_jacobian, + element) + max_scaled_speed = max(max_scaled_speed, max_lambda) + end - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, - i, j, k, element) - lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, - i, j, k, element) - lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, - i, j, k, element) - lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + return 2 / (nnodes(dg) * max_scaled_speed) +end - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) +function max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + constant_speed::False, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = 
cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) - max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) - max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) - max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) - end + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), equations, dg, contravariant_vectors, + inverse_jacobian; + ndrange = num_elements) - max_scaled_speed = max(max_scaled_speed, - max_lambda1 + max_lambda2 + max_lambda3) - end + # TODO GPU dt on CPU? (time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, equations, + dg, contravariant_vectors, inverse_jacobian) + element = @index(Global) + max_scaled_speeds[element] = max_scaled_speed_element(du, meshT, + equations, + surface_integral, dg, + surface_flux_values, element) +end + +function max_scaled_speed_element(u, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, equations, dg, + contravariant_vectors, inverse_jacobian, element) + max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, + i, j, k, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, + i, j, k, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, 
contravariant_vectors, + i, j, k, element) + lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) + + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) + max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) + max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) + end + return max_lambda1 + max_lambda2 + max_lambda3 +end + +function max_dt(backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors = cache.elements + if backend isa Nothing # TODO GPU KA CPU backend as well + @unpack contravariant_vectors, inverse_jacobian = cache.elements + else + # TODO GPU is this sufficient? + contravariant_vectors = Array(cache.elements.contravariant_vectors) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + end max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) @@ -108,7 +152,7 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + Ja33 * max_lambda3) - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) max_scaled_speed = max(max_scaled_speed, inv_jacobian * @@ -120,7 +164,7 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::ParallelP4estMesh{3}, +function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -137,7 +181,7 @@ function 
max_dt(u, t, mesh::ParallelP4estMesh{3}, return dt end -function max_dt(u, t, mesh::ParallelP4estMesh{3}, +function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -154,7 +198,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3}, return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{3}, +function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -171,7 +215,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3}, return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{3}, +function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index e3e01d42171..2be73e5e208 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -240,7 +240,7 @@ function dt_polydeg_scaling(dg::DGMulti{3, <:Wedge, <:TensorProductWedge}) end # for the stepsize callback -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend, u, t, mesh::DGMultiMesh, constant_speed::False, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh @@ -263,7 +263,7 @@ function max_dt(u, t, mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend, u, t, mesh::DGMultiMesh, constant_speed::True, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh From fc13ea55f2c2fbde5a361e3d24109bfd49bf5470 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 18 Sep 2025 12:08:08 +0200 
Subject: [PATCH 052/158] CPU workaround for analysis callback --- src/callbacks_step/analysis_dg2d.jl | 41 ++++++++++++++++++++++++----- src/callbacks_step/analysis_dg3d.jl | 40 +++++++++++++++++++++++----- src/callbacks_step/save_solution.jl | 9 ++++++- 3 files changed, 77 insertions(+), 13 deletions(-) diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index fa18c5af63a..0c4b1bc0b22 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -138,7 +138,7 @@ function calc_error_norms(func, u, t, analyzer, return l2_error, linf_error end -function calc_error_norms(func, u, t, analyzer, +function calc_error_norms(func, _u, t, analyzer, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, @@ -146,9 +146,19 @@ function calc_error_norms(func, u, t, analyzer, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack node_coordinates, inverse_jacobian = cache.elements + u = _u + else + node_coordinates = Array(cache.elements.node_coordinates) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end + # Set up data structures l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) linf_error = copy(l2_error) @@ -210,13 +220,23 @@ function integrate_via_indices(func::Func, u, return integral end -function integrate_via_indices(func::Func, u, +function integrate_via_indices(func::Func, _u, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} - @unpack 
weights = dg.basis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack weights = dg.basis + @unpack inverse_jacobian = cache.elements + u = _u + else + weights = Array(dg.basis.weights) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, equations, dg, args...)) @@ -226,7 +246,7 @@ function integrate_via_indices(func::Func, u, @batch reduction=((+, integral), (+, total_volume)) for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + volume_jacobian = abs(inv(inverse_jacobian[i, j, element])) integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) total_volume += volume_jacobian * weights[i] * weights[j] @@ -271,10 +291,19 @@ function integrate(func::Func, u, end end -function analyze(::typeof(entropy_timederivative), du, u, t, +function analyze(::typeof(entropy_timederivative), _du, u, t, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, dg::DG, cache) + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(u) + if backend isa Nothing # TODO GPU KA CPU backend + du = _du + else + du = Array(_du) + end + + # Calculate # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, element, equations, dg, du diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 072ffc16096..d9bd08a868d 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -161,14 +161,24 @@ function calc_error_norms(func, u, t, analyzer, return l2_error, linf_error end -function calc_error_norms(func, u, t, analyzer, +function 
calc_error_norms(func, _u, t, analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack node_coordinates, inverse_jacobian = cache.elements + u = _u + else + node_coordinates = Array(cache.elements.node_coordinates) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end + # Set up data structures l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) linf_error = copy(l2_error) @@ -234,12 +244,22 @@ function integrate_via_indices(func::Func, u, return integral end -function integrate_via_indices(func::Func, u, +function integrate_via_indices(func::Func, _u, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} - @unpack weights = dg.basis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack weights = dg.basis + @unpack inverse_jacobian = cache.elements + u = _u + else + weights = Array(dg.basis.weights) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) @@ -249,7 +269,7 @@ function integrate_via_indices(func::Func, u, @batch reduction=((+, integral), (+, total_volume)) for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + volume_jacobian = 
abs(inv(inverse_jacobian[i, j, k, element])) integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] @@ -295,10 +315,18 @@ function integrate(func::Func, u, end end -function analyze(::typeof(entropy_timederivative), du, u, t, +function analyze(::typeof(entropy_timederivative), _du, u, t, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG, cache) + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(u) + if backend isa Nothing # TODO GPU KA CPU backend + du = _du + else + du = Array(_du) + end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index ac40bc42de0..71196d6fe1f 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -280,11 +280,18 @@ end return nothing end -@inline function save_solution_file(u_ode, t, dt, iter, +@inline function save_solution_file(_u_ode, t, dt, iter, semi::AbstractSemidiscretization, solution_callback, element_variables = Dict{Symbol, Any}(), node_variables = Dict{Symbol, Any}(); system = "") + # TODO GPU currently on CPU + backend = trixi_backend(_u_ode) + if backend isa Nothing # TODO GPU KA CPU backend + u_ode = _u_ode + else + u_ode = Array(_u_ode) + end mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array_native(u_ode, mesh, equations, solver, cache) save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, From 2ff2f529b4f7db08828aab475e20e9080896408e Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 18 Sep 2025 12:09:17 +0200 Subject: [PATCH 053/158] tests --- .../elixir_advection_basic_gpu.jl | 5 +- .../elixir_advection_basic_gpu.jl | 60 +++++++++++++++ test/runtests.jl | 3 +- 
test/{test_cuda.jl => test_cuda_2d.jl} | 7 +- test/test_cuda_3d.jl | 73 +++++++++++++++++++ 5 files changed, 142 insertions(+), 6 deletions(-) create mode 100644 examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl rename test/{test_cuda.jl => test_cuda_2d.jl} (98%) create mode 100644 test/test_cuda_3d.jl diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 6f9e8e56986..ac3934eca7a 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -48,9 +48,8 @@ save_solution = SaveSolutionCallback(interval = 100, stepsize_callback = StepsizeCallback(cfl = 1.6) # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver -callbacks = CallbackSet(summary_callback, stepsize_callback) -# TODO: GPU. The `analysis_callback` needs to be updated for GPU support -# analysis_callback, save_solution, stepsize_callback) +callbacks = CallbackSet(summary_callback, analysis_callback, + save_solution, stepsize_callback) ############################################################################### # run the simulation diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl new file mode 100644 index 00000000000..801ae4cb6bc --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -0,0 +1,60 @@ +# The same setup as tree_3d_dgsem/elixir_advection_basic.jl +# to verify the P4estMesh implementation against TreeMesh + +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = 
DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z)) +coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) + +# Create P4estMesh with 8 x 8 x 8 elements (note `refinement_level=1`) +trees_per_dimension = (4, 4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg = 3, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver) + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span from 0.0 to 1.0 +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval = 100, + solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.2) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, + save_solution, stepsize_callback) + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks 
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 0.05, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/test/runtests.jl b/test/runtests.jl index 8f35e1fb58d..df348546130 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -113,7 +113,8 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" import CUDA if CUDA.functional() - include("test_cuda.jl") + include("test_cuda_2d.jl") + include("test_cuda_3d.jl") else @warn "Unable to run CUDA tests on this machine" end diff --git a/test/test_cuda.jl b/test/test_cuda_2d.jl similarity index 98% rename from test/test_cuda.jl rename to test/test_cuda_2d.jl index 4380ab0e111..da628f890cb 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda_2d.jl @@ -5,11 +5,14 @@ using Trixi include("test_trixi.jl") +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + # Start with a clean environment: remove Trixi.jl output directory if it exists outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") +@testset "CUDA 2D" begin +#! 
format: noindent @trixi_testset "elixir_advection_basic_gpu.jl native" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), @@ -75,5 +78,5 @@ end # Clean up afterwards: delete Trixi.jl output directory @test_nowarn isdir(outdir) && rm(outdir, recursive = true) - +end end # module diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl new file mode 100644 index 00000000000..f4281e880e4 --- /dev/null +++ b/test/test_cuda_3d.jl @@ -0,0 +1,73 @@ +module TestCUDA + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_3d_dgsem") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +@testset "CUDA 3D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=[0.00016263963870641478], + linf=[0.0014537194925779984]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / CUDA" begin + # Using CUDA inside the testset since otherwise the bindings are hiddend by the anonymous modules + using CUDA + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU + l2=[Float32(0.00016263963870641478)], + linf=[Float32(0.0014537194925779984)], + RealT=Float32, + real_type=Float32, + storage_type=CuArray) + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa CuArray + @test ode.p.solver.basis.derivative_matrix isa CuArray + + @test Trixi.storage_type(ode.p.cache.elements) === CuArray + @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray + @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray + @test Trixi.storage_type(ode.p.cache.mortars) === CuArray +end + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) +end +end # module From 
bc4ad17b482ed85976397043031d5cc9f7fec739 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Fri, 19 Sep 2025 09:38:22 +0200 Subject: [PATCH 054/158] add benchmark --- benchmark/CUDA/Project.toml | 6 ++ .../CUDA/elixir_euler_taylor_green_vortex.jl | 82 +++++++++++++++++++ benchmark/CUDA/run.jl | 78 ++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 benchmark/CUDA/Project.toml create mode 100644 benchmark/CUDA/elixir_euler_taylor_green_vortex.jl create mode 100644 benchmark/CUDA/run.jl diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml new file mode 100644 index 00000000000..221c03a5947 --- /dev/null +++ b/benchmark/CUDA/Project.toml @@ -0,0 +1,6 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +OrdinaryDiffEqLowStorageRK = "b0944070-b475-4768-8dec-fb6eb410534d" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl new file mode 100644 index 00000000000..2b4275afc86 --- /dev/null +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -0,0 +1,82 @@ +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations3D(1.4) + +function initial_condition_taylor_green_vortex(x, t, + equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + + 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + 
+initial_condition = initial_condition_taylor_green_vortex + +# TODO Undefined external symbol "log" +#volume_flux = flux_ranocha +volume_flux = flux_lax_friedrichs +solver = DGSEM(polydeg=5, surface_flux=volume_flux) +# TODO flux diff + #volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = ( 1.0, 1.0, 1.0) .* pi + +initial_refinement_level = 1 +trees_per_dimension = (4, 4, 4) + +mesh = P4estMesh(trees_per_dimension, polydeg=1, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + periodicity=true, initial_refinement_level=initial_refinement_level) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 100.0) +ode = semidiscretize(semi, tspan; storage_type=nothing, real_type=nothing) + +summary_callback = SummaryCallback() + +stepsize_callback = StepsizeCallback(cfl=0.1) + +callbacks = CallbackSet(summary_callback, + stepsize_callback) + + +############################################################################### +# run the simulation + +maxiters = 200 +run_profiler = false + +# disable warnings when maxiters is reached +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, + save_everystep=false, callback=callbacks, + maxiters=maxiters, verbose=false) +if run_profiler + prof_result = CUDA.@profile solve!(integrator) + # the internal profiler will return the results to be printed + if isa(prof_result, CUDA.Profile.ProfileResults) + print(prof_result) + end +else + solve!(integrator) +end + +finalize(mesh) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl new file mode 100644 index 00000000000..cc1b62306f0 --- /dev/null +++ b/benchmark/CUDA/run.jl @@ -0,0 +1,78 @@ +using Trixi +using CUDA +using TimerOutputs +using JSON + +function main(elixir_path) + + # setup + maxiters = 10 
+ initial_refinement_level = 3 + storage_type = CuArray + real_type = Float64 + + println("Warming up...") + + # start simulation with tiny final time to trigger compilation + duration_compile = @elapsed begin + trixi_include(elixir_path, + tspan=(0.0, 1e-14), + storage_type=storage_type, + real_type=real_type) + trixi_include(elixir_path, + tspan=(0.0, 1e-14), + storage_type=storage_type, + real_type=Float32) + end + + println("Finished warm-up in $duration_compile seconds\n") + println("Starting simulation...") + + # start the real simulation + duration_elixir = @elapsed trixi_include(elixir_path, + maxiters=maxiters, + initial_refinement_level=initial_refinement_level, + storage_type=storage_type, + real_type=real_type) + + # store metrics (on every rank!) + metrics = Dict{String, Float64}("elapsed time" => duration_elixir) + + # read TimerOutputs timings + timer = Trixi.timer() + metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer) + metrics["rhs! time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"]) + + # compute performance index + nrhscalls = Trixi.ncalls(semi.performance_counter) + walltime = 1.0e-9 * take!(semi.performance_counter) + metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls) + + # write json file + open("metrics.out", "w") do f + indent = 2 + JSON.print(f, metrics, indent) + end + + # run profiler + println("Running profiler (Float64)...") + trixi_include(elixir_path, + maxiters=5, + initial_refinement_level=initial_refinement_level, + storage_type=storage_type, + real_type=Float64, + run_profiler=true) + + println("Running profiler (Float32)...") + trixi_include(elixir_path, + maxiters=5, + initial_refinement_level=initial_refinement_level, + storage_type=storage_type, + real_type=Float32, + run_profiler=true) +end + +# hardcoded elixir +elixir_path = joinpath(@__DIR__(), "elixir_euler_taylor_green_vortex.jl") + +main(elixir_path) From de06c618980623845f67f913bf248f64599ccf3c Mon Sep 17 00:00:00 2001 From: 
Benedict Geihe Date: Fri, 19 Sep 2025 09:38:56 +0200 Subject: [PATCH 055/158] fix max_dt --- src/Trixi.jl | 2 +- src/callbacks_step/stepsize_dg3d.jl | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 9412c33db6f..e0d4f2dc24b 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -59,7 +59,7 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace -using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend +using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend, allocate using LinearMaps: LinearMap if _PREFERENCE_LOOPVECTORIZATION using LoopVectorization: LoopVectorization, @turbo, indices diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 159dca720d6..c609b0a5fe4 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -86,17 +86,17 @@ end @kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, equations, dg, contravariant_vectors, inverse_jacobian) element = @index(Global) - max_scaled_speeds[element] = max_scaled_speed_element(du, meshT, - equations, - surface_integral, dg, - surface_flux_values, element) + max_scaled_speeds[element] = max_scaled_speed_element(u, meshT, equations, dg, + contravariant_vectors, + inverse_jacobian, + element) end function max_scaled_speed_element(u, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, equations, dg, contravariant_vectors, inverse_jacobian, element) - max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) + max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u)) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, k, element) lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) From 
29298a5a069e806ed21aa91fdb4e71af0081be32 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 25 Sep 2025 21:41:37 +0200 Subject: [PATCH 056/158] profiler output --- benchmark/CUDA/Project.toml | 6 ------ .../CUDA/elixir_euler_taylor_green_vortex.jl | 5 +---- benchmark/CUDA/run.jl | 17 ++++++++++++++--- 3 files changed, 15 insertions(+), 13 deletions(-) delete mode 100644 benchmark/CUDA/Project.toml diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml deleted file mode 100644 index 221c03a5947..00000000000 --- a/benchmark/CUDA/Project.toml +++ /dev/null @@ -1,6 +0,0 @@ -[deps] -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -OrdinaryDiffEqLowStorageRK = "b0944070-b475-4768-8dec-fb6eb410534d" -TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl index 2b4275afc86..4e9c777fe7c 100644 --- a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -71,12 +71,9 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), maxiters=maxiters, verbose=false) if run_profiler prof_result = CUDA.@profile solve!(integrator) - # the internal profiler will return the results to be printed - if isa(prof_result, CUDA.Profile.ProfileResults) - print(prof_result) - end else solve!(integrator) + prof_result = nothing end finalize(mesh) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl index cc1b62306f0..d42fac4af23 100644 --- a/benchmark/CUDA/run.jl +++ b/benchmark/CUDA/run.jl @@ -6,7 +6,7 @@ using JSON function main(elixir_path) # setup - maxiters = 10 + maxiters = 50 initial_refinement_level = 3 storage_type = CuArray real_type = Float64 @@ -55,21 +55,32 @@ function main(elixir_path) end # run profiler + maxiters = 5 + initial_refinement_level = 2 + println("Running profiler 
(Float64)...") trixi_include(elixir_path, - maxiters=5, + maxiters=maxiters, initial_refinement_level=initial_refinement_level, storage_type=storage_type, real_type=Float64, run_profiler=true) + open("profile_float64.txt", "w") do io + show(io, prof_result) + end + println("Running profiler (Float32)...") trixi_include(elixir_path, - maxiters=5, + maxiters=maxiters, initial_refinement_level=initial_refinement_level, storage_type=storage_type, real_type=Float32, run_profiler=true) + + open("profile_float32.txt", "w") do io + show(io, prof_result) + end end # hardcoded elixir From 962a383a520a28eb5ec5392a9f3e3b497babfe98 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 29 Sep 2025 15:43:02 +0200 Subject: [PATCH 057/158] fmt --- .../CUDA/elixir_euler_taylor_green_vortex.jl | 42 +++++++++--------- benchmark/CUDA/run.jl | 44 +++++++++---------- src/Trixi.jl | 3 +- src/solvers/dgsem_p4est/dg_3d.jl | 4 +- 4 files changed, 47 insertions(+), 46 deletions(-) diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl index 4e9c777fe7c..de491a3761b 100644 --- a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -8,16 +8,18 @@ equations = CompressibleEulerEquations3D(1.4) function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) - A = 1.0 # magnitude of speed + A = 1.0 # magnitude of speed Ms = 0.1 # maximum Mach number rho = 1.0 - v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) - v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) - v3 = 0.0 - p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms - p = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + - 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3])) + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + + 1.0 / 16.0 * A^2 * rho * + (cos(2 * x[1]) 
* cos(2 * x[3]) + + 2 * cos(2 * x[2]) + 2 * cos(2 * x[1]) + cos(2 * x[2]) * cos(2 * x[3])) return prim2cons(SVector(rho, v1, v2, v3, p), equations) end @@ -27,37 +29,35 @@ initial_condition = initial_condition_taylor_green_vortex # TODO Undefined external symbol "log" #volume_flux = flux_ranocha volume_flux = flux_lax_friedrichs -solver = DGSEM(polydeg=5, surface_flux=volume_flux) +solver = DGSEM(polydeg = 5, surface_flux = volume_flux) # TODO flux diff - #volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) +#volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-1.0, -1.0, -1.0) .* pi -coordinates_max = ( 1.0, 1.0, 1.0) .* pi +coordinates_max = (1.0, 1.0, 1.0) .* pi initial_refinement_level = 1 trees_per_dimension = (4, 4, 4) -mesh = P4estMesh(trees_per_dimension, polydeg=1, - coordinates_min=coordinates_min, coordinates_max=coordinates_max, - periodicity=true, initial_refinement_level=initial_refinement_level) +mesh = P4estMesh(trees_per_dimension, polydeg = 1, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + periodicity = true, initial_refinement_level = initial_refinement_level) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) - ############################################################################### # ODE solvers, callbacks etc. 
tspan = (0.0, 100.0) -ode = semidiscretize(semi, tspan; storage_type=nothing, real_type=nothing) +ode = semidiscretize(semi, tspan; storage_type = nothing, real_type = nothing) summary_callback = SummaryCallback() -stepsize_callback = StepsizeCallback(cfl=0.1) +stepsize_callback = StepsizeCallback(cfl = 0.1) callbacks = CallbackSet(summary_callback, stepsize_callback) - ############################################################################### # run the simulation @@ -65,10 +65,10 @@ maxiters = 200 run_profiler = false # disable warnings when maxiters is reached -integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, - save_everystep=false, callback=callbacks, - maxiters=maxiters, verbose=false) +integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, + save_everystep = false, callback = callbacks, + maxiters = maxiters, verbose = false) if run_profiler prof_result = CUDA.@profile solve!(integrator) else diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl index d42fac4af23..5b9f318bfdb 100644 --- a/benchmark/CUDA/run.jl +++ b/benchmark/CUDA/run.jl @@ -16,13 +16,13 @@ function main(elixir_path) # start simulation with tiny final time to trigger compilation duration_compile = @elapsed begin trixi_include(elixir_path, - tspan=(0.0, 1e-14), - storage_type=storage_type, - real_type=real_type) + tspan = (0.0, 1e-14), + storage_type = storage_type, + real_type = real_type) trixi_include(elixir_path, - tspan=(0.0, 1e-14), - storage_type=storage_type, - real_type=Float32) + tspan = (0.0, 1e-14), + storage_type = storage_type, + real_type = Float32) end println("Finished warm-up in $duration_compile seconds\n") @@ -30,10 +30,10 @@ function main(elixir_path) # start the real simulation duration_elixir = @elapsed trixi_include(elixir_path, - maxiters=maxiters, - initial_refinement_level=initial_refinement_level, - storage_type=storage_type, - real_type=real_type) + maxiters = maxiters, + 
initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = real_type) # store metrics (on every rank!) metrics = Dict{String, Float64}("elapsed time" => duration_elixir) @@ -60,26 +60,26 @@ function main(elixir_path) println("Running profiler (Float64)...") trixi_include(elixir_path, - maxiters=maxiters, - initial_refinement_level=initial_refinement_level, - storage_type=storage_type, - real_type=Float64, - run_profiler=true) + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = Float64, + run_profiler = true) open("profile_float64.txt", "w") do io - show(io, prof_result) + show(io, prof_result) end println("Running profiler (Float32)...") trixi_include(elixir_path, - maxiters=maxiters, - initial_refinement_level=initial_refinement_level, - storage_type=storage_type, - real_type=Float32, - run_profiler=true) + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = Float32, + run_profiler = true) open("profile_float32.txt", "w") do io - show(io, prof_result) + show(io, prof_result) end end diff --git a/src/Trixi.jl b/src/Trixi.jl index e94d7fdbe68..289e48c572e 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -59,7 +59,8 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace -using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend, allocate +using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend, + allocate using LinearMaps: LinearMap if _PREFERENCE_LOOPVECTORIZATION using LoopVectorization: LoopVectorization, @turbo, indices diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 510f4d3c717..8013bb6d8db 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ 
b/src/solvers/dgsem_p4est/dg_3d.jl @@ -237,14 +237,14 @@ end @kernel function calc_interface_flux_KAkernel!(surface_flux_values, meshT, nonconservative_terms, equations, - surface_integral, solverT, u_inferface, + surface_integral, solverT, u_interface, neighbor_ids, node_indices, contravariant_vectors, index_range) interface = @index(Global) calc_interface_flux_interface!(surface_flux_values, meshT, nonconservative_terms, - equations, surface_integral, solverT, u_inferface, + equations, surface_integral, solverT, u_interface, neighbor_ids, node_indices, contravariant_vectors, index_range, interface) end From a60e27d0d6df9beceff5efecfc1ae2cea21fef7b Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 29 Sep 2025 17:28:07 +0200 Subject: [PATCH 058/158] missed max_dt calls --- benchmark/CUDA/run.jl | 2 +- src/callbacks_step/stepsize.jl | 3 ++- src/callbacks_step/stepsize_dg1d.jl | 4 ++-- src/semidiscretization/semidiscretization_euler_gravity.jl | 3 ++- .../paired_explicit_runge_kutta.jl | 3 ++- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl index 5b9f318bfdb..70c840722af 100644 --- a/benchmark/CUDA/run.jl +++ b/benchmark/CUDA/run.jl @@ -56,7 +56,7 @@ function main(elixir_path) # run profiler maxiters = 5 - initial_refinement_level = 2 + initial_refinement_level = 1 println("Running profiler (Float64)...") trixi_include(elixir_path, diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index fd5c4f63ff5..f6f04d09893 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -168,6 +168,7 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, equations_parabolic = semi.equations_parabolic u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) dt_advective = cfl_advective(t) * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, @@ -175,7 +176,7 @@ function calculate_dt(u_ode, t, cfl_advective, 
cfl_diffusive, cfl_diff = cfl_diffusive(t) if cfl_diff > 0 # Check if diffusive CFL should be considered - dt_diffusive = cfl_diff * max_dt(u, t, mesh, + dt_diffusive = cfl_diff * max_dt(backend, u, t, mesh, have_constant_diffusivity(equations_parabolic), equations, equations_parabolic, solver, cache) diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index c4cd159edfe..e0cac1ce57c 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -29,7 +29,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend, u, t, mesh::TreeMesh{1}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -72,7 +72,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend, u, t, mesh::TreeMesh{1}, constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 0b1efc00aef..c194da63f90 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -306,6 +306,7 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) u_euler = wrap_array(u_ode, semi_euler) u_gravity = wrap_array(cache.u_ode, semi_gravity) du_gravity = wrap_array(cache.du_ode, semi_gravity) + backend = trixi_backend(u_ode) # set up main loop finalstep = false @@ -317,7 +318,7 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) @unpack equations = semi_gravity while !finalstep dtau = @trixi_timeit timer() "calculate dtau" begin - cfl * max_dt(u_gravity, tau, semi_gravity.mesh, + cfl * 
max_dt(backend, u_gravity, tau, semi_gravity.mesh, have_constant_speed(equations), equations, semi_gravity.solver, semi_gravity.cache) end diff --git a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl index 333ebc14983..4e87c9ff35f 100644 --- a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl +++ b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl @@ -57,8 +57,9 @@ function calculate_cfl(ode_algorithm::AbstractPairedExplicitRK, ode) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - cfl_number = dt_opt / max_dt(u, t0, mesh, + cfl_number = dt_opt / max_dt(backend, u, t0, mesh, have_constant_speed(equations), equations, solver, cache) return cfl_number From 2073d7cd7d135fd00511c386be06ecea7d76638c Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 30 Sep 2025 10:21:25 +0200 Subject: [PATCH 059/158] some fixes --- .../semidiscretization_hyperbolic_parabolic.jl | 3 ++- src/solvers/dgsem_tree/dg_3d.jl | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index 54ede387fa2..e020903df2c 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -330,10 +330,11 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" 
rhs!(backend, du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter.counters[1], runtime) diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 914018ce8b4..5a651ec38ba 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -307,7 +307,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, @@ -427,7 +427,7 @@ end end # TODO: Taal dimension agnostic -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, @@ -468,7 +468,7 @@ function calc_volume_integral!(du, u, end # TODO: Taal dimension agnostic -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, From 9a2f130c41aaab95f0a2c33b8793014d1ba455c3 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 30 Sep 2025 16:33:21 +0200 Subject: [PATCH 060/158] after merge fixes --- src/solvers/dgsem_p4est/dg_3d.jl | 2 +- src/solvers/dgsem_tree/dg_3d.jl | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 1713f0693a9..39a8a24de65 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -299,7 +299,7 @@ function calc_interface_flux_interface!(surface_flux_values, i_primary, j_primary, k_primary, primary_element) - calc_interface_flux!(surface_flux_values, meshT, 
nonconservative_terms, + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, equations, surface_integral, solverT, u_interface, interface, normal_direction, diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 5a651ec38ba..62f7ee7f78c 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -228,8 +228,21 @@ function rhs!(backend, du, u, t, return nothing end +function calc_volume_integral!(backend, du, u, + mesh::TreeMesh{3}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + dg, cache) + end + return nothing +end + function calc_volume_integral!(backend::Nothing, du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, @@ -244,7 +257,7 @@ function calc_volume_integral!(backend::Nothing, du, u, end function calc_volume_integral!(backend::Backend, du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, @@ -652,7 +665,7 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, dg::DG) +function prolong2interfaces!(backend, cache, u, mesh::TreeMesh{3}, equations, dg::DG) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u From 9a47f292056c934d6b11239ab7b28e31c6689ec2 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 30 Sep 2025 21:37:57 +0200 Subject: [PATCH 061/158] some more fixes --- src/solvers/dgsem_tree/dg_3d.jl | 4 ++-- src/solvers/dgsem_tree/dg_3d_parabolic.jl | 2 +- 
src/solvers/fdsbp_tree/fdsbp_3d.jl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 62f7ee7f78c..e7795260c6f 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -703,7 +703,7 @@ function prolong2interfaces!(backend, cache, u, mesh::TreeMesh{3}, equations, dg return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -738,7 +738,7 @@ function calc_interface_flux!(surface_flux_values, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index a39d704199d..ee614b873db 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -974,7 +974,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index 8d220217216..b89dc3bee93 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::TreeMesh{3}, equations, end # 3D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend, du, u, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, 
volume_integral::VolumeIntegralStrongForm, @@ -103,7 +103,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend, du, u, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, From 6ffb69fed093819ba7952805a772c0f7d54f97bf Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 1 Oct 2025 14:11:43 +0200 Subject: [PATCH 062/158] post merge fixes --- .../dgsem_tree/dg_2d_subcell_limiters.jl | 2 +- src/solvers/dgsem_tree/dg_3d.jl | 80 ------------------- src/solvers/fdsbp_tree/fdsbp_1d.jl | 4 +- src/solvers/fdsbp_tree/fdsbp_2d.jl | 4 +- src/solvers/fdsbp_tree/fdsbp_3d.jl | 4 +- src/solvers/fdsbp_unstructured/fdsbp_2d.jl | 4 +- 6 files changed, 9 insertions(+), 89 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index f87fcbdcd32..bb1126c02f9 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -60,7 +60,7 @@ function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}}, end # Subcell limiting currently only implemented for certain mesh types -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}}, have_nonconservative_terms, equations, diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 5abbfc7349b..6ae047d519c 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -233,24 +233,6 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -<<<<<<< HEAD -function calc_volume_integral!(backend::Nothing, du, u, - mesh::Union{TreeMesh{3}, 
StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, - have_nonconservative_terms, equations, - volume_integral::VolumeIntegralFluxDifferencing, - dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end - - return nothing -end - -======= ->>>>>>> main @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, @@ -355,68 +337,6 @@ end return nothing end -<<<<<<< HEAD -# TODO: Taal dimension agnostic -function calc_volume_integral!(backend::Nothing, du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, - have_nonconservative_terms, equations, - volume_integral::VolumeIntegralShockCapturingHG, - dg::DGSEM, cache) - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, - cache) - - # For `Float64`, this gives 1.8189894035458565e-12 - # For `Float32`, this gives 1.1920929f-5 - RealT = eltype(alpha) - atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) - @threaded for element in eachelement(dg, cache) - alpha_element = alpha[element] - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha_element, 0, atol = atol) - - if dg_only - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_dg, dg, cache) - else - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) - - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, have_nonconservative_terms, equations, - volume_flux_fv, dg, cache, element, alpha_element) - end - end - - return 
nothing -end - -# TODO: Taal dimension agnostic -function calc_volume_integral!(backend::Nothing, du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, - have_nonconservative_terms, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, - dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral - - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, have_nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end - - return nothing -end - -======= ->>>>>>> main @inline function fv_kernel!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index 051e488d08c..6e71d7627d9 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::TreeMesh{1}, equations, end # 2D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -87,7 +87,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index db3130e6ed3..6f642ef1ab6 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::Union{TreeMesh{2}, UnstructuredMesh2D}, equations, end # 2D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -96,7 +96,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index b89dc3bee93..1eff0986e17 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::TreeMesh{3}, equations, end # 3D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(backend, du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -103,7 +103,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(backend, du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, diff --git a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl index ac7e4c36758..5b3bd95b8cd 100644 --- a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl +++ b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl @@ -28,7 +28,7 @@ end # 2D volume integral contributions for `VolumeIntegralStrongForm` # OBS! This is the standard (not de-aliased) form of the volume integral. # So it is not provably stable for variable coefficients due to the the metric terms. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::UnstructuredMesh2D, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -91,7 +91,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::UnstructuredMesh2D, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, From 307c3eba667b144223f268c2beaa1f9695681e94 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 1 Oct 2025 16:32:35 +0200 Subject: [PATCH 063/158] more --- src/solvers/dgsem/calc_volume_integral.jl | 3 ++- src/solvers/dgsem_p4est/dg_3d.jl | 2 +- src/solvers/dgsem_structured/dg_3d.jl | 6 ++++-- src/solvers/dgsem_tree/dg_3d.jl | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 7900b967aa6..e0041305e88 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -70,7 +70,8 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, volume_flux_dg, dg, cache, 1 - alpha_element) # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, have_nonconservative_terms, equations, volume_flux_fv, + fv_kernel!(du, u, mesh, have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, element, alpha_element) end end diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 39a8a24de65..ea59ff6a1c6 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -358,7 +358,7 @@ end # Inlined function for interface flux computation for flux + nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index ab555c481f8..b4421589520 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ 
-39,7 +39,8 @@ function rhs!(backend, du, u, t, end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -80,7 +81,8 @@ function calc_volume_integral!(backend::Backend, du, u, return nothing end -@kernel function weak_form_KAkernel!(du, u, meshT, have_nonconservative_terms, equations, +@kernel function weak_form_KAkernel!(du, u, meshT, have_nonconservative_terms, + equations, dg::DGSEM, contravariant_vectors) element = @index(Global) weak_form_kernel_element!(du, u, element, meshT, diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 6ae047d519c..2a510982f6d 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -1318,7 +1318,8 @@ end return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}}, +function calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, equations, surface_integral, dg::DGSEM, cache) @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements From c39b4de1af51d8cd2f1436e18ece76ea082daaed Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 1 Oct 2025 22:18:29 +0200 Subject: [PATCH 064/158] more --- src/solvers/dgsem_p4est/dg_2d.jl | 2 +- src/solvers/dgsem_p4est/dg_3d.jl | 7 ++++--- src/solvers/dgsem_structured/dg_3d.jl | 2 +- src/solvers/dgsem_tree/dg_2d.jl | 4 ++-- src/solvers/dgsem_tree/dg_2d_parabolic.jl | 2 +- src/solvers/dgsem_tree/dg_3d.jl | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 2b2f9ff8b72..b417e87a77d 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -647,7 +647,7 @@ end return 
nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index ea59ff6a1c6..6ab4f33e677 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -360,17 +360,18 @@ end @inline function calc_interface_flux!(surface_flux_values, ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index b4421589520..64f03d30dca 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -34,7 +34,7 @@ function rhs!(backend, du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 563b4d49e7e..57f7bf81ec6 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -156,7 +156,7 @@ function rhs!(backend, du, u, t, # Calculate surface 
integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end @@ -1021,7 +1021,7 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}}, equations, surface_integral::SurfaceIntegralWeakForm, diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index 232e13de88b..35f259ca9e5 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -103,7 +103,7 @@ function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, TreeMesh{3}}, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) end diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 2a510982f6d..27a6158c637 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -1371,7 +1371,7 @@ function calc_surface_integral!(backend::Nothing, du, u, return nothing end -function apply_jacobian!(du, mesh::TreeMesh{3}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{3}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements From a38cc03f1a35c415d212f827694a0e8f68731ef7 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 7 Oct 2025 18:07:27 +0200 Subject: [PATCH 065/158] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f4bbcd9ffd18e933fe0b3888d8dbad1b92afd21e Author: Daniel Doehring Date: Sun Oct 5 08:33:38 2025 +0200 Comment `temperature` and /3 (#2594) --------- Co-authored-by: Hendrik Ranocha commit 
68c0c71a20a8eace38dcd224277654eece7f57ca Author: Daniel Doehring Date: Fri Oct 3 15:06:55 2025 +0200 Second-Order Finite Volume Integral in 1D (#2022) * Pick up where Gregor left * preliminary example * more limiters * comments * fmt * continue * comments * print some more info * Add unit tests * add comment * Remove some alternative limiter implementations. * move, comments, fmt * Use second order timestepping * debug superbee * prim2cons 1D Adv * test * fmt, typo * typos * some more tests * fmt * Update src/solvers/dgsem_tree/finite_volume_O2.jl * Update test/test_unit.jl * Update src/solvers/dgsem_tree/dg_1d.jl * fmt * add different recontruction mode * Update src/solvers/dgsem_tree/finite_volume_O2.jl Co-authored-by: Andrés Rueda-Ramírez * test + fmt * comments * correct way cells dim * increase coverage * revisit * continue * fmt * shorten * extra test * comment "inverse_weights" * change files * test vals * Update test/test_tree_1d_euler.jl * Update examples/structured_1d_dgsem/elixir_euler_source_terms_nonperiodic_fvO2.jl * Update examples/tree_1d_dgsem/elixir_euler_convergence_pure_fvO2.jl * Update test/test_tree_1d_euler.jl * fix * test compact print * comment * relabel * comments * comments * comemnts * commenbts * rm * test * rename * docstrings * comments * Apply suggestions from code review Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> * fmt * fmt * mv * fix * Apply suggestions from code review Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> --------- Co-authored-by: Andrés Rueda-Ramírez Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> commit 96c7aef8e0c3086901d4fd6ce7594c0902f2bfda Author: Daniel Doehring Date: Thu Oct 2 18:19:13 2025 +0200 Bundle identical `rhs!` (#2552) * Bundle identical `rhs!` * fix 1d * comment * bring back --------- Co-authored-by: Hendrik Ranocha commit 5c978033d273b4a2e4cfc279fe31e2abfff90648 Author: Daniel Doehring Date: Thu 
Oct 2 15:39:55 2025 +0200 Use variable name `have_nonconservative_terms` (#2592) * Use variable name `have_nonconservative_terms` * fix * cons fmt --------- Co-authored-by: Benedict <135045760+benegee@users.noreply.github.com> Co-authored-by: Hendrik Ranocha commit 26886239f1194073a62cbc215846d62c658a8200 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu Oct 2 08:10:10 2025 +0200 Bump crate-ci/typos from 1.35.7 to 1.37.1 (#2593) * Bump crate-ci/typos from 1.35.7 to 1.37.1 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.7 to 1.37.1. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.7...v1.37.1) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.37.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * fix typos --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joshua Lampert --- .github/workflows/SpellCheck.yml | 2 +- NEWS.md | 6 +- .../elixir_navierstokes_couette_flow.jl | 5 +- .../elixir_navierstokes_poiseuille_flow.jl | 5 +- .../elixir_navierstokes_viscous_shock.jl | 12 +- ...avierstokes_viscous_shock_newton_krylov.jl | 12 +- .../elixir_navierstokes_viscous_shock.jl | 12 +- ...xir_euler_source_terms_nonperiodic_fvO2.jl | 63 +++++ .../elixir_euler_convergence_pure_fvO2.jl | 57 ++++ .../elixir_hypdiff_harmonic_nonperiodic.jl | 2 +- .../elixir_linearizedeuler_gauss_wall.jl | 2 +- .../elixir_navierstokes_convergence_walls.jl | 8 +- ...ixir_navierstokes_convergence_walls_amr.jl | 8 +- .../elixir_navierstokes_viscous_shock.jl | 12 +- .../elixir_navierstokes_viscous_shock_imex.jl | 12 +- ...erstokes_taylor_green_vortex_sutherland.jl | 2 +- src/Trixi.jl | 13 +- src/auxiliary/math.jl | 5 + .../subcell_limiter_idp_correction_2d.jl | 2 +- .../compressible_navier_stokes_1d.jl | 11 +- .../compressible_navier_stokes_2d.jl | 15 +- .../compressible_navier_stokes_3d.jl | 17 +- src/equations/hyperbolic_diffusion_1d.jl | 2 +- src/solvers/dg.jl | 87 +++++- src/solvers/dgmulti/flux_differencing.jl | 2 +- src/solvers/dgsem/calc_volume_integral.jl | 15 +- src/solvers/dgsem_p4est/dg_2d_parabolic.jl | 2 +- src/solvers/dgsem_structured/dg.jl | 44 ++++ src/solvers/dgsem_structured/dg_1d.jl | 43 +-- src/solvers/dgsem_structured/dg_2d.jl | 44 ---- src/solvers/dgsem_structured/dg_3d.jl | 45 ---- src/solvers/dgsem_tree/dg.jl | 4 + src/solvers/dgsem_tree/dg_1d.jl | 125 ++++++++- src/solvers/dgsem_tree/dg_2d.jl | 5 +- .../dgsem_tree/dg_2d_subcell_limiters.jl | 2 +- src/solvers/dgsem_tree/dg_3d.jl | 73 +----- .../dgsem_tree/subcell_finite_volume_O2.jl | 247 ++++++++++++++++++ src/solvers/dgsem_tree/subcell_limiters_2d.jl | 2 +- src/solvers/dgsem_unstructured/dg_2d.jl | 2 
+- test/test_parabolic_2d.jl | 40 +-- test/test_structured_1d.jl | 21 ++ test/test_tree_1d_euler.jl | 38 +++ test/test_unit.jl | 52 ++++ 43 files changed, 862 insertions(+), 316 deletions(-) create mode 100644 examples/structured_1d_dgsem/elixir_euler_source_terms_nonperiodic_fvO2.jl create mode 100644 examples/tree_1d_dgsem/elixir_euler_convergence_pure_fvO2.jl create mode 100644 src/solvers/dgsem_tree/subcell_finite_volume_O2.jl diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 172991d9f12..606c4b1add8 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v5 - name: Check spelling - uses: crate-ci/typos@v1.35.7 + uses: crate-ci/typos@v1.37.1 diff --git a/NEWS.md b/NEWS.md index b87a369b042..0290b08acd5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,12 +10,12 @@ for human readability. #### Changed -- The `polyester` preference got merged with the `native_threading` preference and the `Trixi.set_polyester!` +- The `polyester` preference got merged with the `native_threading` preference and the `Trixi.set_polyester!` function got renamed to `Trixi.set_threading_backend!` ([#2476]). - Default wave-speed estimate used within `flux_lax_friedrichs` changed from `max_abs_speed_naive` to `max_abs_speed` which is less diffusive. In v0.13, `flux_lax_friedrichs = FluxLaxFriedrichs(max_abs_speed = max_abs_speed)` - instead of the previous default + instead of the previous default `FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive)` ([#2458]). - The signature of the `VisualizationCallback` constructor changed. In the new version, it is mandatory to pass the semidiscretization `semi` to @@ -296,7 +296,7 @@ for human readability. `(; a, b) = stuff` instead of `@unpack a, b = stuff`. - The constructor `DGMultiMesh(dg; cells_per_dimension, kwargs...)` is deprecated and will be removed. 
The new constructor `DGMultiMesh(dg, cells_per_dimension; kwargs...)` - does not have `cells_per_dimesion` as a keyword argument. + does not have `cells_per_dimension` as a keyword argument. #### Removed diff --git a/examples/p4est_2d_dgsem/elixir_navierstokes_couette_flow.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_couette_flow.jl index 84b56aad1c1..22e866a9bdd 100644 --- a/examples/p4est_2d_dgsem/elixir_navierstokes_couette_flow.jl +++ b/examples/p4est_2d_dgsem/elixir_navierstokes_couette_flow.jl @@ -67,9 +67,8 @@ bs_hyperbolic = Dict(:x_neg => BoundaryConditionDirichlet(initial_condition), # velocity_bc_top_left = NoSlip((x, t, equations) -> SVector(x[2] / height() * v_top(), 0)) # Use isothermal for inflow - adiabatic should also work heat_bc_top_left = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition(x, t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition(x, t, equations_parabolic), + equations_parabolic) end bc_parabolic_top_left = BoundaryConditionNavierStokesWall(velocity_bc_top_left, heat_bc_top_left) diff --git a/examples/p4est_2d_dgsem/elixir_navierstokes_poiseuille_flow.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_poiseuille_flow.jl index fcbcd7d65e6..3ee1f85674a 100644 --- a/examples/p4est_2d_dgsem/elixir_navierstokes_poiseuille_flow.jl +++ b/examples/p4est_2d_dgsem/elixir_navierstokes_poiseuille_flow.jl @@ -69,9 +69,8 @@ bs_hyperbolic = Dict(:x_neg => BoundaryConditionDirichlet(initial_condition), # velocity_bc_inflow = NoSlip((x, t, equations) -> SVector(v_in, 0)) # Use isothermal for inflow - adiabatic should also work heat_bc_inflow = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition(x, t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition(x, t, equations_parabolic), + equations_parabolic) end bc_parabolic_inflow = BoundaryConditionNavierStokesWall(velocity_bc_inflow, heat_bc_inflow) diff --git 
a/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock.jl index e0085091369..af1f04b7349 100644 --- a/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock.jl +++ b/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock.jl @@ -129,17 +129,13 @@ boundary_conditions = Dict(:x_neg => boundary_condition_inflow, ### Viscous boundary conditions ### # For the viscous BCs, we use the known analytical solution velocity_bc = NoSlip() do x, t, equations_parabolic - Trixi.velocity(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + velocity(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end heat_bc = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end boundary_condition_parabolic = BoundaryConditionNavierStokesWall(velocity_bc, heat_bc) diff --git a/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock_newton_krylov.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock_newton_krylov.jl index 142289aaace..5080de3ee56 100644 --- a/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock_newton_krylov.jl +++ b/examples/p4est_2d_dgsem/elixir_navierstokes_viscous_shock_newton_krylov.jl @@ -124,17 +124,13 @@ boundary_conditions = Dict(:x_neg => boundary_condition_inflow, ### Viscous boundary conditions ### # For the viscous BCs, we use the known analytical solution velocity_bc = NoSlip() do x, t, equations_parabolic - Trixi.velocity(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + velocity(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end heat_bc = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition_viscous_shock(x, - t, - 
equations_parabolic), - equations_parabolic) + temperature(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end boundary_condition_parabolic = BoundaryConditionNavierStokesWall(velocity_bc, heat_bc) diff --git a/examples/p4est_3d_dgsem/elixir_navierstokes_viscous_shock.jl b/examples/p4est_3d_dgsem/elixir_navierstokes_viscous_shock.jl index 23abd9d1618..e048e4798e6 100644 --- a/examples/p4est_3d_dgsem/elixir_navierstokes_viscous_shock.jl +++ b/examples/p4est_3d_dgsem/elixir_navierstokes_viscous_shock.jl @@ -129,17 +129,13 @@ boundary_conditions = Dict(:x_neg => boundary_condition_inflow, ### Viscous boundary conditions ### # For the viscous BCs, we use the known analytical solution velocity_bc = NoSlip() do x, t, equations_parabolic - Trixi.velocity(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + velocity(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end heat_bc = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end boundary_condition_parabolic = BoundaryConditionNavierStokesWall(velocity_bc, heat_bc) diff --git a/examples/structured_1d_dgsem/elixir_euler_source_terms_nonperiodic_fvO2.jl b/examples/structured_1d_dgsem/elixir_euler_source_terms_nonperiodic_fvO2.jl new file mode 100644 index 00000000000..392a371f38c --- /dev/null +++ b/examples/structured_1d_dgsem/elixir_euler_source_terms_nonperiodic_fvO2.jl @@ -0,0 +1,63 @@ + +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations1D(1.4) + +initial_condition = initial_condition_convergence_test +source_terms = source_terms_convergence_test + 
+# you can either use a single function to impose the BCs weakly in all +# 2*ndims == 2 directions or you can pass a tuple containing BCs for +# each direction +boundary_condition = BoundaryConditionDirichlet(initial_condition) +boundary_conditions = (x_neg = boundary_condition, + x_pos = boundary_condition) + +polydeg = 8 # Governs in this case only the number of subcells +basis = LobattoLegendreBasis(polydeg) +surface_flux = flux_hll +volume_integral = VolumeIntegralPureLGLFiniteVolumeO2(basis, surface_flux, + reconstruction_mode = reconstruction_O2_inner, + slope_limiter = vanLeer) +solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux, + volume_integral = volume_integral) + +coordinates_min = (0.0,) +coordinates_max = (2.0,) +cells_per_dimension = (8,) +mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max, + periodicity = false) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + source_terms = source_terms, + boundary_conditions = boundary_conditions) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +tspan = (0.0, 2.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +stepsize_callback = StepsizeCallback(cfl = 1.1) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, ParsaniKetchesonDeconinck3S82(), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); diff --git a/examples/tree_1d_dgsem/elixir_euler_convergence_pure_fvO2.jl b/examples/tree_1d_dgsem/elixir_euler_convergence_pure_fvO2.jl new file mode 100644 index 00000000000..0021569442f --- /dev/null +++ b/examples/tree_1d_dgsem/elixir_euler_convergence_pure_fvO2.jl @@ -0,0 +1,57 @@ + +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations1D(1.4) + +initial_condition = initial_condition_convergence_test + +polydeg = 3 # Governs in this case only the number of subcells +basis = LobattoLegendreBasis(polydeg) +surface_flux = flux_hllc +volume_integral = VolumeIntegralPureLGLFiniteVolumeO2(basis, surface_flux, + reconstruction_mode = reconstruction_O2_full, + slope_limiter = monotonized_central) +solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux, + volume_integral = volume_integral) + +coordinates_min = 0.0 +coordinates_max = 2.0 +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 4, + n_cells_max = 10_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + source_terms = source_terms_convergence_test) + 
+############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 2.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + extra_analysis_errors = (:l2_error_primitive, + :linf_error_primitive, + :conservation_error)) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +stepsize_callback = StepsizeCallback(cfl = 1.1) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, ORK256(), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); diff --git a/examples/tree_1d_dgsem/elixir_hypdiff_harmonic_nonperiodic.jl b/examples/tree_1d_dgsem/elixir_hypdiff_harmonic_nonperiodic.jl index 52653c0f923..ae6a9e28b80 100644 --- a/examples/tree_1d_dgsem/elixir_hypdiff_harmonic_nonperiodic.jl +++ b/examples/tree_1d_dgsem/elixir_hypdiff_harmonic_nonperiodic.jl @@ -8,7 +8,7 @@ equations = HyperbolicDiffusionEquations1D(nu = 1.25) """ initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations1D) -A non-priodic harmonic function used in combination with +A non-periodic harmonic function used in combination with [`source_terms_poisson_nonperiodic`](@ref) and [`boundary_condition_poisson_nonperiodic`](@ref). !!! 
note diff --git a/examples/tree_1d_dgsem/elixir_linearizedeuler_gauss_wall.jl b/examples/tree_1d_dgsem/elixir_linearizedeuler_gauss_wall.jl index 4880c6ae623..a7844b5ce0a 100644 --- a/examples/tree_1d_dgsem/elixir_linearizedeuler_gauss_wall.jl +++ b/examples/tree_1d_dgsem/elixir_linearizedeuler_gauss_wall.jl @@ -19,7 +19,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max, # Initialize density and pressure perturbation with a Gaussian bump # that is advected to left with v - c and to the right with v + c. -# Correspondigly, the bump splits in half. +# Correspondingly, the bump splits in half. function initial_condition_gauss_wall(x, t, equations::LinearizedEulerEquations1D) v1_prime = 0 rho_prime = p_prime = 2 * exp(-(x[1] - 45)^2 / 25) diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl index 2b9979db443..2f7e078d3fb 100644 --- a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl +++ b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl @@ -135,10 +135,10 @@ velocity_bc_left_right = NoSlip() do x, t, equations_parabolic end heat_bc_left = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition_navier_stokes_convergence_test(x, - t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition_navier_stokes_convergence_test(x, + t, + equations_parabolic), + equations_parabolic) end heat_bc_right = Adiabatic((x, t, equations_parabolic) -> 0.0) diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl index cb7b4310b6e..d06f0b85e07 100644 --- a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl +++ b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl @@ -135,10 +135,10 @@ velocity_bc_left_right = NoSlip() do x, t, equations_parabolic end heat_bc_left = Isothermal() do x, t, 
equations_parabolic - Trixi.temperature(initial_condition_navier_stokes_convergence_test(x, - t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition_navier_stokes_convergence_test(x, + t, + equations_parabolic), + equations_parabolic) end heat_bc_right = Adiabatic((x, t, equations_parabolic) -> 0.0) diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock.jl b/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock.jl index 80597cab362..ad2e7ef7040 100644 --- a/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock.jl +++ b/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock.jl @@ -123,17 +123,13 @@ boundary_conditions = (; x_neg = boundary_condition_inflow, ### Viscous boundary conditions ### # For the viscous BCs, we use the known analytical solution velocity_bc = NoSlip() do x, t, equations_parabolic - Trixi.velocity(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + velocity(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end heat_bc = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end boundary_condition_parabolic = BoundaryConditionNavierStokesWall(velocity_bc, heat_bc) diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock_imex.jl b/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock_imex.jl index 18f1df5bd28..fe29e9feb9e 100644 --- a/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock_imex.jl +++ b/examples/tree_1d_dgsem/elixir_navierstokes_viscous_shock_imex.jl @@ -117,17 +117,13 @@ boundary_conditions = (; x_neg = boundary_condition_inflow, ### Viscous boundary conditions ### # For the viscous BCs, we use the known analytical solution velocity_bc = NoSlip() do x, t, equations_parabolic - 
Trixi.velocity(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + velocity(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end heat_bc = Isothermal() do x, t, equations_parabolic - Trixi.temperature(initial_condition_viscous_shock(x, - t, - equations_parabolic), - equations_parabolic) + temperature(initial_condition_viscous_shock(x, t, equations_parabolic), + equations_parabolic) end boundary_condition_parabolic = BoundaryConditionNavierStokesWall(velocity_bc, heat_bc) diff --git a/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex_sutherland.jl b/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex_sutherland.jl index 3beade2b09a..df16dca0302 100644 --- a/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex_sutherland.jl +++ b/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex_sutherland.jl @@ -16,7 +16,7 @@ prandtl_number() = 0.72 T_ref = convert(RealT, 291.15) R_specific_air = convert(RealT, 287.052874) - T = R_specific_air * Trixi.temperature(u, equations) + T = R_specific_air * temperature(u, equations) C_air = 120 mu_ref_air = convert(RealT, 1.827e-5) diff --git a/src/Trixi.jl b/src/Trixi.jl index 289e48c572e..8192520696d 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -243,8 +243,10 @@ export initial_condition_eoc_test_coupled_euler_gravity, export cons2cons, cons2prim, prim2cons, cons2macroscopic, cons2state, cons2mean, cons2entropy, entropy2cons -export density, pressure, density_pressure, velocity, global_mean_vars, - equilibrium_distribution, waterheight, waterheight_pressure +export density, pressure, density_pressure, velocity, temperature, + global_mean_vars, + equilibrium_distribution, + waterheight, waterheight_pressure export entropy, energy_total, energy_kinetic, energy_internal, energy_magnetic, cross_helicity, magnetic_field, divergence_cleaning_field, enstrophy, vorticity @@ -259,13 +261,18 @@ export DG, FDSBP, 
VolumeIntegralWeakForm, VolumeIntegralStrongForm, VolumeIntegralFluxDifferencing, - VolumeIntegralPureLGLFiniteVolume, + VolumeIntegralPureLGLFiniteVolume, VolumeIntegralPureLGLFiniteVolumeO2, VolumeIntegralShockCapturingHG, IndicatorHennemannGassner, VolumeIntegralUpwind, SurfaceIntegralWeakForm, SurfaceIntegralStrongForm, SurfaceIntegralUpwind, MortarL2 +export reconstruction_O2_inner, reconstruction_O2_full, + reconstruction_constant, + minmod, monotonized_central, superbee, vanLeer, + central_slope + export VolumeIntegralSubcellLimiting, BoundsCheckCallback, SubcellLimiterIDP, SubcellLimiterIDPCorrection diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index e2fcab85fa0..2ef360c6e96 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -434,4 +434,9 @@ Given ε = 1.0e-4, we use the following algorithm. (y^(gamma - 1) - x^(gamma - 1)) end end + +# Note: This is not a limiter, instead a helper for the `superbee` limiter. +@inline function maxmod(sl, sr) + return 0.5f0 * (sign(sl) + sign(sr)) * max(abs(sl), abs(sr)) +end end # @muladd diff --git a/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl b/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl index 337b62a8fb1..4caaff8fc17 100644 --- a/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl +++ b/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl @@ -9,7 +9,7 @@ function perform_idp_correction!(u, dt, mesh::Union{TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}}, equations, dg, cache) - @unpack inverse_weights = dg.basis + @unpack inverse_weights = dg.basis # Plays role of inverse DG-subcell sizes @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes @unpack alpha = dg.volume_integral.limiter.cache.subcell_limiter_coefficients diff --git a/src/equations/compressible_navier_stokes_1d.jl b/src/equations/compressible_navier_stokes_1d.jl index 8d66b0d077f..07ca7df987b 100644 --- 
a/src/equations/compressible_navier_stokes_1d.jl +++ b/src/equations/compressible_navier_stokes_1d.jl @@ -280,11 +280,20 @@ end prim2cons(u, equations.equations_hyperbolic) end +""" + temperature(u, equations::CompressibleNavierStokesDiffusion1D) + +Compute the temperature from the conservative variables `u`. +In particular, this assumes a specific gas constant ``R = 1``: +```math +T = \\frac{p}{\\rho} +``` +""" @inline function temperature(u, equations::CompressibleNavierStokesDiffusion1D) rho, rho_v1, rho_e = u p = (equations.gamma - 1) * (rho_e - 0.5f0 * rho_v1^2 / rho) - T = p / rho + T = p / rho # Corresponds to a specific gas constant R = 1 return T end diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl index c3ad64143fd..96f00c866e7 100644 --- a/src/equations/compressible_navier_stokes_2d.jl +++ b/src/equations/compressible_navier_stokes_2d.jl @@ -159,12 +159,12 @@ function flux(u, gradients, orientation::Integer, # Components of viscous stress tensor # (4 * (v1)_x / 3 - 2 * (v2)_y / 3) - tau_11 = 4 * dv1dx / 3 - 2 * dv2dy / 3 + tau_11 = (4 * dv1dx - 2 * dv2dy) / 3 # ((v1)_y + (v2)_x) # stress tensor is symmetric tau_12 = dv1dy + dv2dx # = tau_21 # (4/3 * (v2)_y - 2/3 * (v1)_x) - tau_22 = 4 * dv2dy / 3 - 2 * dv1dx / 3 + tau_22 = (4 * dv2dy - 2 * dv1dx) / 3 # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) @@ -274,11 +274,20 @@ end prim2cons(u, equations.equations_hyperbolic) end +""" + temperature(u, equations::CompressibleNavierStokesDiffusion2D) + +Compute the temperature from the conservative variables `u`. 
+In particular, this assumes a specific gas constant ``R = 1``: +```math +T = \\frac{p}{\\rho} +``` +""" @inline function temperature(u, equations::CompressibleNavierStokesDiffusion2D) rho, rho_v1, rho_v2, rho_e = u p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2) / rho) - T = p / rho + T = p / rho # Corresponds to a specific gas constant R = 1 return T end diff --git a/src/equations/compressible_navier_stokes_3d.jl b/src/equations/compressible_navier_stokes_3d.jl index fa6075b5a2f..6c615a11ced 100644 --- a/src/equations/compressible_navier_stokes_3d.jl +++ b/src/equations/compressible_navier_stokes_3d.jl @@ -164,11 +164,11 @@ function flux(u, gradients, orientation::Integer, # Diagonal parts # (4 * (v1)_x / 3 - 2 * ((v2)_y + (v3)_z)) / 3) - tau_11 = 4 * dv1dx / 3 - 2 * (dv2dy + dv3dz) / 3 + tau_11 = (4 * dv1dx - 2 * (dv2dy + dv3dz)) / 3 # (4 * (v2)_y / 3 - 2 * ((v1)_x + (v3)_z) / 3) - tau_22 = 4 * dv2dy / 3 - 2 * (dv1dx + dv3dz) / 3 + tau_22 = (4 * dv2dy - 2 * (dv1dx + dv3dz)) / 3 # (4 * (v3)_z / 3 - 2 * ((v1)_x + (v2)_y) / 3) - tau_33 = 4 * dv3dz / 3 - 2 * (dv1dx + dv2dy) / 3 + tau_33 = (4 * dv3dz - 2 * (dv1dx + dv2dy)) / 3 # Off diagonal parts, exploit that stress tensor is symmetric # ((v1)_y + (v2)_x) @@ -302,11 +302,20 @@ end prim2cons(u, equations.equations_hyperbolic) end +""" + temperature(u, equations::CompressibleNavierStokesDiffusion3D) + +Compute the temperature from the conservative variables `u`. 
+In particular, this assumes a specific gas constant ``R = 1``: +```math +T = \\frac{p}{\\rho} +``` +""" @inline function temperature(u, equations::CompressibleNavierStokesDiffusion3D) rho, rho_v1, rho_v2, rho_v3, rho_e = u p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) - T = p / rho + T = p / rho # Corresponds to a specific gas constant R = 1 return T end diff --git a/src/equations/hyperbolic_diffusion_1d.jl b/src/equations/hyperbolic_diffusion_1d.jl index 804a3e0b499..48601dfd675 100644 --- a/src/equations/hyperbolic_diffusion_1d.jl +++ b/src/equations/hyperbolic_diffusion_1d.jl @@ -44,7 +44,7 @@ end """ initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations1D) -A non-priodic smooth initial condition. Can be used for convergence tests in combination with +A non-periodic smooth initial condition. Can be used for convergence tests in combination with [`source_terms_poisson_nonperiodic`](@ref) and [`boundary_condition_poisson_nonperiodic`](@ref). !!! note The solution is periodic but the initial guess is not. diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index f402aad2ebd..b08d2d3de15 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -185,6 +185,11 @@ function get_element_variables!(element_variables, u, mesh, equations, volume_integral) end +# Abstract supertype for first-order `VolumeIntegralPureLGLFiniteVolume` and +# second-order `VolumeIntegralPureLGLFiniteVolumeO2` subcell-based finite volume +# volume integrals. +abstract type AbstractVolumeIntegralPureLGLFiniteVolume <: AbstractVolumeIntegral end + """ VolumeIntegralPureLGLFiniteVolume(volume_flux_fv) @@ -203,7 +208,8 @@ mesh (LGL = Legendre-Gauss-Lobatto). 
"A provably entropy stable subcell shock capturing approach for high order split form DG" [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -struct VolumeIntegralPureLGLFiniteVolume{VolumeFluxFV} <: AbstractVolumeIntegral +struct VolumeIntegralPureLGLFiniteVolume{VolumeFluxFV} <: + AbstractVolumeIntegralPureLGLFiniteVolume volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative end # TODO: Figure out if this can also be used for Gauss nodes, not just LGL, and adjust the name accordingly @@ -222,6 +228,85 @@ function Base.show(io::IO, ::MIME"text/plain", end end +""" + VolumeIntegralPureLGLFiniteVolumeO2(basis::Basis, volume_flux_fv; + reconstruction_mode = reconstruction_O2_full, + slope_limiter = minmod) + +This gives an up to second order accurate finite volume scheme on an LGL-type subcell +mesh (LGL = Legendre-Gauss-Lobatto). +Depending on the `reconstruction_mode` and `slope_limiter`, experimental orders of convergence +between 1 and 2 can be expected in practice. +Since this is a volume integral, all reconstructions are purely cell-local, i.e., +no neighboring elements are queried at reconstruction stage. + +The interface values of the inner DG-subcells are reconstructed using the standard MUSCL-type reconstruction. +For the DG-subcells at the boundaries, two options are available: + +1) The unlimited slope is used on these cells. + This gives full second order accuracy, but also does not damp overshoots between cells. + The `reconstruction_mode` corresponding to this is `reconstruction_O2_full`, which is used by default. +2) On boundary subcells, the solution is represented using a constant value, thereby falling back to formally only first order. + The `reconstruction_mode` corresponding to this is `reconstruction_O2_inner`. + In the reference below, this is the recommended reconstruction mode. + +!!!
note "Conservative Systems only" + Currently only implemented for systems in conservative form, i.e., + `have_nonconservative_terms(equations) = False()` + +!!! warning "Experimental implementation" + This is an experimental feature and may change in future releases. + +## References + +See especially Sections 3.2, Section 4, and Appendix D of the paper + +- Rueda-Ramírez, Hennemann, Hindenlang, Winters, & Gassner (2021). + "An entropy stable nodal discontinuous Galerkin method for the resistive MHD equations. + Part II: Subcell finite volume shock capturing" + [JCP: 2021.110580](https://doi.org/10.1016/j.jcp.2021.110580) +""" +struct VolumeIntegralPureLGLFiniteVolumeO2{RealT <: Real, Basis, VolumeFluxFV, + Reconstruction, Limiter} <: + AbstractVolumeIntegralPureLGLFiniteVolume + x_interfaces::Vector{RealT} # x-coordinates of the sub-cell element interfaces + volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative + reconstruction_mode::Reconstruction # which type of FV reconstruction to use + slope_limiter::Limiter # which type of slope limiter function +end + +function VolumeIntegralPureLGLFiniteVolumeO2(basis::Basis, volume_flux_fv; + reconstruction_mode = reconstruction_O2_full, + slope_limiter = minmod) where {Basis} + # Suffices to store only the intermediate boundaries of the sub-cell elements + x_interfaces = cumsum(basis.weights)[1:(end - 1)] .- 1 + VolumeIntegralPureLGLFiniteVolumeO2{eltype(basis.weights), + typeof(basis), + typeof(volume_flux_fv), + typeof(reconstruction_mode), + typeof(slope_limiter)}(x_interfaces, + volume_flux_fv, + reconstruction_mode, + slope_limiter) +end + +function Base.show(io::IO, ::MIME"text/plain", + integral::VolumeIntegralPureLGLFiniteVolumeO2) + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "FV flux" => integral.volume_flux_fv, + "Reconstruction" => integral.reconstruction_mode, + "Slope limiter" => 
integral.slope_limiter, + "Subcell boundaries" => vcat([-1.0], integral.x_interfaces, [1.0]) + ] + summary_box(io, "VolumeIntegralPureLGLFiniteVolumeO2", setup) + end +end + """ VolumeIntegralSubcellLimiting(limiter; volume_flux_dg, volume_flux_fv) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 47750ffd5a0..458e06e88b6 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -234,7 +234,7 @@ end end # Return the contravariant basis vector corresponding to the Cartesian -# coordinate diretion `orientation` in a given `element` of the `mesh`. +# coordinate direction `orientation` in a given `element` of the `mesh`. # The contravariant basis vectors have entries `dx_i / dxhat_j` where # j ∈ {1, ..., NDIMS}. Here, `x_i` and `xhat_j` are the ith physical coordinate # and jth reference coordinate, respectively. These are geometric terms which diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index e0041305e88..84c914c340f 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -32,8 +32,8 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, - equations, + flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, volume_integral.volume_flux, dg, cache) end @@ -70,9 +70,9 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, volume_flux_dg, dg, cache, 1 - alpha_element) # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, have_nonconservative_terms, equations, - volume_flux_fv, - dg, cache, element, alpha_element) + fv_kernel!(du, u, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, 
element, alpha_element) end end @@ -87,8 +87,9 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, # Calculate LGL FV volume integral @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, have_nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) + fv_kernel!(du, u, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, element, true) end return nothing diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 8d56fdf7515..7d263b5fa2e 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -246,7 +246,7 @@ function calc_gradient!(gradients, u_transformed, t, dg) end - # Prolong solution to mortars. This resues the hyperbolic version of `prolong2mortars` + # Prolong solution to mortars. This reuses the hyperbolic version of `prolong2mortars` @trixi_timeit timer() "prolong2mortars" begin prolong2mortars!(cache, u_transformed, mesh, equations_parabolic, dg.mortar, dg) diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index 557b5c3364f..6cc2791c27e 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -35,6 +35,50 @@ function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionP @assert isperiodic(mesh) end +function rhs!(du, u, t, + mesh::Union{StructuredMesh, StructuredMeshView{2}}, equations, + boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Calculate interface and boundary fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, + 
have_nonconservative_terms(equations), equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end + @inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, orientation, boundary_condition::BoundaryConditionPeriodic, diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index 0a9618c6d9a..8417c709338 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -5,49 +5,8 @@ @muladd begin #! 
format: noindent -function rhs!(backend, du, u, t, - mesh::StructuredMesh{1}, equations, - boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(backend, du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - end - - # Calculate interface and boundary fluxes - @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache, u, mesh, equations, dg.surface_integral, dg) - end - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" begin - calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, - dg.surface_integral, dg) - end - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, - dg.surface_integral, dg, cache) - end - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" begin - calc_sources!(du, u, t, source_terms, equations, dg, cache) - end - - return nothing -end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{1}, + nonconservative_terms, # can be True/False equations, surface_integral, dg::DG) @unpack surface_flux = surface_integral diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index b74ab435228..6430b61b276 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -5,50 +5,6 @@ @muladd begin #! 
format: noindent -function rhs!(backend, du, u, t, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}}, equations, - boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(backend, du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - end - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache, u, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg) - end - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" begin - calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, - dg.surface_integral, dg) - end - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, - dg.surface_integral, dg, cache) - end - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" begin - calc_sources!(du, u, t, source_terms, equations, dg, cache) - end - - return nothing -end - #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 64f03d30dca..cd39623a367 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -5,51 +5,6 @@ @muladd begin #! 
format: noindent -function rhs!(backend, du, u, t, - mesh::StructuredMesh{3}, equations, - boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(backend, du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - end - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache, u, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg) - end - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" begin - calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, - dg.surface_integral, dg) - end - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(backend, du, u, mesh, equations, - dg.surface_integral, dg, cache) - end - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, - cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" begin - calc_sources!(du, u, t, source_terms, equations, dg, cache) - end - - return nothing -end - function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index 125773c1fd5..af4615726b0 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -38,6 +38,10 @@ include("dg_parallel.jl") # Helper structs for parabolic AMR include("containers_viscous.jl") +# Some functions for a second-order Finite-Volume (MUSCL) alike +# scheme on DG-subcells. 
+include("subcell_finite_volume_O2.jl") + # 1D DG implementation include("dg_1d.jl") include("dg_1d_parabolic.jl") diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 6f6d3dc3385..986bc6d6830 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -50,8 +50,8 @@ function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, end function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, - uEltype) + volume_integral::AbstractVolumeIntegralPureLGLFiniteVolume, + dg::DG, uEltype) A2dp1_x = Array{uEltype, 2} fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) for _ in 1:Threads.nthreads()] @@ -217,14 +217,59 @@ end have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded = cache - @unpack inverse_weights = dg.basis + @unpack inverse_weights = dg.basis # Plays role of inverse DG-subcell sizes # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, u, mesh, have_nonconservative_terms, equations, - volume_flux_fv, - dg, element, cache) + calcflux_fv!(fstar1_L, fstar1_R, u, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, element, cache) + + # Calculate FV volume integral contribution + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1] - fstar1_R[v, i]))) + end + end + + return nothing +end + +function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralPureLGLFiniteVolumeO2, + dg::DGSEM, cache) + @unpack x_interfaces, volume_flux_fv, reconstruction_mode, slope_limiter = volume_integral + + # Calculate 
LGL second-order FV volume integral + @threaded for element in eachelement(dg, cache) + fvO2_kernel!(du, u, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, element, + x_interfaces, reconstruction_mode, slope_limiter, true) + end + + return nothing + end + +@inline function fvO2_kernel!(du, u, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + nonconservative_terms, equations, + volume_flux_fv, dg::DGSEM, cache, element, + x_interfaces, reconstruction_mode, slope_limiter, + alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded = cache + @unpack inverse_weights = dg.basis # Plays role of inverse DG-subcell sizes + + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + calcflux_fvO2!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, + volume_flux_fv, dg, element, cache, + x_interfaces, reconstruction_mode, slope_limiter) # Calculate FV volume integral contribution for i in eachnode(dg) @@ -291,6 +336,74 @@ end return nothing end +@inline function calcflux_fvO2!(fstar1_L, fstar1_R, u::AbstractArray{<:Any, 3}, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + nonconservative_terms::False, + equations, volume_flux_fv, dg::DGSEM, element, cache, + x_interfaces, reconstruction_mode, slope_limiter) + fstar1_L[:, 1] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1] .= zero(eltype(fstar1_R)) + + for i in 2:nnodes(dg) # We compute FVO2 fluxes at the (nnodes(dg) - 1) subcell boundaries + # Reference element: + # -1 ------------------0------------------ 1 -> x + # Gauss-Lobatto-Legendre nodes (schematic for k = 3): + # . . . . + # ^ ^ ^ ^ + # Node indices: + # 1 2 3 4 + # The inner subcell boundaries are governed by the + # cumulative sum of the quadrature weights - 1 .
+ # -1 ------------------0------------------ 1 -> x + # w1-1 (w1+w2)-1 (w1+w2+w3)-1 + # | | | | | + # Note that only the inner boundaries are stored. + # Subcell interface indices, loop only over 2 -> nnodes(dg) = 4 + # 1 2 3 4 5 + # + # In general a four-point stencil is required, since we reconstruct the + # piecewise linear solution in both subcells next to the subcell interface. + # Since these subcell boundaries are not aligned with the DG nodes, + # on each neighboring subcell two linear solutions are reconstructed => 4 point stencil. + # For the outer interfaces the stencil shrinks since we do not consider values + # outside the element (this is a volume integral). + # + # The left subcell node values are labelled `_ll` (left-left) and `_lr` (left-right), while + # the right subcell node values are labelled `_rl` (right-left) and `_rr` (right-right). + + ## Obtain unlimited values in primitive variables ## + + # Note: If i - 2 = 0 we do not go to neighbor element, as one would do in a finite volume scheme. + # Here, we keep it purely cell-local, thus overshoots between elements are not ruled out. + u_ll = cons2prim(get_node_vars(u, equations, dg, max(1, i - 2), element), + equations) + u_lr = cons2prim(get_node_vars(u, equations, dg, i - 1, element), + equations) + u_rl = cons2prim(get_node_vars(u, equations, dg, i, element), + equations) + # Note: If i + 1 > nnodes(dg) we do not go to neighbor element, as one would do in a finite volume scheme. + # Here, we keep it purely cell-local, thus overshoots between elements are not ruled out. 
+ u_rr = cons2prim(get_node_vars(u, equations, dg, min(nnodes(dg), i + 1), + element), equations) + + ## Reconstruct values at interfaces with limiting ## + u_l, u_r = reconstruction_mode(u_ll, u_lr, u_rl, u_rr, + x_interfaces, i, + slope_limiter, dg) + + ## Convert primitive variables back to conservative variables ## + flux = volume_flux_fv(prim2cons(u_l, equations), prim2cons(u_r, equations), + 1, equations) # orientation 1: x direction + + set_node_vars!(fstar1_L, flux, equations, dg, i) + set_node_vars!(fstar1_R, flux, equations, dg, i) + end + + return nothing +end + function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, dg::DG) @unpack interfaces = cache @unpack neighbor_ids = interfaces diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 57f7bf81ec6..0d1b3c885b8 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -103,7 +103,8 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
function rhs!(backend, du, u, t, - mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, + mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, + TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -295,7 +296,7 @@ end have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache - @unpack inverse_weights = dg.basis + @unpack inverse_weights = dg.basis # Plays role of inverse DG-subcell sizes # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index bb1126c02f9..04889cae459 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -84,7 +84,7 @@ end have_nonconservative_terms, equations, volume_integral, limiter::SubcellLimiterIDP, dg::DGSEM, cache) - @unpack inverse_weights = dg.basis + @unpack inverse_weights = dg.basis # Plays role of inverse DG-subcell sizes @unpack volume_flux_dg, volume_flux_fv = volume_integral # high-order DG fluxes diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 27a6158c637..664a8e168ef 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -122,77 +122,6 @@ function create_cache(mesh::TreeMesh{3}, equations, return cache end -# TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
- -function rhs!(backend, du, u, t, - mesh::Union{TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, - boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(backend, du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - end - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(backend, cache, u, mesh, equations, dg) - end - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" begin - prolong2boundaries!(cache, u, mesh, equations, - dg.surface_integral, dg) - end - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" begin - calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, - dg.surface_integral, dg) - end - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" begin - prolong2mortars!(cache, u, mesh, equations, - dg.mortar, dg) - end - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" begin - calc_mortar_flux!(cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - end - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(backend, du, u, mesh, equations, - dg.surface_integral, dg, cache) - end - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, - cache) - - # Calculate source terms - @trixi_timeit timer() 
"source terms" begin - calc_sources!(du, u, t, source_terms, equations, dg, cache) - end - - return nothing -end - #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, @@ -343,7 +272,7 @@ end have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache - @unpack inverse_weights = dg.basis + @unpack inverse_weights = dg.basis # Plays role of inverse DG-subcell sizes # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] diff --git a/src/solvers/dgsem_tree/subcell_finite_volume_O2.jl b/src/solvers/dgsem_tree/subcell_finite_volume_O2.jl new file mode 100644 index 00000000000..589b573154b --- /dev/null +++ b/src/solvers/dgsem_tree/subcell_finite_volume_O2.jl @@ -0,0 +1,247 @@ +""" + reconstruction_constant(u_ll, u_lr, u_rl, u_rr, + x_interfaces, + node_index, limiter, dg) + +Returns the constant "reconstructed" values `u_lr, u_rl` at the interface `x_interfaces[node_index - 1]`. +Supposed to be used in conjunction with [`VolumeIntegralPureLGLFiniteVolumeO2`](@ref). +Formally first order accurate. +If a first-order finite volume scheme is desired, [`VolumeIntegralPureLGLFiniteVolume`](@ref) is an +equivalent, but more efficient choice. 
+""" +@inline function reconstruction_constant(u_ll, u_lr, u_rl, u_rr, + x_interfaces, node_index, + limiter, dg) + return u_lr, u_rl +end + +# Helper functions for reconstructions below +@inline function reconstruction_linear(u_lr, u_rl, s_l, s_r, + x_lr, x_rl, x_interfaces, node_index) + # Linear reconstruction at the interface + u_lr = u_lr + s_l * (x_interfaces[node_index - 1] - x_lr) + u_rl = u_rl + s_r * (x_interfaces[node_index - 1] - x_rl) + + return u_lr, u_rl +end + +# Reference element: +# -1 ------------------0------------------ 1 -> x +# Gauss-Lobatto-Legendre nodes (schematic for k = 3): +# . . . . +# ^ ^ ^ ^ +# Node indices: +# 1 2 3 4 +# The inner subcell boundaries are governed by the +# cumulative sum of the quadrature weights - 1 . +# -1 ------------------0------------------ 1 -> x +# w1-1 (w1+w2)-1 (w1+w2+w3)-1 +# | | | | | +# Note that only the inner boundaries are stored. +# Subcell interface indices, loop only over 2 -> nnodes(dg) = 4 +# 1 2 3 4 5 +# +# In general a four-point stencil is required, since we reconstruct the +# piecewise linear solution in both subcells next to the subcell interface. +# Since these subcell boundaries are not aligned with the DG nodes, +# on each neighboring subcell two linear solutions are reconstructed => 4 point stencil. +# For the outer interfaces the stencil shrinks since we do not consider values +# outside the element (volume integral). +# +# The left subcell node values are labelled `_ll` (left-left) and `_lr` (left-right), while +# the right subcell node values are labelled `_rl` (right-left) and `_rr` (right-right). + +""" + reconstruction_O2_full(u_ll, u_lr, u_rl, u_rr, + x_interfaces, node_index, + limiter, dg::DGSEM) + +Returns the reconstructed values `u_lr, u_rl` at the interface `x_interfaces[node_index - 1]`. +Computes limited (linear) slopes on the subcells for a DGSEM element. +Supposed to be used in conjunction with [`VolumeIntegralPureLGLFiniteVolumeO2`](@ref). 
+ +The supplied `limiter` governs the choice of slopes given the nodal values +`u_ll`, `u_lr`, `u_rl`, and `u_rr` at the (Gauss-Lobatto Legendre) nodes. +Total-Variation-Diminishing (TVD) choices for the limiter are + 1) [`minmod`](@ref) + 2) [`monotonized_central`](@ref) + 3) [`superbee`](@ref) + 4) [`vanLeer`](@ref) + +The reconstructed slopes are for `reconstruction_O2_full` not limited at the cell boundaries. +Formally second order accurate when used without a limiter, i.e., `limiter = `[`central_slope`](@ref). +This approach corresponds to equation (79) described in +- Rueda-Ramírez, Hennemann, Hindenlang, Winters, & Gassner (2021). + "An entropy stable nodal discontinuous Galerkin method for the resistive MHD equations. + Part II: Subcell finite volume shock capturing" + [JCP: 2021.110580](https://doi.org/10.1016/j.jcp.2021.110580) +""" +@inline function reconstruction_O2_full(u_ll, u_lr, u_rl, u_rr, + x_interfaces, node_index, + limiter, dg::DGSEM) + @unpack nodes = dg.basis + x_lr = nodes[node_index - 1] + x_rl = nodes[node_index] + + # Slope between "middle" nodes + s_m = (u_rl - u_lr) / (x_rl - x_lr) + + if node_index == 2 # Catch case ll == lr + s_l = s_m # Use unlimited "central" slope + else + x_ll = nodes[node_index - 2] + # Slope between "left" nodes + s_lr = (u_lr - u_ll) / (x_lr - x_ll) + # Select slope between extrapolated (left) and crossing (middle) slope + s_l = limiter.(s_lr, s_m) + end + + if node_index == nnodes(dg) # Catch case rl == rr + s_r = s_m # Use unlimited "central" slope + else + x_rr = nodes[node_index + 1] + # Slope between "right" nodes + s_rl = (u_rr - u_rl) / (x_rr - x_rl) + # Select slope between crossing (middle) and extrapolated (right) slope + s_r = limiter.(s_m, s_rl) + end + + return reconstruction_linear(u_lr, u_rl, s_l, s_r, + x_lr, x_rl, x_interfaces, node_index) +end + +""" + reconstruction_O2_inner(u_ll, u_lr, u_rl, u_rr, + x_interfaces, node_index, + limiter, dg::DGSEM) + +Returns the reconstructed values `u_lr, 
u_rl` at the interface `x_interfaces[node_index - 1]`. +Computes limited (linear) slopes on the *inner* subcells for a DGSEM element. +Supposed to be used in conjunction with [`VolumeIntegralPureLGLFiniteVolumeO2`](@ref). + +The supplied `limiter` governs the choice of slopes given the nodal values +`u_ll`, `u_lr`, `u_rl`, and `u_rr` at the (Gauss-Lobatto Legendre) nodes. +Total-Variation-Diminishing (TVD) choices for the limiter are + 1) [`minmod`](@ref) + 2) [`monotonized_central`](@ref) + 3) [`superbee`](@ref) + 4) [`vanLeer`](@ref) + +For the outer, i.e., boundary subcells, constant values are used, i.e, no reconstruction. +This reduces the order of the scheme below 2. +This approach corresponds to equation (78) described in +- Rueda-Ramírez, Hennemann, Hindenlang, Winters, & Gassner (2021). + "An entropy stable nodal discontinuous Galerkin method for the resistive MHD equations. + Part II: Subcell finite volume shock capturing" + [JCP: 2021.110580](https://doi.org/10.1016/j.jcp.2021.110580) +""" +@inline function reconstruction_O2_inner(u_ll, u_lr, u_rl, u_rr, + x_interfaces, node_index, + limiter, dg::DGSEM) + @unpack nodes = dg.basis + x_lr = nodes[node_index - 1] + x_rl = nodes[node_index] + + # Slope between "middle" nodes + s_m = (u_rl - u_lr) / (x_rl - x_lr) + + if node_index == 2 # Catch case ll == lr + # Do not reconstruct at the boundary + s_l = zero(s_m) + else + x_ll = nodes[node_index - 2] + # Slope between "left" nodes + s_lr = (u_lr - u_ll) / (x_lr - x_ll) + # Select slope between extrapolated (left) and crossing (middle) slope + s_l = limiter.(s_lr, s_m) + end + + if node_index == nnodes(dg) # Catch case rl == rr + # Do not reconstruct at the boundary + s_r = zero(s_m) + else + x_rr = nodes[node_index + 1] + # Slope between "right" nodes + s_rl = (u_rr - u_rl) / (x_rr - x_rl) + # Select slope between crossing (middle) and extrapolated (right) slope + s_r = limiter.(s_m, s_rl) + end + + return reconstruction_linear(u_lr, u_rl, s_l, s_r, + x_lr, 
x_rl, x_interfaces, node_index) +end + +""" + central_slope(sl, sr) + +Central, non-TVD reconstruction given left and right slopes `sl` and `sr`. +Gives formally full order of accuracy at the expense of sacrificed nonlinear stability. +Similar in spirit to [`flux_central`](@ref). +""" +@inline function central_slope(sl, sr) + return 0.5f0 * (sl + sr) +end + +""" + minmod(sl, sr) + +Classic minmod limiter function for a TVD reconstruction given left and right slopes `sl` and `sr`. +There are many different ways how the minmod limiter can be implemented. +For reference, see for instance Eq. (6.27) in + +- Randall J. LeVeque (2002) + Finite Volume Methods for Hyperbolic Problems + [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253) +""" +@inline function minmod(sl, sr) + return 0.5f0 * (sign(sl) + sign(sr)) * min(abs(sl), abs(sr)) +end + +""" + monotonized_central(sl, sr) + +Monotonized central limiter function for a TVD reconstruction given left and right slopes `sl` and `sr`. +There are many different ways how the monotonized central limiter can be implemented. +For reference, see for instance Eq. (6.29) in + +- Randall J. LeVeque (2002) + Finite Volume Methods for Hyperbolic Problems + [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253) +""" +@inline function monotonized_central(sl, sr) + # Use recursive property of minmod function + return minmod(0.5f0 * (sl + sr), minmod(2 * sl, 2 * sr)) +end + +""" + superbee(sl, sr) + +Superbee limiter function for a TVD reconstruction given left and right slopes `sl` and `sr`. +There are many different ways how the superbee limiter can be implemented. +For reference, see for instance Eq. (6.28) in + +- Randall J. 
LeVeque (2002) + Finite Volume Methods for Hyperbolic Problems + [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253) +""" +@inline function superbee(sl, sr) + return maxmod(minmod(sl, 2 * sr), minmod(2 * sl, sr)) +end + +""" + vanLeer(sl, sr) + +Symmetric limiter by van Leer. +See for reference page 70 in + +- Siddhartha Mishra, Ulrik Skre Fjordholm and Rémi Abgrall + Numerical methods for conservation laws and related equations. + [Link](https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf) +""" +@inline function vanLeer(sl, sr) + if abs(sl) + abs(sr) > zero(sl) + return (abs(sr) * sl + abs(sl) * sr) / (abs(sl) + abs(sr)) + else + return zero(sl) + end +end diff --git a/src/solvers/dgsem_tree/subcell_limiters_2d.jl b/src/solvers/dgsem_tree/subcell_limiters_2d.jl index c8e0373d9b6..cca91aa94b0 100644 --- a/src/solvers/dgsem_tree/subcell_limiters_2d.jl +++ b/src/solvers/dgsem_tree/subcell_limiters_2d.jl @@ -233,7 +233,7 @@ end semi, variable) mesh, equations, dg, cache = mesh_equations_solver_cache(semi) (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes - (; inverse_weights) = dg.basis + (; inverse_weights) = dg.basis # Plays role of inverse DG-subcell sizes (; variable_bounds) = limiter.cache.subcell_limiter_coefficients variable_string = string(variable) diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index e17197f843d..b5367b45d72 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -503,7 +503,7 @@ function calc_surface_integral!(du, u, mesh::UnstructuredMesh2D, end # This routine computes the maximum value of the discrete metric identities necessary to ensure -# that the approxmiation will be free-stream preserving (i.e. a constant solution remains constant) +# that the approximation will be free-stream preserving (i.e. 
a constant solution remains constant) # on a curvilinear mesh. # Note! Independent of the equation system and is only a check on the discrete mapping terms. # Can be used for a metric identities check on StructuredMesh{2} or UnstructuredMesh2D diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index 0d23b43ef4b..75f728ef6da 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -714,17 +714,21 @@ end @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_2d_dgsem", "elixir_navierstokes_viscous_shock_newton_krylov.jl"), tspan=(0.0, 0.1), + atol_lin_solve=1e-11, + rtol_lin_solve=1e-11, + atol_ode_solve=1e-10, + rtol_ode_solve=1e-10, l2=[ - 3.468233560427797e-5, - 2.64864594855224e-5, - 7.879490760481979e-10, - 2.8748482665365446e-5 + 3.428501006908931e-5, + 2.5967418005884837e-5, + 2.7084890458524478e-17, + 2.855861765163304e-5 ], linf=[ - 0.00018754529350140103, - 0.00014045634087878067, - 9.043610782328732e-9, - 0.00014499382160382268 + 0.00018762342908784646, + 0.0001405900207752664, + 3.661971738081151e-16, + 0.00014510700486747297 ]) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) @@ -884,19 +888,19 @@ end @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_2d_dgsem", "elixir_navierstokes_blast_reflective.jl"), l2=[ - 0.08271777454941344, - 0.10020048140682014, - 0.10020048140682006, - 0.5954017435122945 + 0.015140702486341239, + 0.035675739843665635, + 0.035675739843665615, + 0.21415725909973524 ], linf=[ - 0.4785944470287504, - 0.7205772140501768, - 0.7205772140501767, - 3.25120873497427 + 0.2339198598727935, + 0.5951310665112189, + 0.5951310665112187, + 3.0106576605775333 ], - tspan=(0.0, 0.05), - abstol=1e-7, reltol=1e-7) + tspan=(0.0, 0.01), + abstol=1e-11, reltol=1e-11) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) @test_allocations(Trixi.rhs!, semi, sol, 1000) diff --git a/test/test_structured_1d.jl b/test/test_structured_1d.jl index 
04398b5ed9a..daf8ac6e1af 100644 --- a/test/test_structured_1d.jl +++ b/test/test_structured_1d.jl @@ -149,6 +149,27 @@ end @test_allocations(Trixi.rhs!, semi, sol, 1000) end +@trixi_testset "elixir_euler_source_terms_nonperiodic_fvO2.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_euler_source_terms_nonperiodic_fvO2.jl"), + l2=[ + 0.0005159476609077155, + 0.000649450399792432, + 0.0010602371635625239 + ], + linf=[ + 0.0017927309507015377, + 0.001662532939591621, + 0.004580416775184837 + ]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) + + # Test/cover `:compact` printing + show(IOContext(IOBuffer(), :compact => true), MIME"text/plain"(), volume_integral) +end + @trixi_testset "elixir_euler_weak_blast_er.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_weak_blast_er.jl"), diff --git a/test/test_tree_1d_euler.jl b/test/test_tree_1d_euler.jl index b110c4fa465..614dcc1b370 100644 --- a/test/test_tree_1d_euler.jl +++ b/test/test_tree_1d_euler.jl @@ -55,6 +55,27 @@ end @test_allocations(Trixi.rhs!, semi, sol, 1000) end +@trixi_testset "elixir_euler_convergence_pure_fv.jl (O2, constant reconstruction)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_convergence_pure_fv.jl"), + volume_integral=VolumeIntegralPureLGLFiniteVolumeO2(LobattoLegendreBasis(3), + flux_hllc, + reconstruction_mode = reconstruction_constant, + slope_limiter = central_slope), + l2=[ + 0.019355699748523896, + 0.022326984561234497, + 0.02523665947241734 + ], + linf=[ + 0.02895961127645519, + 0.03293442484199227, + 0.04246098278632804 + ]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) +end + @trixi_testset "elixir_euler_density_wave.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_density_wave.jl"), l2=[ @@ -431,6 +452,23 @@ end 
@test_allocations(Trixi.rhs!, semi, sol, 1000) end +@trixi_testset "elixir_euler_convergence_pure_fvO2.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_convergence_pure_fvO2.jl"), + l2=[ + 0.0004651066144227485, + 0.0005058715155540577, + 0.0007705686813156139 + ], + linf=[ + 0.0014354711538595577, + 0.0014154880871579678, + 0.0027044481967184453 + ]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) +end + @trixi_testset "elixir_euler_laplace_diffusion.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_laplace_diffusion.jl"), l2=[0.10954500481114468, diff --git a/test/test_unit.jl b/test/test_unit.jl index 54403a3e3c2..3123b4022de 100644 --- a/test/test_unit.jl +++ b/test/test_unit.jl @@ -2405,6 +2405,58 @@ end 1.803e-5, atol = 5e-8) end +@testset "Slope Limiters" begin + sl = 1.0 + sr = -1.0 + + # Test for code coverage + dummy = 42 + @test reconstruction_constant(dummy, sl, sr, dummy, dummy, dummy, dummy, dummy) == + (sl, sr) + + @test minmod(sl, sr) == 0.0 + @test monotonized_central(sl, sr) == 0.0 + @test superbee(sl, sr) == 0.0 + @test vanLeer(sl, sr) == 0.0 + + sr = 0.5 + @test minmod(sl, sr) == 0.5 + @test monotonized_central(sl, sr) == 0.75 + @test superbee(sl, sr) == 1.0 + @test isapprox(vanLeer(sl, sr), 2 / 3) + + sl = -1.0 + sr = 0.0 + @test minmod(sl, sr) == 0.0 + @test monotonized_central(sl, sr) == 0.0 + @test superbee(sl, sr) == 0.0 + @test vanLeer(sl, sr) == 0.0 + + sr = -0.8 + @test minmod(sl, sr) == -0.8 + @test monotonized_central(sl, sr) == -0.9 + @test superbee(sl, sr) == -1.0 + @test isapprox(vanLeer(sl, sr), -8 / 9) + + # Test symmetry + @test minmod(sr, sl) == -0.8 + @test monotonized_central(sr, sl) == -0.9 + @test superbee(sr, sl) == -1.0 + @test isapprox(vanLeer(sr, sl), -8 / 9) + + sl = 1.0 + sr = 0.0 + @test minmod(sl, sr) == 0.0 + @test monotonized_central(sl, sr) == 0.0 + @test superbee(sl, sr) == 0.0 + 
@test vanLeer(sl, sr) == 0.0 + + @test central_slope(sl, sr) == 0.5 + + # Test van Leer zero case + @test vanLeer(0.0, 0.0) == 0.0 +end + # Velocity functions are present in many equations and are tested here @testset "Velocity functions for different equations" begin gamma = 1.4 From 013244d1bcfee588809908bd7bb865880add9f4b Mon Sep 17 00:00:00 2001 From: Benedict <135045760+benegee@users.noreply.github.com> Date: Wed, 8 Oct 2025 09:56:16 +0200 Subject: [PATCH 066/158] Apply suggestions from code review Co-authored-by: Valentin Churavy --- src/callbacks_step/save_solution.jl | 6 ++---- src/callbacks_step/stepsize_dg1d.jl | 12 ++++++------ src/callbacks_step/stepsize_dg2d.jl | 8 ++++---- src/callbacks_step/stepsize_dg3d.jl | 11 +++++------ 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 71196d6fe1f..a74d374390f 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -287,10 +287,8 @@ end system = "") # TODO GPU currently on CPU backend = trixi_backend(_u_ode) - if backend isa Nothing # TODO GPU KA CPU backend - u_ode = _u_ode - else - u_ode = Array(_u_ode) + if backend !== nothing + u_ode = Array(u_ode) end mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array_native(u_ode, mesh, equations, solver, cache) diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index e0cac1ce57c..613bf3198b2 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -5,7 +5,7 @@ @muladd begin #! 
format: noindent -function max_dt(backend, u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, @@ -29,7 +29,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -52,7 +52,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{1}, return 4 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, @@ -72,7 +72,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -91,7 +91,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{1}, return 4 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::StructuredMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, @@ -119,7 +119,7 @@ function max_dt(backend, u, t, mesh::StructuredMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::StructuredMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, diff --git a/src/callbacks_step/stepsize_dg2d.jl 
b/src/callbacks_step/stepsize_dg2d.jl index fe30e5019b7..a5d5ba53c2a 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(backend, u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -29,7 +29,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -82,7 +82,7 @@ function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, return dt end -function max_dt(backend, u, t, +function max_dt(backend::Nothing, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::False, equations, dg::DG, cache) @@ -120,7 +120,7 @@ function max_dt(backend, u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, +function max_dt(backend::Nothing, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::True, equations, dg::DG, cache) diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 8cdc7d74487..c211f765a93 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -5,7 +5,7 @@ @muladd begin #! 
format: noindent -function max_dt(backend, u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -31,7 +31,7 @@ function max_dt(backend, u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -135,10 +135,9 @@ function max_dt(backend, u, t, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - if backend isa Nothing # TODO GPU KA CPU backend as well - @unpack contravariant_vectors, inverse_jacobian = cache.elements - else - # TODO GPU is this sufficient? + @unpack contravariant_vectors, inverse_jacobian = cache.elements + if backend !== nothing + # TODO: Port to GPU contravariant_vectors = Array(cache.elements.contravariant_vectors) inverse_jacobian = Array(cache.elements.inverse_jacobian) end From 8a98d27940a3cdd947f88f0358c35ee513e45d86 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 10:00:29 +0200 Subject: [PATCH 067/158] !fixup --- src/callbacks_step/save_solution.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index a74d374390f..12f63792281 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -280,13 +280,13 @@ end return nothing end -@inline function save_solution_file(_u_ode, t, dt, iter, +@inline function save_solution_file(u_ode, t, dt, iter, semi::AbstractSemidiscretization, solution_callback, element_variables = Dict{Symbol, Any}(), node_variables = Dict{Symbol, Any}(); system = "") # TODO GPU currently on 
CPU - backend = trixi_backend(_u_ode) + backend = trixi_backend(u_ode) if backend !== nothing u_ode = Array(u_ode) end From 7de1e571b08623234735d3d769e96318966e484e Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 10:12:19 +0200 Subject: [PATCH 068/158] fmt --- src/callbacks_step/stepsize_dg3d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index c211f765a93..b3fdd3d9807 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -136,7 +136,7 @@ function max_dt(backend, u, t, max_scaled_speed = nextfloat(zero(t)) @unpack contravariant_vectors, inverse_jacobian = cache.elements - if backend !== nothing + if backend !== nothing # TODO: Port to GPU contravariant_vectors = Array(cache.elements.contravariant_vectors) inverse_jacobian = Array(cache.elements.inverse_jacobian) From 31a65cb2acb608de40ea63452a6e22a38a0b249d Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 11:27:38 +0200 Subject: [PATCH 069/158] pass backend through --- src/callbacks_step/stepsize_dg2d.jl | 16 ++++++++-------- src/callbacks_step/stepsize_dg3d.jl | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index a5d5ba53c2a..a6c217f2885 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -55,10 +55,10 @@ function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -72,10 +72,10 @@ function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -161,10 +161,10 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -178,10 +178,10 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index b3fdd3d9807..1f67dfe7fc2 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -180,10 +180,10 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -197,10 +197,10 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -214,10 +214,10 @@ function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -231,10 +231,10 @@ function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] From 4064e79478a7b1a452bb7f9a63fb040d9bc83e9f Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 11:28:30 +0200 Subject: [PATCH 070/158] fixes --- src/solvers/dgsem_p4est/dg_2d.jl | 2 +- src/solvers/dgsem_structured/dg.jl | 2 +- src/solvers/dgsem_tree/dg_1d.jl | 6 ++++-- src/solvers/dgsem_tree/dg_2d.jl | 5 +++-- src/solvers/dgsem_tree/dg_3d.jl | 3 ++- src/solvers/fdsbp_tree/fdsbp_2d.jl | 8 ++++---- src/solvers/fdsbp_tree/fdsbp_3d.jl | 8 ++++---- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index b417e87a77d..87565720c99 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -63,7 +63,7 @@ end end end -function prolong2interfaces!(cache, u, +function prolong2interfaces!(backend::Nothing, cache, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, dg::DG) @unpack interfaces = cache diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index 6cc2791c27e..17bd6dd0f20 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -35,7 +35,7 @@ function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionP @assert isperiodic(mesh) end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{StructuredMesh, StructuredMeshView{2}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 
986bc6d6830..d7e8c0e8464 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -238,7 +238,8 @@ end return nothing end -function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, have_nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolumeO2, dg::DGSEM, cache) @@ -404,7 +405,8 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{1}, equations, + dg::DG) @unpack interfaces = cache @unpack neighbor_ids = interfaces interfaces_u = interfaces.u diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 0d1b3c885b8..1d8b6f65f8d 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -120,7 +120,7 @@ function rhs!(backend, du, u, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @@ -439,7 +439,8 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{2}, equations, + dg::DG) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 664a8e168ef..b04fd0f885b 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -433,7 +433,8 @@ end return nothing end -function prolong2interfaces!(backend, cache, u, mesh::TreeMesh{3}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{3}, equations, + dg::DG) @unpack interfaces = 
cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index 6f642ef1ab6..132b5161e78 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -159,7 +159,7 @@ function calc_volume_integral!(backend::Nothing, du, u, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{2}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -202,7 +202,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh2D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -260,7 +260,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{2}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -304,7 +304,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh2D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index 1eff0986e17..9fe7cd3044d 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -181,7 +181,7 @@ function calc_volume_integral!(backend::Nothing, du, u, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{3}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -238,7 +238,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh3D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh3D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -297,7 +297,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{3}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -355,7 +355,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh3D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh3D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 From af50cda41b961227336402bc4080f0be9a73f122 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 12:02:00 +0200 Subject: [PATCH 071/158] backends here and there --- src/solvers/dgsem_p4est/dg_2d.jl | 2 +- src/solvers/dgsem_p4est/dg_3d_parallel.jl | 10 ++++++---- src/solvers/dgsem_structured/dg.jl | 3 ++- src/solvers/dgsem_structured/dg_2d.jl | 2 +- src/solvers/dgsem_tree/dg_1d.jl | 2 +- src/solvers/dgsem_tree/dg_2d.jl | 11 ++++++----- src/solvers/dgsem_tree/dg_2d_parallel.jl | 9 +++++---- src/solvers/dgsem_tree/dg_3d.jl | 4 ++-- src/solvers/dgsem_unstructured/dg_2d.jl | 3 ++- src/solvers/fdsbp_tree/fdsbp_2d.jl | 2 +- src/solvers/fdsbp_tree/fdsbp_3d.jl | 2 +- 11 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 87565720c99..56b6568072d 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -119,7 +119,7 @@ function prolong2interfaces!(backend::Nothing, cache, u, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, have_nonconservative_terms, diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl 
b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index 616ce759486..188560fa95f 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -40,12 +40,12 @@ function rhs!(backend, du, u, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -95,11 +95,13 @@ function rhs!(backend, du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, + cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index 17bd6dd0f20..931a5b81602 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -69,7 +69,8 @@ function rhs!(backend, du, u, t, end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 
6430b61b276..1883fa5f881 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -588,7 +588,7 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, +function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index d7e8c0e8464..30cdd500646 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -405,7 +405,7 @@ end return nothing end -function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{1}, equations, +function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, dg::DG) @unpack interfaces = cache @unpack neighbor_ids = interfaces diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 1d8b6f65f8d..fbac4822c60 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -125,7 +125,7 @@ function rhs!(backend, du, u, t, # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -162,7 +162,8 @@ function rhs!(backend, du, u, t, end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -467,7 +468,7 @@ function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{2}, equa return nothing end -function calc_interface_flux!(surface_flux_values, +function 
calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -501,7 +502,7 @@ function calc_interface_flux!(surface_flux_values, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) @@ -1066,7 +1067,7 @@ function calc_surface_integral!(backend::Nothing, du, u, return nothing end -function apply_jacobian!(du, mesh::TreeMesh{2}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{2}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index b4ab0bdaaee..614af8e0da1 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -484,12 +484,12 @@ function rhs!(backend, du, u, t, # Prolong solution to interfaces # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
@trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -540,12 +540,13 @@ function rhs!(backend, du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index b04fd0f885b..d181eab61fd 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -472,7 +472,7 @@ function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{3}, equa return nothing end -function calc_interface_flux!(backend, surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -507,7 +507,7 @@ function calc_interface_flux!(backend, surface_flux_values, return nothing end -function calc_interface_flux!(backend, surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl 
b/src/solvers/dgsem_unstructured/dg_2d.jl index b5367b45d72..91152903540 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -80,7 +80,8 @@ function rhs!(backend, du, u, t, # Apply Jacobian from mapping to reference element # Note! this routine is reused from dgsem_structured/dg_2d.jl - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index 132b5161e78..2b08cfe7f11 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -214,7 +214,7 @@ end # already separates the solution information into right-traveling and # left-traveling information. So we only need to compute the appropriate # flux information at each side of an interface. -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index 9fe7cd3044d..86d82fe752e 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -250,7 +250,7 @@ end # already separates the solution information into right-traveling and # left-traveling information. So we only need to compute the appropriate # flux information at each side of an interface. 
-function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, From 5893d4dca815131d5e26aafaac318d9c3ea87c68 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 15:31:53 +0200 Subject: [PATCH 072/158] almost everywhere --- src/callbacks_step/stepsize_dg2d.jl | 12 ++++++------ src/callbacks_step/stepsize_dg3d.jl | 8 ++++---- src/solvers/dgmulti/dg.jl | 2 +- src/solvers/dgmulti/flux_differencing.jl | 4 ++-- src/solvers/dgmulti/flux_differencing_gauss_sbp.jl | 2 +- src/solvers/dgsem_structured/dg.jl | 2 +- src/solvers/dgsem_tree/dg_2d_parabolic.jl | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index a6c217f2885..2691511c747 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -48,7 +48,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelTreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -65,7 +65,7 @@ function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, return dt end -function max_dt(backend, u, t, mesh::ParallelTreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelTreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -154,7 +154,7 @@ function max_dt(backend::Nothing, u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, +function 
max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -171,7 +171,7 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, return dt end -function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -188,7 +188,7 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{2}, return dt end -function max_dt(backend, u, t, mesh::ParallelT8codeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -205,7 +205,7 @@ function max_dt(backend, u, t, mesh::ParallelT8codeMesh{2}, return dt end -function max_dt(backend, u, t, mesh::ParallelT8codeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 1f67dfe7fc2..3f50d618fd1 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -173,7 +173,7 @@ function max_dt(backend, u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we 
should improve this; maybe we should dispatch on `u` @@ -190,7 +190,7 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, return dt end -function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -207,7 +207,7 @@ function max_dt(backend, u, t, mesh::ParallelP4estMesh{3}, return dt end -function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` @@ -224,7 +224,7 @@ function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, return dt end -function max_dt(backend, u, t, mesh::ParallelT8codeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 2be73e5e208..91279a461bd 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -662,7 +662,7 @@ function calc_sources!(du, u, t, source_terms, return nothing end -function rhs!(du, u, t, mesh, equations, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMulti, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 458e06e88b6..139c4d706c5 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -616,7 +616,7 @@ 
end # an entropy conservative/stable discretization. For modal DG schemes, an extra `entropy_projection!` # is required (see https://doi.org/10.1016/j.jcp.2018.02.033, Section 4.3). # Also called by DGMultiFluxDiff{<:GaussSBP} solvers. -function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) @@ -661,7 +661,7 @@ end # integral, e.g., an entropy conservative/stable discretization. The implementation of `rhs!` # for such schemes is very similar to the implementation of `rhs!` for standard DG methods, # but specializes `calc_volume_integral`. -function rhs!(du, u, t, mesh, equations, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiffSBP, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index cb06a40009a..f9d13334a11 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -582,7 +582,7 @@ end # Specialize RHS so that we can call `invert_jacobian_and_interpolate!` instead of just `invert_jacobian!`, # since `invert_jacobian!` is also used in other places (e.g., parabolic terms). 
-function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index 931a5b81602..b661c2bbd02 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -44,7 +44,7 @@ function rhs!(backend, du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index 35f259ca9e5..ed2ba183454 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -835,7 +835,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end From a1caa12dc35bfd4820ee225bef159ddf93db3966 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 17:19:48 +0200 Subject: [PATCH 073/158] some more --- src/callbacks_step/stepsize_dg2d.jl | 8 ++++---- src/solvers/dgsem_p4est/dg_2d_parabolic.jl | 2 +- src/solvers/dgsem_p4est/dg_3d_parabolic.jl | 2 +- src/solvers/dgsem_structured/dg.jl | 2 +- src/solvers/dgsem_tree/dg_1d.jl | 7 ++++--- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 2691511c747..a1b5eda6e30 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ 
-195,10 +195,10 @@ function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{2}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -212,10 +212,10 @@ function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{2}, # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 7d263b5fa2e..4f43c041637 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -220,7 +220,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces. # This reuses `prolong2interfaces` for the purely hyperbolic case. 
@trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl index 6703d3014de..ff0cff761cc 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -114,7 +114,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index b661c2bbd02..8828c32666f 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -64,7 +64,7 @@ function rhs!(backend, du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 30cdd500646..57ecf8efc9c 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -103,7 +103,7 @@ function rhs!(backend, du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end @@ -613,7 +613,8 @@ function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:A return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{1}, 
StructuredMesh{1}}, equations, surface_integral, dg::DGSEM, cache) @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements @@ -639,7 +640,7 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1 return nothing end -function apply_jacobian!(du, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function apply_jacobian!(backend::Nothing, du, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements From a5cded3ae7cf1e94d0a19097d70b9ef2b16a2d55 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 8 Oct 2025 21:35:15 +0200 Subject: [PATCH 074/158] next round --- src/solvers/dgsem_p4est/dg_2d_parabolic.jl | 4 ++-- src/solvers/dgsem_p4est/dg_3d_parabolic.jl | 2 +- src/solvers/dgsem_tree/dg_1d.jl | 2 +- src/solvers/dgsem_tree/dg_1d_parabolic.jl | 2 +- src/solvers/fdsbp_tree/fdsbp_1d.jl | 8 ++++---- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 4f43c041637..2ecd0025ef8 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -138,7 +138,7 @@ function rhs_parabolic!(du, u, t, mesh::Union{P4estMesh{2}, P4estMesh{3}}, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) end @@ -227,7 +227,7 @@ function calc_gradient!(gradients, u_transformed, t, # Calculate interface fluxes for the gradient. # This reuses `calc_interface_flux!` for the purely hyperbolic case. 
@trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + calc_interface_flux!(nothing, cache_parabolic.elements.surface_flux_values, mesh, False(), # False() = no nonconservative terms equations_parabolic, dg.surface_integral, dg, cache_parabolic) diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl index ff0cff761cc..34bfe1fa908 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -121,7 +121,7 @@ function calc_gradient!(gradients, u_transformed, t, # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a # specialization for AbstractEquationsParabolic. @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + calc_interface_flux!(nothing, cache_parabolic.elements.surface_flux_values, mesh, False(), # False() = no nonconservative terms equations_parabolic, dg.surface_integral, dg, cache_parabolic) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 57ecf8efc9c..7c5878b0dc1 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -108,7 +108,7 @@ function rhs!(backend, du, u, t, end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index 06a6a4488ec..faa9a7240a4 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -90,7 +90,7 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin 
- calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) end diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index 6e71d7627d9..ceebd104f43 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -139,7 +139,7 @@ function calc_volume_integral!(backend::Nothing, du, u, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{1}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -166,7 +166,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh1D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh1D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -220,7 +220,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{1}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -248,7 +248,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh1D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh1D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 From 7c6ab4a571b2d0b7ac72a7cb2dac6ec8c64104b3 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 9 Oct 2025 08:53:43 +0200 Subject: [PATCH 075/158] could this be... --- src/solvers/dgsem_p4est/dg_3d_parabolic.jl | 11 ++++++----- src/solvers/dgsem_tree/dg_1d.jl | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl index 34bfe1fa908..8d7049d37e6 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -352,21 +352,22 @@ end end # This version is used for parabolic gradient computations -@inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, +@inline function calc_interface_flux!(surface_flux_values, + ::Type{<:Union{P4estMesh{3}}}, have_nonconservative_terms::False, equations::AbstractEquationsParabolic, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, 
equations, dg, primary_i_node_index, - primary_j_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = 0.5f0 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 7c5878b0dc1..f594ea7eb08 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -108,7 +108,8 @@ function rhs!(backend, du, u, t, end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -640,7 +641,8 @@ function calc_surface_integral!(backend::Nothing, du, u, return nothing end -function apply_jacobian!(backend::Nothing, du, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function apply_jacobian!(backend::Nothing, du, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements From 719c2d15cfb7f250b6af8e031d3cc6d7377a54b2 Mon Sep 17 00:00:00 2001 From: Vivienne Ehlert <201612348+vivimie@users.noreply.github.com> Date: Thu, 6 Nov 2025 16:01:55 +0100 Subject: [PATCH 076/158] adapts until 2d prolong2interfaces! 
--- src/solvers/dgsem_p4est/dg_2d.jl | 111 +++++++++++++++++--------- src/solvers/dgsem_structured/dg_2d.jl | 62 ++++++++++++-- 2 files changed, 127 insertions(+), 46 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 56b6568072d..b1c5d932b3e 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -64,56 +64,91 @@ end end function prolong2interfaces!(backend::Nothing, cache, u, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, equations, dg::DG) @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. 
- primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] + prolong2interfaces_interface!(interfaces.u, u, interface, typeof(mesh), + equations, neighbor_ids, node_indices, + index_range) + end + return nothing +end - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], - index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], - index_range) +function prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, dg::DG) + @unpack interfaces = cache + ninterfaces(interfaces) == 0 && return nothing + @unpack neighbor_ids, node_indices = cache.interfaces + index_range = eachnode(dg) - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, - primary_element] - end - i_primary += i_primary_step - j_primary += j_primary_step + kernel! 
= prolong2interfaces_KAkernel!(backend) + kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, + index_range, ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_KAkernel!(interfaces_u, u, + mT::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, + node_indices, index_range) + interface = @index(Global) + prolong2interfaces_per_interface!(interfaces_u, u, interface, mT, equations, + neighbor_ids, node_indices, index_range) +end + +function prolong2interfaces_per_interface!(interfaces_u, u, interface, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, node_indices, + index_range) + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in index_range + for v in eachvariable(equations) + interfaces_u[1, v, i, interface] = u[v, i_primary, j_primary, + primary_element] end + i_primary += i_primary_step + j_primary += j_primary_step + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
+ secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], - index_range) + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], + index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, - secondary_element] - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in index_range + for v in eachvariable(equations) + interfaces_u[2, v, i, interface] = u[v, i_secondary, j_secondary, + secondary_element] end + i_secondary += i_secondary_step + j_secondary += j_secondary_step end return nothing diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 1883fa5f881..bfeaab65c7d 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -5,6 +5,50 @@ @muladd begin #! 
format: noindent +function calc_volume_integral!(::Nothing, du, u, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + @unpack contravariant_vectors = cache.elements + @threaded for element in eachelement(dg, cache) + weak_form_kernel_per_element!(du, u, element, typeof(mesh), + have_nonconservative_terms, equations, dg, + contravariant_vectors) + end + return nothing +end + +function calc_volume_integral!(backend::Backend, du, u, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + @unpack contravariant_vectors = cache.elements + kernel! = weak_form_KAkernel!(backend) + kernel!(du, u, typeof(mesh), have_nonconservative_terms, equations, dg, + contravariant_vectors, ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function weak_form_KAkernel!(du, u, + mT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, equations, + dg::DGSEM, contravariant_vectors) + element = @index(Global) + weak_form_kernel_per_element!(du, u, element, mT, have_nonconservative_terms, + equations, dg, contravariant_vectors) +end #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, @@ -12,17 +56,19 @@ see `flux_differencing_kernel!`. This treatment is required to achieve, e.g., entropy-stability or well-balancedness. 
See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# -@inline function weak_form_kernel!(du, u, - element, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - P4estMeshView{2}, T8codeMesh{2}}, - have_nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha = true) +@inline function weak_form_kernel_per_element!(du, u, element, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms::False, + equations, dg::DGSEM, + contravariant_vectors, alpha = true) # true * [some floating point value] == [exactly the same floating point value] # This can (hopefully) be optimized away due to constant propagation. @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) From 6bbc069a9503c0678df705543fc8497b1ba998a4 Mon Sep 17 00:00:00 2001 From: Vivienne Ehlert <201612348+vivimie@users.noreply.github.com> Date: Thu, 6 Nov 2025 16:04:03 +0100 Subject: [PATCH 077/158] adds explicit mesh type in signature --- src/solvers/dgsem_structured/dg_3d.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index cd39623a367..50772a4d1c2 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -36,7 +36,11 @@ function calc_volume_integral!(backend::Backend, du, u, return nothing end -@kernel function weak_form_KAkernel!(du, u, meshT, have_nonconservative_terms, +@kernel function weak_form_KAkernel!(du, u, + meshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms, equations, dg::DGSEM, contravariant_vectors) element = @index(Global) From e58c2985ab1e99397e0138f57906cb9387e80ed8 Mon Sep 
17 00:00:00 2001 From: Vivienne Ehlert <201612348+vivimie@users.noreply.github.com> Date: Fri, 7 Nov 2025 16:38:15 +0100 Subject: [PATCH 078/158] adapts the rest for the 2d basic advection gpu elixir --- src/callbacks_step/stepsize_dg2d.jl | 163 ++++++++++---- src/solvers/dgsem_p4est/containers.jl | 12 ++ src/solvers/dgsem_p4est/dg_2d.jl | 299 +++++++++++++++++--------- src/solvers/dgsem_structured/dg_2d.jl | 45 +++- 4 files changed, 367 insertions(+), 152 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index a1b5eda6e30..8c5560f3b9d 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -81,7 +81,6 @@ function max_dt(backend::Nothing, u, t, mesh::ParallelTreeMesh{2}, return dt end - function max_dt(backend::Nothing, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, StructuredMeshView{2}}, @@ -89,69 +88,145 @@ function max_dt(backend::Nothing, u, t, # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) + @unpack contravariant_vectors, inverse_jacobian = cache + @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), equations, dg, + contravariant_vectors, + inverse_jacobian, element) + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) + end + return 2 / (nnodes(dg) * max_scaled_speed) +end +function max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}, StructuredMeshView{2}}, + constant_speed::False, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg,cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, + contravariant_vectors, inverse_jacobian, ndrange = num_elements) + # TODO GPU dt on CPU? (time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + return 2 / (nnodes(dg) * max_scaled_speed) +end - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - lambda1, lambda2 = max_abs_speeds(u_node, equations) - - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, - i, j, element) - lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, - i, j, element) - lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) +# works for both constant and non-constant speed +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed, equations, + dg::DG, contravariant_vectors, + inverse_jacobian) + element = @index(Global) + max_scaled_speeds[element] = max_scaled_speed_per_element(u, mT, 
constant_speed, + equations, dg, + contravariant_vectors, + inverse_jacobian, + element) +end - inv_jacobian = abs(inverse_jacobian[i, j, element]) +function max_scaled_speed_per_element(u, + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::False, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + max_lambda1 = max_lambda2 = zero(max_scaled_speed) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) + + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, + i, j, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, + i, j, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + + max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian) + max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian) + end + return max_lambda1 + max_lambda2 +end - max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian) - max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian) - end +function max_dt(backend::Nothing, u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, + constant_speed::True, equations, dg::DG, cache) + max_scaled_speed = nextfloat(zero(t)) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, + equations, dg, contravariant_vectors, + inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't 
propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_lambda1 + max_lambda2) + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) end return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, +function max_dt(backend::Backend, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::True, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg,cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, + contravariant_vectors, inverse_jacobian, ndrange = num_elements) + # TODO GPU dt on CPU? (time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + return 2 / (nnodes(dg) * max_scaled_speed) +end - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - +function max_scaled_speed_per_element(u, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + + max_lambda1_loc = max_lambda2_loc = nextfloat(zero(eltype(u))) max_lambda1, max_lambda2 = max_abs_speeds(equations) - - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, - i, j, element) - lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, - i, j, element) - lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) - - inv_jacobian = abs(inverse_jacobian[i, j, element]) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, - inv_jacobian * - (lambda1_transformed + lambda2_transformed)) - end + for j in eachnode(dg), i in eachnode(dg) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, + i, j, element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, + i, j, element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + + max_lambda1_loc = max(max_lambda1_loc, inv_jacobian * lambda1_transformed) + max_lambda2_loc = max(max_lambda2_loc, inv_jacobian * lambda2_transformed) end - - return 2 / (nnodes(dg) * max_scaled_speed) + + return max_lambda1_loc + max_lambda2_loc end function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{2}, diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 3f74f699f19..836805bbf86 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -933,6 +933,18 @@ end end end +@inline function indices2direction2d(indices) + if indices[1] === :begin + return 1 + elseif indices[1] === :end + return 2 + elseif indices[2] === :begin + return 3 + else # if indices[2] === :end + return 4 + end +end + include("containers_2d.jl") include("containers_3d.jl") include("containers_parallel.jl") diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index b1c5d932b3e..3b587df1fc4 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -159,84 +159,145 @@ function calc_interface_flux!(backend::Nothing, surface_flux_values, T8codeMesh{2}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.interfaces @unpack 
contravariant_vectors = cache.elements index_range = eachnode(dg) - index_end = last(index_range) @threaded for interface in eachinterface(dg, cache) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) + calc_interface_flux_per_interface!(surface_flux_values, typeof(mesh), + have_nonconservative_terms, + equations, surface_integral, typeof(dg), + interface, cache.interfaces.u, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + end - # Create the local i,j indexing on the primary element used to pull normal direction information - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], - index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], - index_range) + return nothing +end - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. - if :i_backward in secondary_indices - node_secondary = index_end - node_secondary_step = -1 - else - node_secondary = 1 - node_secondary_step = 1 - end +function calc_interface_flux!(backend::Backend, surface_flux_values, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + have_nonconservative_terms, + equations, surface_integral, dg::DG, cache) - for node in eachnode(dg) - # Get the normal direction on the primary element. - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. 
This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - node, primary_direction, primary_element, - node_secondary, secondary_direction, secondary_element) - - # Increment primary element indices to pull the normal direction - i_primary += i_primary_step - j_primary += j_primary_step - # Increment the surface node index along the secondary element - node_secondary += node_secondary_step - end + ninterfaces(cache.interfaces) == 0 && return nothing + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + kernel! = calc_interface_flux_KAkernel!(backend) + kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, + equations, surface_integral, typeof(dg), cache.interfaces.u, + neighbor_ids, node_indices, contravariant_vectors, index_range, + ndrange=ninterfaces(cache.interfaces)) + + return nothing +end + +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, + st::Type{<:DG}, u_interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + interface = @index(Global) + calc_interface_flux_per_interface!(surface_flux_values, mt, + have_nonconservative_terms, equations, + surface_integral, st, u_interface, + interface, neighbor_ids, node_indices, + contravariant_vectors, index_range) +end + +function calc_interface_flux_per_interface!(surface_flux_values, + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, st::Type{<:DG}, + u_interface, interface, neighbor_ids, + 
node_indices, contravariant_vectors, + index_range) + index_end = last(index_range) + + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction2d(primary_indices) + + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction2d(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end + + for node in index_range + # Get the normal direction on the primary element. + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
+ normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mt, have_nonconservative_terms, + equations, surface_integral, st, u_interface, interface, + normal_direction, node, primary_direction, + primary_element, node_secondary, + secondary_direction, secondary_element) + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step end + return nothing end # Inlined version of the interface flux computation for conservation laws @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, - T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, have_nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_node_index, primary_direction_index, + surface_integral, st::Type{<:DG}, + u_interface, interface_index, + normal_direction, primary_node_index, + primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, + secondary_node_index, + secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, + primary_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -251,18 +312,19 @@ end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, 
equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_node_index, primary_direction_index, + surface_integral, st::Type{<:DG}, + u_interface, interface_index, + normal_direction, primary_node_index, + primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, + secondary_node_index, + secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, primary_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -276,12 +338,8 @@ end # Note the factor 0.5 necessary for the nonconservative fluxes based on # the interpretation of global SBP operators coupled discontinuously via # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = (flux_[v] + - 0.5f0 * - noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -(flux_[v] + - 0.5f0 * - noncons_secondary[v]) + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = Float64(flux_[v] + 0.5f0 * noncons_primary[v]) + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = Float64(-(flux_[v] + 0.5f0 * noncons_secondary[v])) end return nothing @@ -682,47 +740,86 @@ end return nothing end + function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements - # Note that all fluxes have been computed with outward-pointing normal vectors. 
- # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = (du[v, 1, l, element] + - surface_flux_values[v, l, 1, element] * - factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + - surface_flux_values[v, l, 2, element] * - factor_2) - - # surface at -y - du[v, l, 1, element] = (du[v, l, 1, element] + - surface_flux_values[v, l, 3, element] * - factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + - surface_flux_values[v, l, 4, element] * - factor_2) - end - end + calc_surface_integral_per_element(du, typeof(mesh), equations, + surface_integral, dg, + surface_flux_values, element) end +end +function calc_surface_integral!(backend::Backend, du, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg,cache) == 0 && return nothing + @unpack surface_flux_values = cache.elements + + kernel! 
= calc_surface_integral_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, surface_integral, dg, + surface_flux_values, ndrange=nelements(dg,cache)) + return nothing +end + +@kernel function calc_surface_integral_KAkernel!(du, + mT::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, + surface_flux_values) + element = @index(Global) + calc_surface_integral_per_element!(du, mT, equations, surface_integral, + dg, surface_flux_values, element) +end + +function calc_surface_integral_per_element!(du, ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, surface_flux_values, + element) + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop (outside this function) + # to increase performance. We also use explicit assignments instead of `+=` + # to let `@muladd` turn these into FMAs (see comment at the top of the file). 
+ factor_1 = dg.basis.boundary_interpolation[1, 1] + factor_2 = dg.basis.boundary_interpolation[nnodes(dg), 2] + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end return nothing end end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index bfeaab65c7d..6967a05a9d1 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -640,17 +640,48 @@ function apply_jacobian!(backend::Nothing, du, T8codeMesh{2}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg,cache) + apply_jacobian_per_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + element) + end +end - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - factor = -inverse_jacobian[i, j, element] +function apply_jacobian!(backend::Backend, du, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, dg::DG, cache) + nelements(dg,cache) == 0 && return nothing + @unpack inverse_jacobian = cache.elements + kernel! 
= apply_jacobian_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, + ndrange=nelements(dg,cache)) +end - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end +@kernel function apply_jacobian_KAkernel!(du, mT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian) + element = @index(Global) + apply_jacobian_per_element!(du, mT, equations, dg, inverse_jacobian, element) +end + +function apply_jacobian_per_element!(du, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian, element) + for j in eachnode(dg), i in eachnode(dg) + factor = -inverse_jacobian[i, j, element] + + for v in eachvariable(equations) + du[v, i, j, element] *= factor end end - return nothing end end # @muladd From b59239b4c90ca1ce9739acdf007a45fcb691d279 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 27 Nov 2025 10:05:45 +0100 Subject: [PATCH 079/158] enable 2D CUDA tests --- test/test_cuda_2d.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index 1e20b22c34a..c13c0a4af2b 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -42,12 +42,11 @@ end using CUDA @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! - l2=nothing, # TODO: GPU. [Float32(8.311947673061856e-6)], - linf=nothing, # TODO: GPU. [Float32(6.627000273229378e-5)], + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], RealT=Float32, real_type=Float32, - storage_type=CuArray, - sol=nothing,) # TODO: GPU. 
Remove this once we can run the simulation on the GPU + storage_type=CuArray) # # Ensure that we do not have excessive memory allocations # # (e.g., from type instabilities) # @test_allocations(Trixi.rhs!, semi, sol, 1000) From c0dd4b5bc0e4b3a93b6d20236c650caf51b41869 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 27 Nov 2025 11:31:14 +0100 Subject: [PATCH 080/158] fmt --- src/callbacks_step/stepsize_dg2d.jl | 13 +++++---- src/solvers/dgsem_p4est/dg_2d.jl | 39 ++++++++++++++------------- src/solvers/dgsem_p4est/dg_3d.jl | 18 ++++++++----- src/solvers/dgsem_structured/dg_1d.jl | 2 +- src/solvers/dgsem_structured/dg_2d.jl | 19 ++++++------- 5 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 8c5560f3b9d..d0612cc1d60 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -88,7 +88,7 @@ function max_dt(backend::Nothing, u, t, # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors, inverse_jacobian = cache + @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) max_lambda = max_scaled_speed_per_element(u, typeof(mesh), equations, dg, contravariant_vectors, @@ -105,7 +105,7 @@ function max_dt(backend::Backend, u, t, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::False, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements - num_elements = nelements(dg,cache) + num_elements = nelements(dg, cache) max_scaled_speeds = allocate(backend, eltype(t), num_elements) kernel! 
= max_scaled_speed_KAkernel!(backend) @@ -172,8 +172,8 @@ function max_dt(backend::Nothing, u, t, @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, - equations, dg, contravariant_vectors, - inverse_jacobian, element) + equations, dg, contravariant_vectors, + inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 max_scaled_speed = Base.max(max_scaled_speed, max_lambda) @@ -187,7 +187,7 @@ function max_dt(backend::Backend, u, t, P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::True, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements - num_elements = nelements(dg,cache) + num_elements = nelements(dg, cache) max_scaled_speeds = allocate(backend, eltype(t), num_elements) kernel! 
= max_scaled_speed_KAkernel!(backend) @@ -208,7 +208,6 @@ function max_scaled_speed_per_element(u, constant_speed::True, equations, dg::DG, contravariant_vectors, inverse_jacobian, element) - max_lambda1_loc = max_lambda2_loc = nextfloat(zero(eltype(u))) max_lambda1, max_lambda2 = max_abs_speeds(equations) for j in eachnode(dg), i in eachnode(dg) @@ -225,7 +224,7 @@ function max_scaled_speed_per_element(u, max_lambda1_loc = max(max_lambda1_loc, inv_jacobian * lambda1_transformed) max_lambda2_loc = max(max_lambda2_loc, inv_jacobian * lambda2_transformed) end - + return max_lambda1_loc + max_lambda2_loc end diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index a85d0de2392..e2e58ec2cd4 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -72,9 +72,9 @@ function prolong2interfaces!(backend::Nothing, cache, u, index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - prolong2interfaces_interface!(interfaces.u, u, interface, typeof(mesh), - equations, neighbor_ids, node_indices, - index_range) + prolong2interfaces_per_interface!(interfaces.u, u, interface, typeof(mesh), + equations, neighbor_ids, node_indices, + index_range) end return nothing end @@ -159,7 +159,6 @@ function calc_interface_flux!(backend::Nothing, surface_flux_values, T8codeMesh{2}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces @unpack contravariant_vectors = cache.elements index_range = eachnode(dg) @@ -181,7 +180,6 @@ function calc_interface_flux!(backend::Backend, surface_flux_values, T8codeMesh{2}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) - ninterfaces(cache.interfaces) == 0 && return nothing @unpack neighbor_ids, node_indices = cache.interfaces @unpack contravariant_vectors = cache.elements @@ -191,7 +189,7 @@ function calc_interface_flux!(backend::Backend, surface_flux_values, 
kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, equations, surface_integral, typeof(dg), cache.interfaces.u, neighbor_ids, node_indices, contravariant_vectors, index_range, - ndrange=ninterfaces(cache.interfaces)) + ndrange = ninterfaces(cache.interfaces)) return nothing end @@ -275,7 +273,6 @@ function calc_interface_flux_per_interface!(surface_flux_values, # Increment the surface node index along the secondary element node_secondary += node_secondary_step end - return nothing end @@ -363,8 +360,12 @@ end # Note the factor 0.5 necessary for the nonconservative fluxes based on # the interpretation of global SBP operators coupled discontinuously via # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = Float64(flux_[v] + 0.5f0 * noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = Float64(-(flux_[v] + 0.5f0 * noncons_secondary[v])) + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = Float64(flux_[v] + + 0.5f0 * + noncons_primary[v]) + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = Float64(-(flux_[v] + + 0.5f0 * + noncons_secondary[v])) end return nothing @@ -847,7 +848,6 @@ end return nothing end - function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, @@ -869,12 +869,12 @@ function calc_surface_integral!(backend::Backend, du, u, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - nelements(dg,cache) == 0 && return nothing + nelements(dg, cache) == 0 && return nothing @unpack surface_flux_values = cache.elements kernel! 
= calc_surface_integral_KAkernel!(backend) kernel!(du, typeof(mesh), equations, surface_integral, dg, - surface_flux_values, ndrange=nelements(dg,cache)) + surface_flux_values, ndrange = nelements(dg, cache)) return nothing end @@ -891,9 +891,10 @@ end dg, surface_flux_values, element) end -function calc_surface_integral_per_element!(du, ::Type{<:Union{P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}}}, +function calc_surface_integral_per_element!(du, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, surface_flux_values, @@ -913,8 +914,8 @@ function calc_surface_integral_per_element!(du, ::Type{<:Union{P4estMesh{2}, # surface at +x du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + - surface_flux_values[v, l, 2, element] * - factor_2) + surface_flux_values[v, l, 2, element] * + factor_2) # surface at -y du[v, l, 1, element] = (du[v, l, 1, element] + @@ -923,8 +924,8 @@ function calc_surface_integral_per_element!(du, ::Type{<:Union{P4estMesh{2}, # surface at +y du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + - surface_flux_values[v, l, 4, element] * - factor_2) + surface_flux_values[v, l, 4, element] * + factor_2) end end return nothing diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 65cffed4a38..c92a69777ef 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -370,8 +370,10 @@ end secondary_direction_index, secondary_element_index) calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, - combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), - equations, surface_integral, solverT, u_interface, interface_index, + combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, + equations), + equations, surface_integral, solverT, u_interface, + interface_index, normal_direction, primary_i_node_index, 
primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, @@ -384,7 +386,8 @@ end have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, - surface_integral, solverT::Type{<:DG}, u_interface, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, @@ -424,7 +427,8 @@ end have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, - surface_integral, solverT::Type{<:DG}, u_interface, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, @@ -582,7 +586,7 @@ end direction_index, element_index, boundary_index) calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh, - nonconservative_terms, + have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), equations, @@ -594,7 +598,7 @@ end @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, + have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -637,7 +641,7 @@ end @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, + have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, surface_integral, dg::DG, cache, i_index, j_index, diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index cb98c45aed3..433d34e199f 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ 
b/src/solvers/dgsem_structured/dg_1d.jl @@ -69,7 +69,7 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, mesh::StructuredMesh{1}, +function apply_jacobian!(backend::Nothing, du, mesh::StructuredMesh{1}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 89507a1b144..dc2dc3a119b 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -731,7 +731,7 @@ function apply_jacobian!(backend::Nothing, du, T8codeMesh{2}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements - @threaded for element in eachelement(dg,cache) + @threaded for element in eachelement(dg, cache) apply_jacobian_per_element!(du, typeof(mesh), equations, dg, inverse_jacobian, element) end @@ -742,19 +742,20 @@ function apply_jacobian!(backend::Backend, du, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, dg::DG, cache) - nelements(dg,cache) == 0 && return nothing + nelements(dg, cache) == 0 && return nothing @unpack inverse_jacobian = cache.elements kernel! 
= apply_jacobian_KAkernel!(backend) kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, - ndrange=nelements(dg,cache)) + ndrange = nelements(dg, cache)) end -@kernel function apply_jacobian_KAkernel!(du, mT::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}}}, +@kernel function apply_jacobian_KAkernel!(du, + mT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, equations, dg::DG, inverse_jacobian) element = @index(Global) apply_jacobian_per_element!(du, mT, equations, dg, inverse_jacobian, element) From f90f5a8866c69ab3d5669135c98628e10bf2ea4c Mon Sep 17 00:00:00 2001 From: Vivienne Ehlert <201612348+vivimie@users.noreply.github.com> Date: Wed, 3 Dec 2025 15:28:41 +0100 Subject: [PATCH 081/158] fixes bugs in the CPU implementation --- src/solvers/dgsem_p4est/dg_2d.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index e2e58ec2cd4..11b19e19ffd 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -167,7 +167,7 @@ function calc_interface_flux!(backend::Nothing, surface_flux_values, calc_interface_flux_per_interface!(surface_flux_values, typeof(mesh), have_nonconservative_terms, equations, surface_integral, typeof(dg), - interface, cache.interfaces.u, + cache.interfaces.u, interface, neighbor_ids, node_indices, contravariant_vectors, index_range) end @@ -857,9 +857,9 @@ function calc_surface_integral!(backend::Nothing, du, u, @unpack surface_flux_values = cache.elements @threaded for element in eachelement(dg, cache) - calc_surface_integral_per_element(du, typeof(mesh), equations, - surface_integral, dg, - surface_flux_values, element) + calc_surface_integral_per_element!(du, typeof(mesh), equations, + surface_integral, dg, + surface_flux_values, element) end end From 
4ce90ab33d23d5e78aeaad78395f1fe020b50af3 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 19 Jan 2026 15:41:00 +0100 Subject: [PATCH 082/158] fix --- src/solvers/dgsem_structured/dg_3d.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index c55cd9383da..8be50e6277c 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -47,6 +47,8 @@ end weak_form_kernel_element!(du, u, element, meshT, have_nonconservative_terms, equations, dg, contravariant_vectors) +end + function create_cache(mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, From ae9719d21e90ee048325cde18aef4cdcb726a529 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 19 Jan 2026 17:09:21 +0100 Subject: [PATCH 083/158] fixes --- src/callbacks_step/stepsize_dg2d.jl | 8 ++++---- src/callbacks_step/stepsize_dg3d.jl | 6 +++--- src/solvers/dgmulti/dg.jl | 4 ++-- src/solvers/dgsem/calc_volume_integral.jl | 4 ++-- src/solvers/dgsem_p4est/dg_2d.jl | 1 + src/solvers/dgsem_structured/dg_2d.jl | 4 ++-- src/solvers/dgsem_structured/dg_3d.jl | 2 +- 7 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 2862403b523..9243f507974 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -119,8 +119,8 @@ function max_dt(backend::Nothing, u, t, max_scaled_speed = nextfloat(zero(t)) @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda = max_scaled_speed_per_element(u, typeof(mesh), equations, dg, - contravariant_vectors, + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, + equations, dg, contravariant_vectors, inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't 
propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 @@ -211,7 +211,7 @@ function max_dt(backend::Nothing, u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, @@ -303,7 +303,7 @@ function max_scaled_speed_per_element(u, return max_lambda1_loc + max_lambda2_loc end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 885c9db8d0f..f61adeaa008 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -31,7 +31,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -152,7 +152,7 @@ function max_scaled_speed_element(u, return max_lambda1 + max_lambda2 + max_lambda3 end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, @@ -249,7 +249,7 @@ function max_dt(backend, u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::True, equations, 
equations_parabolic::AbstractEquationsParabolic, diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 444220e7230..f97f36de035 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -240,7 +240,7 @@ function dt_polydeg_scaling(dg::DGMulti{3, <:Wedge, <:TensorProductWedge}) end # for the stepsize callback -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti{NDIMS}, @@ -269,7 +269,7 @@ function max_dt(u, t, mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti{NDIMS}, diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index d0289ff301d..a0677c17baf 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -80,7 +80,7 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, return nothing end -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}, TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}, UnstructuredMesh2D, T8codeMesh{2}, @@ -143,7 +143,7 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, return nothing end -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}, TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}, UnstructuredMesh2D, T8codeMesh{2}, diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 48f87218661..9500ff11a38 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -899,6 +899,7 @@ function 
calc_surface_integral_per_element!(du, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, surface_flux_values, element) + @unpack boundary_interpolation = dg.basis # Note that all fluxes have been computed with outward-pointing normal vectors. # This computes the **negative** surface integral contribution, diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 71f2e88bc0e..27efec14fc3 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function calc_volume_integral!(::Nothing, du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, @@ -83,7 +83,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 contravariant_vectors, alpha = true) # true * [some floating point value] == [exactly the same floating point value] # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis + @unpack derivative_hat = dg.basis for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 8be50e6277c..ae73f15d184 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -84,7 +84,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 alpha = true) # true * [some floating point value] == [exactly the same floating point value] # This can (hopefully) be optimized away due to constant propagation. 
- @unpack derivative_dhat = dg.basis + @unpack derivative_hat = dg.basis for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, k, element) From a13dd61ae15379e738ff4b458f27a8cea23e1efe Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 20 Jan 2026 13:17:33 +0100 Subject: [PATCH 084/158] fix --- src/callbacks_step/stepsize_dg2d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 9243f507974..a7f031f6ecc 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -171,7 +171,7 @@ function max_scaled_speed_per_element(u, constant_speed::False, equations, dg::DG, contravariant_vectors, inverse_jacobian, element) - max_lambda1 = max_lambda2 = zero(max_scaled_speed) + max_lambda1 = max_lambda2 = zero(eltype(u)) for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) lambda1, lambda2 = max_abs_speeds(u_node, equations) From 8ecb6c4aec8f57cd156c1e7183b8aeee4471d327 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 20 Jan 2026 15:21:06 +0100 Subject: [PATCH 085/158] fix --- src/callbacks_step/stepsize_dg2d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index a7f031f6ecc..bf019ba3990 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -29,7 +29,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) From 3d693112528a53601afef679bbf365f13f573ac6 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 20 Jan 2026 15:21:53 +0100 Subject: [PATCH 
086/158] no nextfloat per element --- src/callbacks_step/stepsize_dg2d.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index bf019ba3990..59dae0cce83 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -200,12 +200,12 @@ function max_dt(backend::Nothing, u, t, @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, + max_scaled_speed_loc = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, equations, dg, contravariant_vectors, inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_lambda) + max_scaled_speed = Base.max(max_scaled_speed, max_scaled_speed_loc) end return 2 / (nnodes(dg) * max_scaled_speed) @@ -283,7 +283,7 @@ function max_scaled_speed_per_element(u, constant_speed::True, equations, dg::DG, contravariant_vectors, inverse_jacobian, element) - max_lambda1_loc = max_lambda2_loc = nextfloat(zero(eltype(u))) + max_scaled_speed = zero(eltype(u)) max_lambda1, max_lambda2 = max_abs_speeds(equations) for j in eachnode(dg), i in eachnode(dg) # Local speeds transformed to the reference element @@ -296,11 +296,12 @@ function max_scaled_speed_per_element(u, inv_jacobian = abs(inverse_jacobian[i, j, element]) - max_lambda1_loc = max(max_lambda1_loc, inv_jacobian * lambda1_transformed) - max_lambda2_loc = max(max_lambda2_loc, inv_jacobian * lambda2_transformed) + max_scaled_speed = Base.max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed)) end - return max_lambda1_loc + max_lambda2_loc + return 
max_scaled_speed end function max_dt(backend::Nothing, u, t, From a2f04888fd3bbbadcac62d3ac7a40f5c177f5fe0 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 20 Jan 2026 15:22:27 +0100 Subject: [PATCH 087/158] fmt --- src/callbacks_step/stepsize_dg2d.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 59dae0cce83..9edba2008f0 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -200,9 +200,11 @@ function max_dt(backend::Nothing, u, t, @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_scaled_speed_loc = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, - equations, dg, contravariant_vectors, - inverse_jacobian, element) + max_scaled_speed_loc = max_scaled_speed_per_element(u, typeof(mesh), + constant_speed, + equations, dg, + contravariant_vectors, + inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 max_scaled_speed = Base.max(max_scaled_speed, max_scaled_speed_loc) From 31490d32ee899fde5f4be36d71bccf857e61779f Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 20 Jan 2026 16:31:29 +0100 Subject: [PATCH 088/158] another RealT_for_test_tolerances --- test/test_cuda_3d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl index f4281e880e4..157a95cfabe 100644 --- a/test/test_cuda_3d.jl +++ b/test/test_cuda_3d.jl @@ -49,7 +49,7 @@ end # Expected errors similar to reference on CPU l2=[Float32(0.00016263963870641478)], linf=[Float32(0.0014537194925779984)], - RealT=Float32, + RealT_for_test_tolerances=Float32, real_type=Float32, storage_type=CuArray) @test real(ode.p.solver) == Float32 From 8e802ee3bcc071b671e6fc70dba75744ae801833 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 10 Feb 2026 12:26:01 +0100 Subject: [PATCH 089/158] readd Project.toml --- benchmark/CUDA/Project.toml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 benchmark/CUDA/Project.toml diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml new file mode 100644 index 00000000000..221c03a5947 --- /dev/null +++ b/benchmark/CUDA/Project.toml @@ -0,0 +1,6 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +OrdinaryDiffEqLowStorageRK = "b0944070-b475-4768-8dec-fb6eb410534d" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" From 71d837b4294df88cc2f3f7d3a1d35b8efe557ecd Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 23 Feb 2026 13:05:19 +0100 Subject: [PATCH 090/158] fmt --- src/solvers/dgsem_p4est/dg_2d.jl | 8 ++++---- src/solvers/dgsem_p4est/dg_3d.jl | 20 ++++++++++---------- src/solvers/dgsem_structured/dg_2d.jl | 27 ++++++++++++++++----------- src/solvers/dgsem_structured/dg_3d.jl | 22 
+++++++++++++--------- 4 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 949905f45f9..c85f09dc4a9 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -923,8 +923,8 @@ function calc_surface_integral_per_element!(du, # surface at +x du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + - surface_flux_values[v, l, 2, element] * - factor) + surface_flux_values[v, l, 2, element] * + factor) # surface at -y du[v, l, 1, element] = (du[v, l, 1, element] + @@ -933,8 +933,8 @@ function calc_surface_integral_per_element!(du, # surface at +y du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + - surface_flux_values[v, l, 4, element] * - factor) + surface_flux_values[v, l, 4, element] * + factor) end end return nothing diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index a17f05f8ae8..b53b2654aea 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -1045,7 +1045,7 @@ end function calc_surface_integral_element!(du, ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, - equations, + equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, factor, surface_flux_values, element) # Note that all fluxes have been computed with outward-pointing normal vectors. 
@@ -1062,9 +1062,9 @@ function calc_surface_integral_element!(du, for v in eachvariable(equations) # surface at -x du[v, 1, l, m, element] = (du[v, 1, l, m, element] + - surface_flux_values[v, l, m, 1, - element] * - factor) + surface_flux_values[v, l, m, 1, + element] * + factor) # surface at +x du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + @@ -1074,9 +1074,9 @@ function calc_surface_integral_element!(du, # surface at -y du[v, l, 1, m, element] = (du[v, l, 1, m, element] + - surface_flux_values[v, l, m, 3, - element] * - factor) + surface_flux_values[v, l, m, 3, + element] * + factor) # surface at +y du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + @@ -1086,9 +1086,9 @@ function calc_surface_integral_element!(du, # surface at -z du[v, l, m, 1, element] = (du[v, l, m, 1, element] + - surface_flux_values[v, l, m, 5, - element] * - factor) + surface_flux_values[v, l, m, 5, + element] * + factor) # surface at +z du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index c45af8e7f69..991894efb3a 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -29,7 +29,8 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 =# @inline function weak_form_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, @@ -72,7 +73,8 @@ end @inline function flux_differencing_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) @@ -134,9 +136,10 @@ end 
@inline function flux_differencing_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, @@ -150,9 +153,10 @@ end @inline function flux_differencing_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, @@ -223,9 +227,10 @@ end @inline function flux_differencing_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 51ceb719eb6..44c55f68895 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -32,7 +32,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 @inline function weak_form_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -88,8 +88,9 @@ end # mapping terms, stored in `contravariant_vectors`, is peeled apart from the evaluation of # the physical fluxes in each Cartesian direction @inline function 
flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -170,8 +171,9 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, @@ -183,8 +185,9 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, @@ -273,8 +276,9 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, From ae3e4158928dd943b5f12d2584b2f9b6ad44dabb Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 23 Feb 2026 14:25:36 +0100 Subject: [PATCH 091/158] fixes --- src/solvers/dgsem_p4est/dg_2d.jl | 2 -- src/solvers/dgsem_tree/dg_1d.jl | 4 ++-- src/solvers/dgsem_tree/dg_2d.jl | 12 ++++++------ src/solvers/dgsem_tree/dg_3d.jl | 9 +++++---- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index c85f09dc4a9..6c53b0e5335 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ 
-902,8 +902,6 @@ function calc_surface_integral_per_element!(du, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, factor, surface_flux_values, element) - @unpack boundary_interpolation = dg.basis - # Note that all fluxes have been computed with outward-pointing normal vectors. # This computes the **negative** surface integral contribution, # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index a83faca9892..551e6bb5333 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -203,7 +203,7 @@ end end @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded = cache @@ -229,7 +229,7 @@ end end @inline function fvO2_kernel!(du, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 473edc00833..b6f7fef41a0 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -286,9 +286,9 @@ end end @inline function fvO2_kernel!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + ::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -390,9 +390,9 @@ end end @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + 
::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 0c90936671f..31edad0b45b 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -261,8 +261,8 @@ end end @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache @@ -300,8 +300,9 @@ end end @inline function fvO2_kernel!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, From a801ebe1a7d8151a891450929087e240a5c52b28 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 23 Feb 2026 17:07:13 +0100 Subject: [PATCH 092/158] more --- src/callbacks_step/analysis_dg1d.jl | 10 +++-- src/callbacks_step/analysis_dg2d.jl | 19 +++++---- src/callbacks_step/analysis_dg3d.jl | 14 ++++--- .../dgsem_p4est/dg_3d_subcell_limiters.jl | 16 +++---- src/solvers/dgsem_structured/dg_2d.jl | 42 +++++++++---------- .../dg_2d_compressible_euler.jl | 26 ++++++------ .../dg_2d_subcell_limiters.jl | 6 +-- src/solvers/dgsem_structured/dg_3d.jl | 12 +++--- .../dg_3d_compressible_euler.jl | 26 ++++++------ src/solvers/dgsem_tree/dg_1d.jl | 15 ++++--- src/solvers/dgsem_tree/dg_2d.jl | 28 
++++++------- .../dgsem_tree/dg_2d_compressible_euler.jl | 4 +- .../dgsem_tree/dg_2d_subcell_limiters.jl | 29 +++++++------ src/solvers/dgsem_tree/dg_3d.jl | 18 ++++---- .../dgsem_tree/dg_3d_compressible_euler.jl | 4 +- 15 files changed, 142 insertions(+), 127 deletions(-) diff --git a/src/callbacks_step/analysis_dg1d.jl b/src/callbacks_step/analysis_dg1d.jl index e53df1dd4c3..83bd9746848 100644 --- a/src/callbacks_step/analysis_dg1d.jl +++ b/src/callbacks_step/analysis_dg1d.jl @@ -124,7 +124,8 @@ end # This avoids the need to divide the RHS of the DG scheme by the Jacobian when computing # the time derivative of entropy, see `entropy_change_reference_element`. function integrate_reference_element(func::Func, u, element, - mesh::AbstractMesh{1}, equations, dg::DGSEM, cache, + ::Type{<:AbstractMesh{1}}, equations, dg::DGSEM, + cache, args...) where {Func} @unpack weights = dg.basis @@ -142,9 +143,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - mesh::AbstractMesh{1}, + meshT::Type{<:AbstractMesh{1}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, mesh, equations, dg, cache, + return integrate_reference_element(u, element, meshT, equations, dg, cache, du) do u, i, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, element) du_node = get_node_vars(du, equations, dg, i, element) @@ -155,7 +156,8 @@ end # calculate surface integral of func(u, equations) * normal on the reference element. function surface_integral_reference_element(func::Func, u, element, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, equations, dg::DGSEM, cache, args...) 
where {Func} u_left = get_node_vars(u, equations, dg, 1, element) diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index fb88a0c2115..96e640f8d62 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -200,7 +200,8 @@ end # This avoids the need to divide the RHS of the DG scheme by the Jacobian when computing # the time derivative of entropy, see `entropy_change_reference_element`. function integrate_reference_element(func::Func, u, element, - mesh::AbstractMesh{2}, equations, dg::DGSEM, cache, + ::Type{<:AbstractMesh{2}}, equations, dg::DGSEM, + cache, args...) where {Func} @unpack weights = dg.basis @@ -218,9 +219,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - mesh::AbstractMesh{2}, + meshT::Type{<:AbstractMesh{2}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, mesh, equations, dg, cache, + return integrate_reference_element(u, element, meshT, equations, dg, cache, du) do u, i, j, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, j, element) du_node = get_node_vars(du, equations, dg, i, j, element) @@ -231,7 +232,7 @@ end # calculate surface integral of func(u, equations) * normal on the reference element. function surface_integral_reference_element(func::Func, u, element, - mesh::TreeMesh{2}, equations, dg::DGSEM, + ::Type{<:TreeMesh{2}}, equations, dg::DGSEM, cache, args...) where {Func} @unpack weights = dg.basis @@ -260,11 +261,11 @@ end # Note: `get_normal_direction` already returns an outward-pointing normal for all directions, # thus no +- flips are needed here. 
function surface_integral_reference_element(func::Func, u, element, - mesh::Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, equations, dg::DGSEM, cache, args...) where {Func} @unpack contravariant_vectors = cache.elements diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 693fd95ac1a..2578fa7174a 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -224,7 +224,8 @@ end # This avoids the need to divide the RHS of the DG scheme by the Jacobian when computing # the time derivative of entropy, see `entropy_change_reference_element`. function integrate_reference_element(func::Func, u, element, - mesh::AbstractMesh{3}, equations, dg::DGSEM, cache, + ::Type{<:AbstractMesh{3}}, equations, dg::DGSEM, + cache, args...; normalize = true) where {Func} @unpack weights = dg.basis @@ -242,9 +243,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - mesh::AbstractMesh{3}, + meshT::Type{<:AbstractMesh{3}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, mesh, equations, dg, cache, + return integrate_reference_element(u, element, meshT, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, j, k, element) du_node = get_node_vars(du, equations, dg, i, j, k, element) @@ -255,7 +256,7 @@ end # calculate surface integral of func(u, equations) * normal on the reference element. function surface_integral_reference_element(func::Func, u, element, - mesh::TreeMesh{3}, equations, dg::DGSEM, + ::Type{<:TreeMesh{3}}, equations, dg::DGSEM, cache, args...) 
where {Func} @unpack weights = dg.basis @@ -291,8 +292,9 @@ end # Note: `get_normal_direction` already returns an outward-pointing normal for all directions, # thus no +- flips are needed here. function surface_integral_reference_element(func::Func, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, equations, dg::DGSEM, cache, args...) where {Func} @unpack contravariant_vectors = cache.elements diff --git a/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl index b7ef4c24c88..48532a79f5a 100644 --- a/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl +++ b/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl @@ -59,7 +59,7 @@ function create_cache(mesh::P4estMesh{3}, end @inline function subcell_limiting_kernel!(du, u, element, - mesh::P4estMesh{3}, + meshT::Type{<:P4estMesh{3}}, nonconservative_terms, equations, volume_integral, limiter::SubcellLimiterIDP, dg::DGSEM, cache) @@ -76,7 +76,7 @@ end fhat3_L = fhat3_L_threaded[Threads.threadid()] fhat3_R = fhat3_R_threaded[Threads.threadid()] calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh, nonconservative_terms, equations, volume_flux_dg, + u, meshT, nonconservative_terms, equations, volume_flux_dg, dg, element, cache) # low-order FV fluxes @@ -89,13 +89,13 @@ end fstar3_L = fstar3_L_threaded[Threads.threadid()] fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh, nonconservative_terms, equations, volume_flux_fv, + u, meshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh, nonconservative_terms, equations, limiter, + u, meshT, nonconservative_terms, equations, limiter, dg, 
element, cache) # Calculate volume integral contribution of low-order FV flux @@ -118,7 +118,7 @@ end # # See also `flux_differencing_kernel!`. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) (; contravariant_vectors) = cache.elements @@ -262,7 +262,7 @@ end # Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf. # @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -549,7 +549,7 @@ end fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::False, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes @@ -600,7 +600,7 @@ end fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::True, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 991894efb3a..bed6131ac05 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -135,14 +135,14 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}}}, + 
meshT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(volume_flux, equations), equations, @@ -152,11 +152,11 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}}}, + meshT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, @@ -166,7 +166,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -298,9 +298,9 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack normal_vectors_1, normal_vectors_2 = cache.normal_vectors @@ -340,9 +340,9 @@ end end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, 
StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -421,9 +421,9 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack normal_vectors_1, normal_vectors_2 = cache.normal_vectors diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index c2956d027b8..508d3c92d82 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -19,26 +19,27 @@ # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations)))) @turbo for j in eachnode(dg), i in eachnode(dg) @@ -82,7 +83,7 @@ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] @@ -155,7 +156,7 @@ contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] @@ -226,20 +227,21 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -248,7 +250,7 @@ end # values. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for j in eachnode(dg), i in eachnode(dg) @@ -294,7 +296,7 @@ end contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] @@ -400,7 +402,7 @@ end contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] diff --git a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl index 350f9c39587..b0c65960f6c 100644 --- a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl @@ -10,7 +10,7 @@ # # See also `flux_differencing_kernel!`. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::Union{StructuredMesh{2}, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, P4estMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) (; contravariant_vectors) = cache.elements @@ -111,7 +111,7 @@ end # Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf. 
# @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::Union{StructuredMesh{2}, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, P4estMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -315,7 +315,7 @@ end # The calculation of the non-conservative staggered "fluxes" requires non-conservative # terms that can be written as a product of local and jump contributions. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::Union{StructuredMesh{2}, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, P4estMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 44c55f68895..dbcae49b5ab 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -372,8 +372,8 @@ end # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack contravariant_vectors = cache.elements @@ -433,8 +433,8 @@ end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack contravariant_vectors = cache.elements @@ -529,8 +529,8 @@ end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + 
::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 8b710417ff7..9143286b88e 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -19,25 +19,26 @@ # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -88,7 +89,7 @@ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) jk = j + nnodes(dg) * (k - 1) @@ -176,7 +177,7 @@ (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] @@ -264,7 +265,7 @@ contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) ij = i + nnodes(dg) * (j - 1) @@ -351,19 +352,20 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
In addition @@ -372,7 +374,7 @@ end # values. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -425,7 +427,7 @@ end contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) jk = j + nnodes(dg) * (k - 1) @@ -546,7 +548,7 @@ end (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] @@ -667,7 +669,7 @@ end contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) ij = i + nnodes(dg) * (j - 1) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 551e6bb5333..e5f24da27cc 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -118,7 +118,8 @@ This treatment is required to achieve, e.g., entropy-stability or well-balancedn See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# @inline function weak_form_kernel!(du, u, - element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + element, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -139,7 +140,8 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 end 
@inline function flux_differencing_kernel!(du, u, element, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -168,7 +170,8 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -263,7 +266,7 @@ end # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) for i in 2:nnodes(dg) @@ -278,7 +281,7 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, u, - mesh::TreeMesh{1}, + ::Type{<:TreeMesh{1}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) volume_flux, nonconservative_flux = volume_flux_fv @@ -309,7 +312,7 @@ end # "An entropy stable nodal discontinuous Galerkin method for the resistive MHD equations. 
Part II: Subcell finite volume shock capturing" # [JCP: 2021.110580](https://doi.org/10.1016/j.jcp.2021.110580) @inline function calcflux_fvO2!(fstar1_L, fstar1_R, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index b6f7fef41a0..a4d4ed05fdb 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -178,7 +178,7 @@ This treatment is required to achieve, e.g., entropy-stability or well-balancedn See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# @inline function weak_form_kernel!(du, u, - element, mesh::TreeMesh{2}, + element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -205,7 +205,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, +@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -243,7 +243,7 @@ end end end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, +@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -286,9 +286,9 @@ end end @inline function fvO2_kernel!(du, u, - ::Type{<:Union{TreeMesh{2}, 
StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}}, + meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -302,7 +302,7 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -323,7 +323,7 @@ end end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::TreeMesh{2}, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -390,9 +390,9 @@ end end @inline function fv_kernel!(du, u, - ::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}}, + meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache @@ -403,7 +403,7 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) @@ -426,7 +426,7 @@ end # "A provably entropy stable subcell shock capturing approach for high order split 
form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::TreeMesh{2}, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) for j in eachnode(dg), i in 2:nnodes(dg) @@ -449,7 +449,7 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::TreeMesh{2}, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) volume_flux, nonconservative_flux = volume_flux_fv diff --git a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl index 51a5897b065..37f6b7720e9 100644 --- a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl @@ -65,7 +65,7 @@ end # muladd # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. 
@inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{2}, + element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), @@ -227,7 +227,7 @@ end # muladd end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{2}, + element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index 5537fdadd28..c2e7e538b37 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -66,7 +66,7 @@ function calc_volume_integral!(backend::Nothing, du, u, @unpack limiter = volume_integral @threaded for element in eachelement(dg, cache) - subcell_limiting_kernel!(du, u, element, mesh, + subcell_limiting_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral, limiter, dg, cache) @@ -76,8 +76,9 @@ function calc_volume_integral!(backend::Nothing, du, u, end @inline function subcell_limiting_kernel!(du, u, element, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}, + meshT::Type{<:Union{TreeMesh{2}, + StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms, equations, volume_integral, limiter::SubcellLimiterIDP, dg::DGSEM, cache) @@ -91,7 +92,7 @@ end fhat1_R = fhat1_R_threaded[Threads.threadid()] fhat2_L = fhat2_L_threaded[Threads.threadid()] fhat2_R = fhat2_R_threaded[Threads.threadid()] - calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, mesh, + calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_dg, dg, element, cache) @@ -102,14 +103,15 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = 
fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, - u, mesh, have_nonconservative_terms, equations, limiter, dg, + u, meshT, have_nonconservative_terms, equations, limiter, + dg, element, cache) # Calculate volume integral contribution of low-order FV flux @@ -130,7 +132,8 @@ end # # See also `flux_differencing_kernel!`. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::TreeMesh{2}, have_nonconservative_terms::False, + ::Type{<:TreeMesh{2}}, + have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) @unpack weights, derivative_split = dg.basis @@ -209,7 +212,7 @@ end # Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf. # @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::TreeMesh{2}, have_nonconservative_terms::True, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -388,7 +391,7 @@ end # The calculation of the non-conservative staggered "fluxes" requires non-conservative # terms that can be written as a product of local and jump contributions. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::TreeMesh{2}, nonconservative_terms::True, + ::Type{<:TreeMesh{2}}, nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -626,8 +629,8 @@ end # Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems. 
@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}, + ::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms::False, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes @@ -663,8 +666,8 @@ end # Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems. @inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}, + ::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms::True, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 31edad0b45b..2bce1e949f4 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -126,7 +126,7 @@ This treatment is required to achieve, e.g., entropy-stability or well-balancedn See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# @inline function weak_form_kernel!(du, u, - element, mesh::TreeMesh{3}, + element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -158,7 +158,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, +@inline function 
flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -208,7 +208,7 @@ end return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, +@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -300,9 +300,9 @@ end end @inline function fvO2_kernel!(du, u, - ::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, - P4estMesh{3}, - T8codeMesh{3}}}, + meshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -322,7 +322,7 @@ end fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh, have_nonconservative_terms, equations, + meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, cons2recon, recon2cons) @@ -352,7 +352,7 @@ end # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, have_nonconservative_terms::False, + ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) @@ -384,7 +384,7 @@ end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, + ::Type{<:TreeMesh{3}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, 
cache) volume_flux, nonconservative_flux = volume_flux_fv diff --git a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl index b2c48c9f00a..ce4155b06a7 100644 --- a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl @@ -17,7 +17,7 @@ # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{3}, + element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), @@ -263,7 +263,7 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{3}, + element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), From 2831c9ca1afbbd2501de68635cc32572082ca6f0 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Feb 2026 10:27:05 +0100 Subject: [PATCH 093/158] add @inline for inner functions --- docs/src/heterogeneous.md | 11 ++++++++--- src/callbacks_step/stepsize_dg2d.jl | 2 +- src/callbacks_step/stepsize_dg3d.jl | 2 +- src/solvers/dg.jl | 2 +- src/solvers/dgsem_p4est/dg_2d.jl | 6 +++--- src/solvers/dgsem_p4est/dg_3d.jl | 6 +++--- src/solvers/dgsem_structured/dg_2d.jl | 2 +- src/solvers/dgsem_structured/dg_3d.jl | 2 +- 8 files changed, 19 insertions(+), 14 deletions(-) diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md index 9d4dc50c181..70d40dd2f6d 100644 --- a/docs/src/heterogeneous.md +++ b/docs/src/heterogeneous.md @@ -120,9 +120,14 @@ function trixi_rhs_fct(mesh, equations, solver, cache, args) end ``` -1. Put the inner code in a new function `rhs_fct_per_element`. 
Besides the index - `element`, pass all required fields as arguments, but make sure to `@unpack` them from - their structs in advance. +1. Move the inner code into a new inlined function `rhs_fct_per_element`. + ```julia + @inline function rhs_fct_per_element(..., element, ...) + ... + end + ``` + Besides the index `element`, pass all required fields as arguments, but make sure to + `@unpack` them from their structs in advance. 2. Where `trixi_rhs_fct` is called, get the backend, i.e., the hardware we are currently running on via `trixi_backend(x)`. This will, e.g., work with `u_ode`. Internally, KernelAbstractions.jl's `get_backend` diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 4af0e63a43a..df57c9a2863 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -163,7 +163,7 @@ end element) end -function max_scaled_speed_per_element(u, +@inline function max_scaled_speed_per_element(u, mT::Type{<:Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index ef53e5bf76c..53dba485ad6 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -124,7 +124,7 @@ end element) end -function max_scaled_speed_element(u, +@inline function max_scaled_speed_element(u, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, equations, dg, contravariant_vectors, inverse_jacobian, element) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index bf9608f169c..2a9e51f0ecb 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -1259,7 +1259,7 @@ end node_indices) end -function compute_coefficients_element!(u, func, t, equations, dg::DG, +@inline function compute_coefficients_element!(u, func, t, equations, dg::DG, node_coordinates, element, node_indices) for indices in node_indices x_node = get_node_coords(node_coordinates, equations, dg, indices, 
element) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 6c53b0e5335..cec19a0f64c 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -104,7 +104,7 @@ end neighbor_ids, node_indices, index_range) end -function prolong2interfaces_per_interface!(interfaces_u, u, interface, +@inline function prolong2interfaces_per_interface!(interfaces_u, u, interface, ::Type{<:Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}}, @@ -210,7 +210,7 @@ end contravariant_vectors, index_range) end -function calc_interface_flux_per_interface!(surface_flux_values, +@inline function calc_interface_flux_per_interface!(surface_flux_values, mt::Type{<:Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}}, @@ -894,7 +894,7 @@ end dg, factor, surface_flux_values, element) end -function calc_surface_integral_per_element!(du, +@inline function calc_surface_integral_per_element!(du, ::Type{<:Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}}, diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index b53b2654aea..d6a3c562e09 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -127,7 +127,7 @@ end node_indices, index_range, interface) end -function prolong2interfaces_interface!(u_interface, u, +@inline function prolong2interfaces_interface!(u_interface, u, ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, equations, neighbor_ids, node_indices, index_range, interface) @@ -249,7 +249,7 @@ end index_range, interface) end -function calc_interface_flux_interface!(surface_flux_values, +@inline function calc_interface_flux_interface!(surface_flux_values, meshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms, @@ -1043,7 +1043,7 @@ end surface_flux_values, element) end -function calc_surface_integral_element!(du, +@inline function calc_surface_integral_element!(du, ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, equations, 
surface_integral::SurfaceIntegralWeakForm, diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index bed6131ac05..291b060f267 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -752,7 +752,7 @@ end apply_jacobian_per_element!(du, mT, equations, dg, inverse_jacobian, element) end -function apply_jacobian_per_element!(du, +@inline function apply_jacobian_per_element!(du, ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index dbcae49b5ab..01a1d3adeb0 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -927,7 +927,7 @@ end apply_jacobian_element!(du, meshT, equations, dg, inverse_jacobian, element) end -function apply_jacobian_element!(du, +@inline function apply_jacobian_element!(du, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, equations, dg, inverse_jacobian, element) From 34c4684905216d840389661fc4a8cdafcd16f280 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Feb 2026 10:27:57 +0100 Subject: [PATCH 094/158] more fixes --- src/callbacks_step/stepsize_dg2d.jl | 15 ++++--- src/callbacks_step/stepsize_dg3d.jl | 8 ++-- src/solvers/dg.jl | 2 +- src/solvers/dgsem/calc_volume_integral.jl | 2 +- src/solvers/dgsem_p4est/dg_2d.jl | 44 ++++++++++--------- src/solvers/dgsem_p4est/dg_3d.jl | 32 ++++++++------ src/solvers/dgsem_structured/dg_2d.jl | 13 +++--- src/solvers/dgsem_structured/dg_3d.jl | 22 +++++----- src/solvers/dgsem_tree/dg_1d.jl | 14 +++--- src/solvers/dgsem_tree/dg_2d.jl | 4 +- .../dgsem_tree/dg_2d_compressible_euler.jl | 10 +++-- .../dgsem_tree/dg_2d_subcell_limiters.jl | 5 ++- src/solvers/dgsem_tree/dg_3d.jl | 14 +++--- .../dgsem_tree/dg_3d_compressible_euler.jl | 10 +++-- test/test_performance_specializations_2d.jl | 24 +++++----- 
test/test_performance_specializations_3d.jl | 24 +++++----- 16 files changed, 132 insertions(+), 111 deletions(-) diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index df57c9a2863..35374ed7028 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -164,13 +164,14 @@ end end @inline function max_scaled_speed_per_element(u, - mT::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}, - StructuredMeshView{2}}}, - constant_speed::False, equations, dg::DG, - contravariant_vectors, inverse_jacobian, - element) + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::False, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) max_lambda1 = max_lambda2 = zero(eltype(u)) for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 53dba485ad6..55645aa92b2 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -125,9 +125,11 @@ end end @inline function max_scaled_speed_element(u, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, equations, dg, - contravariant_vectors, inverse_jacobian, element) + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, equations, dg, + contravariant_vectors, inverse_jacobian, + element) max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u)) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, k, element) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 2a9e51f0ecb..978ee5cfe39 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -1260,7 +1260,7 @@ end end @inline function compute_coefficients_element!(u, func, t, equations, dg::DG, - node_coordinates, element, 
node_indices) + node_coordinates, element, node_indices) for indices in node_indices x_node = get_node_coords(node_coordinates, equations, dg, indices, element) u_node = func(x_node, t, equations) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 06115e8aacf..e25432a35fa 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -250,7 +250,7 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralEntropyCorrectionShockCapturingCombined, dg::DGSEM, cache) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index cec19a0f64c..d2cdff7c6b5 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -105,11 +105,12 @@ end end @inline function prolong2interfaces_per_interface!(interfaces_u, u, interface, - ::Type{<:Union{P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}}}, - equations, neighbor_ids, node_indices, - index_range) + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, + node_indices, + index_range) primary_element = neighbor_ids[1, interface] primary_indices = node_indices[1, interface] @@ -211,14 +212,16 @@ end end @inline function calc_interface_flux_per_interface!(surface_flux_values, - mt::Type{<:Union{P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}}}, - have_nonconservative_terms, - equations, surface_integral, st::Type{<:DG}, - u_interface, interface, neighbor_ids, - node_indices, contravariant_vectors, - index_range) + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, + st::Type{<:DG}, + u_interface, interface, + neighbor_ids, + node_indices, contravariant_vectors, + 
index_range) index_end = last(index_range) # Get element and side index information on the primary element @@ -895,13 +898,14 @@ end end @inline function calc_surface_integral_per_element!(du, - ::Type{<:Union{P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}}}, - equations, - surface_integral::SurfaceIntegralWeakForm, - dg::DGSEM, factor, surface_flux_values, - element) + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, factor, + surface_flux_values, + element) # Note that all fluxes have been computed with outward-pointing normal vectors. # This computes the **negative** surface integral contribution, # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index d6a3c562e09..b7ad25c28ac 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -128,9 +128,10 @@ end end @inline function prolong2interfaces_interface!(u_interface, u, - ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, - equations, neighbor_ids, node_indices, - index_range, interface) + ::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + equations, neighbor_ids, node_indices, + index_range, interface) # Copy solution data from the primary element using "delayed indexing" with # a start value and two step sizes to get the correct face and orientation. 
# Note that in the current implementation, the interface will be @@ -250,13 +251,14 @@ end end @inline function calc_interface_flux_interface!(surface_flux_values, - meshT::Type{<:Union{P4estMesh{3}, - T8codeMesh{3}}}, - have_nonconservative_terms, - equations, surface_integral, - solverT::Type{<:DG}, u_interface, neighbor_ids, - node_indices, contravariant_vectors, - index_range, interface) + meshT::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms, + equations, surface_integral, + solverT::Type{<:DG}, u_interface, + neighbor_ids, + node_indices, contravariant_vectors, + index_range, interface) # Get element and side information on the primary element primary_element = neighbor_ids[1, interface] primary_indices = node_indices[1, interface] @@ -1044,10 +1046,12 @@ end end @inline function calc_surface_integral_element!(du, - ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, - equations, - surface_integral::SurfaceIntegralWeakForm, - dg::DGSEM, factor, surface_flux_values, element) + ::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, factor, surface_flux_values, + element) # Note that all fluxes have been computed with outward-pointing normal vectors. 
# This computes the **negative** surface integral contribution, # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B) diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 291b060f267..b7cb31c7b0f 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -753,11 +753,14 @@ end end @inline function apply_jacobian_per_element!(du, - ::Type{<:Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - P4estMeshView{2}, T8codeMesh{2}}}, - equations, dg::DG, inverse_jacobian, element) + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian, + element) for j in eachnode(dg), i in eachnode(dg) # Negative sign included to account for the negated surface and volume terms, # see e.g. the computation of `derivative_hat` in the basis setup and diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 01a1d3adeb0..146aba255d1 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -171,12 +171,12 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{3}, - P4estMesh{3}, - T8codeMesh{3}}}, + meshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(volume_flux, equations), equations, volume_flux, dg, cache, alpha) @@ -185,9 +185,9 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{StructuredMesh{3}, - P4estMesh{3}, - T8codeMesh{3}}}, + 
meshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, @@ -197,7 +197,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -928,9 +928,9 @@ end end @inline function apply_jacobian_element!(du, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, - equations, dg, inverse_jacobian, element) + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, + equations, dg, inverse_jacobian, element) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) # Negative sign included to account for the negated surface and volume terms, # see e.g. 
the computation of `derivative_hat` in the basis setup and diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index e5f24da27cc..b7033fc1162 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -170,8 +170,8 @@ end end @inline function flux_differencing_kernel!(du, u, element, - ::Type{<:Union{TreeMesh{1}, - StructuredMesh{1}}}, + meshT::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -180,7 +180,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -206,7 +206,7 @@ end end @inline function fv_kernel!(du, u, - ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, + meshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded = cache @@ -215,7 +215,7 @@ end # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, u, mesh, + calcflux_fv!(fstar1_L, fstar1_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) @@ -232,7 +232,7 @@ end end @inline function fvO2_kernel!(du, u, - ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, + meshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -244,7 +244,7 @@ end # Calculate FV two-point fluxes 
fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fvO2!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, + calcflux_fvO2!(fstar1_L, fstar1_R, u, meshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, cons2recon, recon2cons) diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index a4d4ed05fdb..036524d75a5 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -243,7 +243,7 @@ end end end -@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{2}}, +@inline function flux_differencing_kernel!(du, u, element, meshT::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -252,7 +252,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux diff --git a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl index 37f6b7720e9..efcb7cc6794 100644 --- a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl @@ -71,18 +71,19 @@ end # muladd volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. 
du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations)))) @turbo for j in eachnode(dg), i in eachnode(dg) @@ -233,12 +234,13 @@ end volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -247,7 +249,7 @@ end # values. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for j in eachnode(dg), i in eachnode(dg) diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index 9222e5469fc..b9d46ddf164 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -58,8 +58,9 @@ end # Subcell limiting currently only implemented for certain mesh types @inline function volume_integral_kernel!(du, u, element, - meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}}, + meshT::Type{<:Union{TreeMesh{2}, + StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms, equations, volume_integral::VolumeIntegralSubcellLimiting, dg::DGSEM, cache) diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 2bce1e949f4..ef96cb03250 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -208,7 +208,7 @@ end return nothing end -@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{3}}, +@inline function flux_differencing_kernel!(du, u, element, meshT::Type{<:TreeMesh{3}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -217,7 +217,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -261,8 +261,9 @@ end end @inline function fv_kernel!(du, u, - ::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, + 
meshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache @@ -277,7 +278,7 @@ end fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh, have_nonconservative_terms, equations, + meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # Calculate FV volume integral contribution @@ -448,7 +449,8 @@ end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, have_nonconservative_terms::False, + ::Type{<:TreeMesh{3}}, + have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, diff --git a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl index ce4155b06a7..f1d2573dc79 100644 --- a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl @@ -23,18 +23,19 @@ volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -269,12 +270,13 @@ end volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -283,7 +285,7 @@ end # values. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) diff --git a/test/test_performance_specializations_2d.jl b/test/test_performance_specializations_2d.jl index b42a1b8f640..7dceea2b6a7 100644 --- a/test/test_performance_specializations_2d.jl +++ b/test/test_performance_specializations_2d.jl @@ -33,7 +33,7 @@ isdir(outdir) && rm(outdir, recursive = true) # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -43,10 +43,10 @@ isdir(outdir) && rm(outdir, recursive = true) # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, 
typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] @@ -72,7 +72,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -82,10 +82,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] @@ -112,7 +112,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -122,10 +122,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, 
semi.cache, true) du_baseline = du[:, :, :, 1] @@ -151,7 +151,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -161,10 +161,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] diff --git a/test/test_performance_specializations_3d.jl b/test/test_performance_specializations_3d.jl index 3b3bd40b2f5..967b0f9cf3e 100644 --- a/test/test_performance_specializations_3d.jl +++ b/test/test_performance_specializations_3d.jl @@ -33,7 +33,7 @@ isdir(outdir) && rm(outdir, recursive = true) # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -43,10 +43,10 @@ isdir(outdir) && rm(outdir, recursive = true) # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, 
semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] @@ -72,7 +72,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -82,10 +82,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] @@ -112,7 +112,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -122,10 +122,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] @@ -151,7 +151,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, 
typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -161,10 +161,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] From e320bc52afd30e509e39b92d8820b96a3b16ee0d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Feb 2026 15:08:12 +0100 Subject: [PATCH 095/158] define unsafe_wrap_or_alloc fuer CUDA.KernelAdaptor --- ext/TrixiCUDAExt.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index 681d2f53a1e..c747414a3d4 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -8,4 +8,8 @@ function Trixi.storage_type(::Type{<:CuArray}) return CuArray end +function Trixi.unsafe_wrap_or_alloc(::CUDA.KernelAdaptor, vec, size) + return Trixi.unsafe_wrap_or_alloc(CuDeviceArray, vec, size) +end + end From f72fcc1e341a558982edb26b40e8855740d3caf9 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Feb 2026 15:11:29 +0100 Subject: [PATCH 096/158] fixup! 
define unsafe_wrap_or_alloc fuer CUDA.KernelAdaptor --- ext/TrixiCUDAExt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index c747414a3d4..d772d535ec3 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -1,14 +1,14 @@ # Package extension for adding CUDA-based features to Trixi.jl module TrixiCUDAExt -import CUDA: CuArray +import CUDA: CuArray, CuDeviceArray, KernelAdaptor import Trixi function Trixi.storage_type(::Type{<:CuArray}) return CuArray end -function Trixi.unsafe_wrap_or_alloc(::CUDA.KernelAdaptor, vec, size) +function Trixi.unsafe_wrap_or_alloc(::KernelAdaptor, vec, size) return Trixi.unsafe_wrap_or_alloc(CuDeviceArray, vec, size) end From 4805a702830ac12e522ce10ae36bc806fdbb9cb3 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Feb 2026 15:23:39 +0100 Subject: [PATCH 097/158] fixup! define unsafe_wrap_or_alloc fuer CUDA.KernelAdaptor --- ext/TrixiCUDAExt.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index d772d535ec3..3326c536a76 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -12,4 +12,8 @@ function Trixi.unsafe_wrap_or_alloc(::KernelAdaptor, vec, size) return Trixi.unsafe_wrap_or_alloc(CuDeviceArray, vec, size) end +function Trixi.unsafe_wrap_or_alloc(::Type{<:CuDeviceArray}, vec::CuDeviceArray, size) + return reshape(vec, size) +end + end From 0fa07c4acd3b1d2c6308f1925984c036442c2e26 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Feb 2026 15:38:55 +0100 Subject: [PATCH 098/158] apply bandaid --- src/solvers/dgsem/calc_volume_integral.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index e25432a35fa..06d758e0151 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -194,8 +194,10 @@ function 
calc_volume_integral!(backend::Backend, du, u, mesh, volume_integral, dg::DGSEM, cache) nelements(dg, cache) == 0 && return nothing kernel! = volume_integral_KAkernel!(backend) + # TODO(benegee) Can we generalize this kind of filtering? + kernel_cache = (; elements = (; contravariant_vectors = cache.elements.contravariant_vectors)) kernel!(du, u, typeof(mesh), have_nonconservative_terms, equations, - volume_integral, dg, cache, + volume_integral, dg, kernel_cache, ndrange = nelements(dg, cache)) return nothing end From 287a1138330712817d917901b611f4e343d333b9 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Feb 2026 16:22:05 +0100 Subject: [PATCH 099/158] final fix? --- examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl | 6 ++++-- src/solvers/dgsem/calc_volume_integral.jl | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl index 801ae4cb6bc..c0161c2683a 100644 --- a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -20,11 +20,13 @@ coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) trees_per_dimension = (4, 4, 4) mesh = P4estMesh(trees_per_dimension, polydeg = 3, coordinates_min = coordinates_min, coordinates_max = coordinates_max, - initial_refinement_level = 1) + initial_refinement_level = 1, + periodicity = true) # A semidiscretization collects data structures and functions for the spatial discretization semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, - solver) + solver; + boundary_conditions = boundary_condition_periodic) ############################################################################### # ODE solvers, callbacks etc. 
diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index e25432a35fa..fc79a4d192d 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -269,7 +269,7 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, @threaded for element in eachelement(dg, cache) # run default volume integral - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_default, dg, cache) @@ -306,7 +306,7 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, du[.., element] .= zero(eltype(du)) # Calculate entropy stable volume integral contribution - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_stabilized, dg, cache) From dc6455d8af3d641fa72cb52538bf56beefbde2a6 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Feb 2026 16:30:10 +0100 Subject: [PATCH 100/158] add method to filter the cache --- src/solvers/dgsem/calc_volume_integral.jl | 3 +-- src/solvers/dgsem_p4est/containers.jl | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index a4dfba59072..7e0b438e9cc 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -194,8 +194,7 @@ function calc_volume_integral!(backend::Backend, du, u, mesh, volume_integral, dg::DGSEM, cache) nelements(dg, cache) == 0 && return nothing kernel! = volume_integral_KAkernel!(backend) - # TODO(benegee) Can we generalize this kind of filtering? 
- kernel_cache = (; elements = (; contravariant_vectors = cache.elements.contravariant_vectors)) + kernel_cache = kernel_filter_cache(cache) kernel!(du, u, typeof(mesh), have_nonconservative_terms, equations, volume_integral, dg, kernel_cache, ndrange = nelements(dg, cache)) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 52929319120..44abce1d00e 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -948,6 +948,11 @@ end end end +# Build a reduced cache which can be passed to GPU kernels +@inline function kernel_filter_cache(cache) + return (; elements = (; contravariant_vectors = cache.elements.contravariant_vectors)) +end + include("containers_2d.jl") include("containers_3d.jl") include("containers_parallel.jl") From 6af1201bdb858daca35112457a642e9555bd06d4 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Feb 2026 17:52:44 +0100 Subject: [PATCH 101/158] final^2 --- src/solvers/dgsem/calc_volume_integral.jl | 6 +++--- src/solvers/dgsem_p4est/containers.jl | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 7e0b438e9cc..9a7865948dc 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -285,12 +285,12 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, # No scaling by inverse Jacobian here, as there is no Jacobian multiplication # in `integrate_reference_element`. 
dS_volume_integral = -entropy_change_reference_element(du, u, element, - mesh, equations, + typeof(mesh), equations, dg, cache) # Compute true entropy change given by surface integral of the entropy potential dS_true = surface_integral_reference_element(entropy_potential, u, element, - mesh, equations, dg, cache) + typeof(mesh), equations, dg, cache) # This quantity should be ≤ 0 for an entropy stable volume integral, and # exactly zero for an entropy conservative volume integral. @@ -313,7 +313,7 @@ function calc_volume_integral!(backend::Nothing, du, u, mesh, dS_volume_integral_stabilized = -entropy_change_reference_element(du, u, element, - mesh, + typeof(mesh), equations, dg, cache) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 44abce1d00e..3f86fff2bb9 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -950,7 +950,8 @@ end # Build a reduced cache which can be passed to GPU kernels @inline function kernel_filter_cache(cache) - return (; elements = (; contravariant_vectors = cache.elements.contravariant_vectors)) + return (; + elements = (; contravariant_vectors = cache.elements.contravariant_vectors)) end include("containers_2d.jl") From 2ecdf140ea623348d9e075e62da0a5e485d7d365 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Feb 2026 12:34:05 +0100 Subject: [PATCH 102/158] setup kernelabstraction harness --- .github/workflows/ci.yml | 1 + test/runtests.jl | 12 ++++++++++++ test/test_kernelabstractions.jl | 27 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 test/test_kernelabstractions.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f8e4a3f2e4..ff47696c58e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,6 +85,7 @@ jobs: - performance_specializations - mpi - threaded + - kernelabstractions include: - version: '1.11' os: ubuntu-latest diff --git a/test/runtests.jl 
b/test/runtests.jl index a6910b2616c..7bd8e8928fc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ using Test using MPI: mpiexec +import Trixi # We run tests in parallel with CI jobs setting the `TRIXI_TEST` environment # variable to determine the subset of tests to execute. @@ -120,4 +121,15 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @warn "Unable to run CUDA tests on this machine" end end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "kernelabstractions" + previous_backend = Trixi._PREFERENCE_THREADING + Trixi.set_threading_backend!(:kernelabstractions) + # relaunching julia + try + run(`$(Base.julia_cmd()) --threads=$TRIXI_NTHREADS --check-bounds=yes $(abspath("test_kernelabstractions.jl"))`) + finally + Trixi.set_threading_backend!(Symbol(previous_backend)) + end + end end diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl new file mode 100644 index 00000000000..01ccf7901eb --- /dev/null +++ b/test/test_kernelabstractions.jl @@ -0,0 +1,27 @@ +module TestExamplesKernelAbstractions + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = examples_dir() + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive = true) +Trixi.MPI.Barrier(Trixi.mpi_comm()) + +@testset "Threaded tests" begin +#! 
format: noindent + +@testset "basic" begin + @test Trixi._PREFERENCE_THREADING == :kernelabstractions +end +end + +# Clean up afterwards: delete Trixi.jl output directory +Trixi.mpi_isroot() && isdir(outdir) && @test_nowarn rm(outdir, recursive = true) +Trixi.MPI.Barrier(Trixi.mpi_comm()) + +end # module From c83404a176caff685cb1acf652c72c298b77666a Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Feb 2026 21:09:28 +0100 Subject: [PATCH 103/158] add advection_basic to KA tests --- test/test_cuda_2d.jl | 6 ++-- test/test_cuda_3d.jl | 10 +++--- test/test_kernelabstractions.jl | 56 +++++++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index e43c3f5c1a0..84dc29dd0a0 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -47,9 +47,9 @@ end RealT_for_test_tolerances=Float32, real_type=Float32, storage_type=CuArray) - # # Ensure that we do not have excessive memory allocations - # # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl index 157a95cfabe..92fedb55bf9 100644 --- a/test/test_cuda_3d.jl +++ b/test/test_cuda_3d.jl @@ -21,12 +21,7 @@ isdir(outdir) && rm(outdir, recursive = true) linf=[0.0014537194925779984]) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 - end + @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float64 @test real(ode.p.solver.basis) == Float64 @test real(ode.p.solver.mortar) == Float64 @@ -52,6 
+47,9 @@ end RealT_for_test_tolerances=Float32, real_type=Float32, storage_type=CuArray) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index 01ccf7901eb..ca8b90e6377 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -12,14 +12,66 @@ outdir = "out" Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive = true) Trixi.MPI.Barrier(Trixi.mpi_comm()) -@testset "Threaded tests" begin -#! format: noindent @testset "basic" begin @test Trixi._PREFERENCE_THREADING == :kernelabstractions end + + +@testset "KernelAbstractions CPU 2D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=8.311947673061856e-6, + linf=6.627000273229378e-5) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) end +@trixi_testset "elixir_advection_basic_gpu.jl Float32" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], + RealT_for_test_tolerances=Float32, + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) +end +end + + +@testset "KernelAbstractions CPU 3D" begin +#! 
format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[0.00016263963870641478], + linf=[0.0014537194925779984]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU + l2=[Float32(0.00016263963870641478)], + linf=[Float32(0.0014537194925779984)], + RealT_for_test_tolerances=Float32, + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) +end +end + + # Clean up afterwards: delete Trixi.jl output directory Trixi.mpi_isroot() && isdir(outdir) && @test_nowarn rm(outdir, recursive = true) Trixi.MPI.Barrier(Trixi.mpi_comm()) From c470dc9dc5249feb3ec56d0e69c0af1471eda140 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 26 Feb 2026 09:34:13 +0100 Subject: [PATCH 104/158] no allocation tests --- test/test_cuda_2d.jl | 2 +- test/test_cuda_3d.jl | 2 +- test/test_kernelabstractions.jl | 26 +++++++++++++------------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index 84dc29dd0a0..4b4894f2c73 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -49,7 +49,7 @@ end storage_type=CuArray) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl 
index 92fedb55bf9..908d4f20959 100644 --- a/test/test_cuda_3d.jl +++ b/test/test_cuda_3d.jl @@ -49,7 +49,7 @@ end storage_type=CuArray) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index ca8b90e6377..dff169be3bd 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -12,54 +12,55 @@ outdir = "out" Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive = true) Trixi.MPI.Barrier(Trixi.mpi_comm()) - @testset "basic" begin @test Trixi._PREFERENCE_THREADING == :kernelabstractions end - @testset "KernelAbstractions CPU 2D" begin #! format: noindent @trixi_testset "elixir_advection_basic_gpu.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_2d_dgsem", + "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! l2=8.311947673061856e-6, linf=6.627000273229378e-5) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), - # Expected errors are exactly the same as with TreeMesh! 
+ @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_2d_dgsem", + "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU l2=[Float32(8.311947673061856e-6)], linf=[Float32(6.627000273229378e-5)], RealT_for_test_tolerances=Float32, real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) end end - @testset "KernelAbstractions CPU 3D" begin #! format: noindent @trixi_testset "elixir_advection_basic_gpu.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_3d_dgsem", + "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! l2=[0.00016263963870641478], linf=[0.0014537194925779984]) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_3d_dgsem", + "elixir_advection_basic_gpu.jl"), # Expected errors similar to reference on CPU l2=[Float32(0.00016263963870641478)], linf=[Float32(0.0014537194925779984)], @@ -67,11 +68,10 @@ end real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) end end - # Clean up afterwards: delete Trixi.jl output directory Trixi.mpi_isroot() && isdir(outdir) && @test_nowarn rm(outdir, recursive = true) Trixi.MPI.Barrier(Trixi.mpi_comm()) From 6a3567ab58e285a2352b80a3c1aca1ae82719ed7 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Mon, 16 Mar 2026 20:53:05 +0100 
Subject: [PATCH 105/158] missed --- src/solvers/dgsem_tree/dg_2d.jl | 3 ++- test/runtests.jl | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 297d1eb18e5..f3c54a61769 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -108,7 +108,8 @@ function rhs!(du, u, t, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - return rhs!(nothing, du, u, t, mesh, equations, boundary_conditions, source_terms, dg, cache) + return rhs!(nothing, du, u, t, mesh, equations, boundary_conditions, source_terms, + dg, cache) end # This function is valid for all non-conforming mesh types, i.e., diff --git a/test/runtests.jl b/test/runtests.jl index ee323744692..e30fed631b6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -123,8 +123,8 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" import CUDA if CUDA.functional() - include("test_cuda_2d.jl") - include("test_cuda_3d.jl") + include(joinpath(@__DIR__, "test_cuda_2d.jl")) + include(joinpath(@__DIR__, "test_cuda_3d.jl")) else @warn "Unable to run CUDA tests on this machine" end From 96cdec4ebb89297f4b7f404eee42c6b04660a4a8 Mon Sep 17 00:00:00 2001 From: Benedict <135045760+benegee@users.noreply.github.com> Date: Mon, 16 Mar 2026 20:54:38 +0100 Subject: [PATCH 106/158] Update Project.toml Co-authored-by: Valentin Churavy --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e8c0013c9a6..ae3e36a2559 100644 --- a/Project.toml +++ b/Project.toml @@ -85,7 +85,7 @@ EllipsisNotation = "1.0" FillArrays = "1.13" ForwardDiff = "0.10.38, 1" HDF5 = "0.17" -KernelAbstractions = "0.9.36" +KernelAbstractions = "0.9.38" LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.171" From 5f123eec9406bab193ba75b63feebb2f2f1878ac Mon Sep 17 00:00:00 2001 From: 
Benedict Geihe Date: Mon, 16 Mar 2026 20:56:47 +0100 Subject: [PATCH 107/158] add sources section to benchmark Project.toml --- benchmark/CUDA/Project.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml index 221c03a5947..22dba338fec 100644 --- a/benchmark/CUDA/Project.toml +++ b/benchmark/CUDA/Project.toml @@ -4,3 +4,6 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" OrdinaryDiffEqLowStorageRK = "b0944070-b475-4768-8dec-fb6eb410534d" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" + +[sources] +Trixi = {path = "../.."} From 5974c2a5358d94f713689d5fb63d6165b405f0f8 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 08:01:07 +0100 Subject: [PATCH 108/158] fix meshT --- src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl index 5d3d9d86069..a370eb1bce5 100644 --- a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl @@ -61,7 +61,7 @@ end # Subcell limiting currently only implemented for certain mesh types @inline function volume_integral_kernel!(du, u, element, - mesh::Union{TreeMesh{3}, P4estMesh{3}}, + meshT::Type{<:Union{TreeMesh{3}, P4estMesh{3}}}, nonconservative_terms, equations, volume_integral::VolumeIntegralSubcellLimiting, dg::DGSEM, cache) @@ -78,7 +78,7 @@ end fhat3_L = fhat3_L_threaded[Threads.threadid()] fhat3_R = fhat3_R_threaded[Threads.threadid()] calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh, nonconservative_terms, equations, volume_flux_dg, + u, meshT, nonconservative_terms, equations, volume_flux_dg, dg, element, cache) # low-order FV fluxes @@ -91,13 +91,13 @@ end fstar3_L = fstar3_L_threaded[Threads.threadid()] fstar3_R = fstar3_R_threaded[Threads.threadid()] 
calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh, nonconservative_terms, equations, volume_flux_fv, + u, meshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh, nonconservative_terms, equations, limiter, + u, meshT, nonconservative_terms, equations, limiter, dg, element, cache) # Calculate volume integral contribution of low-order FV flux From 0a3448f8097de1ba39ff7185950f17b0e1f64867 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 09:30:29 +0100 Subject: [PATCH 109/158] add backend argument for coupled semis --- .../semidiscretization_coupled_p4est.jl | 3 ++- src/solvers/dgsem_p4est/dg_2d.jl | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/semidiscretization/semidiscretization_coupled_p4est.jl b/src/semidiscretization/semidiscretization_coupled_p4est.jl index 18fc21c8f50..d481b413b72 100644 --- a/src/semidiscretization/semidiscretization_coupled_p4est.jl +++ b/src/semidiscretization/semidiscretization_coupled_p4est.jl @@ -202,9 +202,10 @@ function rhs!(du_ode, u_ode, u_parent, semis, u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, u_parent, semis, mesh, equations, + @trixi_timeit timer() "rhs!" 
rhs!(backend, du, u, t, u_parent, semis, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 9aad16176d4..2a7950aaa9e 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -994,7 +994,7 @@ end # Call this for coupled P4estMeshView simulations. # The coupling calculations (especially boundary conditions) require data from the parent mesh, which is why # the additional variable u_parent is needed, compared to non-coupled systems. -function rhs!(du, u, t, u_parent, semis, +function rhs!(backend, du, u, t, u_parent, semis, mesh::P4estMeshView{2}, equations, boundary_conditions, source_terms::Source, @@ -1004,19 +1004,19 @@ function rhs!(du, u, t, u_parent, semis, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -1048,12 +1048,12 @@ function rhs!(du, u, t, u_parent, semis, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" 
apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, cache) # Calculate source terms @trixi_timeit timer() "source terms" begin From 70ea4105b9a0a678f3272cafe5f0be5757a4104a Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 09:31:53 +0100 Subject: [PATCH 110/158] fmt --- src/semidiscretization/semidiscretization_coupled_p4est.jl | 3 ++- src/solvers/dgsem_p4est/dg_2d.jl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/semidiscretization/semidiscretization_coupled_p4est.jl b/src/semidiscretization/semidiscretization_coupled_p4est.jl index d481b413b72..6b8ff6a0a0b 100644 --- a/src/semidiscretization/semidiscretization_coupled_p4est.jl +++ b/src/semidiscretization/semidiscretization_coupled_p4est.jl @@ -205,7 +205,8 @@ function rhs!(du_ode, u_ode, u_parent, semis, backend = trixi_backend(u_ode) time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, u_parent, semis, mesh, equations, + @trixi_timeit timer() "rhs!" 
rhs!(backend, du, u, t, u_parent, semis, mesh, + equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 2a7950aaa9e..f8198b6284f 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -1053,7 +1053,8 @@ function rhs!(backend, du, u, t, u_parent, semis, end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin From 95f0f03f1abe01878f6b4af0c9ccc01681393604 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 10:00:01 +0100 Subject: [PATCH 111/158] fix --- src/solvers/dgsem_unstructured/dg_2d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 642c1cf7aa6..760e7e40405 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -420,7 +420,7 @@ end # ----------------- ----------------- # 3 1 # Therefore, we require a different surface integral routine here despite their similar structure. 
-function calc_surface_integral!(du, u, mesh::UnstructuredMesh2D, +function calc_surface_integral!(backend, du, u, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DGSEM, cache) @unpack inverse_weights = dg.basis @unpack surface_flux_values = cache.elements From 0727ec405ffc1dae7364445ad045d8668d33f653 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 10:31:47 +0100 Subject: [PATCH 112/158] fix mesh type --- src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl index a370eb1bce5..674c3c1f8df 100644 --- a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl @@ -120,7 +120,7 @@ end # # See also `flux_differencing_kernel!`. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh::TreeMesh{3}, + u, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) @unpack weights, derivative_split = dg.basis @@ -224,7 +224,7 @@ end fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh::Union{TreeMesh{3}, P4estMesh{3}}, + u, ::Type{<:Union{TreeMesh{3}, P4estMesh{3}}}, nonconservative_terms::False, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes @@ -275,7 +275,7 @@ end fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh::Union{TreeMesh{3}, P4estMesh{3}}, + u, ::Type{<:Union{TreeMesh{3}, P4estMesh{3}}}, nonconservative_terms::True, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = 
cache.antidiffusive_fluxes From 39d4957dd02e7a1f064cf1bc6288e6adbcccb50e Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 11:32:39 +0100 Subject: [PATCH 113/158] fix --- src/solvers/fdsbp_unstructured/fdsbp_2d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl index a9239ada1fa..2d7058b9957 100644 --- a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl +++ b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl @@ -186,7 +186,7 @@ end # Therefore, we require a different surface integral routine here despite their similar structure. # Also, the normal directions are already outward pointing for `UnstructuredMesh2D` so all the # surface contributions are added. -function calc_surface_integral!(du, u, mesh::UnstructuredMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::UnstructuredMesh2D, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) From dc7dbb68ec2ff157e12a53e16340be4b08cc47ca Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 18 Mar 2026 17:00:36 +0100 Subject: [PATCH 114/158] move get_backend to within rhs! 
--- .../semidiscretization_coupled_p4est.jl | 4 +--- .../semidiscretization_hyperbolic.jl | 3 +-- src/solvers/dgmulti/dg.jl | 2 +- src/solvers/dgmulti/flux_differencing.jl | 4 ++-- .../dgmulti/flux_differencing_gauss_sbp.jl | 2 +- src/solvers/dgsem_p4est/dg_2d.jl | 3 ++- src/solvers/dgsem_p4est/dg_3d_parallel.jl | 4 +++- src/solvers/dgsem_structured/dg.jl | 4 +++- src/solvers/dgsem_tree/dg_1d.jl | 4 +++- src/solvers/dgsem_tree/dg_2d.jl | 16 +++------------- src/solvers/dgsem_tree/dg_2d_parallel.jl | 4 +++- 11 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/semidiscretization/semidiscretization_coupled_p4est.jl b/src/semidiscretization/semidiscretization_coupled_p4est.jl index 6b8ff6a0a0b..18fc21c8f50 100644 --- a/src/semidiscretization/semidiscretization_coupled_p4est.jl +++ b/src/semidiscretization/semidiscretization_coupled_p4est.jl @@ -202,11 +202,9 @@ function rhs!(du_ode, u_ode, u_parent, semis, u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, u_parent, semis, mesh, - equations, + @trixi_timeit timer() "rhs!" rhs!(du, u, t, u_parent, semis, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index d45658a1aff..afdaf93d520 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -580,11 +580,10 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? 
time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index d96a7c98323..14394e2c664 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -750,7 +750,7 @@ function calc_sources!(du, u, t, source_terms, return nothing end -function rhs!(backend, du, u, t, mesh, equations, +function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMulti, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 2f8fa10114e..8d056fe1f36 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -629,7 +629,7 @@ end # an entropy conservative/stable discretization. For modal DG schemes, an extra `entropy_projection!` # is required (see https://doi.org/10.1016/j.jcp.2018.02.033, Section 4.3). # Also called by DGMultiFluxDiff{<:GaussSBP} solvers. -function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) @@ -674,7 +674,7 @@ end # integral, e.g., an entropy conservative/stable discretization. The implementation of `rhs!` # for such schemes is very similar to the implementation of `rhs!` for standard DG methods, # but specializes `calc_volume_integral`. 
-function rhs!(backend, du, u, t, mesh, equations, +function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiffSBP, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 843168e1411..48d75938d86 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -599,7 +599,7 @@ end # Specialize RHS so that we can call `invert_jacobian_and_interpolate!` instead of just `invert_jacobian!`, # since `invert_jacobian!` is also used in other places (e.g., parabolic terms). -function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index f8198b6284f..a70a37c46d7 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -994,11 +994,12 @@ end # Call this for coupled P4estMeshView simulations. # The coupling calculations (especially boundary conditions) require data from the parent mesh, which is why # the additional variable u_parent is needed, compared to non-coupled systems. 
-function rhs!(backend, du, u, t, u_parent, semis, +function rhs!(du, u, t, u_parent, semis, mesh::P4estMeshView{2}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} + backend = nothing # Reset du @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index d062d1fdee4..91a700a847c 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -5,10 +5,12 @@ @muladd begin #! format: noindent -function rhs!(backend, du, u, t, +function rhs!(du, u, t, mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} + backend = trixi_backend(u) + # Start to receive MPI data @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index e722b087227..f3e8365b6f6 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -38,10 +38,12 @@ function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeri return nothing end -function rhs!(backend, du, u, t, +function rhs!(du, u, t, mesh::Union{StructuredMesh, StructuredMeshView{2}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} + backend = trixi_backend(u) + # Reset du @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 389c584f4d7..db7a0392af1 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -58,12 +58,14 @@ end # This function is valid for all conforming mesh types (except for `StructuredMesh`), i.e., # all meshes that do not involve mortar operations. # Thus, we can use it for 1D `TreeMesh` and `UnstructuredMesh2D`. 
-function rhs!(backend, du, u, t, +function rhs!(du, u, t, mesh::Union{TreeMesh{1}, UnstructuredMesh2D}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} + backend = trixi_backend(u) + # Reset du @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index f3c54a61769..fa0bb1799db 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -100,28 +100,18 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? -# Deprecated signature -# Remove once TrixiAtmo.jl has been adapted -function rhs!(du, u, t, - mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, - TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, - equations, - boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - return rhs!(nothing, du, u, t, mesh, equations, boundary_conditions, source_terms, - dg, cache) -end - # This function is valid for all non-conforming mesh types, i.e., # all meshes that do involve mortar operations. # Thus, we can use it for the serial (i.e., non-distributed memory parallelized) # 2D/3D `TreeMesh`es, `P4estMesh`es, and `T8codeMesh`es. 
-function rhs!(backend, du, u, t, +function rhs!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} + backend = trixi_backend(u) + # Reset du @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index cfa7f5e6135..d4b6192a4df 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -450,11 +450,13 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars end -function rhs!(backend, du, u, t, +function rhs!(du, u, t, mesh::Union{TreeMeshParallel{2}, P4estMeshParallel{2}, T8codeMeshParallel{2}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} + backend = trixi_backend(u) + # Start to receive MPI data @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) From 476b54f47bf55e0aac61241c380e724bf2264fb7 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 19 Mar 2026 00:00:10 +0100 Subject: [PATCH 115/158] remove backend from max_dt --- src/callbacks_step/stepsize.jl | 55 +++++- src/callbacks_step/stepsize_dg1d.jl | 12 +- src/callbacks_step/stepsize_dg2d.jl | 174 +++++------------- src/callbacks_step/stepsize_dg3d.jl | 153 ++++++--------- .../paired_explicit_runge_kutta.jl | 3 +- 5 files changed, 161 insertions(+), 236 deletions(-) diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index c3d8238ed0a..2a17022c358 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -143,9 +143,8 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = 
wrap_array(u_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) - return cfl_advective(t) * max_dt(backend, u, t, mesh, + return cfl_advective(t) * max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -155,9 +154,8 @@ function calculate_dt(u_ode, t, cfl_advective::Real, cfl_diffusive::Real, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) - return cfl_advective * max_dt(backend, u, t, mesh, + return cfl_advective * max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -169,15 +167,14 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, equations_parabolic = semi.equations_parabolic u = wrap_array(u_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) - dt_advective = cfl_advective(t) * max_dt(backend, u, t, mesh, + dt_advective = cfl_advective(t) * max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) cfl_diff = cfl_diffusive(t) if cfl_diff > 0 # Check if diffusive CFL should be considered - dt_diffusive = cfl_diff * max_dt(backend, u, t, mesh, + dt_diffusive = cfl_diff * max_dt(u, t, mesh, have_constant_diffusivity(equations_parabolic), equations, equations_parabolic, solver, cache) @@ -187,6 +184,50 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, end end +function calc_max_scaled_speed(backend::Nothing, u, mesh, constant_speed, equations, dg, + cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + max_scaled_speed = zero(eltype(u)) + @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) + max_lambda = max_scaled_speed_element(u, typeof(mesh), constant_speed, + equations, dg, + contravariant_vectors, inverse_jacobian, + element) + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s 
properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
+        max_scaled_speed = Base.max(max_scaled_speed, max_lambda)
+    end
+    return max_scaled_speed
+end
+
+function calc_max_scaled_speed(backend::Backend, u, mesh, constant_speed, equations, dg,
+                               cache)
+    @unpack contravariant_vectors, inverse_jacobian = cache.elements
+
+    num_elements = nelements(dg, cache)
+    max_scaled_speeds = allocate(backend, eltype(u), num_elements)
+
+    kernel! = max_scaled_speed_KAkernel!(backend)
+    kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg,
+            contravariant_vectors,
+            inverse_jacobian;
+            ndrange = num_elements)
+
+    return maximum(max_scaled_speeds)
+end
+
+@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, constant_speed,
+                                            equations,
+                                            dg, contravariant_vectors, inverse_jacobian)
+    element = @index(Global)
+    max_scaled_speeds[element] = max_scaled_speed_element(u, meshT, constant_speed,
+                                                         equations, dg,
+                                                         contravariant_vectors,
+                                                         inverse_jacobian,
+                                                         element)
+end
+
 include("stepsize_dg1d.jl")
 include("stepsize_dg2d.jl")
 include("stepsize_dg3d.jl")
diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl
index f445fc79c88..8a029543575 100644
--- a/src/callbacks_step/stepsize_dg1d.jl
+++ b/src/callbacks_step/stepsize_dg1d.jl
@@ -5,7 +5,7 @@
 @muladd begin
 #! 
format: noindent -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, +function max_dt(u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere @@ -28,7 +28,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, +function max_dt(u, t, mesh::TreeMesh{1}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -53,7 +53,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, +function max_dt(u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, @@ -73,7 +73,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh, # for all dimensions +function max_dt(u, t, mesh::TreeMesh, # for all dimensions constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -95,7 +95,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh, # for all dimensions return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, +function max_dt(u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere @@ -122,7 +122,7 @@ function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, +function max_dt(u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, diff --git 
a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 35374ed7028..d057780a380 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, +function max_dt(u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -29,7 +29,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, +function max_dt(u, t, mesh::TreeMesh{2}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -53,7 +53,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, return 4 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, +function max_dt(u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -72,34 +72,34 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::TreeMeshParallel{2}, +function max_dt(u, t, mesh::TreeMeshParallel{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::TreeMeshParallel{2}, +function max_dt(u, t, mesh::TreeMeshParallel{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -110,68 +110,31 @@ end # Thus, there is no `max_dt` function for `TreeMeshParallel{2}` and # `equations_parabolic::AbstractEquationsParabolic` implemented. -function max_dt(backend::Nothing, u, t, +function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, StructuredMeshView{2}}, - constant_speed::False, equations, dg::DG, cache) + constant_speed, equations, dg::DG, cache) + backend = trixi_backend(u) + + max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg, + cache) + # Avoid division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, - equations, dg, contravariant_vectors, - inverse_jacobian, element) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_lambda) - end - return 2 / (nnodes(dg) * max_scaled_speed) -end + max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda) -function max_dt(backend::Backend, u, t, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}, StructuredMeshView{2}}, - constant_speed::False, equations, dg::DG, cache) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - num_elements = nelements(dg, cache) - max_scaled_speeds = allocate(backend, eltype(t), num_elements) - - kernel! = max_scaled_speed_KAkernel!(backend) - kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, - contravariant_vectors, inverse_jacobian, ndrange = num_elements) - # TODO GPU dt on CPU? 
(time integration happens on CPU) - max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) return 2 / (nnodes(dg) * max_scaled_speed) end -# works for both constant and non-constant speed -@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, - mT::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}, - StructuredMeshView{2}}}, - constant_speed, equations, - dg::DG, contravariant_vectors, - inverse_jacobian) - element = @index(Global) - max_scaled_speeds[element] = max_scaled_speed_per_element(u, mT, constant_speed, - equations, dg, - contravariant_vectors, - inverse_jacobian, - element) -end - -@inline function max_scaled_speed_per_element(u, - mT::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}, - StructuredMeshView{2}}}, - constant_speed::False, equations, dg::DG, - contravariant_vectors, inverse_jacobian, - element) +@inline function max_scaled_speed_element(u, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::False, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) max_lambda1 = max_lambda2 = zero(eltype(u)) for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) @@ -193,28 +156,7 @@ end return max_lambda1 + max_lambda2 end -function max_dt(backend::Nothing, u, t, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, - P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, - constant_speed::True, equations, dg::DG, cache) - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors, inverse_jacobian = cache.elements - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_scaled_speed_loc = max_scaled_speed_per_element(u, typeof(mesh), - constant_speed, - equations, dg, - contravariant_vectors, - inverse_jacobian, element) - # Use `Base.max` to prevent silent 
failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_scaled_speed_loc) - end - - return 2 / (nnodes(dg) * max_scaled_speed) -end - -function max_dt(backend::Nothing, u, t, +function max_dt(u, t, mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, @@ -260,32 +202,16 @@ function max_dt(backend::Nothing, u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend::Backend, u, t, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, - P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, - constant_speed::True, equations, dg::DG, cache) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - num_elements = nelements(dg, cache) - max_scaled_speeds = allocate(backend, eltype(t), num_elements) - - kernel! = max_scaled_speed_KAkernel!(backend) - kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, - contravariant_vectors, inverse_jacobian, ndrange = num_elements) - # TODO GPU dt on CPU? 
(time integration happens on CPU) - max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) - return 2 / (nnodes(dg) * max_scaled_speed) -end - -function max_scaled_speed_per_element(u, - ::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}, - StructuredMeshView{2}}}, - constant_speed::True, equations, dg::DG, - contravariant_vectors, inverse_jacobian, - element) +@inline function max_scaled_speed_element(u, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) max_scaled_speed = zero(eltype(u)) max_lambda1, max_lambda2 = max_abs_speeds(equations) for j in eachnode(dg), i in eachnode(dg) @@ -307,7 +233,7 @@ function max_scaled_speed_per_element(u, return max_scaled_speed end -function max_dt(backend::Nothing, u, t, +function max_dt(u, t, mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, @@ -352,68 +278,68 @@ function max_dt(backend::Nothing, u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{2}, +function max_dt(u, t, mesh::P4estMeshParallel{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{2}, +function max_dt(u, t, mesh::P4estMeshParallel{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{2}, +function max_dt(u, t, mesh::T8codeMeshParallel{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{2}, +function max_dt(u, t, mesh::T8codeMeshParallel{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 55645aa92b2..3ff3d7893b9 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, +function max_dt(u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection @@ -31,7 +31,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, +function max_dt(u, t, mesh::TreeMesh{3}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -56,7 +56,7 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, +function max_dt(u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -76,58 +76,26 @@ function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(backend::Nothing, u, t, +function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, - constant_speed::False, equations, dg::DG, cache) - # Avoid division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda = max_scaled_speed_element(u, typeof(mesh), equations, dg, - contravariant_vectors, inverse_jacobian, - element) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_lambda) - end - - return 2 / (nnodes(dg) * max_scaled_speed) -end - -function max_dt(backend::Backend, u, t, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, - constant_speed::False, equations, dg::DG, cache) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - num_elements = nelements(dg, cache) - max_scaled_speeds = allocate(backend, eltype(t), num_elements) + constant_speed, equations, dg::DG, cache) + backend = trixi_backend(u) - kernel! = max_scaled_speed_KAkernel!(backend) - kernel!(max_scaled_speeds, u, typeof(mesh), equations, dg, contravariant_vectors, - inverse_jacobian; - ndrange = num_elements) + max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg, + cache) - # TODO GPU dt on CPU? (time integration happens on CPU) - max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + # Avoid division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda) return 2 / (nnodes(dg) * max_scaled_speed) end -@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, equations, - dg, contravariant_vectors, inverse_jacobian) - element = @index(Global) - max_scaled_speeds[element] = max_scaled_speed_element(u, meshT, equations, dg, - contravariant_vectors, - inverse_jacobian, - element) -end - @inline function max_scaled_speed_element(u, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, equations, dg, + T8codeMesh{3}}}, + constant_speed::False, equations, dg, contravariant_vectors, inverse_jacobian, element) max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u)) @@ -154,7 +122,7 @@ end return max_lambda1 + max_lambda2 + max_lambda3 end -function max_dt(backend::Nothing, u, t, +function max_dt(u, t, mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, @@ -206,52 +174,43 @@ function max_dt(backend::Nothing, u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend, u, t, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, - constant_speed::True, equations, dg::DG, cache) - # Avoid division by zero if the speed vanishes everywhere, - # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors, inverse_jacobian = cache.elements - if backend !== nothing - # TODO: Port to GPU - contravariant_vectors = Array(cache.elements.contravariant_vectors) - inverse_jacobian = Array(cache.elements.inverse_jacobian) - end - +@inline function max_scaled_speed_element(u, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + max_scaled_speed = zero(eltype(u)) max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, + i, j, k, element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 + + Ja13 * max_lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, + i, j, k, element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 + + Ja23 * max_lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, + i, j, k, element) + lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + + Ja33 * max_lambda3) - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, - i, j, k, element) - lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 + - Ja13 * max_lambda3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, - i, j, k, element) - lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 + - Ja23 * max_lambda3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, - i, j, k, element) - lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + - Ja33 * max_lambda3) - - inv_jacobian = abs(inverse_jacobian[i, j, 
k, element]) + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, - inv_jacobian * - (lambda1_transformed + lambda2_transformed + - lambda3_transformed)) - end + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 + max_scaled_speed = Base.max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed + + lambda3_transformed)) end - return 2 / (nnodes(dg) * max_scaled_speed) + return max_scaled_speed end -function max_dt(backend::Nothing, u, t, +function max_dt(u, t, mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, @@ -302,68 +261,68 @@ function max_dt(backend::Nothing, u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{3}, +function max_dt(u, t, mesh::P4estMeshParallel{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{3}, +function max_dt(u, t, mesh::P4estMeshParallel{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{3}, +function max_dt(u, t, mesh::T8codeMeshParallel{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{3}, +function max_dt(u, t, mesh::T8codeMeshParallel{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - backend, u, t, mesh, constant_speed, equations, dg, cache) + u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl index 414a016bd0d..5de2164fa3e 100644 --- a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl +++ b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl @@ -58,9 +58,8 @@ function calculate_cfl(ode_algorithm::AbstractPairedExplicitRK, ode) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) - cfl_number = dt_opt / max_dt(backend, 
u, t0, mesh, + cfl_number = dt_opt / max_dt(u, t0, mesh, have_constant_speed(equations), equations, solver, cache) return cfl_number From fbe217154e06e6cc91513af705a3f62900f31b04 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 19 Mar 2026 00:01:00 +0100 Subject: [PATCH 116/158] here as well --- .../semidiscretization_euler_gravity.jl | 3 +-- src/solvers/dgmulti/dg.jl | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 161af1b079b..661fcdfd5ad 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -301,7 +301,6 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) u_euler = wrap_array(u_ode, semi_euler) u_gravity = wrap_array(cache.u_ode, semi_gravity) du_gravity = wrap_array(cache.du_ode, semi_gravity) - backend = trixi_backend(u_ode) # set up main loop finalstep = false @@ -313,7 +312,7 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) @unpack equations = semi_gravity while !finalstep dtau = @trixi_timeit timer() "calculate dtau" begin - cfl * max_dt(backend, u_gravity, tau, semi_gravity.mesh, + cfl * max_dt(u_gravity, tau, semi_gravity.mesh, have_constant_speed(equations), equations, semi_gravity.solver, semi_gravity.cache) end diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 14394e2c664..3d7aba9536f 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -257,7 +257,7 @@ function dt_polydeg_scaling(dg::DGMulti{3, <:Wedge, <:TensorProductWedge}) end # for the stepsize callback -function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, +function max_dt(u, t, mesh::DGMultiMesh, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti{NDIMS}, @@ -286,7 +286,7 @@ function max_dt(backend::Nothing, u, t, 
mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, +function max_dt(u, t, mesh::DGMultiMesh, constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti{NDIMS}, @@ -315,7 +315,7 @@ function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, end # for the stepsize callback -function max_dt(backend, u, t, mesh::DGMultiMesh, +function max_dt(u, t, mesh::DGMultiMesh, constant_speed::False, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh @@ -338,7 +338,7 @@ function max_dt(backend, u, t, mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(backend, u, t, mesh::DGMultiMesh, +function max_dt(u, t, mesh::DGMultiMesh, constant_speed::True, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh From f344f659f9a40e0090c435f44db7e68cf3a6722d Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 19 Mar 2026 00:01:33 +0100 Subject: [PATCH 117/158] fix --- .../semidiscretization_hyperbolic_parabolic.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index cab2626783c..c730439017c 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -369,11 +369,10 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) - backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" 
rhs!(du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter.counters[1], runtime) From a3eb8c8a90f426d79c15f468324e36fad7db6153 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 19 Mar 2026 00:01:40 +0100 Subject: [PATCH 118/158] add old method signatures to stay compatible with TrixiAtmo.jl --- src/solvers/dgsem/calc_volume_integral.jl | 9 +++++++++ src/solvers/dgsem_p4est/dg_2d.jl | 19 +++++++++++++++++++ src/solvers/dgsem_structured/dg_2d.jl | 15 +++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 9a7865948dc..9a93e7b0a7f 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -177,6 +177,15 @@ end return nothing end +function calc_volume_integral!(du, u, mesh, have_nonconservative_terms, equations, + volume_integral, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + volume_integral_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_integral, dg, cache) + end +end + function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral, dg::DGSEM, cache) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index a70a37c46d7..caf86da9308 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -62,6 +62,11 @@ end end end +# DEPRECATED! Remove when TrixiAtmo.jl has been adapted +function prolong2interfaces!(cache, u, mesh, equations, dg::DG) + prolong2interfaces!(nothing, cache, u, mesh, equations, dg) +end + function prolong2interfaces!(backend::Nothing, cache, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, @@ -154,6 +159,15 @@ end return nothing end +# DEPRECATED! 
Remove when TrixiAtmo.jl has been adapted +function calc_interface_flux!(surface_flux_values, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, have_nonconservative_terms, + equations, surface_integral, dg::DG, cache) + calc_interface_flux!(nothing, surface_flux_values, mesh, have_nonconservative_terms, + equations, surface_integral, dg, cache) +end + function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, @@ -902,6 +916,11 @@ end return nothing end +# DEPRECATED! Remove when TrixiAtmo.jl has been adapted +function calc_surface_integral!(du, u, mesh, equations, surface_integral, dg, cache) + calc_surface_integral!(nothing, du, u, mesh, equations, surface_integral, dg, cache) +end + function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index c2859434239..e88e8504bee 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -70,6 +70,13 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end +# DEPRECATED! Remove when TrixiAtmo.jl has been adapted +function flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, volume_flux, dg::DGSEM, cache, alpha) + flux_differencing_kernel!(du, u, element, typeof(mesh), nonconservative_terms, + equations, volume_flux, dg, cache, alpha) +end + @inline function flux_differencing_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, @@ -743,6 +750,14 @@ function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, return nothing end +# DEPRECATED! 
Remove when TrixiAtmo.jl has been adapted +function apply_jacobian!(du, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, equations, dg::DG, cache) + apply_jacobian!(nothing, du, mesh, equations, dg, cache) +end + function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, From 04e0e2bf92d1734a20a424b2122074a3cdb28744 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 19 Mar 2026 14:31:22 +0100 Subject: [PATCH 119/158] fix --- src/callbacks_step/stepsize.jl | 2 +- src/callbacks_step/stepsize_dg2d.jl | 2 +- src/solvers/dgsem/calc_volume_integral.jl | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 2a17022c358..2133a0ebe5f 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -206,7 +206,7 @@ function calc_max_scaled_speed(backend::Backend, u, mesh, constant_speed, equati @unpack contravariant_vectors, inverse_jacobian = cache.elements num_elements = nelements(dg, cache) - max_scaled_speeds = allocate(backend, eltype(t), num_elements) + max_scaled_speeds = allocate(backend, eltype(u), num_elements) kernel! 
= max_scaled_speed_KAkernel!(backend) kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index d057780a380..476c43018b5 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -112,7 +112,7 @@ end function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}, StructuredMeshView{2}}, + P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed, equations, dg::DG, cache) backend = trixi_backend(u) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 9a93e7b0a7f..e5d75fc1388 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -177,6 +177,7 @@ end return nothing end +# DEPRECATED! Remove when TrixiAtmo.jl has been adapted function calc_volume_integral!(du, u, mesh, have_nonconservative_terms, equations, volume_integral, dg::DGSEM, cache) @threaded for element in eachelement(dg, cache) From 6a95f55b45846752d35b0aa13c8879fcf0897545 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 09:56:32 +0100 Subject: [PATCH 120/158] meshT -> MeshT --- src/callbacks_step/analysis_dg1d.jl | 4 +- src/callbacks_step/analysis_dg2d.jl | 4 +- src/callbacks_step/analysis_dg3d.jl | 4 +- src/callbacks_step/stepsize.jl | 4 +- src/solvers/dgsem/calc_volume_integral.jl | 42 +++++++++---------- src/solvers/dgsem_p4est/dg_2d.jl | 4 +- src/solvers/dgsem_p4est/dg_3d.jl | 20 ++++----- src/solvers/dgsem_structured/dg_2d.jl | 8 ++-- src/solvers/dgsem_structured/dg_3d.jl | 12 +++--- src/solvers/dgsem_tree/dg_1d.jl | 12 +++--- src/solvers/dgsem_tree/dg_2d.jl | 12 +++--- .../dgsem_tree/dg_2d_subcell_limiters.jl | 8 ++-- src/solvers/dgsem_tree/dg_3d.jl | 12 +++--- .../dgsem_tree/dg_3d_subcell_limiters.jl | 8 ++-- 14 files changed, 77 insertions(+), 77 deletions(-) diff --git 
a/src/callbacks_step/analysis_dg1d.jl b/src/callbacks_step/analysis_dg1d.jl index 83bd9746848..b098b89d7e5 100644 --- a/src/callbacks_step/analysis_dg1d.jl +++ b/src/callbacks_step/analysis_dg1d.jl @@ -143,9 +143,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - meshT::Type{<:AbstractMesh{1}}, + MeshT::Type{<:AbstractMesh{1}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, meshT, equations, dg, cache, + return integrate_reference_element(u, element, MeshT, equations, dg, cache, du) do u, i, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, element) du_node = get_node_vars(du, equations, dg, i, element) diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 5fc6d50e543..0a4c85188d1 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -219,9 +219,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - meshT::Type{<:AbstractMesh{2}}, + MeshT::Type{<:AbstractMesh{2}}, equations, dg::DGSEM, cache, args...) 
- return integrate_reference_element(u, element, meshT, equations, dg, cache, + return integrate_reference_element(u, element, MeshT, equations, dg, cache, du) do u, i, j, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, j, element) du_node = get_node_vars(du, equations, dg, i, j, element) diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 2578fa7174a..2b98c9fd51e 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -243,9 +243,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - meshT::Type{<:AbstractMesh{3}}, + MeshT::Type{<:AbstractMesh{3}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, meshT, equations, dg, cache, + return integrate_reference_element(u, element, MeshT, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, j, k, element) du_node = get_node_vars(du, equations, dg, i, j, k, element) diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 2133a0ebe5f..46ddd95304f 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -217,11 +217,11 @@ function calc_max_scaled_speed(backend::Backend, u, mesh, constant_speed, equati return maximum(max_scaled_speeds) end -@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, constant_speed, +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, MeshT, constant_speed, equations, dg, contravariant_vectors, inverse_jacobian) element = @index(Global) - max_scaled_speeds[element] = max_scaled_speed_element(u, meshT, constant_speed, + max_scaled_speeds[element] = max_scaled_speed_element(u, MeshT, constant_speed, equations, dg, contravariant_vectors, inverse_jacobian, diff --git 
a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index e5d75fc1388..0f286d90ec3 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -8,44 +8,44 @@ # The following `volume_integral_kernel!` and `calc_volume_integral!` functions are # dimension and meshtype agnostic, i.e., valid for all 1D, 2D, and 3D meshes. -@inline function volume_integral_kernel!(du, u, element, meshT, +@inline function volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg, cache, alpha = true) - weak_form_kernel!(du, u, element, meshT, + weak_form_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, dg, cache, alpha) return nothing end -@inline function volume_integral_kernel!(du, u, element, meshT, +@inline function volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg, cache, alpha = true) @unpack volume_flux = volume_integral # Volume integral specific data - flux_differencing_kernel!(du, u, element, meshT, + flux_differencing_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_flux, dg, cache, alpha) return nothing end -@inline function volume_integral_kernel!(du, u, element, meshT, +@inline function volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache, alpha = true) @unpack volume_flux_fv = volume_integral # Volume integral specific data - fv_kernel!(du, u, meshT, + fv_kernel!(du, u, MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, cache, element, alpha) return nothing end -@inline function volume_integral_kernel!(du, u, element, meshT, +@inline function volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, 
volume_integral::VolumeIntegralPureLGLFiniteVolumeO2, dg::DGSEM, cache, alpha = true) @@ -53,7 +53,7 @@ end @unpack (sc_interface_coords, volume_flux_fv, reconstruction_mode, slope_limiter, cons2recon, recon2cons) = volume_integral - fvO2_kernel!(du, u, meshT, + fvO2_kernel!(du, u, MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -63,14 +63,14 @@ end return nothing end -@inline function volume_integral_kernel!(du, u, element, meshT, +@inline function volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralAdaptive{<:IndicatorEntropyChange}, dg::DGSEM, cache) @unpack volume_integral_default, volume_integral_stabilized, indicator = volume_integral @unpack maximum_entropy_increase = indicator - volume_integral_kernel!(du, u, element, meshT, + volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral_default, dg, cache) @@ -79,11 +79,11 @@ end # No scaling by inverse Jacobian here, as there is no Jacobian multiplication # in `integrate_reference_element`. dS_default = -entropy_change_reference_element(du, u, element, - meshT, equations, dg, cache) + MeshT, equations, dg, cache) # Compute true entropy change given by surface integral of the entropy potential dS_true = surface_integral_reference_element(entropy_potential, u, element, - meshT, equations, dg, cache) + MeshT, equations, dg, cache) entropy_change = dS_default - dS_true if entropy_change > maximum_entropy_increase # Recompute using EC FD volume integral @@ -92,7 +92,7 @@ end # before any surface terms are added. 
du[.., element] .= zero(eltype(du)) - volume_integral_kernel!(du, u, element, meshT, + volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral_stabilized, dg, cache) end @@ -100,7 +100,7 @@ end return nothing end -@inline function volume_integral_kernel!(du, u, element, meshT, +@inline function volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralEntropyCorrection, dg::DGSEM, cache) @@ -110,7 +110,7 @@ end du_element_threaded = indicator.cache.volume_integral_values_threaded # run default volume integral - volume_integral_kernel!(du, u, element, meshT, + volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral_default, dg, cache) @@ -125,12 +125,12 @@ end # No scaling by inverse Jacobian here, as there is no Jacobian multiplication # in `integrate_reference_element`. dS_volume_integral = -entropy_change_reference_element(du, u, element, - meshT, equations, + MeshT, equations, dg, cache) # Compute true entropy change given by surface integral of the entropy potential dS_true = surface_integral_reference_element(entropy_potential, u, element, - meshT, equations, dg, cache) + MeshT, equations, dg, cache) # This quantity should be ≤ 0 for an entropy stable volume integral, and # exactly zero for an entropy conservative volume integral. 
@@ -147,13 +147,13 @@ end du[.., element] .= zero(eltype(du)) # Calculate entropy stable volume integral contribution - volume_integral_kernel!(du, u, element, meshT, + volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral_stabilized, dg, cache) dS_volume_integral_stabilized = -entropy_change_reference_element(du, u, element, - meshT, + MeshT, equations, dg, cache) @@ -211,11 +211,11 @@ function calc_volume_integral!(backend::Backend, du, u, mesh, return nothing end -@kernel function volume_integral_KAkernel!(du, u, meshT, +@kernel function volume_integral_KAkernel!(du, u, MeshT, have_nonconservative_terms, equations, volume_integral, dg::DGSEM, cache) element = @index(Global) - volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, + volume_integral_kernel!(du, u, element, MeshT, have_nonconservative_terms, equations, volume_integral, dg, cache) end diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index caf86da9308..64578ce5f66 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -325,7 +325,7 @@ end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - meshT::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, + MeshT::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, equations, surface_integral, st::Type{<:DG}, u_interface, interface_index, @@ -336,7 +336,7 @@ end secondary_direction_index, secondary_element_index) @unpack surface_flux = surface_integral - calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, MeshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_flux, equations), equations, diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 
07c6b1b6a0b..f23b1b9b6c6 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -120,10 +120,10 @@ function prolong2interfaces!(backend::Backend, cache, u, return nothing end -@kernel function prolong2interfaces_KAkernel!(interface_u, u, meshT, equations, +@kernel function prolong2interfaces_KAkernel!(interface_u, u, MeshT, equations, neighbor_ids, node_indices, index_range) interface = @index(Global) - prolong2interfaces_interface!(interface_u, u, meshT, equations, neighbor_ids, + prolong2interfaces_interface!(interface_u, u, MeshT, equations, neighbor_ids, node_indices, index_range, interface) end @@ -236,14 +236,14 @@ function calc_interface_flux!(backend::Backend, surface_flux_values, return nothing end -@kernel function calc_interface_flux_KAkernel!(surface_flux_values, meshT, +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, MeshT, have_nonconservative_terms, equations, surface_integral, solverT, u_interface, neighbor_ids, node_indices, contravariant_vectors, index_range) interface = @index(Global) calc_interface_flux_interface!(surface_flux_values, - meshT, + MeshT, have_nonconservative_terms, equations, surface_integral, solverT, u_interface, neighbor_ids, node_indices, contravariant_vectors, @@ -251,7 +251,7 @@ end end @inline function calc_interface_flux_interface!(surface_flux_values, - meshT::Type{<:Union{P4estMesh{3}, + MeshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms, equations, surface_integral, @@ -301,7 +301,7 @@ end i_primary, j_primary, k_primary, primary_element) - calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, MeshT, have_nonconservative_terms, equations, surface_integral, solverT, u_interface, interface, normal_direction, @@ -360,7 +360,7 @@ end # Inlined function for interface flux computation for flux + nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - 
meshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, + MeshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, equations, surface_integral, solverT::Type{<:DG}, u_interface, @@ -370,7 +370,7 @@ end secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, MeshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), equations, surface_integral, solverT, u_interface, @@ -1035,11 +1035,11 @@ function calc_surface_integral!(backend::Backend, du, u, return nothing end -@kernel function calc_surface_integral_KAkernel!(du, meshT, equations, +@kernel function calc_surface_integral_KAkernel!(du, MeshT, equations, surface_integral, dg, factor, surface_flux_values) element = @index(Global) - calc_surface_integral_element!(du, meshT, + calc_surface_integral_element!(du, MeshT, equations, surface_integral, dg, factor, surface_flux_values, element) end diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index e88e8504bee..33d07e490c6 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -142,14 +142,14 @@ end end @inline function flux_differencing_kernel!(du, u, element, - meshT::Type{<:Union{StructuredMesh{2}, + MeshT::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) - flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, + flux_differencing_kernel!(du, u, element, MeshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(volume_flux, equations), equations, @@ -159,7 +159,7 @@ end end @inline function flux_differencing_kernel!(du, u, element, - 
meshT::Type{<:Union{StructuredMesh{2}, + MeshT::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, @@ -173,7 +173,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, MeshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index ee166ed8ca4..08677ae8571 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -171,12 +171,12 @@ end end @inline function flux_differencing_kernel!(du, u, element, - meshT::Type{<:Union{StructuredMesh{3}, + MeshT::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) - flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, + flux_differencing_kernel!(du, u, element, MeshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(volume_flux, equations), equations, volume_flux, dg, cache, alpha) @@ -185,7 +185,7 @@ end end @inline function flux_differencing_kernel!(du, u, element, - meshT::Type{<:Union{StructuredMesh{3}, + MeshT::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, @@ -197,7 +197,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, MeshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -951,10 +951,10 @@ function apply_jacobian!(backend::Backend, du, return 
nothing end -@kernel function apply_jacobian_KAkernel!(du, meshT, equations, dg::DG, +@kernel function apply_jacobian_KAkernel!(du, MeshT, equations, dg::DG, inverse_jacobian) element = @index(Global) - apply_jacobian_element!(du, meshT, equations, dg, inverse_jacobian, element) + apply_jacobian_element!(du, MeshT, equations, dg, inverse_jacobian, element) end @inline function apply_jacobian_element!(du, diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index db7a0392af1..79b649e7955 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -177,7 +177,7 @@ end end @inline function flux_differencing_kernel!(du, u, element, - meshT::Type{<:Union{TreeMesh{1}, + MeshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) @@ -187,7 +187,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, MeshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -213,7 +213,7 @@ end end @inline function fv_kernel!(du, u, - meshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, + MeshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded = cache @@ -222,7 +222,7 @@ end # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, u, meshT, + calcflux_fv!(fstar1_L, fstar1_R, u, MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) @@ -239,7 +239,7 @@ end end @inline function fvO2_kernel!(du, u, - meshT::Type{<:Union{TreeMesh{1}, 
StructuredMesh{1}}}, + MeshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -251,7 +251,7 @@ end # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fvO2!(fstar1_L, fstar1_R, u, meshT, nonconservative_terms, equations, + calcflux_fvO2!(fstar1_L, fstar1_R, u, MeshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, cons2recon, recon2cons) diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index fa0bb1799db..55002f3da25 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -249,7 +249,7 @@ end end end -@inline function flux_differencing_kernel!(du, u, element, meshT::Type{<:TreeMesh{2}}, +@inline function flux_differencing_kernel!(du, u, element, MeshT::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -258,7 +258,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, MeshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -292,7 +292,7 @@ end end @inline function fvO2_kernel!(du, u, - meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + MeshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms, equations, @@ -308,7 +308,7 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = 
fstar2_R_threaded[Threads.threadid()] - calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, + calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -396,7 +396,7 @@ end end @inline function fv_kernel!(du, u, - meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + MeshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms, equations, @@ -409,7 +409,7 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index cf701dc2937..4e819ab2255 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -59,7 +59,7 @@ end # Subcell limiting currently only implemented for certain mesh types @inline function volume_integral_kernel!(du, u, element, - meshT::Type{<:Union{TreeMesh{2}, + MeshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}}}, have_nonconservative_terms, equations, @@ -75,7 +75,7 @@ end fhat1_R = fhat1_R_threaded[Threads.threadid()] fhat2_L = fhat2_L_threaded[Threads.threadid()] fhat2_R = fhat2_R_threaded[Threads.threadid()] - calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, meshT, + calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, MeshT, have_nonconservative_terms, equations, volume_flux_dg, dg, element, cache) @@ -86,14 +86,14 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = 
fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, - u, meshT, have_nonconservative_terms, equations, limiter, + u, MeshT, have_nonconservative_terms, equations, limiter, dg, element, cache) diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index a384d81d289..5626cb62a6d 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -208,7 +208,7 @@ end return nothing end -@inline function flux_differencing_kernel!(du, u, element, meshT::Type{<:TreeMesh{3}}, +@inline function flux_differencing_kernel!(du, u, element, MeshT::Type{<:TreeMesh{3}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -217,7 +217,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, MeshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -261,7 +261,7 @@ end end @inline function fv_kernel!(du, u, - meshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + MeshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms, equations, @@ -278,7 +278,7 @@ end fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - meshT, have_nonconservative_terms, equations, + MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # 
Calculate FV volume integral contribution @@ -301,7 +301,7 @@ end end @inline function fvO2_kernel!(du, u, - meshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + MeshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms, equations, @@ -323,7 +323,7 @@ end fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - meshT, have_nonconservative_terms, equations, + MeshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, cons2recon, recon2cons) diff --git a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl index 674c3c1f8df..c3364e2389f 100644 --- a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl @@ -61,7 +61,7 @@ end # Subcell limiting currently only implemented for certain mesh types @inline function volume_integral_kernel!(du, u, element, - meshT::Type{<:Union{TreeMesh{3}, P4estMesh{3}}}, + MeshT::Type{<:Union{TreeMesh{3}, P4estMesh{3}}}, nonconservative_terms, equations, volume_integral::VolumeIntegralSubcellLimiting, dg::DGSEM, cache) @@ -78,7 +78,7 @@ end fhat3_L = fhat3_L_threaded[Threads.threadid()] fhat3_R = fhat3_R_threaded[Threads.threadid()] calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, meshT, nonconservative_terms, equations, volume_flux_dg, + u, MeshT, nonconservative_terms, equations, volume_flux_dg, dg, element, cache) # low-order FV fluxes @@ -91,13 +91,13 @@ end fstar3_L = fstar3_L_threaded[Threads.threadid()] fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, meshT, nonconservative_terms, equations, volume_flux_fv, + u, MeshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux 
calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, meshT, nonconservative_terms, equations, limiter, + u, MeshT, nonconservative_terms, equations, limiter, dg, element, cache) # Calculate volume integral contribution of low-order FV flux From d7910c750f57f810662b1731876985b57afb9857 Mon Sep 17 00:00:00 2001 From: Benedict <135045760+benegee@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:07:46 +0100 Subject: [PATCH 121/158] Apply suggestions from code review Co-authored-by: Michael Schlottke-Lakemper --- src/callbacks_step/analysis_dg2d.jl | 2 +- src/callbacks_step/analysis_dg3d.jl | 2 +- test/runtests.jl | 1 + test/test_cuda_3d.jl | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 0a4c85188d1..ccf8843b8aa 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -148,7 +148,7 @@ function calc_error_norms(func, _u, t, analyzer, @unpack vandermonde, weights = analyzer @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis - # TODO GPU AnalysiCallback currently lives on CPU + # TODO GPU AnalysisCallback currently lives on CPU backend = trixi_backend(_u) if backend isa Nothing # TODO GPU KA CPU backend @unpack node_coordinates, inverse_jacobian = cache.elements diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 2b98c9fd51e..09b166646f5 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -168,7 +168,7 @@ function calc_error_norms(func, _u, t, analyzer, @unpack vandermonde, weights = analyzer @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis - # TODO GPU AnalysiCallback currently lives on CPU + # TODO GPU AnalysisCallback currently lives on CPU backend = 
trixi_backend(_u) if backend isa Nothing # TODO GPU KA CPU backend @unpack node_coordinates, inverse_jacobian = cache.elements diff --git a/test/runtests.jl b/test/runtests.jl index e30fed631b6..faacce41a27 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -137,6 +137,7 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) try run(`$(Base.julia_cmd()) --threads=$TRIXI_NTHREADS --check-bounds=yes $(abspath("test_kernelabstractions.jl"))`) finally + # Restore previous threading backend for later tests Trixi.set_threading_backend!(Symbol(previous_backend)) end end diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl index 908d4f20959..5c6d5a52709 100644 --- a/test/test_cuda_3d.jl +++ b/test/test_cuda_3d.jl @@ -1,4 +1,4 @@ -module TestCUDA +module TestCUDA3D using Test using Trixi From 32d41ef871af2bdeb56fdcff8660242a71cb0a02 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 10:08:34 +0100 Subject: [PATCH 122/158] module TestCUDA2D --- test/test_cuda_2d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index 4b4894f2c73..f4b1fa1396e 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -1,4 +1,4 @@ -module TestCUDA +module TestCUDA2D using Test using Trixi From cc3c78b858be3d318463dcb5ecb56a8cdeeb071d Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 13:31:26 +0100 Subject: [PATCH 123/158] use log_base and enable flux differencing --- benchmark/CUDA/LocalPreferences.toml | 2 ++ benchmark/CUDA/elixir_euler_taylor_green_vortex.jl | 13 ++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 benchmark/CUDA/LocalPreferences.toml diff --git a/benchmark/CUDA/LocalPreferences.toml b/benchmark/CUDA/LocalPreferences.toml new file mode 100644 index 00000000000..a49b55b6127 --- /dev/null +++ b/benchmark/CUDA/LocalPreferences.toml @@ -0,0 +1,2 @@ +[Trixi] +log = "log_Base" diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl 
b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl index de491a3761b..a00944f6f31 100644 --- a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -26,12 +26,10 @@ end initial_condition = initial_condition_taylor_green_vortex -# TODO Undefined external symbol "log" -#volume_flux = flux_ranocha -volume_flux = flux_lax_friedrichs -solver = DGSEM(polydeg = 5, surface_flux = volume_flux) -# TODO flux diff -#volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) +volume_flux = flux_ranocha +surface_flux = flux_lax_friedrichs +volume_integral=VolumeIntegralFluxDifferencing(volume_flux) +solver = DGSEM(polydeg = 5, surface_flux = surface_flux, volume_integral = volume_integral) coordinates_min = (-1.0, -1.0, -1.0) .* pi coordinates_max = (1.0, 1.0, 1.0) .* pi @@ -43,7 +41,8 @@ mesh = P4estMesh(trees_per_dimension, polydeg = 1, coordinates_min = coordinates_min, coordinates_max = coordinates_max, periodicity = true, initial_refinement_level = initial_refinement_level) -semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; + boundary_conditions = boundary_condition_periodic) ############################################################################### # ODE solvers, callbacks etc. From 418c9443da93f6ef04d3f88e1463b3592ae45559 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 13:37:05 +0100 Subject: [PATCH 124/158] add a short note to the benchmark problem --- benchmark/CUDA/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 benchmark/CUDA/README.md diff --git a/benchmark/CUDA/README.md b/benchmark/CUDA/README.md new file mode 100644 index 00000000000..8f3cf864a1e --- /dev/null +++ b/benchmark/CUDA/README.md @@ -0,0 +1,10 @@ +# CUDA benchmark + +This benchmark runs a moderately sized instance of the Taylor-Green-Vortex problem on +NVIDIA GPUs. 
+ +Note we currently have to switch to `log_Base` using `LocalPreferences.toml` as otherwise we +wiil see +``` +ERROR: LoadError: LLVM error: Undefined external symbol "log" +``` From 2962ed78ad6db9ea8c96c4f6f99a037a5d498ee1 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Mar 2026 13:50:04 +0100 Subject: [PATCH 125/158] add device_override for Trixi.log --- ext/TrixiCUDAExt.jl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index 3326c536a76..2c805e6d53e 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -1,7 +1,7 @@ # Package extension for adding CUDA-based features to Trixi.jl module TrixiCUDAExt -import CUDA: CuArray, CuDeviceArray, KernelAdaptor +import CUDA: CuArray, CuDeviceArray, KernelAdaptor, @device_override import Trixi function Trixi.storage_type(::Type{<:CuArray}) @@ -16,4 +16,12 @@ function Trixi.unsafe_wrap_or_alloc(::Type{<:CuDeviceArray}, vec::CuDeviceArray, return reshape(vec, size) end +@static if TRIXI._PREFERENCE_LOG == "log_Trixi_NaN" + @device_override Trixi.log(x::Float64) = ccall("extern __nv_log", llvmcall, Cdouble, + (Cdouble,), x) + @device_override Trixi.log(x::Float32) = ccall("extern __nv_logf", llvmcall, Cfloat, + (Cfloat,), x) + # TODO: Trixi.log(x::Float16) +end + end From e116f7b3c8a44cfbc9f20ddb7b06ea70c8cbeff1 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Mar 2026 13:50:43 +0100 Subject: [PATCH 126/158] fixup! 
add device_override for Trixi.log --- benchmark/CUDA/LocalPreferences.toml | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 benchmark/CUDA/LocalPreferences.toml diff --git a/benchmark/CUDA/LocalPreferences.toml b/benchmark/CUDA/LocalPreferences.toml deleted file mode 100644 index a49b55b6127..00000000000 --- a/benchmark/CUDA/LocalPreferences.toml +++ /dev/null @@ -1,2 +0,0 @@ -[Trixi] -log = "log_Base" From a1d4481ce3af1b9e4305ee07e412e769f259375e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Mar 2026 13:51:06 +0100 Subject: [PATCH 127/158] fixup! add device_override for Trixi.log --- benchmark/CUDA/README.md | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 benchmark/CUDA/README.md diff --git a/benchmark/CUDA/README.md b/benchmark/CUDA/README.md deleted file mode 100644 index 8f3cf864a1e..00000000000 --- a/benchmark/CUDA/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# CUDA benchmark - -This benchmark runs a moderately sized instance of the Taylor-Green-Vortex problem on -NVIDIA GPUs. - -Note we currently have to switch to `log_Base` using `LocalPreferences.toml` as otherwise we -wiil see -``` -ERROR: LoadError: LLVM error: Undefined external symbol "log" -``` From 5daf2c653cf55bf16418c0b3fb65644060607952 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 14:58:56 +0100 Subject: [PATCH 128/158] typo? 
--- ext/TrixiCUDAExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index 2c805e6d53e..91a21a2cbda 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -16,7 +16,7 @@ function Trixi.unsafe_wrap_or_alloc(::Type{<:CuDeviceArray}, vec::CuDeviceArray, return reshape(vec, size) end -@static if TRIXI._PREFERENCE_LOG == "log_Trixi_NaN" +@static if Trixi._PREFERENCE_LOG == "log_Trixi_NaN" @device_override Trixi.log(x::Float64) = ccall("extern __nv_log", llvmcall, Cdouble, (Cdouble,), x) @device_override Trixi.log(x::Float32) = ccall("extern __nv_logf", llvmcall, Cfloat, From b93bba919a9ad167d7dacf7f7fc9c545a32598d0 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Mar 2026 15:13:49 +0100 Subject: [PATCH 129/158] fixup! add device_override for Trixi.log --- ext/TrixiCUDAExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index 91a21a2cbda..7f4f31c3f0f 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -1,7 +1,7 @@ # Package extension for adding CUDA-based features to Trixi.jl module TrixiCUDAExt -import CUDA: CuArray, CuDeviceArray, KernelAdaptor, @device_override +using CUDA: CUDA, CuArray, CuDeviceArray, KernelAdaptor, @device_override import Trixi function Trixi.storage_type(::Type{<:CuArray}) From fc1cdf54c88ba655b3338fd4616c70332829e9b8 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 15:18:06 +0100 Subject: [PATCH 130/158] unify naming of inner methods use _per_[element|interface] --- src/callbacks_step/stepsize.jl | 4 ++-- src/callbacks_step/stepsize_dg2d.jl | 4 ++-- src/callbacks_step/stepsize_dg3d.jl | 4 ++-- src/solvers/dg.jl | 6 +++--- src/solvers/dgsem_p4est/dg_3d.jl | 18 +++++++++--------- src/solvers/dgsem_structured/dg_3d.jl | 6 +++--- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 
46ddd95304f..5e6101e3ada 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -190,7 +190,7 @@ function calc_max_scaled_speed(backend::Nothing, u, mesh, constant_speed, equati max_scaled_speed = zero(eltype(u)) @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda = max_scaled_speed_element(u, typeof(mesh), constant_speed, + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, equations, dg, contravariant_vectors, inverse_jacobian, element) @@ -221,7 +221,7 @@ end equations, dg, contravariant_vectors, inverse_jacobian) element = @index(Global) - max_scaled_speeds[element] = max_scaled_speed_element(u, MeshT, constant_speed, + max_scaled_speeds[element] = max_scaled_speed_per_element(u, MeshT, constant_speed, equations, dg, contravariant_vectors, inverse_jacobian, diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 476c43018b5..9f80ac81ebc 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -126,7 +126,7 @@ function max_dt(u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -@inline function max_scaled_speed_element(u, +@inline function max_scaled_speed_per_element(u, ::Type{<:Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, @@ -202,7 +202,7 @@ function max_dt(u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -@inline function max_scaled_speed_element(u, +@inline function max_scaled_speed_per_element(u, ::Type{<:Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 3ff3d7893b9..816feede122 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -91,7 +91,7 @@ function max_dt(u, t, return 2 / (nnodes(dg) * max_scaled_speed) end -@inline function max_scaled_speed_element(u, +@inline function max_scaled_speed_per_element(u, 
::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, @@ -174,7 +174,7 @@ function max_dt(u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -@inline function max_scaled_speed_element(u, +@inline function max_scaled_speed_per_element(u, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 86995dd05f5..f124302eb86 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -1219,7 +1219,7 @@ function compute_coefficients!(backend::Nothing, u, func, t, @unpack node_coordinates = cache.elements node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) @threaded for element in eachelement(dg, cache) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, + compute_coefficients_per_element!(u, func, t, equations, dg, node_coordinates, element, node_indices) end @@ -1243,11 +1243,11 @@ end @kernel function compute_coefficients_KAkernel!(u, func, t, equations, dg::DG, node_coordinates, node_indices) element = @index(Global) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element, + compute_coefficients_per_element!(u, func, t, equations, dg, node_coordinates, element, node_indices) end -@inline function compute_coefficients_element!(u, func, t, equations, dg::DG, +@inline function compute_coefficients_per_element!(u, func, t, equations, dg::DG, node_coordinates, element, node_indices) for indices in node_indices x_node = get_node_coords(node_coordinates, equations, dg, indices, element) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index f23b1b9b6c6..1487de6842e 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -99,7 +99,7 @@ function prolong2interfaces!(backend::Nothing, cache, u, index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - prolong2interfaces_interface!(interfaces.u, u, typeof(mesh), equations, + 
prolong2interfaces_per_interface!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, index_range, interface) end @@ -123,11 +123,11 @@ end @kernel function prolong2interfaces_KAkernel!(interface_u, u, MeshT, equations, neighbor_ids, node_indices, index_range) interface = @index(Global) - prolong2interfaces_interface!(interface_u, u, MeshT, equations, neighbor_ids, + prolong2interfaces_per_interface!(interface_u, u, MeshT, equations, neighbor_ids, node_indices, index_range, interface) end -@inline function prolong2interfaces_interface!(u_interface, u, +@inline function prolong2interfaces_per_interface!(u_interface, u, ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, equations, neighbor_ids, node_indices, @@ -210,7 +210,7 @@ function calc_interface_flux!(backend::Nothing, surface_flux_values, index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - calc_interface_flux_interface!(surface_flux_values, + calc_interface_flux_per_interface!(surface_flux_values, typeof(mesh), have_nonconservative_terms, equations, surface_integral, typeof(dg), @@ -242,7 +242,7 @@ end neighbor_ids, node_indices, contravariant_vectors, index_range) interface = @index(Global) - calc_interface_flux_interface!(surface_flux_values, + calc_interface_flux_per_interface!(surface_flux_values, MeshT, have_nonconservative_terms, equations, surface_integral, solverT, u_interface, @@ -250,7 +250,7 @@ end index_range, interface) end -@inline function calc_interface_flux_interface!(surface_flux_values, +@inline function calc_interface_flux_per_interface!(surface_flux_values, MeshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms, @@ -1012,7 +1012,7 @@ function calc_surface_integral!(backend::Nothing, du, u, @unpack surface_flux_values = cache.elements @threaded for element in eachelement(dg, cache) - calc_surface_integral_element!(du, typeof(mesh), + calc_surface_integral_per_element!(du, typeof(mesh), equations, surface_integral, dg, 
inverse_weights[1], surface_flux_values, @@ -1039,12 +1039,12 @@ end surface_integral, dg, factor, surface_flux_values) element = @index(Global) - calc_surface_integral_element!(du, MeshT, + calc_surface_integral_per_element!(du, MeshT, equations, surface_integral, dg, factor, surface_flux_values, element) end -@inline function calc_surface_integral_element!(du, +@inline function calc_surface_integral_per_element!(du, ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, equations, diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 08677ae8571..766a196eb97 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -934,7 +934,7 @@ function apply_jacobian!(backend::Nothing, du, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements @threaded for element in eachelement(dg, cache) - apply_jacobian_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + apply_jacobian_per_element!(du, typeof(mesh), equations, dg, inverse_jacobian, element) end return nothing @@ -954,10 +954,10 @@ end @kernel function apply_jacobian_KAkernel!(du, MeshT, equations, dg::DG, inverse_jacobian) element = @index(Global) - apply_jacobian_element!(du, MeshT, equations, dg, inverse_jacobian, element) + apply_jacobian_per_element!(du, MeshT, equations, dg, inverse_jacobian, element) end -@inline function apply_jacobian_element!(du, +@inline function apply_jacobian_per_element!(du, ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}}, equations, dg, inverse_jacobian, element) From 2f52234d40358c5a41d7b15ba92d9df87f5e2bf1 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 15:20:07 +0100 Subject: [PATCH 131/158] fmt --- .../CUDA/elixir_euler_taylor_green_vortex.jl | 2 +- src/callbacks_step/stepsize.jl | 15 ++-- src/callbacks_step/stepsize_dg2d.jl | 34 ++++---- src/callbacks_step/stepsize_dg3d.jl | 24 +++--- src/solvers/dg.jl | 10 ++- src/solvers/dgsem_p4est/dg_3d.jl | 80 
++++++++++--------- src/solvers/dgsem_structured/dg_3d.jl | 9 ++- 7 files changed, 92 insertions(+), 82 deletions(-) diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl index a00944f6f31..d183c0e1770 100644 --- a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -28,7 +28,7 @@ initial_condition = initial_condition_taylor_green_vortex volume_flux = flux_ranocha surface_flux = flux_lax_friedrichs -volume_integral=VolumeIntegralFluxDifferencing(volume_flux) +volume_integral = VolumeIntegralFluxDifferencing(volume_flux) solver = DGSEM(polydeg = 5, surface_flux = surface_flux, volume_integral = volume_integral) coordinates_min = (-1.0, -1.0, -1.0) .* pi diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 5e6101e3ada..e81cf027745 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -191,9 +191,10 @@ function calc_max_scaled_speed(backend::Nothing, u, mesh, constant_speed, equati max_scaled_speed = zero(eltype(u)) @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, - equations, dg, - contravariant_vectors, inverse_jacobian, - element) + equations, dg, + contravariant_vectors, + inverse_jacobian, + element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 max_scaled_speed = Base.max(max_scaled_speed, max_lambda) @@ -222,10 +223,10 @@ end dg, contravariant_vectors, inverse_jacobian) element = @index(Global) max_scaled_speeds[element] = max_scaled_speed_per_element(u, MeshT, constant_speed, - equations, dg, - contravariant_vectors, - inverse_jacobian, - element) + equations, dg, + contravariant_vectors, + inverse_jacobian, + element) end include("stepsize_dg1d.jl") diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 9f80ac81ebc..201b112b65e 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -127,14 +127,14 @@ function max_dt(u, t, end @inline function max_scaled_speed_per_element(u, - ::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}, - StructuredMeshView{2}}}, - constant_speed::False, equations, dg::DG, - contravariant_vectors, inverse_jacobian, - element) + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::False, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) max_lambda1 = max_lambda2 = zero(eltype(u)) for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) @@ -203,15 +203,15 @@ function max_dt(u, t, end @inline function max_scaled_speed_per_element(u, - ::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, - P4estMeshView{2}, - T8codeMesh{2}, - StructuredMeshView{2}}}, - constant_speed::True, equations, dg::DG, - contravariant_vectors, inverse_jacobian, - element) + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) max_scaled_speed = zero(eltype(u)) max_lambda1, 
max_lambda2 = max_abs_speeds(equations) for j in eachnode(dg), i in eachnode(dg) diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 816feede122..ca434918f53 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -92,12 +92,12 @@ function max_dt(u, t, end @inline function max_scaled_speed_per_element(u, - ::Type{<:Union{StructuredMesh{3}, - P4estMesh{3}, - T8codeMesh{3}}}, - constant_speed::False, equations, dg, - contravariant_vectors, inverse_jacobian, - element) + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + constant_speed::False, equations, dg, + contravariant_vectors, inverse_jacobian, + element) max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u)) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, k, element) @@ -175,12 +175,12 @@ function max_dt(u, t, end @inline function max_scaled_speed_per_element(u, - ::Type{<:Union{StructuredMesh{3}, - P4estMesh{3}, - T8codeMesh{3}}}, - constant_speed::True, equations, dg::DG, - contravariant_vectors, inverse_jacobian, - element) + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) max_scaled_speed = zero(eltype(u)) max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index f124302eb86..b2404041e1b 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -1220,7 +1220,7 @@ function compute_coefficients!(backend::Nothing, u, func, t, node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) @threaded for element in eachelement(dg, cache) compute_coefficients_per_element!(u, func, t, equations, dg, node_coordinates, - element, node_indices) + element, node_indices) end return nothing @@ -1243,12 +1243,14 @@ end 
@kernel function compute_coefficients_KAkernel!(u, func, t, equations, dg::DG, node_coordinates, node_indices) element = @index(Global) - compute_coefficients_per_element!(u, func, t, equations, dg, node_coordinates, element, - node_indices) + compute_coefficients_per_element!(u, func, t, equations, dg, node_coordinates, + element, + node_indices) end @inline function compute_coefficients_per_element!(u, func, t, equations, dg::DG, - node_coordinates, element, node_indices) + node_coordinates, element, + node_indices) for indices in node_indices x_node = get_node_coords(node_coordinates, equations, dg, indices, element) u_node = func(x_node, t, equations) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 1487de6842e..4713ded188f 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -100,8 +100,8 @@ function prolong2interfaces!(backend::Nothing, cache, u, @threaded for interface in eachinterface(dg, cache) prolong2interfaces_per_interface!(interfaces.u, u, typeof(mesh), equations, - neighbor_ids, node_indices, index_range, - interface) + neighbor_ids, node_indices, index_range, + interface) end return nothing end @@ -124,14 +124,15 @@ end neighbor_ids, node_indices, index_range) interface = @index(Global) prolong2interfaces_per_interface!(interface_u, u, MeshT, equations, neighbor_ids, - node_indices, index_range, interface) + node_indices, index_range, interface) end @inline function prolong2interfaces_per_interface!(u_interface, u, - ::Type{<:Union{P4estMesh{3}, - T8codeMesh{3}}}, - equations, neighbor_ids, node_indices, - index_range, interface) + ::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + equations, neighbor_ids, + node_indices, + index_range, interface) # Copy solution data from the primary element using "delayed indexing" with # a start value and two step sizes to get the correct face and orientation. 
# Note that in the current implementation, the interface will be @@ -211,11 +212,13 @@ function calc_interface_flux!(backend::Nothing, surface_flux_values, @threaded for interface in eachinterface(dg, cache) calc_interface_flux_per_interface!(surface_flux_values, - typeof(mesh), - have_nonconservative_terms, - equations, surface_integral, typeof(dg), - cache.interfaces.u, neighbor_ids, node_indices, - contravariant_vectors, index_range, interface) + typeof(mesh), + have_nonconservative_terms, + equations, surface_integral, typeof(dg), + cache.interfaces.u, neighbor_ids, + node_indices, + contravariant_vectors, index_range, + interface) end return nothing end @@ -243,22 +246,24 @@ end contravariant_vectors, index_range) interface = @index(Global) calc_interface_flux_per_interface!(surface_flux_values, - MeshT, - have_nonconservative_terms, - equations, surface_integral, solverT, u_interface, - neighbor_ids, node_indices, contravariant_vectors, - index_range, interface) + MeshT, + have_nonconservative_terms, + equations, surface_integral, solverT, + u_interface, + neighbor_ids, node_indices, + contravariant_vectors, + index_range, interface) end @inline function calc_interface_flux_per_interface!(surface_flux_values, - MeshT::Type{<:Union{P4estMesh{3}, - T8codeMesh{3}}}, - have_nonconservative_terms, - equations, surface_integral, - solverT::Type{<:DG}, u_interface, - neighbor_ids, - node_indices, contravariant_vectors, - index_range, interface) + MeshT::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms, + equations, surface_integral, + solverT::Type{<:DG}, u_interface, + neighbor_ids, + node_indices, contravariant_vectors, + index_range, interface) # Get element and side information on the primary element primary_element = neighbor_ids[1, interface] primary_indices = node_indices[1, interface] @@ -1013,10 +1018,10 @@ function calc_surface_integral!(backend::Nothing, du, u, @threaded for element in eachelement(dg, cache) 
calc_surface_integral_per_element!(du, typeof(mesh), - equations, surface_integral, - dg, inverse_weights[1], - surface_flux_values, - element) + equations, surface_integral, + dg, inverse_weights[1], + surface_flux_values, + element) end return nothing end @@ -1040,17 +1045,18 @@ end surface_flux_values) element = @index(Global) calc_surface_integral_per_element!(du, MeshT, - equations, surface_integral, dg, factor, - surface_flux_values, element) + equations, surface_integral, dg, factor, + surface_flux_values, element) end @inline function calc_surface_integral_per_element!(du, - ::Type{<:Union{P4estMesh{3}, - T8codeMesh{3}}}, - equations, - surface_integral::SurfaceIntegralWeakForm, - dg::DGSEM, factor, surface_flux_values, - element) + ::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, factor, + surface_flux_values, + element) # Note that all fluxes have been computed with outward-pointing normal vectors. # This computes the **negative** surface integral contribution, # i.e., M^{-1} * boundary_interpolation^T (which is for Gauss-Lobatto DGSEM just M^{-1} * B) diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 766a196eb97..f5bba91f44c 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -935,7 +935,7 @@ function apply_jacobian!(backend::Nothing, du, @unpack inverse_jacobian = cache.elements @threaded for element in eachelement(dg, cache) apply_jacobian_per_element!(du, typeof(mesh), equations, dg, inverse_jacobian, - element) + element) end return nothing end @@ -958,9 +958,10 @@ end end @inline function apply_jacobian_per_element!(du, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}}, - equations, dg, inverse_jacobian, element) + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + equations, dg, inverse_jacobian, element) for k in eachnode(dg), j in eachnode(dg), i in 
eachnode(dg) # Negative sign included to account for the negated surface and volume terms, # see e.g. the computation of `derivative_hat` in the basis setup and From 917b3a6e5304df5a8c80069f18ec8d9975a08153 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Tue, 24 Mar 2026 15:39:54 +0100 Subject: [PATCH 132/158] add ndims(MeshT) --- src/meshes/meshes.jl | 2 ++ .../dgsem_structured/dg_2d_compressible_euler.jl | 14 ++++++-------- .../dgsem_structured/dg_3d_compressible_euler.jl | 14 ++++++-------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/meshes/meshes.jl b/src/meshes/meshes.jl index 69a8ea79ffa..b1c1b6c918e 100644 --- a/src/meshes/meshes.jl +++ b/src/meshes/meshes.jl @@ -5,6 +5,8 @@ @muladd begin #! format: noindent +@inline Base.ndims(::Type{<:AbstractMesh{NDIMS}}) where {NDIMS} = NDIMS + include("tree_mesh.jl") include("structured_mesh.jl") include("structured_mesh_view.jl") diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index 508d3c92d82..268bae7f480 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -19,7 +19,7 @@ # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - ::Type{<:Union{StructuredMesh{2}, + MeshT::Type{<:Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, @@ -27,19 +27,18 @@ dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements - ndims = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. 
du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims)..., + ndims(MeshT))..., StaticInt(nvariables(equations)))) @turbo for j in eachnode(dg), i in eachnode(dg) @@ -227,7 +226,7 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - ::Type{<:Union{StructuredMesh{2}, + MeshT::Type{<:Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, @@ -235,13 +234,12 @@ end dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements - ndims = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -250,7 +248,7 @@ end # values. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims)..., + ndims(MeshT))..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for j in eachnode(dg), i in eachnode(dg) diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 9143286b88e..79008f7a552 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -19,26 +19,25 @@ # works efficiently here. 
@inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, + MeshT::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements - ndims = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims)..., + ndims(MeshT))..., StaticInt(nvariables(equations)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -352,20 +351,19 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, + MeshT::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements - ndims = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. 
du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -374,7 +372,7 @@ end # values. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims)..., + ndims(MeshT))..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) From 0b751ac3e013c47876f4db863a8a7197fc1dc6c3 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 08:50:10 +0100 Subject: [PATCH 133/158] add NEWS --- NEWS.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/NEWS.md b/NEWS.md index 711bad308be..db4cb61928f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,18 @@ for human readability. #### Added +- Introducing GPU support: Based on work by Jan Kraus and Lars Christmann, `Trixi.jl` can + now partly be exectuted on GPUs. This includes simulations on `P4estMesh` in 2D and 3D, + with flux differencing and MPI. Adaptive mesh refinement and callbacks have not been + ported, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, + which, at the moment, execute the same code as usually run on CPUs. A backend is selected + by passing an appropriate data type as keyword argument `storage_type` to + `semidiscretize`. See the + [heterogeneous](https://trixi-framework.github.io/TrixiDocumentation/dev/heterogeneous/) + section for some instructions on how to port kernels. This is however still preliminaray + and will change. + GPU kernels are currently CI-tested on NVIDIA GPUs in a buildkite workflow using + `TRIXI_TEST=CUDA` - It is now possible to use `ViscousFormulationLocalDG()` as the `solver_parabolic` for non-conforming `P4estMesh`es. This is useful for (locally) diffusion-dominated problems. 
This enables in particular adaptive mesh refinement for that solver-mesh combination ([#2712]). From 97fdccb7d975efbee3c92213a6f6ba57860cee5b Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 08:50:28 +0100 Subject: [PATCH 134/158] add comment on how to use GPU --- examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl | 1 + examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 9e1fc0dba8e..a362af748c8 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -33,6 +33,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 +# Change `storage_type` to, e.g., `CuArray` to acutally run on GPU ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl index c0161c2683a..18c4e1b3817 100644 --- a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -32,6 +32,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. 
# Create ODE problem with time span from 0.0 to 1.0 +# Change `storage_type` to, e.g., `CuArray` to acutally run on GPU tspan = (0.0, 1.0) ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing) From 44cb1ba7b6f676e248e4eb35b517114127434a23 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 09:31:21 +0100 Subject: [PATCH 135/158] comment [skip ci] --- examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl index 18c4e1b3817..d1497646a15 100644 --- a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -1,5 +1,5 @@ # The same setup as tree_3d_dgsem/elixir_advection_basic.jl -# to verify the P4estMesh implementation against TreeMesh +# to verify GPU support and Adapt.jl support. using OrdinaryDiffEqLowStorageRK using Trixi From 484f58701ebed5a905d5424147f2a61f734e7205 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 10:25:07 +0100 Subject: [PATCH 136/158] activate test_allocations for GPU tests --- test/test_cuda_2d.jl | 2 +- test/test_cuda_3d.jl | 2 +- test/test_kernelabstractions.jl | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index f4b1fa1396e..85dc7b40178 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -49,7 +49,7 @@ end storage_type=CuArray) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl index 5c6d5a52709..8880f8eb393 100644 --- a/test/test_cuda_3d.jl +++ 
b/test/test_cuda_3d.jl @@ -49,7 +49,7 @@ end storage_type=CuArray) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index dff169be3bd..f8f1a3084df 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -27,7 +27,7 @@ end linf=6.627000273229378e-5) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin @@ -40,7 +40,7 @@ end real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1000) end end @@ -55,7 +55,7 @@ end linf=[0.0014537194925779984]) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin @@ -68,7 +68,7 @@ end real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1000) end end From e9e318f7cec1ca77dbdc8caebf25a2afffa20013 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 10:42:58 +0100 Subject: [PATCH 137/158] fixup! 
activate test_allocations for GPU tests --- test/test_kernelabstractions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index f8f1a3084df..6b59b58916a 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -27,7 +27,7 @@ end linf=6.627000273229378e-5) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 75_000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin @@ -40,7 +40,7 @@ end real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 70_000_000) end end From e63e887d8ee0b4b1ee2ebe122760b372cb616cac Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 10:48:55 +0100 Subject: [PATCH 138/158] fixup! activate test_allocations for GPU tests --- test/test_cuda_2d.jl | 2 ++ test/test_cuda_3d.jl | 2 ++ test/test_kernelabstractions.jl | 8 ++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index 85dc7b40178..a9eb2770896 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -21,6 +21,7 @@ isdir(outdir) && rm(outdir, recursive = true) linf=6.627000273229378e-5) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. 
@test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float64 @test real(ode.p.solver.basis) == Float64 @@ -49,6 +50,7 @@ end storage_type=CuArray) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl index 8880f8eb393..b9db5ed1b51 100644 --- a/test/test_cuda_3d.jl +++ b/test/test_cuda_3d.jl @@ -21,6 +21,7 @@ isdir(outdir) && rm(outdir, recursive = true) linf=[0.0014537194925779984]) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float64 @test real(ode.p.solver.basis) == Float64 @@ -49,6 +50,7 @@ end storage_type=CuArray) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index 6b59b58916a..bcc6f2a77b7 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -27,7 +27,8 @@ end linf=6.627000273229378e-5) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 75_000) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. 
+ @test_allocations(Trixi.rhs!, ode.p, sol, 1000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin @@ -40,7 +41,8 @@ end real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) - @test_allocations(Trixi.rhs!, semi, sol, 70_000_000) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. + @test_allocations(Trixi.rhs!, ode.p, sol, 1000) end end @@ -55,6 +57,7 @@ end linf=[0.0014537194925779984]) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. @test_allocations(Trixi.rhs!, semi, sol, 1000) end @@ -68,6 +71,7 @@ end real_type=Float32) # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) + semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. @test_allocations(Trixi.rhs!, semi, sol, 1000) end end From 7fc5dfe44bc379f91a70194aeb92cdf5aee3fb06 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 10:59:52 +0100 Subject: [PATCH 139/158] fixup! fixup! activate test_allocations for GPU tests --- test/test_kernelabstractions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index bcc6f2a77b7..f1d550890e6 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -28,7 +28,7 @@ end # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. 
- @test_allocations(Trixi.rhs!, ode.p, sol, 1000) + @test_allocations(Trixi.rhs!, ode.p, sol, 75_000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin @@ -42,7 +42,7 @@ end # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. - @test_allocations(Trixi.rhs!, ode.p, sol, 1000) + @test_allocations(Trixi.rhs!, ode.p, sol, 60_000) end end From 984c4026144cbc7a3f40869cd8bb5f2c4baae4c6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 11:02:15 +0100 Subject: [PATCH 140/158] fixup! fixup! fixup! activate test_allocations for GPU tests --- test/test_cuda_2d.jl | 2 +- test/test_cuda_3d.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_cuda_2d.jl b/test/test_cuda_2d.jl index a9eb2770896..85fcb75139f 100644 --- a/test/test_cuda_2d.jl +++ b/test/test_cuda_2d.jl @@ -51,7 +51,7 @@ end # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. - @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 700_000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl index b9db5ed1b51..6c590332555 100644 --- a/test/test_cuda_3d.jl +++ b/test/test_cuda_3d.jl @@ -51,7 +51,7 @@ end # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. 
- @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 1_700_000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @test real(ode.p.solver.mortar) == Float32 From e279fc18071a870e66a8390778ba525292cbc2bd Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 11:16:03 +0100 Subject: [PATCH 141/158] fixup! --- test/test_kernelabstractions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl index f1d550890e6..a1a771ee402 100644 --- a/test/test_kernelabstractions.jl +++ b/test/test_kernelabstractions.jl @@ -58,7 +58,7 @@ end # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. - @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 450_000) end @trixi_testset "elixir_advection_basic_gpu.jl Float32" begin @@ -72,7 +72,7 @@ end # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) semi = ode.p # `semidiscretize` adapts the semi, so we need to obtain it from the ODE problem. - @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test_allocations(Trixi.rhs!, semi, sol, 370_000) end end From d0301fb29e9e05073f17819e8c8b0c9ccb68604e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 25 Mar 2026 11:36:25 +0100 Subject: [PATCH 142/158] fix spell check --- NEWS.md | 2 +- examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl | 2 +- examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index db4cb61928f..e6abef585b8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ for human readability. 
#### Added - Introducing GPU support: Based on work by Jan Kraus and Lars Christmann, `Trixi.jl` can - now partly be exectuted on GPUs. This includes simulations on `P4estMesh` in 2D and 3D, + now partly be executed on GPUs. This includes simulations on `P4estMesh` in 2D and 3D, with flux differencing and MPI. Adaptive mesh refinement and callbacks have not been ported, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, which, at the moment, execute the same code as usually run on CPUs. A backend is selected diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index a362af748c8..db474e2b624 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -33,7 +33,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -# Change `storage_type` to, e.g., `CuArray` to acutally run on GPU +# Change `storage_type` to, e.g., `CuArray` to actually run on GPU ode = semidiscretize(semi, (0.0, 1.0); real_type = nothing, storage_type = nothing) # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl index d1497646a15..e0f8d735e21 100644 --- a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -32,7 +32,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. 
# Create ODE problem with time span from 0.0 to 1.0 -# Change `storage_type` to, e.g., `CuArray` to acutally run on GPU +# Change `storage_type` to, e.g., `CuArray` to actually run on GPU tspan = (0.0, 1.0) ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing) From 63565dc55ddee1608fff0fb9ad3d19e5a7eada6a Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 12:09:02 +0100 Subject: [PATCH 143/158] add @invokelatest --- benchmark/CUDA/run.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl index 70c840722af..ebe509d8d31 100644 --- a/benchmark/CUDA/run.jl +++ b/benchmark/CUDA/run.jl @@ -44,9 +44,10 @@ function main(elixir_path) metrics["rhs! time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"]) # compute performance index - nrhscalls = Trixi.ncalls(semi.performance_counter) - walltime = 1.0e-9 * take!(semi.performance_counter) - metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls) + latest_semi = @invokelatest (@__MODULE__).semi + nrhscalls = Trixi.ncalls(latest_semi.performance_counter) + walltime = 1.0e-9 * take!(latest_semi.performance_counter) + metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(latest_semi) * nrhscalls) # write json file open("metrics.out", "w") do f @@ -67,7 +68,7 @@ function main(elixir_path) run_profiler = true) open("profile_float64.txt", "w") do io - show(io, prof_result) + show(io, @invokelatest (@__MODULE__).prof_result) end println("Running profiler (Float32)...") @@ -79,7 +80,7 @@ function main(elixir_path) run_profiler = true) open("profile_float32.txt", "w") do io - show(io, prof_result) + show(io, @invokelatest (@__MODULE__).prof_result) end end From 90d19e660967b534a12031ddde74f9d66e33cb51 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 12:09:14 +0100 Subject: [PATCH 144/158] add compat bounds --- benchmark/CUDA/Project.toml | 7 +++++++ 1 file changed, 
7 insertions(+) diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml index 22dba338fec..ea0a0f77633 100644 --- a/benchmark/CUDA/Project.toml +++ b/benchmark/CUDA/Project.toml @@ -7,3 +7,10 @@ Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" [sources] Trixi = {path = "../.."} + +[compat] +CUDA = "5.8.2" +JSON = "1.4.0" +OrdinaryDiffEqLowStorageRK = "1.12.0" +TimerOutputs = "0.5.25" +Trixi = "0.15.10" From f30dd8d0d1c11e53d182e1640f772ca3345df88a Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 12:09:34 +0100 Subject: [PATCH 145/158] remove finalize --- benchmark/CUDA/elixir_euler_taylor_green_vortex.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl index d183c0e1770..b8b1084932b 100644 --- a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -74,5 +74,3 @@ else solve!(integrator) prof_result = nothing end - -finalize(mesh) From b000ffbd1bff4d44207df3b1218e2f9e4c1a7d93 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 12:11:40 +0100 Subject: [PATCH 146/158] remove kernels for backward compatibilty this breaks downstream tests! --- src/solvers/dgsem/calc_volume_integral.jl | 10 ---------- src/solvers/dgsem_p4est/dg_2d.jl | 19 ------------------- src/solvers/dgsem_structured/dg_2d.jl | 15 --------------- 3 files changed, 44 deletions(-) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 0f286d90ec3..d7017a3ceb4 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -177,16 +177,6 @@ end return nothing end -# DEPRECATED! 
Remove when TrixiAtmo.jl has been adapted -function calc_volume_integral!(du, u, mesh, have_nonconservative_terms, equations, - volume_integral, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - volume_integral_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_integral, dg, cache) - end -end - function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral, dg::DGSEM, cache) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 64578ce5f66..ab4dabee35e 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -62,11 +62,6 @@ end end end -# DEPRECATED! Remove when TrixiAtmo.jl has been adapted -function prolong2interfaces!(cache, u, mesh, equations, dg::DG) - prolong2interfaces!(nothing, cache, u, mesh, equations, dg) -end - function prolong2interfaces!(backend::Nothing, cache, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, @@ -159,15 +154,6 @@ end return nothing end -# DEPRECATED! Remove when TrixiAtmo.jl has been adapted -function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, - T8codeMesh{2}}, have_nonconservative_terms, - equations, surface_integral, dg::DG, cache) - calc_interface_flux!(nothing, surface_flux_values, mesh, have_nonconservative_terms, - equations, surface_integral, dg, cache) -end - function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, @@ -916,11 +902,6 @@ end return nothing end -# DEPRECATED! 
Remove when TrixiAtmo.jl has been adapted -function calc_surface_integral!(du, u, mesh, equations, surface_integral, dg, cache) - calc_surface_integral!(nothing, du, u, mesh, equations, surface_integral, dg, cache) -end - function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 33d07e490c6..685395c9739 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -70,13 +70,6 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -# DEPRECATED! Remove when TrixiAtmo.jl has been adapted -function flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, - equations, volume_flux, dg::DGSEM, cache, alpha) - flux_differencing_kernel!(du, u, element, typeof(mesh), nonconservative_terms, - equations, volume_flux, dg, cache, alpha) -end - @inline function flux_differencing_kernel!(du, u, element, ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, @@ -750,14 +743,6 @@ function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, return nothing end -# DEPRECATED! 
Remove when TrixiAtmo.jl has been adapted -function apply_jacobian!(du, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, - T8codeMesh{2}}, equations, dg::DG, cache) - apply_jacobian!(nothing, du, mesh, equations, dg, cache) -end - function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, From ae433244795b9fc116504a86a0b6936c37817d32 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 12:14:52 +0100 Subject: [PATCH 147/158] no source terms [skip ci] --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index db4cb61928f..11ff3d5b2e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,8 +11,8 @@ for human readability. - Introducing GPU support: Based on work by Jan Kraus and Lars Christmann, `Trixi.jl` can now partly be exectuted on GPUs. This includes simulations on `P4estMesh` in 2D and 3D, - with flux differencing and MPI. Adaptive mesh refinement and callbacks have not been - ported, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, + with flux differencing and MPI. Adaptive mesh refinement, source terms, and callbacks + have not been ported, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, which, at the moment, execute the same code as usually run on CPUs. A backend is selected by passing an appropriate data type as keyword argument `storage_type` to `semidiscretize`. See the From 56d93929a87bf872ddcb4bbb6eebd6151c9795a0 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 12:43:41 +0100 Subject: [PATCH 148/158] comment on MeshT [skip ci] --- NEWS.md | 6 +++--- docs/src/styleguide.md | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index e276dea97f4..807552b1ec8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,9 +10,9 @@ for human readability. 
#### Added - Introducing GPU support: Based on work by Jan Kraus and Lars Christmann, `Trixi.jl` can - now partly be executed on GPUs. This includes simulations on `P4estMesh` in 2D and 3D, - with flux differencing and MPI. Adaptive mesh refinement, source terms, and callbacks - have not been ported, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, + now partly be executed on GPUs. This includes simulations with flux differencing on + `P4estMesh` in 2D and 3D. Adaptive mesh refinement, multi-GPU, source terms, and callbacks + are not available, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, which, at the moment, execute the same code as usually run on CPUs. A backend is selected by passing an appropriate data type as keyword argument `storage_type` to `semidiscretize`. See the diff --git a/docs/src/styleguide.md b/docs/src/styleguide.md index 07f2d90cddc..5fa838e83dc 100644 --- a/docs/src/styleguide.md +++ b/docs/src/styleguide.md @@ -22,6 +22,10 @@ conventions, we apply and enforce automated source code formatting and its siblings, put the `cache` first. * Some internal functions take a "computational backend" argument, this should always be passed as the first argument. * Otherwise, use the order `mesh, equations, solver, cache`. + * In course of GPU offloading we sometimes pass `MeshT = typeof(mesh)` instead of + `mesh` when the called method needs the type of the mesh for dispatch only. This part + of the code is in active development and not considered to be stable API at the + moment. * If something needs to be specified in more detail for dispatch, put the additional argument before the general one that is specified in more detail. For example, we use `have_nonconservative_terms(equations), equations` and `dg.mortar, dg`. 
From eee2168974eeb6c3b7700ac09bb627b5de104459 Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 13:08:10 +0100 Subject: [PATCH 149/158] fixes [skip ci] --- src/solvers/dgsem_p4est/dg_2d.jl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index ab4dabee35e..b2d851745f3 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -245,7 +245,7 @@ end # Initiate the secondary index to be used in the surface for loop. # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. + # the secondary index might need to run backwards for flipped sides. if :i_backward in secondary_indices node_secondary = index_end node_secondary_step = -1 @@ -362,12 +362,10 @@ end # Note the factor 0.5 necessary for the nonconservative fluxes based on # the interpretation of global SBP operators coupled discontinuously via # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = Float64(flux_[v] + - 0.5f0 * - noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = Float64(-(flux_[v] + - 0.5f0 * - noncons_secondary[v])) + surface_flux_values[v, primary_node_index, primary_direction_index, + primary_element_index] = flux_[v] + 0.5f0 * noncons_primary[v] + surface_flux_values[v, secondary_node_index, secondary_direction_index, + secondary_element_index] = -(flux_[v] + 0.5f0 * noncons_secondary[v]) end return nothing From 70b62d5363f162ab41a2d725f486668fe925b85a Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 13:11:15 +0100 Subject: [PATCH 150/158] fmt [skip ci] --- benchmark/CUDA/run.jl | 3 ++- src/solvers/dgsem_structured/dg_2d_compressible_euler.jl | 6 ++++-- src/solvers/dgsem_structured/dg_3d_compressible_euler.jl | 6 ++++-- 3 files 
changed, 10 insertions(+), 5 deletions(-) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl index ebe509d8d31..b9d02246c9b 100644 --- a/benchmark/CUDA/run.jl +++ b/benchmark/CUDA/run.jl @@ -47,7 +47,8 @@ function main(elixir_path) latest_semi = @invokelatest (@__MODULE__).semi nrhscalls = Trixi.ncalls(latest_semi.performance_counter) walltime = 1.0e-9 * take!(latest_semi.performance_counter) - metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(latest_semi) * nrhscalls) + metrics["PID"] = walltime * Trixi.mpi_nranks() / + (Trixi.ndofsglobal(latest_semi) * nrhscalls) # write json file open("metrics.out", "w") do f diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index 268bae7f480..ada75576e2c 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -20,7 +20,8 @@ @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, MeshT::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}}}, + UnstructuredMesh2D, + P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), @@ -227,7 +228,8 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, MeshT::Type{<:Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}}}, + UnstructuredMesh2D, + P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 79008f7a552..f97e60a3077 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -19,7 +19,8 @@ # works efficiently here. 
@inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - MeshT::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, + MeshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), @@ -351,7 +352,8 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - MeshT::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, + MeshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), From 7a27196c378b57af2aca1a9412995e6b1ddc1c0f Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Wed, 25 Mar 2026 20:57:10 +0100 Subject: [PATCH 151/158] try different formatting --- src/solvers/dgsem_p4est/dg_2d.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index b2d851745f3..037a165738e 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -362,10 +362,12 @@ end # Note the factor 0.5 necessary for the nonconservative fluxes based on # the interpretation of global SBP operators coupled discontinuously via # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, - primary_element_index] = flux_[v] + 0.5f0 * noncons_primary[v] - surface_flux_values[v, secondary_node_index, secondary_direction_index, - secondary_element_index] = -(flux_[v] + 0.5f0 * noncons_secondary[v]) + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v] + + 0.5f0 * + noncons_primary[v] + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -(flux_[v] + + 0.5f0 * + noncons_secondary[v]) end return nothing From fb7d10b8b6e5a195869d2a3ec7f63d9d63efd7d2 Mon Sep 17 00:00:00 2001 From: 
Benedict Geihe Date: Wed, 25 Mar 2026 22:37:19 +0100 Subject: [PATCH 152/158] fix --- src/solvers/dgsem_structured/dg_2d_compressible_euler.jl | 8 ++++---- src/solvers/dgsem_structured/dg_3d_compressible_euler.jl | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index ada75576e2c..77b5bc51a09 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -83,7 +83,7 @@ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] @@ -156,7 +156,7 @@ contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] @@ -296,7 +296,7 @@ end contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] @@ -402,7 +402,7 @@ end contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 
f97e60a3077..6cf9fc5673c 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -89,7 +89,7 @@ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) jk = j + nnodes(dg) * (k - 1) @@ -427,7 +427,7 @@ end contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) jk = j + nnodes(dg) * (k - 1) @@ -548,7 +548,7 @@ end (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] @@ -669,7 +669,7 @@ end contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) ij = i + nnodes(dg) * (j - 1) From ef194450f4ed039e1b01890c9ad4a0ecc9c8380d Mon Sep 17 00:00:00 2001 From: Benedict Geihe Date: Thu, 26 Mar 2026 07:37:07 +0100 Subject: [PATCH 153/158] missed ndims [skip ci] --- src/solvers/dgsem_structured/dg_3d_compressible_euler.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 6cf9fc5673c..2022eb9f3e6 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -177,7 +177,7 @@ (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), 
StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] @@ -265,7 +265,7 @@ contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims))) + StaticInt(ndims(MeshT)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) ij = i + nnodes(dg) * (j - 1) From c8431d2992b59b9b1759aad2c8f9cd54182536b4 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 26 Mar 2026 04:27:51 -0500 Subject: [PATCH 154/158] Add GPU parallel set_zero! --- src/solvers/solvers.jl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 5e242bb84bd..0716477be78 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -5,8 +5,14 @@ @muladd begin #! format: noindent -# Used by both `dg::DGSEM` and `dg::FDSBP` function set_zero!(du, dg, cache) + set_zero!(trixi_backend(du), du, dg, cache) + + return nothing +end + +# Used by both `dg::DGSEM` and `dg::FDSBP` +function set_zero!(::Nothing, du, dg, cache) # du .= zero(eltype(du)) doesn't scale when using multiple threads. # See https://github.com/trixi-framework/Trixi.jl/pull/924 for a performance comparison. 
@threaded for element in eachelement(dg, cache) @@ -16,6 +22,12 @@ function set_zero!(du, dg, cache) return nothing end +function set_zero!(::Backend, du, dg, cache) + # Broadcasting is parallel on the GPU + du .= zero(eltype(du)) + return nothing +end + # define types for parabolic solvers include("solvers_parabolic.jl") From 4dc17be09bed741fd6a8ede7e02906b81ff96990 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 26 Mar 2026 15:24:47 +0100 Subject: [PATCH 155/158] set version to v0.16.0-DEV --- NEWS.md | 13 +++++++++---- Project.toml | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 807552b1ec8..efad76ddaea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,14 +5,14 @@ Trixi.jl follows the interpretation of used in the Julia ecosystem. Notable changes will be documented in this file for human readability. -## Changes in the v0.15 lifecycle +## Changes when updating to v0.16 from v0.15 #### Added -- Introducing GPU support: Based on work by Jan Kraus and Lars Christmann, `Trixi.jl` can +- Introducing GPU support: Based on work by Jan Kraus and Lars Christmann, Trixi.jl can now partly be executed on GPUs. This includes simulations with flux differencing on `P4estMesh` in 2D and 3D. Adaptive mesh refinement, multi-GPU, source terms, and callbacks - are not available, yet. Offloading is achieved via `KernelAbstractions.jl` kernels, + are not available, yet. Offloading is achieved via KernelAbstractions.jl kernels, which, at the moment, execute the same code as usually run on CPUs. A backend is selected by passing an appropriate data type as keyword argument `storage_type` to `semidiscretize`. See the @@ -20,7 +20,12 @@ for human readability. section for some instructions on how to port kernels. This is however still preliminaray and will change. GPU kernels are currently CI-tested on NVIDIA GPUs in a buildkite workflow using - `TRIXI_TEST=CUDA` + `TRIXI_TEST=CUDA` ([#2590]). 
+ +## Changes in the v0.15 lifecycle + +#### Added + - It is now possible to use `ViscousFormulationLocalDG()` as the `solver_parabolic` for non-conforming `P4estMesh`es. This is useful for (locally) diffusion-dominated problems. This enables in particular adaptive mesh refinement for that solver-mesh combination ([#2712]). diff --git a/Project.toml b/Project.toml index aa94be549e2..88c58915f3b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" -version = "0.15.10-DEV" +version = "0.16.0-DEV" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan ", "Andrés Rueda-Ramírez "] [deps] From cb3f7f968afecd47acffde77488562f6b2643fd2 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 26 Mar 2026 16:50:15 +0100 Subject: [PATCH 156/158] Apply suggestions from code review Co-authored-by: Hendrik Ranocha --- benchmark/CUDA/Project.toml | 2 +- src/solvers/dgsem_tree/dg_2d_compressible_euler.jl | 14 ++++++-------- src/solvers/dgsem_tree/dg_3d_compressible_euler.jl | 14 ++++++-------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml index ea0a0f77633..2e9f130fe6c 100644 --- a/benchmark/CUDA/Project.toml +++ b/benchmark/CUDA/Project.toml @@ -13,4 +13,4 @@ CUDA = "5.8.2" JSON = "1.4.0" OrdinaryDiffEqLowStorageRK = "1.12.0" TimerOutputs = "0.5.25" -Trixi = "0.15.10" +Trixi = "0.16" diff --git a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl index efcb7cc6794..507b48b20ea 100644 --- a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl @@ -65,25 +65,24 @@ end # muladd # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. 
@inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, ::Type{<:TreeMesh{2}}, + element, MeshT::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis - ndims_mesh = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims_mesh)..., + ndims(MeshT))..., StaticInt(nvariables(equations)))) @turbo for j in eachnode(dg), i in eachnode(dg) @@ -228,19 +227,18 @@ end # muladd end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, ::Type{<:TreeMesh{2}}, + element, MeshT::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis - ndims_mesh = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -249,7 +247,7 @@ end # values. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims_mesh)..., + ndims(MeshT))..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for j in eachnode(dg), i in eachnode(dg) diff --git a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl index f1d2573dc79..1cdf1ca07e6 100644 --- a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl @@ -17,25 +17,24 @@ # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, ::Type{<:TreeMesh{3}}, + element, MeshT::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis - ndims_mesh = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims_mesh)..., + ndims(MeshT))..., StaticInt(nvariables(equations)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -264,19 +263,18 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, ::Type{<:TreeMesh{3}}, + element, MeshT::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis - ndims_mesh = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(MeshT))..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -285,7 +283,7 @@ end # values. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims_mesh)..., + ndims(MeshT))..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) From e98fcf703d90875682b92e08381c92acdbc4c1f0 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 26 Mar 2026 16:53:17 +0100 Subject: [PATCH 157/158] use dispatch for indices2direction --- src/solvers/dgsem_p4est/containers.jl | 4 ++-- src/solvers/dgsem_p4est/dg_2d.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 3f86fff2bb9..2dd360b50cc 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -920,7 +920,7 @@ function count_required_surfaces(mesh::P4estMesh) end # Return direction of the face, which is indexed by node_indices -@inline function indices2direction(indices) +@inline function indices2direction(indices::NTuple{3, Symbol}) if indices[1] === :begin return 1 elseif indices[1] === :end @@ -936,7 +936,7 @@ end end end -@inline function indices2direction2d(indices) +@inline function indices2direction(indices::NTuple{2, Symbol}) if indices[1] === :begin return 1 elseif indices[1] === :end diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 037a165738e..94c477ca8e6 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -227,7 +227,7 @@ end # Get element and side index information on the primary element primary_element = neighbor_ids[1, interface] primary_indices = node_indices[1, interface] - primary_direction = indices2direction2d(primary_indices) + primary_direction = indices2direction(primary_indices) # Create the local i,j indexing on the primary element used to pull normal direction information i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], @@ -241,7 +241,7 
@@ end # Get element and side index information on the secondary element secondary_element = neighbor_ids[2, interface] secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction2d(secondary_indices) + secondary_direction = indices2direction(secondary_indices) # Initiate the secondary index to be used in the surface for loop. # This index on the primary side will always run forward but From cb56c8aa49d4ccd9cc5c45eb4ae06d4808149a06 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 26 Mar 2026 17:47:18 +0100 Subject: [PATCH 158/158] Apply suggestion from @ranocha --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 8d97608b2e2..24c7e28af6f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ Trixi.jl follows the interpretation of used in the Julia ecosystem. Notable changes will be documented in this file for human readability. -## Changes when updating to v0.16 from v0.15 +## Changes when updating to v0.16 from v0.15.x #### Added