diff --git a/common/cuda_hip/base/index_set_kernels.cpp b/common/cuda_hip/base/index_set_kernels.cpp index 0a47752d17e..bcc94bb2d57 100644 --- a/common/cuda_hip/base/index_set_kernels.cpp +++ b/common/cuda_hip/base/index_set_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -32,10 +32,10 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void populate_subsets(std::shared_ptr exec, const IndexType index_space_size, - const array* indices, - array* subset_begin, - array* subset_end, - array* superset_indices, + const array& indices, + array& subset_begin, + array& subset_end, + array& superset_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); diff --git a/common/cuda_hip/distributed/vector_kernels.cpp b/common/cuda_hip/distributed/vector_kernels.cpp index b9c55990fb5..0d579a64581 100644 --- a/common/cuda_hip/distributed/vector_kernels.cpp +++ b/common/cuda_hip/distributed/vector_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -28,7 +28,7 @@ void build_local( const device_matrix_data& input, const experimental::distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx) + comm_index_type local_part, matrix::view::dense local_mtx) { const auto* range_bounds = partition->get_range_bounds(); const auto* range_starting_indices = @@ -62,7 +62,7 @@ void build_local( range_id.get_data())), map_to_local_row); - auto stride = local_mtx->get_stride(); + auto stride = local_mtx.stride; auto map_to_flat_idx = [stride] __host__ __device__( const thrust::tuple& row_col) { @@ -80,7 +80,7 @@ void build_local( thrust::scatter_if( thrust_policy(exec), input.get_const_values(), input.get_const_values() + input.get_num_stored_elements(), flat_idx_it, - range_id.get_data(), local_mtx->get_values(), is_local_row); + range_id.get_data(), local_mtx.values, is_local_row); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/common/cuda_hip/matrix/coo_kernels.cpp b/common/cuda_hip/matrix/coo_kernels.cpp index fca15e724ae..b74cef17c37 100644 --- a/common/cuda_hip/matrix/coo_kernels.cpp +++ b/common/cuda_hip/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -232,7 +232,8 @@ __global__ __launch_bounds__(spmv_block_size) void abstract_spmm( template void spmv(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -243,11 +244,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -260,10 +261,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto nnz = a->get_num_stored_elements(); - const auto b_ncols = b->get_size()[1]; + const auto b_ncols = b.size[1]; const dim3 coo_block(config::warp_size, warps_in_block, 1); const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); @@ -296,8 +298,8 @@ void spmv2(std::shared_ptr exec, nnz, num_lines, as_device_type(a->get_const_values()), a->get_const_col_idxs(), as_device_type(a->get_const_row_idxs()), - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(c->get_values()), c->get_stride()); + as_device_type(b.values), b.stride, as_device_type(c.values), + c.stride); } else { int num_elems = ceildiv(nnz, nwarps * config::warp_size) * config::warp_size; @@ -308,8 +310,8 @@ void spmv2(std::shared_ptr exec, nnz, num_elems, as_device_type(a->get_const_values()), a->get_const_col_idxs(), as_device_type(a->get_const_row_idxs()), b_ncols, - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(c->get_values()), c->get_stride()); + as_device_type(b.values), b.stride, as_device_type(c.values), + c.stride); } } } @@ -319,15 +321,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto nnz = a->get_num_stored_elements(); const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); const dim3 coo_block(config::warp_size, warps_in_block, 1); - const auto b_ncols = b->get_size()[1]; + const auto b_ncols = b.size[1]; if (nwarps <= 0 || b_ncols <= 0) { return; @@ -355,11 +357,11 @@ void advanced_spmv2(std::shared_ptr exec, const dim3 coo_grid(ceildiv(nwarps, warps_in_block), b_ncols); abstract_spmv<<get_stream()>>>( - nnz, num_lines, as_device_type(alpha->get_const_values()), + nnz, num_lines, as_device_type(alpha.values), as_device_type(a->get_const_values()), a->get_const_col_idxs(), as_device_type(a->get_const_row_idxs()), - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(c->get_values()), c->get_stride()); + as_device_type(b.values), b.stride, as_device_type(c.values), + c.stride); } else { int num_elems = ceildiv(nnz, nwarps * config::warp_size) * config::warp_size; @@ -367,11 +369,11 @@ void advanced_spmv2(std::shared_ptr exec, ceildiv(b_ncols, config::warp_size)); abstract_spmm<<get_stream()>>>( - nnz, num_elems, as_device_type(alpha->get_const_values()), + nnz, num_elems, as_device_type(alpha.values), as_device_type(a->get_const_values()), a->get_const_col_idxs(), as_device_type(a->get_const_row_idxs()), b_ncols, - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(c->get_values()), c->get_stride()); + as_device_type(b.values), b.stride, as_device_type(c.values), + c.stride); } } } diff --git a/common/cuda_hip/matrix/csr_kernels.template.cpp b/common/cuda_hip/matrix/csr_kernels.template.cpp index f0017b76718..4772812b9a3 100644 --- a/common/cuda_hip/matrix/csr_kernels.template.cpp +++ b/common/cuda_hip/matrix/csr_kernels.template.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/matrix/csr_kernels.hpp" #include +#include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1395,9 +1397,9 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); template void spgeam(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { @@ -1410,11 +1412,10 @@ void spgeam(std::shared_ptr exec, return compiled_subwarp_size >= nnz_per_row || compiled_subwarp_size == config::warp_size; }, - syn::value_list(), syn::type_list<>(), exec, - alpha->get_const_values(), a->get_const_row_ptrs(), - a->get_const_col_idxs(), a->get_const_values(), - beta->get_const_values(), b->get_const_row_ptrs(), - b->get_const_col_idxs(), b->get_const_values(), c); + syn::value_list(), syn::type_list<>(), exec, alpha.values, + a->get_const_row_ptrs(), a->get_const_col_idxs(), a->get_const_values(), + beta.values, b->get_const_row_ptrs(), b->get_const_col_idxs(), + b->get_const_values(), c); } @@ -1452,9 +1453,9 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam_numeric, spgeam_numeric); template void spgeam_numeric(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { @@ -1467,23 +1468,22 @@ void spgeam_numeric(std::shared_ptr exec, return compiled_subwarp_size >= nnz_per_row || compiled_subwarp_size == config::warp_size; }, - syn::value_list(), syn::type_list<>(), exec, - alpha->get_const_values(), a->get_const_row_ptrs(), - a->get_const_col_idxs(), a->get_const_values(), - beta->get_const_values(), b->get_const_row_ptrs(), - b->get_const_col_idxs(), b->get_const_values(), c->get_const_row_ptrs(), - c->get_values(), a->get_size()[0]); + syn::value_list(), syn::type_list<>(), exec, alpha.values, + a->get_const_row_ptrs(), a->get_const_col_idxs(), a->get_const_values(), + beta.values, b->get_const_row_ptrs(), b->get_const_col_idxs(), + b->get_const_values(), c->get_const_row_ptrs(), c->get_values(), + a->get_size()[0]); } template void fill_in_dense(std::shared_ptr exec, const matrix::Csr* source, - matrix::Dense* result) + matrix::view::dense result) { - const auto num_rows = result->get_size()[0]; - const auto num_cols = result->get_size()[1]; - const auto stride = result->get_stride(); + const auto num_rows = result.size[0]; + const auto num_cols = result.size[1]; + const auto stride = result.stride; const auto row_ptrs = source->get_const_row_ptrs(); const auto col_idxs = source->get_const_col_idxs(); const auto vals = source->get_const_values(); @@ -1493,7 +1493,7 @@ void fill_in_dense(std::shared_ptr exec, kernel::fill_in_dense<<get_stream()>>>( num_rows, as_device_type(row_ptrs), as_device_type(col_idxs), - as_device_type(vals), stride, as_device_type(result->get_values())); + as_device_type(vals), stride, as_device_type(result.values)); } } @@ -1746,7 +1746,7 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, array* row_nnz) + const span& col_span, array& row_nnz) { const auto num_rows = source->get_size()[0]; auto row_ptrs = source->get_const_row_ptrs(); @@ -1756,7 +1756,7 @@ void calculate_nonzeros_per_row_in_span( kernel::calculate_nnz_per_row_in_span<<get_stream()>>>( row_span, col_span, as_device_type(row_ptrs), - as_device_type(col_idxs), as_device_type(row_nnz->get_data())); + as_device_type(col_idxs), as_device_type(row_nnz.get_data())); } } @@ -1859,11 +1859,12 @@ void fallback_sort(std::shared_ptr exec, template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr* to_check, bool* is_sorted) + const matrix::Csr* to_check, bool& is_sorted) { - *is_sorted = true; - auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); - auto gpu_array = array{exec, cpu_array}; + is_sorted = true; + auto gpu_array = array{exec, 1}; + // need to initialize the GPU value to true + exec->copy_from(exec->get_master(), 1, &is_sorted, gpu_array.get_data()); auto block_size = default_block_size; auto num_rows = static_cast(to_check->get_size()[0]); auto num_blocks = ceildiv(num_rows, block_size); @@ -1873,7 +1874,7 @@ void is_sorted_by_column_index( to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), num_rows, gpu_array.get_data()); } - cpu_array = gpu_array; + is_sorted = get_element(gpu_array, 0); } @@ -1925,8 +1926,8 @@ void check_diagonal_entries_exist(std::shared_ptr exec, template void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* beta, + matrix::view::dense alpha, + matrix::view::dense beta, matrix::Csr* mtx) { const auto nrows = mtx->get_size()[0]; @@ -1937,10 +1938,9 @@ void add_scaled_identity(std::shared_ptr exec, const auto nblocks = ceildiv(nthreads, default_block_size); kernel::add_scaled_identity<<get_stream()>>>( - as_device_type(alpha->get_const_values()), - as_device_type(beta->get_const_values()), static_cast(nrows), - mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - as_device_type(mtx->get_values())); + as_device_type(alpha.values), as_device_type(beta.values), + static_cast(nrows), mtx->get_const_row_ptrs(), + mtx->get_const_col_idxs(), as_device_type(mtx->get_values())); } @@ -1950,13 +1950,18 @@ namespace { template -void merge_path_spmv(syn::value_list, - std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void merge_path_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -1973,7 +1978,7 @@ void merge_path_spmv(syn::value_list, const auto a_vals = acc::helper::build_const_rrm_accessor(a); - for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { + for (IndexType column_id = 0; column_id < b.size[1]; column_id++) { const auto column_span = acc::index_span(static_cast(column_id), static_cast(column_id + 1)); @@ -1982,7 +1987,7 @@ void merge_path_spmv(syn::value_list, column_span); auto c_vals = acc::helper::build_rrm_accessor(c, column_span); - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { if (grid_num > 0) { kernel::abstract_merge_path_spmv <<get_stream()>>>( @@ -2001,17 +2006,17 @@ void merge_path_spmv(syn::value_list, as_device_type(row_out.get_data()), acc::as_device_range(c_vals)); - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { if (grid_num > 0) { kernel::abstract_merge_path_spmv <<get_stream()>>>( static_cast(a->get_size()[0]), - as_device_type(alpha->get_const_values()), + as_device_type(alpha->values), acc::as_device_range(a_vals), a->get_const_col_idxs(), as_device_type(a->get_const_row_ptrs()), as_device_type(a->get_const_srow()), acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), + as_device_type(beta->values), acc::as_device_range(c_vals), as_device_type(row_out.get_data()), as_device_type(val_out.get_data())); @@ -2020,7 +2025,7 @@ void merge_path_spmv(syn::value_list, abstract_reduce<<<1, spmv_block_size, 0, exec->get_stream()>>>( grid_num, as_device_type(val_out.get_data()), as_device_type(row_out.get_data()), - as_device_type(alpha->get_const_values()), + as_device_type(alpha->values), acc::as_device_range(c_vals)); } else { GKO_KERNEL_NOT_FOUND; @@ -2088,13 +2093,18 @@ int compute_items_per_thread(std::shared_ptr exec) template -void classical_spmv(syn::value_list, - std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void classical_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -2105,7 +2115,7 @@ void classical_spmv(syn::value_list, const auto gridx = std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size), int64(nwarps / warps_in_block)); - const dim3 grid(gridx, b->get_size()[1]); + const dim3 grid(gridx, b.size[1]); const auto block = spmv_block_size; const auto a_vals = @@ -2113,7 +2123,7 @@ void classical_spmv(syn::value_list, const auto b_vals = acc::helper::build_const_rrm_accessor(b); auto c_vals = acc::helper::build_rrm_accessor(c); - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { if (grid.x > 0 && grid.y > 0) { kernel::abstract_classical_spmv <<get_stream()>>>( @@ -2122,15 +2132,14 @@ void classical_spmv(syn::value_list, as_device_type(a->get_const_row_ptrs()), acc::as_device_range(b_vals), acc::as_device_range(c_vals)); } - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { if (grid.x > 0 && grid.y > 0) { kernel::abstract_classical_spmv <<get_stream()>>>( - a->get_size()[0], as_device_type(alpha->get_const_values()), + a->get_size()[0], as_device_type(alpha->values), acc::as_device_range(a_vals), a->get_const_col_idxs(), as_device_type(a->get_const_row_ptrs()), - acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), + acc::as_device_range(b_vals), as_device_type(beta->values), acc::as_device_range(c_vals)); } } else { @@ -2143,12 +2152,17 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template -bool load_balance_spmv(std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +bool load_balance_spmv( + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -2170,15 +2184,14 @@ bool load_balance_spmv(std::shared_ptr exec, #endif { if (beta) { - dense::scale(exec, beta, c); + dense::scale(exec, *beta, c); } else { dense::fill(exec, c, zero()); } const IndexType nwarps = a->get_num_srow_elements(); if (nwarps > 0) { const dim3 csr_block(config::warp_size, warps_in_block, 1); - const dim3 csr_grid(ceildiv(nwarps, warps_in_block), - b->get_size()[1]); + const dim3 csr_grid(ceildiv(nwarps, warps_in_block), b.size[1]); const auto a_vals = acc::helper::build_const_rrm_accessor(a); const auto b_vals = @@ -2189,7 +2202,7 @@ bool load_balance_spmv(std::shared_ptr exec, kernel::abstract_spmv<<get_stream()>>>( nwarps, static_cast(a->get_size()[0]), - as_device_type(alpha->get_const_values()), + as_device_type(alpha->values), acc::as_device_range(a_vals), a->get_const_col_idxs(), as_device_type(a->get_const_row_ptrs()), as_device_type(a->get_const_srow()), @@ -2218,14 +2231,13 @@ template bool try_general_sparselib_spmv(std::shared_ptr exec, const ValueType* alpha, const matrix::Csr* a, - const matrix::Dense* b, + matrix::view::dense b, const ValueType* beta, - matrix::Dense* c) + matrix::view::dense c) { #ifdef GKO_COMPILING_HIP bool try_sparselib = sparselib::is_supported::value; - try_sparselib = - try_sparselib && b->get_stride() == 1 && c->get_stride() == 1; + try_sparselib = try_sparselib && b.stride == 1 && c.stride == 1; // rocSPARSE has issues with zero matrices try_sparselib = try_sparselib && a->get_num_stored_elements() > 0; if (try_sparselib) { @@ -2238,7 +2250,7 @@ bool try_general_sparselib_spmv(std::shared_ptr exec, SPARSELIB_OPERATION_NON_TRANSPOSE, a->get_size()[0], a->get_size()[1], a->get_num_stored_elements(), alpha, descr, a->get_const_values(), row_ptrs, col_idxs, - b->get_const_values(), beta, c->get_values()); + b.values, beta, c.values); sparselib::destroy(descr); } @@ -2256,11 +2268,11 @@ bool try_general_sparselib_spmv(std::shared_ptr exec, auto mat = sparselib::create_csr(a->get_size()[0], a->get_size()[1], a->get_num_stored_elements(), row_ptrs, col_idxs, values); - auto b_val = const_cast(b->get_const_values()); - auto c_val = c->get_values(); - if (b->get_stride() == 1 && c->get_stride() == 1) { - auto vecb = sparselib::create_dnvec(b->get_size()[0], b_val); - auto vecc = sparselib::create_dnvec(c->get_size()[0], c_val); + auto b_val = const_cast(b.values); + auto c_val = c.values; + if (b.stride == 1 && c.stride == 1) { + auto vecb = sparselib::create_dnvec(b.size[0], b_val); + auto vecc = sparselib::create_dnvec(c.size[0], c_val); #if CUDA_VERSION >= 11021 constexpr auto alg = CUSPARSE_SPMV_CSR_ALG1; #else @@ -2278,17 +2290,15 @@ bool try_general_sparselib_spmv(std::shared_ptr exec, sparselib::destroy(vecc); } else { #if CUDA_VERSION >= 11060 - if (b->get_size()[1] == 1) { + if (b.size[1] == 1) { // cusparseSpMM seems to take the single strided vector as column // major without considering stride and row major (cuda 11.6) return false; } #endif // CUDA_VERSION >= 11060 cusparseSpMMAlg_t alg = CUSPARSE_SPMM_CSR_ALG2; - auto vecb = - sparselib::create_dnmat(b->get_size(), b->get_stride(), b_val); - auto vecc = - sparselib::create_dnmat(c->get_size(), c->get_stride(), c_val); + auto vecb = sparselib::create_dnmat(b.size, b.stride, b_val); + auto vecc = sparselib::create_dnmat(c.size, c.stride, c_val); size_type buffer_size = 0; sparselib::spmm_buffersize(handle, trans, trans, alpha, mat, vecb, beta, vecc, alg, @@ -2312,28 +2322,35 @@ template ::value || !std::is_same::value>> -bool try_sparselib_spmv(std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +bool try_sparselib_spmv( + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { // TODO: support sparselib mixed return false; } template -bool try_sparselib_spmv(std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +bool try_sparselib_spmv( + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, matrix::view::dense c, + xstd::type_identity_t>> + alpha = {}, + xstd::type_identity_t>> + beta = {}) { if (alpha) { - return try_general_sparselib_spmv(exec, alpha->get_const_values(), a, b, - beta->get_const_values(), c); + return try_general_sparselib_spmv(exec, alpha->values, a, b, + beta->values, c); } else { auto handle = exec->get_sparselib_handle(); sparselib::pointer_mode_guard pm_guard(handle); @@ -2352,10 +2369,10 @@ template void spmv(std::shared_ptr exec, const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do } else if (a->get_strategy()->get_name() == "merge_path") { using arithmetic_type = @@ -2408,13 +2425,13 @@ void spmv(std::shared_ptr exec, template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do } else if (a->get_strategy()->get_name() == "merge_path") { using arithmetic_type = @@ -2617,10 +2634,10 @@ void spgemm(std::shared_ptr exec, template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, matrix::Csr* c) { @@ -2697,10 +2714,9 @@ void advanced_spgemm(std::shared_ptr exec, return compiled_subwarp_size >= nnz_per_row || compiled_subwarp_size == config::warp_size; }, - syn::value_list(), syn::type_list<>(), exec, - alpha->get_const_values(), c_tmp_row_ptrs, c_tmp_col_idxs, - c_tmp_vals, beta->get_const_values(), d_row_ptrs, d_col_idxs, - d_vals, c); + syn::value_list(), syn::type_list<>(), exec, alpha.values, + c_tmp_row_ptrs, c_tmp_col_idxs, c_tmp_vals, beta.values, d_row_ptrs, + d_col_idxs, d_vals, c); } else { GKO_NOT_IMPLEMENTED; } @@ -2708,7 +2724,7 @@ void advanced_spgemm(std::shared_ptr exec, auto handle = exec->get_sparselib_handle(); sparselib::pointer_mode_guard pm_guard(handle); - auto valpha = exec->copy_val_to_host(alpha->get_const_values()); + auto valpha = exec->copy_val_to_host(alpha.values); auto a_nnz = IndexType(a->get_num_stored_elements()); auto a_vals = a->get_const_values(); auto a_row_ptrs = a->get_const_row_ptrs(); @@ -2717,7 +2733,7 @@ void advanced_spgemm(std::shared_ptr exec, auto b_vals = b->get_const_values(); auto b_row_ptrs = b->get_const_row_ptrs(); auto b_col_idxs = b->get_const_col_idxs(); - auto vbeta = exec->copy_val_to_host(beta->get_const_values()); + auto vbeta = exec->copy_val_to_host(beta.values); auto d_nnz = IndexType(d->get_num_stored_elements()); auto d_vals = d->get_const_values(); auto d_row_ptrs = d->get_const_row_ptrs(); @@ -2788,11 +2804,11 @@ void advanced_spgemm(std::shared_ptr exec, return compiled_subwarp_size >= nnz_per_row || compiled_subwarp_size == config::warp_size; }, - syn::value_list(), syn::type_list<>(), exec, - alpha->get_const_values(), c_tmp_row_ptrs_array.get_const_data(), + syn::value_list(), syn::type_list<>(), exec, alpha.values, + c_tmp_row_ptrs_array.get_const_data(), c_tmp_col_idxs_array.get_const_data(), - c_tmp_vals_array.get_const_data(), beta->get_const_values(), d_row_ptrs, - d_col_idxs, d_vals, c); + c_tmp_vals_array.get_const_data(), beta.values, d_row_ptrs, d_col_idxs, + d_vals, c); #endif // GKO_COMPILING_CUDA } @@ -2952,10 +2968,10 @@ void spgemm_reuse(std::shared_ptr exec, template void advanced_spgemm_reuse(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, const matrix::csr::lookup_data& c_lookup, matrix::Csr* c) @@ -2973,8 +2989,8 @@ void advanced_spgemm_reuse(std::shared_ptr exec, const auto b_vals = as_device_type(b->get_const_values()); const auto c_vals = as_device_type(c->get_values()); const auto d_vals = as_device_type(d->get_const_values()); - const auto palpha = as_device_type(alpha->get_const_values()); - const auto pbeta = as_device_type(beta->get_const_values()); + const auto palpha = as_device_type(alpha.values); + const auto pbeta = as_device_type(beta.values); const auto lookup_storage_offsets = c_lookup.storage_offsets.get_const_data(); const auto lookup_storage = c_lookup.storage.get_const_data(); diff --git a/common/cuda_hip/matrix/dense_kernels.cpp b/common/cuda_hip/matrix/dense_kernels.cpp index 5e31215faaa..4508f10f07b 100644 --- a/common/cuda_hip/matrix/dense_kernels.cpp +++ b/common/cuda_hip/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -439,7 +439,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, const int64* row_ptrs, matrix::Coo* result) { @@ -450,16 +450,15 @@ void convert_to_coo(std::shared_ptr exec, auto col_idxs = result->get_col_idxs(); auto values = result->get_values(); - auto stride = source->get_stride(); + auto stride = source.stride; const auto grid_dim = ceildiv(num_rows, default_block_size / config::warp_size); if (grid_dim > 0) { kernel::fill_in_coo<<get_stream()>>>( - num_rows, num_cols, stride, - as_device_type(source->get_const_values()), row_ptrs, row_idxs, - col_idxs, as_device_type(values)); + num_rows, num_cols, stride, as_device_type(source.values), row_ptrs, + row_idxs, col_idxs, as_device_type(values)); } } @@ -469,7 +468,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -479,15 +478,14 @@ void convert_to_csr(std::shared_ptr exec, auto col_idxs = result->get_col_idxs(); auto values = result->get_values(); - auto stride = source->get_stride(); + auto stride = source.stride; const auto grid_dim = ceildiv(num_rows, default_block_size / config::warp_size); if (grid_dim > 0) { kernel::fill_in_csr<<get_stream()>>>( - num_rows, num_cols, stride, - as_device_type(source->get_const_values()), + num_rows, num_cols, stride, as_device_type(source.values), as_device_type(row_ptrs), as_device_type(col_idxs), as_device_type(values)); } @@ -499,7 +497,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Ell* result) { auto num_rows = result->get_size()[0]; @@ -509,7 +507,7 @@ void convert_to_ell(std::shared_ptr exec, auto col_idxs = result->get_col_idxs(); auto values = result->get_values(); - auto source_stride = source->get_stride(); + auto source_stride = source.stride; auto result_stride = result->get_stride(); const auto grid_dim = @@ -517,9 +515,8 @@ void convert_to_ell(std::shared_ptr exec, if (grid_dim > 0) { kernel::fill_in_ell<<get_stream()>>>( - num_rows, num_cols, source_stride, - as_device_type(source->get_const_values()), max_nnz_per_row, - result_stride, col_idxs, as_device_type(values)); + num_rows, num_cols, source_stride, as_device_type(source.values), + max_nnz_per_row, result_stride, col_idxs, as_device_type(values)); } } @@ -529,7 +526,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_fbcsr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Fbcsr* result) { const auto num_block_rows = result->get_num_block_rows(); @@ -538,9 +535,8 @@ void convert_to_fbcsr(std::shared_ptr exec, ceildiv(num_block_rows, default_block_size / config::warp_size); kernel::convert_to_fbcsr<<get_stream()>>>( - num_block_rows, result->get_num_block_cols(), source->get_stride(), - result->get_block_size(), - as_device_type(source->get_const_values()), + num_block_rows, result->get_num_block_cols(), source.stride, + result->get_block_size(), as_device_type(source.values), result->get_const_row_ptrs(), result->get_col_idxs(), as_device_type(result->get_values())); } @@ -552,18 +548,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzero_blocks_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, int bs, IndexType* result) { - const auto num_block_rows = source->get_size()[0] / bs; - const auto num_block_cols = source->get_size()[1] / bs; + const auto num_block_rows = source.size[0] / bs; + const auto num_block_cols = source.size[1] / bs; if (num_block_rows > 0) { const auto num_blocks = ceildiv(num_block_rows, default_block_size / config::warp_size); kernel::count_nonzero_blocks_per_row<<get_stream()>>>( - num_block_rows, num_block_cols, source->get_stride(), bs, - as_device_type(source->get_const_values()), result); + num_block_rows, num_block_cols, source.stride, bs, + as_device_type(source.values), result); } } @@ -573,7 +569,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, const int64* coo_row_ptrs, matrix::Hybrid* result) { @@ -581,7 +577,7 @@ void convert_to_hybrid(std::shared_ptr exec, const auto num_cols = result->get_size()[1]; const auto ell_max_nnz_per_row = result->get_ell_num_stored_elements_per_row(); - const auto source_stride = source->get_stride(); + const auto source_stride = source.stride; const auto ell_stride = result->get_ell_stride(); auto ell_col_idxs = result->get_ell_col_idxs(); auto ell_values = result->get_ell_values(); @@ -593,10 +589,10 @@ void convert_to_hybrid(std::shared_ptr exec, if (grid_dim > 0) { kernel::fill_in_hybrid<<get_stream()>>>( - num_rows, num_cols, source_stride, - as_device_type(source->get_const_values()), ell_max_nnz_per_row, - ell_stride, ell_col_idxs, as_device_type(ell_values), coo_row_ptrs, - coo_row_idxs, coo_col_idxs, as_device_type(coo_values)); + num_rows, num_cols, source_stride, as_device_type(source.values), + ell_max_nnz_per_row, ell_stride, ell_col_idxs, + as_device_type(ell_values), coo_row_ptrs, coo_row_idxs, + coo_col_idxs, as_device_type(coo_values)); } } @@ -606,10 +602,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Sellp* result) { - const auto stride = source->get_stride(); + const auto stride = source.stride; const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -625,9 +621,8 @@ void convert_to_sellp(std::shared_ptr exec, kernel::fill_in_sellp<<get_stream()>>>( num_rows, num_cols, slice_size, stride, - as_device_type(source->get_const_values()), - as_device_type(slice_sets), as_device_type(col_idxs), - as_device_type(vals)); + as_device_type(source.values), as_device_type(slice_sets), + as_device_type(col_idxs), as_device_type(vals)); } } @@ -637,7 +632,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::SparsityCsr* result) { auto num_rows = result->get_size()[0]; @@ -646,15 +641,14 @@ void convert_to_sparsity_csr(std::shared_ptr exec, auto row_ptrs = result->get_row_ptrs(); auto col_idxs = result->get_col_idxs(); - auto stride = source->get_stride(); + auto stride = source.stride; const auto grid_dim = ceildiv(num_rows, default_block_size / config::warp_size); if (grid_dim > 0) { kernel::fill_in_sparsity_csr<<get_stream()>>>( - num_rows, num_cols, stride, - as_device_type(source->get_const_values()), + num_rows, num_cols, stride, as_device_type(source.values), as_device_type(row_ptrs), as_device_type(col_idxs)); } } @@ -665,16 +659,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, + array& tmp) { - if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (x.size[1] == 1 && y.size[1] == 1) { if (blas::is_supported::value) { auto handle = exec->get_blas_handle(); - blas::dot(handle, x->get_size()[0], x->get_const_values(), - x->get_stride(), y->get_const_values(), y->get_stride(), - result->get_values()); + blas::dot(handle, x.size[0], x.values, x.stride, y.values, y.stride, + result.values); } else { compute_dot(exec, x, y, result, tmp); } @@ -689,17 +683,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array& tmp) { - if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (x.size[1] == 1 && y.size[1] == 1) { if (blas::is_supported::value) { auto handle = exec->get_blas_handle(); - blas::conj_dot(handle, x->get_size()[0], x->get_const_values(), - x->get_stride(), y->get_const_values(), - y->get_stride(), result->get_values()); + blas::conj_dot(handle, x.size[0], x.values, x.stride, y.values, + y.stride, result.values); } else { compute_conj_dot(exec, x, y, result, tmp); } @@ -713,16 +706,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, - array& tmp) +void compute_norm2_dispatch( + std::shared_ptr exec, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { - if (x->get_size()[1] == 1) { + if (x.size[1] == 1) { if (blas::is_supported::value) { auto handle = exec->get_blas_handle(); - blas::norm2(handle, x->get_size()[0], x->get_const_values(), - x->get_stride(), result->get_values()); + blas::norm2(handle, x.size[0], x.values, x.stride, result.values); } else { compute_norm2(exec, x, result, tmp); } @@ -737,22 +729,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void simple_apply(std::shared_ptr exec, - const matrix::Dense* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense c) { if (blas::is_supported::value) { auto handle = exec->get_blas_handle(); - if (c->get_size()[0] > 0 && c->get_size()[1] > 0) { - if (a->get_size()[1] > 0) { + if (c.size[0] > 0 && c.size[1] > 0) { + if (a.size[1] > 0) { blas::pointer_mode_guard pm_guard(handle); auto alpha = one(); auto beta = zero(); - blas::gemm(handle, BLAS_OP_N, BLAS_OP_N, c->get_size()[1], - c->get_size()[0], a->get_size()[1], &alpha, - b->get_const_values(), b->get_stride(), - a->get_const_values(), a->get_stride(), &beta, - c->get_values(), c->get_stride()); + blas::gemm(handle, BLAS_OP_N, BLAS_OP_N, c.size[1], c.size[0], + a.size[1], &alpha, b.values, b.stride, a.values, + a.stride, &beta, c.values, c.stride); } else { dense::fill(exec, c, zero()); } @@ -767,19 +757,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* a, const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* c) + matrix::view::dense alpha, + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { if (blas::is_supported::value) { - if (c->get_size()[0] > 0 && c->get_size()[1] > 0) { - if (a->get_size()[1] > 0) { + if (c.size[0] > 0 && c.size[1] > 0) { + if (a.size[1] > 0) { blas::gemm(exec->get_blas_handle(), BLAS_OP_N, BLAS_OP_N, - c->get_size()[1], c->get_size()[0], a->get_size()[1], - alpha->get_const_values(), b->get_const_values(), - b->get_stride(), a->get_const_values(), - a->get_stride(), beta->get_const_values(), - c->get_values(), c->get_stride()); + c.size[1], c.size[0], a.size[1], alpha.values, + b.values, b.stride, a.values, a.stride, beta.values, + c.values, c.stride); } else { dense::scale(exec, beta, c); } @@ -794,20 +784,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { if (blas::is_supported::value) { auto handle = exec->get_blas_handle(); - if (orig->get_size()[0] > 0 && orig->get_size()[1] > 0) { + if (orig.size[0] > 0 && orig.size[1] > 0) { blas::pointer_mode_guard pm_guard(handle); auto alpha = one(); auto beta = zero(); - blas::geam(handle, BLAS_OP_T, BLAS_OP_N, orig->get_size()[0], - orig->get_size()[1], &alpha, orig->get_const_values(), - orig->get_stride(), &beta, trans->get_const_values(), - trans->get_stride(), trans->get_values(), - trans->get_stride()); + blas::geam(handle, BLAS_OP_T, BLAS_OP_N, orig.size[0], orig.size[1], + &alpha, orig.values, orig.stride, &beta, trans.values, + trans.stride, trans.values, trans.stride); } } else { GKO_NOT_IMPLEMENTED; @@ -819,20 +807,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { if (blas::is_supported::value) { auto handle = exec->get_blas_handle(); - if (orig->get_size()[0] > 0 && orig->get_size()[1] > 0) { + if (orig.size[0] > 0 && orig.size[1] > 0) { blas::pointer_mode_guard pm_guard(handle); auto alpha = one(); auto beta = zero(); - blas::geam(handle, BLAS_OP_C, BLAS_OP_N, orig->get_size()[0], - orig->get_size()[1], &alpha, orig->get_const_values(), - orig->get_stride(), &beta, trans->get_const_values(), - trans->get_stride(), trans->get_values(), - trans->get_stride()); + blas::geam(handle, BLAS_OP_C, BLAS_OP_N, orig.size[0], orig.size[1], + &alpha, orig.values, orig.stride, &beta, trans.values, + trans.stride, trans.values, trans.stride); } } else { GKO_NOT_IMPLEMENTED; diff --git a/common/cuda_hip/matrix/ell_kernels.cpp b/common/cuda_hip/matrix/ell_kernels.cpp index c448f0783a3..4bc6ed41515 100644 --- a/common/cuda_hip/matrix/ell_kernels.cpp +++ b/common/cuda_hip/matrix/ell_kernels.cpp @@ -1,13 +1,15 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/matrix/ell_kernels.hpp" #include +#include #include #include +#include #include #include #include @@ -222,14 +224,17 @@ namespace { template -void abstract_spmv(syn::value_list, - std::shared_ptr exec, - int num_worker_per_row, - const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void abstract_spmv( + syn::value_list, std::shared_ptr exec, + int num_worker_per_row, const matrix::Ell* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -249,7 +254,7 @@ void abstract_spmv(syn::value_list, const dim3 block_size(default_block_size / num_thread_per_worker, num_thread_per_worker, 1); const dim3 grid_size(ceildiv(nrows * num_worker_per_row, block_size.x), - b->get_size()[1], 1); + b.size[1], 1); // not support 16 bit atomic #if !defined(CUDA_VERSION) @@ -290,25 +295,25 @@ void abstract_spmv(syn::value_list, a->get_const_values()); const auto b_vals = acc::range( std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), + {static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, std::array{ - {static_cast(b->get_stride())}}); + {static_cast(b.stride)}}); - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { if (grid_size.x > 0 && grid_size.y > 0) { kernel::spmv <<get_stream()>>>( nrows, num_worker_per_row, acc::as_device_range(a_vals), a->get_const_col_idxs(), stride, num_stored_elements_per_row, - acc::as_device_range(b_vals), - as_device_type(c->get_values()), c->get_stride()); + acc::as_device_range(b_vals), as_device_type(c.values), + c.stride); } - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { const auto alpha_val = acc::range( - std::array{1}, alpha->get_const_values()); + std::array{1}, alpha->values); if (grid_size.x > 0 && grid_size.y > 0) { kernel::spmv <<get_stream()>>>( @@ -317,8 +322,8 @@ void abstract_spmv(syn::value_list, acc::as_device_range(a_vals), a->get_const_col_idxs(), stride, num_stored_elements_per_row, acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), - as_device_type(c->get_values()), c->get_stride()); + as_device_type(beta->values), as_device_type(c.values), + c.stride); } } else { GKO_KERNEL_NOT_FOUND; @@ -377,8 +382,8 @@ template void spmv(std::shared_ptr exec, const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -408,11 +413,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Ell* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); diff --git a/common/cuda_hip/matrix/fbcsr_kernels.template.cpp b/common/cuda_hip/matrix/fbcsr_kernels.template.cpp index e10cf10b540..e9b1821484e 100644 --- a/common/cuda_hip/matrix/fbcsr_kernels.template.cpp +++ b/common/cuda_hip/matrix/fbcsr_kernels.template.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -335,7 +335,7 @@ void fallback_transpose(const std::shared_ptr exec, template void fill_in_dense(std::shared_ptr exec, const matrix::Fbcsr* source, - matrix::Dense* result) + matrix::view::dense result) { constexpr auto warps_per_block = default_block_size / config::warp_size; const auto num_blocks = @@ -345,7 +345,7 @@ void fill_in_dense(std::shared_ptr exec, exec->get_stream()>>>( source->get_const_row_ptrs(), source->get_const_col_idxs(), as_device_type(source->get_const_values()), - as_device_type(result->get_values()), result->get_stride(), + as_device_type(result.values), result.stride, source->get_num_block_rows(), source->get_block_size()); } } @@ -373,12 +373,12 @@ void convert_to_csr(const std::shared_ptr exec, template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr* to_check, bool* is_sorted) + const matrix::Fbcsr* to_check, bool& is_sorted) { - *is_sorted = true; + is_sorted = true; auto gpu_array = array(exec, 1); // need to initialize the GPU value to true - exec->copy_from(exec->get_master(), 1, is_sorted, gpu_array.get_data()); + exec->copy_from(exec->get_master(), 1, &is_sorted, gpu_array.get_data()); auto block_size = default_block_size; const auto num_brows = static_cast(to_check->get_num_block_rows()); @@ -389,7 +389,7 @@ void is_sorted_by_column_index( to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), num_brows, gpu_array.get_data()); } - *is_sorted = get_element(gpu_array, 0); + is_sorted = get_element(gpu_array, 0); } @@ -439,13 +439,14 @@ void dense_transpose(std::shared_ptr exec, template void spmv(std::shared_ptr exec, const matrix::Fbcsr* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do return; } - if (b->get_size()[0] == 0 || a->get_num_stored_blocks() == 0) { + if (b.size[0] == 0 || a->get_num_stored_blocks() == 0) { // empty input: fill output with zero dense::fill(exec, c, zero()); return; @@ -463,25 +464,25 @@ void spmv(std::shared_ptr exec, const IndexType mb = a->get_num_block_rows(); const IndexType nb = a->get_num_block_cols(); const auto nnzb = static_cast(a->get_num_stored_blocks()); - const auto nrhs = static_cast(b->get_size()[1]); + const auto nrhs = static_cast(b.size[1]); const auto nrows = a->get_size()[0]; const auto ncols = a->get_size()[1]; - const auto in_stride = b->get_stride(); - const auto out_stride = c->get_stride(); + const auto in_stride = b.stride; + const auto out_stride = c.stride; if (nrhs == 1 && in_stride == 1 && out_stride == 1) { sparselib::bsrmv(handle, SPARSELIB_OPERATION_NON_TRANSPOSE, mb, nb, nnzb, &alpha, descr, values, row_ptrs, col_idxs, - bs, b->get_const_values(), &beta, c->get_values()); + bs, b.values, &beta, c.values); } else { const auto trans_stride = nrows; auto trans_c = array(exec, nrows * nrhs); sparselib::bsrmm(handle, SPARSELIB_OPERATION_NON_TRANSPOSE, SPARSELIB_OPERATION_TRANSPOSE, mb, nrhs, nb, nnzb, &alpha, descr, values, row_ptrs, col_idxs, bs, - b->get_const_values(), in_stride, &beta, - trans_c.get_data(), trans_stride); + b.values, in_stride, &beta, trans_c.get_data(), + trans_stride); dense_transpose(exec, nrhs, nrows, trans_stride, trans_c.get_data(), - out_stride, c->get_values()); + out_stride, c.values); } sparselib::destroy(descr); } else { @@ -492,25 +493,25 @@ void spmv(std::shared_ptr exec, template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Fbcsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do return; } - if (b->get_size()[0] == 0 || a->get_num_stored_blocks() == 0) { + if (b.size[0] == 0 || a->get_num_stored_blocks() == 0) { // empty input: scale output dense::scale(exec, beta, c); return; } if (sparselib::is_supported::value) { auto handle = exec->get_sparselib_handle(); - const auto alphp = alpha->get_const_values(); - const auto betap = beta->get_const_values(); + const auto alphp = alpha.values; + const auto betap = beta.values; auto descr = sparselib::create_mat_descr(); const auto row_ptrs = a->get_const_row_ptrs(); const auto col_idxs = a->get_const_col_idxs(); @@ -519,27 +520,27 @@ void advanced_spmv(std::shared_ptr exec, const IndexType mb = a->get_num_block_rows(); const IndexType nb = a->get_num_block_cols(); const auto nnzb = static_cast(a->get_num_stored_blocks()); - const auto nrhs = static_cast(b->get_size()[1]); + const auto nrhs = static_cast(b.size[1]); const auto nrows = a->get_size()[0]; const auto ncols = a->get_size()[1]; - const auto in_stride = b->get_stride(); - const auto out_stride = c->get_stride(); + const auto in_stride = b.stride; + const auto out_stride = c.stride; if (nrhs == 1 && in_stride == 1 && out_stride == 1) { sparselib::bsrmv(handle, SPARSELIB_OPERATION_NON_TRANSPOSE, mb, nb, nnzb, alphp, descr, values, row_ptrs, col_idxs, bs, - b->get_const_values(), betap, c->get_values()); + b.values, betap, c.values); } else { const auto trans_stride = nrows; auto trans_c = array(exec, nrows * nrhs); - dense_transpose(exec, nrows, nrhs, out_stride, c->get_values(), + dense_transpose(exec, nrows, nrhs, out_stride, c.values, trans_stride, trans_c.get_data()); sparselib::bsrmm(handle, SPARSELIB_OPERATION_NON_TRANSPOSE, SPARSELIB_OPERATION_TRANSPOSE, mb, nrhs, nb, nnzb, alphp, descr, values, row_ptrs, col_idxs, bs, - b->get_const_values(), in_stride, betap, - trans_c.get_data(), trans_stride); + b.values, in_stride, betap, trans_c.get_data(), + trans_stride); dense_transpose(exec, nrhs, nrows, trans_stride, trans_c.get_data(), - out_stride, c->get_values()); + out_stride, c.values); } sparselib::destroy(descr); } else { diff --git a/common/cuda_hip/matrix/sellp_kernels.cpp b/common/cuda_hip/matrix/sellp_kernels.cpp index e704feb355c..1344ceada73 100644 --- a/common/cuda_hip/matrix/sellp_kernels.cpp +++ b/common/cuda_hip/matrix/sellp_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -92,18 +92,18 @@ __global__ __launch_bounds__(default_block_size) void advanced_spmv_kernel( template void spmv(std::shared_ptr exec, const matrix::Sellp* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto block_size = default_block_size; - const dim3 grid(ceildiv(a->get_size()[0], block_size), b->get_size()[1]); + const dim3 grid(ceildiv(a->get_size()[0], block_size), b.size[1]); if (grid.x > 0 && grid.y > 0) { spmv_kernel<<get_stream()>>>( - a->get_size()[0], b->get_size()[1], b->get_stride(), - c->get_stride(), a->get_slice_size(), a->get_const_slice_sets(), + a->get_size()[0], b.size[1], b.stride, c.stride, + a->get_slice_size(), a->get_const_slice_sets(), as_device_type(a->get_const_values()), a->get_const_col_idxs(), - as_device_type(b->get_const_values()), - as_device_type(c->get_values())); + as_device_type(b.values), as_device_type(c.values)); } } @@ -112,24 +112,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Sellp* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { const auto block_size = default_block_size; - const dim3 grid(ceildiv(a->get_size()[0], block_size), b->get_size()[1]); + const dim3 grid(ceildiv(a->get_size()[0], block_size), b.size[1]); if (grid.x > 0 && grid.y > 0) { advanced_spmv_kernel<<get_stream()>>>( - a->get_size()[0], b->get_size()[1], b->get_stride(), - c->get_stride(), a->get_slice_size(), a->get_const_slice_sets(), - as_device_type(alpha->get_const_values()), - as_device_type(a->get_const_values()), a->get_const_col_idxs(), - as_device_type(b->get_const_values()), - as_device_type(beta->get_const_values()), - as_device_type(c->get_values())); + a->get_size()[0], b.size[1], b.stride, c.stride, + a->get_slice_size(), a->get_const_slice_sets(), + as_device_type(alpha.values), as_device_type(a->get_const_values()), + a->get_const_col_idxs(), as_device_type(b.values), + as_device_type(beta.values), as_device_type(c.values)); } } diff --git a/common/cuda_hip/matrix/sparsity_csr_kernels.cpp b/common/cuda_hip/matrix/sparsity_csr_kernels.cpp index 5c2aec6dac9..a65836561c1 100644 --- a/common/cuda_hip/matrix/sparsity_csr_kernels.cpp +++ b/common/cuda_hip/matrix/sparsity_csr_kernels.cpp @@ -1,12 +1,15 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/matrix/sparsity_csr_kernels.hpp" +#include + #include #include +#include #include "accessor/cuda_hip_helper.hpp" #include "accessor/reduced_row_major.hpp" @@ -174,13 +177,18 @@ namespace host_kernel { template -void classical_spmv(syn::value_list, - std::shared_ptr exec, - const matrix::SparsityCsr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void classical_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::SparsityCsr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -195,42 +203,37 @@ void classical_spmv(syn::value_list, const auto gridx = std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size), int64(nwarps / warps_in_block)); - const dim3 grid(gridx, b->get_size()[1]); + const dim3 grid(gridx, b.size[1]); const auto block = spmv_block_size; const auto b_vals = gko::acc::range( - std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), - std::array{ - {static_cast(b->get_stride())}}); + std::array{{static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, + std::array{{static_cast(b.stride)}}); auto c_vals = gko::acc::range( - std::array{ - {static_cast(c->get_size()[0]), - static_cast(c->get_size()[1])}}, - c->get_values(), - std::array{ - {static_cast(c->get_stride())}}); - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + std::array{{static_cast(c.size[0]), + static_cast(c.size[1])}}, + c.values, + std::array{{static_cast(c.stride)}}); + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do return; } - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { kernel::abstract_classical_spmv <<get_stream()>>>( a->get_size()[0], as_device_type(a->get_const_value()), a->get_const_col_idxs(), as_device_type(a->get_const_row_ptrs()), acc::as_device_range(b_vals), acc::as_device_range(c_vals)); - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { kernel::abstract_classical_spmv <<get_stream()>>>( - a->get_size()[0], as_device_type(alpha->get_const_values()), + a->get_size()[0], as_device_type(alpha->values), as_device_type(a->get_const_value()), a->get_const_col_idxs(), as_device_type(a->get_const_row_ptrs()), - acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), + acc::as_device_range(b_vals), as_device_type(beta->values), acc::as_device_range(c_vals)); } else { GKO_KERNEL_NOT_FOUND; @@ -246,8 +249,8 @@ template void spmv(std::shared_ptr exec, const matrix::SparsityCsr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { host_kernel::select_classical_spmv( classical_kernels(), [](int compiled_info) { return true; }, @@ -261,11 +264,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::SparsityCsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { host_kernel::select_classical_spmv( classical_kernels(), [](int compiled_info) { return true; }, @@ -312,11 +315,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr* to_check, bool* is_sorted) + const matrix::SparsityCsr* to_check, bool& is_sorted) { - *is_sorted = true; - auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); - auto gpu_array = array{exec, cpu_array}; + is_sorted = true; + auto gpu_array = array{exec, 1}; + // need to initialize the GPU value to true + exec->copy_from(exec->get_master(), 1, &is_sorted, gpu_array.get_data()); const auto num_rows = static_cast(to_check->get_size()[0]); auto num_blocks = ceildiv(num_rows, default_block_size); if (num_blocks > 0) { @@ -325,7 +329,7 @@ void is_sorted_by_column_index( to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), num_rows, gpu_array.get_data()); } - cpu_array = gpu_array; + is_sorted = get_element(gpu_array, 0); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/common/cuda_hip/preconditioner/isai_kernels.cpp b/common/cuda_hip/preconditioner/isai_kernels.cpp index 9c74bafd289..008c627d874 100644 --- a/common/cuda_hip/preconditioner/isai_kernels.cpp +++ b/common/cuda_hip/preconditioner/isai_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -533,7 +533,7 @@ void generate_excess_system(std::shared_ptr exec, const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, matrix::Csr* excess_system, - matrix::Dense* excess_rhs, + matrix::view::dense excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -550,7 +550,7 @@ void generate_excess_system(std::shared_ptr exec, excess_rhs_ptrs, excess_nz_ptrs, excess_system->get_row_ptrs(), excess_system->get_col_idxs(), as_device_type(excess_system->get_values()), - as_device_type(excess_rhs->get_values()), e_start, e_end); + as_device_type(excess_rhs.values), e_start, e_end); } } @@ -561,7 +561,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr exec, const IndexType* excess_block_ptrs, - matrix::Dense* excess_solution, + matrix::view::dense excess_solution, size_type e_start, size_type e_end) { const auto block = default_block_size; @@ -569,8 +569,8 @@ void scale_excess_solution(std::shared_ptr exec, if (grid > 0) { kernel::scale_excess_solution <<get_stream()>>>( - excess_block_ptrs, - as_device_type(excess_solution->get_values()), e_start, e_end); + excess_block_ptrs, as_device_type(excess_solution.values), + e_start, e_end); } } @@ -579,11 +579,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void scatter_excess_solution(std::shared_ptr exec, - const IndexType* excess_rhs_ptrs, - const matrix::Dense* excess_solution, - matrix::Csr* inverse, - size_type e_start, size_type e_end) +void scatter_excess_solution( + std::shared_ptr exec, + const IndexType* excess_rhs_ptrs, + matrix::view::dense excess_solution, + matrix::Csr* inverse, size_type e_start, + size_type e_end) { const auto num_rows = inverse->get_size()[0]; @@ -593,8 +594,7 @@ void scatter_excess_solution(std::shared_ptr exec, kernel::copy_excess_solution <<get_stream()>>>( static_cast(num_rows), inverse->get_const_row_ptrs(), - excess_rhs_ptrs, - as_device_type(excess_solution->get_const_values()), + excess_rhs_ptrs, as_device_type(excess_solution.values), as_device_type(inverse->get_values()), e_start, e_end); } } diff --git a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.cpp b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.cpp index 27b4f57eb6c..91bfc9e3b79 100644 --- a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.cpp +++ b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -45,13 +45,14 @@ void apply(std::shared_ptr exec, size_type num_blocks, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* alpha, - const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense x) { // TODO: write a special kernel for multiple RHS dense::scale(exec, beta, x); - for (size_type col = 0; col < b->get_size()[1]; ++col) { + for (size_type col = 0; col < b.size[1]; ++col) { select_advanced_apply( compiled_kernels(), [&](int compiled_block_size) { @@ -60,9 +61,8 @@ void apply(std::shared_ptr exec, size_type num_blocks, syn::value_list(), syn::type_list<>(), exec, num_blocks, block_precisions.get_const_data(), block_pointers.get_const_data(), - blocks.get_const_data(), storage_scheme, alpha->get_const_values(), - b->get_const_values() + col, b->get_stride(), x->get_values() + col, - x->get_stride()); + blocks.get_const_data(), storage_scheme, alpha.values, + b.values + col, b.stride, x.values + col, x.stride); } } diff --git a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.cpp b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.cpp index e9b7b10fd88..ccfc30c3334 100644 --- a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.cpp +++ b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -39,10 +39,10 @@ void simple_apply( storage_scheme, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* b, matrix::Dense* x) + matrix::view::dense b, matrix::view::dense x) { // TODO: write a special kernel for multiple RHS - for (size_type col = 0; col < b->get_size()[1]; ++col) { + for (size_type col = 0; col < b.size[1]; ++col) { select_apply( compiled_kernels(), [&](int compiled_block_size) { @@ -51,9 +51,8 @@ void simple_apply( syn::value_list(), syn::type_list<>(), exec, num_blocks, block_precisions.get_const_data(), block_pointers.get_const_data(), - blocks.get_const_data(), storage_scheme, - b->get_const_values() + col, b->get_stride(), x->get_values() + col, - x->get_stride()); + blocks.get_const_data(), storage_scheme, b.values + col, b.stride, + x.values + col, x.stride); } } diff --git a/common/cuda_hip/solver/cb_gmres_kernels.cpp b/common/cuda_hip/solver/cb_gmres_kernels.cpp index 0ee6d1b5321..4270f2ebb1e 100644 --- a/common/cuda_hip/solver/cb_gmres_kernels.cpp +++ b/common/cuda_hip/solver/cb_gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -611,14 +611,14 @@ void zero_matrix(std::shared_ptr exec, size_type m, template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - array* stop_status, size_type krylov_dim) + matrix::view::dense b, + matrix::view::dense residual, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + array& stop_status, size_type krylov_dim) { - const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), - krylov_dim * b->get_size()[1]); + const auto num_threads = + std::max(b.size[0] * b.stride, krylov_dim * b.size[1]); const auto grid_dim = ceildiv(num_threads, default_block_size); const auto block_dim = default_block_size; constexpr auto block_size = default_block_size; @@ -626,14 +626,11 @@ void initialize(std::shared_ptr exec, if (grid_dim != 0) { initialize_kernel <<get_stream()>>>( - b->get_size()[0], b->get_size()[1], krylov_dim, - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(residual->get_values()), residual->get_stride(), - as_device_type(givens_sin->get_values()), - givens_sin->get_stride(), - as_device_type(givens_cos->get_values()), - givens_cos->get_stride(), - as_device_type(stop_status->get_data())); + b.size[0], b.size[1], krylov_dim, as_device_type(b.values), + b.stride, as_device_type(residual.values), residual.stride, + as_device_type(givens_sin.values), givens_sin.stride, + as_device_type(givens_cos.values), givens_cos.stride, + as_device_type(stop_status.get_data())); } } @@ -643,19 +640,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE( template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense>* arnoldi_norm, + matrix::view::dense residual, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense> arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense* next_krylov_basis, - array* final_iter_nums, array& reduction_tmp, + matrix::view::dense next_krylov_basis, + array& final_iter_nums, array& reduction_tmp, size_type krylov_dim) { constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; - const auto num_rows = residual->get_size()[0]; - const auto num_rhs = residual->get_size()[1]; + const auto num_rows = residual.size[0]; + const auto num_rhs = residual.size[1]; const auto krylov_stride = gko::cb_gmres::helper_functions_accessor::get_stride( krylov_bases); @@ -663,42 +660,38 @@ void restart(std::shared_ptr exec, ceildiv((krylov_dim + 1) * krylov_stride[0], default_block_size); const auto block_dim = default_block_size; constexpr auto block_size = default_block_size; - const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto stride_arnoldi = arnoldi_norm.stride; if (grid_dim_1 != 0) { restart_1_kernel <<get_stream()>>>( - residual->get_size()[0], residual->get_size()[1], krylov_dim, + residual.size[0], residual.size[1], krylov_dim, acc::as_device_range(krylov_bases), - as_device_type(residual_norm_collection->get_values()), - residual_norm_collection->get_stride()); + as_device_type(residual_norm_collection.values), + residual_norm_collection.stride); } kernels::GKO_DEVICE_NAMESPACE::dense::compute_norm2_dispatch( exec, residual, residual_norm, reduction_tmp); if (use_scalar) { - components::fill_array(exec, - arnoldi_norm->get_values() + 2 * stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + 2 * stride_arnoldi, num_rhs, zero>()); const dim3 grid_size_nrm(ceildiv(num_rhs, default_dot_dim), exec->get_num_multiprocessor() * 2); const dim3 block_size_nrm(default_dot_dim, default_dot_dim); multinorminf_without_stop_kernel<<get_stream()>>>( - num_rows, num_rhs, as_device_type(residual->get_const_values()), - residual->get_stride(), - as_device_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), 0); + num_rows, num_rhs, as_device_type(residual.values), residual.stride, + as_device_type(arnoldi_norm.values + 2 * stride_arnoldi), 0); } if (gko::cb_gmres::detail::has_3d_scaled_accessor::value) { set_scalar_kernel <<get_stream()>>>( - num_rhs, krylov_dim + 1, - as_device_type(residual_norm->get_const_values()), - residual_norm->get_stride(), - as_device_type(arnoldi_norm->get_const_values() + - 2 * stride_arnoldi), + num_rhs, krylov_dim + 1, as_device_type(residual_norm.values), + residual_norm.stride, + as_device_type(arnoldi_norm.values + 2 * stride_arnoldi), stride_arnoldi, acc::as_device_range(krylov_bases)); } const auto grid_dim_2 = @@ -708,15 +701,14 @@ void restart(std::shared_ptr exec, if (grid_dim_2 != 0) { restart_2_kernel <<get_stream()>>>( - residual->get_size()[0], residual->get_size()[1], - as_device_type(residual->get_const_values()), - residual->get_stride(), - as_device_type(residual_norm->get_const_values()), - as_device_type(residual_norm_collection->get_values()), + residual.size[0], residual.size[1], + as_device_type(residual.values), residual.stride, + as_device_type(residual_norm.values), + as_device_type(residual_norm_collection.values), acc::as_device_range(krylov_bases), - as_device_type(next_krylov_basis->get_values()), - next_krylov_basis->get_stride(), - as_device_type(final_iter_nums->get_data())); + as_device_type(next_krylov_basis.values), + next_krylov_basis.stride, + as_device_type(final_iter_nums.get_data())); } } @@ -724,17 +716,16 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_RESTART_KERNEL); template -void finish_arnoldi_CGS(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - Accessor3dim krylov_bases, - matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, const stopping_status* stop_status, - stopping_status* reorth_status, - array* num_reorth) +void finish_arnoldi_CGS( + std::shared_ptr exec, + matrix::view::dense next_krylov_basis, Accessor3dim krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, size_type iter, + const stopping_status* stop_status, stopping_status* reorth_status, + array& num_reorth) { - const auto dim_size = next_krylov_basis->get_size(); + const auto dim_size = next_krylov_basis.size; if (dim_size[1] == 0) { return; } @@ -743,10 +734,10 @@ void finish_arnoldi_CGS(std::shared_ptr exec, constexpr int singledot_block_size = default_dot_dim; constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; - const auto stride_next_krylov = next_krylov_basis->get_stride(); - const auto stride_hessenberg = hessenberg_iter->get_stride(); - const auto stride_buffer = buffer_iter->get_stride(); - const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto stride_next_krylov = next_krylov_basis.stride; + const auto stride_hessenberg = hessenberg_iter.stride; + const auto stride_buffer = buffer_iter.stride; + const auto stride_arnoldi = arnoldi_norm.stride; const dim3 grid_size(ceildiv(dim_size[1], default_dot_dim), exec->get_num_multiprocessor() * 2); const dim3 grid_size_num_iters(ceildiv(dim_size[1], default_dot_dim), @@ -761,33 +752,31 @@ void finish_arnoldi_CGS(std::shared_ptr exec, const auto block_size_iters_single = singledot_block_size; size_type num_reorth_host; - components::fill_array(exec, arnoldi_norm->get_values(), dim_size[1], + components::fill_array(exec, arnoldi_norm.values, dim_size[1], zero()); multinorm2_kernel<<get_stream()>>>( - dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_const_values()), - stride_next_krylov, as_device_type(arnoldi_norm->get_values()), + dim_size[0], dim_size[1], as_device_type(next_krylov_basis.values), + stride_next_krylov, as_device_type(arnoldi_norm.values), as_device_type(stop_status)); // nrmP = norm(next_krylov_basis) zero_matrix(exec, iter + 1, dim_size[1], stride_hessenberg, - hessenberg_iter->get_values()); + hessenberg_iter.values); if (dim_size[1] > 1) { multidot_kernel <<get_stream()>>>( dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_const_values()), - stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(hessenberg_iter->get_values()), - stride_hessenberg, as_device_type(stop_status)); + as_device_type(next_krylov_basis.values), stride_next_krylov, + acc::as_device_range(krylov_bases), + as_device_type(hessenberg_iter.values), stride_hessenberg, + as_device_type(stop_status)); } else { singledot_kernel <<get_stream()>>>( - dim_size[0], - as_device_type(next_krylov_basis->get_const_values()), + dim_size[0], as_device_type(next_krylov_basis.values), stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(hessenberg_iter->get_values()), - stride_hessenberg, as_device_type(stop_status)); + as_device_type(hessenberg_iter.values), stride_hessenberg, + as_device_type(stop_status)); } // for i in 1:iter // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) @@ -796,60 +785,57 @@ void finish_arnoldi_CGS(std::shared_ptr exec, <<get_stream()>>>( iter + 1, dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_values()), stride_next_krylov, + as_device_type(next_krylov_basis.values), stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(hessenberg_iter->get_const_values()), - stride_hessenberg, as_device_type(stop_status)); + as_device_type(hessenberg_iter.values), stride_hessenberg, + as_device_type(stop_status)); // for i in 1:iter // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) // end - components::fill_array(exec, arnoldi_norm->get_values() + stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + stride_arnoldi, dim_size[1], zero()); if (use_scalar) { - components::fill_array(exec, - arnoldi_norm->get_values() + 2 * stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + 2 * stride_arnoldi, dim_size[1], zero()); } multinorm2_inf_kernel <<get_stream()>>>( - dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_const_values()), + dim_size[0], dim_size[1], as_device_type(next_krylov_basis.values), stride_next_krylov, - as_device_type(arnoldi_norm->get_values() + stride_arnoldi), - as_device_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), + as_device_type(arnoldi_norm.values + stride_arnoldi), + as_device_type(arnoldi_norm.values + 2 * stride_arnoldi), as_device_type(stop_status)); // nrmN = norm(next_krylov_basis) - components::fill_array(exec, num_reorth->get_data(), 1, zero()); + components::fill_array(exec, num_reorth.get_data(), 1, zero()); check_arnoldi_norms <<get_stream()>>>( - dim_size[1], as_device_type(arnoldi_norm->get_values()), - stride_arnoldi, as_device_type(hessenberg_iter->get_values()), - stride_hessenberg, iter + 1, acc::as_device_range(krylov_bases), - as_device_type(stop_status), as_device_type(reorth_status), - as_device_type(num_reorth->get_data())); - num_reorth_host = get_element(*num_reorth, 0); + dim_size[1], as_device_type(arnoldi_norm.values), stride_arnoldi, + as_device_type(hessenberg_iter.values), stride_hessenberg, iter + 1, + acc::as_device_range(krylov_bases), as_device_type(stop_status), + as_device_type(reorth_status), + as_device_type(num_reorth.get_data())); + num_reorth_host = get_element(num_reorth, 0); // num_reorth_host := number of next_krylov vector to be reorthogonalization for (size_type l = 1; (num_reorth_host > 0) && (l < 3); l++) { zero_matrix(exec, iter + 1, dim_size[1], stride_buffer, - buffer_iter->get_values()); + buffer_iter.values); if (dim_size[1] > 1) { multidot_kernel <<get_stream()>>>( dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_const_values()), + as_device_type(next_krylov_basis.values), stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(buffer_iter->get_values()), stride_buffer, + as_device_type(buffer_iter.values), stride_buffer, as_device_type(stop_status)); } else { singledot_kernel <<get_stream()>>>( - dim_size[0], - as_device_type(next_krylov_basis->get_const_values()), + dim_size[0], as_device_type(next_krylov_basis.values), stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(buffer_iter->get_values()), stride_buffer, + as_device_type(buffer_iter.values), stride_buffer, as_device_type(stop_status)); } // for i in 1:iter @@ -859,43 +845,40 @@ void finish_arnoldi_CGS(std::shared_ptr exec, <<get_stream()>>>( iter + 1, dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_values()), - stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(hessenberg_iter->get_values()), - stride_hessenberg, - as_device_type(buffer_iter->get_const_values()), stride_buffer, + as_device_type(next_krylov_basis.values), stride_next_krylov, + acc::as_device_range(krylov_bases), + as_device_type(hessenberg_iter.values), stride_hessenberg, + as_device_type(buffer_iter.values), stride_buffer, as_device_type(stop_status), as_device_type(reorth_status)); // for i in 1:iter // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) // end - components::fill_array(exec, - arnoldi_norm->get_values() + stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + stride_arnoldi, dim_size[1], zero()); if (use_scalar) { - components::fill_array( - exec, arnoldi_norm->get_values() + 2 * stride_arnoldi, - dim_size[1], zero()); + components::fill_array(exec, + arnoldi_norm.values + 2 * stride_arnoldi, + dim_size[1], zero()); } multinorm2_inf_kernel <<get_stream()>>>( dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_const_values()), - stride_next_krylov, - as_device_type(arnoldi_norm->get_values() + stride_arnoldi), - as_device_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), + as_device_type(next_krylov_basis.values), stride_next_krylov, + as_device_type(arnoldi_norm.values + stride_arnoldi), + as_device_type(arnoldi_norm.values + 2 * stride_arnoldi), as_device_type(stop_status)); // nrmN = norm(next_krylov_basis) - components::fill_array(exec, num_reorth->get_data(), 1, + components::fill_array(exec, num_reorth.get_data(), 1, zero()); check_arnoldi_norms <<get_stream()>>>( - dim_size[1], as_device_type(arnoldi_norm->get_values()), - stride_arnoldi, as_device_type(hessenberg_iter->get_values()), + dim_size[1], as_device_type(arnoldi_norm.values), + stride_arnoldi, as_device_type(hessenberg_iter.values), stride_hessenberg, iter + 1, acc::as_device_range(krylov_bases), as_device_type(stop_status), as_device_type(reorth_status), - num_reorth->get_data()); - num_reorth_host = get_element(*num_reorth, 0); + num_reorth.get_data()); + num_reorth_host = get_element(num_reorth, 0); // num_reorth_host := number of next_krylov vector to be // reorthogonalization } @@ -903,74 +886,72 @@ void finish_arnoldi_CGS(std::shared_ptr exec, <<get_stream()>>>( iter, dim_size[0], dim_size[1], - as_device_type(next_krylov_basis->get_values()), stride_next_krylov, + as_device_type(next_krylov_basis.values), stride_next_krylov, acc::as_device_range(krylov_bases), - as_device_type(hessenberg_iter->get_const_values()), - stride_hessenberg, as_device_type(stop_status)); + as_device_type(hessenberg_iter.values), stride_hessenberg, + as_device_type(stop_status)); // next_krylov_basis /= hessenberg(iter, iter + 1) // krylov_bases(:, iter + 1) = next_krylov_basis // End of arnoldi } template -void givens_rotation(std::shared_ptr exec, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - size_type iter, const array* stop_status) +void givens_rotation( + std::shared_ptr exec, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, size_type iter, + const array& stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; - const auto num_cols = hessenberg_iter->get_size()[1]; + const auto num_cols = hessenberg_iter.size[1]; const auto block_dim = block_size; const auto grid_dim = static_cast(ceildiv(num_cols, block_size)); if (grid_dim != 0) { givens_rotation_kernel <<get_stream()>>>( - hessenberg_iter->get_size()[0], hessenberg_iter->get_size()[1], - iter, as_device_type(hessenberg_iter->get_values()), - hessenberg_iter->get_stride(), - as_device_type(givens_sin->get_values()), - givens_sin->get_stride(), - as_device_type(givens_cos->get_values()), - givens_cos->get_stride(), - as_device_type(residual_norm->get_values()), - as_device_type(residual_norm_collection->get_values()), - residual_norm_collection->get_stride(), - stop_status->get_const_data()); + hessenberg_iter.size[0], hessenberg_iter.size[1], iter, + as_device_type(hessenberg_iter.values), hessenberg_iter.stride, + as_device_type(givens_sin.values), givens_sin.stride, + as_device_type(givens_cos.values), givens_cos.stride, + as_device_type(residual_norm.values), + as_device_type(residual_norm_collection.values), + residual_norm_collection.stride, stop_status.get_const_data()); } } template void arnoldi(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, array* final_iter_nums, - const array* stop_status, - array* reorth_status, - array* num_reorth) + matrix::view::dense next_krylov_basis, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + Accessor3d krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, + size_type iter, array& final_iter_nums, + const array& stop_status, + array& reorth_status, + array& num_reorth) { - if (final_iter_nums->get_size() != 0) { + if (final_iter_nums.get_size() != 0) { increase_final_iteration_numbers_kernel<<< static_cast( - ceildiv(final_iter_nums->get_size(), default_block_size)), + ceildiv(final_iter_nums.get_size(), default_block_size)), default_block_size, 0, exec->get_stream()>>>( - as_device_type(final_iter_nums->get_data()), - stop_status->get_const_data(), final_iter_nums->get_size()); + as_device_type(final_iter_nums.get_data()), + stop_status.get_const_data(), final_iter_nums.get_size()); } finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter, buffer_iter, arnoldi_norm, iter, - stop_status->get_const_data(), reorth_status->get_data(), + stop_status.get_const_data(), reorth_status.get_data(), num_reorth); givens_rotation(exec, givens_sin, givens_cos, hessenberg_iter, residual_norm, residual_norm_collection, iter, stop_status); @@ -982,39 +963,37 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); template void solve_upper_triangular( std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, matrix::Dense* y, - const array* final_iter_nums) + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg, + matrix::view::dense y, const array& final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; - const auto num_rhs = residual_norm_collection->get_size()[1]; + const auto num_rhs = residual_norm_collection.size[1]; const auto block_dim = block_size; const auto grid_dim = static_cast(ceildiv(num_rhs, block_size)); solve_upper_triangular_kernel <<get_stream()>>>( - hessenberg->get_size()[1], num_rhs, - as_device_type(residual_norm_collection->get_const_values()), - residual_norm_collection->get_stride(), - as_device_type(hessenberg->get_const_values()), - hessenberg->get_stride(), as_device_type(y->get_values()), - y->get_stride(), as_device_type(final_iter_nums->get_const_data())); + hessenberg.size[1], num_rhs, + as_device_type(residual_norm_collection.values), + residual_norm_collection.stride, as_device_type(hessenberg.values), + hessenberg.stride, as_device_type(y.values), y.stride, + as_device_type(final_iter_nums.get_const_data())); } template void calculate_qy(std::shared_ptr exec, ConstAccessor3d krylov_bases, size_type num_krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array* final_iter_nums) + matrix::view::dense y, + matrix::view::dense before_preconditioner, + const array& final_iter_nums) { - const auto num_rows = before_preconditioner->get_size()[0]; - const auto num_cols = before_preconditioner->get_size()[1]; - const auto stride_before_preconditioner = - before_preconditioner->get_stride(); + const auto num_rows = before_preconditioner.size[0]; + const auto num_cols = before_preconditioner.size[1]; + const auto stride_before_preconditioner = before_preconditioner.stride; constexpr auto block_size = default_block_size; const auto grid_dim = static_cast( @@ -1024,10 +1003,10 @@ void calculate_qy(std::shared_ptr exec, calculate_Qy_kernel <<get_stream()>>>( num_rows, num_cols, acc::as_device_range(krylov_bases), - as_device_type(y->get_const_values()), y->get_stride(), - as_device_type(before_preconditioner->get_values()), + as_device_type(y.values), y.stride, + as_device_type(before_preconditioner.values), stride_before_preconditioner, - as_device_type(final_iter_nums->get_const_data())); + as_device_type(final_iter_nums.get_const_data())); } // Calculate qy // before_preconditioner = krylov_bases * y @@ -1036,25 +1015,24 @@ void calculate_qy(std::shared_ptr exec, template void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, + matrix::view::dense residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense* hessenberg, - matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array* final_iter_nums) + matrix::view::dense hessenberg, + matrix::view::dense y, + matrix::view::dense before_preconditioner, + const array& final_iter_nums) { - if (before_preconditioner->get_size()[1] == 0) { + if (before_preconditioner.size[1] == 0) { return; } // since hessenberg has dims: iters x iters * num_rhs // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs - const auto iters = - hessenberg->get_size()[1] / before_preconditioner->get_size()[1]; + const auto iters = hessenberg.size[1] / before_preconditioner.size[1]; const auto num_krylov_bases = iters + 1; solve_upper_triangular(exec, residual_norm_collection, hessenberg, y, final_iter_nums); - calculate_qy(exec, krylov_bases, num_krylov_bases, y, before_preconditioner, - final_iter_nums); + calculate_qy(exec, krylov_bases, num_krylov_bases, y.as_const(), + before_preconditioner, final_iter_nums); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( diff --git a/common/cuda_hip/solver/idr_kernels.cpp b/common/cuda_hip/solver/idr_kernels.cpp index 6471aea4ec8..5ac109bc18e 100644 --- a/common/cuda_hip/solver/idr_kernels.cpp +++ b/common/cuda_hip/solver/idr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -364,33 +364,32 @@ namespace { template void initialize_m(std::shared_ptr exec, - const size_type nrhs, matrix::Dense* m, - array* stop_status) + const size_type nrhs, matrix::view::dense m, + array& stop_status) { - const auto subspace_dim = m->get_size()[0]; - const auto m_stride = m->get_stride(); + const auto subspace_dim = m.size[0]; + const auto m_stride = m.stride; const auto grid_dim = ceildiv(m_stride * subspace_dim, default_block_size); initialize_m_kernel<<get_stream()>>>( - subspace_dim, nrhs, as_device_type(m->get_values()), m_stride, - as_device_type(stop_status->get_data())); + subspace_dim, nrhs, as_device_type(m.values), m_stride, + as_device_type(stop_status.get_data())); } template -void initialize_subspace_vectors(std::shared_ptr exec, - matrix::Dense* subspace_vectors, - bool deterministic) +void initialize_subspace_vectors( + std::shared_ptr exec, + matrix::view::dense subspace_vectors, bool deterministic) { if (!deterministic) { auto gen = randlib::rand_generator(std::random_device{}(), RANDLIB_RNG_PSEUDO_DEFAULT, exec->get_stream()); - randlib::rand_vector( - gen, - subspace_vectors->get_size()[0] * subspace_vectors->get_stride(), - 0.0, 1.0, subspace_vectors->get_values()); + randlib::rand_vector(gen, + subspace_vectors.size[0] * subspace_vectors.stride, + 0.0, 1.0, subspace_vectors.values); randlib::destroy(gen); } } @@ -399,61 +398,59 @@ void initialize_subspace_vectors(std::shared_ptr exec, template void orthonormalize_subspace_vectors( std::shared_ptr exec, - matrix::Dense* subspace_vectors) + matrix::view::dense subspace_vectors) { orthonormalize_subspace_vectors_kernel <<<1, default_block_size, 0, exec->get_stream()>>>( - subspace_vectors->get_size()[0], subspace_vectors->get_size()[1], - as_device_type(subspace_vectors->get_values()), - subspace_vectors->get_stride()); + subspace_vectors.size[0], subspace_vectors.size[1], + as_device_type(subspace_vectors.values), subspace_vectors.stride); } template void solve_lower_triangular(std::shared_ptr exec, const size_type nrhs, - const matrix::Dense* m, - const matrix::Dense* f, - matrix::Dense* c, - const array* stop_status) + matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense c, + const array& stop_status) { - const auto subspace_dim = m->get_size()[0]; + const auto subspace_dim = m.size[0]; const auto grid_dim = ceildiv(nrhs, default_block_size); solve_lower_triangular_kernel<<get_stream()>>>( - subspace_dim, nrhs, as_device_type(m->get_const_values()), - m->get_stride(), as_device_type(f->get_const_values()), f->get_stride(), - as_device_type(c->get_values()), c->get_stride(), - stop_status->get_const_data()); + subspace_dim, nrhs, as_device_type(m.values), m.stride, + as_device_type(f.values), f.stride, as_device_type(c.values), c.stride, + stop_status.get_const_data()); } template void update_g_and_u(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense* p, - const matrix::Dense* m, - matrix::Dense* alpha, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense m, + matrix::view::dense alpha, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, + const array& stop_status) { if (nrhs == 0) { return; } - const auto size = g->get_size()[0]; - const auto p_stride = p->get_stride(); + const auto size = g.size[0]; + const auto p_stride = p.stride; const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), exec->get_num_multiprocessor() * 2); const dim3 block_dim(default_dot_dim, default_dot_dim); for (size_type i = 0; i < k; i++) { - const auto p_i = p->get_const_values() + i * p_stride; + const auto p_i = p.values + i * p_stride; if (nrhs > 1 || is_complex()) { - components::fill_array(exec, alpha->get_values(), nrhs, - zero()); + components::fill_array(exec, alpha.values, nrhs, zero()); // not support 16 bit atomic #if !defined(CUDA_VERSION) if constexpr (sizeof(remove_complex) == sizeof(int16)) { @@ -472,55 +469,53 @@ void update_g_and_u(std::shared_ptr exec, #endif { multidot_kernel<<get_stream()>>>( - size, nrhs, as_device_type(p_i), - as_device_type(g_k->get_values()), g_k->get_stride(), - as_device_type(alpha->get_values()), - stop_status->get_const_data()); + size, nrhs, as_device_type(p_i), as_device_type(g_k.values), + g_k.stride, as_device_type(alpha.values), + stop_status.get_const_data()); } } else { - blas::dot(exec->get_blas_handle(), size, p_i, 1, g_k->get_values(), - g_k->get_stride(), alpha->get_values()); + blas::dot(exec->get_blas_handle(), size, p_i, 1, g_k.values, + g_k.stride, alpha.values); } update_g_k_and_u_kernel - <<get_stride(), default_block_size), + <<get_stream()>>>( - k, i, size, nrhs, as_device_type(alpha->get_const_values()), - as_device_type(m->get_const_values()), m->get_stride(), - as_device_type(g->get_const_values()), g->get_stride(), - as_device_type(g_k->get_values()), g_k->get_stride(), - as_device_type(u->get_values()), u->get_stride(), - stop_status->get_const_data()); + k, i, size, nrhs, as_device_type(alpha.values), + as_device_type(m.values), m.stride, as_device_type(g.values), + g.stride, as_device_type(g_k.values), g_k.stride, + as_device_type(u.values), u.stride, + stop_status.get_const_data()); } update_g_kernel - <<get_stride(), default_block_size), - default_block_size, 0, exec->get_stream()>>>( - k, size, nrhs, as_device_type(g_k->get_const_values()), - g_k->get_stride(), as_device_type(g->get_values()), g->get_stride(), - stop_status->get_const_data()); + <<get_stream()>>>(k, size, nrhs, as_device_type(g_k.values), + g_k.stride, as_device_type(g.values), + g.stride, stop_status.get_const_data()); } template void update_m(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* p, - const matrix::Dense* g_k, matrix::Dense* m, - const array* stop_status) + const size_type k, matrix::view::dense p, + matrix::view::dense g_k, + matrix::view::dense m, + const array& stop_status) { if (nrhs == 0) { return; } - const auto size = g_k->get_size()[0]; - const auto subspace_dim = m->get_size()[0]; - const auto p_stride = p->get_stride(); - const auto m_stride = m->get_stride(); + const auto size = g_k.size[0]; + const auto subspace_dim = m.size[0]; + const auto p_stride = p.stride; + const auto m_stride = m.stride; const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), exec->get_num_multiprocessor() * 2); const dim3 block_dim(default_dot_dim, default_dot_dim); for (size_type i = k; i < subspace_dim; i++) { - const auto p_i = p->get_const_values() + i * p_stride; - auto m_i = m->get_values() + i * m_stride + k * nrhs; + const auto p_i = p.values + i * p_stride; + auto m_i = m.values + i * m_stride + k * nrhs; if (nrhs > 1 || is_complex()) { components::fill_array(exec, m_i, nrhs, zero()); @@ -542,13 +537,13 @@ void update_m(std::shared_ptr exec, const size_type nrhs, #endif { multidot_kernel<<get_stream()>>>( - size, nrhs, as_device_type(p_i), - as_device_type(g_k->get_const_values()), g_k->get_stride(), - as_device_type(m_i), stop_status->get_const_data()); + size, nrhs, as_device_type(p_i), as_device_type(g_k.values), + g_k.stride, as_device_type(m_i), + stop_status.get_const_data()); } } else { - blas::dot(exec->get_blas_handle(), size, p_i, 1, - g_k->get_const_values(), g_k->get_stride(), m_i); + blas::dot(exec->get_blas_handle(), size, p_i, 1, g_k.values, + g_k.stride, m_i); } } } @@ -557,27 +552,25 @@ void update_m(std::shared_ptr exec, const size_type nrhs, template void update_x_r_and_f(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense* m, - const matrix::Dense* g, - const matrix::Dense* u, - matrix::Dense* f, matrix::Dense* r, - matrix::Dense* x, - const array* stop_status) + matrix::view::dense m, + matrix::view::dense g, + matrix::view::dense u, + matrix::view::dense f, + matrix::view::dense r, + matrix::view::dense x, + const array& stop_status) { - const auto size = x->get_size()[0]; - const auto subspace_dim = m->get_size()[0]; + const auto size = x.size[0]; + const auto subspace_dim = m.size[0]; - const auto grid_dim = ceildiv(size * x->get_stride(), default_block_size); + const auto grid_dim = ceildiv(size * x.stride, default_block_size); update_x_r_and_f_kernel<<get_stream()>>>( - k, size, subspace_dim, nrhs, as_device_type(m->get_const_values()), - m->get_stride(), as_device_type(g->get_const_values()), g->get_stride(), - as_device_type(u->get_const_values()), u->get_stride(), - as_device_type(f->get_values()), f->get_stride(), - as_device_type(r->get_values()), r->get_stride(), - as_device_type(x->get_values()), x->get_stride(), - stop_status->get_const_data()); - components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs, + k, size, subspace_dim, nrhs, as_device_type(m.values), m.stride, + as_device_type(g.values), g.stride, as_device_type(u.values), u.stride, + as_device_type(f.values), f.stride, as_device_type(r.values), r.stride, + as_device_type(x.values), x.stride, stop_status.get_const_data()); + components::fill_array(exec, f.values + k * f.stride, nrhs, zero()); } @@ -587,9 +580,9 @@ void update_x_r_and_f(std::shared_ptr exec, template void initialize(std::shared_ptr exec, - const size_type nrhs, matrix::Dense* m, - matrix::Dense* subspace_vectors, bool deterministic, - array* stop_status) + const size_type nrhs, matrix::view::dense m, + matrix::view::dense subspace_vectors, + bool deterministic, array& stop_status) { initialize_m(exec, nrhs, m, stop_status); initialize_subspace_vectors(exec, subspace_vectors, deterministic); @@ -601,26 +594,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* m, - const matrix::Dense* f, - const matrix::Dense* residual, - const matrix::Dense* g, matrix::Dense* c, - matrix::Dense* v, - const array* stop_status) + const size_type k, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense residual, + matrix::view::dense g, + matrix::view::dense c, matrix::view::dense v, + const array& stop_status) { solve_lower_triangular(exec, nrhs, m, f, c, stop_status); - const auto num_rows = v->get_size()[0]; - const auto subspace_dim = m->get_size()[0]; + const auto num_rows = v.size[0]; + const auto subspace_dim = m.size[0]; const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); step_1_kernel<<get_stream()>>>( - k, num_rows, subspace_dim, nrhs, - as_device_type(residual->get_const_values()), residual->get_stride(), - as_device_type(c->get_const_values()), c->get_stride(), - as_device_type(g->get_const_values()), g->get_stride(), - as_device_type(v->get_values()), v->get_stride(), - stop_status->get_const_data()); + k, num_rows, subspace_dim, nrhs, as_device_type(residual.values), + residual.stride, as_device_type(c.values), c.stride, + as_device_type(g.values), g.stride, as_device_type(v.values), v.stride, + stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); @@ -628,26 +619,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* omega, - const matrix::Dense* preconditioned_vector, - const matrix::Dense* c, matrix::Dense* u, - const array* stop_status) + const size_type k, matrix::view::dense omega, + matrix::view::dense preconditioned_vector, + matrix::view::dense c, + matrix::view::dense u, + const array& stop_status) { if (nrhs == 0) { return; } - const auto num_rows = preconditioned_vector->get_size()[0]; - const auto subspace_dim = u->get_size()[1] / nrhs; + const auto num_rows = preconditioned_vector.size[0]; + const auto subspace_dim = u.size[1] / nrhs; const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); step_2_kernel<<get_stream()>>>( - k, num_rows, subspace_dim, nrhs, - as_device_type(omega->get_const_values()), - as_device_type(preconditioned_vector->get_const_values()), - preconditioned_vector->get_stride(), - as_device_type(c->get_const_values()), c->get_stride(), - as_device_type(u->get_values()), u->get_stride(), - stop_status->get_const_data()); + k, num_rows, subspace_dim, nrhs, as_device_type(omega.values), + as_device_type(preconditioned_vector.values), + preconditioned_vector.stride, as_device_type(c.values), c.stride, + as_device_type(u.values), u.stride, stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); @@ -655,16 +644,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* p, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, matrix::Dense* m, - matrix::Dense* f, matrix::Dense* alpha, - matrix::Dense* residual, matrix::Dense* x, - const array* stop_status) + const size_type k, matrix::view::dense p, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense alpha, + matrix::view::dense residual, + matrix::view::dense x, + const array& stop_status) { - update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); - update_m(exec, nrhs, k, p, g_k, m, stop_status); - update_x_r_and_f(exec, nrhs, k, m, g, u, f, residual, x, stop_status); + update_g_and_u(exec, nrhs, k, p, m.as_const(), alpha, g, g_k, u, + stop_status); + update_m(exec, nrhs, k, p.as_const(), g_k.as_const(), m, stop_status); + update_x_r_and_f(exec, nrhs, k, m.as_const(), g.as_const(), u.as_const(), f, + residual, x, stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); @@ -673,16 +667,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense* tht, - const matrix::Dense>* residual_norm, - matrix::Dense* omega, const array* stop_status) + const remove_complex kappa, + matrix::view::dense tht, + matrix::view::dense> residual_norm, + matrix::view::dense omega, + const array& stop_status) { const auto grid_dim = ceildiv(nrhs, config::warp_size); compute_omega_kernel<<get_stream()>>>( - nrhs, as_device_type(kappa), as_device_type(tht->get_const_values()), - as_device_type(residual_norm->get_const_values()), - as_device_type(omega->get_values()), stop_status->get_const_data()); + nrhs, as_device_type(kappa), as_device_type(tht.values), + as_device_type(residual_norm.values), as_device_type(omega.values), + stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL); diff --git a/common/cuda_hip/solver/multigrid_kernels.cpp b/common/cuda_hip/solver/multigrid_kernels.cpp index f172c391864..9b0685ff5aa 100644 --- a/common/cuda_hip/solver/multigrid_kernels.cpp +++ b/common/cuda_hip/solver/multigrid_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -117,14 +117,15 @@ __global__ __launch_bounds__(default_block_size) void kcycle_check_stop_kernel( template void kcycle_step_1(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* v, - matrix::Dense* g, matrix::Dense* d, - matrix::Dense* e) + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense v, + matrix::view::dense g, + matrix::view::dense d, + matrix::view::dense e) { - const auto nrows = e->get_size()[0]; - const auto nrhs = e->get_size()[1]; + const auto nrows = e.size[0]; + const auto nrhs = e.size[1]; constexpr int max_size = (1U << 31) - 1; const size_type grid_nrows = max_size / nrhs < nrows ? max_size / nrhs : nrows; @@ -132,12 +133,10 @@ void kcycle_step_1(std::shared_ptr exec, if (grid > 0) { kernel::kcycle_step_1_kernel<<get_stream()>>>( - nrows, nrhs, e->get_stride(), grid_nrows, - as_device_type(alpha->get_const_values()), - as_device_type(rho->get_const_values()), - as_device_type(v->get_const_values()), - as_device_type(g->get_values()), as_device_type(d->get_values()), - as_device_type(e->get_values())); + nrows, nrhs, e.stride, grid_nrows, as_device_type(alpha.values), + as_device_type(rho.values), as_device_type(v.values), + as_device_type(g.values), as_device_type(d.values), + as_device_type(e.values)); } } @@ -146,16 +145,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL); template void kcycle_step_2(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* gamma, - const matrix::Dense* beta, - const matrix::Dense* zeta, - const matrix::Dense* d, - matrix::Dense* e) + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense gamma, + matrix::view::dense beta, + matrix::view::dense zeta, + matrix::view::dense d, + matrix::view::dense e) { - const auto nrows = e->get_size()[0]; - const auto nrhs = e->get_size()[1]; + const auto nrows = e.size[0]; + const auto nrhs = e.size[1]; constexpr int max_size = (1U << 31) - 1; const size_type grid_nrows = max_size / nrhs < nrows ? max_size / nrhs : nrows; @@ -163,14 +162,10 @@ void kcycle_step_2(std::shared_ptr exec, if (grid > 0) { kernel::kcycle_step_2_kernel<<get_stream()>>>( - nrows, nrhs, e->get_stride(), grid_nrows, - as_device_type(alpha->get_const_values()), - as_device_type(rho->get_const_values()), - as_device_type(gamma->get_const_values()), - as_device_type(beta->get_const_values()), - as_device_type(zeta->get_const_values()), - as_device_type(d->get_const_values()), - as_device_type(e->get_values())); + nrows, nrhs, e.stride, grid_nrows, as_device_type(alpha.values), + as_device_type(rho.values), as_device_type(gamma.values), + as_device_type(beta.values), as_device_type(zeta.values), + as_device_type(d.values), as_device_type(e.values)); } } @@ -179,21 +174,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL); template void kcycle_check_stop(std::shared_ptr exec, - const matrix::Dense* old_norm, - const matrix::Dense* new_norm, + matrix::view::dense old_norm, + matrix::view::dense new_norm, const ValueType rel_tol, bool& is_stop) { gko::array dis_stop(exec, 1); components::fill_array(exec, dis_stop.get_data(), dis_stop.get_size(), true); - const auto nrhs = new_norm->get_size()[1]; + const auto nrhs = new_norm.size[1]; const auto grid = ceildiv(nrhs, default_block_size); if (grid > 0) { kernel::kcycle_check_stop_kernel<<get_stream()>>>( - nrhs, as_device_type(old_norm->get_const_values()), - as_device_type(new_norm->get_const_values()), - as_device_type(rel_tol), as_device_type(dis_stop.get_data())); + nrhs, as_device_type(old_norm.values), + as_device_type(new_norm.values), as_device_type(rel_tol), + as_device_type(dis_stop.get_data())); } is_stop = get_element(dis_stop, 0); } diff --git a/common/cuda_hip/stop/criterion_kernels.cpp b/common/cuda_hip/stop/criterion_kernels.cpp index 8e3a69f725e..676f6934f5b 100644 --- a/common/cuda_hip/stop/criterion_kernels.cpp +++ b/common/cuda_hip/stop/criterion_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -41,15 +41,15 @@ __global__ __launch_bounds__(default_block_size) void set_all_statuses( void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, - array* stop_status) + array& stop_status) { const auto block_size = default_block_size; - const auto grid_size = ceildiv(stop_status->get_size(), block_size); + const auto grid_size = ceildiv(stop_status.get_size(), block_size); if (grid_size > 0) { set_all_statuses<<get_stream()>>>( - stop_status->get_size(), stoppingId, setFinalized, - as_device_type(stop_status->get_data())); + stop_status.get_size(), stoppingId, setFinalized, + as_device_type(stop_status.get_data())); } } diff --git a/common/cuda_hip/stop/residual_norm_kernels.cpp b/common/cuda_hip/stop/residual_norm_kernels.cpp index 9d6db5211e8..4a627ae7a8e 100644 --- a/common/cuda_hip/stop/residual_norm_kernels.cpp +++ b/common/cuda_hip/stop/residual_norm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -62,33 +62,32 @@ __global__ __launch_bounds__(1) void init_kernel( template void residual_norm(std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense* orig_tau, + matrix::view::dense tau, + matrix::view::dense orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, - bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, + bool& one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); init_kernel<<<1, 1, 0, exec->get_stream()>>>( - as_device_type(device_storage->get_data())); + as_device_type(device_storage.get_data())); const auto block_size = default_block_size; - const auto grid_size = ceildiv(tau->get_size()[1], block_size); + const auto grid_size = ceildiv(tau.size[1], block_size); if (grid_size > 0) { residual_norm_kernel<<get_stream()>>>( - tau->get_size()[1], as_device_type(rel_residual_goal), - as_device_type(tau->get_const_values()), - as_device_type(orig_tau->get_const_values()), stoppingId, - setFinalized, as_device_type(stop_status->get_data()), - as_device_type(device_storage->get_data())); + tau.size[1], as_device_type(rel_residual_goal), + as_device_type(tau.values), as_device_type(orig_tau.values), + stoppingId, setFinalized, as_device_type(stop_status.get_data()), + as_device_type(device_storage.get_data())); } /* Represents all_converged, one_changed */ - *all_converged = get_element(*device_storage, 0); - *one_changed = get_element(*device_storage, 1); + all_converged = get_element(device_storage, 0); + one_changed = get_element(device_storage, 1); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( @@ -144,31 +143,30 @@ __global__ __launch_bounds__(1) void init_kernel( template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense>* orig_tau, + matrix::view::dense tau, + matrix::view::dense> orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, bool& one_changed) { init_kernel<<<1, 1, 0, exec->get_stream()>>>( - as_device_type(device_storage->get_data())); + as_device_type(device_storage.get_data())); const auto block_size = default_block_size; - const auto grid_size = ceildiv(tau->get_size()[1], block_size); + const auto grid_size = ceildiv(tau.size[1], block_size); if (grid_size > 0) { implicit_residual_norm_kernel<<get_stream()>>>( - tau->get_size()[1], as_device_type(rel_residual_goal), - as_device_type(tau->get_const_values()), - as_device_type(orig_tau->get_const_values()), stoppingId, - setFinalized, as_device_type(stop_status->get_data()), - as_device_type(device_storage->get_data())); + tau.size[1], as_device_type(rel_residual_goal), + as_device_type(tau.values), as_device_type(orig_tau.values), + stoppingId, setFinalized, as_device_type(stop_status.get_data()), + as_device_type(device_storage.get_data())); } /* Represents all_converged, one_changed */ - *all_converged = get_element(*device_storage, 0); - *one_changed = get_element(*device_storage, 1); + all_converged = get_element(device_storage, 0); + one_changed = get_element(device_storage, 1); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); diff --git a/common/unified/base/index_set_kernels.cpp b/common/unified/base/index_set_kernels.cpp index 86aff129f00..91a6e00260b 100644 --- a/common/unified/base/index_set_kernels.cpp +++ b/common/unified/base/index_set_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,8 +22,8 @@ namespace idx_set { template void compute_validity(std::shared_ptr exec, - const array* local_indices, - array* validity_array) + const array& local_indices, + array& validity_array) { run_kernel( exec, @@ -31,7 +31,7 @@ void compute_validity(std::shared_ptr exec, validity_array[elem] = local_indices[elem] != invalid_index(); }, - local_indices->get_size(), *local_indices, *validity_array); + local_indices.get_size(), local_indices, validity_array); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/common/unified/base/kernel_launch.hpp b/common/unified/base/kernel_launch.hpp index 0dd2d86e67e..37e20693dbd 100644 --- a/common/unified/base/kernel_launch.hpp +++ b/common/unified/base/kernel_launch.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -152,7 +152,7 @@ namespace GKO_DEVICE_NAMESPACE { */ template struct matrix_accessor { - ValueType* data; + ValueType* values; int64 stride; /** @@ -161,7 +161,7 @@ struct matrix_accessor { */ GKO_INLINE GKO_ATTRIBUTES ValueType& operator()(int64 row, int64 col) { - return data[row * stride + col]; + return values[row * stride + col]; } /** @@ -171,7 +171,7 @@ struct matrix_accessor { */ GKO_INLINE GKO_ATTRIBUTES ValueType& operator[](int64 idx) { - return data[idx]; + return values[idx]; } }; @@ -201,32 +201,21 @@ struct to_device_type_impl { }; template -struct to_device_type_impl*&> { +struct to_device_type_impl&> { using type = matrix_accessor>; - static type map_to_device(matrix::Dense* mtx) + static type map_to_device(matrix::view::dense mtx) { return to_device_type_impl< - matrix::Dense* const&>::map_to_device(mtx); + matrix::view::dense const&>::map_to_device(mtx); } }; template -struct to_device_type_impl* const&> { +struct to_device_type_impl const&> { using type = matrix_accessor>; - static type map_to_device(matrix::Dense* mtx) + static type map_to_device(matrix::view::dense mtx) { - return {as_device_type(mtx->get_values()), - static_cast(mtx->get_stride())}; - } -}; - -template -struct to_device_type_impl*&> { - using type = matrix_accessor>; - static type map_to_device(const matrix::Dense* mtx) - { - return {as_device_type(mtx->get_const_values()), - static_cast(mtx->get_stride())}; + return {as_device_type(mtx.values), static_cast(mtx.stride)}; } }; diff --git a/common/unified/base/kernel_launch_solver.hpp b/common/unified/base/kernel_launch_solver.hpp index 14f2cbfeacf..a2aa1eb8385 100644 --- a/common/unified/base/kernel_launch_solver.hpp +++ b/common/unified/base/kernel_launch_solver.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -16,12 +16,12 @@ namespace GKO_DEVICE_NAMESPACE { /** * @internal - * Wrapper class used by default_stride(matrix::Dense*) to wrap a - * dense matrix using the default stride. + * Wrapper class used by default_stride(matrix::view::dense) to wrap + * a dense matrix using the default stride. */ template struct default_stride_dense_wrapper { - ValueType* data; + ValueType* values; }; @@ -30,7 +30,7 @@ struct default_stride_dense_wrapper { * Helper that creates a device representation of the input object based on the * default stride that was passed to run_kernel_solver. * @see default_stride_dense_wrapper - * @see default_stride(matrix::Dense*) + * @see default_stride(matrix::view::dense) */ template struct device_unpack_solver_impl { @@ -47,7 +47,7 @@ struct device_unpack_solver_impl> { static GKO_INLINE GKO_ATTRIBUTES type unpack(default_stride_dense_wrapper param, int64 default_stride) { - return {param.data, default_stride}; + return {param.values, default_stride}; } }; @@ -60,20 +60,20 @@ struct device_unpack_solver_impl> { */ template default_stride_dense_wrapper> default_stride( - matrix::Dense* mtx) + matrix::view::dense mtx) { - return {as_device_type(mtx->get_values())}; + return {as_device_type(mtx.values)}; } /** * @internal - * @copydoc default_stride(matrix::Dense*) + * @copydoc default_stride(matrix::view::dense) */ template default_stride_dense_wrapper> default_stride( - const matrix::Dense* mtx) + matrix::view::dense mtx) { - return {as_device_type(mtx->get_const_values())}; + return {as_device_type(mtx.values)}; } @@ -84,21 +84,22 @@ default_stride_dense_wrapper> default_stride( * pointer. */ template -device_type* row_vector(matrix::Dense* mtx) +device_type* row_vector(matrix::view::dense mtx) { - GKO_ASSERT(mtx->get_size()[0] == 1); - return as_device_type(mtx->get_values()); + GKO_ASSERT(mtx.size[0] == 1); + return as_device_type(mtx.values); } /** * @internal - * @copydoc row_vector(matrix::Dense*) + * @copydoc row_vector(matrix::view::dense) */ template -const device_type* row_vector(const matrix::Dense* mtx) +const device_type* row_vector( + matrix::view::dense mtx) { - GKO_ASSERT(mtx->get_size()[0] == 1); - return as_device_type(mtx->get_const_values()); + GKO_ASSERT(mtx.size[0] == 1); + return as_device_type(mtx.values); } diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index 8d6f23101ee..ad31ad8c6a4 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -127,7 +127,7 @@ void has_ordered_parts( std::shared_ptr exec, const experimental::distributed::Partition* partition, - bool* result) + bool& result) { const auto part_ids = partition->get_part_ids(); const auto num_ranges = partition->get_num_ranges(); @@ -144,7 +144,7 @@ void has_ordered_parts( }, [] GKO_KERNEL(const auto a) { return a; }, uint32(1), result_uint32.get_data(), num_ranges - 1, part_ids); - *result = static_cast(get_element(result_uint32, 0)); + result = static_cast(get_element(result_uint32, 0)); } GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/common/unified/matrix/coo_kernels.cpp b/common/unified/matrix/coo_kernels.cpp index 5d4de372e70..3610f4c9e17 100644 --- a/common/unified/matrix/coo_kernels.cpp +++ b/common/unified/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -45,7 +45,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Coo* orig, - matrix::Dense* result) + matrix::view::dense result) { run_kernel( exec, diff --git a/common/unified/matrix/csr_kernels.cpp b/common/unified/matrix/csr_kernels.cpp index 745ecd61080..1543f8d3724 100644 --- a/common/unified/matrix/csr_kernels.cpp +++ b/common/unified/matrix/csr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -92,14 +92,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, matrix::Csr* x) { run_kernel( exec, [] GKO_KERNEL(auto nnz, auto alpha, auto x) { x[nnz] *= alpha[0]; }, - x->get_num_stored_elements(), alpha->get_const_values(), - x->get_values()); + x->get_num_stored_elements(), alpha.values, x->get_values()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SCALE_KERNEL); @@ -107,14 +106,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SCALE_KERNEL); template void inv_scale(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, matrix::Csr* x) { run_kernel( exec, [] GKO_KERNEL(auto nnz, auto alpha, auto x) { x[nnz] /= alpha[0]; }, - x->get_num_stored_elements(), alpha->get_const_values(), - x->get_values()); + x->get_num_stored_elements(), alpha.values, x->get_values()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_SCALE_KERNEL); diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index 45b2d122da0..8a75b6160e4 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -27,8 +27,8 @@ namespace dense { template void copy(std::shared_ptr exec, - const matrix::Dense* input, - matrix::Dense* output) + matrix::view::dense input, + matrix::view::dense output) { run_kernel( exec, @@ -51,27 +51,27 @@ void copy(std::shared_ptr exec, static_cast>(input(row, col)); } }, - input->get_size(), input, output); + input.size, input, output); } template void fill(std::shared_ptr exec, - matrix::Dense* mat, ValueType value) + matrix::view::dense mat, ValueType value) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto mat, auto value) { mat(row, col) = value; }, - mat->get_size(), mat, value); + mat.size, mat, value); } template void fill_in_matrix_data(std::shared_ptr exec, const device_matrix_data& data, - matrix::Dense* output) + matrix::view::dense output) { run_kernel( exec, @@ -85,15 +85,16 @@ void fill_in_matrix_data(std::shared_ptr exec, template void scale(std::shared_ptr exec, - const matrix::Dense* alpha, matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense x) { - if (alpha->get_size()[1] > 1) { + if (alpha.size[1] > 1) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { x(row, col) *= alpha[col]; }, - x->get_size(), alpha->get_const_values(), x); + x.size, alpha.values, x); } else { run_kernel( exec, @@ -104,46 +105,47 @@ void scale(std::shared_ptr exec, x(row, col) *= alpha[0]; } }, - x->get_size(), alpha->get_const_values(), x); + x.size, alpha.values, x); } } template void inv_scale(std::shared_ptr exec, - const matrix::Dense* alpha, - matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense x) { - if (alpha->get_size()[1] > 1) { + if (alpha.size[1] > 1) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { x(row, col) /= alpha[col]; }, - x->get_size(), alpha->get_const_values(), x); + x.size, alpha.values, x); } else { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { x(row, col) /= alpha[0]; }, - x->get_size(), alpha->get_const_values(), x); + x.size, alpha.values, x); } } template void add_scaled(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* x, matrix::Dense* y) + matrix::view::dense alpha, + matrix::view::dense x, + matrix::view::dense y) { - if (alpha->get_size()[1] > 1) { + if (alpha.size[1] > 1) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto alpha, auto x, auto y) { y(row, col) += alpha[col] * x(row, col); }, - x->get_size(), alpha->get_const_values(), x, y); + x.size, alpha.values, x, y); } else { run_kernel( exec, @@ -152,23 +154,24 @@ void add_scaled(std::shared_ptr exec, y(row, col) += alpha[0] * x(row, col); } }, - x->get_size(), alpha->get_const_values(), x, y); + x.size, alpha.values, x, y); } } template void sub_scaled(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* x, matrix::Dense* y) + matrix::view::dense alpha, + matrix::view::dense x, + matrix::view::dense y) { - if (alpha->get_size()[1] > 1) { + if (alpha.size[1] > 1) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto alpha, auto x, auto y) { y(row, col) -= alpha[col] * x(row, col); }, - x->get_size(), alpha->get_const_values(), x, y); + x.size, alpha.values, x, y); } else { run_kernel( exec, @@ -177,16 +180,16 @@ void sub_scaled(std::shared_ptr exec, y(row, col) -= alpha[0] * x(row, col); } }, - x->get_size(), alpha->get_const_values(), x, y); + x.size, alpha.values, x, y); } } template void add_scaled_diag(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Diagonal* x, - matrix::Dense* y) + matrix::view::dense y) { const auto diag_values = x->get_const_values(); run_kernel( @@ -196,15 +199,15 @@ void add_scaled_diag(std::shared_ptr exec, y(i, i) += alpha[0] * diag[i]; } }, - x->get_size()[0], alpha->get_const_values(), x->get_const_values(), y); + x->get_size()[0], alpha.values, x->get_const_values(), y); } template void sub_scaled_diag(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Diagonal* x, - matrix::Dense* y) + matrix::view::dense y) { const auto diag_values = x->get_const_values(); run_kernel( @@ -214,46 +217,44 @@ void sub_scaled_diag(std::shared_ptr exec, y(i, i) -= alpha[0] * diag[i]; } }, - x->get_size()[0], alpha->get_const_values(), x->get_const_values(), y); + x->get_size()[0], alpha.values, x->get_const_values(), y); } template void compute_dot(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array& tmp) { run_kernel_col_reduction_cached( exec, [] GKO_KERNEL(auto i, auto j, auto x, auto y) { return x(i, j) * y(i, j); }, - GKO_KERNEL_REDUCE_SUM(ValueType), result->get_values(), x->get_size(), - tmp, x, y); + GKO_KERNEL_REDUCE_SUM(ValueType), result.values, x.size, tmp, x, y); } template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array& tmp) { run_kernel_col_reduction_cached( exec, [] GKO_KERNEL(auto i, auto j, auto x, auto y) { return conj(x(i, j)) * y(i, j); }, - GKO_KERNEL_REDUCE_SUM(ValueType), result->get_values(), x->get_size(), - tmp, x, y); + GKO_KERNEL_REDUCE_SUM(ValueType), result.values, x.size, tmp, x, y); } template void compute_norm2(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { run_kernel_col_reduction_cached( @@ -261,26 +262,26 @@ void compute_norm2(std::shared_ptr exec, [] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); }, [] GKO_KERNEL(auto a, auto b) { return a + b; }, [] GKO_KERNEL(auto a) { return sqrt(a); }, remove_complex{}, - result->get_values(), x->get_size(), tmp, x); + result.values, x.size, tmp, x); } template void compute_norm1(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { run_kernel_col_reduction_cached( exec, [] GKO_KERNEL(auto i, auto j, auto x) { return abs(x(i, j)); }, - GKO_KERNEL_REDUCE_SUM(remove_complex), result->get_values(), - x->get_size(), tmp, x); + GKO_KERNEL_REDUCE_SUM(remove_complex), result.values, x.size, + tmp, x); } template void compute_mean(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense result, array& tmp) { using ValueType_nc = gko::remove_complex; run_kernel_col_reduction_cached( @@ -288,34 +289,33 @@ void compute_mean(std::shared_ptr exec, [] GKO_KERNEL(auto i, auto j, auto x, auto inv_total_size) { return x(i, j) * inv_total_size; }, - GKO_KERNEL_REDUCE_SUM(ValueType), result->get_values(), x->get_size(), - tmp, x, ValueType_nc{1.} / std::max(1, x->get_size()[0])); + GKO_KERNEL_REDUCE_SUM(ValueType), result.values, x.size, tmp, x, + ValueType_nc{1.} / std::max(1, x.size[0])); } template void compute_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, size_type& result) { - array partial{exec, source->get_size()[0] + 1}; + array partial{exec, source.size[0] + 1}; count_nonzeros_per_row(exec, source, partial.get_data()); run_kernel_reduction( exec, [] GKO_KERNEL(auto i, auto partial) { return partial[i]; }, - GKO_KERNEL_REDUCE_MAX(size_type), - partial.get_data() + source->get_size()[0], source->get_size()[0], - partial); - result = get_element(partial, source->get_size()[0]); + GKO_KERNEL_REDUCE_MAX(size_type), partial.get_data() + source.size[0], + source.size[0], partial); + result = get_element(partial, source.size[0]); } template void compute_slice_sets(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, size_type slice_size, size_type stride_factor, size_type* slice_sets, size_type* slice_lengths) { - const auto num_rows = source->get_size()[0]; + const auto num_rows = source.size[0]; array row_nnz{exec, num_rows}; count_nonzeros_per_row(exec, source, row_nnz.get_data()); const auto num_slices = @@ -340,7 +340,7 @@ void compute_slice_sets(std::shared_ptr exec, template void count_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense* mtx, + matrix::view::dense mtx, IndexType* result) { run_kernel_row_reduction( @@ -348,64 +348,64 @@ void count_nonzeros_per_row(std::shared_ptr exec, [] GKO_KERNEL(auto i, auto j, auto mtx) { return is_nonzero(mtx(i, j)) ? 1 : 0; }, - GKO_KERNEL_REDUCE_SUM(IndexType), result, 1, mtx->get_size(), mtx); + GKO_KERNEL_REDUCE_SUM(IndexType), result, 1, mtx.size, mtx); } template -void compute_squared_norm2(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, - array& tmp) +void compute_squared_norm2( + std::shared_ptr exec, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { run_kernel_col_reduction_cached( exec, [] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); }, - GKO_KERNEL_REDUCE_SUM(remove_complex), result->get_values(), - x->get_size(), tmp, x); + GKO_KERNEL_REDUCE_SUM(remove_complex), result.values, x.size, + tmp, x); } template void compute_sqrt(std::shared_ptr exec, - matrix::Dense* x) + matrix::view::dense x) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto x) { x(row, col) = sqrt(x(row, col)); }, - x->get_size(), x); + x.size, x); } template void symm_permute(std::shared_ptr exec, const IndexType* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(row, col) = orig(perm[row], perm[col]); }, - orig->get_size(), orig, permutation_indices, permuted); + orig.size, orig, permutation_indices, permuted); } template void inv_symm_permute(std::shared_ptr exec, const IndexType* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(perm[row], perm[col]) = orig(row, col); }, - orig->get_size(), orig, permutation_indices, permuted); + orig.size, orig, permutation_indices, permuted); } @@ -413,8 +413,8 @@ template void nonsymm_permute(std::shared_ptr exec, const IndexType* row_permutation_indices, const IndexType* column_permutation_indices, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -422,8 +422,8 @@ void nonsymm_permute(std::shared_ptr exec, auto col_perm, auto permuted) { permuted(row, col) = orig(row_perm[row], col_perm[col]); }, - orig->get_size(), orig, row_permutation_indices, - column_permutation_indices, permuted); + orig.size, orig, row_permutation_indices, column_permutation_indices, + permuted); } @@ -431,8 +431,8 @@ template void inv_nonsymm_permute(std::shared_ptr exec, const IndexType* row_permutation_indices, const IndexType* column_permutation_indices, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -440,15 +440,16 @@ void inv_nonsymm_permute(std::shared_ptr exec, auto col_perm, auto permuted) { permuted(row_perm[row], col_perm[col]) = orig(row, col); }, - orig->get_size(), orig, row_permutation_indices, - column_permutation_indices, permuted); + orig.size, orig, row_permutation_indices, column_permutation_indices, + permuted); } template void row_gather(std::shared_ptr exec, - const IndexType* row_idxs, const matrix::Dense* orig, - matrix::Dense* row_collection) + const IndexType* row_idxs, + matrix::view::dense orig, + matrix::view::dense row_collection) { run_kernel( exec, @@ -469,17 +470,17 @@ void row_gather(std::shared_ptr exec, static_cast>(orig(rows[row], col)); } }, - row_collection->get_size(), orig, row_idxs, row_collection); + row_collection.size, orig, row_idxs, row_collection); } template void advanced_row_gather(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const IndexType* row_idxs, - const matrix::Dense* orig, - const matrix::Dense* beta, - matrix::Dense* row_collection) + matrix::view::dense orig, + matrix::view::dense beta, + matrix::view::dense row_collection) { run_kernel( exec, @@ -491,61 +492,61 @@ void advanced_row_gather(std::shared_ptr exec, static_cast(beta[0]) * static_cast(gathered(row, col))); }, - row_collection->get_size(), alpha->get_const_values(), orig, row_idxs, - beta->get_const_values(), row_collection); + row_collection.size, alpha.values, orig, row_idxs, beta.values, + row_collection); } template void col_permute(std::shared_ptr exec, const IndexType* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* col_permuted) + matrix::view::dense orig, + matrix::view::dense col_permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(row, col) = orig(row, perm[col]); }, - orig->get_size(), orig, permutation_indices, col_permuted); + orig.size, orig, permutation_indices, col_permuted); } template void inv_row_permute(std::shared_ptr exec, const IndexType* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* row_permuted) + matrix::view::dense orig, + matrix::view::dense row_permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(perm[row], col) = orig(row, col); }, - orig->get_size(), orig, permutation_indices, row_permuted); + orig.size, orig, permutation_indices, row_permuted); } template void inv_col_permute(std::shared_ptr exec, const IndexType* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* col_permuted) + matrix::view::dense orig, + matrix::view::dense col_permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(row, perm[col]) = orig(row, col); }, - orig->get_size(), orig, permutation_indices, col_permuted); + orig.size, orig, permutation_indices, col_permuted); } template void symm_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -555,15 +556,15 @@ void symm_scale_permute(std::shared_ptr exec, const auto col = perm[j]; permuted(i, j) = scale[row] * scale[col] * orig(row, col); }, - orig->get_size(), scale, perm, orig, permuted); + orig.size, scale, perm, orig, permuted); } template void inv_symm_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -573,7 +574,7 @@ void inv_symm_scale_permute(std::shared_ptr exec, const auto col = perm[j]; permuted(row, col) = orig(i, j) / (scale[row] * scale[col]); }, - orig->get_size(), scale, perm, orig, permuted); + orig.size, scale, perm, orig, permuted); } @@ -583,8 +584,8 @@ void nonsymm_scale_permute(std::shared_ptr exec, const IndexType* row_perm, const ValueType* col_scale, const IndexType* col_perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -594,8 +595,7 @@ void nonsymm_scale_permute(std::shared_ptr exec, const auto col = col_perm[j]; permuted(i, j) = row_scale[row] * col_scale[col] * orig(row, col); }, - orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, - permuted); + orig.size, row_scale, row_perm, col_scale, col_perm, orig, permuted); } @@ -605,8 +605,8 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, const IndexType* row_perm, const ValueType* col_scale, const IndexType* col_perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -616,16 +616,15 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, const auto col = col_perm[j]; permuted(row, col) = orig(i, j) / (row_scale[row] * col_scale[col]); }, - orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, - permuted); + orig.size, row_scale, row_perm, col_scale, col_perm, orig, permuted); } template void row_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -634,15 +633,15 @@ void row_scale_permute(std::shared_ptr exec, const auto row = perm[i]; permuted(i, j) = scale[row] * orig(row, j); }, - orig->get_size(), scale, perm, orig, permuted); + orig.size, scale, perm, orig, permuted); } template void inv_row_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -651,15 +650,15 @@ void inv_row_scale_permute(std::shared_ptr exec, const auto row = perm[i]; permuted(row, j) = orig(i, j) / scale[row]; }, - orig->get_size(), scale, perm, orig, permuted); + orig.size, scale, perm, orig, permuted); } template void col_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -668,15 +667,15 @@ void col_scale_permute(std::shared_ptr exec, const auto col = perm[j]; permuted(i, j) = scale[col] * orig(i, col); }, - orig->get_size(), scale, perm, orig, permuted); + orig.size, scale, perm, orig, permuted); } template void inv_col_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { run_kernel( exec, @@ -685,13 +684,13 @@ void inv_col_scale_permute(std::shared_ptr exec, const auto col = perm[j]; permuted(i, col) = orig(i, j) / scale[col]; }, - orig->get_size(), scale, perm, orig, permuted); + orig.size, scale, perm, orig, permuted); } template void extract_diagonal(std::shared_ptr exec, - const matrix::Dense* orig, + matrix::view::dense orig, matrix::Diagonal* diag) { run_kernel( @@ -703,78 +702,79 @@ void extract_diagonal(std::shared_ptr exec, template void inplace_absolute_dense(std::shared_ptr exec, - matrix::Dense* source) + matrix::view::dense source) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto source) { source(row, col) = abs(source(row, col)); }, - source->get_size(), source); + source.size, source); } template -void outplace_absolute_dense(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) +void outplace_absolute_dense( + std::shared_ptr exec, + matrix::view::dense source, + matrix::view::dense> result) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto source, auto result) { result(row, col) = abs(source(row, col)); }, - source->get_size(), source, result); + source.size, source, result); } template void make_complex(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) + matrix::view::dense source, + matrix::view::dense> result) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto source, auto result) { result(row, col) = source(row, col); }, - source->get_size(), source, result); + source.size, source, result); } template void get_real(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) + matrix::view::dense source, + matrix::view::dense> result) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto source, auto result) { result(row, col) = real(source(row, col)); }, - source->get_size(), source, result); + source.size, source, result); } template void get_imag(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) + matrix::view::dense source, + matrix::view::dense> result) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto source, auto result) { result(row, col) = imag(source(row, col)); }, - source->get_size(), source, result); + source.size, source, result); } template void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* beta, - matrix::Dense* mtx) + matrix::view::dense alpha, + matrix::view::dense beta, + matrix::view::dense mtx) { run_kernel( exec, @@ -784,8 +784,7 @@ void add_scaled_identity(std::shared_ptr exec, mtx(row, row) += alpha[0]; } }, - mtx->get_size(), alpha->get_const_values(), beta->get_const_values(), - mtx); + mtx.size, alpha.values, beta.values, mtx); } diff --git a/common/unified/matrix/diagonal_kernels.cpp b/common/unified/matrix/diagonal_kernels.cpp index dae037a5134..b1ddf760742 100644 --- a/common/unified/matrix/diagonal_kernels.cpp +++ b/common/unified/matrix/diagonal_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,8 +23,8 @@ namespace diagonal { template void apply_to_dense(std::shared_ptr exec, const matrix::Diagonal* a, - const matrix::Dense* b, - matrix::Dense* c, bool inverse) + matrix::view::dense b, + matrix::view::dense c, bool inverse) { run_kernel( exec, @@ -33,7 +33,7 @@ void apply_to_dense(std::shared_ptr exec, result(row, col) = inverse ? source(row, col) / diag[row] : source(row, col) * diag[row]; }, - b->get_size(), a->get_const_values(), b, c, inverse); + b.size, a->get_const_values(), b, c, inverse); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); @@ -42,15 +42,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); template void right_apply_to_dense(std::shared_ptr exec, const matrix::Diagonal* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto diag, auto source, auto result) { result(row, col) = source(row, col) * diag[col]; }, - b->get_size(), a->get_const_values(), b, c); + b.size, a->get_const_values(), b, c); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/common/unified/matrix/ell_kernels.cpp b/common/unified/matrix/ell_kernels.cpp index 6d23e08b68b..e94812f9b2c 100644 --- a/common/unified/matrix/ell_kernels.cpp +++ b/common/unified/matrix/ell_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -74,7 +74,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Ell* source, - matrix::Dense* result) + matrix::view::dense result) { // ELL is stored in column-major, so we swap row and column parameters run_kernel( diff --git a/common/unified/matrix/sellp_kernels.cpp b/common/unified/matrix/sellp_kernels.cpp index 93b71ff43f2..6046b02af3c 100644 --- a/common/unified/matrix/sellp_kernels.cpp +++ b/common/unified/matrix/sellp_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -94,7 +94,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Sellp* source, - matrix::Dense* result) + matrix::view::dense result) { run_kernel( exec, diff --git a/common/unified/matrix/sparsity_csr_kernels.cpp b/common/unified/matrix/sparsity_csr_kernels.cpp index c5a9c79a89b..834dcaa529c 100644 --- a/common/unified/matrix/sparsity_csr_kernels.cpp +++ b/common/unified/matrix/sparsity_csr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -24,7 +24,7 @@ namespace sparsity_csr { template void fill_in_dense(std::shared_ptr exec, const matrix::SparsityCsr* input, - matrix::Dense* output) + matrix::view::dense output) { run_kernel( exec, diff --git a/common/unified/preconditioner/jacobi_kernels.cpp b/common/unified/preconditioner/jacobi_kernels.cpp index 6e336274831..d65223ae0a1 100644 --- a/common/unified/preconditioner/jacobi_kernels.cpp +++ b/common/unified/preconditioner/jacobi_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -55,12 +55,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL); template void scalar_apply(std::shared_ptr exec, const array& diag, - const matrix::Dense* alpha, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense x) { - if (alpha->get_size()[1] > 1) { + if (alpha.size[1] > 1) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b, @@ -68,8 +68,7 @@ void scalar_apply(std::shared_ptr exec, x(row, col) = beta[col] * x(row, col) + alpha[col] * b(row, col) * diag[row]; }, - x->get_size(), diag, alpha->get_const_values(), b, - beta->get_const_values(), x); + x.size, diag, alpha.values, b, beta.values, x); } else { run_kernel( exec, @@ -78,8 +77,7 @@ void scalar_apply(std::shared_ptr exec, x(row, col) = beta[0] * x(row, col) + alpha[0] * b(row, col) * diag[row]; }, - x->get_size(), diag, alpha->get_const_values(), b, - beta->get_const_values(), x); + x.size, diag, alpha.values, b, beta.values, x); } } @@ -89,15 +87,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); template void simple_scalar_apply(std::shared_ptr exec, const array& diag, - const matrix::Dense* b, - matrix::Dense* x) + matrix::view::dense b, + matrix::view::dense x) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto diag, auto b, auto x) { x(row, col) = b(row, col) * diag[row]; }, - x->get_size(), diag, b, x); + x.size, diag, b, x); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -107,7 +105,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void scalar_convert_to_dense(std::shared_ptr exec, const array& blocks, - matrix::Dense* result) + matrix::view::dense result) { run_kernel( exec, @@ -117,7 +115,7 @@ void scalar_convert_to_dense(std::shared_ptr exec, result(row, col) = diag[row]; } }, - result->get_size(), blocks, result); + result.size, blocks, result); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/common/unified/solver/bicg_kernels.cpp b/common/unified/solver/bicg_kernels.cpp index 7d15718c05d..5c78ebfdbc6 100644 --- a/common/unified/solver/bicg_kernels.cpp +++ b/common/unified/solver/bicg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,16 +21,16 @@ namespace bicg { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* z, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* prev_rho, - matrix::Dense* rho, matrix::Dense* r2, - matrix::Dense* z2, matrix::Dense* p2, - matrix::Dense* q2, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense z, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense prev_rho, + matrix::view::dense rho, matrix::view::dense r2, + matrix::view::dense z2, matrix::view::dense p2, + matrix::view::dense q2, array& stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto z, auto p, @@ -46,11 +46,11 @@ void initialize(std::shared_ptr exec, z(row, col) = p(row, col) = q(row, col) = z2(row, col) = p2(row, col) = q2(row, col) = zero(z(row, col)); }, - b->get_size(), b->get_stride(), default_stride(b), - default_stride(r), default_stride(z), default_stride(p), - default_stride(q), row_vector(prev_rho), row_vector(rho), - default_stride(r2), default_stride(z2), default_stride(p2), - default_stride(q2), *stop_status); + b.size, b.stride, default_stride(b), default_stride(r), + default_stride(z), default_stride(p), default_stride(q), + row_vector(prev_rho), row_vector(rho), default_stride(r2), + default_stride(z2), default_stride(p2), default_stride(q2), + stop_status); } else { run_kernel( exec, @@ -59,8 +59,7 @@ void initialize(std::shared_ptr exec, prev_rho[col] = one(prev_rho[col]); stop[col].reset(); }, - b->get_size()[1], row_vector(prev_rho), row_vector(rho), - *stop_status); + b.size[1], row_vector(prev_rho), row_vector(rho), stop_status); } } @@ -69,11 +68,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* p, const matrix::Dense* z, - matrix::Dense* p2, const matrix::Dense* z2, - const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense z, + matrix::view::dense p2, + matrix::view::dense z2, + matrix::view::dense rho, + matrix::view::dense prev_rho, + const array& stop_status) { run_kernel_solver( exec, @@ -85,9 +86,9 @@ void step_1(std::shared_ptr exec, p2(row, col) = z2(row, col) + tmp * p2(row, col); } }, - p->get_size(), p->get_stride(), default_stride(p), default_stride(z), + p.size, p.stride, default_stride(p), default_stride(z), default_stride(p2), default_stride(z2), row_vector(rho), - row_vector(prev_rho), *stop_status); + row_vector(prev_rho), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); @@ -95,13 +96,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - matrix::Dense* r2, const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* q2, - const matrix::Dense* beta, - const matrix::Dense* rho, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense r2, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense q2, + matrix::view::dense beta, + matrix::view::dense rho, + const array& stop_status) { run_kernel_solver( exec, @@ -114,9 +116,9 @@ void step_2(std::shared_ptr exec, r2(row, col) -= tmp * q2(row, col); } }, - x->get_size(), r->get_stride(), x, default_stride(r), - default_stride(r2), default_stride(p), default_stride(q), - default_stride(q2), row_vector(beta), row_vector(rho), *stop_status); + x.size, r.stride, x, default_stride(r), default_stride(r2), + default_stride(p), default_stride(q), default_stride(q2), + row_vector(beta), row_vector(rho), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_2_KERNEL); diff --git a/common/unified/solver/bicgstab_kernels.cpp b/common/unified/solver/bicgstab_kernels.cpp index c403da3bf96..a4a7ace2fa7 100644 --- a/common/unified/solver/bicgstab_kernels.cpp +++ b/common/unified/solver/bicgstab_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,18 +21,18 @@ namespace bicgstab { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* rr, matrix::Dense* y, - matrix::Dense* s, matrix::Dense* t, - matrix::Dense* z, matrix::Dense* v, - matrix::Dense* p, matrix::Dense* prev_rho, - matrix::Dense* rho, matrix::Dense* alpha, - matrix::Dense* beta, matrix::Dense* gamma, - matrix::Dense* omega, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense rr, matrix::view::dense y, + matrix::view::dense s, matrix::view::dense t, + matrix::view::dense z, matrix::view::dense v, + matrix::view::dense p, matrix::view::dense prev_rho, + matrix::view::dense rho, matrix::view::dense alpha, + matrix::view::dense beta, matrix::view::dense gamma, + matrix::view::dense omega, array& stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto rr, auto y, @@ -48,12 +48,12 @@ void initialize(std::shared_ptr exec, rr(row, col) = z(row, col) = v(row, col) = s(row, col) = t( row, col) = y(row, col) = p(row, col) = zero(rr(row, col)); }, - b->get_size(), b->get_stride(), default_stride(b), - default_stride(r), default_stride(rr), default_stride(y), - default_stride(s), default_stride(t), default_stride(z), - default_stride(v), default_stride(p), row_vector(prev_rho), - row_vector(rho), row_vector(alpha), row_vector(beta), - row_vector(gamma), row_vector(omega), *stop_status); + b.size, b.stride, default_stride(b), default_stride(r), + default_stride(rr), default_stride(y), default_stride(s), + default_stride(t), default_stride(z), default_stride(v), + default_stride(p), row_vector(prev_rho), row_vector(rho), + row_vector(alpha), row_vector(beta), row_vector(gamma), + row_vector(omega), stop_status); } else { run_kernel( exec, @@ -63,9 +63,9 @@ void initialize(std::shared_ptr exec, omega[col] = one(rho[col]); stop[col].reset(); }, - b->get_size()[1], row_vector(prev_rho), row_vector(rho), - row_vector(alpha), row_vector(beta), row_vector(gamma), - row_vector(omega), *stop_status); + b.size[1], row_vector(prev_rho), row_vector(rho), row_vector(alpha), + row_vector(beta), row_vector(gamma), row_vector(omega), + stop_status); } } @@ -74,13 +74,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* p, - const matrix::Dense* v, - const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const matrix::Dense* alpha, - const matrix::Dense* omega, - const array* stop_status) + matrix::view::dense r, + matrix::view::dense p, + matrix::view::dense v, + matrix::view::dense rho, + matrix::view::dense prev_rho, + matrix::view::dense alpha, + matrix::view::dense omega, + const array& stop_status) { run_kernel_solver( exec, @@ -93,9 +94,9 @@ void step_1(std::shared_ptr exec, tmp * (p(row, col) - omega[col] * v(row, col)); } }, - r->get_size(), r->get_stride(), default_stride(r), default_stride(p), + r.size, r.stride, default_stride(r), default_stride(p), default_stride(v), row_vector(rho), row_vector(prev_rho), - row_vector(alpha), row_vector(omega), *stop_status); + row_vector(alpha), row_vector(omega), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); @@ -103,12 +104,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* s, - const matrix::Dense* v, - const matrix::Dense* rho, - matrix::Dense* alpha, - const matrix::Dense* beta, - const array* stop_status) + matrix::view::dense r, + matrix::view::dense s, + matrix::view::dense v, + matrix::view::dense rho, + matrix::view::dense alpha, + matrix::view::dense beta, + const array& stop_status) { run_kernel_solver( exec, @@ -122,22 +124,26 @@ void step_2(std::shared_ptr exec, s(row, col) = r(row, col) - tmp * v(row, col); } }, - r->get_size(), r->get_stride(), default_stride(r), default_stride(s), + r.size, r.stride, default_stride(r), default_stride(s), default_stride(v), row_vector(rho), row_vector(alpha), row_vector(beta), - *stop_status); + stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); template -void step_3( - std::shared_ptr exec, matrix::Dense* x, - matrix::Dense* r, const matrix::Dense* s, - const matrix::Dense* t, const matrix::Dense* y, - const matrix::Dense* z, const matrix::Dense* alpha, - const matrix::Dense* beta, const matrix::Dense* gamma, - matrix::Dense* omega, const array* stop_status) +void step_3(std::shared_ptr exec, + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense s, + matrix::view::dense t, + matrix::view::dense y, + matrix::view::dense z, + matrix::view::dense alpha, + matrix::view::dense beta, + matrix::view::dense gamma, + matrix::view::dense omega, + const array& stop_status) { run_kernel_solver( exec, @@ -153,10 +159,10 @@ void step_3( r(row, col) = s(row, col) - tmp * t(row, col); } }, - x->get_size(), r->get_stride(), x, default_stride(r), default_stride(s), + x.size, r.stride, x, default_stride(r), default_stride(s), default_stride(t), default_stride(y), default_stride(z), row_vector(alpha), row_vector(beta), row_vector(gamma), - row_vector(omega), *stop_status); + row_vector(omega), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); @@ -164,9 +170,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); template void finalize(std::shared_ptr exec, - matrix::Dense* x, const matrix::Dense* y, - const matrix::Dense* alpha, - array* stop_status) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense alpha, + array& stop_status) { run_kernel_solver( exec, @@ -176,8 +183,7 @@ void finalize(std::shared_ptr exec, x(row, col) += alpha[col] * y(row, col); } }, - x->get_size(), y->get_stride(), x, default_stride(y), row_vector(alpha), - *stop_status); + x.size, y.stride, x, default_stride(y), row_vector(alpha), stop_status); run_kernel( exec, [] GKO_KERNEL(auto col, auto stop) { @@ -185,7 +191,7 @@ void finalize(std::shared_ptr exec, stop[col].finalize(); } }, - x->get_size()[1], *stop_status); + x.size[1], stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL); diff --git a/common/unified/solver/cg_kernels.cpp b/common/unified/solver/cg_kernels.cpp index 822dddf1c3b..119c06ce4d0 100644 --- a/common/unified/solver/cg_kernels.cpp +++ b/common/unified/solver/cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,14 +21,14 @@ namespace cg { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* z, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* prev_rho, - matrix::Dense* rho, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense z, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense prev_rho, + matrix::view::dense rho, array& stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto z, auto p, @@ -41,9 +41,9 @@ void initialize(std::shared_ptr exec, r(row, col) = b(row, col); z(row, col) = p(row, col) = q(row, col) = zero(z(row, col)); }, - b->get_size(), b->get_stride(), b, default_stride(r), - default_stride(z), default_stride(p), default_stride(q), - row_vector(prev_rho), row_vector(rho), *stop_status); + b.size, b.stride, b, default_stride(r), default_stride(z), + default_stride(p), default_stride(q), row_vector(prev_rho), + row_vector(rho), stop_status); } else { run_kernel( exec, @@ -52,8 +52,7 @@ void initialize(std::shared_ptr exec, prev_rho[col] = one(prev_rho[col]); stop[col].reset(); }, - b->get_size()[1], row_vector(prev_rho), row_vector(rho), - *stop_status); + b.size[1], row_vector(prev_rho), row_vector(rho), stop_status); } } @@ -62,10 +61,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* p, const matrix::Dense* z, - const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense z, + matrix::view::dense rho, + matrix::view::dense prev_rho, + const array& stop_status) { run_kernel_solver( exec, @@ -76,8 +76,8 @@ void step_1(std::shared_ptr exec, p(row, col) = z(row, col) + tmp * p(row, col); } }, - p->get_size(), p->get_stride(), default_stride(p), default_stride(z), - row_vector(rho), row_vector(prev_rho), *stop_status); + p.size, p.stride, default_stride(p), default_stride(z), row_vector(rho), + row_vector(prev_rho), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); @@ -85,12 +85,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* beta, - const matrix::Dense* rho, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense beta, + matrix::view::dense rho, + const array& stop_status) { run_kernel_solver( exec, @@ -102,8 +102,8 @@ void step_2(std::shared_ptr exec, r(row, col) -= tmp * q(row, col); } }, - x->get_size(), r->get_stride(), x, default_stride(r), default_stride(p), - default_stride(q), row_vector(beta), row_vector(rho), *stop_status); + x.size, r.stride, x, default_stride(r), default_stride(p), + default_stride(q), row_vector(beta), row_vector(rho), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_2_KERNEL); diff --git a/common/unified/solver/cgs_kernels.cpp b/common/unified/solver/cgs_kernels.cpp index 0618b8f8208..a437cd14ead 100644 --- a/common/unified/solver/cgs_kernels.cpp +++ b/common/unified/solver/cgs_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,19 +21,18 @@ namespace cgs { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* r_tld, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* u, - matrix::Dense* u_hat, - matrix::Dense* v_hat, matrix::Dense* t, - matrix::Dense* alpha, matrix::Dense* beta, - matrix::Dense* gamma, - matrix::Dense* prev_rho, - matrix::Dense* rho, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense r_tld, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense u, + matrix::view::dense u_hat, matrix::view::dense v_hat, + matrix::view::dense t, matrix::view::dense alpha, + matrix::view::dense beta, matrix::view::dense gamma, + matrix::view::dense prev_rho, matrix::view::dense rho, + array& stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto r_tld, @@ -50,12 +49,12 @@ void initialize(std::shared_ptr exec, u(row, col) = u_hat(row, col) = p(row, col) = q(row, col) = v_hat(row, col) = t(row, col) = zero(u(row, col)); }, - b->get_size(), b->get_stride(), default_stride(b), - default_stride(r), default_stride(r_tld), default_stride(p), - default_stride(q), default_stride(u), default_stride(u_hat), - default_stride(v_hat), default_stride(t), row_vector(alpha), - row_vector(beta), row_vector(gamma), row_vector(prev_rho), - row_vector(rho), *stop_status); + b.size, b.stride, default_stride(b), default_stride(r), + default_stride(r_tld), default_stride(p), default_stride(q), + default_stride(u), default_stride(u_hat), default_stride(v_hat), + default_stride(t), row_vector(alpha), row_vector(beta), + row_vector(gamma), row_vector(prev_rho), row_vector(rho), + stop_status); } else { run_kernel( exec, @@ -66,9 +65,8 @@ void initialize(std::shared_ptr exec, one(prev_rho[col]); stop[col].reset(); }, - b->get_size()[1], row_vector(alpha), row_vector(beta), - row_vector(gamma), row_vector(prev_rho), row_vector(rho), - *stop_status); + b.size[1], row_vector(alpha), row_vector(beta), row_vector(gamma), + row_vector(prev_rho), row_vector(rho), stop_status); } } @@ -77,11 +75,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* u, - matrix::Dense* p, const matrix::Dense* q, - matrix::Dense* beta, const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense r, + matrix::view::dense u, matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense beta, + matrix::view::dense rho, + matrix::view::dense prev_rho, + const array& stop_status) { run_kernel_solver( exec, @@ -98,9 +98,9 @@ void step_1(std::shared_ptr exec, u(row, col) + tmp * (q(row, col) + tmp * p(row, col)); } }, - r->get_size(), r->get_stride(), default_stride(r), default_stride(u), + r.size, r.stride, default_stride(r), default_stride(u), default_stride(p), default_stride(q), row_vector(beta), row_vector(rho), - row_vector(prev_rho), *stop_status); + row_vector(prev_rho), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); @@ -108,12 +108,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense* u, - const matrix::Dense* v_hat, matrix::Dense* q, - matrix::Dense* t, matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* gamma, - const array* stop_status) + matrix::view::dense u, + matrix::view::dense v_hat, + matrix::view::dense q, matrix::view::dense t, + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense gamma, + const array& stop_status) { run_kernel_solver( exec, @@ -129,19 +130,20 @@ void step_2(std::shared_ptr exec, t(row, col) = u(row, col) + q(row, col); } }, - u->get_size(), u->get_stride(), default_stride(u), - default_stride(v_hat), default_stride(q), default_stride(t), - row_vector(alpha), row_vector(rho), row_vector(gamma), *stop_status); + u.size, u.stride, default_stride(u), default_stride(v_hat), + default_stride(q), default_stride(t), row_vector(alpha), + row_vector(rho), row_vector(gamma), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, - const matrix::Dense* t, - const matrix::Dense* u_hat, matrix::Dense* r, - matrix::Dense* x, const matrix::Dense* alpha, - const array* stop_status) + matrix::view::dense t, + matrix::view::dense u_hat, + matrix::view::dense r, matrix::view::dense x, + matrix::view::dense alpha, + const array& stop_status) { run_kernel_solver( exec, @@ -152,9 +154,8 @@ void step_3(std::shared_ptr exec, r(row, col) -= alpha[col] * t(row, col); } }, - t->get_size(), t->get_stride(), default_stride(t), - default_stride(u_hat), default_stride(r), x, row_vector(alpha), - *stop_status); + t.size, t.stride, default_stride(t), default_stride(u_hat), + default_stride(r), x, row_vector(alpha), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_3_KERNEL); diff --git a/common/unified/solver/chebyshev_kernels.cpp b/common/unified/solver/chebyshev_kernels.cpp index d78b1cc888b..39d409194d3 100644 --- a/common/unified/solver/chebyshev_kernels.cpp +++ b/common/unified/solver/chebyshev_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -43,9 +43,9 @@ using if_single_only_type = xstd::type_identity_t; template void init_update(std::shared_ptr exec, const solver::detail::coeff_type alpha, - const matrix::Dense* inner_sol, - matrix::Dense* update_sol, - matrix::Dense* output) + matrix::view::dense inner_sol, + matrix::view::dense update_sol, + matrix::view::dense output) { using coeff_type = if_single_only_type>; @@ -67,7 +67,7 @@ void init_update(std::shared_ptr exec, static_cast(output(row, col)) + alpha * inner_val); }, - output->get_size(), alpha_val, inner_sol, update_sol, output); + output.size, alpha_val, inner_sol, update_sol, output); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CHEBYSHEV_INIT_UPDATE_KERNEL); @@ -77,9 +77,9 @@ template void update(std::shared_ptr exec, const solver::detail::coeff_type alpha, const solver::detail::coeff_type beta, - matrix::Dense* inner_sol, - matrix::Dense* update_sol, - matrix::Dense* output) + matrix::view::dense inner_sol, + matrix::view::dense update_sol, + matrix::view::dense output) { using coeff_type = if_single_only_type>; @@ -102,7 +102,7 @@ void update(std::shared_ptr exec, output(row, col) = static_cast>( static_cast(output(row, col)) + alpha * val); }, - output->get_size(), alpha_val, beta_val, inner_sol, update_sol, output); + output.size, alpha_val, beta_val, inner_sol, update_sol, output); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CHEBYSHEV_UPDATE_KERNEL); diff --git a/common/unified/solver/common_gmres_kernels.cpp b/common/unified/solver/common_gmres_kernels.cpp index 679aebcfaa2..2201f89d4eb 100644 --- a/common/unified/solver/common_gmres_kernels.cpp +++ b/common/unified/solver/common_gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,13 +23,13 @@ namespace common_gmres { template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, + matrix::view::dense b, + matrix::view::dense residual, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, stopping_status* stop_status) { - const auto krylov_dim = givens_sin->get_size()[0]; + const auto krylov_dim = givens_sin.size[0]; run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto b, auto residual, auto givens_sin, @@ -47,9 +47,8 @@ void initialize(std::shared_ptr exec, givens_cos(i, j) = zero(); } }, - dim<2>{std::max(b->get_size()[0], krylov_dim), b->get_size()[1]}, b, - residual, givens_sin, givens_cos, stop_status, krylov_dim, - b->get_size()[0]); + dim<2>{std::max(b.size[0], krylov_dim), b.size[1]}, b, residual, + givens_sin, givens_cos, stop_status, krylov_dim, b.size[0]); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL); @@ -57,12 +56,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL); template void hessenberg_qr(std::shared_ptr exec, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense* hessenberg_iter, size_type iter, - size_type* final_iter_nums, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg_iter, + size_type iter, size_type* final_iter_nums, const stopping_status* stop_status) { run_kernel( @@ -120,7 +119,7 @@ void hessenberg_qr(std::shared_ptr exec, gc * residual_norm_collection(iter, rhs); residual_norm(0, rhs) = abs(rnc_new); }, - hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm, + hessenberg_iter.size[1], givens_sin, givens_cos, residual_norm, residual_norm_collection, hessenberg_iter, iter, final_iter_nums, stop_status); } @@ -131,9 +130,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, - matrix::Dense* y, const size_type* final_iter_nums, + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg, + matrix::view::dense y, + const size_type* final_iter_nums, const stopping_status* stop_status) { run_kernel( @@ -153,9 +153,8 @@ void solve_krylov(std::shared_ptr exec, y(i, col) = value / mtx(i, i * num_cols + col); } }, - residual_norm_collection->get_size()[1], residual_norm_collection, - hessenberg, y, final_iter_nums, stop_status, - residual_norm_collection->get_size()[1]); + residual_norm_collection.size[1], residual_norm_collection, hessenberg, + y, final_iter_nums, stop_status, residual_norm_collection.size[1]); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/common/unified/solver/fcg_kernels.cpp b/common/unified/solver/fcg_kernels.cpp index 7853d97c358..fd9f57db3b9 100644 --- a/common/unified/solver/fcg_kernels.cpp +++ b/common/unified/solver/fcg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,15 +21,15 @@ namespace fcg { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* z, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* t, - matrix::Dense* prev_rho, - matrix::Dense* rho, matrix::Dense* rho_t, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense z, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense t, + matrix::view::dense prev_rho, matrix::view::dense rho, + matrix::view::dense rho_t, array& stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto z, auto p, @@ -43,10 +43,10 @@ void initialize(std::shared_ptr exec, t(row, col) = r(row, col) = b(row, col); z(row, col) = p(row, col) = q(row, col) = zero(z(row, col)); }, - b->get_size(), b->get_stride(), default_stride(b), - default_stride(r), default_stride(z), default_stride(p), - default_stride(q), default_stride(t), row_vector(prev_rho), - row_vector(rho), row_vector(rho_t), *stop_status); + b.size, b.stride, default_stride(b), default_stride(r), + default_stride(z), default_stride(p), default_stride(q), + default_stride(t), row_vector(prev_rho), row_vector(rho), + row_vector(rho_t), stop_status); } else { run_kernel( exec, @@ -56,8 +56,8 @@ void initialize(std::shared_ptr exec, prev_rho[col] = rho_t[col] = one(prev_rho[col]); stop[col].reset(); }, - b->get_size()[1], row_vector(prev_rho), row_vector(rho), - row_vector(rho_t), *stop_status); + b.size[1], row_vector(prev_rho), row_vector(rho), row_vector(rho_t), + stop_status); } } @@ -66,10 +66,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* p, const matrix::Dense* z, - const matrix::Dense* rho_t, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense z, + matrix::view::dense rho_t, + matrix::view::dense prev_rho, + const array& stop_status) { run_kernel_solver( exec, @@ -80,8 +81,8 @@ void step_1(std::shared_ptr exec, p(row, col) = z(row, col) + tmp * p(row, col); } }, - p->get_size(), p->get_stride(), default_stride(p), default_stride(z), - row_vector(rho_t), row_vector(prev_rho), *stop_status); + p.size, p.stride, default_stride(p), default_stride(z), + row_vector(rho_t), row_vector(prev_rho), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); @@ -89,12 +90,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - matrix::Dense* t, const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* beta, - const matrix::Dense* rho, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense t, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense beta, + matrix::view::dense rho, + const array& stop_status) { run_kernel_solver( exec, @@ -108,9 +110,9 @@ void step_2(std::shared_ptr exec, t(row, col) = r(row, col) - prev_r; } }, - x->get_size(), r->get_stride(), x, default_stride(r), default_stride(t), + x.size, r.stride, x, default_stride(r), default_stride(t), default_stride(p), default_stride(q), row_vector(beta), row_vector(rho), - *stop_status); + stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_2_KERNEL); diff --git a/common/unified/solver/gcr_kernels.cpp b/common/unified/solver/gcr_kernels.cpp index 7adef77dfb1..92948396c4e 100644 --- a/common/unified/solver/gcr_kernels.cpp +++ b/common/unified/solver/gcr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,11 +22,11 @@ namespace gcr { template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, + matrix::view::dense b, + matrix::view::dense residual, stopping_status* stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto residual, @@ -36,11 +36,11 @@ void initialize(std::shared_ptr exec, } residual(row, col) = b(row, col); }, - b->get_size(), b, residual, stop_status); + b.size, b, residual, stop_status); } else { run_kernel( exec, [] GKO_KERNEL(auto col, auto stop) { stop[col].reset(); }, - b->get_size()[1], stop_status); + b.size[1], stop_status); } } @@ -49,12 +49,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR_INITIALIZE_KERNEL); template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - const matrix::Dense* A_residual, - matrix::Dense* p_bases, - matrix::Dense* Ap_bases, size_type* final_iter_nums) + matrix::view::dense residual, + matrix::view::dense A_residual, + matrix::view::dense p_bases, + matrix::view::dense Ap_bases, + size_type* final_iter_nums) { - if (residual->get_size()) { + if (residual.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto residual, auto A_residual, @@ -65,16 +66,15 @@ void restart(std::shared_ptr exec, p_bases(row, col) = residual(row, col); Ap_bases(row, col) = A_residual(row, col); }, - residual->get_size(), residual->get_stride(), - default_stride(residual), default_stride(A_residual), p_bases, - Ap_bases, final_iter_nums); + residual.size, residual.stride, default_stride(residual), + default_stride(A_residual), p_bases, Ap_bases, final_iter_nums); } else { run_kernel( exec, [] GKO_KERNEL(auto col, auto final_iter_nums) { final_iter_nums[col] = 0; }, - residual->get_size()[1], final_iter_nums); + residual.size[1], final_iter_nums); } } @@ -83,11 +83,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR_RESTART_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* residual, - const matrix::Dense* p, - const matrix::Dense* Ap, - const matrix::Dense>* Ap_norm, - const matrix::Dense* rAp, + matrix::view::dense x, + matrix::view::dense residual, + matrix::view::dense p, + matrix::view::dense Ap, + matrix::view::dense> Ap_norm, + matrix::view::dense rAp, const stopping_status* stop_status) { run_kernel_solver( @@ -100,8 +101,7 @@ void step_1(std::shared_ptr exec, residual(row, col) -= tmp * Ap(row, col); } }, - x->get_size(), p->get_stride(), x, residual, p, Ap, Ap_norm, rAp, - stop_status); + x.size, p.stride, x, residual, p, Ap, Ap_norm, rAp, stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR_STEP_1_KERNEL); diff --git a/common/unified/solver/gmres_kernels.cpp b/common/unified/solver/gmres_kernels.cpp index f24ae445edb..958ac3a7076 100644 --- a/common/unified/solver/gmres_kernels.cpp +++ b/common/unified/solver/gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -24,12 +24,13 @@ namespace gmres { template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - const matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense* krylov_bases, size_type* final_iter_nums) + matrix::view::dense residual, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense krylov_bases, + size_type* final_iter_nums) { - if (residual->get_size()[0] == 0) { + if (residual.size[0] == 0) { run_kernel( exec, [] GKO_KERNEL(auto j, auto residual_norm, @@ -37,7 +38,7 @@ void restart(std::shared_ptr exec, residual_norm_collection(0, j) = residual_norm(0, j); final_iter_nums[j] = 0; }, - residual->get_size()[1], residual_norm, residual_norm_collection, + residual.size[1], residual_norm, residual_norm_collection, final_iter_nums); } else { run_kernel( @@ -51,8 +52,8 @@ void restart(std::shared_ptr exec, } krylov_bases(i, j) = residual(i, j) / residual_norm(0, j); }, - residual->get_size(), residual, residual_norm, - residual_norm_collection, krylov_bases, final_iter_nums); + residual.size, residual, residual_norm, residual_norm_collection, + krylov_bases, final_iter_nums); } } @@ -61,9 +62,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_RESTART_KERNEL); template void multi_axpy(std::shared_ptr exec, - const matrix::Dense* krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, + matrix::view::dense krylov_bases, + matrix::view::dense y, + matrix::view::dense before_preconditioner, const size_type* final_iter_nums, stopping_status* stop_status) { run_kernel( @@ -79,9 +80,8 @@ void multi_axpy(std::shared_ptr exec, } out(row, col) = value; }, - before_preconditioner->get_size(), krylov_bases, y, - before_preconditioner, final_iter_nums, stop_status, - before_preconditioner->get_size()[0]); + before_preconditioner.size, krylov_bases, y, before_preconditioner, + final_iter_nums, stop_status, before_preconditioner.size[0]); run_kernel( exec, [] GKO_KERNEL(auto col, auto stop) { @@ -89,7 +89,7 @@ void multi_axpy(std::shared_ptr exec, stop[col].finalize(); } }, - before_preconditioner->get_size()[1], stop_status); + before_preconditioner.size[1], stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); @@ -97,9 +97,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); template void multi_dot(std::shared_ptr exec, - const matrix::Dense* krylov_bases, - const matrix::Dense* next_krylov, - matrix::Dense* hessenberg_col) + matrix::view::dense krylov_bases, + matrix::view::dense next_krylov, + matrix::view::dense hessenberg_col) { run_kernel_col_reduction( exec, @@ -110,13 +110,11 @@ void multi_dot(std::shared_ptr exec, return conj(bases(ivec * num_rows + row, irhs)) * next_krylov(row, irhs); }, - GKO_KERNEL_REDUCE_SUM(ValueType), hessenberg_col->get_values(), - gko::dim<2>{ - next_krylov->get_size()[0], - hessenberg_col->get_size()[0] * hessenberg_col->get_size()[1] - - next_krylov->get_size()[1]}, - krylov_bases, next_krylov, next_krylov->get_size()[1], - next_krylov->get_size()[0]); + GKO_KERNEL_REDUCE_SUM(ValueType), hessenberg_col.values, + gko::dim<2>{next_krylov.size[0], + hessenberg_col.size[0] * hessenberg_col.size[1] - + next_krylov.size[1]}, + krylov_bases, next_krylov, next_krylov.size[1], next_krylov.size[0]); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_DOT_KERNEL); diff --git a/common/unified/solver/ir_kernels.cpp b/common/unified/solver/ir_kernels.cpp index 96f0731f039..7b64a0edd5f 100644 --- a/common/unified/solver/ir_kernels.cpp +++ b/common/unified/solver/ir_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -19,11 +19,11 @@ namespace ir { void initialize(std::shared_ptr exec, - array* stop_status) + array& stop_status) { run_kernel( exec, [] GKO_KERNEL(auto i, auto stop) { stop[i].reset(); }, - stop_status->get_size(), *stop_status); + stop_status.get_size(), stop_status); } diff --git a/common/unified/solver/minres_kernels.cpp b/common/unified/solver/minres_kernels.cpp index 277c9233955..e3b6212b947 100644 --- a/common/unified/solver/minres_kernels.cpp +++ b/common/unified/solver/minres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -36,15 +36,15 @@ GKO_INLINE GKO_ATTRIBUTES void swap(T& a, T& b) template void initialize( std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* z, - matrix::Dense* p, matrix::Dense* p_prev, - matrix::Dense* q, matrix::Dense* q_prev, - matrix::Dense* v, matrix::Dense* beta, - matrix::Dense* gamma, matrix::Dense* delta, - matrix::Dense* cos_prev, matrix::Dense* cos, - matrix::Dense* sin_prev, matrix::Dense* sin, - matrix::Dense* eta_next, matrix::Dense* eta, - array* stop_status) + matrix::view::dense r, matrix::view::dense z, + matrix::view::dense p, matrix::view::dense p_prev, + matrix::view::dense q, matrix::view::dense q_prev, + matrix::view::dense v, matrix::view::dense beta, + matrix::view::dense gamma, matrix::view::dense delta, + matrix::view::dense cos_prev, matrix::view::dense cos, + matrix::view::dense sin_prev, matrix::view::dense sin, + matrix::view::dense eta_next, matrix::view::dense eta, + array& stop_status) { run_kernel( exec, @@ -57,10 +57,9 @@ void initialize( eta_next[col] = eta[col] = beta[col] = sqrt(beta[col]); stop[col].reset(); }, - beta->get_num_stored_elements(), row_vector(beta), row_vector(gamma), - row_vector(delta), row_vector(cos_prev), row_vector(cos), - row_vector(sin_prev), row_vector(sin), row_vector(eta_next), - row_vector(eta), *stop_status); + beta.size[1], row_vector(beta), row_vector(gamma), row_vector(delta), + row_vector(cos_prev), row_vector(cos), row_vector(sin_prev), + row_vector(sin), row_vector(eta_next), row_vector(eta), stop_status); run_kernel_solver( exec, @@ -71,10 +70,10 @@ void initialize( p(row, col) = p_prev(row, col) = q_prev(row, col) = v(row, col) = zero(p(row, col)); }, - r->get_size(), r->get_stride(), default_stride(r), default_stride(z), + r.size, r.stride, default_stride(r), default_stride(z), default_stride(p), default_stride(p_prev), default_stride(q), default_stride(q_prev), default_stride(v), row_vector(beta), - *stop_status); + stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES_INITIALIZE_KERNEL); @@ -100,14 +99,15 @@ GKO_KERNEL void update_givens_rotation(ValueType& alpha, const ValueType& beta, template -void step_1(std::shared_ptr exec, - matrix::Dense* alpha, matrix::Dense* beta, - matrix::Dense* gamma, matrix::Dense* delta, - matrix::Dense* cos_prev, matrix::Dense* cos, - matrix::Dense* sin_prev, matrix::Dense* sin, - matrix::Dense* eta, matrix::Dense* eta_next, - matrix::Dense* tau, - const array* stop_status) +void step_1( + std::shared_ptr exec, + matrix::view::dense alpha, matrix::view::dense beta, + matrix::view::dense gamma, matrix::view::dense delta, + matrix::view::dense cos_prev, matrix::view::dense cos, + matrix::view::dense sin_prev, matrix::view::dense sin, + matrix::view::dense eta, matrix::view::dense eta_next, + matrix::view::dense tau, + const array& stop_status) { run_kernel( exec, @@ -134,10 +134,10 @@ void step_1(std::shared_ptr exec, eta_next[col] = -conj(sin[col]) * eta[col]; } }, - alpha->get_num_stored_elements(), row_vector(alpha), row_vector(beta), - row_vector(gamma), row_vector(delta), row_vector(cos_prev), - row_vector(cos), row_vector(sin_prev), row_vector(sin), - row_vector(eta_next), row_vector(eta), row_vector(tau), *stop_status); + alpha.size[1], row_vector(alpha), row_vector(beta), row_vector(gamma), + row_vector(delta), row_vector(cos_prev), row_vector(cos), + row_vector(sin_prev), row_vector(sin), row_vector(eta_next), + row_vector(eta), row_vector(tau), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES_STEP_1_KERNEL); @@ -145,17 +145,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* p, - const matrix::Dense* p_prev, matrix::Dense* z, - const matrix::Dense* z_tilde, - matrix::Dense* q, matrix::Dense* q_prev, - matrix::Dense* v, const matrix::Dense* alpha, - const matrix::Dense* beta, - const matrix::Dense* gamma, - const matrix::Dense* delta, - const matrix::Dense* cos, - const matrix::Dense* eta, - const array* stop_status) + matrix::view::dense x, matrix::view::dense p, + matrix::view::dense p_prev, + matrix::view::dense z, + matrix::view::dense z_tilde, + matrix::view::dense q, + matrix::view::dense q_prev, + matrix::view::dense v, + matrix::view::dense alpha, + matrix::view::dense beta, + matrix::view::dense gamma, + matrix::view::dense delta, + matrix::view::dense cos, + matrix::view::dense eta, + const array& stop_status) { run_kernel_solver( exec, @@ -177,11 +180,11 @@ void step_2(std::shared_ptr exec, v(row, col) = tmp * beta[col]; } }, - x->get_size(), p->get_stride(), x, default_stride(p), - default_stride(p_prev), default_stride(q), default_stride(q_prev), - default_stride(v), default_stride(z), default_stride(z_tilde), - row_vector(alpha), row_vector(beta), row_vector(gamma), - row_vector(delta), row_vector(cos), row_vector(eta), *stop_status); + x.size, p.stride, x, default_stride(p), default_stride(p_prev), + default_stride(q), default_stride(q_prev), default_stride(v), + default_stride(z), default_stride(z_tilde), row_vector(alpha), + row_vector(beta), row_vector(gamma), row_vector(delta), row_vector(cos), + row_vector(eta), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES_STEP_2_KERNEL); diff --git a/common/unified/solver/pipe_cg_kernels.cpp b/common/unified/solver/pipe_cg_kernels.cpp index 8e3582818be..e4e7480e69a 100644 --- a/common/unified/solver/pipe_cg_kernels.cpp +++ b/common/unified/solver/pipe_cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,12 +22,12 @@ namespace pipe_cg { template void initialize_1(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* r, - matrix::Dense* prev_rho, - array* stop_status) + matrix::view::dense b, + matrix::view::dense r, + matrix::view::dense prev_rho, + array& stop_status) { - if (b->get_size()) { + if (b.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto prev_rho, @@ -38,8 +38,7 @@ void initialize_1(std::shared_ptr exec, } r(row, col) = b(row, col); }, - b->get_size(), b->get_stride(), b, r, row_vector(prev_rho), - *stop_status); + b.size, b.stride, b, r, row_vector(prev_rho), stop_status); } else { run_kernel( exec, @@ -47,7 +46,7 @@ void initialize_1(std::shared_ptr exec, prev_rho[col] = one(prev_rho[col]); stop[col].reset(); }, - b->get_size()[1], row_vector(prev_rho), *stop_status); + b.size[1], row_vector(prev_rho), stop_status); } } @@ -55,21 +54,23 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - matrix::Dense* p, matrix::Dense* q, - matrix::Dense* f, matrix::Dense* g, - matrix::Dense* beta, - const matrix::Dense* z, - const matrix::Dense* w, - const matrix::Dense* m, - const matrix::Dense* n, - const matrix::Dense* delta) + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense f, + matrix::view::dense g, + matrix::view::dense beta, + matrix::view::dense z, + matrix::view::dense w, + matrix::view::dense m, + matrix::view::dense n, + matrix::view::dense delta) { // beta = delta // p = z // q = w // f = m // g = n - if (p->get_size()) { + if (p.size) { run_kernel_solver( exec, [] GKO_KERNEL(auto row, auto col, auto p, auto q, auto f, auto g, @@ -83,17 +84,16 @@ void initialize_2(std::shared_ptr exec, f(row, col) = m(row, col); g(row, col) = n(row, col); }, - p->get_size(), p->get_stride(), default_stride(p), - default_stride(q), default_stride(f), default_stride(g), - row_vector(beta), z, w, default_stride(m), default_stride(n), - row_vector(delta)); + p.size, p.stride, default_stride(p), default_stride(q), + default_stride(f), default_stride(g), row_vector(beta), z, w, + default_stride(m), default_stride(n), row_vector(delta)); } else { run_kernel( exec, [] GKO_KERNEL(auto col, auto beta, auto delta) { beta[col] = delta[col]; }, - p->get_size()[1], row_vector(beta), row_vector(delta)); + p.size[1], row_vector(beta), row_vector(delta)); } } @@ -102,15 +102,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_INITIALIZE_2_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - matrix::Dense* z1, matrix::Dense* z2, - matrix::Dense* w, const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* f, - const matrix::Dense* g, - const matrix::Dense* rho, - const matrix::Dense* beta, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense z1, + matrix::view::dense z2, matrix::view::dense w, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense f, + matrix::view::dense g, + matrix::view::dense rho, + matrix::view::dense beta, + const array& stop_status) { // tmp = rho / beta // x = x + tmp * p @@ -131,9 +132,9 @@ void step_1(std::shared_ptr exec, w(row, col) -= tmp * g(row, col); } }, - x->get_size(), x->get_stride(), default_stride(x), r, z1, z2, w, - default_stride(p), default_stride(q), default_stride(f), - default_stride(g), row_vector(rho), row_vector(beta), *stop_status); + x.size, x.stride, default_stride(x), r, z1, z2, w, default_stride(p), + default_stride(q), default_stride(f), default_stride(g), + row_vector(rho), row_vector(beta), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_STEP_1_KERNEL); @@ -141,16 +142,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* beta, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* f, - matrix::Dense* g, const matrix::Dense* z, - const matrix::Dense* w, - const matrix::Dense* m, - const matrix::Dense* n, - const matrix::Dense* prev_rho, - const matrix::Dense* rho, - const matrix::Dense* delta, - const array* stop_status) + matrix::view::dense beta, + matrix::view::dense p, matrix::view::dense q, + matrix::view::dense f, matrix::view::dense g, + matrix::view::dense z, + matrix::view::dense w, + matrix::view::dense m, + matrix::view::dense n, + matrix::view::dense prev_rho, + matrix::view::dense rho, + matrix::view::dense delta, + const array& stop_status) { // tmp = rho / prev_rho // beta = delta - |tmp|^2 * beta @@ -178,10 +180,10 @@ void step_2(std::shared_ptr exec, g(row, col) = n(row, col) + tmp * g(row, col); } }, - p->get_size(), p->get_stride(), row_vector(beta), default_stride(p), + p.size, p.stride, row_vector(beta), default_stride(p), default_stride(q), default_stride(f), default_stride(g), z, w, default_stride(m), default_stride(n), row_vector(prev_rho), - row_vector(rho), row_vector(delta), *stop_status); + row_vector(rho), row_vector(delta), stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_STEP_2_KERNEL); diff --git a/core/base/array.cpp b/core/base/array.cpp index 2dd5a839290..72afe4aadd2 100644 --- a/core/base/array.cpp +++ b/core/base/array.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -88,7 +88,8 @@ ValueType reduce_add(const array& input_arr, } -#define GKO_DECLARE_ARRAY_FILL(_type) void array<_type>::fill(const _type value) +#define GKO_DECLARE_ARRAY_FILL(ValueType) \ + void array::fill(const ValueType value) GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_ARRAY_FILL); template GKO_DECLARE_ARRAY_FILL(bool); @@ -100,14 +101,14 @@ template GKO_DECLARE_ARRAY_FILL(uint64); #endif -#define GKO_DECLARE_ARRAY_REDUCE_ADD(_type) \ - void reduce_add(const array<_type>& arr, array<_type>& value) +#define GKO_DECLARE_ARRAY_REDUCE_ADD(ValueType) \ + void reduce_add(const array& arr, array& value) GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_ARRAY_REDUCE_ADD); -#define GKO_DECLARE_ARRAY_REDUCE_ADD2(_type) \ - _type reduce_add(const array<_type>& arr, const _type val) +#define GKO_DECLARE_ARRAY_REDUCE_ADD2(ValueType) \ + ValueType reduce_add(const array& arr, const ValueType val) GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_ARRAY_REDUCE_ADD2); diff --git a/core/base/combination.cpp b/core/base/combination.cpp index 3b30b77d38c..3b846a2676f 100644 --- a/core/base/combination.cpp +++ b/core/base/combination.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -167,7 +167,7 @@ void Combination::apply_impl(const LinOp* alpha, const LinOp* b, } -#define GKO_DECLARE_COMBINATION(_type) class Combination<_type> +#define GKO_DECLARE_COMBINATION(ValueType) class Combination GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMBINATION); diff --git a/core/base/composition.cpp b/core/base/composition.cpp index 82c8152300b..acf19ebac67 100644 --- a/core/base/composition.cpp +++ b/core/base/composition.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -221,7 +221,7 @@ void Composition::apply_impl(const LinOp* alpha, const LinOp* b, } -#define GKO_DECLARE_COMPOSITION(_type) class Composition<_type> +#define GKO_DECLARE_COMPOSITION(ValueType) class Composition GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMPOSITION); diff --git a/core/base/dense_cache.cpp b/core/base/dense_cache.cpp index a3329cadd70..770a9037333 100644 --- a/core/base/dense_cache.cpp +++ b/core/base/dense_cache.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -153,16 +153,18 @@ std::shared_ptr> ScalarCache::get() const } -#define GKO_DECLARE_DENSE_CACHE(_type) struct DenseCache<_type> +#define GKO_DECLARE_DENSE_CACHE(ValueType) struct DenseCache GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CACHE); -#define GKO_DECLARE_GENERIC_DENSE_CACHE_GET(_type) \ - std::shared_ptr> GenericDenseCache::get<_type>( \ - std::shared_ptr, dim<2>) const +#define GKO_DECLARE_GENERIC_DENSE_CACHE_GET(ValueType) \ + std::shared_ptr> \ + GenericDenseCache::get(std::shared_ptr, dim<2>) \ + const GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GENERIC_DENSE_CACHE_GET); -#define GKO_DECLARE_SCALAR_CACHE_GET(_type) \ - std::shared_ptr> ScalarCache::get<_type>() const +#define GKO_DECLARE_SCALAR_CACHE_GET(ValueType) \ + std::shared_ptr> \ + ScalarCache::get() const GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_SCALAR_CACHE_GET); diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index 715916aa37a..8af62c86bfb 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -36,8 +36,8 @@ void index_set::populate_subsets( auto exec = this->get_executor(); this->num_stored_indices_ = indices.get_size(); exec->run(idx_set::make_populate_subsets( - this->index_space_size_, &indices, &this->subsets_begin_, - &this->subsets_end_, &this->superset_cumulative_indices_, is_sorted)); + this->index_space_size_, indices, this->subsets_begin_, + this->subsets_end_, this->superset_cumulative_indices_, is_sorted)); } @@ -127,7 +127,7 @@ array index_set::map_global_to_local( } -#define GKO_DECLARE_INDEX_SET(_type) class index_set<_type> +#define GKO_DECLARE_INDEX_SET(IndexType) class index_set GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET); diff --git a/core/base/index_set_kernels.hpp b/core/base/index_set_kernels.hpp index 7e742ea062f..03dfcf6fac2 100644 --- a/core/base/index_set_kernels.hpp +++ b/core/base/index_set_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -19,8 +19,8 @@ namespace kernels { #define GKO_DECLARE_INDEX_SET_COMPUTE_VALIDITY_KERNEL(IndexType) \ void compute_validity(std::shared_ptr exec, \ - const array* local_indices, \ - array* validity_array) + const array& local_indices, \ + array& validity_array) #define GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL(IndexType) \ void to_global_indices( \ @@ -32,9 +32,9 @@ namespace kernels { #define GKO_DECLARE_INDEX_SET_POPULATE_KERNEL(IndexType) \ void populate_subsets( \ std::shared_ptr exec, \ - const IndexType index_space_size, const array* indices, \ - array* subset_begin, array* subset_end, \ - array* superset_indices, const bool is_sorted) + const IndexType index_space_size, const array& indices, \ + array& subset_begin, array& subset_end, \ + array& superset_indices, const bool is_sorted) #define GKO_DECLARE_INDEX_SET_GLOBAL_TO_LOCAL_KERNEL(IndexType) \ void global_to_local( \ diff --git a/core/base/mtx_io.cpp b/core/base/mtx_io.cpp index 0ebdcdf9321..5125ba3dfb1 100644 --- a/core/base/mtx_io.cpp +++ b/core/base/mtx_io.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -893,11 +893,12 @@ matrix_data read_binary_raw(std::istream& is) std::memcpy(&num_rows, &header[8], 8); std::memcpy(&num_cols, &header[16], 8); std::memcpy(&num_entries, &header[24], 8); -#define DECLARE_OVERLOAD(_vtype, _itype) \ - else if (magic == binary_format_magic<_vtype, _itype>()) \ - { \ - return read_binary_convert<_vtype, _itype, ValueType, IndexType>( \ - is, num_rows, num_cols, num_entries); \ +#define DECLARE_OVERLOAD(FileValueType, FileIndexType) \ + else if (magic == binary_format_magic()) \ + { \ + return read_binary_convert(is, num_rows, num_cols, \ + num_entries); \ } if (false) { } diff --git a/core/base/perturbation.cpp b/core/base/perturbation.cpp index 87501361c05..6f22d519ec5 100644 --- a/core/base/perturbation.cpp +++ b/core/base/perturbation.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -181,7 +181,7 @@ void Perturbation::apply_impl(const LinOp* alpha, const LinOp* b, } -#define GKO_DECLARE_PERTURBATION(_type) class Perturbation<_type> +#define GKO_DECLARE_PERTURBATION(ValueType) class Perturbation GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PERTURBATION); diff --git a/core/base/segmented_array.cpp b/core/base/segmented_array.cpp index d113139f8e2..b56055470a5 100644 --- a/core/base/segmented_array.cpp +++ b/core/base/segmented_array.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -178,7 +178,7 @@ segmented_array& segmented_array::operator=(segmented_array&& other) } -#define GKO_DECLARE_SEGMENTED_ARRAY(_type) struct segmented_array<_type> +#define GKO_DECLARE_SEGMENTED_ARRAY(T) struct segmented_array GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SEGMENTED_ARRAY); diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index fb6e5e12a54..b4939d51ec1 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -159,7 +159,7 @@ bool Partition::has_ordered_parts() const if (this->has_connected_parts()) { auto exec = this->get_executor(); bool has_ordered_parts; - exec->run(partition::make_has_ordered_parts(this, &has_ordered_parts)); + exec->run(partition::make_has_ordered_parts(this, has_ordered_parts)); return has_ordered_parts; } else { return false; diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index 0262c3007ee..7afd9d2b1cc 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -15,23 +15,23 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(_type) \ - void sort_by_range_start( \ - std::shared_ptr exec, \ - array<_type>& range_start_ends, \ +#define GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(IndexType) \ + void sort_by_range_start( \ + std::shared_ptr exec, \ + array& range_start_ends, \ array& part_ids) -#define GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(_type) \ +#define GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(IndexType) \ void check_consecutive_ranges(std::shared_ptr exec, \ - const array<_type>& range_start_ends, \ + const array& range_start_ends, \ bool& result) -#define GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES(_type) \ +#define GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES(IndexType) \ void compress_ranges(std::shared_ptr exec, \ - const array<_type>& range_start_ends, \ - array<_type>& range_offsets) + const array& range_start_ends, \ + array& range_offsets) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 6573bed6903..74774bc4e03 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -58,7 +58,7 @@ namespace kernels { void has_ordered_parts(std::shared_ptr exec, \ const experimental::distributed::Partition< \ LocalIndexType, GlobalIndexType>* partition, \ - bool* result) + bool& result) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/distributed/row_gatherer.cpp b/core/distributed/row_gatherer.cpp index f74bc47e0bf..dc7012b151f 100644 --- a/core/distributed/row_gatherer.cpp +++ b/core/distributed/row_gatherer.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -214,21 +214,21 @@ mpi::request apply_finalize(const RowGatherer* rg, } -#define GKO_DECLARE_TEST_APPLY_PREPARE(_itype) \ +#define GKO_DECLARE_TEST_APPLY_PREPARE(IndexType) \ std::shared_ptr apply_prepare( \ - const RowGatherer<_itype>*, ptr_param) + const RowGatherer*, ptr_param) -#define GKO_DECLARE_TEST_APPLY_PREPARE_WORKSPACE(_itype) \ +#define GKO_DECLARE_TEST_APPLY_PREPARE_WORKSPACE(IndexType) \ std::shared_ptr apply_prepare( \ - const RowGatherer<_itype>*, ptr_param, array&) + const RowGatherer*, ptr_param, array&) -#define GKO_DECLARE_TEST_APPLY_FINALIZE(_itype) \ - mpi::request apply_finalize(const RowGatherer<_itype>* rg, \ +#define GKO_DECLARE_TEST_APPLY_FINALIZE(IndexType) \ + mpi::request apply_finalize(const RowGatherer* rg, \ ptr_param b, ptr_param x, \ std::shared_ptr ev) -#define GKO_DECLARE_TEST_APPLY_FINALIZE_WORKSPACE(_itype) \ - mpi::request apply_finalize(const RowGatherer<_itype>* rg, \ +#define GKO_DECLARE_TEST_APPLY_FINALIZE_WORKSPACE(IndexType) \ + mpi::request apply_finalize(const RowGatherer* rg, \ ptr_param b, ptr_param x, \ std::shared_ptr ev, \ array&) @@ -420,7 +420,7 @@ RowGatherer::RowGatherer(const RowGatherer& o) } -#define GKO_DECLARE_ROW_GATHERER(_itype) class RowGatherer<_itype> +#define GKO_DECLARE_ROW_GATHERER(IndexType) class RowGatherer GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_ROW_GATHERER); diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index c9370d2f213..6f33168432c 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -204,7 +204,8 @@ void Vector::read_distributed_impl( auto rank = this->get_communicator().rank(); local_.fill(zero()); exec->run(vector::make_build_local( - data, make_temporary_clone(exec, partition).get(), rank, &local_)); + data, make_temporary_clone(exec, partition).get(), rank, + local_.get_device_view())); } @@ -345,8 +346,9 @@ Vector::compute_absolute() const absolute_type::create(exec, this->get_communicator(), this->get_size(), this->get_local_vector()->get_size()); - exec->run(vector::make_outplace_absolute_dense(this->get_local_vector(), - &result->local_)); + exec->run(vector::make_outplace_absolute_dense( + this->get_local_vector()->get_const_device_view(), + result->local_.get_device_view())); return result; } @@ -551,7 +553,7 @@ void Vector::compute_norm2(ptr_param result, const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); this->compute_squared_norm2(dense_res.get(), tmp); - exec->run(vector::make_compute_sqrt(dense_res.get())); + exec->run(vector::make_compute_sqrt(dense_res->get_device_view())); } @@ -607,8 +609,9 @@ void Vector::compute_squared_norm2(ptr_param result, auto exec = this->get_executor(); const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); - exec->run(vector::make_compute_squared_norm2(this->get_local_vector(), - dense_res.get(), tmp)); + exec->run(vector::make_compute_squared_norm2( + this->get_local_vector()->get_const_device_view(), + dense_res->get_device_view(), tmp)); exec->synchronize(); auto norm_sum_op = gko::experimental::mpi::sum>(); if (mpi::requires_host_buffer(exec, comm)) { diff --git a/core/distributed/vector_cache.cpp b/core/distributed/vector_cache.cpp index dff739455df..830ff3f9122 100644 --- a/core/distributed/vector_cache.cpp +++ b/core/distributed/vector_cache.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -99,13 +99,13 @@ const array& GenericVectorCacheAccessor::get_workspace( } -#define GKO_DECLARE_VECTOR_CACHE(_type) class VectorCache<_type> +#define GKO_DECLARE_VECTOR_CACHE(ValueType) class VectorCache GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_VECTOR_CACHE); class GenericVectorCache; -#define GKO_DECLARE_GENERIC_VECTOR_CACHE_GET(_type) \ - std::shared_ptr> GenericVectorCache::get( \ +#define GKO_DECLARE_GENERIC_VECTOR_CACHE_GET(ValueType) \ + std::shared_ptr> GenericVectorCache::get( \ std::shared_ptr exec, \ gko::experimental::mpi::communicator comm, dim<2> global_size, \ dim<2> local_size) const diff --git a/core/distributed/vector_kernels.hpp b/core/distributed/vector_kernels.hpp index c288b8918a1..6909403f6c4 100644 --- a/core/distributed/vector_kernels.hpp +++ b/core/distributed/vector_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -27,7 +27,7 @@ namespace kernels { const device_matrix_data& input, \ const experimental::distributed::Partition< \ LocalIndexType, GlobalIndexType>* partition, \ - comm_index_type local_part, matrix::Dense* local_mtx) + comm_index_type local_part, matrix::view::dense local_mtx) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/log/batch_logger.cpp b/core/log/batch_logger.cpp index f274019016f..a6ae1969e91 100644 --- a/core/log/batch_logger.cpp +++ b/core/log/batch_logger.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -63,7 +63,7 @@ log_data::log_data(std::shared_ptr exec, } } -#define GKO_DECLARE_LOG_DATA(_type) struct log_data<_type> +#define GKO_DECLARE_LOG_DATA(ValueType) struct log_data GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE(GKO_DECLARE_LOG_DATA); @@ -91,7 +91,8 @@ void BatchConvergence::on_batch_solver_completed( } -#define GKO_DECLARE_BATCH_CONVERGENCE(_type) class BatchConvergence<_type> +#define GKO_DECLARE_BATCH_CONVERGENCE(ValueType) \ + class BatchConvergence GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_CONVERGENCE); diff --git a/core/log/convergence.cpp b/core/log/convergence.cpp index 7cfa764dfd1..2c8151f4b7c 100644 --- a/core/log/convergence.cpp +++ b/core/log/convergence.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -109,7 +109,7 @@ void Convergence::on_iteration_complete( } -#define GKO_DECLARE_CONVERGENCE(_type) class Convergence<_type> +#define GKO_DECLARE_CONVERGENCE(ValueType) class Convergence GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CONVERGENCE); diff --git a/core/log/papi.cpp b/core/log/papi.cpp index 5ced377ca38..abb437da036 100644 --- a/core/log/papi.cpp +++ b/core/log/papi.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -278,7 +278,7 @@ void Papi::on_iteration_complete( } -#define GKO_DECLARE_PAPI(_type) class Papi<_type> +#define GKO_DECLARE_PAPI(ValueType) class Papi GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PAPI); diff --git a/core/log/stream.cpp b/core/log/stream.cpp index 5e510d409e2..2f0fe9404c0 100644 --- a/core/log/stream.cpp +++ b/core/log/stream.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -481,7 +481,7 @@ void Stream::on_iteration_complete( } -#define GKO_DECLARE_STREAM(_type) class Stream<_type> +#define GKO_DECLARE_STREAM(ValueType) class Stream GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_STREAM); diff --git a/core/matrix/coo.cpp b/core/matrix/coo.cpp index 746b5406415..b22fc8e0a61 100644 --- a/core/matrix/coo.cpp +++ b/core/matrix/coo.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -171,7 +171,9 @@ void Coo::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { - this->get_executor()->run(coo::make_spmv(this, dense_b, dense_x)); + this->get_executor()->run( + coo::make_spmv(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -183,8 +185,11 @@ void Coo::apply_impl(const LinOp* alpha, const LinOp* b, { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { - this->get_executor()->run(coo::make_advanced_spmv( - dense_alpha, this, dense_b, dense_beta, dense_x)); + this->get_executor()->run( + coo::make_advanced_spmv(dense_alpha->get_const_device_view(), + this, dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, alpha, b, beta, x); } @@ -195,7 +200,9 @@ void Coo::apply2_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { - this->get_executor()->run(coo::make_spmv2(this, dense_b, dense_x)); + this->get_executor()->run( + coo::make_spmv2(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -207,8 +214,9 @@ void Coo::apply2_impl(const LinOp* alpha, const LinOp* b, { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_x) { - this->get_executor()->run( - coo::make_advanced_spmv2(dense_alpha, this, dense_b, dense_x)); + this->get_executor()->run(coo::make_advanced_spmv2( + dense_alpha->get_const_device_view(), this, + dense_b->get_const_device_view(), dense_x->get_device_view())); }, alpha, b, x); } @@ -315,7 +323,7 @@ void Coo::convert_to(Dense* result) const auto tmp_result = make_temporary_output_clone(exec, result); tmp_result->resize(this->get_size()); tmp_result->fill(zero()); - exec->run(coo::make_fill_in_dense(this, tmp_result.get())); + exec->run(coo::make_fill_in_dense(this, tmp_result->get_device_view())); } diff --git a/core/matrix/coo_kernels.hpp b/core/matrix/coo_kernels.hpp index d68e838e833..d6c74ce7e26 100644 --- a/core/matrix/coo_kernels.hpp +++ b/core/matrix/coo_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,32 +22,34 @@ namespace kernels { #define GKO_DECLARE_COO_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ const matrix::Coo* a, \ - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_COO_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Coo* a, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) #define GKO_DECLARE_COO_SPMV2_KERNEL(ValueType, IndexType) \ void spmv2(std::shared_ptr exec, \ const matrix::Coo* a, \ - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_COO_ADVANCED_SPMV2_KERNEL(ValueType, IndexType) \ void advanced_spmv2(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Coo* a, \ - const matrix::Dense* b, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_COO_FILL_IN_DENSE_KERNEL(ValueType, IndexType) \ void fill_in_dense(std::shared_ptr exec, \ const matrix::Coo* source, \ - matrix::Dense* result) + matrix::view::dense result) #define GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 495311fe835..b94b685e7c9 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -266,7 +266,8 @@ void Csr::apply_impl(const LinOp* b, LinOp* x) const mixed_precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { this->get_executor()->run( - csr::make_spmv(this, dense_b, dense_x)); + csr::make_spmv(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -285,15 +286,17 @@ void Csr::apply_impl(const LinOp* alpha, const LinOp* b, auto x_csr = as(x); auto x_copy = x_csr->clone(); this->get_executor()->run(csr::make_advanced_spgemm( - as>(alpha), this, b_csr, - as>(beta), x_copy.get(), x_csr)); + as>(alpha)->get_const_device_view(), this, b_csr, + as>(beta)->get_const_device_view(), x_copy.get(), + x_csr)); } else if (dynamic_cast*>(b)) { // if b is an identity matrix, we compute an SpGEAM auto x_csr = as(x); auto x_copy = x_csr->clone(); - this->get_executor()->run( - csr::make_spgeam(as>(alpha), this, - as>(beta), x_copy.get(), x_csr)); + this->get_executor()->run(csr::make_spgeam( + as>(alpha)->get_const_device_view(), this, + as>(beta)->get_const_device_view(), x_copy.get(), + x_csr)); } else { mixed_precision_dispatch_real_complex( [this, alpha, beta](auto dense_b, auto dense_x) { @@ -301,9 +304,11 @@ void Csr::apply_impl(const LinOp* alpha, const LinOp* b, auto dense_beta = make_temporary_conversion< typename std::decay_t::value_type>( beta); - this->get_executor()->run( - csr::make_advanced_spmv(dense_alpha.get(), this, dense_b, - dense_beta.get(), dense_x)); + this->get_executor()->run(csr::make_advanced_spmv( + dense_alpha->get_const_device_view(), this, + dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -402,7 +407,7 @@ void Csr::convert_to(Dense* result) const auto tmp_result = make_temporary_output_clone(exec, result); tmp_result->resize(this->get_size()); tmp_result->fill(zero()); - exec->run(csr::make_fill_in_dense(this, tmp_result.get())); + exec->run(csr::make_fill_in_dense(this, tmp_result->get_device_view())); } @@ -769,8 +774,9 @@ Csr::multiply_add( auto local_mtx_add = make_temporary_clone(exec, mtx_add); auto result = Csr::create(exec, result_size); exec->run(csr::make_advanced_spgemm( - local_scale_mult.get(), this, local_mtx_mult.get(), - local_scale_add.get(), local_mtx_add.get(), result.get())); + local_scale_mult->get_const_device_view(), this, local_mtx_mult.get(), + local_scale_add->get_const_device_view(), local_mtx_add.get(), + result.get())); return result; } @@ -845,8 +851,9 @@ void Csr::multiply_add_reuse_info::update_values( auto local_alpha = make_temporary_clone(exec, alpha); auto local_beta = make_temporary_clone(exec, beta); exec->run(csr::make_advanced_spgemm_reuse( - local_alpha.get(), local_mtx1.get(), local_mtx2.get(), local_beta.get(), - local_mtx3.get(), internal->data, local_out.get())); + local_alpha->get_const_device_view(), local_mtx1.get(), + local_mtx2.get(), local_beta->get_const_device_view(), local_mtx3.get(), + internal->data, local_out.get())); } @@ -871,8 +878,9 @@ Csr::multiply_add_reuse( auto local_mtx_add = make_temporary_clone(exec, mtx_add); auto result = Csr::create(exec, result_size); exec->run(csr::make_advanced_spgemm( - local_scale_mult.get(), this, local_mtx_mult.get(), - local_scale_add.get(), local_mtx_add.get(), result.get())); + local_scale_mult->get_const_device_view(), this, local_mtx_mult.get(), + local_scale_add->get_const_device_view(), local_mtx_add.get(), + result.get())); auto lookup = csr::build_lookup(result.get()); auto reuse_info = multiply_add_reuse_info{ std::make_unique( @@ -900,9 +908,9 @@ std::unique_ptr> Csr::scale_add( auto local_scale_other = make_temporary_clone(exec, scale_other); auto local_mtx_other = make_temporary_clone(exec, mtx_other); auto result = Csr::create(exec, this->get_size()); - exec->run(csr::make_spgeam(local_scale_this.get(), this, - local_scale_other.get(), local_mtx_other.get(), - result.get())); + exec->run(csr::make_spgeam(local_scale_this->get_const_device_view(), this, + local_scale_other->get_const_device_view(), + local_mtx_other.get(), result.get())); return result; } @@ -960,9 +968,10 @@ void Csr::scale_add_reuse_info::update_values( auto local_mtx1 = make_temporary_clone(exec, mtx1); auto local_mtx2 = make_temporary_clone(exec, mtx2); auto local_mtx_out = make_temporary_clone(exec, out); - exec->run(csr::make_spgeam_numeric(local_scale1.get(), local_mtx1.get(), - local_scale2.get(), local_mtx2.get(), - local_mtx_out.get())); + exec->run(csr::make_spgeam_numeric(local_scale1->get_const_device_view(), + local_mtx1.get(), + local_scale2->get_const_device_view(), + local_mtx2.get(), local_mtx_out.get())); } @@ -993,9 +1002,9 @@ Csr::add_scale_reuse( auto local_scale_other = make_temporary_clone(exec, scale_other); auto local_mtx_other = make_temporary_clone(exec, mtx_other); auto result = Csr::create(exec, this->get_size()); - exec->run(csr::make_spgeam(local_scale_this.get(), this, - local_scale_other.get(), local_mtx_other.get(), - result.get())); + exec->run(csr::make_spgeam(local_scale_this->get_const_device_view(), this, + local_scale_other->get_const_device_view(), + local_mtx_other.get(), result.get())); return std::make_pair( std::move(result), scale_add_reuse_info{ @@ -1411,7 +1420,7 @@ bool Csr::is_sorted_by_column_index() const { auto exec = this->get_executor(); bool is_sorted; - exec->run(csr::make_is_sorted_by_column_index(this, &is_sorted)); + exec->run(csr::make_is_sorted_by_column_index(this, is_sorted)); return is_sorted; } @@ -1426,7 +1435,7 @@ Csr::create_submatrix(const gko::span& row_span, auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length()); array row_ptrs(exec, row_span.length() + 1); exec->run(csr::make_calculate_nonzeros_per_row_in_span( - this, row_span, column_span, &row_ptrs)); + this, row_span, column_span, row_ptrs)); exec->run(csr::make_prefix_sum_nonnegative(row_ptrs.get_data(), row_span.length() + 1)); auto num_nnz = get_element(row_ptrs, sub_mat_size[0]); @@ -1558,8 +1567,9 @@ template void Csr::scale_impl(const LinOp* alpha) { auto exec = this->get_executor(); - exec->run(csr::make_scale(make_temporary_conversion(alpha).get(), - this)); + exec->run(csr::make_scale( + make_temporary_conversion(alpha)->get_const_device_view(), + this)); } @@ -1568,7 +1578,8 @@ void Csr::inv_scale_impl(const LinOp* alpha) { auto exec = this->get_executor(); exec->run(csr::make_inv_scale( - make_temporary_conversion(alpha).get(), this)); + make_temporary_conversion(alpha)->get_const_device_view(), + this)); } @@ -1584,8 +1595,9 @@ void Csr::add_scaled_identity_impl(const LinOp* a, "The matrix has one or more structurally zero diagonal entries!"); } this->get_executor()->run(csr::make_add_scaled_identity( - make_temporary_conversion(a).get(), - make_temporary_conversion(b).get(), this)); + make_temporary_conversion(a)->get_const_device_view(), + make_temporary_conversion(b)->get_const_device_view(), + this)); } diff --git a/core/matrix/csr_accessor_helper.hpp b/core/matrix/csr_accessor_helper.hpp index 2187439920e..c3c28a4f705 100644 --- a/core/matrix/csr_accessor_helper.hpp +++ b/core/matrix/csr_accessor_helper.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,61 +20,62 @@ namespace helper { template -auto build_rrm_accessor(matrix::Dense* input) +auto build_rrm_accessor(matrix::view::dense input) { using accessor = gko::acc::reduced_row_major<2, ArthType, ValueType>; return range( std::array{ - {static_cast(input->get_size()[0]), - static_cast(input->get_size()[1])}}, - input->get_values(), + {static_cast(input.size[0]), + static_cast(input.size[1])}}, + input.values, std::array{ - {static_cast(input->get_stride())}}); + {static_cast(input.stride)}}); } template -auto build_rrm_accessor(matrix::Dense* input, index_span column_span) +auto build_rrm_accessor(matrix::view::dense input, + index_span column_span) { using accessor = gko::acc::reduced_row_major<2, ArthType, ValueType>; assert(column_span.is_valid()); return range( std::array{ - {static_cast(input->get_size()[0]), + {static_cast(input.size[0]), static_cast(column_span.end - column_span.begin)}}, - input->get_values() + column_span.begin, + input.values + column_span.begin, std::array{ - {static_cast(input->get_stride())}}); + {static_cast(input.stride)}}); } // use a different name for const to allow the non-const to create const // accessor template -auto build_const_rrm_accessor(const matrix::Dense* input) +auto build_const_rrm_accessor(matrix::view::dense input) { using accessor = gko::acc::reduced_row_major<2, ArthType, const ValueType>; return range( std::array{ - {static_cast(input->get_size()[0]), - static_cast(input->get_size()[1])}}, - input->get_const_values(), + {static_cast(input.size[0]), + static_cast(input.size[1])}}, + input.values, std::array{ - {static_cast(input->get_stride())}}); + {static_cast(input.stride)}}); } template -auto build_const_rrm_accessor(const matrix::Dense* input, +auto build_const_rrm_accessor(matrix::view::dense input, index_span column_span) { using accessor = gko::acc::reduced_row_major<2, ArthType, const ValueType>; assert(column_span.is_valid()); return range( std::array{ - {static_cast(input->get_size()[0]), + {static_cast(input.size[0]), static_cast(column_span.end - column_span.begin)}}, - input->get_const_values() + column_span.begin, + input.values + column_span.begin, std::array{ - {static_cast(input->get_stride())}}); + {static_cast(input.stride)}}); } diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 6d5ebc75bc5..7e2ae73d637 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -30,17 +30,17 @@ namespace kernels { OutputValueType, IndexType) \ void spmv(std::shared_ptr exec, \ const matrix::Csr* a, \ - const matrix::Dense* b, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL(MatrixValueType, InputValueType, \ OutputValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Csr* a, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) #define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \ void spgemm(std::shared_ptr exec, \ @@ -50,10 +50,10 @@ namespace kernels { #define GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \ void advanced_spgemm(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Csr* a, \ const matrix::Csr* b, \ - const matrix::Dense* beta, \ + matrix::view::dense beta, \ const matrix::Csr* d, \ matrix::Csr* c) @@ -67,34 +67,34 @@ namespace kernels { #define GKO_DECLARE_CSR_ADVANCED_SPGEMM_REUSE_KERNEL(ValueType, IndexType) \ void advanced_spgemm_reuse( \ std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Csr* a, \ const matrix::Csr* b, \ - const matrix::Dense* beta, \ + matrix::view::dense beta, \ const matrix::Csr* d, \ const matrix::csr::lookup_data& c_lookup, \ matrix::Csr* c) #define GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType) \ void spgeam(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Csr* a, \ - const matrix::Dense* beta, \ + matrix::view::dense beta, \ const matrix::Csr* b, \ matrix::Csr* c) #define GKO_DECLARE_CSR_SPGEAM_NUMERIC_KERNEL(ValueType, IndexType) \ void spgeam_numeric(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Csr* a, \ - const matrix::Dense* beta, \ + matrix::view::dense beta, \ const matrix::Csr* b, \ matrix::Csr* c) #define GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL(ValueType, IndexType) \ void fill_in_dense(std::shared_ptr exec, \ const matrix::Csr* source, \ - matrix::Dense* result) + matrix::view::dense result) #define GKO_DECLARE_CSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType) \ void convert_to_ell(std::shared_ptr exec, \ @@ -201,7 +201,7 @@ namespace kernels { void calculate_nonzeros_per_row_in_span( \ std::shared_ptr exec, \ const matrix::Csr* source, const span& row_span, \ - const span& col_span, array* row_nnz) + const span& col_span, array& row_nnz) #define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL(ValueType, \ IndexType) \ @@ -233,7 +233,7 @@ namespace kernels { #define GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ - const matrix::Csr* to_check, bool* is_sorted) + const matrix::Csr* to_check, bool& is_sorted) #define GKO_DECLARE_CSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ @@ -242,12 +242,12 @@ namespace kernels { #define GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType) \ void scale(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ matrix::Csr* to_scale) #define GKO_DECLARE_CSR_INV_SCALE_KERNEL(ValueType, IndexType) \ void inv_scale(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ matrix::Csr* to_scale) #define GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST(ValueType, IndexType) \ @@ -257,8 +257,8 @@ namespace kernels { #define GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL(ValueType, IndexType) \ void add_scaled_identity(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ - const matrix::Dense* beta, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ matrix::Csr* mtx) #define GKO_DECLARE_CSR_BUILD_LOOKUP_OFFSETS_KERNEL(IndexType) \ diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index aa9a26ee8b3..12721ebc7a9 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -112,8 +112,9 @@ void Dense::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { - this->get_executor()->run( - dense::make_simple_apply(this, dense_b, dense_x)); + this->get_executor()->run(dense::make_simple_apply( + this->get_const_device_view(), dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -126,7 +127,10 @@ void Dense::apply_impl(const LinOp* alpha, const LinOp* b, precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { this->get_executor()->run(dense::make_apply( - dense_alpha, this, dense_b, dense_beta, dense_x)); + dense_alpha->get_const_device_view(), + this->get_const_device_view(), dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, alpha, b, beta, x); } @@ -135,7 +139,7 @@ void Dense::apply_impl(const LinOp* alpha, const LinOp* b, template void Dense::fill(const ValueType value) { - this->get_executor()->run(dense::make_fill(this, value)); + this->get_executor()->run(dense::make_fill(this->get_device_view(), value)); } @@ -235,14 +239,17 @@ void Dense::inv_scale_impl(const LinOp* alpha) is_complex()) { // use the real-complex kernel exec->run(dense::make_inv_scale( - make_temporary_conversion>(alpha).get(), - dynamic_cast(this))); + make_temporary_conversion>(alpha) + ->get_const_device_view(), + dynamic_cast(this)->get_device_view())); // this last cast is a no-op for complex value type and the branch is // never taken for real value type } else { // otherwise: use the normal kernel - exec->run(dense::make_inv_scale( - make_temporary_conversion(alpha).get(), this)); + exec->run( + dense::make_inv_scale(make_temporary_conversion(alpha) + ->get_const_device_view(), + this->get_device_view())); } } @@ -261,14 +268,16 @@ void Dense::scale_impl(const LinOp* alpha) is_complex()) { // use the real-complex kernel exec->run(dense::make_scale( - make_temporary_conversion>(alpha).get(), - dynamic_cast(this))); + make_temporary_conversion>(alpha) + ->get_const_device_view(), + dynamic_cast(this)->get_device_view())); // this last cast is a no-op for complex value type and the branch is // never taken for real value type } else { // otherwise: use the normal kernel - exec->run(dense::make_scale( - make_temporary_conversion(alpha).get(), this)); + exec->run(dense::make_scale(make_temporary_conversion(alpha) + ->get_const_device_view(), + this->get_device_view())); } } @@ -288,18 +297,25 @@ void Dense::add_scaled_impl(const LinOp* alpha, const LinOp* b) if (dynamic_cast>*>(alpha) && is_complex()) { exec->run(dense::make_add_scaled( - make_temporary_conversion>(alpha).get(), - make_temporary_conversion>(b).get(), - dynamic_cast(this))); + make_temporary_conversion>(alpha) + ->get_const_device_view(), + make_temporary_conversion>(b) + ->get_const_device_view(), + dynamic_cast(this)->get_device_view())); } else { if (dynamic_cast*>(b)) { exec->run(dense::make_add_scaled_diag( - make_temporary_conversion(alpha).get(), - dynamic_cast*>(b), this)); + make_temporary_conversion(alpha) + ->get_const_device_view(), + dynamic_cast*>(b), + this->get_device_view())); } else { exec->run(dense::make_add_scaled( - make_temporary_conversion(alpha).get(), - make_temporary_conversion(b).get(), this)); + make_temporary_conversion(alpha) + ->get_const_device_view(), + make_temporary_conversion(b) + ->get_const_device_view(), + this->get_device_view())); } } } @@ -319,18 +335,25 @@ void Dense::sub_scaled_impl(const LinOp* alpha, const LinOp* b) if (dynamic_cast>*>(alpha) && is_complex()) { exec->run(dense::make_sub_scaled( - make_temporary_conversion>(alpha).get(), - make_temporary_conversion>(b).get(), - dynamic_cast(this))); + make_temporary_conversion>(alpha) + ->get_const_device_view(), + make_temporary_conversion>(b) + ->get_const_device_view(), + dynamic_cast(this)->get_device_view())); } else { if (dynamic_cast*>(b)) { exec->run(dense::make_sub_scaled_diag( - make_temporary_conversion(alpha).get(), - dynamic_cast*>(b), this)); + make_temporary_conversion(alpha) + ->get_const_device_view(), + dynamic_cast*>(b), + this->get_device_view())); } else { exec->run(dense::make_sub_scaled( - make_temporary_conversion(alpha).get(), - make_temporary_conversion(b).get(), this)); + make_temporary_conversion(alpha) + ->get_const_device_view(), + make_temporary_conversion(b) + ->get_const_device_view(), + this->get_device_view())); } } } @@ -352,8 +375,9 @@ void Dense::compute_dot(ptr_param b, auto local_res = make_temporary_clone(exec, result); auto dense_b = make_temporary_conversion(local_b.get()); auto dense_res = make_temporary_conversion(local_res.get()); - exec->run( - dense::make_compute_dot(this, dense_b.get(), dense_res.get(), tmp)); + exec->run(dense::make_compute_dot(this->get_const_device_view(), + dense_b->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -366,8 +390,9 @@ void Dense::compute_dot_impl(const LinOp* b, LinOp* result) const auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); array tmp{exec}; - exec->run( - dense::make_compute_dot(this, dense_b.get(), dense_res.get(), tmp)); + exec->run(dense::make_compute_dot(this->get_const_device_view(), + dense_b->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -387,8 +412,9 @@ void Dense::compute_conj_dot(ptr_param b, auto local_res = make_temporary_clone(exec, result); auto dense_b = make_temporary_conversion(local_b.get()); auto dense_res = make_temporary_conversion(local_res.get()); - exec->run(dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get(), - tmp)); + exec->run(dense::make_compute_conj_dot(this->get_const_device_view(), + dense_b->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -402,8 +428,9 @@ void Dense::compute_conj_dot_impl(const LinOp* b, auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); array tmp{exec}; - exec->run(dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get(), - tmp)); + exec->run(dense::make_compute_conj_dot(this->get_const_device_view(), + dense_b->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -420,7 +447,8 @@ void Dense::compute_norm2(ptr_param result, auto local_result = make_temporary_clone(exec, result); auto dense_res = make_temporary_conversion>( local_result.get()); - exec->run(dense::make_compute_norm2(this, dense_res.get(), tmp)); + exec->run(dense::make_compute_norm2(this->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -432,7 +460,8 @@ void Dense::compute_norm2_impl(LinOp* result) const auto dense_res = make_temporary_conversion>(result); array tmp{exec}; - exec->run(dense::make_compute_norm2(this, dense_res.get(), tmp)); + exec->run(dense::make_compute_norm2(this->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -449,7 +478,8 @@ void Dense::compute_norm1(ptr_param result, auto local_result = make_temporary_clone(exec, result); auto dense_res = make_temporary_conversion>( local_result.get()); - exec->run(dense::make_compute_norm1(this, dense_res.get(), tmp)); + exec->run(dense::make_compute_norm1(this->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -461,7 +491,8 @@ void Dense::compute_norm1_impl(LinOp* result) const auto dense_res = make_temporary_conversion>(result); array tmp{exec}; - exec->run(dense::make_compute_norm1(this, dense_res.get(), tmp)); + exec->run(dense::make_compute_norm1(this->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -478,7 +509,8 @@ void Dense::compute_squared_norm2(ptr_param result, auto local_result = make_temporary_clone(exec, result); auto dense_res = make_temporary_conversion>( local_result.get()); - exec->run(dense::make_compute_squared_norm2(this, dense_res.get(), tmp)); + exec->run(dense::make_compute_squared_norm2( + this->get_const_device_view(), dense_res->get_device_view(), tmp)); } @@ -501,7 +533,8 @@ void Dense::compute_mean(ptr_param result, tmp.set_executor(exec); } auto dense_res = make_temporary_conversion(result); - exec->run(dense::make_compute_mean(this, dense_res.get(), tmp)); + exec->run(dense::make_compute_mean(this->get_const_device_view(), + dense_res->get_device_view(), tmp)); } @@ -547,7 +580,8 @@ Dense& Dense::operator=(const Dense& other) make_array_view(exec, exec_values_array->get_size(), exec_values_array->get_data()), this->get_stride()}; - exec->run(dense::make_copy(&other, &exec_this_view)); + exec->run(dense::make_copy(other.get_const_device_view(), + exec_this_view.get_device_view())); } return *this; } @@ -592,7 +626,8 @@ void Dense::convert_to( } auto exec = this->get_executor(); exec->run(dense::make_copy( - this, make_temporary_output_clone(exec, result).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, result)->get_device_view())); } @@ -616,7 +651,8 @@ void Dense::convert_to( } auto exec = this->get_executor(); exec->run(dense::make_copy( - this, make_temporary_output_clone(exec, result).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, result)->get_device_view())); } @@ -641,7 +677,8 @@ void Dense::convert_to( } auto exec = this->get_executor(); exec->run(dense::make_copy( - this, make_temporary_output_clone(exec, result).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, result)->get_device_view())); } @@ -661,14 +698,15 @@ void Dense::convert_impl(Coo* result) const const auto num_rows = this->get_size()[0]; array row_ptrs{exec, num_rows + 1}; - exec->run(dense::make_count_nonzeros_per_row(this, row_ptrs.get_data())); + exec->run(dense::make_count_nonzeros_per_row(this->get_const_device_view(), + row_ptrs.get_data())); exec->run( dense::make_prefix_sum_nonnegative(row_ptrs.get_data(), num_rows + 1)); const auto nnz = get_element(row_ptrs, num_rows); result->resize(this->get_size(), nnz); - exec->run( - dense::make_convert_to_coo(this, row_ptrs.get_const_data(), - make_temporary_clone(exec, result).get())); + exec->run(dense::make_convert_to_coo( + this->get_const_device_view(), row_ptrs.get_const_data(), + make_temporary_clone(exec, result).get())); } @@ -709,8 +747,8 @@ void Dense::convert_impl(Csr* result) const const auto num_rows = this->get_size()[0]; auto tmp = make_temporary_clone(exec, result); tmp->row_ptrs_.resize_and_reset(num_rows + 1); - exec->run( - dense::make_count_nonzeros_per_row(this, tmp->get_row_ptrs())); + exec->run(dense::make_count_nonzeros_per_row( + this->get_const_device_view(), tmp->get_row_ptrs())); exec->run(dense::make_prefix_sum_nonnegative(tmp->get_row_ptrs(), num_rows + 1)); const auto nnz = @@ -718,7 +756,8 @@ void Dense::convert_impl(Csr* result) const tmp->col_idxs_.resize_and_reset(nnz); tmp->values_.resize_and_reset(nnz); tmp->set_size(this->get_size()); - exec->run(dense::make_convert_to_csr(this, tmp.get())); + exec->run(dense::make_convert_to_csr(this->get_const_device_view(), + tmp.get())); } result->make_srow(); } @@ -762,8 +801,8 @@ void Dense::convert_impl(Fbcsr* result) const const auto col_blocks = detail::get_num_blocks(bs, this->get_size()[1]); auto tmp = make_temporary_clone(exec, result); tmp->row_ptrs_.resize_and_reset(row_blocks + 1); - exec->run(dense::make_count_nonzero_blocks_per_row(this, bs, - tmp->get_row_ptrs())); + exec->run(dense::make_count_nonzero_blocks_per_row( + this->get_const_device_view(), bs, tmp->get_row_ptrs())); exec->run(dense::make_prefix_sum_nonnegative(tmp->get_row_ptrs(), row_blocks + 1)); const auto nnz_blocks = @@ -772,7 +811,8 @@ void Dense::convert_impl(Fbcsr* result) const tmp->values_.resize_and_reset(nnz_blocks * bs * bs); tmp->values_.fill(zero()); tmp->set_size(this->get_size()); - exec->run(dense::make_convert_to_fbcsr(this, tmp.get())); + exec->run( + dense::make_convert_to_fbcsr(this->get_const_device_view(), tmp.get())); } @@ -810,11 +850,12 @@ void Dense::convert_impl(Ell* result) const { auto exec = this->get_executor(); size_type num_stored_elements_per_row{}; - exec->run( - dense::make_compute_max_nnz_per_row(this, num_stored_elements_per_row)); + exec->run(dense::make_compute_max_nnz_per_row(this->get_const_device_view(), + num_stored_elements_per_row)); result->resize(this->get_size(), num_stored_elements_per_row); - exec->run(dense::make_convert_to_ell( - this, make_temporary_clone(exec, result).get())); + exec->run( + dense::make_convert_to_ell(this->get_const_device_view(), + make_temporary_clone(exec, result).get())); } @@ -855,7 +896,8 @@ void Dense::convert_impl(Hybrid* result) const const auto num_cols = this->get_size()[1]; array row_nnz{exec, num_rows}; array coo_row_ptrs{exec, num_rows + 1}; - exec->run(dense::make_count_nonzeros_per_row(this, row_nnz.get_data())); + exec->run(dense::make_count_nonzeros_per_row(this->get_const_device_view(), + row_nnz.get_data())); size_type ell_lim{}; size_type coo_nnz{}; result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, &coo_nnz); @@ -868,7 +910,8 @@ void Dense::convert_impl(Hybrid* result) const coo_nnz = get_element(coo_row_ptrs, num_rows); auto tmp = make_temporary_clone(exec, result); tmp->resize(this->get_size(), ell_lim, coo_nnz); - exec->run(dense::make_convert_to_hybrid(this, coo_row_ptrs.get_const_data(), + exec->run(dense::make_convert_to_hybrid(this->get_const_device_view(), + coo_row_ptrs.get_const_data(), tmp.get())); } @@ -915,15 +958,16 @@ void Dense::convert_impl(Sellp* result) const tmp->slice_size_ = slice_size; tmp->slice_sets_.resize_and_reset(num_slices + 1); tmp->slice_lengths_.resize_and_reset(num_slices); - exec->run(dense::make_compute_slice_sets(this, slice_size, stride_factor, - tmp->get_slice_sets(), - tmp->get_slice_lengths())); + exec->run(dense::make_compute_slice_sets( + this->get_const_device_view(), slice_size, stride_factor, + tmp->get_slice_sets(), tmp->get_slice_lengths())); auto total_cols = exec->copy_val_to_host(tmp->get_slice_sets() + num_slices); tmp->col_idxs_.resize_and_reset(total_cols * slice_size); tmp->values_.resize_and_reset(total_cols * slice_size); tmp->set_size(this->get_size()); - exec->run(dense::make_convert_to_sellp(this, tmp.get())); + exec->run( + dense::make_convert_to_sellp(this->get_const_device_view(), tmp.get())); } @@ -964,15 +1008,16 @@ void Dense::convert_impl( const auto num_rows = this->get_size()[0]; auto tmp = make_temporary_clone(exec, result); tmp->row_ptrs_.resize_and_reset(num_rows + 1); - exec->run( - dense::make_count_nonzeros_per_row(this, tmp->row_ptrs_.get_data())); + exec->run(dense::make_count_nonzeros_per_row(this->get_const_device_view(), + tmp->row_ptrs_.get_data())); exec->run(dense::make_prefix_sum_nonnegative(tmp->row_ptrs_.get_data(), num_rows + 1)); const auto nnz = get_element(tmp->row_ptrs_, num_rows); tmp->col_idxs_.resize_and_reset(nnz); tmp->value_.fill(one()); tmp->set_size(this->get_size()); - exec->run(dense::make_convert_to_sparsity_csr(this, tmp.get())); + exec->run(dense::make_convert_to_sparsity_csr(this->get_const_device_view(), + tmp.get())); } @@ -1022,7 +1067,7 @@ void Dense::read(const device_mat_data& data) this->resize(data.get_size()); this->fill(zero()); exec->run(dense::make_fill_in_matrix_data( - *make_temporary_clone(exec, &data), this)); + *make_temporary_clone(exec, &data), this->get_device_view())); } @@ -1033,7 +1078,7 @@ void Dense::read(const device_mat_data32& data) this->resize(data.get_size()); this->fill(zero()); exec->run(dense::make_fill_in_matrix_data( - *make_temporary_clone(exec, &data), this)); + *make_temporary_clone(exec, &data), this->get_device_view())); } @@ -1130,7 +1175,8 @@ void Dense::transpose(ptr_param> output) const GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); auto exec = this->get_executor(); exec->run(dense::make_transpose( - this, make_temporary_output_clone(exec, output).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, output)->get_device_view())); } @@ -1140,7 +1186,8 @@ void Dense::conj_transpose(ptr_param> output) const GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); auto exec = this->get_executor(); exec->run(dense::make_conj_transpose( - this, make_temporary_output_clone(exec, output).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, output)->get_device_view())); } @@ -1162,27 +1209,33 @@ void Dense::permute_impl(const Permutation* permutation, switch (mode) { case permute_mode::rows: exec->run(dense::make_row_gather(local_perm->get_const_permutation(), - this, local_output.get())); + this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::columns: exec->run(dense::make_col_permute(local_perm->get_const_permutation(), - this, local_output.get())); + this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::symmetric: exec->run(dense::make_symm_permute(local_perm->get_const_permutation(), - this, local_output.get())); + this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::inverse_rows: exec->run(dense::make_inverse_row_permute( - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::inverse_columns: exec->run(dense::make_inverse_col_permute( - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::inverse_symmetric: exec->run(dense::make_inv_symm_permute( - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; default: GKO_INVALID_STATE("Invalid permute mode"); @@ -1208,11 +1261,13 @@ void Dense::permute_impl( if (invert) { exec->run(dense::make_inv_nonsymm_permute( local_row_perm->get_const_permutation(), - local_col_perm->get_const_permutation(), this, local_output.get())); + local_col_perm->get_const_permutation(), + this->get_const_device_view(), local_output->get_device_view())); } else { exec->run(dense::make_nonsymm_permute( local_row_perm->get_const_permutation(), - local_col_perm->get_const_permutation(), this, local_output.get())); + local_col_perm->get_const_permutation(), + this->get_const_device_view(), local_output->get_device_view())); } } @@ -1237,32 +1292,38 @@ void Dense::scale_permute_impl( case permute_mode::rows: exec->run(dense::make_row_scale_permute( local_perm->get_const_scaling_factors(), - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::columns: exec->run(dense::make_col_scale_permute( local_perm->get_const_scaling_factors(), - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::symmetric: exec->run(dense::make_symm_scale_permute( local_perm->get_const_scaling_factors(), - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::inverse_rows: exec->run(dense::make_inv_row_scale_permute( local_perm->get_const_scaling_factors(), - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::inverse_columns: exec->run(dense::make_inv_col_scale_permute( local_perm->get_const_scaling_factors(), - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; case permute_mode::inverse_symmetric: exec->run(dense::make_inv_symm_scale_permute( local_perm->get_const_scaling_factors(), - local_perm->get_const_permutation(), this, local_output.get())); + local_perm->get_const_permutation(), this->get_const_device_view(), + local_output->get_device_view())); break; default: GKO_INVALID_STATE("Invalid permute mode"); @@ -1290,13 +1351,15 @@ void Dense::scale_permute_impl( local_row_perm->get_const_scaling_factors(), local_row_perm->get_const_permutation(), local_col_perm->get_const_scaling_factors(), - local_col_perm->get_const_permutation(), this, local_output.get())); + local_col_perm->get_const_permutation(), + this->get_const_device_view(), local_output->get_device_view())); } else { exec->run(dense::make_nonsymm_scale_permute( local_row_perm->get_const_scaling_factors(), local_row_perm->get_const_permutation(), local_col_perm->get_const_scaling_factors(), - local_col_perm->get_const_permutation(), this, local_output.get())); + local_col_perm->get_const_permutation(), + this->get_const_device_view(), local_output->get_device_view())); } } @@ -1311,8 +1374,9 @@ void Dense::row_gather_impl(const array* row_idxs, GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_collection); exec->run(dense::make_row_gather( - make_temporary_clone(exec, row_idxs)->get_const_data(), this, - make_temporary_output_clone(exec, row_collection).get())); + make_temporary_clone(exec, row_idxs)->get_const_data(), + this->get_const_device_view(), + make_temporary_output_clone(exec, row_collection)->get_device_view())); } template @@ -1327,10 +1391,11 @@ void Dense::row_gather_impl(const Dense* alpha, GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_collection); exec->run(dense::make_advanced_row_gather( - make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, row_idxs)->get_const_data(), this, - make_temporary_clone(exec, beta).get(), - make_temporary_clone(exec, row_collection).get())); + make_temporary_clone(exec, alpha)->get_const_device_view(), + make_temporary_clone(exec, row_idxs)->get_const_data(), + this->get_const_device_view(), + make_temporary_clone(exec, beta)->get_const_device_view(), + make_temporary_clone(exec, row_collection)->get_device_view())); } @@ -1850,7 +1915,8 @@ void Dense::extract_diagonal( GKO_ASSERT_EQ(output->get_size()[0], diag_size); exec->run(dense::make_extract_diagonal( - this, make_temporary_output_clone(exec, output).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, output).get())); } @@ -1867,7 +1933,8 @@ std::unique_ptr> Dense::extract_diagonal() const template void Dense::compute_absolute_inplace() { - this->get_executor()->run(dense::make_inplace_absolute_dense(this)); + this->get_executor()->run( + dense::make_inplace_absolute_dense(this->get_device_view())); } @@ -1889,7 +1956,8 @@ void Dense::compute_absolute(ptr_param output) const auto exec = this->get_executor(); exec->run(dense::make_outplace_absolute_dense( - this, make_temporary_output_clone(exec, output).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, output)->get_device_view())); } @@ -1910,7 +1978,8 @@ void Dense::make_complex(ptr_param result) const auto exec = this->get_executor(); exec->run(dense::make_make_complex( - this, make_temporary_output_clone(exec, result).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, result)->get_device_view())); } @@ -1931,7 +2000,8 @@ void Dense::get_real(ptr_param result) const auto exec = this->get_executor(); exec->run(dense::make_get_real( - this, make_temporary_output_clone(exec, result).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, result)->get_device_view())); } @@ -1952,7 +2022,8 @@ void Dense::get_imag(ptr_param result) const auto exec = this->get_executor(); exec->run(dense::make_get_imag( - this, make_temporary_output_clone(exec, result).get())); + this->get_const_device_view(), + make_temporary_output_clone(exec, result)->get_device_view())); } @@ -1978,7 +2049,9 @@ void Dense::add_scaled_identity_impl(const LinOp* a, const LinOp* b) precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_beta, auto dense_x) { this->get_executor()->run(dense::make_add_scaled_identity( - dense_alpha, dense_beta, dense_x)); + dense_alpha->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, a, b, this); } @@ -2095,7 +2168,7 @@ Dense::Dense(std::shared_ptr exec, } -#define GKO_DECLARE_DENSE_MATRIX(_type) class Dense<_type> +#define GKO_DECLARE_DENSE_MATRIX(ValueType) class Dense GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_MATRIX); diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 7422b431aa0..65df2f9b0e0 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,333 +20,358 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(ValueType) \ void simple_apply(std::shared_ptr exec, \ - const matrix::Dense<_type>* a, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* c) - -#define GKO_DECLARE_DENSE_APPLY_KERNEL(_type) \ - void apply(std::shared_ptr exec, \ - const matrix::Dense<_type>* alpha, \ - const matrix::Dense<_type>* a, const matrix::Dense<_type>* b, \ - const matrix::Dense<_type>* beta, matrix::Dense<_type>* c) - -#define GKO_DECLARE_DENSE_COPY_KERNEL(_intype, _outtype) \ - void copy(std::shared_ptr exec, \ - const matrix::Dense<_intype>* input, \ - matrix::Dense<_outtype>* output) - -#define GKO_DECLARE_DENSE_FILL_KERNEL(_type) \ + matrix::view::dense a, \ + matrix::view::dense b, \ + matrix::view::dense c) + +#define GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType) \ + void apply(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense a, \ + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) + +#define GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType) \ + void copy(std::shared_ptr exec, \ + matrix::view::dense input, \ + matrix::view::dense output) + +#define GKO_DECLARE_DENSE_FILL_KERNEL(ValueType) \ void fill(std::shared_ptr exec, \ - matrix::Dense<_type>* mat, _type value) - -#define GKO_DECLARE_DENSE_SCALE_KERNEL(_type, _scalar_type) \ - void scale(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type>* alpha, \ - matrix::Dense<_type>* x) - -#define GKO_DECLARE_DENSE_INV_SCALE_KERNEL(_type, _scalar_type) \ - void inv_scale(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type>* alpha, \ - matrix::Dense<_type>* x) - -#define GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(_type, _scalar_type) \ - void add_scaled(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type>* alpha, \ - const matrix::Dense<_type>* x, matrix::Dense<_type>* y) - -#define GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(_type, _scalar_type) \ - void sub_scaled(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type>* alpha, \ - const matrix::Dense<_type>* x, matrix::Dense<_type>* y) - -#define GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(_type) \ + matrix::view::dense mat, ValueType value) + +#define GKO_DECLARE_DENSE_SCALE_KERNEL(ValueType, ScalarType) \ + void scale(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense x) + +#define GKO_DECLARE_DENSE_INV_SCALE_KERNEL(ValueType, ScalarType) \ + void inv_scale(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense x) + +#define GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(ValueType, ScalarType) \ + void add_scaled(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense x, \ + matrix::view::dense y) + +#define GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(ValueType, ScalarType) \ + void sub_scaled(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense x, \ + matrix::view::dense y) + +#define GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType) \ void add_scaled_diag(std::shared_ptr exec, \ - const matrix::Dense<_type>* alpha, \ - const matrix::Diagonal<_type>* x, \ - matrix::Dense<_type>* y) + matrix::view::dense alpha, \ + const matrix::Diagonal* x, \ + matrix::view::dense y) -#define GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(ValueType) \ void sub_scaled_diag(std::shared_ptr exec, \ - const matrix::Dense<_type>* alpha, \ - const matrix::Diagonal<_type>* x, \ - matrix::Dense<_type>* y) + matrix::view::dense alpha, \ + const matrix::Diagonal* x, \ + matrix::view::dense y) -#define GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL(ValueType) \ void compute_dot_dispatch(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - const matrix::Dense<_type>* y, \ - matrix::Dense<_type>* result, array& tmp) + matrix::view::dense x, \ + matrix::view::dense y, \ + matrix::view::dense result, \ + array& tmp) -#define GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType) \ void compute_dot(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - const matrix::Dense<_type>* y, \ - matrix::Dense<_type>* result, array& tmp) + matrix::view::dense x, \ + matrix::view::dense y, \ + matrix::view::dense result, array& tmp) -#define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL(ValueType) \ void compute_conj_dot_dispatch( \ std::shared_ptr exec, \ - const matrix::Dense<_type>* x, const matrix::Dense<_type>* y, \ - matrix::Dense<_type>* result, array& tmp) + matrix::view::dense x, \ + matrix::view::dense y, \ + matrix::view::dense result, array& tmp) -#define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType) \ void compute_conj_dot(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - const matrix::Dense<_type>* y, \ - matrix::Dense<_type>* result, array& tmp) - -#define GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(_type) \ - void compute_norm2(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - matrix::Dense>* result, \ + matrix::view::dense x, \ + matrix::view::dense y, \ + matrix::view::dense result, \ + array& tmp) + +#define GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType) \ + void compute_norm2(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense> result, \ array& tmp) -#define GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(_type) \ - void compute_norm2_dispatch(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - matrix::Dense>* result, \ - array& tmp) - -#define GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(_type) \ - void compute_norm1(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - matrix::Dense>* result, \ +#define GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(ValueType) \ + void compute_norm2_dispatch( \ + std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense> result, \ + array& tmp) + +#define GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType) \ + void compute_norm1(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense> result, \ array& tmp) -#define GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL(ValueType) \ void compute_mean(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - matrix::Dense<_type>* result, array& tmp) - -#define GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(_type, _prec) \ - void fill_in_matrix_data(std::shared_ptr exec, \ - const device_matrix_data<_type, _prec>& data, \ - matrix::Dense<_type>* output) - -#define GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(_type) \ - void compute_squared_norm2(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - matrix::Dense>* result, \ - array& tmp) - -#define GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(_type) \ + matrix::view::dense x, \ + matrix::view::dense result, array& tmp) + +#define GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, _prec) \ + void fill_in_matrix_data(std::shared_ptr exec, \ + const device_matrix_data& data, \ + matrix::view::dense output) + +#define GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(ValueType) \ + void compute_squared_norm2( \ + std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense> result, \ + array& tmp) + +#define GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(ValueType) \ void compute_sqrt(std::shared_ptr exec, \ - matrix::Dense<_type>* data) + matrix::view::dense data) -#define GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(ValueType, _prec) \ void convert_to_coo(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ + matrix::view::dense source, \ const int64* row_ptrs, \ - matrix::Coo<_type, _prec>* other) + matrix::Coo* other) -#define GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL(ValueType, _prec) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ - matrix::Csr<_type, _prec>* other) + matrix::view::dense source, \ + matrix::Csr* other) -#define GKO_DECLARE_DENSE_CONVERT_TO_ELL_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_ELL_KERNEL(ValueType, _prec) \ void convert_to_ell(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ - matrix::Ell<_type, _prec>* other) + matrix::view::dense source, \ + matrix::Ell* other) -#define GKO_DECLARE_DENSE_CONVERT_TO_FBCSR_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_FBCSR_KERNEL(ValueType, _prec) \ void convert_to_fbcsr(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ - matrix::Fbcsr<_type, _prec>* other) + matrix::view::dense source, \ + matrix::Fbcsr* other) -#define GKO_DECLARE_DENSE_CONVERT_TO_HYBRID_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_HYBRID_KERNEL(ValueType, _prec) \ void convert_to_hybrid(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ + matrix::view::dense source, \ const int64* coo_row_ptrs, \ - matrix::Hybrid<_type, _prec>* other) + matrix::Hybrid* other) -#define GKO_DECLARE_DENSE_CONVERT_TO_SELLP_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_SELLP_KERNEL(ValueType, _prec) \ void convert_to_sellp(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ - matrix::Sellp<_type, _prec>* other) + matrix::view::dense source, \ + matrix::Sellp* other) -#define GKO_DECLARE_DENSE_CONVERT_TO_SPARSITY_CSR_KERNEL(_type, _prec) \ +#define GKO_DECLARE_DENSE_CONVERT_TO_SPARSITY_CSR_KERNEL(ValueType, _prec) \ void convert_to_sparsity_csr(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ - matrix::SparsityCsr<_type, _prec>* other) + matrix::view::dense source, \ + matrix::SparsityCsr* other) -#define GKO_DECLARE_DENSE_COMPUTE_MAX_NNZ_PER_ROW_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_MAX_NNZ_PER_ROW_KERNEL(ValueType) \ void compute_max_nnz_per_row(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ + matrix::view::dense source, \ size_type& result) -#define GKO_DECLARE_DENSE_COMPUTE_SLICE_SETS_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_COMPUTE_SLICE_SETS_KERNEL(ValueType) \ void compute_slice_sets(std::shared_ptr exec, \ - const matrix::Dense<_type>* source, \ + matrix::view::dense source, \ size_type slice_size, size_type stride_factor, \ size_type* slice_sets, size_type* slice_lengths) -#define GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL(_vtype, _itype) \ - void count_nonzeros_per_row(std::shared_ptr exec, \ - const matrix::Dense<_vtype>* source, \ - _itype* result) +#define GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType) \ + void count_nonzeros_per_row(std::shared_ptr exec, \ + matrix::view::dense source, \ + IndexType* result) -#define GKO_DECLARE_DENSE_COUNT_NONZERO_BLOCKS_PER_ROW_KERNEL(_vtype, _itype) \ - void count_nonzero_blocks_per_row( \ - std::shared_ptr exec, \ - const matrix::Dense<_vtype>* source, int block_size, _itype* result) +#define GKO_DECLARE_DENSE_COUNT_NONZERO_BLOCKS_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void count_nonzero_blocks_per_row( \ + std::shared_ptr exec, \ + matrix::view::dense source, int block_size, \ + IndexType* result) -#define GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL_SIZE_T(_type) \ - GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL(_type, ::gko::size_type) +#define GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL_SIZE_T(ValueType) \ + GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL(ValueType, ::gko::size_type) -#define GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(ValueType) \ void transpose(std::shared_ptr exec, \ - const matrix::Dense<_type>* orig, \ - matrix::Dense<_type>* trans) + matrix::view::dense orig, \ + matrix::view::dense trans) -#define GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType) \ void conj_transpose(std::shared_ptr exec, \ - const matrix::Dense<_type>* orig, \ - matrix::Dense<_type>* trans) - -#define GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void symm_scale_permute( \ - std::shared_ptr exec, const _vtype* scale, \ - const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void row_scale_permute( \ - std::shared_ptr exec, const _vtype* scale, \ - const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void col_scale_permute( \ - std::shared_ptr exec, const _vtype* scale, \ - const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_symm_scale_permute( \ - std::shared_ptr exec, const _vtype* scale, \ - const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_row_scale_permute( \ - std::shared_ptr exec, const _vtype* scale, \ - const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_col_scale_permute( \ - std::shared_ptr exec, const _vtype* scale, \ - const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ - void nonsymm_scale_permute( \ - std::shared_ptr exec, const _vtype* row_scale, \ - const _itype* row_permutation_indices, const _vtype* column_scale, \ - const _itype* column_permutation_indices, \ - const matrix::Dense<_vtype>* orig, matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + matrix::view::dense orig, \ + matrix::view::dense trans) + +#define GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void symm_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void row_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void col_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_symm_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_row_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_col_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void nonsymm_scale_permute(std::shared_ptr exec, \ + const ValueType* row_scale, \ + const IndexType* row_permutation_indices, \ + const ValueType* column_scale, \ + const IndexType* column_permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL(ValueType, \ + IndexType) \ void inv_nonsymm_scale_permute( \ - std::shared_ptr exec, const _vtype* row_scale, \ - const _itype* row_permutation_indices, const _vtype* column_scale, \ - const _itype* column_permutation_indices, \ - const matrix::Dense<_vtype>* orig, matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ - void symm_permute(std::shared_ptr exec, \ - const _itype* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_symm_permute(std::shared_ptr exec, \ - const _itype* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL(_vtype, _itype) \ - void nonsymm_permute(std::shared_ptr exec, \ - const _itype* row_permutation_indices, \ - const _itype* column_permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_nonsymm_permute(std::shared_ptr exec, \ - const _itype* row_permutation_indices, \ - const _itype* column_permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* permuted) - -#define GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(_vtype, _otype, _itype) \ - void row_gather(std::shared_ptr exec, \ - const _itype* gather_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* row_collection) - -#define GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(_vtype, _otype, _itype) \ - void advanced_row_gather( \ std::shared_ptr exec, \ - const matrix::Dense<_vtype>* alpha, const _itype* gather_indices, \ - const matrix::Dense<_vtype>* orig, const matrix::Dense<_vtype>* beta, \ - matrix::Dense<_otype>* row_collection) - -#define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \ - void col_permute(std::shared_ptr exec, \ - const _itype* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* col_permuted) - -#define GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_row_permute(std::shared_ptr exec, \ - const _itype* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* row_permuted) - -#define GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL(_vtype, _itype) \ - void inv_col_permute(std::shared_ptr exec, \ - const _itype* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* col_permuted) - -#define GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(_vtype) \ + const ValueType* row_scale, const IndexType* row_permutation_indices, \ + const ValueType* column_scale, \ + const IndexType* column_permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void symm_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_symm_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void nonsymm_permute(std::shared_ptr exec, \ + const IndexType* row_permutation_indices, \ + const IndexType* column_permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_nonsymm_permute(std::shared_ptr exec, \ + const IndexType* row_permutation_indices, \ + const IndexType* column_permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense permuted) + +#define GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, OutputType, IndexType) \ + void row_gather(std::shared_ptr exec, \ + const IndexType* gather_indices, \ + matrix::view::dense orig, \ + matrix::view::dense row_collection) + +#define GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(ValueType, OutputType, \ + IndexType) \ + void advanced_row_gather(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + const IndexType* gather_indices, \ + matrix::view::dense orig, \ + matrix::view::dense beta, \ + matrix::view::dense row_collection) + +#define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(ValueType, IndexType) \ + void col_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense col_permuted) + +#define GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_row_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense row_permuted) + +#define GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_col_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + matrix::view::dense orig, \ + matrix::view::dense col_permuted) + +#define GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(ValueType) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Diagonal<_vtype>* diag) + matrix::view::dense orig, \ + matrix::Diagonal* diag) -#define GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(_vtype) \ +#define GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(ValueType) \ void inplace_absolute_dense(std::shared_ptr exec, \ - matrix::Dense<_vtype>* source) + matrix::view::dense source) -#define GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(_vtype) \ - void outplace_absolute_dense( \ - std::shared_ptr exec, \ - const matrix::Dense<_vtype>* source, \ - matrix::Dense>* result) +#define GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(ValueType) \ + void outplace_absolute_dense( \ + std::shared_ptr exec, \ + matrix::view::dense source, \ + matrix::view::dense> result) -#define GKO_DECLARE_MAKE_COMPLEX_KERNEL(_vtype) \ +#define GKO_DECLARE_MAKE_COMPLEX_KERNEL(ValueType) \ void make_complex(std::shared_ptr exec, \ - const matrix::Dense<_vtype>* source, \ - matrix::Dense>* result) + matrix::view::dense source, \ + matrix::view::dense> result) -#define GKO_DECLARE_GET_REAL_KERNEL(_vtype) \ +#define GKO_DECLARE_GET_REAL_KERNEL(ValueType) \ void get_real(std::shared_ptr exec, \ - const matrix::Dense<_vtype>* source, \ - matrix::Dense>* result) + matrix::view::dense source, \ + matrix::view::dense> result) -#define GKO_DECLARE_GET_IMAG_KERNEL(_vtype) \ +#define GKO_DECLARE_GET_IMAG_KERNEL(ValueType) \ void get_imag(std::shared_ptr exec, \ - const matrix::Dense<_vtype>* source, \ - matrix::Dense>* result) - -#define GKO_DECLARE_DENSE_ADD_SCALED_IDENTITY_KERNEL(_vtype, _scalar_type) \ - void add_scaled_identity(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type>* alpha, \ - const matrix::Dense<_scalar_type>* beta, \ - matrix::Dense<_vtype>* mtx) + matrix::view::dense source, \ + matrix::view::dense> result) + +#define GKO_DECLARE_DENSE_ADD_SCALED_IDENTITY_KERNEL(ValueType, ScalarType) \ + void add_scaled_identity(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + matrix::view::dense mtx) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/matrix/diagonal.cpp b/core/matrix/diagonal.cpp index cf798cae4c6..0968676abe8 100644 --- a/core/matrix/diagonal.cpp +++ b/core/matrix/diagonal.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -54,8 +54,9 @@ void Diagonal::apply_impl(const LinOp* b, LinOp* x) const } else { precision_dispatch_real_complex( [this, &exec](auto dense_b, auto dense_x) { - exec->run(diagonal::make_apply_to_dense(this, dense_b, dense_x, - false)); + exec->run(diagonal::make_apply_to_dense( + this, dense_b->get_const_device_view(), + dense_x->get_device_view(), false)); }, b, x); } @@ -80,8 +81,9 @@ void Diagonal::rapply_impl(const LinOp* b, LinOp* x) const // diagonal entries for the complex-to-real columns precision_dispatch( [this, &exec](auto dense_b, auto dense_x) { - exec->run(diagonal::make_right_apply_to_dense(this, dense_b, - dense_x)); + exec->run(diagonal::make_right_apply_to_dense( + this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -106,8 +108,9 @@ void Diagonal::inverse_apply_impl(const LinOp* b, LinOp* x) const } else { precision_dispatch_real_complex( [this, &exec](auto dense_b, auto dense_x) { - exec->run(diagonal::make_apply_to_dense(this, dense_b, dense_x, - true)); + exec->run(diagonal::make_apply_to_dense( + this, dense_b->get_const_device_view(), + dense_x->get_device_view(), true)); }, b, x); } diff --git a/core/matrix/diagonal_kernels.hpp b/core/matrix/diagonal_kernels.hpp index 630c76e43ad..8ff3c51bfdc 100644 --- a/core/matrix/diagonal_kernels.hpp +++ b/core/matrix/diagonal_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -18,29 +18,29 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL(value_type) \ +#define GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL(ValueType) \ void apply_to_dense(std::shared_ptr exec, \ - const matrix::Diagonal* a, \ - const matrix::Dense* b, \ - matrix::Dense* c, bool inverse) + const matrix::Diagonal* a, \ + matrix::view::dense b, \ + matrix::view::dense c, bool inverse) -#define GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_DENSE_KERNEL(value_type) \ +#define GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_DENSE_KERNEL(ValueType) \ void right_apply_to_dense(std::shared_ptr exec, \ - const matrix::Diagonal* a, \ - const matrix::Dense* b, \ - matrix::Dense* c) - -#define GKO_DECLARE_DIAGONAL_APPLY_TO_CSR_KERNEL(value_type, index_type) \ - void apply_to_csr(std::shared_ptr exec, \ - const matrix::Diagonal* a, \ - const matrix::Csr* b, \ - matrix::Csr* c, bool inverse) - -#define GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL(value_type, index_type) \ - void right_apply_to_csr(std::shared_ptr exec, \ - const matrix::Diagonal* a, \ - const matrix::Csr* b, \ - matrix::Csr* c) + const matrix::Diagonal* a, \ + matrix::view::dense b, \ + matrix::view::dense c) + +#define GKO_DECLARE_DIAGONAL_APPLY_TO_CSR_KERNEL(ValueType, IndexType) \ + void apply_to_csr(std::shared_ptr exec, \ + const matrix::Diagonal* a, \ + const matrix::Csr* b, \ + matrix::Csr* c, bool inverse) + +#define GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL(ValueType, IndexType) \ + void right_apply_to_csr(std::shared_ptr exec, \ + const matrix::Diagonal* a, \ + const matrix::Csr* b, \ + matrix::Csr* c) #define GKO_DECLARE_DIAGONAL_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType) \ void fill_in_matrix_data( \ diff --git a/core/matrix/ell.cpp b/core/matrix/ell.cpp index 25ec20acb77..c932cb3c8b3 100644 --- a/core/matrix/ell.cpp +++ b/core/matrix/ell.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -130,7 +130,9 @@ void Ell::apply_impl(const LinOp* b, LinOp* x) const { mixed_precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { - this->get_executor()->run(ell::make_spmv(this, dense_b, dense_x)); + this->get_executor()->run( + ell::make_spmv(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -145,8 +147,11 @@ void Ell::apply_impl(const LinOp* alpha, const LinOp* b, auto dense_alpha = make_temporary_conversion(alpha); auto dense_beta = make_temporary_conversion< typename std::decay_t::value_type>(beta); - this->get_executor()->run(ell::make_advanced_spmv( - dense_alpha.get(), this, dense_b, dense_beta.get(), dense_x)); + this->get_executor()->run( + ell::make_advanced_spmv(dense_alpha->get_const_device_view(), + this, dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -223,7 +228,7 @@ void Ell::convert_to(Dense* result) const auto tmp_result = make_temporary_output_clone(exec, result); tmp_result->resize(this->get_size()); tmp_result->fill(zero()); - exec->run(ell::make_fill_in_dense(this, tmp_result.get())); + exec->run(ell::make_fill_in_dense(this, tmp_result->get_device_view())); } diff --git a/core/matrix/ell_kernels.hpp b/core/matrix/ell_kernels.hpp index 7b88507d650..b7676e13fb3 100644 --- a/core/matrix/ell_kernels.hpp +++ b/core/matrix/ell_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,17 +22,17 @@ namespace kernels { OutputValueType, IndexType) \ void spmv(std::shared_ptr exec, \ const matrix::Ell* a, \ - const matrix::Dense* b, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(InputValueType, MatrixValueType, \ OutputValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Ell* a, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) #define GKO_DECLARE_ELL_COMPUTE_MAX_ROW_NNZ_KERNEL(IndexType) \ void compute_max_row_nnz(std::shared_ptr exec, \ @@ -48,7 +48,7 @@ namespace kernels { #define GKO_DECLARE_ELL_FILL_IN_DENSE_KERNEL(ValueType, IndexType) \ void fill_in_dense(std::shared_ptr exec, \ const matrix::Ell* source, \ - matrix::Dense* result) + matrix::view::dense result) #define GKO_DECLARE_ELL_COPY_KERNEL(ValueType, IndexType) \ void copy(std::shared_ptr exec, \ diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 7926e9e9054..9d0e4a403d5 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -113,7 +113,8 @@ void Fbcsr::apply_impl(const LinOp* b, LinOp* x) const precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { this->get_executor()->run( - fbcsr::make_spmv(this, dense_b, dense_x)); + fbcsr::make_spmv(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -136,7 +137,10 @@ void Fbcsr::apply_impl(const LinOp* alpha, const LinOp* b, [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { this->get_executor()->run(fbcsr::make_advanced_spmv( - dense_alpha, this, dense_b, dense_beta, dense_x)); + dense_alpha->get_const_device_view(), this, + dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, alpha, b, beta, x); } @@ -217,7 +221,7 @@ void Fbcsr::convert_to(Dense* result) const auto tmp_result = make_temporary_output_clone(exec, result); tmp_result->resize(this->get_size()); tmp_result->fill(zero()); - exec->run(fbcsr::make_fill_in_dense(this, tmp_result.get())); + exec->run(fbcsr::make_fill_in_dense(this, tmp_result->get_device_view())); } @@ -377,7 +381,7 @@ bool Fbcsr::is_sorted_by_column_index() const { auto exec = this->get_executor(); bool is_sorted; - exec->run(fbcsr::make_is_sorted_by_column_index(this, &is_sorted)); + exec->run(fbcsr::make_is_sorted_by_column_index(this, is_sorted)); return is_sorted; } diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 7a644d48d78..9b25085b3b3 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -24,15 +24,16 @@ namespace kernels { #define GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ const matrix::Fbcsr* a, \ - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Fbcsr* a, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) #define GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType) \ void fill_in_matrix_data(std::shared_ptr exec, \ @@ -44,7 +45,7 @@ namespace kernels { #define GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL(ValueType, IndexType) \ void fill_in_dense(std::shared_ptr exec, \ const matrix::Fbcsr* source, \ - matrix::Dense* result) + matrix::view::dense result) #define GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ @@ -68,7 +69,7 @@ namespace kernels { #define GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ - const matrix::Fbcsr* to_check, bool* is_sorted) + const matrix::Fbcsr* to_check, bool& is_sorted) #define GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ diff --git a/core/matrix/fft.cpp b/core/matrix/fft.cpp index cd6f20c1edc..268ce16c48a 100644 --- a/core/matrix/fft.cpp +++ b/core/matrix/fft.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -154,11 +154,15 @@ void Fft::apply_impl(const LinOp* b, LinOp* x) const { if (auto float_b = dynamic_cast>*>(b)) { auto dense_x = as>>(x); - get_executor()->run(fft::make_fft(float_b, dense_x, inverse_, buffer_)); + get_executor()->run(fft::make_fft(float_b->get_const_device_view(), + dense_x->get_device_view(), inverse_, + buffer_)); } else { auto dense_b = as>>(b); auto dense_x = as>>(x); - get_executor()->run(fft::make_fft(dense_b, dense_x, inverse_, buffer_)); + get_executor()->run(fft::make_fft(dense_b->get_const_device_view(), + dense_x->get_device_view(), inverse_, + buffer_)); } } @@ -247,13 +251,15 @@ void Fft2::apply_impl(const LinOp* b, LinOp* x) const { if (auto float_b = dynamic_cast>*>(b)) { auto dense_x = as>>(x); - get_executor()->run(fft::make_fft2(float_b, dense_x, fft_size_[0], - fft_size_[1], inverse_, buffer_)); + get_executor()->run(fft::make_fft2( + float_b->get_const_device_view(), dense_x->get_device_view(), + fft_size_[0], fft_size_[1], inverse_, buffer_)); } else { auto dense_b = as>>(b); auto dense_x = as>>(x); - get_executor()->run(fft::make_fft2(dense_b, dense_x, fft_size_[0], - fft_size_[1], inverse_, buffer_)); + get_executor()->run(fft::make_fft2( + dense_b->get_const_device_view(), dense_x->get_device_view(), + fft_size_[0], fft_size_[1], inverse_, buffer_)); } } @@ -358,15 +364,15 @@ void Fft3::apply_impl(const LinOp* b, LinOp* x) const { if (auto float_b = dynamic_cast>*>(b)) { auto dense_x = as>>(x); - get_executor()->run(fft::make_fft3(float_b, dense_x, fft_size_[0], - fft_size_[1], fft_size_[2], inverse_, - buffer_)); + get_executor()->run(fft::make_fft3( + float_b->get_const_device_view(), dense_x->get_device_view(), + fft_size_[0], fft_size_[1], fft_size_[2], inverse_, buffer_)); } else { auto dense_b = as>>(b); auto dense_x = as>>(x); - get_executor()->run(fft::make_fft3(dense_b, dense_x, fft_size_[0], - fft_size_[1], fft_size_[2], inverse_, - buffer_)); + get_executor()->run(fft::make_fft3( + dense_b->get_const_device_view(), dense_x->get_device_view(), + fft_size_[0], fft_size_[1], fft_size_[2], inverse_, buffer_)); } } diff --git a/core/matrix/fft_kernels.hpp b/core/matrix/fft_kernels.hpp index bd0e231c394..18a27983f19 100644 --- a/core/matrix/fft_kernels.hpp +++ b/core/matrix/fft_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -15,23 +15,23 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_FFT_KERNEL(ValueType) \ - void fft(std::shared_ptr exec, \ - const matrix::Dense>* b, \ - matrix::Dense>* x, bool inverse, \ +#define GKO_DECLARE_FFT_KERNEL(ValueType) \ + void fft(std::shared_ptr exec, \ + matrix::view::dense> b, \ + matrix::view::dense> x, bool inverse, \ array& buffer) -#define GKO_DECLARE_FFT2_KERNEL(ValueType) \ - void fft2(std::shared_ptr exec, \ - const matrix::Dense>* b, \ - matrix::Dense>* x, size_type size1, \ +#define GKO_DECLARE_FFT2_KERNEL(ValueType) \ + void fft2(std::shared_ptr exec, \ + matrix::view::dense> b, \ + matrix::view::dense> x, size_type size1, \ size_type size2, bool inverse, array& buffer) -#define GKO_DECLARE_FFT3_KERNEL(ValueType) \ - void fft3(std::shared_ptr exec, \ - const matrix::Dense>* b, \ - matrix::Dense>* x, size_type size1, \ - size_type size2, size_type size3, bool inverse, \ +#define GKO_DECLARE_FFT3_KERNEL(ValueType) \ + void fft3(std::shared_ptr exec, \ + matrix::view::dense> b, \ + matrix::view::dense> x, size_type size1, \ + size_type size2, size_type size3, bool inverse, \ array& buffer) diff --git a/core/matrix/hybrid.cpp b/core/matrix/hybrid.cpp index 8d68afbcfe8..5681f943db3 100644 --- a/core/matrix/hybrid.cpp +++ b/core/matrix/hybrid.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -275,10 +275,10 @@ void Hybrid::convert_to(Dense* result) const result->resize(this->get_size()); result->fill(zero()); auto result_local = make_temporary_clone(exec, result); - exec->run( - hybrid::make_ell_fill_in_dense(this->get_ell(), result_local.get())); - exec->run( - hybrid::make_coo_fill_in_dense(this->get_coo(), result_local.get())); + exec->run(hybrid::make_ell_fill_in_dense(this->get_ell(), + result_local->get_device_view())); + exec->run(hybrid::make_coo_fill_in_dense(this->get_coo(), + result_local->get_device_view())); } diff --git a/core/matrix/identity.cpp b/core/matrix/identity.cpp index 7e035be82a3..9e5a530559b 100644 --- a/core/matrix/identity.cpp +++ b/core/matrix/identity.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -82,9 +82,9 @@ std::unique_ptr> Identity::create( } -#define GKO_DECLARE_IDENTITY_MATRIX(_type) class Identity<_type> +#define GKO_DECLARE_IDENTITY_MATRIX(ValueType) class Identity GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDENTITY_MATRIX); -#define GKO_DECLARE_IDENTITY_FACTORY(_type) class IdentityFactory<_type> +#define GKO_DECLARE_IDENTITY_FACTORY(ValueType) class IdentityFactory GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDENTITY_FACTORY); diff --git a/core/matrix/permutation.cpp b/core/matrix/permutation.cpp index 1b409ca6625..3ac74472e71 100644 --- a/core/matrix/permutation.cpp +++ b/core/matrix/permutation.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -303,7 +303,7 @@ void Permutation::apply_impl(const LinOp* alpha, const LinOp* in, } -#define GKO_DECLARE_PERMUTATION_MATRIX(_type) class Permutation<_type> +#define GKO_DECLARE_PERMUTATION_MATRIX(ValueType) class Permutation GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_MATRIX); diff --git a/core/matrix/row_gatherer.cpp b/core/matrix/row_gatherer.cpp index 3b020745ac6..f9169856a42 100644 --- a/core/matrix/row_gatherer.cpp +++ b/core/matrix/row_gatherer.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -93,7 +93,7 @@ void RowGatherer::apply_impl(const LinOp* alpha, const LinOp* in, } -#define GKO_DECLARE_ROWGATHERER_MATRIX(_type) class RowGatherer<_type> +#define GKO_DECLARE_ROWGATHERER_MATRIX(ValueType) class RowGatherer GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_ROWGATHERER_MATRIX); diff --git a/core/matrix/sellp.cpp b/core/matrix/sellp.cpp index d57f40bc64b..da505f45762 100644 --- a/core/matrix/sellp.cpp +++ b/core/matrix/sellp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -155,7 +155,9 @@ void Sellp::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { - this->get_executor()->run(sellp::make_spmv(this, dense_b, dense_x)); + this->get_executor()->run( + sellp::make_spmv(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -168,7 +170,10 @@ void Sellp::apply_impl(const LinOp* alpha, const LinOp* b, precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { this->get_executor()->run(sellp::make_advanced_spmv( - dense_alpha, this, dense_b, dense_beta, dense_x)); + dense_alpha->get_const_device_view(), this, + dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, alpha, b, beta, x); } @@ -251,7 +256,7 @@ void Sellp::convert_to(Dense* result) const auto tmp_result = make_temporary_output_clone(exec, result); tmp_result->resize(this->get_size()); tmp_result->fill(zero()); - exec->run(sellp::make_fill_in_dense(this, tmp_result.get())); + exec->run(sellp::make_fill_in_dense(this, tmp_result->get_device_view())); } diff --git a/core/matrix/sellp_kernels.hpp b/core/matrix/sellp_kernels.hpp index fb4706039fb..7b46c719ec1 100644 --- a/core/matrix/sellp_kernels.hpp +++ b/core/matrix/sellp_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,15 +21,16 @@ namespace kernels { #define GKO_DECLARE_SELLP_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ const matrix::Sellp* a, \ - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_SELLP_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::Sellp* a, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) #define GKO_DECLARE_SELLP_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType) \ void fill_in_matrix_data( \ @@ -46,7 +47,7 @@ namespace kernels { #define GKO_DECLARE_SELLP_FILL_IN_DENSE_KERNEL(ValueType, IndexType) \ void fill_in_dense(std::shared_ptr exec, \ const matrix::Sellp* source, \ - matrix::Dense* result) + matrix::view::dense result) #define GKO_DECLARE_SELLP_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ diff --git a/core/matrix/sparsity_csr.cpp b/core/matrix/sparsity_csr.cpp index 9b8ea04da52..2fdb96b5418 100644 --- a/core/matrix/sparsity_csr.cpp +++ b/core/matrix/sparsity_csr.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -50,7 +50,8 @@ void SparsityCsr::apply_impl(const LinOp* b, mixed_precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { this->get_executor()->run( - sparsity_csr::make_spmv(this, dense_b, dense_x)); + sparsity_csr::make_spmv(this, dense_b->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -68,7 +69,10 @@ void SparsityCsr::apply_impl(const LinOp* alpha, auto dense_beta = make_temporary_conversion< typename std::decay_t::value_type>(beta); this->get_executor()->run(sparsity_csr::make_advanced_spmv( - dense_alpha.get(), this, dense_b, dense_beta.get(), dense_x)); + dense_alpha->get_const_device_view(), this, + dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); }, b, x); } @@ -222,7 +226,8 @@ void SparsityCsr::convert_to( auto tmp_result = make_temporary_output_clone(exec, result); tmp_result->resize(this->get_size()); tmp_result->fill(zero()); - exec->run(sparsity_csr::make_fill_in_dense(this, tmp_result.get())); + exec->run( + sparsity_csr::make_fill_in_dense(this, tmp_result->get_device_view())); } @@ -339,7 +344,7 @@ bool SparsityCsr::is_sorted_by_column_index() const { auto exec = this->get_executor(); bool is_sorted; - exec->run(sparsity_csr::make_is_sorted_by_column_index(this, &is_sorted)); + exec->run(sparsity_csr::make_is_sorted_by_column_index(this, is_sorted)); return is_sorted; } diff --git a/core/matrix/sparsity_csr_kernels.hpp b/core/matrix/sparsity_csr_kernels.hpp index e07bb980dce..a7b3ef8f6d9 100644 --- a/core/matrix/sparsity_csr_kernels.hpp +++ b/core/matrix/sparsity_csr_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,23 +21,23 @@ namespace kernels { OutputValueType, IndexType) \ void spmv(std::shared_ptr exec, \ const matrix::SparsityCsr* a, \ - const matrix::Dense* b, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense c) #define GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL( \ MatrixValueType, InputValueType, OutputValueType, IndexType) \ void advanced_spmv( \ std::shared_ptr exec, \ - const matrix::Dense* alpha, \ + matrix::view::dense alpha, \ const matrix::SparsityCsr* a, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, \ - matrix::Dense* c) + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense c) #define GKO_DECLARE_SPARSITY_CSR_FILL_IN_DENSE_KERNEL(ValueType, IndexType) \ void fill_in_dense(std::shared_ptr exec, \ const matrix::SparsityCsr* input, \ - matrix::Dense* output) + matrix::view::dense output) #define GKO_DECLARE_SPARSITY_CSR_REMOVE_DIAGONAL_ELEMENTS_KERNEL(ValueType, \ IndexType) \ @@ -69,7 +69,7 @@ namespace kernels { void is_sorted_by_column_index( \ std::shared_ptr exec, \ const matrix::SparsityCsr* to_check, \ - bool* is_sorted) + bool& is_sorted) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template ::generate() } -#define GKO_DECLARE_FIXED_COARSENING(_vtype, _itype) \ - class FixedCoarsening<_vtype, _itype> +#define GKO_DECLARE_FIXED_COARSENING(ValueType, IndexType) \ + class FixedCoarsening GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FIXED_COARSENING); diff --git a/core/multigrid/pgm.cpp b/core/multigrid/pgm.cpp index e40686ac479..72df5eddabf 100644 --- a/core/multigrid/pgm.cpp +++ b/core/multigrid/pgm.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -489,7 +489,7 @@ void Pgm::generate() } -#define GKO_DECLARE_PGM(_vtype, _itype) class Pgm<_vtype, _itype> +#define GKO_DECLARE_PGM(ValueType, IndexType) class Pgm GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM); diff --git a/core/preconditioner/batch_jacobi.cpp b/core/preconditioner/batch_jacobi.cpp index e4382de38ec..f51cd0659ca 100644 --- a/core/preconditioner/batch_jacobi.cpp +++ b/core/preconditioner/batch_jacobi.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -174,7 +174,7 @@ void Jacobi::generate_precond( } -#define GKO_DECLARE_BATCH_JACOBI(_type) class Jacobi<_type, int32> +#define GKO_DECLARE_BATCH_JACOBI(ValueType) class Jacobi GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_JACOBI); diff --git a/core/preconditioner/isai.cpp b/core/preconditioner/isai.cpp index f33a21812db..d58c7647cf6 100644 --- a/core/preconditioner/isai.cpp +++ b/core/preconditioner/isai.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -219,7 +219,7 @@ void Isai::generate_inverse( to_invert.get(), inverted.get(), excess_block_ptrs.get_const_data(), excess_row_ptrs_full.get_const_data(), excess_system.get(), - excess_rhs.get(), excess_start, block)); + excess_rhs->get_device_view(), excess_start, block)); // solve it after transposing auto system_copy = gko::clone(exec->get_master(), excess_system); auto rhs_copy = gko::clone(exec->get_master(), excess_rhs); @@ -251,13 +251,14 @@ void Isai::generate_inverse( ->apply(excess_rhs, excess_solution); if (is_spd) { exec->run(isai::make_scale_excess_solution( - excess_block_ptrs.get_const_data(), excess_solution.get(), - excess_start, block)); + excess_block_ptrs.get_const_data(), + excess_solution->get_device_view(), excess_start, block)); } // and copy the results back to the original ISAI exec->run(isai::make_scatter_excess_solution( - excess_block_ptrs.get_const_data(), excess_solution.get(), - inverted.get(), excess_start, block)); + excess_block_ptrs.get_const_data(), + excess_solution->get_const_device_view(), inverted.get(), + excess_start, block)); } } diff --git a/core/preconditioner/isai_kernels.hpp b/core/preconditioner/isai_kernels.hpp index d1897251916..3286d21c9cb 100644 --- a/core/preconditioner/isai_kernels.hpp +++ b/core/preconditioner/isai_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -37,20 +37,20 @@ namespace kernels { const matrix::Csr* inverse, \ const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, \ matrix::Csr* excess_system, \ - matrix::Dense* excess_rhs, size_type e_start, \ + matrix::view::dense excess_rhs, size_type e_start, \ size_type e_end) -#define GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ - void scale_excess_solution(std::shared_ptr exec, \ - const IndexType* excess_block_ptrs, \ - matrix::Dense* excess_solution, \ +#define GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ + void scale_excess_solution(std::shared_ptr exec, \ + const IndexType* excess_block_ptrs, \ + matrix::view::dense excess_solution, \ size_type e_start, size_type e_end) #define GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ void scatter_excess_solution( \ std::shared_ptr exec, \ const IndexType* excess_rhs_ptrs, \ - const matrix::Dense* excess_solution, \ + matrix::view::dense excess_solution, \ matrix::Csr* inverse, size_type e_start, \ size_type e_end) diff --git a/core/preconditioner/jacobi.cpp b/core/preconditioner/jacobi.cpp index a2cbdff793c..3b71ce70937 100644 --- a/core/preconditioner/jacobi.cpp +++ b/core/preconditioner/jacobi.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -153,12 +153,15 @@ void Jacobi::apply_impl(const LinOp* b, LinOp* x) const [this](auto dense_b, auto dense_x) { if (parameters_.max_block_size == 1) { this->get_executor()->run(jacobi::make_simple_scalar_apply( - this->blocks_, dense_b, dense_x)); + this->blocks_, dense_b->get_const_device_view(), + dense_x->get_device_view())); } else { this->get_executor()->run(jacobi::make_simple_apply( num_blocks_, parameters_.max_block_size, storage_scheme_, parameters_.storage_optimization.block_wise, - parameters_.block_pointers, blocks_, dense_b, dense_x)); + parameters_.block_pointers, blocks_, + dense_b->get_const_device_view(), + dense_x->get_device_view())); } }, b, x); @@ -174,13 +177,19 @@ void Jacobi::apply_impl(const LinOp* alpha, [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { if (parameters_.max_block_size == 1) { this->get_executor()->run(jacobi::make_scalar_apply( - this->blocks_, dense_alpha, dense_b, dense_beta, dense_x)); + this->blocks_, dense_alpha->get_const_device_view(), + dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); } else { this->get_executor()->run(jacobi::make_apply( num_blocks_, parameters_.max_block_size, storage_scheme_, parameters_.storage_optimization.block_wise, - parameters_.block_pointers, blocks_, dense_alpha, dense_b, - dense_beta, dense_x)); + parameters_.block_pointers, blocks_, + dense_alpha->get_const_device_view(), + dense_b->get_const_device_view(), + dense_beta->get_const_device_view(), + dense_x->get_device_view())); } }, alpha, b, beta, x); @@ -194,7 +203,8 @@ void Jacobi::convert_to( auto exec = this->get_executor(); auto tmp = matrix::Dense::create(exec, this->get_size()); if (parameters_.max_block_size == 1) { - exec->run(jacobi::make_scalar_convert_to_dense(blocks_, tmp.get())); + exec->run(jacobi::make_scalar_convert_to_dense(blocks_, + tmp->get_device_view())); } else { exec->run(jacobi::make_convert_to_dense( num_blocks_, parameters_.storage_optimization.block_wise, diff --git a/core/preconditioner/jacobi_kernels.hpp b/core/preconditioner/jacobi_kernels.hpp index 20fe5cf9564..f15dba2f57c 100644 --- a/core/preconditioner/jacobi_kernels.hpp +++ b/core/preconditioner/jacobi_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -44,23 +44,25 @@ namespace kernels { const array& diag, \ array& inv_diag) -#define GKO_DECLARE_JACOBI_APPLY_KERNEL(ValueType, IndexType) \ - void apply( \ - std::shared_ptr exec, size_type num_blocks, \ - uint32 max_block_size, \ - const preconditioner::block_interleaved_storage_scheme& \ - storage_scheme, \ - const array& block_precisions, \ - const array& block_pointers, \ - const array& blocks, const matrix::Dense* alpha, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, matrix::Dense* x) +#define GKO_DECLARE_JACOBI_APPLY_KERNEL(ValueType, IndexType) \ + void apply( \ + std::shared_ptr exec, size_type num_blocks, \ + uint32 max_block_size, \ + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + const array& block_precisions, \ + const array& block_pointers, \ + const array& blocks, \ + matrix::view::dense alpha, \ + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense x) #define GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL(ValueType) \ void simple_scalar_apply(std::shared_ptr exec, \ const array& diag, \ - const matrix::Dense* b, \ - matrix::Dense* x) + matrix::view::dense b, \ + matrix::view::dense x) #define GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL(ValueType, IndexType) \ void simple_apply( \ @@ -70,15 +72,17 @@ namespace kernels { storage_scheme, \ const array& block_precisions, \ const array& block_pointers, \ - const array& blocks, const matrix::Dense* b, \ - matrix::Dense* x) + const array& blocks, \ + matrix::view::dense b, \ + matrix::view::dense x) -#define GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL(ValueType) \ - void scalar_apply( \ - std::shared_ptr exec, \ - const array& diag, const matrix::Dense* alpha, \ - const matrix::Dense* b, \ - const matrix::Dense* beta, matrix::Dense* x) +#define GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL(ValueType) \ + void scalar_apply(std::shared_ptr exec, \ + const array& diag, \ + matrix::view::dense alpha, \ + matrix::view::dense b, \ + matrix::view::dense beta, \ + matrix::view::dense x) #define GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose_jacobi( \ @@ -105,7 +109,7 @@ namespace kernels { #define GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType) \ void scalar_convert_to_dense(std::shared_ptr exec, \ const array& blocks, \ - matrix::Dense* result) + matrix::view::dense result) #define GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense( \ diff --git a/core/preconditioner/sor_kernels.hpp b/core/preconditioner/sor_kernels.hpp index fbca3de612c..bf9721b07aa 100644 --- a/core/preconditioner/sor_kernels.hpp +++ b/core/preconditioner/sor_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -16,19 +16,21 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_SOR_INITIALIZE_WEIGHTED_L(_vtype, _itype) \ - void initialize_weighted_l( \ - std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype>* system_matrix, \ - remove_complex<_vtype> weight, matrix::Csr<_vtype, _itype>* l_factor) +#define GKO_DECLARE_SOR_INITIALIZE_WEIGHTED_L(ValueType, IndexType) \ + void initialize_weighted_l( \ + std::shared_ptr exec, \ + const matrix::Csr* system_matrix, \ + remove_complex weight, \ + matrix::Csr* l_factor) -#define GKO_DECLARE_SOR_INITIALIZE_WEIGHTED_L_U(_vtype, _itype) \ - void initialize_weighted_l_u( \ - std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype>* system_matrix, \ - remove_complex<_vtype> weight, matrix::Csr<_vtype, _itype>* l_factor, \ - matrix::Csr<_vtype, _itype>* u_factor) +#define GKO_DECLARE_SOR_INITIALIZE_WEIGHTED_L_U(ValueType, IndexType) \ + void initialize_weighted_l_u( \ + std::shared_ptr exec, \ + const matrix::Csr* system_matrix, \ + remove_complex weight, \ + matrix::Csr* l_factor, \ + matrix::Csr* u_factor) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/bicg.cpp b/core/solver/bicg.cpp index 8719d556eaf..52acbf5d477 100644 --- a/core/solver/bicg.cpp +++ b/core/solver/bicg.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -142,8 +142,12 @@ void Bicg::apply_dense_impl(const matrix::Dense* dense_b, // z = p = q = 0 // r = r2 = dense_b // z2 = p2 = q2 = 0 - exec->run(bicg::make_initialize(dense_b, r, z, p, q, prev_rho, rho, r2, z2, - p2, q2, &stop_status)); + exec->run(bicg::make_initialize( + dense_b->get_const_device_view(), r->get_device_view(), + z->get_device_view(), p->get_device_view(), q->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), + r2->get_device_view(), z2->get_device_view(), p2->get_device_view(), + q2->get_device_view(), stop_status)); std::unique_ptr conj_trans_A; auto conj_transposable_system_matrix = @@ -212,7 +216,11 @@ void Bicg::apply_dense_impl(const matrix::Dense* dense_b, // tmp = rho / prev_rho // p = z + tmp * p // p2 = z2 + tmp * p2 - exec->run(bicg::make_step_1(p, z, p2, z2, rho, prev_rho, &stop_status)); + exec->run(bicg::make_step_1( + p->get_device_view(), z->get_const_device_view(), + p2->get_device_view(), z2->get_const_device_view(), + rho->get_const_device_view(), prev_rho->get_const_device_view(), + stop_status)); // q = A * p this->get_system_matrix()->apply(p, q); // q2 = A^T * p2 @@ -223,8 +231,12 @@ void Bicg::apply_dense_impl(const matrix::Dense* dense_b, // x = x + tmp * p // r = r - tmp * q // r2 = r2 - tmp * q2 - exec->run(bicg::make_step_2(dense_x, r, r2, p, q, q2, beta, rho, - &stop_status)); + exec->run(bicg::make_step_2( + dense_x->get_device_view(), r->get_device_view(), + r2->get_device_view(), p->get_const_device_view(), + q->get_const_device_view(), q2->get_const_device_view(), + beta->get_const_device_view(), rho->get_const_device_view(), + stop_status)); swap(prev_rho, rho); } } @@ -295,8 +307,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_BICG(_type) class Bicg<_type> -#define GKO_DECLARE_BICG_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_BICG(ValueType) class Bicg +#define GKO_DECLARE_BICG_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_TRAITS); diff --git a/core/solver/bicg_kernels.hpp b/core/solver/bicg_kernels.hpp index 5e94d8ca350..7e1424ffe11 100644 --- a/core/solver/bicg_kernels.hpp +++ b/core/solver/bicg_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,34 +22,41 @@ namespace kernels { namespace bicg { -#define GKO_DECLARE_BICG_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* z, matrix::Dense<_type>* p, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* prev_rho, \ - matrix::Dense<_type>* rho, matrix::Dense<_type>* r2, \ - matrix::Dense<_type>* z2, matrix::Dense<_type>* p2, \ - matrix::Dense<_type>* q2, \ - array* stop_status) - - -#define GKO_DECLARE_BICG_STEP_1_KERNEL(_type) \ - void step_1(std::shared_ptr exec, \ - matrix::Dense<_type>* p, const matrix::Dense<_type>* z, \ - matrix::Dense<_type>* p2, const matrix::Dense<_type>* z2, \ - const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* prev_rho, \ - const array* stop_status) - - -#define GKO_DECLARE_BICG_STEP_2_KERNEL(_type) \ - void step_2(std::shared_ptr exec, \ - matrix::Dense<_type>* x, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* r2, const matrix::Dense<_type>* p, \ - const matrix::Dense<_type>* q, const matrix::Dense<_type>* q2, \ - const matrix::Dense<_type>* beta, \ - const matrix::Dense<_type>* rho, \ - const array* stop_status) +#define GKO_DECLARE_BICG_INITIALIZE_KERNEL(ValueType) \ + void initialize( \ + std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense r, matrix::view::dense z, \ + matrix::view::dense p, matrix::view::dense q, \ + matrix::view::dense prev_rho, \ + matrix::view::dense rho, matrix::view::dense r2, \ + matrix::view::dense z2, matrix::view::dense p2, \ + matrix::view::dense q2, \ + array& stop_status) + + +#define GKO_DECLARE_BICG_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense p, \ + matrix::view::dense z, \ + matrix::view::dense p2, \ + matrix::view::dense z2, \ + matrix::view::dense rho, \ + matrix::view::dense prev_rho, \ + const array& stop_status) + + +#define GKO_DECLARE_BICG_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense r, \ + matrix::view::dense r2, \ + matrix::view::dense p, \ + matrix::view::dense q, \ + matrix::view::dense q2, \ + matrix::view::dense beta, \ + matrix::view::dense rho, \ + const array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/bicgstab.cpp b/core/solver/bicgstab.cpp index 2b42982af8a..d4ff695dfc6 100644 --- a/core/solver/bicgstab.cpp +++ b/core/solver/bicgstab.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -128,12 +128,18 @@ void Bicgstab::apply_dense_impl(const VectorType* dense_b, // rr = v = s = t = z = y = p = 0 // stop_status = 0x00 exec->run(bicgstab::make_initialize( - gko::detail::get_local(dense_b), gko::detail::get_local(r), - gko::detail::get_local(rr), gko::detail::get_local(y), - gko::detail::get_local(s), gko::detail::get_local(t), - gko::detail::get_local(z), gko::detail::get_local(v), - gko::detail::get_local(p), prev_rho, rho, alpha, beta, gamma, omega, - &stop_status)); + gko::detail::get_local(dense_b)->get_const_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(rr)->get_device_view(), + gko::detail::get_local(y)->get_device_view(), + gko::detail::get_local(s)->get_device_view(), + gko::detail::get_local(t)->get_device_view(), + gko::detail::get_local(z)->get_device_view(), + gko::detail::get_local(v)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), + alpha->get_device_view(), beta->get_device_view(), + gamma->get_device_view(), omega->get_device_view(), stop_status)); // r = b - Ax this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, r); @@ -176,10 +182,13 @@ void Bicgstab::apply_dense_impl(const VectorType* dense_b, // tmp = rho / prev_rho * alpha / omega // p = r + tmp * (p - omega * v) - exec->run(bicgstab::make_step_1(gko::detail::get_local(r), - gko::detail::get_local(p), - gko::detail::get_local(v), rho, - prev_rho, alpha, omega, &stop_status)); + exec->run(bicgstab::make_step_1( + gko::detail::get_local(r)->get_const_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(v)->get_const_device_view(), + rho->get_const_device_view(), prev_rho->get_const_device_view(), + alpha->get_const_device_view(), omega->get_const_device_view(), + stop_status)); // y = preconditioner * p this->get_preconditioner()->apply(p, y); @@ -190,8 +199,11 @@ void Bicgstab::apply_dense_impl(const VectorType* dense_b, // alpha = rho / beta // s = r - alpha * v exec->run(bicgstab::make_step_2( - gko::detail::get_local(r), gko::detail::get_local(s), - gko::detail::get_local(v), rho, alpha, beta, &stop_status)); + gko::detail::get_local(r)->get_const_device_view(), + gko::detail::get_local(s)->get_device_view(), + gko::detail::get_local(v)->get_const_device_view(), + rho->get_const_device_view(), alpha->get_device_view(), + beta->get_const_device_view(), stop_status)); all_stopped = stop_criterion->update() @@ -201,9 +213,10 @@ void Bicgstab::apply_dense_impl(const VectorType* dense_b, // .solution(dense_x) // outdated at this point .check(RelativeStoppingId, false, &stop_status, &one_changed); if (one_changed) { - exec->run(bicgstab::make_finalize(gko::detail::get_local(dense_x), - gko::detail::get_local(y), alpha, - &stop_status)); + exec->run(bicgstab::make_finalize( + gko::detail::get_local(dense_x)->get_device_view(), + gko::detail::get_local(y)->get_const_device_view(), + alpha->get_const_device_view(), stop_status)); } this->template log( this, dense_b, dense_x, iter, r, nullptr, rho, &stop_status, @@ -224,10 +237,15 @@ void Bicgstab::apply_dense_impl(const VectorType* dense_b, // x = x + alpha * y + omega * z // r = s - omega * t exec->run(bicgstab::make_step_3( - gko::detail::get_local(dense_x), gko::detail::get_local(r), - gko::detail::get_local(s), gko::detail::get_local(t), - gko::detail::get_local(y), gko::detail::get_local(z), alpha, beta, - gamma, omega, &stop_status)); + gko::detail::get_local(dense_x)->get_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(s)->get_const_device_view(), + gko::detail::get_local(t)->get_const_device_view(), + gko::detail::get_local(y)->get_const_device_view(), + gko::detail::get_local(z)->get_const_device_view(), + alpha->get_const_device_view(), beta->get_const_device_view(), + gamma->get_const_device_view(), omega->get_device_view(), + stop_status)); swap(prev_rho, rho); } } @@ -299,9 +317,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_BICGSTAB(_type) class Bicgstab<_type> -#define GKO_DECLARE_BICGSTAB_TRAITS(_type) \ - struct workspace_traits> +#define GKO_DECLARE_BICGSTAB(ValueType) class Bicgstab +#define GKO_DECLARE_BICGSTAB_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_TRAITS); diff --git a/core/solver/bicgstab_kernels.hpp b/core/solver/bicgstab_kernels.hpp index e3bfbdcdcb6..3916a59a7b5 100644 --- a/core/solver/bicgstab_kernels.hpp +++ b/core/solver/bicgstab_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,54 +22,67 @@ namespace kernels { namespace bicgstab { -#define GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* rr, matrix::Dense<_type>* y, \ - matrix::Dense<_type>* s, matrix::Dense<_type>* t, \ - matrix::Dense<_type>* z, matrix::Dense<_type>* v, \ - matrix::Dense<_type>* p, matrix::Dense<_type>* prev_rho, \ - matrix::Dense<_type>* rho, matrix::Dense<_type>* alpha, \ - matrix::Dense<_type>* beta, matrix::Dense<_type>* gamma, \ - matrix::Dense<_type>* omega, \ - array* stop_status) - - -#define GKO_DECLARE_BICGSTAB_STEP_1_KERNEL(_type) \ - void step_1( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* r, matrix::Dense<_type>* p, \ - const matrix::Dense<_type>* v, const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* prev_rho, \ - const matrix::Dense<_type>* alpha, const matrix::Dense<_type>* omega, \ - const array* stop_status) - - -#define GKO_DECLARE_BICGSTAB_STEP_2_KERNEL(_type) \ - void step_2(std::shared_ptr exec, \ - const matrix::Dense<_type>* r, matrix::Dense<_type>* s, \ - const matrix::Dense<_type>* v, \ - const matrix::Dense<_type>* rho, matrix::Dense<_type>* alpha, \ - const matrix::Dense<_type>* beta, \ - const array* stop_status) - - -#define GKO_DECLARE_BICGSTAB_STEP_3_KERNEL(_type) \ - void step_3( \ - std::shared_ptr exec, matrix::Dense<_type>* x, \ - matrix::Dense<_type>* r, const matrix::Dense<_type>* s, \ - const matrix::Dense<_type>* t, const matrix::Dense<_type>* y, \ - const matrix::Dense<_type>* z, const matrix::Dense<_type>* alpha, \ - const matrix::Dense<_type>* beta, const matrix::Dense<_type>* gamma, \ - matrix::Dense<_type>* omega, \ - const array* stop_status) - - -#define GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL(_type) \ - void finalize(std::shared_ptr exec, \ - matrix::Dense<_type>* x, const matrix::Dense<_type>* y, \ - const matrix::Dense<_type>* alpha, \ - array* stop_status) +#define GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL(ValueType) \ + void initialize( \ + std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense r, matrix::view::dense rr, \ + matrix::view::dense y, matrix::view::dense s, \ + matrix::view::dense t, matrix::view::dense z, \ + matrix::view::dense v, matrix::view::dense p, \ + matrix::view::dense prev_rho, \ + matrix::view::dense rho, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + matrix::view::dense gamma, \ + matrix::view::dense omega, \ + array& stop_status) + + +#define GKO_DECLARE_BICGSTAB_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense r, \ + matrix::view::dense p, \ + matrix::view::dense v, \ + matrix::view::dense rho, \ + matrix::view::dense prev_rho, \ + matrix::view::dense alpha, \ + matrix::view::dense omega, \ + const array& stop_status) + + +#define GKO_DECLARE_BICGSTAB_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + matrix::view::dense r, \ + matrix::view::dense s, \ + matrix::view::dense v, \ + matrix::view::dense rho, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + const array& stop_status) + + +#define GKO_DECLARE_BICGSTAB_STEP_3_KERNEL(ValueType) \ + void step_3(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense r, \ + matrix::view::dense s, \ + matrix::view::dense t, \ + matrix::view::dense y, \ + matrix::view::dense z, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + matrix::view::dense gamma, \ + matrix::view::dense omega, \ + const array& stop_status) + + +#define GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL(ValueType) \ + void finalize(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense y, \ + matrix::view::dense alpha, \ + array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/cb_gmres.cpp b/core/solver/cb_gmres.cpp index 1496e37f77d..99004d3dce5 100644 --- a/core/solver/cb_gmres.cpp +++ b/core/solver/cb_gmres.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -280,18 +280,21 @@ void CbGmres::apply_dense_impl( array num_reorth(this->get_executor(), 1); // Initialization - exec->run(cb_gmres::make_initialize(dense_b, residual.get(), - givens_sin.get(), givens_cos.get(), - &stop_status, krylov_dim)); + exec->run(cb_gmres::make_initialize( + dense_b->get_const_device_view(), residual->get_device_view(), + givens_sin->get_device_view(), givens_cos->get_device_view(), + stop_status, krylov_dim)); // residual = dense_b // givens_sin = givens_cos = 0 this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, residual); // residual = residual - Ax exec->run(cb_gmres::make_restart( - residual.get(), residual_norm.get(), residual_norm_collection.get(), - arnoldi_norm.get(), krylov_bases_range, next_krylov_basis.get(), - &final_iter_nums, reduction_tmp, krylov_dim)); + residual->get_const_device_view(), residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), + arnoldi_norm->get_device_view(), krylov_bases_range, + next_krylov_basis->get_device_view(), final_iter_nums, + reduction_tmp, krylov_dim)); // residual_norm = norm(residual) // residual_norm_collection = {residual_norm, 0, ..., 0} // krylov_bases(:, 1) = residual / residual_norm @@ -401,10 +404,11 @@ void CbGmres::apply_dense_impl( span{0, restart_iter}, span{0, num_rhs * (restart_iter)}); exec->run(cb_gmres::make_solve_krylov( - residual_norm_collection.get(), + residual_norm_collection->get_const_device_view(), krylov_bases_range.get_accessor().to_const(), - hessenberg_view.get(), y.get(), before_preconditioner.get(), - &final_iter_nums)); + hessenberg_view->get_const_device_view(), + y->get_device_view(), + before_preconditioner->get_device_view(), final_iter_nums)); // Solve upper triangular. // y = hessenberg \ residual_norm_collection @@ -419,10 +423,12 @@ void CbGmres::apply_dense_impl( residual); // residual = residual - Ax exec->run(cb_gmres::make_restart( - residual.get(), residual_norm.get(), - residual_norm_collection.get(), arnoldi_norm.get(), - krylov_bases_range, next_krylov_basis.get(), - &final_iter_nums, reduction_tmp, krylov_dim)); + residual->get_const_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), + arnoldi_norm->get_device_view(), krylov_bases_range, + next_krylov_basis->get_device_view(), final_iter_nums, + reduction_tmp, krylov_dim)); // residual_norm = norm(residual) // residual_norm_collection = {residual_norm, 0, ..., 0} // krylov_bases(:, 1) = residual / residual_norm @@ -448,11 +454,14 @@ void CbGmres::apply_dense_impl( next_krylov_basis); // next_krylov_basis = A * preconditioned_vector exec->run(cb_gmres::make_arnoldi( - next_krylov_basis.get(), givens_sin.get(), givens_cos.get(), - residual_norm.get(), residual_norm_collection.get(), - krylov_bases_range, hessenberg_iter.get(), buffer_iter.get(), - arnoldi_norm.get(), restart_iter, &final_iter_nums, - &stop_status, &reorth_status, &num_reorth)); + next_krylov_basis->get_device_view(), + givens_sin->get_device_view(), givens_cos->get_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), krylov_bases_range, + hessenberg_iter->get_device_view(), + buffer_iter->get_device_view(), arnoldi_norm->get_device_view(), + restart_iter, final_iter_nums, stop_status, reorth_status, + num_reorth)); // for i in 0:restart_iter // hessenberg(restart_iter, i) = next_krylov_basis' * // krylov_bases(:, i) next_krylov_basis -= @@ -484,10 +493,10 @@ void CbGmres::apply_dense_impl( span{0, restart_iter}, span{0, num_rhs * restart_iter}); exec->run(cb_gmres::make_solve_krylov( - residual_norm_collection.get(), + residual_norm_collection->get_const_device_view(), krylov_bases_range.get_accessor().to_const(), - hessenberg_small.get(), y.get(), before_preconditioner.get(), - &final_iter_nums)); + hessenberg_small->get_const_device_view(), y->get_device_view(), + before_preconditioner->get_device_view(), final_iter_nums)); // Solve upper triangular. // y = hessenberg \ residual_norm_collection this->get_preconditioner()->apply(before_preconditioner, diff --git a/core/solver/cb_gmres_kernels.hpp b/core/solver/cb_gmres_kernels.hpp index 29a84f25ba1..bf56098b710 100644 --- a/core/solver/cb_gmres_kernels.hpp +++ b/core/solver/cb_gmres_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -96,48 +96,52 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_CB_GMRES_INITIALIZE_KERNEL(_type) \ - void initialize( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* residual, \ - matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ - array* stop_status, size_type krylov_dim) - - -#define GKO_DECLARE_CB_GMRES_RESTART_KERNEL(_type1, _range) \ - void restart(std::shared_ptr exec, \ - const matrix::Dense<_type1>* residual, \ - matrix::Dense>* residual_norm, \ - matrix::Dense<_type1>* residual_norm_collection, \ - matrix::Dense>* arnoldi_norm, \ - _range krylov_bases, \ - matrix::Dense<_type1>* next_krylov_basis, \ - array* final_iter_nums, \ +#define GKO_DECLARE_CB_GMRES_INITIALIZE_KERNEL(ValueType) \ + void initialize(std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense residual, \ + matrix::view::dense givens_sin, \ + matrix::view::dense givens_cos, \ + array& stop_status, size_type krylov_dim) + + +#define GKO_DECLARE_CB_GMRES_RESTART_KERNEL(ValueType, Accessor3d) \ + void restart(std::shared_ptr exec, \ + matrix::view::dense residual, \ + matrix::view::dense> residual_norm, \ + matrix::view::dense residual_norm_collection, \ + matrix::view::dense> arnoldi_norm, \ + Accessor3d krylov_bases, \ + matrix::view::dense next_krylov_basis, \ + array& final_iter_nums, \ array& reduction_tmp, size_type krylov_dim) -#define GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL(_type1, _range) \ - void arnoldi( \ - std::shared_ptr exec, \ - matrix::Dense<_type1>* next_krylov_basis, \ - matrix::Dense<_type1>* givens_sin, matrix::Dense<_type1>* givens_cos, \ - matrix::Dense>* residual_norm, \ - matrix::Dense<_type1>* residual_norm_collection, _range krylov_bases, \ - matrix::Dense<_type1>* hessenberg_iter, \ - matrix::Dense<_type1>* buffer_iter, \ - matrix::Dense>* arnoldi_norm, size_type iter, \ - array* final_iter_nums, \ - const array* stop_status, \ - array* reorth_status, array* num_reorth) - -#define GKO_DECLARE_CB_GMRES_SOLVE_KRYLOV_KERNEL(_type1, _range) \ - void solve_krylov(std::shared_ptr exec, \ - const matrix::Dense<_type1>* residual_norm_collection, \ - _range krylov_bases, \ - const matrix::Dense<_type1>* hessenberg, \ - matrix::Dense<_type1>* y, \ - matrix::Dense<_type1>* before_preconditioner, \ - const array* final_iter_nums) +#define GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL(ValueType, Accessor3d) \ + void arnoldi(std::shared_ptr exec, \ + matrix::view::dense next_krylov_basis, \ + matrix::view::dense givens_sin, \ + matrix::view::dense givens_cos, \ + matrix::view::dense> residual_norm, \ + matrix::view::dense residual_norm_collection, \ + Accessor3d krylov_bases, \ + matrix::view::dense hessenberg_iter, \ + matrix::view::dense buffer_iter, \ + matrix::view::dense> arnoldi_norm, \ + size_type iter, array& final_iter_nums, \ + const array& stop_status, \ + array& reorth_status, \ + array& num_reorth) + +#define GKO_DECLARE_CB_GMRES_SOLVE_KRYLOV_KERNEL(ValueType, Accessor3d) \ + void solve_krylov( \ + std::shared_ptr exec, \ + matrix::view::dense residual_norm_collection, \ + Accessor3d krylov_bases, \ + matrix::view::dense hessenberg, \ + matrix::view::dense y, \ + matrix::view::dense before_preconditioner, \ + const array& final_iter_nums) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/cg.cpp b/core/solver/cg.cpp index 04fd3c4de22..95099da64c2 100644 --- a/core/solver/cg.cpp +++ b/core/solver/cg.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -120,9 +120,12 @@ void Cg::apply_dense_impl(const VectorType* dense_b, // prev_rho = 1.0 // z = p = q = 0 exec->run(cg::make_initialize( - gko::detail::get_local(dense_b), gko::detail::get_local(r), - gko::detail::get_local(z), gko::detail::get_local(p), - gko::detail::get_local(q), prev_rho, rho, &stop_status)); + gko::detail::get_local(dense_b)->get_const_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(z)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(q)->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), stop_status)); this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, r); auto stop_criterion = this->get_stop_criterion_factory()->generate( @@ -162,9 +165,11 @@ void Cg::apply_dense_impl(const VectorType* dense_b, // tmp = rho / prev_rho // p = z + tmp * p - exec->run(cg::make_step_1(gko::detail::get_local(p), - gko::detail::get_local(z), rho, prev_rho, - &stop_status)); + exec->run( + cg::make_step_1(gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(z)->get_const_device_view(), + rho->get_const_device_view(), + prev_rho->get_const_device_view(), stop_status)); // q = A * p this->get_system_matrix()->apply(p, q); // beta = dot(p, q) @@ -172,10 +177,13 @@ void Cg::apply_dense_impl(const VectorType* dense_b, // tmp = rho / beta // x = x + tmp * p // r = r - tmp * q - exec->run(cg::make_step_2( - gko::detail::get_local(dense_x), gko::detail::get_local(r), - gko::detail::get_local(p), gko::detail::get_local(q), beta, rho, - &stop_status)); + exec->run( + cg::make_step_2(gko::detail::get_local(dense_x)->get_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(p)->get_const_device_view(), + gko::detail::get_local(q)->get_const_device_view(), + beta->get_const_device_view(), + rho->get_const_device_view(), stop_status)); swap(prev_rho, rho); } } @@ -245,8 +253,8 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_CG(_type) class Cg<_type> -#define GKO_DECLARE_CG_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_CG(ValueType) class Cg +#define GKO_DECLARE_CG_TRAITS(ValueType) struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_TRAITS); diff --git a/core/solver/cg_kernels.hpp b/core/solver/cg_kernels.hpp index bec5f04d0e5..6a449e8fe53 100644 --- a/core/solver/cg_kernels.hpp +++ b/core/solver/cg_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,30 +22,35 @@ namespace kernels { namespace cg { -#define GKO_DECLARE_CG_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* z, matrix::Dense<_type>* p, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* prev_rho, \ - matrix::Dense<_type>* rho, \ - array* stop_status) - - -#define GKO_DECLARE_CG_STEP_1_KERNEL(_type) \ - void step_1(std::shared_ptr exec, \ - matrix::Dense<_type>* p, const matrix::Dense<_type>* z, \ - const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* prev_rho, \ - const array* stop_status) - - -#define GKO_DECLARE_CG_STEP_2_KERNEL(_type) \ - void step_2(std::shared_ptr exec, \ - matrix::Dense<_type>* x, matrix::Dense<_type>* r, \ - const matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ - const matrix::Dense<_type>* beta, \ - const matrix::Dense<_type>* rho, \ - const array* stop_status) +#define GKO_DECLARE_CG_INITIALIZE_KERNEL(ValueType) \ + void initialize( \ + std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense r, matrix::view::dense z, \ + matrix::view::dense p, matrix::view::dense q, \ + matrix::view::dense prev_rho, \ + matrix::view::dense rho, \ + array& stop_status) + + +#define GKO_DECLARE_CG_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense p, \ + matrix::view::dense z, \ + matrix::view::dense rho, \ + matrix::view::dense prev_rho, \ + const array& stop_status) + + +#define GKO_DECLARE_CG_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense r, \ + matrix::view::dense p, \ + matrix::view::dense q, \ + matrix::view::dense beta, \ + matrix::view::dense rho, \ + const array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/cgs.cpp b/core/solver/cgs.cpp index 983140c18e3..a7d6c349a13 100644 --- a/core/solver/cgs.cpp +++ b/core/solver/cgs.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -127,12 +127,17 @@ void Cgs::apply_dense_impl(const VectorType* dense_b, // prev_rho = alpha = beta = gamma = 1.0 // p = q = u = u_hat = v_hat = t = 0 exec->run(cgs::make_initialize( - gko::detail::get_local(dense_b), gko::detail::get_local(r), - gko::detail::get_local(r_tld), gko::detail::get_local(p), - gko::detail::get_local(q), gko::detail::get_local(u), - gko::detail::get_local(u_hat), gko::detail::get_local(v_hat), - gko::detail::get_local(t), alpha, beta, gamma, prev_rho, rho, - &stop_status)); + gko::detail::get_local(dense_b)->get_const_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(r_tld)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(u)->get_device_view(), + gko::detail::get_local(u_hat)->get_device_view(), + gko::detail::get_local(v_hat)->get_device_view(), + gko::detail::get_local(t)->get_device_view(), alpha->get_device_view(), + beta->get_device_view(), gamma->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), stop_status)); this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, r); auto stop_criterion = this->get_stop_criterion_factory()->generate( @@ -173,9 +178,12 @@ void Cgs::apply_dense_impl(const VectorType* dense_b, // u = r + beta * q // p = u + beta * ( q + beta * p ) exec->run(cgs::make_step_1( - gko::detail::get_local(r), gko::detail::get_local(u), - gko::detail::get_local(p), gko::detail::get_local(q), beta, rho, - prev_rho, &stop_status)); + gko::detail::get_local(r)->get_const_device_view(), + gko::detail::get_local(u)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(q)->get_const_device_view(), + beta->get_device_view(), rho->get_const_device_view(), + prev_rho->get_const_device_view(), stop_status)); this->get_preconditioner()->apply(p, t); this->get_system_matrix()->apply(t, v_hat); r_tld->compute_conj_dot(v_hat, gamma, reduction_tmp); @@ -183,18 +191,23 @@ void Cgs::apply_dense_impl(const VectorType* dense_b, // q = u - alpha * v_hat // t = u + q exec->run(cgs::make_step_2( - gko::detail::get_local(u), gko::detail::get_local(v_hat), - gko::detail::get_local(q), gko::detail::get_local(t), alpha, rho, - gamma, &stop_status)); + gko::detail::get_local(u)->get_const_device_view(), + gko::detail::get_local(v_hat)->get_const_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(t)->get_device_view(), + alpha->get_device_view(), rho->get_const_device_view(), + gamma->get_const_device_view(), stop_status)); this->get_preconditioner()->apply(t, u_hat); this->get_system_matrix()->apply(u_hat, t); // r = r - alpha * t // x = x + alpha * u_hat exec->run(cgs::make_step_3( - gko::detail::get_local(t), gko::detail::get_local(u_hat), - gko::detail::get_local(r), gko::detail::get_local(dense_x), alpha, - &stop_status)); + gko::detail::get_local(t)->get_const_device_view(), + gko::detail::get_local(u_hat)->get_const_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(dense_x)->get_device_view(), + alpha->get_const_device_view(), stop_status)); swap(prev_rho, rho); } @@ -266,8 +279,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_CGS(_type) class Cgs<_type> -#define GKO_DECLARE_CGS_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_CGS(ValueType) class Cgs +#define GKO_DECLARE_CGS_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_TRAITS); diff --git a/core/solver/cgs_kernels.hpp b/core/solver/cgs_kernels.hpp index d64aeedb549..f6b5c06d878 100644 --- a/core/solver/cgs_kernels.hpp +++ b/core/solver/cgs_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,43 +22,57 @@ namespace kernels { namespace cgs { -#define GKO_DECLARE_CGS_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* r_tld, matrix::Dense<_type>* p, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* u, \ - matrix::Dense<_type>* u_hat, matrix::Dense<_type>* v_hat, \ - matrix::Dense<_type>* t, matrix::Dense<_type>* alpha, \ - matrix::Dense<_type>* beta, matrix::Dense<_type>* gamma, \ - matrix::Dense<_type>* prev_rho, matrix::Dense<_type>* rho, \ - array* stop_status) - - -#define GKO_DECLARE_CGS_STEP_1_KERNEL(_type) \ - void step_1(std::shared_ptr exec, \ - const matrix::Dense<_type>* r, matrix::Dense<_type>* u, \ - matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ - matrix::Dense<_type>* beta, const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* rho_prev, \ - const array* stop_status) - - -#define GKO_DECLARE_CGS_STEP_2_KERNEL(_type) \ - void step_2(std::shared_ptr exec, \ - const matrix::Dense<_type>* u, \ - const matrix::Dense<_type>* v_hat, matrix::Dense<_type>* q, \ - matrix::Dense<_type>* t, matrix::Dense<_type>* alpha, \ - const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* gamma, \ - const array* stop_status) - - -#define GKO_DECLARE_CGS_STEP_3_KERNEL(_type) \ - void step_3(std::shared_ptr exec, \ - const matrix::Dense<_type>* t, \ - const matrix::Dense<_type>* u_hat, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* x, const matrix::Dense<_type>* alpha, \ - const array* stop_status) +#define GKO_DECLARE_CGS_INITIALIZE_KERNEL(ValueType) \ + void initialize(std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense r, \ + matrix::view::dense r_tld, \ + matrix::view::dense p, \ + matrix::view::dense q, \ + matrix::view::dense u, \ + matrix::view::dense u_hat, \ + matrix::view::dense v_hat, \ + matrix::view::dense t, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + matrix::view::dense gamma, \ + matrix::view::dense prev_rho, \ + matrix::view::dense rho, \ + array& stop_status) + + +#define GKO_DECLARE_CGS_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense r, \ + matrix::view::dense u, \ + matrix::view::dense p, \ + matrix::view::dense q, \ + matrix::view::dense beta, \ + matrix::view::dense rho, \ + matrix::view::dense rho_prev, \ + const array& stop_status) + + +#define GKO_DECLARE_CGS_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + matrix::view::dense u, \ + matrix::view::dense v_hat, \ + matrix::view::dense q, \ + matrix::view::dense t, \ + matrix::view::dense alpha, \ + matrix::view::dense rho, \ + matrix::view::dense gamma, \ + const array& stop_status) + + +#define GKO_DECLARE_CGS_STEP_3_KERNEL(ValueType) \ + void step_3(std::shared_ptr exec, \ + matrix::view::dense t, \ + matrix::view::dense u_hat, \ + matrix::view::dense r, \ + matrix::view::dense x, \ + matrix::view::dense alpha, \ + const array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/chebyshev.cpp b/core/solver/chebyshev.cpp index 7f409484e6f..599859e7ebc 100644 --- a/core/solver/chebyshev.cpp +++ b/core/solver/chebyshev.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -224,7 +224,7 @@ void Chebyshev::apply_dense_impl(const VectorType* dense_b, auto& stop_status = this->template create_workspace_array( ws::stop, dense_b->get_size()[1]); - exec->run(ir::make_initialize(&stop_status)); + exec->run(ir::make_initialize(stop_status)); if (guess != initial_guess_mode::zero) { residual->copy_from(dense_b); this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, residual); @@ -265,11 +265,12 @@ void Chebyshev::apply_dense_impl(const VectorType* dense_b, this->get_preconditioner()->apply(residual_ptr, inner_solution); if (iter == 0) { // x = x + alpha * inner_solution - // update_solultion = inner_solution + // update_solution = inner_solution exec->run(chebyshev::make_init_update( - alpha_host, gko::detail::get_local(inner_solution), - gko::detail::get_local(update_solution), - gko::detail::get_local(dense_x))); + alpha_host, + gko::detail::get_local(inner_solution)->get_const_device_view(), + gko::detail::get_local(update_solution)->get_device_view(), + gko::detail::get_local(dense_x)->get_device_view())); continue; } // beta_host for iter == 1 is initialized in the beginning @@ -282,9 +283,10 @@ void Chebyshev::apply_dense_impl(const VectorType* dense_b, // p = z // x += alpha * p exec->run(chebyshev::make_update( - alpha_host, beta_host, gko::detail::get_local(inner_solution), - gko::detail::get_local(update_solution), - gko::detail::get_local(dense_x))); + alpha_host, beta_host, + gko::detail::get_local(inner_solution)->get_device_view(), + gko::detail::get_local(update_solution)->get_device_view(), + gko::detail::get_local(dense_x)->get_device_view())); } } @@ -364,9 +366,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_CHEBYSHEV(_type) class Chebyshev<_type> -#define GKO_DECLARE_CHEBYSHEV_TRAITS(_type) \ - struct workspace_traits> +#define GKO_DECLARE_CHEBYSHEV(ValueType) class Chebyshev +#define GKO_DECLARE_CHEBYSHEV_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CHEBYSHEV); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CHEBYSHEV_TRAITS); diff --git a/core/solver/chebyshev_kernels.hpp b/core/solver/chebyshev_kernels.hpp index 9e2af545a6e..0edb7bc9e25 100644 --- a/core/solver/chebyshev_kernels.hpp +++ b/core/solver/chebyshev_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -24,17 +24,17 @@ namespace chebyshev { #define GKO_DECLARE_CHEBYSHEV_INIT_UPDATE_KERNEL(ValueType) \ void init_update(std::shared_ptr exec, \ const solver::detail::coeff_type alpha, \ - const matrix::Dense* inner_sol, \ - matrix::Dense* update_sol, \ - matrix::Dense* output) + matrix::view::dense inner_sol, \ + matrix::view::dense update_sol, \ + matrix::view::dense output) #define GKO_DECLARE_CHEBYSHEV_UPDATE_KERNEL(ValueType) \ void update(std::shared_ptr exec, \ const solver::detail::coeff_type alpha, \ const solver::detail::coeff_type beta, \ - matrix::Dense* inner_sol, \ - matrix::Dense* update_sol, \ - matrix::Dense* output) + matrix::view::dense inner_sol, \ + matrix::view::dense update_sol, \ + matrix::view::dense output) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/solver/common_gmres_kernels.hpp b/core/solver/common_gmres_kernels.hpp index 0209284c446..73646c59438 100644 --- a/core/solver/common_gmres_kernels.hpp +++ b/core/solver/common_gmres_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,30 +20,33 @@ namespace kernels { namespace common_gmres { -#define GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL(_type) \ - void initialize( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* residual, \ - matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ - stopping_status* stop_status) +#define GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL(ValueType) \ + void initialize(std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense residual, \ + matrix::view::dense givens_sin, \ + matrix::view::dense givens_cos, \ + stopping_status* stop_status) -#define GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL(_type) \ - void hessenberg_qr( \ - std::shared_ptr exec, \ - matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ - matrix::Dense>* residual_norm, \ - matrix::Dense<_type>* residual_norm_collection, \ - matrix::Dense<_type>* hessenberg_iter, size_type iter, \ +#define GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL(ValueType) \ + void hessenberg_qr( \ + std::shared_ptr exec, \ + matrix::view::dense givens_sin, \ + matrix::view::dense givens_cos, \ + matrix::view::dense> residual_norm, \ + matrix::view::dense residual_norm_collection, \ + matrix::view::dense hessenberg_iter, size_type iter, \ size_type* final_iter_nums, const stopping_status* stop_status) -#define GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL(_type1) \ - void solve_krylov( \ - std::shared_ptr exec, \ - const matrix::Dense<_type1>* residual_norm_collection, \ - const matrix::Dense<_type1>* hessenberg, matrix::Dense<_type1>* y, \ - const size_type* final_iter_nums, const stopping_status* stop_status) +#define GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL(ValueType) \ + void solve_krylov( \ + std::shared_ptr exec, \ + matrix::view::dense residual_norm_collection, \ + matrix::view::dense hessenberg, \ + matrix::view::dense y, const size_type* final_iter_nums, \ + const stopping_status* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/fcg.cpp b/core/solver/fcg.cpp index 7f04b8ba2fd..e0e2e9745cf 100644 --- a/core/solver/fcg.cpp +++ b/core/solver/fcg.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -122,10 +122,14 @@ void Fcg::apply_dense_impl(const VectorType* dense_b, // rho_t = 1.0 // z = p = q = 0 exec->run(fcg::make_initialize( - gko::detail::get_local(dense_b), gko::detail::get_local(r), - gko::detail::get_local(z), gko::detail::get_local(p), - gko::detail::get_local(q), gko::detail::get_local(t), prev_rho, rho, - rho_t, &stop_status)); + gko::detail::get_local(dense_b)->get_const_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(z)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(t)->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), + rho_t->get_device_view(), stop_status)); this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, r); auto stop_criterion = this->get_stop_criterion_factory()->generate( @@ -165,8 +169,10 @@ void Fcg::apply_dense_impl(const VectorType* dense_b, // tmp = rho_t / prev_rho // p = z + tmp * p exec->run(fcg::make_step_1( - gko::detail::get_local(p), gko::detail::get_local(z), - gko::detail::get_local(rho_t), prev_rho, &stop_status)); + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(z)->get_const_device_view(), + gko::detail::get_local(rho_t)->get_const_device_view(), + prev_rho->get_const_device_view(), stop_status)); this->get_system_matrix()->apply(p, q); p->compute_conj_dot(q, beta, reduction_tmp); // tmp = rho / beta @@ -174,10 +180,14 @@ void Fcg::apply_dense_impl(const VectorType* dense_b, // x = x + tmp * p // r = r - tmp * q // t = r - [prev_r] - exec->run(fcg::make_step_2( - gko::detail::get_local(dense_x), gko::detail::get_local(r), - gko::detail::get_local(t), gko::detail::get_local(p), - gko::detail::get_local(q), beta, rho, &stop_status)); + exec->run( + fcg::make_step_2(gko::detail::get_local(dense_x)->get_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(t)->get_device_view(), + gko::detail::get_local(p)->get_const_device_view(), + gko::detail::get_local(q)->get_const_device_view(), + beta->get_const_device_view(), + rho->get_const_device_view(), stop_status)); swap(prev_rho, rho); } } @@ -248,8 +258,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_FCG(_type) class Fcg<_type> -#define GKO_DECLARE_FCG_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_FCG(ValueType) class Fcg +#define GKO_DECLARE_FCG_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_TRAITS); diff --git a/core/solver/fcg_kernels.hpp b/core/solver/fcg_kernels.hpp index bb646055906..4bd26daf439 100644 --- a/core/solver/fcg_kernels.hpp +++ b/core/solver/fcg_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,31 +21,38 @@ namespace kernels { namespace fcg { -#define GKO_DECLARE_FCG_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* z, matrix::Dense<_type>* p, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* t, \ - matrix::Dense<_type>* prev_rho, matrix::Dense<_type>* rho, \ - matrix::Dense<_type>* rho_t, \ - array* stop_status) - - -#define GKO_DECLARE_FCG_STEP_1_KERNEL(_type) \ - void step_1(std::shared_ptr exec, \ - matrix::Dense<_type>* p, const matrix::Dense<_type>* z, \ - const matrix::Dense<_type>* rho_t, \ - const matrix::Dense<_type>* prev_rho, \ - const array* stop_status) - - -#define GKO_DECLARE_FCG_STEP_2_KERNEL(_type) \ - void step_2( \ - std::shared_ptr exec, matrix::Dense<_type>* x, \ - matrix::Dense<_type>* r, matrix::Dense<_type>* t, \ - const matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ - const matrix::Dense<_type>* beta, const matrix::Dense<_type>* rho, \ - const array* stop_status) +#define GKO_DECLARE_FCG_INITIALIZE_KERNEL(ValueType) \ + void initialize( \ + std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense r, matrix::view::dense z, \ + matrix::view::dense p, matrix::view::dense q, \ + matrix::view::dense t, \ + matrix::view::dense prev_rho, \ + matrix::view::dense rho, \ + matrix::view::dense rho_t, \ + array& stop_status) + + +#define GKO_DECLARE_FCG_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense p, \ + matrix::view::dense z, \ + matrix::view::dense rho_t, \ + matrix::view::dense prev_rho, \ + const array& stop_status) + + +#define GKO_DECLARE_FCG_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense r, \ + matrix::view::dense t, \ + matrix::view::dense p, \ + matrix::view::dense q, \ + matrix::view::dense beta, \ + matrix::view::dense rho, \ + const array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/gcr.cpp b/core/solver/gcr.cpp index 575114968b3..1797f8d2239 100644 --- a/core/solver/gcr.cpp +++ b/core/solver/gcr.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -144,9 +144,10 @@ void Gcr::apply_dense_impl(const VectorType* dense_b, // Initialization // residual = dense_b // reset stop status - exec->run(gcr::make_initialize(::gko::detail::get_local(dense_b), - ::gko::detail::get_local(residual), - stop_status.get_data())); + exec->run(gcr::make_initialize( + ::gko::detail::get_local(dense_b)->get_const_device_view(), + ::gko::detail::get_local(residual)->get_device_view(), + stop_status.get_data())); // residual = residual - Ax // Note: x is passed in with initial guess this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, residual); @@ -158,12 +159,12 @@ void Gcr::apply_dense_impl(const VectorType* dense_b, // p(:, 1) = precon_residual(:, 1) // Ap(:, 1) = A_precon_residual(:, 1) // final_iter_nums = {0, ..., 0} - exec->run( - gcr::make_restart(::gko::detail::get_local(precon_residual), - ::gko::detail::get_local(A_precon_residual), - ::gko::detail::get_local(krylov_bases_p), - ::gko::detail::get_local(mapped_krylov_bases_Ap), - final_iter_nums.get_data())); + exec->run(gcr::make_restart( + ::gko::detail::get_local(precon_residual)->get_const_device_view(), + ::gko::detail::get_local(A_precon_residual)->get_const_device_view(), + ::gko::detail::get_local(krylov_bases_p)->get_device_view(), + ::gko::detail::get_local(mapped_krylov_bases_Ap)->get_device_view(), + final_iter_nums.get_data())); auto stop_criterion = this->get_stop_criterion_factory()->generate( this->get_system_matrix(), @@ -220,10 +221,13 @@ void Gcr::apply_dense_impl(const VectorType* dense_b, // Ap(:, 1) = A_precon_residual(:) // final_iter_nums = {0, ..., 0} exec->run(gcr::make_restart( - ::gko::detail::get_local(precon_residual), - ::gko::detail::get_local(A_precon_residual), - ::gko::detail::get_local(krylov_bases_p), - ::gko::detail::get_local(mapped_krylov_bases_Ap), + ::gko::detail::get_local(precon_residual) + ->get_const_device_view(), + ::gko::detail::get_local(A_precon_residual) + ->get_const_device_view(), + ::gko::detail::get_local(krylov_bases_p)->get_device_view(), + ::gko::detail::get_local(mapped_krylov_bases_Ap) + ->get_device_view(), final_iter_nums.get_data())); restart_iter = 0; } @@ -246,12 +250,13 @@ void Gcr::apply_dense_impl(const VectorType* dense_b, // alpha = r*Ap / Ap_norm // x = x + alpha * p // r = r - alpha * Ap - exec->run(gcr::make_step_1(::gko::detail::get_local(dense_x), - ::gko::detail::get_local(residual), - ::gko::detail::get_local(p.get()), - ::gko::detail::get_local(Ap.get()), - Ap_norm.get(), tmp_rAp, - stop_status.get_const_data())); + exec->run(gcr::make_step_1( + ::gko::detail::get_local(dense_x)->get_device_view(), + ::gko::detail::get_local(residual)->get_device_view(), + ::gko::detail::get_local(p.get())->get_const_device_view(), + ::gko::detail::get_local(Ap.get())->get_const_device_view(), + Ap_norm->get_const_device_view(), tmp_rAp->get_const_device_view(), + stop_status.get_const_data())); // apply preconditioner to residual this->get_preconditioner()->apply(residual, precon_residual); @@ -366,8 +371,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_GCR(_type) class Gcr<_type> -#define GKO_DECLARE_GCR_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_GCR(ValueType) class Gcr +#define GKO_DECLARE_GCR_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR_TRAITS); diff --git a/core/solver/gcr_kernels.hpp b/core/solver/gcr_kernels.hpp index d33f31db571..1b90a6055b7 100644 --- a/core/solver/gcr_kernels.hpp +++ b/core/solver/gcr_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,27 +20,30 @@ namespace kernels { namespace gcr { -#define GKO_DECLARE_GCR_INITIALIZE_KERNEL(_type) \ +#define GKO_DECLARE_GCR_INITIALIZE_KERNEL(ValueType) \ void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, \ - matrix::Dense<_type>* residual, \ + matrix::view::dense b, \ + matrix::view::dense residual, \ stopping_status* stop_status) -#define GKO_DECLARE_GCR_RESTART_KERNEL(_type) \ - void restart(std::shared_ptr exec, \ - const matrix::Dense<_type>* residual, \ - const matrix::Dense<_type>* A_residual, \ - matrix::Dense<_type>* p_bases, \ - matrix::Dense<_type>* Ap_bases, size_type* final_iter_nums) +#define GKO_DECLARE_GCR_RESTART_KERNEL(ValueType) \ + void restart(std::shared_ptr exec, \ + matrix::view::dense residual, \ + matrix::view::dense A_residual, \ + matrix::view::dense p_bases, \ + matrix::view::dense Ap_bases, \ + size_type* final_iter_nums) -#define GKO_DECLARE_GCR_STEP_1_KERNEL(_type) \ - void step_1(std::shared_ptr exec, \ - matrix::Dense<_type>* x, matrix::Dense<_type>* residual, \ - const matrix::Dense<_type>* p, const matrix::Dense<_type>* Ap, \ - const matrix::Dense>* Ap_norm, \ - const matrix::Dense<_type>* rAp, \ +#define GKO_DECLARE_GCR_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense residual, \ + matrix::view::dense p, \ + matrix::view::dense Ap, \ + matrix::view::dense> Ap_norm, \ + matrix::view::dense rAp, \ const stopping_status* stop_status) diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index a61c1fa32ad..bd4e995a7fc 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -232,8 +232,10 @@ void orthogonalize_cgs(matrix::Dense* hessenberg_iter, local_span{0, local_num_rows * (restart_iter + 1)}, local_span{0, num_rhs}, dim<2>{num_rows * (restart_iter + 1), num_rhs}); exec->run(gmres::make_multi_dot( - gko::detail::get_local(krylov_basis_small.get()), - gko::detail::get_local(next_krylov), hessenberg_iter)); + gko::detail::get_local(krylov_basis_small.get()) + ->get_const_device_view(), + gko::detail::get_local(next_krylov)->get_const_device_view(), + hessenberg_iter->get_device_view())); finish_reduce(hessenberg_iter, next_krylov, num_rhs, restart_iter); for (size_type i = 0; i <= restart_iter; i++) { // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, @@ -263,8 +265,10 @@ void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, local_span{0, local_num_rows * (restart_iter + 1)}, local_span{0, num_rhs}, dim<2>{num_rows * (restart_iter + 1), num_rhs}); exec->run(gmres::make_multi_dot( - gko::detail::get_local(krylov_basis_small.get()), - gko::detail::get_local(next_krylov), hessenberg_iter)); + gko::detail::get_local(krylov_basis_small.get()) + ->get_const_device_view(), + gko::detail::get_local(next_krylov)->get_const_device_view(), + hessenberg_iter->get_device_view())); finish_reduce(hessenberg_iter, next_krylov, num_rhs, restart_iter); for (size_type i = 0; i <= restart_iter; i++) { // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, @@ -280,8 +284,10 @@ void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, auto hessenberg_aux_iter = hessenberg_aux->create_submatrix( span{0, restart_iter + 2}, span{0, num_rhs}); exec->run(gmres::make_multi_dot( - gko::detail::get_local(krylov_basis_small.get()), - gko::detail::get_local(next_krylov), hessenberg_aux_iter.get())); + gko::detail::get_local(krylov_basis_small.get()) + ->get_const_device_view(), + gko::detail::get_local(next_krylov)->get_const_device_view(), + hessenberg_aux_iter->get_device_view())); finish_reduce(hessenberg_aux_iter.get(), next_krylov, num_rhs, restart_iter); @@ -401,8 +407,10 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // givens_sin = givens_cos = 0 // reset stop status exec->run(gmres::make_initialize( - gko::detail::get_local(dense_b), gko::detail::get_local(residual), - givens_sin, givens_cos, stop_status.get_data())); + gko::detail::get_local(dense_b)->get_const_device_view(), + gko::detail::get_local(residual)->get_device_view(), + givens_sin->get_device_view(), givens_cos->get_device_view(), + stop_status.get_data())); // residual = residual - Ax this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, residual); @@ -411,10 +419,12 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // residual_norm_collection = {residual_norm, unchanged} // krylov_bases(:, 1) = residual / residual_norm // final_iter_nums = {0, ..., 0} - exec->run(gmres::make_restart(gko::detail::get_local(residual), - residual_norm, residual_norm_collection, - gko::detail::get_local(krylov_bases), - final_iter_nums.get_data())); + exec->run(gmres::make_restart( + gko::detail::get_local(residual)->get_const_device_view(), + residual_norm->get_const_device_view(), + residual_norm_collection->get_device_view(), + gko::detail::get_local(krylov_bases)->get_device_view(), + final_iter_nums.get_data())); auto stop_criterion = this->get_stop_criterion_factory()->generate( this->get_system_matrix(), @@ -462,14 +472,17 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // Restart // Solve upper triangular. // y = hessenberg \ residual_norm_collection - exec->run(gmres::make_solve_krylov(residual_norm_collection, - hessenberg, y, - final_iter_nums.get_const_data(), - stop_status.get_const_data())); + exec->run(gmres::make_solve_krylov( + residual_norm_collection->get_const_device_view(), + hessenberg->get_const_device_view(), y->get_device_view(), + final_iter_nums.get_const_data(), + stop_status.get_const_data())); // before_preconditioner = krylov_bases * y exec->run(gmres::make_multi_axpy( - gko::detail::get_local(krylov_bases), y, - gko::detail::get_local(before_preconditioner), + gko::detail::get_local(krylov_bases)->get_const_device_view(), + y->get_const_device_view(), + gko::detail::get_local(before_preconditioner) + ->get_device_view(), final_iter_nums.get_const_data(), stop_status.get_data())); // x = x + get_preconditioner() * before_preconditioner @@ -487,8 +500,10 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // krylov_bases(:, 1) = residual / residual_norm // final_iter_nums = {0, ..., 0} exec->run(gmres::make_restart( - gko::detail::get_local(residual), residual_norm, - residual_norm_collection, gko::detail::get_local(krylov_bases), + gko::detail::get_local(residual)->get_const_device_view(), + residual_norm->get_const_device_view(), + residual_norm_collection->get_device_view(), + gko::detail::get_local(krylov_bases)->get_device_view(), final_iter_nums.get_data())); restart_iter = 0; } @@ -579,9 +594,11 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // residual_norm_collection(restart_iter + 1) = // -conj(sin(restart_iter)) * this_rnc exec->run(gmres::make_hessenberg_qr( - givens_sin, givens_cos, residual_norm, residual_norm_collection, - hessenberg_iter.get(), restart_iter, final_iter_nums.get_data(), - stop_status.get_const_data())); + givens_sin->get_device_view(), givens_cos->get_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), + hessenberg_iter->get_device_view(), restart_iter, + final_iter_nums.get_data(), stop_status.get_const_data())); restart_iter++; } @@ -592,7 +609,8 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // Solve upper triangular. // y = hessenberg \ residual_norm_collection exec->run(gmres::make_solve_krylov( - residual_norm_collection, hessenberg_small.get(), y, + residual_norm_collection->get_const_device_view(), + hessenberg_small->get_const_device_view(), y->get_device_view(), final_iter_nums.get_const_data(), stop_status.get_const_data())); if (is_flexible) { auto preconditioned_krylov_bases_small = @@ -602,8 +620,10 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, dim<2>{num_rows * (restart_iter + 1), num_rhs}); // after_preconditioner = preconditioned_krylov_bases * y exec->run(gmres::make_multi_axpy( - gko::detail::get_local(preconditioned_krylov_bases_small.get()), y, - gko::detail::get_local(after_preconditioner), + gko::detail::get_local(preconditioned_krylov_bases_small.get()) + ->get_const_device_view(), + y->get_const_device_view(), + gko::detail::get_local(after_preconditioner)->get_device_view(), final_iter_nums.get_const_data(), stop_status.get_data())); } else { auto krylov_bases_small = krylov_bases->create_submatrix( @@ -612,8 +632,10 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, dim<2>{num_rows * (restart_iter + 1), num_rhs}); // before_preconditioner = krylov_bases * y exec->run(gmres::make_multi_axpy( - gko::detail::get_local(krylov_bases_small.get()), y, - gko::detail::get_local(before_preconditioner), + gko::detail::get_local(krylov_bases_small.get()) + ->get_const_device_view(), + y->get_const_device_view(), + gko::detail::get_local(before_preconditioner)->get_device_view(), final_iter_nums.get_const_data(), stop_status.get_data())); // after_preconditioner = get_preconditioner() * before_preconditioner @@ -709,8 +731,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_GMRES(_type) class Gmres<_type> -#define GKO_DECLARE_GMRES_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_GMRES(ValueType) class Gmres +#define GKO_DECLARE_GMRES_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_TRAITS); diff --git a/core/solver/gmres_kernels.hpp b/core/solver/gmres_kernels.hpp index 21bb5854816..4653299180f 100644 --- a/core/solver/gmres_kernels.hpp +++ b/core/solver/gmres_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,29 +20,30 @@ namespace kernels { namespace gmres { -#define GKO_DECLARE_GMRES_RESTART_KERNEL(_type) \ - void restart(std::shared_ptr exec, \ - const matrix::Dense<_type>* residual, \ - const matrix::Dense>* residual_norm, \ - matrix::Dense<_type>* residual_norm_collection, \ - matrix::Dense<_type>* krylov_bases, \ - size_type* final_iter_nums) +#define GKO_DECLARE_GMRES_RESTART_KERNEL(ValueType) \ + void restart( \ + std::shared_ptr exec, \ + matrix::view::dense residual, \ + matrix::view::dense> residual_norm, \ + matrix::view::dense residual_norm_collection, \ + matrix::view::dense krylov_bases, \ + size_type* final_iter_nums) -#define GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL(_type) \ - void multi_axpy(std::shared_ptr exec, \ - const matrix::Dense<_type>* krylov_bases, \ - const matrix::Dense<_type>* y, \ - matrix::Dense<_type>* before_preconditioner, \ - const size_type* final_iter_nums, \ +#define GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL(ValueType) \ + void multi_axpy(std::shared_ptr exec, \ + matrix::view::dense krylov_bases, \ + matrix::view::dense y, \ + matrix::view::dense before_preconditioner, \ + const size_type* final_iter_nums, \ stopping_status* stop_status) -#define GKO_DECLARE_GMRES_MULTI_DOT_KERNEL(_type) \ - void multi_dot(std::shared_ptr exec, \ - const matrix::Dense<_type>* krylov_bases, \ - const matrix::Dense<_type>* next_krylov, \ - matrix::Dense<_type>* hessenberg_col) +#define GKO_DECLARE_GMRES_MULTI_DOT_KERNEL(ValueType) \ + void multi_dot(std::shared_ptr exec, \ + matrix::view::dense krylov_bases, \ + matrix::view::dense next_krylov, \ + matrix::view::dense hessenberg_col) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/idr.cpp b/core/solver/idr.cpp index e1dfa4ef349..76f4c8be698 100644 --- a/core/solver/idr.cpp +++ b/core/solver/idr.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -168,9 +168,10 @@ void Idr::iterate(const VectorType* dense_b, std::default_random_engine(15)); subspace_vectors->read(subspace_vectors_data); } - exec->run(idr::make_initialize(nrhs, gko::detail::get_local(m), - gko::detail::get_local(subspace_vectors), - is_deterministic, &stop_status)); + exec->run(idr::make_initialize( + nrhs, gko::detail::get_local(m)->get_device_view(), + gko::detail::get_local(subspace_vectors)->get_device_view(), + is_deterministic, stop_status)); // omega = 1 omega->fill(one()); @@ -233,18 +234,21 @@ void Idr::iterate(const VectorType* dense_b, // c = M \ f = (c_1, ..., c_s)^T // v = residual - sum i=[k,s) of (c_i * g_i) exec->run(idr::make_step_1( - nrhs, k, gko::detail::get_local(m), gko::detail::get_local(f), - gko::detail::get_local(residual), gko::detail::get_local(g), - gko::detail::get_local(c), gko::detail::get_local(v), - &stop_status)); + nrhs, k, gko::detail::get_local(m)->get_const_device_view(), + gko::detail::get_local(f)->get_const_device_view(), + gko::detail::get_local(residual)->get_const_device_view(), + gko::detail::get_local(g)->get_const_device_view(), + gko::detail::get_local(c)->get_device_view(), + gko::detail::get_local(v)->get_device_view(), stop_status)); this->get_preconditioner()->apply(v, helper); // u_k = omega * precond_vector + sum i=[k,s) of (c_i * u_i) exec->run(idr::make_step_2( - nrhs, k, gko::detail::get_local(omega), - gko::detail::get_local(helper), gko::detail::get_local(c), - gko::detail::get_local(u), &stop_status)); + nrhs, k, gko::detail::get_local(omega)->get_const_device_view(), + gko::detail::get_local(helper)->get_const_device_view(), + gko::detail::get_local(c)->get_const_device_view(), + gko::detail::get_local(u)->get_device_view(), stop_status)); auto u_k = u->create_submatrix(span{0, problem_size}, span{k * nrhs, (k + 1) * nrhs}); @@ -266,12 +270,18 @@ void Idr::iterate(const VectorType* dense_b, // dense_x += beta * u_k // f = (0,...,0,f_k+1 - beta * m_k+1,k,...,f_s-1 - beta * m_s-1,k) exec->run(idr::make_step_3( - nrhs, k, gko::detail::get_local(subspace_vectors), - gko::detail::get_local(g), gko::detail::get_local(helper), - gko::detail::get_local(u), gko::detail::get_local(m), - gko::detail::get_local(f), gko::detail::get_local(alpha), - gko::detail::get_local(residual), - gko::detail::get_local(dense_x), &stop_status)); + nrhs, k, + gko::detail::get_local(subspace_vectors) + ->get_const_device_view(), + gko::detail::get_local(g)->get_device_view(), + gko::detail::get_local(helper)->get_device_view(), + gko::detail::get_local(u)->get_device_view(), + gko::detail::get_local(m)->get_device_view(), + gko::detail::get_local(f)->get_device_view(), + gko::detail::get_local(alpha)->get_device_view(), + gko::detail::get_local(residual)->get_device_view(), + gko::detail::get_local(dense_x)->get_device_view(), + stop_status)); } this->get_preconditioner()->apply(residual, helper); @@ -291,9 +301,9 @@ void Idr::iterate(const VectorType* dense_b, // residual -= omega * t // dense_x += omega * v exec->run(idr::make_compute_omega( - nrhs, kappa, gko::detail::get_local(tht), - gko::detail::get_local(residual_norm), - gko::detail::get_local(omega), &stop_status)); + nrhs, kappa, gko::detail::get_local(tht)->get_const_device_view(), + gko::detail::get_local(residual_norm)->get_const_device_view(), + gko::detail::get_local(omega)->get_device_view(), stop_status)); t->scale(subspace_neg_one_op); residual->add_scaled(omega, t); @@ -407,8 +417,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_IDR(_type) class Idr<_type> -#define GKO_DECLARE_IDR_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_IDR(ValueType) class Idr +#define GKO_DECLARE_IDR_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_TRAITS); diff --git a/core/solver/idr_kernels.hpp b/core/solver/idr_kernels.hpp index 3d579bd01af..4a26155f132 100644 --- a/core/solver/idr_kernels.hpp +++ b/core/solver/idr_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,49 +20,55 @@ namespace kernels { namespace idr { -#define GKO_DECLARE_IDR_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const size_type nrhs, matrix::Dense<_type>* m, \ - matrix::Dense<_type>* subspace_vectors, \ - bool deterministic, array* stop_status) +#define GKO_DECLARE_IDR_INITIALIZE_KERNEL(ValueType) \ + void initialize(std::shared_ptr exec, \ + const size_type nrhs, matrix::view::dense m, \ + matrix::view::dense subspace_vectors, \ + bool deterministic, array& stop_status) -#define GKO_DECLARE_IDR_STEP_1_KERNEL(_type) \ - void step_1( \ - std::shared_ptr exec, const size_type nrhs, \ - const size_type k, const matrix::Dense<_type>* m, \ - const matrix::Dense<_type>* f, const matrix::Dense<_type>* residual, \ - const matrix::Dense<_type>* g, matrix::Dense<_type>* c, \ - matrix::Dense<_type>* v, const array* stop_status) - - -#define GKO_DECLARE_IDR_STEP_2_KERNEL(_type) \ - void step_2(std::shared_ptr exec, \ - const size_type nrhs, const size_type k, \ - const matrix::Dense<_type>* omega, \ - const matrix::Dense<_type>* preconditioned_vector, \ - const matrix::Dense<_type>* c, matrix::Dense<_type>* u, \ - const array* stop_status) - - -#define GKO_DECLARE_IDR_STEP_3_KERNEL(_type) \ - void step_3(std::shared_ptr exec, \ - const size_type nrhs, const size_type k, \ - const matrix::Dense<_type>* p, matrix::Dense<_type>* g, \ - matrix::Dense<_type>* g_k, matrix::Dense<_type>* u, \ - matrix::Dense<_type>* m, matrix::Dense<_type>* f, \ - matrix::Dense<_type>* alpha, matrix::Dense<_type>* residual, \ - matrix::Dense<_type>* x, \ - const array* stop_status) - - -#define GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL(_type) \ +#define GKO_DECLARE_IDR_STEP_1_KERNEL(ValueType) \ + void step_1( \ + std::shared_ptr exec, const size_type nrhs, \ + const size_type k, matrix::view::dense m, \ + matrix::view::dense f, \ + matrix::view::dense residual, \ + matrix::view::dense g, \ + matrix::view::dense c, matrix::view::dense v, \ + const array& stop_status) + + +#define GKO_DECLARE_IDR_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + const size_type nrhs, const size_type k, \ + matrix::view::dense omega, \ + matrix::view::dense preconditioned_vector, \ + matrix::view::dense c, \ + matrix::view::dense u, \ + const array& stop_status) + + +#define GKO_DECLARE_IDR_STEP_3_KERNEL(ValueType) \ + void step_3( \ + std::shared_ptr exec, const size_type nrhs, \ + const size_type k, matrix::view::dense p, \ + matrix::view::dense g, matrix::view::dense g_k, \ + matrix::view::dense u, matrix::view::dense m, \ + matrix::view::dense f, \ + matrix::view::dense alpha, \ + matrix::view::dense residual, \ + matrix::view::dense x, \ + const array& stop_status) + + +#define GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL(ValueType) \ void compute_omega( \ std::shared_ptr exec, const size_type nrhs, \ - const remove_complex<_type> kappa, const matrix::Dense<_type>* tht, \ - const matrix::Dense>* residual_norm, \ - matrix::Dense<_type>* omega, \ - const array* stop_status) + const remove_complex kappa, \ + matrix::view::dense tht, \ + matrix::view::dense> residual_norm, \ + matrix::view::dense omega, \ + const array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/ir.cpp b/core/solver/ir.cpp index 62f618f1d7a..cfe0fa8fd1c 100644 --- a/core/solver/ir.cpp +++ b/core/solver/ir.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -207,7 +207,7 @@ void Ir::apply_dense_impl(const VectorType* dense_b, auto& stop_status = this->template create_workspace_array( ws::stop, dense_b->get_size()[1]); - exec->run(ir::make_initialize(&stop_status)); + exec->run(ir::make_initialize(stop_status)); if (guess != initial_guess_mode::zero) { residual->copy_from(dense_b); this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, residual); @@ -335,8 +335,8 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_IR(_type) class Ir<_type> -#define GKO_DECLARE_IR_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_IR(ValueType) class Ir +#define GKO_DECLARE_IR_TRAITS(ValueType) struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IR); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IR_TRAITS); diff --git a/core/solver/ir_kernels.hpp b/core/solver/ir_kernels.hpp index a411c9f375d..0aa0e3f76a2 100644 --- a/core/solver/ir_kernels.hpp +++ b/core/solver/ir_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,7 +23,7 @@ namespace ir { #define GKO_DECLARE_IR_INITIALIZE_KERNEL \ void initialize(std::shared_ptr exec, \ - array* stop_status) + array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES GKO_DECLARE_IR_INITIALIZE_KERNEL diff --git a/core/solver/lower_trs.cpp b/core/solver/lower_trs.cpp index 3649e1201c7..cf3fd3752aa 100644 --- a/core/solver/lower_trs.cpp +++ b/core/solver/lower_trs.cpp @@ -1,7 +1,8 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause +#include #include #include @@ -160,6 +161,7 @@ void LowerTrs::apply_impl(const LinOp* b, LinOp* x) const // pointers. Vector* trans_b{}; Vector* trans_x{}; + using optional_view = std::optional>; if (needs_transpose(exec)) { trans_b = this->template create_workspace_op( ws::transposed_b, gko::transpose(dense_b->get_size())); @@ -169,7 +171,11 @@ void LowerTrs::apply_impl(const LinOp* b, LinOp* x) const exec->run(lower_trs::make_solve( this->get_system_matrix().get(), this->solve_struct_.get(), this->get_parameters().unit_diagonal, parameters_.algorithm, - trans_b, trans_x, dense_b, dense_x)); + trans_b ? optional_view{trans_b->get_device_view()} + : optional_view{}, + trans_x ? optional_view{trans_x->get_device_view()} + : optional_view{}, + dense_b->get_const_device_view(), dense_x->get_device_view())); }, b, x); } @@ -248,9 +254,10 @@ std::vector workspace_traits>::vectors( } -#define GKO_DECLARE_LOWER_TRS(_vtype, _itype) class LowerTrs<_vtype, _itype> -#define GKO_DECLARE_LOWER_TRS_TRAITS(_vtype, _itype) \ - struct workspace_traits> +#define GKO_DECLARE_LOWER_TRS(ValueType, IndexType) \ + class LowerTrs +#define GKO_DECLARE_LOWER_TRS_TRAITS(ValueType, IndexType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LOWER_TRS); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LOWER_TRS_TRAITS); diff --git a/core/solver/lower_trs_kernels.hpp b/core/solver/lower_trs_kernels.hpp index 8bb0031e801..fc74393587e 100644 --- a/core/solver/lower_trs_kernels.hpp +++ b/core/solver/lower_trs_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -7,6 +7,7 @@ #include +#include #include #include @@ -26,21 +27,23 @@ namespace lower_trs { bool& do_transpose) -#define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(_vtype, _itype) \ +#define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(ValueType, IndexType) \ void generate(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype>* matrix, \ + const matrix::Csr* matrix, \ std::shared_ptr& solve_struct, \ bool unit_diag, const solver::trisolve_algorithm algorithm, \ const size_type num_rhs) -#define GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(_vtype, _itype) \ - void solve(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype>* matrix, \ - const solver::SolveStruct* solve_struct, bool unit_diag, \ - const solver::trisolve_algorithm algorithm, \ - matrix::Dense<_vtype>* trans_b, matrix::Dense<_vtype>* trans_x, \ - const matrix::Dense<_vtype>* b, matrix::Dense<_vtype>* x) +#define GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(ValueType, IndexType) \ + void solve(std::shared_ptr exec, \ + const matrix::Csr* matrix, \ + const solver::SolveStruct* solve_struct, bool unit_diag, \ + const solver::trisolve_algorithm algorithm, \ + std::optional> trans_b, \ + std::optional> trans_x, \ + matrix::view::dense b, \ + matrix::view::dense x) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/minres.cpp b/core/solver/minres.cpp index 80ed81e1ee4..5d41d6569e0 100644 --- a/core/solver/minres.cpp +++ b/core/solver/minres.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -177,15 +177,22 @@ void Minres::apply_dense_impl(const VectorType* dense_b, // z = z / beta // p = p_prev = q_prev = v = 0 exec->run(minres::make_initialize( - gko::detail::get_local(r), gko::detail::get_local(z), - gko::detail::get_local(p), gko::detail::get_local(p_prev), - gko::detail::get_local(q), gko::detail::get_local(q_prev), - gko::detail::get_local(v), gko::detail::get_local(beta), - gko::detail::get_local(gamma), gko::detail::get_local(delta), - gko::detail::get_local(cos_prev), gko::detail::get_local(cos), - gko::detail::get_local(sin_prev), gko::detail::get_local(sin), - gko::detail::get_local(eta_next), gko::detail::get_local(eta), - &stop_status)); + gko::detail::get_local(r)->get_const_device_view(), + gko::detail::get_local(z)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(p_prev)->get_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(q_prev)->get_device_view(), + gko::detail::get_local(v)->get_device_view(), + gko::detail::get_local(beta)->get_device_view(), + gko::detail::get_local(gamma)->get_device_view(), + gko::detail::get_local(delta)->get_device_view(), + gko::detail::get_local(cos_prev)->get_device_view(), + gko::detail::get_local(cos)->get_device_view(), + gko::detail::get_local(sin_prev)->get_device_view(), + gko::detail::get_local(sin)->get_device_view(), + gko::detail::get_local(eta_next)->get_device_view(), + gko::detail::get_local(eta)->get_device_view(), stop_status)); int iter = -1; /* Memory movement summary: @@ -248,12 +255,17 @@ void Minres::apply_dense_impl(const VectorType* dense_b, // update the squared residual norm approximation: // tau = abs(sin)^2 * tau exec->run(minres::make_step_1( - gko::detail::get_local(alpha), gko::detail::get_local(beta), - gko::detail::get_local(gamma), gko::detail::get_local(delta), - gko::detail::get_local(cos_prev), gko::detail::get_local(cos), - gko::detail::get_local(sin_prev), gko::detail::get_local(sin), - gko::detail::get_local(eta), gko::detail::get_local(eta_next), - gko::detail::get_local(tau), &stop_status)); + gko::detail::get_local(alpha)->get_device_view(), + gko::detail::get_local(beta)->get_device_view(), + gko::detail::get_local(gamma)->get_device_view(), + gko::detail::get_local(delta)->get_device_view(), + gko::detail::get_local(cos_prev)->get_device_view(), + gko::detail::get_local(cos)->get_device_view(), + gko::detail::get_local(sin_prev)->get_device_view(), + gko::detail::get_local(sin)->get_device_view(), + gko::detail::get_local(eta)->get_device_view(), + gko::detail::get_local(eta_next)->get_device_view(), + gko::detail::get_local(tau)->get_device_view(), stop_status)); // update vectors @@ -273,14 +285,20 @@ void Minres::apply_dense_impl(const VectorType* dense_b, // gamma = beta swap(p, p_prev); exec->run(minres::make_step_2( - gko::detail::get_local(dense_x), gko::detail::get_local(p), - gko::detail::get_local(p_prev), gko::detail::get_local(z), - gko::detail::get_local(z_tilde), gko::detail::get_local(q), - gko::detail::get_local(q_prev), gko::detail::get_local(v), - gko::detail::get_local(alpha), gko::detail::get_local(beta), - gko::detail::get_local(gamma), gko::detail::get_local(delta), - gko::detail::get_local(cos), gko::detail::get_local(eta), - &stop_status)); + gko::detail::get_local(dense_x)->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(p_prev)->get_const_device_view(), + gko::detail::get_local(z)->get_device_view(), + gko::detail::get_local(z_tilde)->get_const_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(q_prev)->get_device_view(), + gko::detail::get_local(v)->get_device_view(), + gko::detail::get_local(alpha)->get_const_device_view(), + gko::detail::get_local(beta)->get_const_device_view(), + gko::detail::get_local(gamma)->get_const_device_view(), + gko::detail::get_local(delta)->get_const_device_view(), + gko::detail::get_local(cos)->get_const_device_view(), + gko::detail::get_local(eta)->get_const_device_view(), stop_status)); swap(gamma, beta); } } @@ -369,8 +387,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_MINRES(_type) class Minres<_type> -#define GKO_DECLARE_MINRES_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_MINRES(ValueType) class Minres +#define GKO_DECLARE_MINRES_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES_TRAITS); diff --git a/core/solver/minres_kernels.hpp b/core/solver/minres_kernels.hpp index 0642759216b..f62ade1db17 100644 --- a/core/solver/minres_kernels.hpp +++ b/core/solver/minres_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,40 +22,59 @@ namespace kernels { namespace minres { -#define GKO_DECLARE_MINRES_INITIALIZE_KERNEL(_type) \ - void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type>* r, matrix::Dense<_type>* z, \ - matrix::Dense<_type>* p, matrix::Dense<_type>* p_prev, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* q_prev, \ - matrix::Dense<_type>* q_tilde, matrix::Dense<_type>* beta, \ - matrix::Dense<_type>* gamma, matrix::Dense<_type>* delta, \ - matrix::Dense<_type>* cos_prev, matrix::Dense<_type>* cos, \ - matrix::Dense<_type>* sin_prev, matrix::Dense<_type>* sin, \ - matrix::Dense<_type>* eta_next, matrix::Dense<_type>* eta, \ - array* stop_status) - - -#define GKO_DECLARE_MINRES_STEP_1_KERNEL(_type) \ - void step_1(std::shared_ptr exec, \ - matrix::Dense<_type>* alpha, matrix::Dense<_type>* beta, \ - matrix::Dense<_type>* gamma, matrix::Dense<_type>* delta, \ - matrix::Dense<_type>* cos_prev, matrix::Dense<_type>* cos, \ - matrix::Dense<_type>* sin_prev, matrix::Dense<_type>* sin, \ - matrix::Dense<_type>* eta, matrix::Dense<_type>* eta_next, \ - matrix::Dense<_type>* tau, \ - const array* stop_status) - -#define GKO_DECLARE_MINRES_STEP_2_KERNEL(_type) \ - void step_2( \ - std::shared_ptr exec, matrix::Dense<_type>* x, \ - matrix::Dense<_type>* p, const matrix::Dense<_type>* p_prev, \ - matrix::Dense<_type>* z, const matrix::Dense<_type>* z_tilde, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* q_prev, \ - matrix::Dense<_type>* v, const matrix::Dense<_type>* alpha, \ - const matrix::Dense<_type>* beta, const matrix::Dense<_type>* gamma, \ - const matrix::Dense<_type>* delta, const matrix::Dense<_type>* cos, \ - const matrix::Dense<_type>* eta, \ - const array* stop_status) +#define GKO_DECLARE_MINRES_INITIALIZE_KERNEL(ValueType) \ + void initialize(std::shared_ptr exec, \ + matrix::view::dense r, \ + matrix::view::dense z, \ + matrix::view::dense p, \ + matrix::view::dense p_prev, \ + matrix::view::dense q, \ + matrix::view::dense q_prev, \ + matrix::view::dense q_tilde, \ + matrix::view::dense beta, \ + matrix::view::dense gamma, \ + matrix::view::dense delta, \ + matrix::view::dense cos_prev, \ + matrix::view::dense cos, \ + matrix::view::dense sin_prev, \ + matrix::view::dense sin, \ + matrix::view::dense eta_next, \ + matrix::view::dense eta, \ + array& stop_status) + + +#define GKO_DECLARE_MINRES_STEP_1_KERNEL(ValueType) \ + void step_1(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + matrix::view::dense gamma, \ + matrix::view::dense delta, \ + matrix::view::dense cos_prev, \ + matrix::view::dense cos, \ + matrix::view::dense sin_prev, \ + matrix::view::dense sin, \ + matrix::view::dense eta, \ + matrix::view::dense eta_next, \ + matrix::view::dense tau, \ + const array& stop_status) + +#define GKO_DECLARE_MINRES_STEP_2_KERNEL(ValueType) \ + void step_2(std::shared_ptr exec, \ + matrix::view::dense x, \ + matrix::view::dense p, \ + matrix::view::dense p_prev, \ + matrix::view::dense z, \ + matrix::view::dense z_tilde, \ + matrix::view::dense q, \ + matrix::view::dense q_prev, \ + matrix::view::dense v, \ + matrix::view::dense alpha, \ + matrix::view::dense beta, \ + matrix::view::dense gamma, \ + matrix::view::dense delta, \ + matrix::view::dense cos, \ + matrix::view::dense eta, \ + const array& stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/multigrid.cpp b/core/solver/multigrid.cpp index fb7988f1057..de64579cc2b 100644 --- a/core/solver/multigrid.cpp +++ b/core/solver/multigrid.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -970,7 +970,7 @@ void Multigrid::apply_dense_impl(const VectorType* b, VectorType* x, this->template create_workspace_array( ws::stop, b->get_size()[1]); bool one_changed{}; - exec->run(multigrid::make_initialize(&stop_status)); + exec->run(multigrid::make_initialize(stop_status)); auto stop_criterion = this->get_stop_criterion_factory()->generate( this->get_system_matrix(), std::shared_ptr(b, null_deleter{}), x, diff --git a/core/solver/multigrid_kernels.hpp b/core/solver/multigrid_kernels.hpp index 73c660cbefb..7cd18b956e8 100644 --- a/core/solver/multigrid_kernels.hpp +++ b/core/solver/multigrid_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,26 +20,30 @@ namespace kernels { namespace multigrid { -#define GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL(_type) \ - void kcycle_step_1(std::shared_ptr exec, \ - const matrix::Dense<_type>* alpha, \ - const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* v, matrix::Dense<_type>* g, \ - matrix::Dense<_type>* d, matrix::Dense<_type>* e) - -#define GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL(_type) \ - void kcycle_step_2( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* alpha, const matrix::Dense<_type>* rho, \ - const matrix::Dense<_type>* gamma, const matrix::Dense<_type>* beta, \ - const matrix::Dense<_type>* zeta, const matrix::Dense<_type>* d, \ - matrix::Dense<_type>* e) - -#define GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL(_type) \ - void kcycle_check_stop(std::shared_ptr exec, \ - const matrix::Dense<_type>* old_norm, \ - const matrix::Dense<_type>* new_norm, \ - const _type rel_tol, bool& is_stop) +#define GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL(ValueType) \ + void kcycle_step_1(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense rho, \ + matrix::view::dense v, \ + matrix::view::dense g, \ + matrix::view::dense d, \ + matrix::view::dense e) + +#define GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL(ValueType) \ + void kcycle_step_2(std::shared_ptr exec, \ + matrix::view::dense alpha, \ + matrix::view::dense rho, \ + matrix::view::dense gamma, \ + matrix::view::dense beta, \ + matrix::view::dense zeta, \ + matrix::view::dense d, \ + matrix::view::dense e) + +#define GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL(ValueType) \ + void kcycle_check_stop(std::shared_ptr exec, \ + matrix::view::dense old_norm, \ + matrix::view::dense new_norm, \ + const ValueType rel_tol, bool& is_stop) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/pipe_cg.cpp b/core/solver/pipe_cg.cpp index 0a63ff41fdb..51fb63d67dd 100644 --- a/core/solver/pipe_cg.cpp +++ b/core/solver/pipe_cg.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -175,9 +175,10 @@ void PipeCg::apply_dense_impl(const VectorType* dense_b, // r = b // prev_rho = 1.0 - exec->run(pipe_cg::make_initialize_1(gko::detail::get_local(dense_b), - gko::detail::get_local(r), prev_rho, - &stop_status)); + exec->run(pipe_cg::make_initialize_1( + gko::detail::get_local(dense_b)->get_const_device_view(), + gko::detail::get_local(r)->get_device_view(), + prev_rho->get_device_view(), stop_status)); // r = r - Ax this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, r); // z = preconditioner * r @@ -220,10 +221,15 @@ void PipeCg::apply_dense_impl(const VectorType* dense_b, // f = m // g = n exec->run(pipe_cg::make_initialize_2( - gko::detail::get_local(p), gko::detail::get_local(q), - gko::detail::get_local(f), gko::detail::get_local(g), beta, - gko::detail::get_local(z1), gko::detail::get_local(w), - gko::detail::get_local(m), gko::detail::get_local(n), delta)); + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(f)->get_device_view(), + gko::detail::get_local(g)->get_device_view(), beta->get_device_view(), + gko::detail::get_local(z1)->get_const_device_view(), + gko::detail::get_local(w)->get_const_device_view(), + gko::detail::get_local(m)->get_const_device_view(), + gko::detail::get_local(n)->get_const_device_view(), + delta->get_const_device_view())); /* Memory movement summary: TODO @@ -237,11 +243,17 @@ void PipeCg::apply_dense_impl(const VectorType* dense_b, // it's the only place where z is updated so we updated both z1 and z2 // here exec->run(pipe_cg::make_step_1( - gko::detail::get_local(dense_x), gko::detail::get_local(r), - gko::detail::get_local(z1), gko::detail::get_local(z2), - gko::detail::get_local(w), gko::detail::get_local(p), - gko::detail::get_local(q), gko::detail::get_local(f), - gko::detail::get_local(g), rho, beta, &stop_status)); + gko::detail::get_local(dense_x)->get_device_view(), + gko::detail::get_local(r)->get_device_view(), + gko::detail::get_local(z1)->get_device_view(), + gko::detail::get_local(z2)->get_device_view(), + gko::detail::get_local(w)->get_device_view(), + gko::detail::get_local(p)->get_const_device_view(), + gko::detail::get_local(q)->get_const_device_view(), + gko::detail::get_local(f)->get_const_device_view(), + gko::detail::get_local(g)->get_const_device_view(), + rho->get_const_device_view(), beta->get_const_device_view(), + stop_status)); // m = preconditioner * w this->get_preconditioner()->apply(w, m); @@ -276,11 +288,17 @@ void PipeCg::apply_dense_impl(const VectorType* dense_b, // f = m + tmp * f // g = n + tmp * g exec->run(pipe_cg::make_step_2( - beta, gko::detail::get_local(p), gko::detail::get_local(q), - gko::detail::get_local(f), gko::detail::get_local(g), - gko::detail::get_local(z1), gko::detail::get_local(w), - gko::detail::get_local(m), gko::detail::get_local(n), prev_rho, rho, - delta, &stop_status)); + beta->get_device_view(), + gko::detail::get_local(p)->get_device_view(), + gko::detail::get_local(q)->get_device_view(), + gko::detail::get_local(f)->get_device_view(), + gko::detail::get_local(g)->get_device_view(), + gko::detail::get_local(z1)->get_const_device_view(), + gko::detail::get_local(w)->get_const_device_view(), + gko::detail::get_local(m)->get_const_device_view(), + gko::detail::get_local(n)->get_const_device_view(), + prev_rho->get_const_device_view(), rho->get_const_device_view(), + delta->get_const_device_view(), stop_status)); } } @@ -350,8 +368,9 @@ std::vector workspace_traits>::vectors(const Solver&) } -#define GKO_DECLARE_PIPE_CG(_type) class PipeCg<_type> -#define GKO_DECLARE_PIPE_CG_TRAITS(_type) struct workspace_traits> +#define GKO_DECLARE_PIPE_CG(ValueType) class PipeCg +#define GKO_DECLARE_PIPE_CG_TRAITS(ValueType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_TRAITS); diff --git a/core/solver/pipe_cg_kernels.hpp b/core/solver/pipe_cg_kernels.hpp index e107390d79e..41b1e00314b 100644 --- a/core/solver/pipe_cg_kernels.hpp +++ b/core/solver/pipe_cg_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,54 +22,66 @@ namespace kernels { namespace pipe_cg { -#define GKO_DECLARE_PIPE_CG_INITIALIZE_1_KERNEL(_type) \ - void initialize_1(std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ - matrix::Dense<_type>* prev_rho, \ - array* stop_status) +#define GKO_DECLARE_PIPE_CG_INITIALIZE_1_KERNEL(ValueType) \ + void initialize_1(std::shared_ptr exec, \ + matrix::view::dense b, \ + matrix::view::dense r, \ + matrix::view::dense prev_rho, \ + array& stop_status) -#define GKO_DECLARE_PIPE_CG_INITIALIZE_2_KERNEL(_type) \ - void initialize_2( \ - std::shared_ptr exec, matrix::Dense<_type>* p, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* f, \ - matrix::Dense<_type>* g, matrix::Dense<_type>* beta, \ - const matrix::Dense<_type>* z, const matrix::Dense<_type>* w, \ - const matrix::Dense<_type>* m, const matrix::Dense<_type>* n, \ - const matrix::Dense<_type>* delta) +#define GKO_DECLARE_PIPE_CG_INITIALIZE_2_KERNEL(ValueType) \ + void initialize_2( \ + std::shared_ptr exec, \ + matrix::view::dense p, matrix::view::dense q, \ + matrix::view::dense f, matrix::view::dense g, \ + matrix::view::dense beta, \ + matrix::view::dense z, \ + matrix::view::dense w, \ + matrix::view::dense m, \ + matrix::view::dense n, \ + matrix::view::dense delta) -#define GKO_DECLARE_PIPE_CG_STEP_1_KERNEL(_type) \ +#define GKO_DECLARE_PIPE_CG_STEP_1_KERNEL(ValueType) \ void step_1( \ - std::shared_ptr exec, matrix::Dense<_type>* x, \ - matrix::Dense<_type>* r, matrix::Dense<_type>* z1, \ - matrix::Dense<_type>* z2, matrix::Dense<_type>* w, \ - const matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ - const matrix::Dense<_type>* f, const matrix::Dense<_type>* g, \ - const matrix::Dense<_type>* rho, const matrix::Dense<_type>* beta, \ - const array* stop_status) - - -#define GKO_DECLARE_PIPE_CG_STEP_2_KERNEL(_type) \ - void step_2( \ - std::shared_ptr exec, \ - matrix::Dense<_type>* beta, matrix::Dense<_type>* p, \ - matrix::Dense<_type>* q, matrix::Dense<_type>* f, \ - matrix::Dense<_type>* g, const matrix::Dense<_type>* z, \ - const matrix::Dense<_type>* w, const matrix::Dense<_type>* m, \ - const matrix::Dense<_type>* n, const matrix::Dense<_type>* prev_rho, \ - const matrix::Dense<_type>* rho, const matrix::Dense<_type>* delta, \ - const array* stop_status) - - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_PIPE_CG_INITIALIZE_1_KERNEL(_type); \ - template \ - GKO_DECLARE_PIPE_CG_INITIALIZE_2_KERNEL(_type); \ - template \ - GKO_DECLARE_PIPE_CG_STEP_1_KERNEL(_type); \ - template \ - GKO_DECLARE_PIPE_CG_STEP_2_KERNEL(_type) + std::shared_ptr exec, \ + matrix::view::dense x, matrix::view::dense r, \ + matrix::view::dense z1, matrix::view::dense z2, \ + matrix::view::dense w, \ + matrix::view::dense p, \ + matrix::view::dense q, \ + matrix::view::dense f, \ + matrix::view::dense g, \ + matrix::view::dense rho, \ + matrix::view::dense beta, \ + const array& stop_status) + + +#define GKO_DECLARE_PIPE_CG_STEP_2_KERNEL(ValueType) \ + void step_2( \ + std::shared_ptr exec, \ + matrix::view::dense beta, matrix::view::dense p, \ + matrix::view::dense q, matrix::view::dense f, \ + matrix::view::dense g, \ + matrix::view::dense z, \ + matrix::view::dense w, \ + matrix::view::dense m, \ + matrix::view::dense n, \ + matrix::view::dense prev_rho, \ + matrix::view::dense rho, \ + matrix::view::dense delta, \ + const array& stop_status) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PIPE_CG_INITIALIZE_1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_PIPE_CG_INITIALIZE_2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_PIPE_CG_STEP_1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_PIPE_CG_STEP_2_KERNEL(ValueType) } // namespace pipe_cg diff --git a/core/solver/upper_trs.cpp b/core/solver/upper_trs.cpp index 70e37c047bc..bee79b80b90 100644 --- a/core/solver/upper_trs.cpp +++ b/core/solver/upper_trs.cpp @@ -1,7 +1,8 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause +#include #include #include @@ -161,6 +162,7 @@ void UpperTrs::apply_impl(const LinOp* b, LinOp* x) const // pointers. Vector* trans_b{}; Vector* trans_x{}; + using optional_view = std::optional>; if (needs_transpose(exec)) { trans_b = this->template create_workspace_op( ws::transposed_b, gko::transpose(dense_b->get_size())); @@ -170,7 +172,11 @@ void UpperTrs::apply_impl(const LinOp* b, LinOp* x) const exec->run(upper_trs::make_solve( this->get_system_matrix().get(), this->solve_struct_.get(), this->get_parameters().unit_diagonal, parameters_.algorithm, - trans_b, trans_x, dense_b, dense_x)); + trans_b ? optional_view{trans_b->get_device_view()} + : optional_view{}, + trans_x ? optional_view{trans_x->get_device_view()} + : optional_view{}, + dense_b->get_const_device_view(), dense_x->get_device_view())); }, b, x); } @@ -249,9 +255,10 @@ std::vector workspace_traits>::vectors( } -#define GKO_DECLARE_UPPER_TRS(_vtype, _itype) class UpperTrs<_vtype, _itype> -#define GKO_DECLARE_UPPER_TRS_TRAITS(_vtype, _itype) \ - struct workspace_traits> +#define GKO_DECLARE_UPPER_TRS(ValueType, IndexType) \ + class UpperTrs +#define GKO_DECLARE_UPPER_TRS_TRAITS(ValueType, IndexType) \ + struct workspace_traits> GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_UPPER_TRS); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_UPPER_TRS_TRAITS); diff --git a/core/solver/upper_trs_kernels.hpp b/core/solver/upper_trs_kernels.hpp index d409aa7db09..7f3fcdd05b4 100644 --- a/core/solver/upper_trs_kernels.hpp +++ b/core/solver/upper_trs_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -7,6 +7,7 @@ #include +#include #include #include @@ -26,21 +27,23 @@ namespace upper_trs { bool& do_transpose) -#define GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL(_vtype, _itype) \ +#define GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL(ValueType, IndexType) \ void generate(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype>* matrix, \ + const matrix::Csr* matrix, \ std::shared_ptr& solve_struct, \ bool unit_diag, const solver::trisolve_algorithm algorithm, \ const size_type num_rhs) -#define GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL(_vtype, _itype) \ - void solve(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype>* matrix, \ - const solver::SolveStruct* solve_struct, bool unit_diag, \ - const solver::trisolve_algorithm algorithm, \ - matrix::Dense<_vtype>* trans_b, matrix::Dense<_vtype>* trans_x, \ - const matrix::Dense<_vtype>* b, matrix::Dense<_vtype>* x) +#define GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL(ValueType, IndexType) \ + void solve(std::shared_ptr exec, \ + const matrix::Csr* matrix, \ + const solver::SolveStruct* solve_struct, bool unit_diag, \ + const solver::trisolve_algorithm algorithm, \ + std::optional> trans_b, \ + std::optional> trans_x, \ + matrix::view::dense b, \ + matrix::view::dense x) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/stop/criterion.cpp b/core/stop/criterion.cpp index 1684e4bdeab..df033a0ab2c 100644 --- a/core/stop/criterion.cpp +++ b/core/stop/criterion.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -24,7 +24,7 @@ void Criterion::set_all_statuses(uint8 stoppingId, bool setFinalized, array* stop_status) { this->get_executor()->run(criterion::make_set_all_statuses( - stoppingId, setFinalized, stop_status)); + stoppingId, setFinalized, *stop_status)); } diff --git a/core/stop/criterion_kernels.hpp b/core/stop/criterion_kernels.hpp index 62e4135ee37..300bdc21e36 100644 --- a/core/stop/criterion_kernels.hpp +++ b/core/stop/criterion_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,7 +21,7 @@ namespace set_all_statuses { #define GKO_DECLARE_SET_ALL_STATUSES_KERNEL \ void set_all_statuses(std::shared_ptr exec, \ uint8 stoppingId, bool setFinalized, \ - array* stop_status) + array& stop_status) } // namespace set_all_statuses diff --git a/core/stop/residual_norm.cpp b/core/stop/residual_norm.cpp index 05fb60530f1..011d673668f 100644 --- a/core/stop/residual_norm.cpp +++ b/core/stop/residual_norm.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -197,9 +197,10 @@ bool ResidualNormBase::check_impl( bool all_converged = true; this->get_executor()->run(residual_norm::make_residual_norm( - dense_tau, starting_tau_.get(), reduction_factor_, stopping_id, - set_finalized, stop_status, &device_storage_, &all_converged, - one_changed)); + dense_tau->get_const_device_view(), + starting_tau_->get_const_device_view(), reduction_factor_, stopping_id, + set_finalized, *stop_status, device_storage_, all_converged, + *one_changed)); return all_converged; } @@ -220,20 +221,21 @@ bool ImplicitResidualNorm::check_impl( this->get_executor()->run( implicit_residual_norm::make_implicit_residual_norm( - dense_tau, this->starting_tau_.get(), this->reduction_factor_, - stopping_id, set_finalized, stop_status, &this->device_storage_, - &all_converged, one_changed)); + dense_tau->get_const_device_view(), + this->starting_tau_->get_const_device_view(), + this->reduction_factor_, stopping_id, set_finalized, *stop_status, + this->device_storage_, all_converged, *one_changed)); return all_converged; } -#define GKO_DECLARE_RESIDUAL_NORM(_type) class ResidualNormBase<_type> +#define GKO_DECLARE_RESIDUAL_NORM(ValueType) class ResidualNormBase GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_RESIDUAL_NORM); -#define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM(_type) \ - class ImplicitResidualNorm<_type> +#define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM(ValueType) \ + class ImplicitResidualNorm GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM); class ResidualNormFactory; diff --git a/core/stop/residual_norm_kernels.hpp b/core/stop/residual_norm_kernels.hpp index 7625dadefeb..37e76156564 100644 --- a/core/stop/residual_norm_kernels.hpp +++ b/core/stop/residual_norm_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -18,13 +18,14 @@ namespace kernels { namespace residual_norm { -#define GKO_DECLARE_RESIDUAL_NORM_KERNEL(_type) \ - void residual_norm( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* tau, const matrix::Dense<_type>* orig_tau, \ - _type rel_residual_goal, uint8 stoppingId, bool setFinalized, \ - array* stop_status, array* device_storage, \ - bool* all_converged, bool* one_changed) +#define GKO_DECLARE_RESIDUAL_NORM_KERNEL(ValueType) \ + void residual_norm(std::shared_ptr exec, \ + matrix::view::dense tau, \ + matrix::view::dense orig_tau, \ + ValueType rel_residual_goal, uint8 stoppingId, \ + bool setFinalized, array& stop_status, \ + array& device_storage, bool& all_converged, \ + bool& one_changed) #define GKO_DECLARE_ALL_AS_TEMPLATES \ @@ -38,14 +39,14 @@ namespace residual_norm { namespace implicit_residual_norm { -#define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL(_type) \ - void implicit_residual_norm( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* tau, \ - const matrix::Dense>* orig_tau, \ - remove_complex<_type> rel_residual_goal, uint8 stoppingId, \ - bool setFinalized, array* stop_status, \ - array* device_storage, bool* all_converged, bool* one_changed) +#define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL(ValueType) \ + void implicit_residual_norm( \ + std::shared_ptr exec, \ + matrix::view::dense tau, \ + matrix::view::dense> orig_tau, \ + remove_complex rel_residual_goal, uint8 stoppingId, \ + bool setFinalized, array& stop_status, \ + array& device_storage, bool& all_converged, bool& one_changed) #define GKO_DECLARE_ALL_AS_TEMPLATES2 \ diff --git a/core/test/matrix/dense.cpp b/core/test/matrix/dense.cpp index e0098e87217..2515713b92b 100644 --- a/core/test/matrix/dense.cpp +++ b/core/test/matrix/dense.cpp @@ -312,7 +312,7 @@ TYPED_TEST(Dense, CanCreateDeviceView) EXPECT_EQ(view.size, this->mtx->get_size()); EXPECT_EQ(view.stride, this->mtx->get_stride()); - EXPECT_EQ(view.data, this->mtx->get_values()); + EXPECT_EQ(view.values, this->mtx->get_values()); } @@ -322,7 +322,7 @@ TYPED_TEST(Dense, CanCreateConstDeviceView) EXPECT_EQ(view.size, this->mtx->get_size()); EXPECT_EQ(view.stride, this->mtx->get_stride()); - EXPECT_EQ(view.data, this->mtx->get_values()); + EXPECT_EQ(view.values, this->mtx->get_values()); } diff --git a/core/test/matrix/device_views.cpp b/core/test/matrix/device_views.cpp index bcc77e3c6f3..47508fc853b 100644 --- a/core/test/matrix/device_views.cpp +++ b/core/test/matrix/device_views.cpp @@ -24,11 +24,11 @@ TYPED_TEST(DenseView, AccessWorks) ASSERT_EQ(view.size, gko::dim<2>(1, 2)); ASSERT_EQ(view.stride, 3); - ASSERT_EQ(view.data, values.data()); + ASSERT_EQ(view.values, values.data()); ASSERT_EQ(&view(0, 0), &values[0]); ASSERT_EQ(&view(1, 0), &values[3]); ASSERT_EQ(&view(1, 1), &values[4]); ASSERT_EQ(const_view.size, view.size); ASSERT_EQ(const_view.stride, view.stride); - ASSERT_EQ(const_view.data, view.data); + ASSERT_EQ(const_view.values, view.values); } diff --git a/cuda/matrix/fft_kernels.cu b/cuda/matrix/fft_kernels.cu index 23105f3e7ae..04ebd27a976 100644 --- a/cuda/matrix/fft_kernels.cu +++ b/cuda/matrix/fft_kernels.cu @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -109,15 +109,14 @@ private: template void fft(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, bool inverse, + matrix::view::dense> b, + matrix::view::dense> x, bool inverse, array& buffer) { cufft_handle handle{exec->get_stream()}; handle.template setup<1, std::complex, std::complex>( - {b->get_size()[0]}, b->get_stride(), x->get_stride(), b->get_size()[1], - buffer); - handle.execute(b->get_const_values(), x->get_values(), inverse); + {b.size[0]}, b.stride, x.stride, b.size[1], buffer); + handle.execute(b.values, x.values, inverse); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); @@ -125,15 +124,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); template void fft2(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, bool inverse, array& buffer) { cufft_handle handle{exec->get_stream()}; handle.template setup<2, std::complex, std::complex>( - {size1, size2}, b->get_stride(), x->get_stride(), b->get_size()[1], - buffer); - handle.execute(b->get_const_values(), x->get_values(), inverse); + {size1, size2}, b.stride, x.stride, b.size[1], buffer); + handle.execute(b.values, x.values, inverse); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); @@ -141,15 +139,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); template void fft3(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, size_type size3, bool inverse, array& buffer) { cufft_handle handle{exec->get_stream()}; handle.template setup<3, std::complex, std::complex>( - {size1, size2, size3}, b->get_stride(), x->get_stride(), - b->get_size()[1], buffer); - handle.execute(b->get_const_values(), x->get_values(), inverse); + {size1, size2, size3}, b.stride, x.stride, b.size[1], buffer); + handle.execute(b.values, x.values, inverse); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT3_KERNEL); diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index c9cba2d889a..367f5d78348 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -117,16 +117,15 @@ struct CudaSolveStruct : gko::solver::SolveStruct { } void solve(const matrix::Csr*, - const matrix::Dense* input, - matrix::Dense* output, matrix::Dense*, - matrix::Dense*) const + matrix::view::dense input, + matrix::view::dense output) const { - if (input->get_size()[1] != num_rhs) { + if (input.size[1] != num_rhs) { throw gko::ValueMismatch{ __FILE__, __LINE__, __FUNCTION__, - input->get_size()[1], + input.size[1], num_rhs, "the dimensions of the multivector do not match the value " "provided at generation time. Check the value specified in " @@ -134,10 +133,9 @@ struct CudaSolveStruct : gko::solver::SolveStruct { } sparselib::pointer_mode_guard pm_guard(handle); auto descr_b = sparselib::create_dnmat( - input->get_size(), input->get_stride(), - const_cast(input->get_const_values())); - auto descr_c = sparselib::create_dnmat( - output->get_size(), output->get_stride(), output->get_values()); + input.size, input.stride, const_cast(input.values)); + auto descr_c = + sparselib::create_dnmat(output.size, output.stride, output.values); sparselib::spsm_solve(handle, SPARSELIB_OPERATION_NON_TRANSPOSE, SPARSELIB_OPERATION_NON_TRANSPOSE, @@ -238,16 +236,15 @@ struct CudaSolveStruct : gko::solver::SolveStruct { } void solve(const matrix::Csr* matrix, - const matrix::Dense* input, - matrix::Dense* output, matrix::Dense*, - matrix::Dense*) const + matrix::view::dense input, + matrix::view::dense output) const { - if (input->get_size()[1] != num_rhs) { + if (input.size[1] != num_rhs) { throw gko::ValueMismatch{ __FILE__, __LINE__, __FUNCTION__, - input->get_size()[1], + input.size[1], num_rhs, "the dimensions of the multivector do not match the value " "provided at generation time. Check the value specified in " @@ -257,12 +254,11 @@ struct CudaSolveStruct : gko::solver::SolveStruct { dense::copy(exec, input, output); sparselib::csrsm2_solve( handle, algorithm, SPARSELIB_OPERATION_NON_TRANSPOSE, - SPARSELIB_OPERATION_TRANSPOSE, matrix->get_size()[0], - output->get_stride(), matrix->get_num_stored_elements(), - one(), factor_descr, matrix->get_const_values(), - matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(), - output->get_values(), output->get_stride(), solve_info, policy, - work.get_data()); + SPARSELIB_OPERATION_TRANSPOSE, matrix->get_size()[0], output.stride, + matrix->get_num_stored_elements(), one(), factor_descr, + matrix->get_const_values(), matrix->get_const_row_ptrs(), + matrix->get_const_col_idxs(), output.values, output.stride, + solve_info, policy, work.get_data()); } ~CudaSolveStruct() @@ -320,12 +316,10 @@ template void solve_kernel(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, - matrix::Dense* trans_b, - matrix::Dense* trans_x, - const matrix::Dense* b, - matrix::Dense* x) + matrix::view::dense b, + matrix::view::dense x) { - if (matrix->get_size()[0] == 0 || b->get_size()[1] == 0) { + if (matrix->get_size()[0] == 0 || b.size[1] == 0) { return; } using vec = matrix::Dense; @@ -334,7 +328,7 @@ void solve_kernel(std::shared_ptr exec, if (auto cuda_solve_struct = dynamic_cast*>( solve_struct)) { - cuda_solve_struct->solve(matrix, b, x, trans_b, trans_x); + cuda_solve_struct->solve(matrix, b, x); } else { GKO_NOT_SUPPORTED(solve_struct); } @@ -607,14 +601,15 @@ __global__ void sptrsv_init_kernel(bool* const nan_produced, template void sptrsv_naive_caching(std::shared_ptr exec, const matrix::Csr* matrix, - bool unit_diag, const matrix::Dense* b, - matrix::Dense* x) + bool unit_diag, + matrix::view::dense b, + matrix::view::dense x) { // Pre-Volta GPUs may deadlock due to missing independent thread scheduling. const auto is_fallback_required = exec->get_major_version() < 7; const auto n = matrix->get_size()[0]; - const auto nrhs = b->get_size()[1]; + const auto nrhs = b.size[1]; // Initialize x to all NaNs. dense::fill(exec, x, nan()); @@ -633,17 +628,17 @@ void sptrsv_naive_caching(std::shared_ptr exec, <<get_stream()>>>( matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(), as_device_type(matrix->get_const_values()), - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(x->get_values()), x->get_stride(), n, nrhs, - unit_diag, nan_produced.get_data(), atomic_counter.get_data()); + as_device_type(b.values), b.stride, as_device_type(x.values), + x.stride, n, nrhs, unit_diag, nan_produced.get_data(), + atomic_counter.get_data()); } else { sptrsv_naive_caching_kernel <<get_stream()>>>( matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(), as_device_type(matrix->get_const_values()), - as_device_type(b->get_const_values()), b->get_stride(), - as_device_type(x->get_values()), x->get_stride(), n, nrhs, - unit_diag, nan_produced.get_data(), atomic_counter.get_data()); + as_device_type(b.values), b.stride, as_device_type(x.values), + x.stride, n, nrhs, unit_diag, nan_produced.get_data(), + atomic_counter.get_data()); } #if GKO_VERBOSE_LEVEL >= 1 diff --git a/cuda/solver/lower_trs_kernels.cu b/cuda/solver/lower_trs_kernels.cu index b37f6536b0f..63a8bd40be7 100644 --- a/cuda/solver/lower_trs_kernels.cu +++ b/cuda/solver/lower_trs_kernels.cu @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/lower_trs_kernels.hpp" #include +#include #include #include @@ -59,12 +60,13 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { if (algorithm == solver::trisolve_algorithm::sparselib) { - solve_kernel(exec, matrix, solve_struct, trans_b, - trans_x, b, x); + solve_kernel(exec, matrix, solve_struct, b, x); } else { sptrsv_naive_caching(exec, matrix, unit_diag, b, x); } diff --git a/cuda/solver/upper_trs_kernels.cu b/cuda/solver/upper_trs_kernels.cu index eb7d8386083..f5a3073fdfe 100644 --- a/cuda/solver/upper_trs_kernels.cu +++ b/cuda/solver/upper_trs_kernels.cu @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/upper_trs_kernels.hpp" #include +#include #include #include @@ -59,12 +60,13 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { if (algorithm == solver::trisolve_algorithm::sparselib) { - solve_kernel(exec, matrix, solve_struct, trans_b, - trans_x, b, x); + solve_kernel(exec, matrix, solve_struct, b, x); } else { sptrsv_naive_caching(exec, matrix, unit_diag, b, x); } diff --git a/cuda/test/base/kernel_launch.cu b/cuda/test/base/kernel_launch.cu index ddf14f7baf9..f2a417ac95d 100644 --- a/cuda/test/base/kernel_launch.cu +++ b/cuda/test/base/kernel_launch.cu @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -152,7 +152,7 @@ void run1d(std::shared_ptr exec, gko::matrix::Dense<>* m) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; bool accessors_2d_correct = &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && @@ -165,7 +165,7 @@ void run1d(std::shared_ptr exec, gko::matrix::Dense<>* m) d(i / 4, i % 4) = 0; } }, - 16, m, static_cast*>(m), + 16, m->get_device_view(), m->get_const_device_view(), m->get_const_values(), move_only_val); } @@ -244,8 +244,8 @@ void run2d(std::shared_ptr exec, gko::matrix::Dense<>* m1, static_assert(is_same::value, "type"); static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && - d3.data == d2_ptr && d4 == d3_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr && + d3.values == d2_ptr && d4 == d3_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5 && d3.stride == 6; bool accessors_2d_correct = @@ -261,11 +261,11 @@ void run2d(std::shared_ptr exec, gko::matrix::Dense<>* m1, d(i, j) = 0; } }, - dim<2>{4, 4}, m2->get_stride(), m1, - static_cast*>(m1), m1->get_const_values(), - gko::kernels::cuda::default_stride(m2), - gko::kernels::cuda::row_vector(m3), m2->get_values(), m3->get_values(), - move_only_val); + dim<2>{4, 4}, m2->get_stride(), m1->get_device_view(), + m1->get_const_device_view(), m1->get_const_values(), + gko::kernels::cuda::default_stride(m2->get_device_view()), + gko::kernels::cuda::row_vector(m3->get_device_view()), m2->get_values(), + m3->get_values(), move_only_val); } TEST_F(KernelLaunch, Runs2DDense) diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index fe79c094043..458b6ed364c 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -11,12 +11,12 @@ add_instantiation_files( DENSE_INSTANTIATE ) add_instantiation_files( - . + ${CMAKE_CURRENT_SOURCE_DIR} solver/batch_bicgstab_launch.instantiate.dp.cpp BATCH_BICGSTAB_INSTANTIATE ) add_instantiation_files( - . + ${CMAKE_CURRENT_SOURCE_DIR} solver/batch_cg_launch.instantiate.dp.cpp BATCH_CG_INSTANTIATE ) diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index 8f6c46d2405..64a65bfb36b 100644 --- a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -42,10 +42,10 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void populate_subsets(std::shared_ptr exec, const IndexType index_space_size, - const array* indices, - array* subset_begin, - array* subset_end, - array* superset_indices, + const array& indices, + array& subset_begin, + array& subset_end, + array& superset_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp index a0cc8bc041a..8fefa7377dc 100644 --- a/dpcpp/distributed/vector_kernels.dp.cpp +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,7 +20,7 @@ void build_local( const experimental::distributed::Partition* partition, comm_index_type local_part, - matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; + matrix::view::dense local_mtx) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/dpcpp/matrix/coo_kernels.dp.cpp b/dpcpp/matrix/coo_kernels.dp.cpp index 28d45e5280f..0690d029f7f 100644 --- a/dpcpp/matrix/coo_kernels.dp.cpp +++ b/dpcpp/matrix/coo_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -255,7 +255,8 @@ GKO_ENABLE_DEFAULT_HOST(abstract_spmm, abstract_spmm); template void spmv(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -266,11 +267,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -283,10 +284,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto nnz = a->get_num_stored_elements(); - const auto b_ncols = b->get_size()[1]; + const auto b_ncols = b.size[1]; const dim3 coo_block(config::warp_size, warps_in_block, 1); const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); @@ -303,9 +305,8 @@ void spmv2(std::shared_ptr exec, abstract_spmv(coo_grid, coo_block, 0, exec->get_queue(), nnz, num_lines, as_device_type(a->get_const_values()), a->get_const_col_idxs(), a->get_const_row_idxs(), - as_device_type(b->get_const_values()), - b->get_stride(), as_device_type(c->get_values()), - c->get_stride()); + as_device_type(b.values), b.stride, + as_device_type(c.values), c.stride); } else { int num_elems = ceildiv(nnz, nwarps * config::warp_size) * config::warp_size; @@ -314,9 +315,8 @@ void spmv2(std::shared_ptr exec, abstract_spmm(coo_grid, coo_block, 0, exec->get_queue(), nnz, num_elems, as_device_type(a->get_const_values()), a->get_const_col_idxs(), a->get_const_row_idxs(), - b_ncols, as_device_type(b->get_const_values()), - b->get_stride(), as_device_type(c->get_values()), - c->get_stride()); + b_ncols, as_device_type(b.values), b.stride, + as_device_type(c.values), c.stride); } } } @@ -326,15 +326,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto nnz = a->get_num_stored_elements(); const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); const dim3 coo_block(config::warp_size, warps_in_block, 1); - const auto b_ncols = b->get_size()[1]; + const auto b_ncols = b.size[1]; if (nwarps <= 0 || b_ncols <= 0) { return; @@ -347,24 +347,22 @@ void advanced_spmv2(std::shared_ptr exec, int num_lines = ceildiv(nnz, nwarps * config::warp_size); const dim3 coo_grid(ceildiv(nwarps, warps_in_block), b_ncols); abstract_spmv(coo_grid, coo_block, 0, exec->get_queue(), nnz, - num_lines, as_device_type(alpha->get_const_values()), + num_lines, as_device_type(alpha.values), as_device_type(a->get_const_values()), a->get_const_col_idxs(), a->get_const_row_idxs(), - as_device_type(b->get_const_values()), - b->get_stride(), as_device_type(c->get_values()), - c->get_stride()); + as_device_type(b.values), b.stride, + as_device_type(c.values), c.stride); } else { int num_elems = ceildiv(nnz, nwarps * config::warp_size) * config::warp_size; const dim3 coo_grid(ceildiv(nwarps, warps_in_block), ceildiv(b_ncols, config::warp_size)); abstract_spmm(coo_grid, coo_block, 0, exec->get_queue(), nnz, - num_elems, as_device_type(alpha->get_const_values()), + num_elems, as_device_type(alpha.values), as_device_type(a->get_const_values()), a->get_const_col_idxs(), a->get_const_row_idxs(), - b_ncols, as_device_type(b->get_const_values()), - b->get_stride(), as_device_type(c->get_values()), - c->get_stride()); + b_ncols, as_device_type(b.values), b.stride, + as_device_type(c.values), c.stride); } } } diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index a98dad0d843..7fa78c71485 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/matrix/csr_kernels.hpp" #include +#include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1228,13 +1230,18 @@ namespace host_kernel { template -void merge_path_spmv(syn::value_list, - std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void merge_path_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -1251,7 +1258,7 @@ void merge_path_spmv(syn::value_list, const auto a_vals = acc::helper::build_const_rrm_accessor(a); - for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { + for (IndexType column_id = 0; column_id < b.size[1]; column_id++) { const auto column_span = acc::index_span(static_cast(column_id), static_cast(column_id + 1)); @@ -1260,7 +1267,7 @@ void merge_path_spmv(syn::value_list, column_span); auto c_vals = acc::helper::build_rrm_accessor(c, column_span); - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { if (grid_num > 0) { csr::kernel::abstract_merge_path_spmv( grid, block, 0, exec->get_queue(), @@ -1275,24 +1282,21 @@ void merge_path_spmv(syn::value_list, as_device_type(val_out.get_data()), row_out.get_data(), acc::as_device_range(c_vals)); - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { if (grid_num > 0) { csr::kernel::abstract_merge_path_spmv( grid, block, 0, exec->get_queue(), static_cast(a->get_size()[0]), - as_device_type(alpha->get_const_values()), - acc::as_device_range(a_vals), a->get_const_col_idxs(), - a->get_const_row_ptrs(), a->get_const_srow(), - acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), - acc::as_device_range(c_vals), row_out.get_data(), - as_device_type(val_out.get_data())); + as_device_type(alpha->values), acc::as_device_range(a_vals), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + a->get_const_srow(), acc::as_device_range(b_vals), + as_device_type(beta->values), acc::as_device_range(c_vals), + row_out.get_data(), as_device_type(val_out.get_data())); } csr::kernel::abstract_reduce( 1, spmv_block_size, 0, exec->get_queue(), grid_num, as_device_type(val_out.get_data()), row_out.get_data(), - as_device_type(alpha->get_const_values()), - acc::as_device_range(c_vals)); + as_device_type(alpha->values), acc::as_device_range(c_vals)); } else { GKO_KERNEL_NOT_FOUND; } @@ -1318,13 +1322,18 @@ int compute_items_per_thread(std::shared_ptr exec) template -void classical_spmv(syn::value_list, - std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void classical_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -1335,7 +1344,7 @@ void classical_spmv(syn::value_list, const auto gridx = std::min(ceildiv(a->get_size()[0], spmv_block_size / subgroup_size), int64(num_subgroup / nsg_in_group)); - const dim3 grid(gridx, b->get_size()[1]); + const dim3 grid(gridx, b.size[1]); const dim3 block(spmv_block_size); const auto a_vals = @@ -1343,7 +1352,7 @@ void classical_spmv(syn::value_list, const auto b_vals = acc::helper::build_const_rrm_accessor(b); auto c_vals = acc::helper::build_rrm_accessor(c); - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { if (grid.x > 0 && grid.y > 0) { kernel::abstract_classical_spmv( grid, block, 0, exec->get_queue(), a->get_size()[0], @@ -1351,14 +1360,13 @@ void classical_spmv(syn::value_list, a->get_const_row_ptrs(), acc::as_device_range(b_vals), acc::as_device_range(c_vals)); } - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { if (grid.x > 0 && grid.y > 0) { kernel::abstract_classical_spmv( grid, block, 0, exec->get_queue(), a->get_size()[0], - as_device_type(alpha->get_const_values()), - acc::as_device_range(a_vals), a->get_const_col_idxs(), - a->get_const_row_ptrs(), acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), + as_device_type(alpha->values), acc::as_device_range(a_vals), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + acc::as_device_range(b_vals), as_device_type(beta->values), acc::as_device_range(c_vals)); } } else { @@ -1371,12 +1379,17 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template -bool load_balance_spmv(std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +bool load_balance_spmv( + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -1386,15 +1399,14 @@ bool load_balance_spmv(std::shared_ptr exec, return false; } else { if (beta) { - dense::scale(exec, beta, c); + dense::scale(exec, *beta, c); } else { dense::fill(exec, c, zero()); } const IndexType nwarps = a->get_num_srow_elements(); if (nwarps > 0) { const dim3 csr_block(config::warp_size, warps_in_block, 1); - const dim3 csr_grid(ceildiv(nwarps, warps_in_block), - b->get_size()[1]); + const dim3 csr_grid(ceildiv(nwarps, warps_in_block), b.size[1]); const auto a_vals = acc::helper::build_const_rrm_accessor(a); const auto b_vals = @@ -1405,7 +1417,7 @@ bool load_balance_spmv(std::shared_ptr exec, csr::kernel::abstract_spmv( csr_grid, csr_block, 0, exec->get_queue(), nwarps, static_cast(a->get_size()[0]), - as_device_type(alpha->get_const_values()), + as_device_type(alpha->values), acc::as_device_range(a_vals), a->get_const_col_idxs(), a->get_const_row_ptrs(), a->get_const_srow(), acc::as_device_range(b_vals), @@ -1432,9 +1444,9 @@ template bool try_general_sparselib_spmv(std::shared_ptr exec, const ValueType host_alpha, const matrix::Csr* a, - const matrix::Dense* b, + matrix::view::dense b, const ValueType host_beta, - matrix::Dense* c) + matrix::view::dense c) { constexpr bool try_sparselib = !is_complex() && @@ -1452,19 +1464,18 @@ bool try_general_sparselib_spmv(std::shared_ptr exec, const_cast(a->get_const_row_ptrs()), const_cast(a->get_const_col_idxs()), const_cast(a->get_const_values())); - if (b->get_size()[1] == 1 && b->get_stride() == 1) { + if (b.size[1] == 1 && b.stride == 1) { oneapi::mkl::sparse::gemv( *exec->get_queue(), oneapi::mkl::transpose::nontrans, - host_alpha, mat_handle, - const_cast(b->get_const_values()), host_beta, - c->get_values()); + host_alpha, mat_handle, const_cast(b.values), + host_beta, c.values); } else { oneapi::mkl::sparse::gemm( *exec->get_queue(), oneapi::mkl::layout::row_major, oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, host_alpha, mat_handle, - const_cast(b->get_const_values()), b->get_size()[1], - b->get_stride(), host_beta, c->get_values(), c->get_stride()); + const_cast(b.values), b.size[1], b.stride, + host_beta, c.values, c.stride); } oneapi::mkl::sparse::release_matrix_handle( #if INTEL_MKL_VERSION >= 20240000 @@ -1481,30 +1492,37 @@ template ::value || !std::is_same::value>> -bool try_sparselib_spmv(std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +bool try_sparselib_spmv( + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { // TODO: support sparselib mixed return false; } template -bool try_sparselib_spmv(std::shared_ptr exec, - const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +bool try_sparselib_spmv( + std::shared_ptr exec, + const matrix::Csr* a, + matrix::view::dense b, matrix::view::dense c, + xstd::type_identity_t>> + alpha = {}, + xstd::type_identity_t>> + beta = {}) { // onemkl only supports host scalar if (alpha) { return try_general_sparselib_spmv( - exec, exec->copy_val_to_host(alpha->get_const_values()), a, b, - exec->copy_val_to_host(beta->get_const_values()), c); + exec, exec->copy_val_to_host(alpha->values), a, b, + exec->copy_val_to_host(beta->values), c); } else { return try_general_sparselib_spmv(exec, one(), a, b, zero(), c); @@ -1519,14 +1537,14 @@ template void spmv(std::shared_ptr exec, const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do return; } - if (b->get_size()[0] == 0 || a->get_num_stored_elements() == 0) { + if (b.size[0] == 0 || a->get_num_stored_elements() == 0) { // empty input: zero output dense::fill(exec, c, zero()); return; @@ -1585,17 +1603,17 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do return; } - if (b->get_size()[0] == 0 || a->get_num_stored_elements() == 0) { + if (b.size[0] == 0 || a->get_num_stored_elements() == 0) { // empty input: scale output dense::scale(exec, beta, c); return; @@ -1718,7 +1736,7 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, array* row_nnz) + const span& col_span, array& row_nnz) { const auto num_rows = source->get_size()[0]; auto row_ptrs = source->get_const_row_ptrs(); @@ -1728,7 +1746,7 @@ void calculate_nonzeros_per_row_in_span( kernel::calc_nnz_in_span(grid_dim, block_dim, 0, exec->get_queue(), row_span, col_span, row_ptrs, col_idxs, - row_nnz->get_data()); + row_nnz.get_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -2053,10 +2071,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, matrix::Csr* c) { @@ -2071,8 +2089,8 @@ void advanced_spgemm(std::shared_ptr exec, const auto d_cols = d->get_const_col_idxs(); const auto d_vals = as_device_type(d->get_const_values()); auto c_row_ptrs = c->get_row_ptrs(); - const auto alpha_vals = as_device_type(alpha->get_const_values()); - const auto beta_vals = as_device_type(beta->get_const_values()); + const auto alpha_vals = as_device_type(alpha.values); + const auto beta_vals = as_device_type(beta.values); constexpr auto sentinel = std::numeric_limits::max(); auto queue = exec->get_queue(); @@ -2252,10 +2270,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spgemm_reuse(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, const matrix::csr::lookup_data& c_lookup, matrix::Csr* c) @@ -2273,8 +2291,8 @@ void advanced_spgemm_reuse(std::shared_ptr exec, const auto b_vals = as_device_type(b->get_const_values()); const auto c_vals = as_device_type(c->get_values()); const auto d_vals = as_device_type(d->get_const_values()); - const auto palpha = as_device_type(alpha->get_const_values()); - const auto pbeta = as_device_type(beta->get_const_values()); + const auto palpha = as_device_type(alpha.values); + const auto pbeta = as_device_type(beta.values); const auto lookup_storage_offsets = c_lookup.storage_offsets.get_const_data(); const auto lookup_storage = c_lookup.storage.get_const_data(); @@ -2339,9 +2357,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { @@ -2388,8 +2406,8 @@ void spgeam(std::shared_ptr exec, const auto a_vals = as_device_type(a->get_const_values()); const auto b_vals = as_device_type(b->get_const_values()); - const auto alpha_vals = as_device_type(alpha->get_const_values()); - const auto beta_vals = as_device_type(beta->get_const_values()); + const auto alpha_vals = as_device_type(alpha.values); + const auto beta_vals = as_device_type(beta.values); using device_value_type = device_type; // count number of non-zeros per row @@ -2427,9 +2445,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void spgeam_numeric(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { @@ -2443,8 +2461,8 @@ void spgeam_numeric(std::shared_ptr exec, const auto b_vals = as_device_type(b->get_const_values()); const auto c_row_ptrs = c->get_row_ptrs(); const auto c_vals = as_device_type(c->get_values()); - const auto alpha_vals = as_device_type(alpha->get_const_values()); - const auto beta_vals = as_device_type(beta->get_const_values()); + const auto alpha_vals = as_device_type(alpha.values); + const auto beta_vals = as_device_type(beta.values); auto queue = exec->get_queue(); using device_value_type = device_type; @@ -2484,11 +2502,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Csr* source, - matrix::Dense* result) + matrix::view::dense result) { - const auto num_rows = result->get_size()[0]; - const auto num_cols = result->get_size()[1]; - const auto stride = result->get_stride(); + const auto num_rows = result.size[0]; + const auto num_cols = result.size[1]; + const auto stride = result.stride; const auto row_ptrs = source->get_const_row_ptrs(); const auto col_idxs = source->get_const_col_idxs(); const auto vals = as_device_type(source->get_const_values()); @@ -2496,7 +2514,7 @@ void fill_in_dense(std::shared_ptr exec, auto grid_dim = ceildiv(num_rows, default_block_size); kernel::fill_in_dense(grid_dim, default_block_size, 0, exec->get_queue(), num_rows, row_ptrs, col_idxs, vals, stride, - as_device_type(result->get_values())); + as_device_type(result.values)); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -2866,7 +2884,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr* to_check, bool* is_sorted) + const matrix::Csr* to_check, bool& is_sorted) { array is_sorted_device_array{exec, {true}}; const auto num_rows = to_check->get_size()[0]; @@ -2888,7 +2906,7 @@ void is_sorted_by_column_index( } }); }); - *is_sorted = get_element(is_sorted_device_array, 0); + is_sorted = get_element(is_sorted_device_array, 0); }; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -2945,8 +2963,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* beta, + matrix::view::dense alpha, + matrix::view::dense beta, matrix::Csr* mtx) { const auto nrows = mtx->get_size()[0]; @@ -2957,10 +2975,9 @@ void add_scaled_identity(std::shared_ptr exec, const auto nblocks = ceildiv(nthreads, default_block_size); kernel::add_scaled_identity( nblocks, default_block_size, 0, exec->get_queue(), - as_device_type(alpha->get_const_values()), - as_device_type(beta->get_const_values()), static_cast(nrows), - mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - as_device_type(mtx->get_values())); + as_device_type(alpha.values), as_device_type(beta.values), + static_cast(nrows), mtx->get_const_row_ptrs(), + mtx->get_const_col_idxs(), as_device_type(mtx->get_values())); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 99aac7064e5..1ac2833a7cd 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -36,7 +36,7 @@ namespace gko { namespace kernels { namespace dpcpp { /** - * @brief The Dense matrix format namespace. + * @brief The dense matrix format namespace. * * @ingroup dense */ @@ -93,10 +93,10 @@ void transpose( } template -void transpose(sycl::queue* queue, const matrix::Dense* orig, - matrix::Dense* trans) +void transpose(sycl::queue* queue, matrix::view::dense orig, + matrix::view::dense trans) { - auto size = orig->get_size(); + auto size = orig.size; constexpr auto sg_size = DeviceConfig::subgroup_size; dim3 grid(ceildiv(size[1], sg_size), ceildiv(size[0], sg_size)); dim3 block(sg_size, sg_size); @@ -107,10 +107,10 @@ void transpose(sycl::queue* queue, const matrix::Dense* orig, 0> space_acc_ct1(cgh); // Can not pass the member to device function directly - auto in = as_device_type(orig->get_const_values()); - auto in_stride = orig->get_stride(); - auto out = as_device_type(trans->get_values()); - auto out_stride = trans->get_stride(); + auto in = as_device_type(orig.values); + auto in_stride = orig.stride; + auto out = as_device_type(trans.values); + auto out_stride = trans.stride; cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { transpose(size[0], size[1], in, in_stride, out, @@ -173,9 +173,10 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, template void compute_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, + array& tmp) { // TODO Add onemkl for single column ? compute_dot(exec, x, y, result, tmp); @@ -187,9 +188,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array& tmp) { // TODO Add onemkl for single column ? @@ -201,10 +202,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, - array& tmp) +void compute_norm2_dispatch( + std::shared_ptr exec, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { // TODO Add onemkl for single column ? compute_norm2(exec, x, result, tmp); @@ -216,23 +217,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void simple_apply(std::shared_ptr exec, - const matrix::Dense* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense c) { using namespace oneapi::mkl; if constexpr (onemkl::is_supported::value) { - if (b->get_stride() != 0 && c->get_stride() != 0) { - if (a->get_size()[1] > 0 && a->get_const_values() && - b->get_const_values() && c->get_const_values()) { + if (b.stride != 0 && c.stride != 0) { + if (a.size[1] > 0 && a.values && b.values && c.values) { oneapi::mkl::blas::row_major::gemm( *exec->get_queue(), transpose::nontrans, - transpose::nontrans, c->get_size()[0], c->get_size()[1], - a->get_size()[1], one(), - as_device_type(a->get_const_values()), a->get_stride(), - as_device_type(b->get_const_values()), b->get_stride(), - zero(), as_device_type(c->get_values()), - c->get_stride()); + transpose::nontrans, c.size[0], c.size[1], a.size[1], + one(), as_device_type(a.values), a.stride, + as_device_type(b.values), b.stride, zero(), + as_device_type(c.values), c.stride); } else { dense::fill(exec, c, zero()); } @@ -247,24 +245,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* a, const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* c) + matrix::view::dense alpha, + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using namespace oneapi::mkl; if constexpr (onemkl::is_supported::value) { - if (b->get_stride() != 0 && c->get_stride() != 0) { - if (a->get_size()[1] > 0 && a->get_const_values() && - b->get_const_values() && c->get_const_values()) { + if (b.stride != 0 && c.stride != 0) { + if (a.size[1] > 0 && a.values && b.values && c.values) { oneapi::mkl::blas::row_major::gemm( *exec->get_queue(), transpose::nontrans, - transpose::nontrans, c->get_size()[0], c->get_size()[1], - a->get_size()[1], - exec->copy_val_to_host(alpha->get_const_values()), - as_device_type(a->get_const_values()), a->get_stride(), - as_device_type(b->get_const_values()), b->get_stride(), - exec->copy_val_to_host(beta->get_const_values()), - as_device_type(c->get_values()), c->get_stride()); + transpose::nontrans, c.size[0], c.size[1], a.size[1], + exec->copy_val_to_host(alpha.values), + as_device_type(a.values), a.stride, + as_device_type(b.values), b.stride, + exec->copy_val_to_host(beta.values), + as_device_type(c.values), c.stride); } else { dense::scale(exec, beta, c); } @@ -279,14 +277,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, const int64* row_ptrs, matrix::Coo* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; - const auto in_vals = as_device_type(source->get_const_values()); - const auto stride = source->get_stride(); + const auto in_vals = as_device_type(source.values); + const auto stride = source.stride; auto rows = result->get_row_idxs(); auto cols = result->get_col_idxs(); @@ -315,13 +313,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Csr* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; - const auto in_vals = as_device_type(source->get_const_values()); - const auto stride = source->get_stride(); + const auto in_vals = as_device_type(source.values); + const auto stride = source.stride; const auto row_ptrs = result->get_const_row_ptrs(); auto cols = result->get_col_idxs(); @@ -349,14 +347,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Ell* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; const auto max_nnz_per_row = result->get_num_stored_elements_per_row(); - const auto in_vals = as_device_type(source->get_const_values()); - const auto in_stride = source->get_stride(); + const auto in_vals = as_device_type(source.values); + const auto in_stride = source.stride; auto cols = result->get_col_idxs(); auto vals = as_device_type(result->get_values()); @@ -388,7 +386,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_fbcsr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Fbcsr* result) GKO_NOT_IMPLEMENTED; @@ -398,7 +396,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzero_blocks_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, int bs, IndexType* result) GKO_NOT_IMPLEMENTED; @@ -408,15 +406,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, const int64* coo_row_ptrs, matrix::Hybrid* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; const auto ell_lim = result->get_ell_num_stored_elements_per_row(); - const auto in_vals = as_device_type(source->get_const_values()); - const auto in_stride = source->get_stride(); + const auto in_vals = as_device_type(source.values); + const auto in_stride = source.stride; const auto ell_stride = result->get_ell_stride(); auto ell_cols = result->get_ell_col_idxs(); auto ell_vals = as_device_type(result->get_ell_values()); @@ -464,13 +462,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Sellp* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; - const auto stride = source->get_stride(); - const auto in_vals = as_device_type(source->get_const_values()); + const auto stride = source.stride; + const auto in_vals = as_device_type(source.values); const auto slice_sets = result->get_const_slice_sets(); const auto slice_size = result->get_slice_size(); @@ -507,13 +505,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::SparsityCsr* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; - const auto in_vals = as_device_type(source->get_const_values()); - const auto stride = source->get_stride(); + const auto in_vals = as_device_type(source.values); + const auto stride = source.stride; const auto row_ptrs = result->get_const_row_ptrs(); auto cols = result->get_col_idxs(); @@ -539,8 +537,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { auto queue = exec->get_queue(); kernel::transpose_call( @@ -560,10 +558,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { - auto size = orig->get_size(); + auto size = orig.size; auto sq_array = syn::as_array(dcfg_sq_list); auto queue = exec->get_queue(); const std::uint32_t cfg = @@ -577,10 +575,9 @@ void conj_transpose(std::shared_ptr exec, const auto sg_size = DCFG_1D::decode<1>(cfg); dim3 grid(ceildiv(size[1], sg_size), ceildiv(size[0], sg_size)); dim3 block(sg_size, sg_size); - kernel::conj_transpose_call( - cfg, grid, block, 0, queue, size[0], size[1], - as_device_type(orig->get_const_values()), orig->get_stride(), - as_device_type(trans->get_values()), trans->get_stride()); + kernel::conj_transpose_call(cfg, grid, block, 0, queue, size[0], size[1], + as_device_type(orig.values), orig.stride, + as_device_type(trans.values), trans.stride); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); diff --git a/dpcpp/matrix/ell_kernels.dp.cpp b/dpcpp/matrix/ell_kernels.dp.cpp index e921e04f6cb..816d506e1c4 100644 --- a/dpcpp/matrix/ell_kernels.dp.cpp +++ b/dpcpp/matrix/ell_kernels.dp.cpp @@ -1,15 +1,17 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/matrix/ell_kernels.hpp" #include +#include #include #include #include +#include #include #include #include @@ -294,14 +296,17 @@ namespace { template -void abstract_spmv(syn::value_list, - std::shared_ptr exec, - int num_worker_per_row, - const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void abstract_spmv( + syn::value_list, std::shared_ptr exec, + int num_worker_per_row, const matrix::Ell* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -321,7 +326,7 @@ void abstract_spmv(syn::value_list, const dim3 block_size(default_block_size / num_thread_per_worker, num_thread_per_worker, 1); const dim3 grid_size(ceildiv(nrows * num_worker_per_row, block_size.x), - b->get_size()[1], 1); + b.size[1], 1); // not support 16 bit atomic // We do atomic on shared memory when num_thread_per_worker is not 1. @@ -340,29 +345,29 @@ void abstract_spmv(syn::value_list, a->get_const_values()); const auto b_vals = gko::acc::range( std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), + {static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, std::array{ - {static_cast(b->get_stride())}}); + {static_cast(b.stride)}}); - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { kernel::spmv( grid_size, block_size, 0, exec->get_queue(), nrows, num_worker_per_row, acc::as_device_range(a_vals), a->get_const_col_idxs(), stride, num_stored_elements_per_row, - acc::as_device_range(b_vals), as_device_type(c->get_values()), - c->get_stride()); - } else if (alpha != nullptr && beta != nullptr) { + acc::as_device_range(b_vals), as_device_type(c.values), + c.stride); + } else if (alpha && beta) { const auto alpha_val = gko::acc::range( - std::array{1}, alpha->get_const_values()); + std::array{1}, alpha->values); kernel::spmv( grid_size, block_size, 0, exec->get_queue(), nrows, num_worker_per_row, acc::as_device_range(alpha_val), acc::as_device_range(a_vals), a->get_const_col_idxs(), stride, num_stored_elements_per_row, acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), - as_device_type(c->get_values()), c->get_stride()); + as_device_type(beta->values), as_device_type(c.values), + c.stride); } else { GKO_KERNEL_NOT_FOUND; } @@ -419,8 +424,8 @@ template void spmv(std::shared_ptr exec, const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -452,11 +457,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Ell* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index e9eb02f5fb2..237881f8324 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -29,19 +29,19 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, const matrix::Fbcsr* a, - const matrix::Dense* b, - matrix::Dense* c) GKO_NOT_IMPLEMENTED; + matrix::view::dense b, + matrix::view::dense c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Fbcsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) GKO_NOT_IMPLEMENTED; + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -61,7 +61,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Fbcsr* source, - matrix::Dense* result) GKO_NOT_IMPLEMENTED; + matrix::view::dense result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); @@ -100,7 +100,7 @@ template void is_sorted_by_column_index( std::shared_ptr exec, const matrix::Fbcsr* to_check, - bool* is_sorted) GKO_NOT_IMPLEMENTED; + bool& is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/dpcpp/matrix/fft_kernels.dp.cpp b/dpcpp/matrix/fft_kernels.dp.cpp index 7ce02692d83..407d82485fd 100644 --- a/dpcpp/matrix/fft_kernels.dp.cpp +++ b/dpcpp/matrix/fft_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,8 +22,8 @@ namespace fft { template void fft(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, bool inverse, + matrix::view::dense> b, + matrix::view::dense> x, bool inverse, array& buffer) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); @@ -31,8 +31,8 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); template void fft2(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, bool inverse, array& buffer) GKO_NOT_IMPLEMENTED; @@ -41,8 +41,8 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); template void fft3(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, size_type size3, bool inverse, array& buffer) GKO_NOT_IMPLEMENTED; diff --git a/dpcpp/matrix/sellp_kernels.dp.cpp b/dpcpp/matrix/sellp_kernels.dp.cpp index 5eb2c57f872..3292cf7d498 100644 --- a/dpcpp/matrix/sellp_kernels.dp.cpp +++ b/dpcpp/matrix/sellp_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -108,17 +108,17 @@ GKO_ENABLE_DEFAULT_HOST(advanced_spmv_kernel, advanced_spmv_kernel); template void spmv(std::shared_ptr exec, const matrix::Sellp* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const dim3 blockSize(default_block_size); const dim3 gridSize(ceildiv(a->get_size()[0], default_block_size), - b->get_size()[1]); + b.size[1]); spmv_kernel(gridSize, blockSize, 0, exec->get_queue(), a->get_size()[0], - b->get_size()[1], b->get_stride(), c->get_stride(), - a->get_slice_size(), a->get_const_slice_sets(), - a->get_const_values(), a->get_const_col_idxs(), - b->get_const_values(), c->get_values()); + b.size[1], b.stride, c.stride, a->get_slice_size(), + a->get_const_slice_sets(), a->get_const_values(), + a->get_const_col_idxs(), b.values, c.values); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); @@ -126,22 +126,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Sellp* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { const dim3 blockSize(default_block_size); const dim3 gridSize(ceildiv(a->get_size()[0], default_block_size), - b->get_size()[1]); + b.size[1]); advanced_spmv_kernel( - gridSize, blockSize, 0, exec->get_queue(), a->get_size()[0], - b->get_size()[1], b->get_stride(), c->get_stride(), a->get_slice_size(), - a->get_const_slice_sets(), alpha->get_const_values(), - a->get_const_values(), a->get_const_col_idxs(), b->get_const_values(), - beta->get_const_values(), c->get_values()); + gridSize, blockSize, 0, exec->get_queue(), a->get_size()[0], b.size[1], + b.stride, c.stride, a->get_slice_size(), a->get_const_slice_sets(), + alpha.values, a->get_const_values(), a->get_const_col_idxs(), b.values, + beta.values, c.values); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index cd9483e893d..a15dda1f260 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -1,12 +1,15 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/matrix/sparsity_csr_kernels.hpp" +#include + #include #include +#include #include "accessor/reduced_row_major.hpp" #include "accessor/sycl_helper.hpp" @@ -173,13 +176,18 @@ namespace host_kernel { template -void classical_spmv(syn::value_list, - std::shared_ptr exec, - const matrix::SparsityCsr* a, - const matrix::Dense* b, - matrix::Dense* c, - const matrix::Dense* alpha = nullptr, - const matrix::Dense* beta = nullptr) +void classical_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::SparsityCsr* a, + matrix::view::dense b, + matrix::view::dense c, + xstd::type_identity_t< + std::optional>> + alpha = {}, + xstd::type_identity_t< + std::optional>> + beta = {}) { using arithmetic_type = highest_precision; @@ -194,40 +202,35 @@ void classical_spmv(syn::value_list, const auto gridx = std::min(ceildiv(a->get_size()[0], spmv_block_size / subgroup_size), int64(num_subgroup / nsg_in_group)); - const dim3 grid(gridx, b->get_size()[1]); + const dim3 grid(gridx, b.size[1]); const auto block = spmv_block_size; const auto b_vals = gko::acc::range( - std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), - std::array{ - {static_cast(b->get_stride())}}); + std::array{{static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, + std::array{{static_cast(b.stride)}}); auto c_vals = gko::acc::range( - std::array{ - {static_cast(c->get_size()[0]), - static_cast(c->get_size()[1])}}, - c->get_values(), - std::array{ - {static_cast(c->get_stride())}}); - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + std::array{{static_cast(c.size[0]), + static_cast(c.size[1])}}, + c.values, + std::array{{static_cast(c.stride)}}); + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do return; } - if (alpha == nullptr && beta == nullptr) { + if (!alpha && !beta) { kernel::abstract_classical_spmv( grid, block, 0, exec->get_queue(), a->get_size()[0], as_device_type(a->get_const_value()), a->get_const_col_idxs(), a->get_const_row_ptrs(), acc::as_device_range(b_vals), acc::as_device_range(c_vals)); - } else if (alpha != nullptr && beta != nullptr) { + } else if (alpha && beta) { kernel::abstract_classical_spmv( grid, block, 0, exec->get_queue(), a->get_size()[0], - as_device_type(alpha->get_const_values()), - as_device_type(a->get_const_value()), a->get_const_col_idxs(), - a->get_const_row_ptrs(), acc::as_device_range(b_vals), - as_device_type(beta->get_const_values()), + as_device_type(alpha->values), as_device_type(a->get_const_value()), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + acc::as_device_range(b_vals), as_device_type(beta->values), acc::as_device_range(c_vals)); } else { GKO_KERNEL_NOT_FOUND; @@ -244,8 +247,8 @@ template void spmv(std::shared_ptr exec, const matrix::SparsityCsr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { host_kernel::select_classical_spmv( classical_kernels(), [](int compiled_info) { return true; }, @@ -259,11 +262,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::SparsityCsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { host_kernel::select_classical_spmv( classical_kernels(), [](int compiled_info) { return true; }, @@ -312,10 +315,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr* to_check, bool* is_sorted) + const matrix::SparsityCsr* to_check, bool& is_sorted) { - *is_sorted = true; - auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); + is_sorted = true; + auto cpu_array = make_array_view(exec->get_master(), 1, &is_sorted); auto gpu_array = array{exec, cpu_array}; const auto num_rows = to_check->get_size()[0]; const auto row_ptrs = to_check->get_const_row_ptrs(); diff --git a/dpcpp/preconditioner/isai_kernels.dp.cpp b/dpcpp/preconditioner/isai_kernels.dp.cpp index 87eff57fce8..25d7d85679b 100644 --- a/dpcpp/preconditioner/isai_kernels.dp.cpp +++ b/dpcpp/preconditioner/isai_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -690,7 +690,7 @@ void generate_excess_system(std::shared_ptr exec, const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, matrix::Csr* excess_system, - matrix::Dense* excess_rhs, + matrix::view::dense excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -706,7 +706,7 @@ void generate_excess_system(std::shared_ptr exec, excess_rhs_ptrs, excess_nz_ptrs, excess_system->get_row_ptrs(), excess_system->get_col_idxs(), as_device_type(excess_system->get_values()), - as_device_type(excess_rhs->get_values()), e_start, e_end); + as_device_type(excess_rhs.values), e_start, e_end); } } @@ -717,7 +717,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr exec, const IndexType* excess_block_ptrs, - matrix::Dense* excess_solution, + matrix::view::dense excess_solution, size_type e_start, size_type e_end) { const auto block = default_block_size; @@ -725,7 +725,7 @@ void scale_excess_solution(std::shared_ptr exec, if (grid > 0) { kernel::scale_excess_solution( grid, block, 0, exec->get_queue(), excess_block_ptrs, - as_device_type(excess_solution->get_values()), e_start, e_end); + as_device_type(excess_solution.values), e_start, e_end); } } @@ -734,11 +734,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void scatter_excess_solution(std::shared_ptr exec, - const IndexType* excess_rhs_ptrs, - const matrix::Dense* excess_solution, - matrix::Csr* inverse, - size_type e_start, size_type e_end) +void scatter_excess_solution( + std::shared_ptr exec, + const IndexType* excess_rhs_ptrs, + matrix::view::dense excess_solution, + matrix::Csr* inverse, size_type e_start, + size_type e_end) { const auto num_rows = inverse->get_size()[0]; @@ -748,7 +749,7 @@ void scatter_excess_solution(std::shared_ptr exec, kernel::copy_excess_solution( grid, block, 0, exec->get_queue(), static_cast(num_rows), inverse->get_const_row_ptrs(), excess_rhs_ptrs, - as_device_type(excess_solution->get_const_values()), + as_device_type(excess_solution.values), as_device_type(inverse->get_values()), e_start, e_end); } } diff --git a/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp b/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp index 6b28e8d866b..17e3221df4e 100644 --- a/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp +++ b/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -45,13 +45,14 @@ void apply(std::shared_ptr exec, size_type num_blocks, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* alpha, - const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense x) { // TODO: write a special kernel for multiple RHS dense::scale(exec, beta, x); - for (size_type col = 0; col < b->get_size()[1]; ++col) { + for (size_type col = 0; col < b.size[1]; ++col) { select_advanced_apply( compiled_kernels(), [&](int compiled_block_size) { @@ -60,9 +61,8 @@ void apply(std::shared_ptr exec, size_type num_blocks, syn::value_list(), syn::type_list<>(), exec, num_blocks, block_precisions.get_const_data(), block_pointers.get_const_data(), - blocks.get_const_data(), storage_scheme, alpha->get_const_values(), - b->get_const_values() + col, b->get_stride(), x->get_values() + col, - x->get_stride()); + blocks.get_const_data(), storage_scheme, alpha.values, + b.values + col, b.stride, x.values + col, x.stride); } } diff --git a/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp b/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp index b3012b4ff40..36532d801b3 100644 --- a/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp +++ b/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -43,10 +43,10 @@ void simple_apply( storage_scheme, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* b, matrix::Dense* x) + matrix::view::dense b, matrix::view::dense x) { // TODO: write a special kernel for multiple RHS - for (size_type col = 0; col < b->get_size()[1]; ++col) { + for (size_type col = 0; col < b.size[1]; ++col) { select_apply( compiled_kernels(), [&](int compiled_block_size) { @@ -55,9 +55,8 @@ void simple_apply( syn::value_list(), syn::type_list<>(), exec, num_blocks, block_precisions.get_const_data(), block_pointers.get_const_data(), - blocks.get_const_data(), storage_scheme, - b->get_const_values() + col, b->get_stride(), x->get_values() + col, - x->get_stride()); + blocks.get_const_data(), storage_scheme, b.values + col, b.stride, + x.values + col, x.stride); } } diff --git a/dpcpp/solver/cb_gmres_kernels.dp.cpp b/dpcpp/solver/cb_gmres_kernels.dp.cpp index 80886af39cc..2a79f673f1e 100644 --- a/dpcpp/solver/cb_gmres_kernels.dp.cpp +++ b/dpcpp/solver/cb_gmres_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -926,25 +926,24 @@ void zero_matrix(std::shared_ptr exec, size_type m, template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - array* stop_status, size_type krylov_dim) + matrix::view::dense b, + matrix::view::dense residual, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + array& stop_status, size_type krylov_dim) { - const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), - krylov_dim * b->get_size()[1]); + const auto num_threads = + std::max(b.size[0] * b.stride, krylov_dim * b.size[1]); const dim3 grid_dim(ceildiv(num_threads, default_block_size), 1, 1); const dim3 block_dim(default_block_size, 1, 1); constexpr auto block_size = default_block_size; initialize_kernel( - grid_dim, block_dim, 0, exec->get_queue(), b->get_size()[0], - b->get_size()[1], krylov_dim, as_device_type(b->get_const_values()), - b->get_stride(), as_device_type(residual->get_values()), - residual->get_stride(), givens_sin->get_values(), - givens_sin->get_stride(), givens_cos->get_values(), - givens_cos->get_stride(), stop_status->get_data()); + grid_dim, block_dim, 0, exec->get_queue(), b.size[0], b.size[1], + krylov_dim, as_device_type(b.values), b.stride, + as_device_type(residual.values), residual.stride, givens_sin.values, + givens_sin.stride, givens_cos.values, givens_cos.stride, + stop_status.get_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE( @@ -953,19 +952,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE( template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense>* arnoldi_norm, + matrix::view::dense residual, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense> arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense* next_krylov_basis, - array* final_iter_nums, array& reduction_tmp, + matrix::view::dense next_krylov_basis, + array& final_iter_nums, array& reduction_tmp, size_type krylov_dim) { constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; - const auto num_rows = residual->get_size()[0]; - const auto num_rhs = residual->get_size()[1]; + const auto num_rows = residual.size[0]; + const auto num_rhs = residual.size[1]; const auto krylov_stride = gko::cb_gmres::helper_functions_accessor::get_stride( krylov_bases); @@ -973,37 +972,34 @@ void restart(std::shared_ptr exec, ceildiv((krylov_dim + 1) * krylov_stride[0], default_block_size), 1, 1); const dim3 block_dim(default_block_size, 1, 1); constexpr auto block_size = default_block_size; - const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto stride_arnoldi = arnoldi_norm.stride; - restart_1_kernel( - grid_dim_1, block_dim, 0, exec->get_queue(), residual->get_size()[0], - residual->get_size()[1], krylov_dim, krylov_bases, - residual_norm_collection->get_values(), - residual_norm_collection->get_stride()); + restart_1_kernel(grid_dim_1, block_dim, 0, exec->get_queue(), + residual.size[0], residual.size[1], krylov_dim, + krylov_bases, residual_norm_collection.values, + residual_norm_collection.stride); kernels::dpcpp::dense::compute_norm2_dispatch(exec, residual, residual_norm, reduction_tmp); if (use_scalar) { - components::fill_array(exec, - arnoldi_norm->get_values() + 2 * stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + 2 * stride_arnoldi, num_rhs, zero>()); const dim3 grid_size_nrm(ceildiv(num_rhs, default_dot_dim), exec->get_num_computing_units() * 2); const dim3 block_size_nrm(default_dot_dim, default_dot_dim); multinorminf_without_stop_kernel( grid_size_nrm, block_size_nrm, 0, exec->get_queue(), num_rows, - num_rhs, as_device_type(residual->get_const_values()), - residual->get_stride(), - arnoldi_norm->get_values() + 2 * stride_arnoldi, 0); + num_rhs, as_device_type(residual.values), residual.stride, + arnoldi_norm.values + 2 * stride_arnoldi, 0); } if (gko::cb_gmres::detail::has_3d_scaled_accessor::value) { set_scalar_kernel( ceildiv(num_rhs * (krylov_dim + 1), default_block_size), default_block_size, 0, exec->get_queue(), num_rhs, krylov_dim + 1, - residual_norm->get_const_values(), residual_norm->get_stride(), - arnoldi_norm->get_const_values() + 2 * stride_arnoldi, - stride_arnoldi, krylov_bases); + residual_norm.values, residual_norm.stride, + arnoldi_norm.values + 2 * stride_arnoldi, stride_arnoldi, + krylov_bases); } const dim3 grid_dim_2( @@ -1011,29 +1007,27 @@ void restart(std::shared_ptr exec, default_block_size), 1, 1); restart_2_kernel( - grid_dim_2, block_dim, 0, exec->get_queue(), residual->get_size()[0], - residual->get_size()[1], as_device_type(residual->get_const_values()), - residual->get_stride(), residual_norm->get_const_values(), - residual_norm_collection->get_values(), krylov_bases, - next_krylov_basis->get_values(), next_krylov_basis->get_stride(), - final_iter_nums->get_data()); + grid_dim_2, block_dim, 0, exec->get_queue(), residual.size[0], + residual.size[1], as_device_type(residual.values), residual.stride, + residual_norm.values, residual_norm_collection.values, krylov_bases, + next_krylov_basis.values, next_krylov_basis.stride, + final_iter_nums.get_data()); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_RESTART_KERNEL); template -void finish_arnoldi_CGS(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - Accessor3dim krylov_bases, - matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, const stopping_status* stop_status, - stopping_status* reorth_status, - array* num_reorth) +void finish_arnoldi_CGS( + std::shared_ptr exec, + matrix::view::dense next_krylov_basis, Accessor3dim krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, size_type iter, + const stopping_status* stop_status, stopping_status* reorth_status, + array& num_reorth) { - const auto dim_size = next_krylov_basis->get_size(); + const auto dim_size = next_krylov_basis.size; if (dim_size[1] == 0) { return; } @@ -1042,10 +1036,10 @@ void finish_arnoldi_CGS(std::shared_ptr exec, constexpr int singledot_block_size = default_dot_dim; constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; - const auto stride_next_krylov = next_krylov_basis->get_stride(); - const auto stride_hessenberg = hessenberg_iter->get_stride(); - const auto stride_buffer = buffer_iter->get_stride(); - const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto stride_next_krylov = next_krylov_basis.stride; + const auto stride_hessenberg = hessenberg_iter.stride; + const auto stride_buffer = buffer_iter.stride; + const auto stride_arnoldi = arnoldi_norm.stride; const dim3 grid_size(ceildiv(dim_size[1], default_dot_dim), exec->get_num_computing_units() * 2); const dim3 grid_size_num_iters(ceildiv(dim_size[1], default_dot_dim), @@ -1060,27 +1054,25 @@ void finish_arnoldi_CGS(std::shared_ptr exec, const dim3 block_size_iters_single(singledot_block_size); size_type num_reorth_host; - components::fill_array(exec, arnoldi_norm->get_values(), dim_size[1], + components::fill_array(exec, arnoldi_norm.values, dim_size[1], zero()); multinorm2_kernel(grid_size, block_size, 0, exec->get_queue(), dim_size[0], - dim_size[1], next_krylov_basis->get_const_values(), - stride_next_krylov, arnoldi_norm->get_values(), - stop_status); + dim_size[1], next_krylov_basis.values, stride_next_krylov, + arnoldi_norm.values, stop_status); zero_matrix(exec, iter + 1, dim_size[1], stride_hessenberg, - hessenberg_iter->get_values()); + hessenberg_iter.values); if (dim_size[1] > 1) { multidot_kernel( grid_size_num_iters, block_size, 0, exec->get_queue(), dim_size[0], - dim_size[1], next_krylov_basis->get_const_values(), - stride_next_krylov, krylov_bases, hessenberg_iter->get_values(), - stride_hessenberg, stop_status); + dim_size[1], next_krylov_basis.values, stride_next_krylov, + krylov_bases, hessenberg_iter.values, stride_hessenberg, + stop_status); } else { singledot_kernel( grid_size_iters_single, block_size_iters_single, 0, - exec->get_queue(), dim_size[0], - next_krylov_basis->get_const_values(), stride_next_krylov, - krylov_bases, hessenberg_iter->get_values(), stride_hessenberg, - stop_status); + exec->get_queue(), dim_size[0], next_krylov_basis.values, + stride_next_krylov, krylov_bases, hessenberg_iter.values, + stride_hessenberg, stop_status); } // for i in 1:iter // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) @@ -1088,51 +1080,47 @@ void finish_arnoldi_CGS(std::shared_ptr exec, update_next_krylov_kernel( ceildiv(dim_size[0] * stride_next_krylov, default_block_size), default_block_size, 0, exec->get_queue(), iter + 1, dim_size[0], - dim_size[1], next_krylov_basis->get_values(), stride_next_krylov, - krylov_bases, hessenberg_iter->get_const_values(), stride_hessenberg, - stop_status); + dim_size[1], next_krylov_basis.values, stride_next_krylov, krylov_bases, + hessenberg_iter.values, stride_hessenberg, stop_status); // for i in 1:iter // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) // end - components::fill_array(exec, arnoldi_norm->get_values() + stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + stride_arnoldi, dim_size[1], zero()); if (use_scalar) { - components::fill_array(exec, - arnoldi_norm->get_values() + 2 * stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + 2 * stride_arnoldi, dim_size[1], zero()); } multinorm2_inf_kernel( grid_size, block_size, 0, exec->get_queue(), dim_size[0], dim_size[1], - next_krylov_basis->get_const_values(), stride_next_krylov, - arnoldi_norm->get_values() + stride_arnoldi, - arnoldi_norm->get_values() + 2 * stride_arnoldi, stop_status); + next_krylov_basis.values, stride_next_krylov, + arnoldi_norm.values + stride_arnoldi, + arnoldi_norm.values + 2 * stride_arnoldi, stop_status); // nrmN = norm(next_krylov_basis) - components::fill_array(exec, num_reorth->get_data(), 1, zero()); + components::fill_array(exec, num_reorth.get_data(), 1, zero()); check_arnoldi_norms( ceildiv(dim_size[1], default_block_size), default_block_size, 0, - exec->get_queue(), dim_size[1], arnoldi_norm->get_values(), - stride_arnoldi, hessenberg_iter->get_values(), stride_hessenberg, - iter + 1, krylov_bases, stop_status, reorth_status, - num_reorth->get_data()); - num_reorth_host = get_element(*num_reorth, 0); + exec->get_queue(), dim_size[1], arnoldi_norm.values, stride_arnoldi, + hessenberg_iter.values, stride_hessenberg, iter + 1, krylov_bases, + stop_status, reorth_status, num_reorth.get_data()); + num_reorth_host = get_element(num_reorth, 0); // num_reorth_host := number of next_krylov vector to be reorthogonalization for (size_type l = 1; (num_reorth_host > 0) && (l < 3); l++) { zero_matrix(exec, iter + 1, dim_size[1], stride_buffer, - buffer_iter->get_values()); + buffer_iter.values); if (dim_size[1] > 1) { multidot_kernel( grid_size_num_iters, block_size, 0, exec->get_queue(), - dim_size[0], dim_size[1], next_krylov_basis->get_const_values(), - stride_next_krylov, krylov_bases, buffer_iter->get_values(), + dim_size[0], dim_size[1], next_krylov_basis.values, + stride_next_krylov, krylov_bases, buffer_iter.values, stride_buffer, stop_status); } else { singledot_kernel( grid_size_iters_single, block_size_iters_single, 0, - exec->get_queue(), dim_size[0], - next_krylov_basis->get_const_values(), stride_next_krylov, - krylov_bases, buffer_iter->get_values(), stride_buffer, - stop_status); + exec->get_queue(), dim_size[0], next_krylov_basis.values, + stride_next_krylov, krylov_bases, buffer_iter.values, + stride_buffer, stop_status); } // for i in 1:iter // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) @@ -1140,99 +1128,96 @@ void finish_arnoldi_CGS(std::shared_ptr exec, update_next_krylov_and_add_kernel( ceildiv(dim_size[0] * stride_next_krylov, default_block_size), default_block_size, 0, exec->get_queue(), iter + 1, dim_size[0], - dim_size[1], next_krylov_basis->get_values(), stride_next_krylov, - krylov_bases, hessenberg_iter->get_values(), stride_hessenberg, - buffer_iter->get_const_values(), stride_buffer, stop_status, - reorth_status); + dim_size[1], next_krylov_basis.values, stride_next_krylov, + krylov_bases, hessenberg_iter.values, stride_hessenberg, + buffer_iter.values, stride_buffer, stop_status, reorth_status); // for i in 1:iter // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) // end - components::fill_array(exec, - arnoldi_norm->get_values() + stride_arnoldi, + components::fill_array(exec, arnoldi_norm.values + stride_arnoldi, dim_size[1], zero()); if (use_scalar) { - components::fill_array( - exec, arnoldi_norm->get_values() + 2 * stride_arnoldi, - dim_size[1], zero()); + components::fill_array(exec, + arnoldi_norm.values + 2 * stride_arnoldi, + dim_size[1], zero()); } multinorm2_inf_kernel( grid_size, block_size, 0, exec->get_queue(), dim_size[0], - dim_size[1], next_krylov_basis->get_const_values(), - stride_next_krylov, arnoldi_norm->get_values() + stride_arnoldi, - arnoldi_norm->get_values() + 2 * stride_arnoldi, stop_status); + dim_size[1], next_krylov_basis.values, stride_next_krylov, + arnoldi_norm.values + stride_arnoldi, + arnoldi_norm.values + 2 * stride_arnoldi, stop_status); // nrmN = norm(next_krylov_basis) - components::fill_array(exec, num_reorth->get_data(), 1, + components::fill_array(exec, num_reorth.get_data(), 1, zero()); check_arnoldi_norms( ceildiv(dim_size[1], default_block_size), default_block_size, 0, - exec->get_queue(), dim_size[1], arnoldi_norm->get_values(), - stride_arnoldi, hessenberg_iter->get_values(), stride_hessenberg, - iter + 1, krylov_bases, stop_status, reorth_status, - num_reorth->get_data()); - num_reorth_host = get_element(*num_reorth, 0); + exec->get_queue(), dim_size[1], arnoldi_norm.values, stride_arnoldi, + hessenberg_iter.values, stride_hessenberg, iter + 1, krylov_bases, + stop_status, reorth_status, num_reorth.get_data()); + num_reorth_host = get_element(num_reorth, 0); } update_krylov_next_krylov_kernel( ceildiv(dim_size[0] * stride_next_krylov, default_block_size), default_block_size, 0, exec->get_queue(), iter, dim_size[0], - dim_size[1], next_krylov_basis->get_values(), stride_next_krylov, - krylov_bases, hessenberg_iter->get_const_values(), stride_hessenberg, - stop_status); + dim_size[1], next_krylov_basis.values, stride_next_krylov, krylov_bases, + hessenberg_iter.values, stride_hessenberg, stop_status); // next_krylov_basis /= hessenberg(iter, iter + 1) // krylov_bases(:, iter + 1) = next_krylov_basis // End of arnoldi } template -void givens_rotation(std::shared_ptr exec, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - size_type iter, const array* stop_status) +void givens_rotation( + std::shared_ptr exec, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, size_type iter, + const array& stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; - const auto num_cols = hessenberg_iter->get_size()[1]; + const auto num_cols = hessenberg_iter.size[1]; const dim3 block_dim{block_size, 1, 1}; const dim3 grid_dim{ static_cast(ceildiv(num_cols, block_size)), 1, 1}; givens_rotation_kernel( - grid_dim, block_dim, 0, exec->get_queue(), - hessenberg_iter->get_size()[0], hessenberg_iter->get_size()[1], iter, - hessenberg_iter->get_values(), hessenberg_iter->get_stride(), - givens_sin->get_values(), givens_sin->get_stride(), - givens_cos->get_values(), givens_cos->get_stride(), - residual_norm->get_values(), residual_norm_collection->get_values(), - residual_norm_collection->get_stride(), stop_status->get_const_data()); + grid_dim, block_dim, 0, exec->get_queue(), hessenberg_iter.size[0], + hessenberg_iter.size[1], iter, hessenberg_iter.values, + hessenberg_iter.stride, givens_sin.values, givens_sin.stride, + givens_cos.values, givens_cos.stride, residual_norm.values, + residual_norm_collection.values, residual_norm_collection.stride, + stop_status.get_const_data()); } template void arnoldi(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, array* final_iter_nums, - const array* stop_status, - array* reorth_status, - array* num_reorth) + matrix::view::dense next_krylov_basis, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + Accessor3d krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, + size_type iter, array& final_iter_nums, + const array& stop_status, + array& reorth_status, + array& num_reorth) { increase_final_iteration_numbers_kernel( static_cast( - ceildiv(final_iter_nums->get_size(), default_block_size)), - default_block_size, 0, exec->get_queue(), final_iter_nums->get_data(), - stop_status->get_const_data(), final_iter_nums->get_size()); + ceildiv(final_iter_nums.get_size(), default_block_size)), + default_block_size, 0, exec->get_queue(), final_iter_nums.get_data(), + stop_status.get_const_data(), final_iter_nums.get_size()); finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter, buffer_iter, arnoldi_norm, iter, - stop_status->get_const_data(), reorth_status->get_data(), + stop_status.get_const_data(), reorth_status.get_data(), num_reorth); givens_rotation(exec, givens_sin, givens_cos, hessenberg_iter, residual_norm, residual_norm_collection, iter, stop_status); @@ -1244,38 +1229,35 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); template void solve_upper_triangular( std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, matrix::Dense* y, - const array* final_iter_nums) + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg, + matrix::view::dense y, const array& final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; - const auto num_rhs = residual_norm_collection->get_size()[1]; + const auto num_rhs = residual_norm_collection.size[1]; const dim3 block_dim{block_size, 1, 1}; const dim3 grid_dim{static_cast(ceildiv(num_rhs, block_size)), 1, 1}; solve_upper_triangular_kernel( - grid_dim, block_dim, 0, exec->get_queue(), hessenberg->get_size()[1], - num_rhs, residual_norm_collection->get_const_values(), - residual_norm_collection->get_stride(), - as_device_type(hessenberg->get_const_values()), - hessenberg->get_stride(), as_device_type(y->get_values()), - y->get_stride(), final_iter_nums->get_const_data()); + grid_dim, block_dim, 0, exec->get_queue(), hessenberg.size[1], num_rhs, + residual_norm_collection.values, residual_norm_collection.stride, + as_device_type(hessenberg.values), hessenberg.stride, + as_device_type(y.values), y.stride, final_iter_nums.get_const_data()); } template void calculate_qy(std::shared_ptr exec, ConstAccessor3d krylov_bases, size_type num_krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array* final_iter_nums) + matrix::view::dense y, + matrix::view::dense before_preconditioner, + const array& final_iter_nums) { - const auto num_rows = before_preconditioner->get_size()[0]; - const auto num_cols = before_preconditioner->get_size()[1]; - const auto stride_before_preconditioner = - before_preconditioner->get_stride(); + const auto num_rows = before_preconditioner.size[0]; + const auto num_cols = before_preconditioner.size[1]; + const auto stride_before_preconditioner = before_preconditioner.stride; constexpr auto block_size = default_block_size; const dim3 grid_dim{ @@ -1287,9 +1269,9 @@ void calculate_qy(std::shared_ptr exec, calculate_Qy_kernel( grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, - krylov_bases, as_device_type(y->get_const_values()), y->get_stride(), - before_preconditioner->get_values(), stride_before_preconditioner, - final_iter_nums->get_const_data()); + krylov_bases, as_device_type(y.values), y.stride, + before_preconditioner.values, stride_before_preconditioner, + final_iter_nums.get_const_data()); // Calculate qy // before_preconditioner = krylov_bases * y } @@ -1297,25 +1279,24 @@ void calculate_qy(std::shared_ptr exec, template void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, + matrix::view::dense residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense* hessenberg, - matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array* final_iter_nums) + matrix::view::dense hessenberg, + matrix::view::dense y, + matrix::view::dense before_preconditioner, + const array& final_iter_nums) { - if (before_preconditioner->get_size()[1] == 0) { + if (before_preconditioner.size[1] == 0) { return; } // since hessenberg has dims: iters x iters * num_rhs // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs - const auto iters = - hessenberg->get_size()[1] / before_preconditioner->get_size()[1]; + const auto iters = hessenberg.size[1] / before_preconditioner.size[1]; const auto num_krylov_bases = iters + 1; solve_upper_triangular(exec, residual_norm_collection, hessenberg, y, final_iter_nums); - calculate_qy(exec, krylov_bases, num_krylov_bases, y, before_preconditioner, - final_iter_nums); + calculate_qy(exec, krylov_bases, num_krylov_bases, y.as_const(), + before_preconditioner, final_iter_nums); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( diff --git a/dpcpp/solver/idr_kernels.dp.cpp b/dpcpp/solver/idr_kernels.dp.cpp index 72f3cbb483a..cb0e6c51cc3 100644 --- a/dpcpp/solver/idr_kernels.dp.cpp +++ b/dpcpp/solver/idr_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -581,31 +581,29 @@ namespace { template void initialize_m(std::shared_ptr exec, - const size_type nrhs, matrix::Dense* m, - array* stop_status) + const size_type nrhs, matrix::view::dense m, + array& stop_status) { - const auto subspace_dim = m->get_size()[0]; - const auto m_stride = m->get_stride(); + const auto subspace_dim = m.size[0]; + const auto m_stride = m.stride; const auto grid_dim = ceildiv(m_stride * subspace_dim, default_block_size); initialize_m_kernel(grid_dim, default_block_size, 0, exec->get_queue(), - subspace_dim, nrhs, as_device_type(m->get_values()), - m_stride, stop_status->get_data()); + subspace_dim, nrhs, as_device_type(m.values), m_stride, + stop_status.get_data()); } template -void initialize_subspace_vectors(std::shared_ptr exec, - matrix::Dense* subspace_vectors, - bool deterministic) +void initialize_subspace_vectors( + std::shared_ptr exec, + matrix::view::dense subspace_vectors, bool deterministic) { if (!deterministic) { auto seed = std::random_device{}(); using real_type = remove_complex; - auto work = - reinterpret_cast(subspace_vectors->get_values()); - auto n = - subspace_vectors->get_size()[0] * subspace_vectors->get_stride(); + auto work = reinterpret_cast(subspace_vectors.values); + auto n = subspace_vectors.size[0] * subspace_vectors.stride; using rand_type = std::conditional_t || std::is_same_v, @@ -626,123 +624,118 @@ void initialize_subspace_vectors(std::shared_ptr exec, template -void orthonormalize_subspace_vectors(std::shared_ptr exec, - matrix::Dense* subspace_vectors) +void orthonormalize_subspace_vectors( + std::shared_ptr exec, + matrix::view::dense subspace_vectors) { orthonormalize_subspace_vectors_kernel( - 1, default_block_size, 0, exec->get_queue(), - subspace_vectors->get_size()[0], subspace_vectors->get_size()[1], - subspace_vectors->get_values(), subspace_vectors->get_stride()); + 1, default_block_size, 0, exec->get_queue(), subspace_vectors.size[0], + subspace_vectors.size[1], subspace_vectors.values, + subspace_vectors.stride); } template void solve_lower_triangular(std::shared_ptr exec, const size_type nrhs, - const matrix::Dense* m, - const matrix::Dense* f, - matrix::Dense* c, - const array* stop_status) + matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense c, + const array& stop_status) { - const auto subspace_dim = m->get_size()[0]; + const auto subspace_dim = m.size[0]; const auto grid_dim = ceildiv(nrhs, default_block_size); solve_lower_triangular_kernel( grid_dim, default_block_size, 0, exec->get_queue(), subspace_dim, nrhs, - as_device_type(m->get_const_values()), m->get_stride(), - as_device_type(f->get_const_values()), f->get_stride(), - as_device_type(c->get_values()), c->get_stride(), - stop_status->get_const_data()); + as_device_type(m.values), m.stride, as_device_type(f.values), f.stride, + as_device_type(c.values), c.stride, stop_status.get_const_data()); } template void update_g_and_u(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense* p, - const matrix::Dense* m, - matrix::Dense* alpha, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense m, + matrix::view::dense alpha, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, + const array& stop_status) { - const auto size = g->get_size()[0]; - const auto p_stride = p->get_stride(); + const auto size = g.size[0]; + const auto p_stride = p.stride; const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), exec->get_num_computing_units() * 2); const dim3 block_dim(default_dot_dim, default_dot_dim); for (size_type i = 0; i < k; i++) { - const auto p_i = as_device_type(p->get_const_values()) + i * p_stride; + const auto p_i = as_device_type(p.values) + i * p_stride; // not support 16 bit atomic if constexpr (sizeof(remove_complex) == sizeof(int16)) { GKO_NOT_SUPPORTED(alpha); } else { if (nrhs > 1 || is_complex()) { - components::fill_array(exec, alpha->get_values(), nrhs, + components::fill_array(exec, alpha.values, nrhs, zero()); multidot_kernel(grid_dim, block_dim, 0, exec->get_queue(), size, - nrhs, p_i, as_device_type(g_k->get_values()), - g_k->get_stride(), - as_device_type(alpha->get_values()), - stop_status->get_const_data()); + nrhs, p_i, as_device_type(g_k.values), + g_k.stride, as_device_type(alpha.values), + stop_status.get_const_data()); } else { - onemkl::dot(*exec->get_queue(), size, p_i, 1, g_k->get_values(), - g_k->get_stride(), - as_device_type(alpha->get_values())); + onemkl::dot(*exec->get_queue(), size, p_i, 1, g_k.values, + g_k.stride, as_device_type(alpha.values)); } } update_g_k_and_u_kernel( - ceildiv(size * g_k->get_stride(), default_block_size), - default_block_size, 0, exec->get_queue(), k, i, size, nrhs, - as_device_type(alpha->get_const_values()), - as_device_type(m->get_const_values()), m->get_stride(), - as_device_type(g->get_const_values()), g->get_stride(), - as_device_type(g_k->get_values()), g_k->get_stride(), - as_device_type(u->get_values()), u->get_stride(), - stop_status->get_const_data()); + ceildiv(size * g_k.stride, default_block_size), default_block_size, + 0, exec->get_queue(), k, i, size, nrhs, + as_device_type(alpha.values), as_device_type(m.values), m.stride, + as_device_type(g.values), g.stride, as_device_type(g_k.values), + g_k.stride, as_device_type(u.values), u.stride, + stop_status.get_const_data()); } update_g_kernel( - ceildiv(size * g_k->get_stride(), default_block_size), - default_block_size, 0, exec->get_queue(), k, size, nrhs, - as_device_type(g_k->get_const_values()), g_k->get_stride(), - as_device_type(g->get_values()), g->get_stride(), - stop_status->get_const_data()); + ceildiv(size * g_k.stride, default_block_size), default_block_size, 0, + exec->get_queue(), k, size, nrhs, as_device_type(g_k.values), + g_k.stride, as_device_type(g.values), g.stride, + stop_status.get_const_data()); } template void update_m(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* p, - const matrix::Dense* g_k, matrix::Dense* m, - const array* stop_status) + const size_type k, matrix::view::dense p, + matrix::view::dense g_k, + matrix::view::dense m, + const array& stop_status) { - const auto size = g_k->get_size()[0]; - const auto subspace_dim = m->get_size()[0]; - const auto p_stride = p->get_stride(); - const auto m_stride = m->get_stride(); + const auto size = g_k.size[0]; + const auto subspace_dim = m.size[0]; + const auto p_stride = p.stride; + const auto m_stride = m.stride; const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), exec->get_num_computing_units() * 2); const dim3 block_dim(default_dot_dim, default_dot_dim); for (size_type i = k; i < subspace_dim; i++) { - const auto p_i = p->get_const_values() + i * p_stride; - auto m_i = m->get_values() + i * m_stride + k * nrhs; + const auto p_i = p.values + i * p_stride; + auto m_i = m.values + i * m_stride + k * nrhs; if constexpr (sizeof(remove_complex) == sizeof(int16)) { GKO_NOT_SUPPORTED(m_i); } else { if (nrhs > 1 || is_complex()) { components::fill_array(exec, m_i, nrhs, zero()); - multidot_kernel(grid_dim, block_dim, 0, exec->get_queue(), size, - nrhs, as_device_type(p_i), - as_device_type(g_k->get_const_values()), - g_k->get_stride(), as_device_type(m_i), - stop_status->get_const_data()); + multidot_kernel( + grid_dim, block_dim, 0, exec->get_queue(), size, nrhs, + as_device_type(p_i), as_device_type(g_k.values), g_k.stride, + as_device_type(m_i), stop_status.get_const_data()); } else { onemkl::dot(*exec->get_queue(), size, as_device_type(p_i), 1, - g_k->get_const_values(), g_k->get_stride(), m_i); + g_k.values, g_k.stride, m_i); } } } @@ -752,27 +745,25 @@ void update_m(std::shared_ptr exec, const size_type nrhs, template void update_x_r_and_f(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense* m, - const matrix::Dense* g, - const matrix::Dense* u, - matrix::Dense* f, matrix::Dense* r, - matrix::Dense* x, - const array* stop_status) -{ - const auto size = x->get_size()[0]; - const auto subspace_dim = m->get_size()[0]; - - const auto grid_dim = ceildiv(size * x->get_stride(), default_block_size); + matrix::view::dense m, + matrix::view::dense g, + matrix::view::dense u, + matrix::view::dense f, + matrix::view::dense r, + matrix::view::dense x, + const array& stop_status) +{ + const auto size = x.size[0]; + const auto subspace_dim = m.size[0]; + + const auto grid_dim = ceildiv(size * x.stride, default_block_size); update_x_r_and_f_kernel( grid_dim, default_block_size, 0, exec->get_queue(), k, size, - subspace_dim, nrhs, as_device_type(m->get_const_values()), - m->get_stride(), as_device_type(g->get_const_values()), g->get_stride(), - as_device_type(u->get_const_values()), u->get_stride(), - as_device_type(f->get_values()), f->get_stride(), - as_device_type(r->get_values()), r->get_stride(), - as_device_type(x->get_values()), x->get_stride(), - stop_status->get_const_data()); - components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs, + subspace_dim, nrhs, as_device_type(m.values), m.stride, + as_device_type(g.values), g.stride, as_device_type(u.values), u.stride, + as_device_type(f.values), f.stride, as_device_type(r.values), r.stride, + as_device_type(x.values), x.stride, stop_status.get_const_data()); + components::fill_array(exec, f.values + k * f.stride, nrhs, zero()); } @@ -782,9 +773,9 @@ void update_x_r_and_f(std::shared_ptr exec, template void initialize(std::shared_ptr exec, const size_type nrhs, - matrix::Dense* m, - matrix::Dense* subspace_vectors, bool deterministic, - array* stop_status) + matrix::view::dense m, + matrix::view::dense subspace_vectors, + bool deterministic, array& stop_status) { initialize_m(exec, nrhs, m, stop_status); initialize_subspace_vectors(exec, subspace_vectors, deterministic); @@ -796,26 +787,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* m, - const matrix::Dense* f, - const matrix::Dense* residual, - const matrix::Dense* g, matrix::Dense* c, - matrix::Dense* v, - const array* stop_status) + const size_type k, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense residual, + matrix::view::dense g, + matrix::view::dense c, matrix::view::dense v, + const array& stop_status) { solve_lower_triangular(exec, nrhs, m, f, c, stop_status); - const auto num_rows = v->get_size()[0]; - const auto subspace_dim = m->get_size()[0]; + const auto num_rows = v.size[0]; + const auto subspace_dim = m.size[0]; const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); step_1_kernel(grid_dim, default_block_size, 0, exec->get_queue(), k, - num_rows, subspace_dim, nrhs, - as_device_type(residual->get_const_values()), - residual->get_stride(), as_device_type(c->get_const_values()), - c->get_stride(), as_device_type(g->get_const_values()), - g->get_stride(), as_device_type(v->get_values()), - v->get_stride(), stop_status->get_const_data()); + num_rows, subspace_dim, nrhs, as_device_type(residual.values), + residual.stride, as_device_type(c.values), c.stride, + as_device_type(g.values), g.stride, as_device_type(v.values), + v.stride, stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); @@ -823,26 +812,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* omega, - const matrix::Dense* preconditioned_vector, - const matrix::Dense* c, matrix::Dense* u, - const array* stop_status) + const size_type k, matrix::view::dense omega, + matrix::view::dense preconditioned_vector, + matrix::view::dense c, + matrix::view::dense u, + const array& stop_status) { if (nrhs == 0) { return; } - const auto num_rows = preconditioned_vector->get_size()[0]; - const auto subspace_dim = u->get_size()[1] / nrhs; + const auto num_rows = preconditioned_vector.size[0]; + const auto subspace_dim = u.size[1] / nrhs; const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); step_2_kernel(grid_dim, default_block_size, 0, exec->get_queue(), k, - num_rows, subspace_dim, nrhs, - as_device_type(omega->get_const_values()), - as_device_type(preconditioned_vector->get_const_values()), - preconditioned_vector->get_stride(), - as_device_type(c->get_const_values()), c->get_stride(), - as_device_type(u->get_values()), u->get_stride(), - stop_status->get_const_data()); + num_rows, subspace_dim, nrhs, as_device_type(omega.values), + as_device_type(preconditioned_vector.values), + preconditioned_vector.stride, as_device_type(c.values), + c.stride, as_device_type(u.values), u.stride, + stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); @@ -850,16 +838,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* p, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, matrix::Dense* m, - matrix::Dense* f, matrix::Dense* alpha, - matrix::Dense* residual, matrix::Dense* x, - const array* stop_status) -{ - update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); - update_m(exec, nrhs, k, p, g_k, m, stop_status); - update_x_r_and_f(exec, nrhs, k, m, g, u, f, residual, x, stop_status); + const size_type k, matrix::view::dense p, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense alpha, + matrix::view::dense residual, + matrix::view::dense x, + const array& stop_status) +{ + update_g_and_u(exec, nrhs, k, p.as_const(), m.as_const(), alpha, g, g_k, u, + stop_status); + update_m(exec, nrhs, k, p.as_const(), g_k.as_const(), m, stop_status); + update_x_r_and_f(exec, nrhs, k, m.as_const(), g.as_const(), u.as_const(), f, + residual, x, stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); @@ -868,16 +861,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense* tht, - const matrix::Dense>* residual_norm, - matrix::Dense* omega, const array* stop_status) + const remove_complex kappa, + matrix::view::dense tht, + matrix::view::dense> residual_norm, + matrix::view::dense omega, + const array& stop_status) { const auto grid_dim = ceildiv(nrhs, config::warp_size); compute_omega_kernel( grid_dim, config::warp_size, 0, exec->get_queue(), nrhs, - as_device_type(kappa), as_device_type(tht->get_const_values()), - as_device_type(residual_norm->get_const_values()), - as_device_type(omega->get_values()), stop_status->get_const_data()); + as_device_type(kappa), as_device_type(tht.values), + as_device_type(residual_norm.values), as_device_type(omega.values), + stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL); diff --git a/dpcpp/solver/lower_trs_kernels.dp.cpp b/dpcpp/solver/lower_trs_kernels.dp.cpp index f0257d12892..d7c543ed422 100644 --- a/dpcpp/solver/lower_trs_kernels.dp.cpp +++ b/dpcpp/solver/lower_trs_kernels.dp.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/lower_trs_kernels.hpp" #include +#include #include @@ -55,9 +56,10 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, - matrix::Dense* x) GKO_NOT_IMPLEMENTED; + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL); diff --git a/dpcpp/solver/multigrid_kernels.dp.cpp b/dpcpp/solver/multigrid_kernels.dp.cpp index aaf0ab63354..98c4dd4e891 100644 --- a/dpcpp/solver/multigrid_kernels.dp.cpp +++ b/dpcpp/solver/multigrid_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -25,32 +25,33 @@ namespace multigrid { template void kcycle_step_1(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* v, - matrix::Dense* g, matrix::Dense* d, - matrix::Dense* e) GKO_NOT_IMPLEMENTED; + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense v, + matrix::view::dense g, + matrix::view::dense d, + matrix::view::dense e) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL); template void kcycle_step_2(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* gamma, - const matrix::Dense* beta, - const matrix::Dense* zeta, - const matrix::Dense* d, - matrix::Dense* e) GKO_NOT_IMPLEMENTED; + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense gamma, + matrix::view::dense beta, + matrix::view::dense zeta, + matrix::view::dense d, + matrix::view::dense e) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL); template void kcycle_check_stop(std::shared_ptr exec, - const matrix::Dense* old_norm, - const matrix::Dense* new_norm, + matrix::view::dense old_norm, + matrix::view::dense new_norm, const ValueType rel_tol, bool& is_stop) GKO_NOT_IMPLEMENTED; diff --git a/dpcpp/solver/upper_trs_kernels.dp.cpp b/dpcpp/solver/upper_trs_kernels.dp.cpp index a4878726b05..56e8d090bb3 100644 --- a/dpcpp/solver/upper_trs_kernels.dp.cpp +++ b/dpcpp/solver/upper_trs_kernels.dp.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/upper_trs_kernels.hpp" #include +#include #include @@ -55,9 +56,10 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, - matrix::Dense* x) GKO_NOT_IMPLEMENTED; + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL); diff --git a/dpcpp/stop/criterion_kernels.dp.cpp b/dpcpp/stop/criterion_kernels.dp.cpp index baa1742e4ba..654484e754c 100644 --- a/dpcpp/stop/criterion_kernels.dp.cpp +++ b/dpcpp/stop/criterion_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,10 +22,10 @@ namespace set_all_statuses { void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, - array* stop_status) + array& stop_status) { - auto size = stop_status->get_size(); - stopping_status* __restrict__ stop_status_ptr = stop_status->get_data(); + auto size = stop_status.get_size(); + stopping_status* __restrict__ stop_status_ptr = stop_status.get_data(); exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{size}, [=](sycl::id<1> idx_id) { const auto idx = idx_id[0]; diff --git a/dpcpp/stop/residual_norm_kernels.dp.cpp b/dpcpp/stop/residual_norm_kernels.dp.cpp index 129c26e08f4..a0ba56c9f79 100644 --- a/dpcpp/stop/residual_norm_kernels.dp.cpp +++ b/dpcpp/stop/residual_norm_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -30,16 +30,16 @@ namespace residual_norm { template void residual_norm(std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense* orig_tau, + matrix::view::dense tau, + matrix::view::dense orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, - bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, + bool& one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); - auto device_storage_val = device_storage->get_data(); + auto device_storage_val = device_storage.get_data(); exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { device_storage_val[0] = true; @@ -47,28 +47,27 @@ void residual_norm(std::shared_ptr exec, }); }); - auto orig_tau_val = orig_tau->get_const_values(); - auto tau_val = as_device_type(tau->get_const_values()); - auto stop_status_val = stop_status->get_data(); + auto orig_tau_val = orig_tau.values; + auto tau_val = as_device_type(tau.values); + auto stop_status_val = stop_status.get_data(); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { - const auto tidx = idx_id[0]; - if (tau_val[tidx] <= rel_residual_goal * orig_tau_val[tidx]) { - stop_status_val[tidx].converge(stoppingId, setFinalized); - device_storage_val[1] = true; - } - // because only false is written to all_converged, write - // conflicts should not cause any problem - else if (!stop_status_val[tidx].has_stopped()) { - device_storage_val[0] = false; - } - }); + cgh.parallel_for(sycl::range<1>{tau.size[1]}, [=](sycl::id<1> idx_id) { + const auto tidx = idx_id[0]; + if (tau_val[tidx] <= rel_residual_goal * orig_tau_val[tidx]) { + stop_status_val[tidx].converge(stoppingId, setFinalized); + device_storage_val[1] = true; + } + // because only false is written to all_converged, write + // conflicts should not cause any problem + else if (!stop_status_val[tidx].has_stopped()) { + device_storage_val[0] = false; + } + }); }); /* Represents all_converged, one_changed */ - *all_converged = get_element(*device_storage, 0); - *one_changed = get_element(*device_storage, 1); + all_converged = get_element(device_storage, 0); + one_changed = get_element(device_storage, 1); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( @@ -89,13 +88,13 @@ namespace implicit_residual_norm { template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense>* orig_tau, + matrix::view::dense tau, + matrix::view::dense> orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, bool& one_changed) { - auto device_storage_val = device_storage->get_data(); + auto device_storage_val = device_storage.get_data(); exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { device_storage_val[0] = true; @@ -103,29 +102,28 @@ void implicit_residual_norm( }); }); - auto orig_tau_val = orig_tau->get_const_values(); - auto tau_val = as_device_type(tau->get_const_values()); - auto stop_status_val = stop_status->get_data(); + auto orig_tau_val = orig_tau.values; + auto tau_val = as_device_type(tau.values); + auto stop_status_val = stop_status.get_data(); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { - const auto tidx = idx_id[0]; - if (gko::sqrt(gko::abs(tau_val[tidx])) <= - rel_residual_goal * orig_tau_val[tidx]) { - stop_status_val[tidx].converge(stoppingId, setFinalized); - device_storage_val[1] = true; - } - // because only false is written to all_converged, write - // conflicts should not cause any problem - else if (!stop_status_val[tidx].has_stopped()) { - device_storage_val[0] = false; - } - }); + cgh.parallel_for(sycl::range<1>{tau.size[1]}, [=](sycl::id<1> idx_id) { + const auto tidx = idx_id[0]; + if (gko::sqrt(gko::abs(tau_val[tidx])) <= + rel_residual_goal * orig_tau_val[tidx]) { + stop_status_val[tidx].converge(stoppingId, setFinalized); + device_storage_val[1] = true; + } + // because only false is written to all_converged, write + // conflicts should not cause any problem + else if (!stop_status_val[tidx].has_stopped()) { + device_storage_val[0] = false; + } + }); }); /* Represents all_converged, one_changed */ - *all_converged = get_element(*device_storage, 0); - *one_changed = get_element(*device_storage, 1); + all_converged = get_element(device_storage, 0); + one_changed = get_element(device_storage, 1); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); diff --git a/dpcpp/test/base/kernel_launch.dp.cpp b/dpcpp/test/base/kernel_launch.dp.cpp index a6687583340..8c5eb696a5d 100644 --- a/dpcpp/test/base/kernel_launch.dp.cpp +++ b/dpcpp/test/base/kernel_launch.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -154,7 +154,7 @@ TEST_F(KernelLaunch, Runs1DDense) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; bool accessors_2d_correct = &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && @@ -167,8 +167,9 @@ TEST_F(KernelLaunch, Runs1DDense) d(i / 4, i % 4) = 0; } }, - 16, zero_dense2.get(), static_cast(zero_dense2.get()), - zero_dense2->get_const_values(), move_only_val); + 16, zero_dense2->get_device_view(), + zero_dense2->get_const_device_view(), zero_dense2->get_const_values(), + move_only_val); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); } @@ -232,8 +233,8 @@ TEST_F(KernelLaunch, Runs2DDense) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && - d3.data == d2_ptr && d4 == d3_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr && + d3.values == d2_ptr && d4 == d3_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5 && d3.stride == 6; bool accessors_2d_correct = @@ -249,11 +250,10 @@ TEST_F(KernelLaunch, Runs2DDense) d(i, j) = 0; } }, - dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2.get(), - static_cast(zero_dense2.get()), - zero_dense2->get_const_values(), - gko::kernels::dpcpp::default_stride(zero_dense.get()), - gko::kernels::dpcpp::row_vector(vec_dense.get()), + dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2->get_device_view(), + zero_dense2->get_const_device_view(), zero_dense2->get_const_values(), + gko::kernels::dpcpp::default_stride(zero_dense->get_device_view()), + gko::kernels::dpcpp::row_vector(vec_dense->get_device_view()), zero_dense->get_values(), vec_dense->get_values(), move_only_val); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); diff --git a/hip/matrix/fft_kernels.hip.cpp b/hip/matrix/fft_kernels.hip.cpp index 4db4fbbcd32..c92bd304e7e 100644 --- a/hip/matrix/fft_kernels.hip.cpp +++ b/hip/matrix/fft_kernels.hip.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -152,15 +152,14 @@ class hipfft_handle { template void fft(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, bool inverse, + matrix::view::dense> b, + matrix::view::dense> x, bool inverse, array& buffer) { hipfft_handle handle{exec->get_stream()}; handle.template setup<1, std::complex, std::complex>( - {b->get_size()[0]}, b->get_stride(), x->get_stride(), b->get_size()[1], - buffer); - handle.execute(b->get_const_values(), x->get_values(), inverse); + {b.size[0]}, b.stride, x.stride, b.size[1], buffer); + handle.execute(b.values, x.values, inverse); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); @@ -168,15 +167,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); template void fft2(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, bool inverse, array& buffer) { hipfft_handle handle{exec->get_stream()}; handle.template setup<2, std::complex, std::complex>( - {size1, size2}, b->get_stride(), x->get_stride(), b->get_size()[1], - buffer); - handle.execute(b->get_const_values(), x->get_values(), inverse); + {size1, size2}, b.stride, x.stride, b.size[1], buffer); + handle.execute(b.values, x.values, inverse); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); @@ -184,15 +182,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); template void fft3(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, size_type size3, bool inverse, array& buffer) { hipfft_handle handle{exec->get_stream()}; handle.template setup<3, std::complex, std::complex>( - {size1, size2, size3}, b->get_stride(), x->get_stride(), - b->get_size()[1], buffer); - handle.execute(b->get_const_values(), x->get_values(), inverse); + {size1, size2, size3}, b.stride, x.stride, b.size[1], buffer); + handle.execute(b.values, x.values, inverse); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT3_KERNEL); diff --git a/hip/matrix/fft_kernels_stub.hip.cpp b/hip/matrix/fft_kernels_stub.hip.cpp index 753642b555e..973a49bde8b 100644 --- a/hip/matrix/fft_kernels_stub.hip.cpp +++ b/hip/matrix/fft_kernels_stub.hip.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -30,8 +30,8 @@ namespace fft { template void fft(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, bool inverse, + matrix::view::dense> b, + matrix::view::dense> x, bool inverse, array& buffer) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); @@ -39,8 +39,8 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); template void fft2(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, bool inverse, array& buffer) GKO_NOT_IMPLEMENTED; @@ -49,8 +49,8 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); template void fft3(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, size_type size3, bool inverse, array& buffer) GKO_NOT_IMPLEMENTED; diff --git a/hip/solver/common_trs_kernels.hip.hpp b/hip/solver/common_trs_kernels.hip.hpp index 1efa781daf0..092c9d58944 100644 --- a/hip/solver/common_trs_kernels.hip.hpp +++ b/hip/solver/common_trs_kernels.hip.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -158,15 +158,14 @@ template void solve_kernel(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, - matrix::Dense* trans_b, - matrix::Dense* trans_x, - const matrix::Dense* b, - matrix::Dense* x) + matrix::view::dense trans_b, + matrix::view::dense trans_x, + matrix::view::dense b, + matrix::view::dense x) { - if (matrix->get_size()[0] == 0 || b->get_size()[1] == 0) { + if (matrix->get_size()[0] == 0 || b.size[1] == 0) { return; } - using vec = matrix::Dense; if (sparselib::is_supported::value) { if (auto hip_solve_struct = @@ -176,7 +175,7 @@ void solve_kernel(std::shared_ptr exec, { sparselib::pointer_mode_guard pm_guard(handle); - if (b->get_stride() == 1) { + if (b.stride == 1) { sparselib::csrsv2_solve( handle, SPARSELIB_OPERATION_NON_TRANSPOSE, matrix->get_size()[0], @@ -185,13 +184,13 @@ void solve_kernel(std::shared_ptr exec, matrix->get_const_values(), matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(), - hip_solve_struct->solve_info, b->get_const_values(), - x->get_values(), hip_solve_struct->policy, + hip_solve_struct->solve_info, b.values, x.values, + hip_solve_struct->policy, hip_solve_struct->factor_work_vec); } else { - dense::transpose(exec, b, trans_b); - dense::transpose(exec, x, trans_x); - for (IndexType i = 0; i < trans_b->get_size()[0]; i++) { + dense::transpose(exec, b.as_const(), trans_b); + dense::transpose(exec, x.as_const(), trans_x); + for (IndexType i = 0; i < trans_b.size[0]; i++) { sparselib::csrsv2_solve( handle, SPARSELIB_OPERATION_NON_TRANSPOSE, matrix->get_size()[0], @@ -201,12 +200,12 @@ void solve_kernel(std::shared_ptr exec, matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(), hip_solve_struct->solve_info, - trans_b->get_values() + i * trans_b->get_stride(), - trans_x->get_values() + i * trans_x->get_stride(), + trans_b.values + i * trans_b.stride, + trans_x.values + i * trans_x.stride, hip_solve_struct->policy, hip_solve_struct->factor_work_vec); } - dense::transpose(exec, trans_x, x); + dense::transpose(exec, trans_x.as_const(), x); } } } else { diff --git a/hip/solver/lower_trs_kernels.hip.cpp b/hip/solver/lower_trs_kernels.hip.cpp index 5eab76ed5fa..6675bd25971 100644 --- a/hip/solver/lower_trs_kernels.hip.cpp +++ b/hip/solver/lower_trs_kernels.hip.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/lower_trs_kernels.hpp" #include +#include #if HIP_VERSION >= 50200000 @@ -63,11 +64,13 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { - solve_kernel(exec, matrix, solve_struct, trans_b, - trans_x, b, x); + solve_kernel(exec, matrix, solve_struct, *trans_b, + *trans_x, b, x); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/solver/upper_trs_kernels.hip.cpp b/hip/solver/upper_trs_kernels.hip.cpp index fb480d9b22d..106586e309e 100644 --- a/hip/solver/upper_trs_kernels.hip.cpp +++ b/hip/solver/upper_trs_kernels.hip.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/upper_trs_kernels.hpp" #include +#include #if HIP_VERSION >= 50200000 @@ -63,11 +64,13 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { - solve_kernel(exec, matrix, solve_struct, trans_b, - trans_x, b, x); + solve_kernel(exec, matrix, solve_struct, *trans_b, + *trans_x, b, x); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/test/base/kernel_launch.hip.cpp b/hip/test/base/kernel_launch.hip.cpp index 4ab5bf12602..8293e86af63 100644 --- a/hip/test/base/kernel_launch.hip.cpp +++ b/hip/test/base/kernel_launch.hip.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -152,7 +152,7 @@ void run1d(std::shared_ptr exec, gko::matrix::Dense<>* m) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; bool accessors_2d_correct = &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && @@ -165,7 +165,7 @@ void run1d(std::shared_ptr exec, gko::matrix::Dense<>* m) d(i / 4, i % 4) = 0; } }, - 16, m, static_cast*>(m), + 16, m->get_device_view(), m->get_const_device_view(), m->get_const_values(), move_only_val); } @@ -244,8 +244,8 @@ void run2d(std::shared_ptr exec, gko::matrix::Dense<>* m1, static_assert(is_same::value, "type"); static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && - d3.data == d2_ptr && d4 == d3_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr && + d3.values == d2_ptr && d4 == d3_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5 && d3.stride == 6; bool accessors_2d_correct = @@ -261,11 +261,11 @@ void run2d(std::shared_ptr exec, gko::matrix::Dense<>* m1, d(i, j) = 0; } }, - dim<2>{4, 4}, m2->get_stride(), m1, - static_cast*>(m1), m1->get_const_values(), - gko::kernels::hip::default_stride(m2), - gko::kernels::hip::row_vector(m3), m2->get_values(), m3->get_values(), - move_only_val); + dim<2>{4, 4}, m2->get_stride(), m1->get_device_view(), + m1->get_const_device_view(), m1->get_const_values(), + gko::kernels::hip::default_stride(m2->get_device_view()), + gko::kernels::hip::row_vector(m3->get_device_view()), m2->get_values(), + m3->get_values(), move_only_val); } TEST_F(KernelLaunch, Runs2DDense) diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 383d1aaee07..e83e04ae676 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -423,42 +423,43 @@ RegisteredOperation make_register_operation(const char* name, return ::gko::detail::make_register_operation( \ #_kernel, [&args...](auto exec) { \ using exec_type = decltype(exec); \ - if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ + if constexpr (std::is_same< \ + exec_type, \ + std::shared_ptr< \ + const ::gko::ReferenceExecutor>>:: \ + value) { \ ::gko::kernels::reference::_kernel( \ std::dynamic_pointer_cast< \ const ::gko::ReferenceExecutor>(exec), \ std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ + } else if constexpr ( \ + std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ ::gko::kernels::omp::_kernel( \ std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ + } else if constexpr ( \ + std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ ::gko::kernels::cuda::_kernel( \ std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ + } else if constexpr ( \ + std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ ::gko::kernels::hip::_kernel( \ std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ + } else if constexpr ( \ + std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ ::gko::kernels::dpcpp::_kernel( \ std::dynamic_pointer_cast( \ exec), \ diff --git a/include/ginkgo/core/matrix/device_views.hpp b/include/ginkgo/core/matrix/device_views.hpp index afa5104e8b5..d0d2e10d17b 100644 --- a/include/ginkgo/core/matrix/device_views.hpp +++ b/include/ginkgo/core/matrix/device_views.hpp @@ -24,23 +24,23 @@ template struct dense { dim<2> size; size_type stride; - ValueType* data; + ValueType* values; - /** Constructs a dense view from size, stride and data. */ - constexpr dense(dim<2> size, size_type stride, ValueType* data) - : size{size}, stride{stride}, data{data} + /** Constructs a dense view from size, stride and values. */ + constexpr dense(dim<2> size, size_type stride, ValueType* values) + : size{size}, stride{stride}, values{values} {} - /** Returns a const view of the same data */ + /** Returns a const view of the same values */ constexpr dense as_const() const { - return dense{size, stride, data}; + return dense{size, stride, values}; } /** Subscript operator accessing the given row and column */ constexpr ValueType& operator()(size_type row, size_type col) const { - return data[row * stride + col]; + return values[row * stride + col]; } }; diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index 6dca856b96f..716e061648a 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -58,13 +58,13 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void populate_subsets(std::shared_ptr exec, const IndexType index_space_size, - const array* indices, - array* subset_begin, - array* subset_end, - array* superset_indices, const bool is_sorted) + const array& indices, + array& subset_begin, + array& subset_end, + array& superset_indices, const bool is_sorted) { - auto num_indices = indices->get_size(); - auto tmp_indices = gko::array(*indices); + auto num_indices = indices.get_size(); + auto tmp_indices = indices; // Sort the indices if not sorted. if (!is_sorted) { std::sort(tmp_indices.get_data(), tmp_indices.get_data() + num_indices); @@ -100,13 +100,13 @@ void populate_subsets(std::shared_ptr exec, GKO_ASSERT(tmp_subset_begin.size() == tmp_subset_end.size()); GKO_ASSERT((tmp_subset_begin.size() + 1) == tmp_subset_superset_index.size()); - *subset_begin = std::move(gko::array( + subset_begin = std::move(gko::array( exec, tmp_subset_begin.data(), tmp_subset_begin.data() + tmp_subset_begin.size())); - *subset_end = std::move( + subset_end = std::move( gko::array(exec, tmp_subset_end.data(), tmp_subset_end.data() + tmp_subset_end.size())); - *superset_indices = std::move(gko::array( + superset_indices = std::move(gko::array( exec, tmp_subset_superset_index.data(), tmp_subset_superset_index.data() + tmp_subset_superset_index.size())); } diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp index 03d27e75db9..ca0f42fa961 100644 --- a/omp/distributed/vector_kernels.cpp +++ b/omp/distributed/vector_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,7 +20,7 @@ void build_local( const device_matrix_data& input, const experimental::distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx) + comm_index_type local_part, matrix::view::dense local_mtx) { auto row_idxs = input.get_const_row_idxs(); auto col_idxs = input.get_const_col_idxs(); @@ -36,8 +36,8 @@ void build_local( auto part_id = range_parts[range_id]; // skip non-local rows if (part_id == local_part) { - local_mtx->at(map_to_local(row_idxs[i], partition, range_id), - static_cast(col_idxs[i])) = values[i]; + local_mtx(map_to_local(row_idxs[i], partition, range_id), + static_cast(col_idxs[i])) = values[i]; } } } diff --git a/omp/matrix/coo_kernels.cpp b/omp/matrix/coo_kernels.cpp index 021795d8e9c..b635556983b 100644 --- a/omp/matrix/coo_kernels.cpp +++ b/omp/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -36,7 +36,8 @@ namespace coo { template void spmv(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -47,11 +48,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -64,14 +65,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2_blocked(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c, ValueType scale) + matrix::view::dense b, + matrix::view::dense c, ValueType scale) { - GKO_ASSERT(b->get_size()[1] > block_size); + GKO_ASSERT(b.size[1] > block_size); const auto coo_val = a->get_const_values(); const auto coo_col = a->get_const_col_idxs(); const auto coo_row = a->get_const_row_idxs(); - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; const auto rounded_rhs = num_rhs / block_size * block_size; const auto sentinel_row = a->get_size()[0] + 1; const auto nnz = a->get_num_stored_elements(); @@ -103,7 +104,7 @@ void spmv2_blocked(std::shared_ptr exec, for (size_type i = 0; i < block_size; i++) { const auto rhs = i + rhs_base; partial_sum[i] += - scale * coo_val[local_nz] * b->at(col, rhs); + scale * coo_val[local_nz] * b(col, rhs); } } // handle row overlap with previous thread: block add to @@ -111,7 +112,7 @@ void spmv2_blocked(std::shared_ptr exec, #pragma unroll for (size_type i = 0; i < block_size; i++) { const auto rhs = i + rhs_base; - atomic_add(c->at(first, rhs), partial_sum[i]); + atomic_add(c(first, rhs), partial_sum[i]); } } // handle row overlap with previous thread: remainder partial @@ -122,14 +123,13 @@ void spmv2_blocked(std::shared_ptr exec, const auto col = coo_col[nz]; for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) { partial_sum[rhs - rounded_rhs] += - scale * coo_val[nz] * b->at(col, rhs); + scale * coo_val[nz] * b(col, rhs); } } // handle row overlap with previous thread: remainder add to // memory for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) { - atomic_add(c->at(first, rhs), - partial_sum[rhs - rounded_rhs]); + atomic_add(c(first, rhs), partial_sum[rhs - rounded_rhs]); } } // handle non-overlapping rows @@ -141,12 +141,11 @@ void spmv2_blocked(std::shared_ptr exec, #pragma unroll for (size_type i = 0; i < block_size; i++) { const auto rhs = i + rhs_base; - c->at(row, rhs) += - scale * coo_val[nz] * b->at(col, rhs); + c(row, rhs) += scale * coo_val[nz] * b(col, rhs); } } for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) { - c->at(row, rhs) += scale * coo_val[nz] * b->at(col, rhs); + c(row, rhs) += scale * coo_val[nz] * b(col, rhs); } } if (last != sentinel_row) { @@ -161,7 +160,7 @@ void spmv2_blocked(std::shared_ptr exec, for (size_type i = 0; i < block_size; i++) { const auto rhs = i + rhs_base; partial_sum[i] += - scale * coo_val[local_nz] * b->at(col, rhs); + scale * coo_val[local_nz] * b(col, rhs); } } // handle row overlap with following thread: block add to @@ -170,7 +169,7 @@ void spmv2_blocked(std::shared_ptr exec, for (size_type i = 0; i < block_size; i++) { const auto rhs = i + rhs_base; const auto row = last; - atomic_add(c->at(row, rhs), partial_sum[i]); + atomic_add(c(row, rhs), partial_sum[i]); } } // handle row overlap with following thread: block partial sums @@ -179,13 +178,13 @@ void spmv2_blocked(std::shared_ptr exec, const auto col = coo_col[nz]; for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) { partial_sum[rhs - rounded_rhs] += - scale * coo_val[nz] * b->at(col, rhs); + scale * coo_val[nz] * b(col, rhs); } } // handle row overlap with following thread: block add to memory for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) { const auto row = last; - atomic_add(c->at(row, rhs), partial_sum[rhs - rounded_rhs]); + atomic_add(c(row, rhs), partial_sum[rhs - rounded_rhs]); } } } @@ -196,10 +195,10 @@ void spmv2_blocked(std::shared_ptr exec, template void spmv2_small_rhs(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c, ValueType scale) + matrix::view::dense b, + matrix::view::dense c, ValueType scale) { - GKO_ASSERT(b->get_size()[1] == num_rhs); + GKO_ASSERT(b.size[1] == num_rhs); const auto coo_val = a->get_const_values(); const auto coo_col = a->get_const_col_idxs(); const auto coo_row = a->get_const_row_idxs(); @@ -226,14 +225,13 @@ void spmv2_small_rhs(std::shared_ptr exec, const auto col = coo_col[nz]; #pragma unroll for (size_type rhs = 0; rhs < num_rhs; rhs++) { - partial_sum[rhs] += - scale * coo_val[nz] * b->at(col, rhs); + partial_sum[rhs] += scale * coo_val[nz] * b(col, rhs); } } // handle row overlap with previous thread: add to memory #pragma unroll for (size_type rhs = 0; rhs < num_rhs; rhs++) { - atomic_add(c->at(first, rhs), partial_sum[rhs]); + atomic_add(c(first, rhs), partial_sum[rhs]); } } // handle non-overlapping rows @@ -242,7 +240,7 @@ void spmv2_small_rhs(std::shared_ptr exec, const auto col = coo_col[nz]; #pragma unroll for (size_type rhs = 0; rhs < num_rhs; rhs++) { - c->at(row, rhs) += scale * coo_val[nz] * b->at(col, rhs); + c(row, rhs) += scale * coo_val[nz] * b(col, rhs); } } if (last != sentinel_row) { @@ -252,15 +250,14 @@ void spmv2_small_rhs(std::shared_ptr exec, const auto col = coo_col[nz]; #pragma unroll for (size_type rhs = 0; rhs < num_rhs; rhs++) { - partial_sum[rhs] += - scale * coo_val[nz] * b->at(col, rhs); + partial_sum[rhs] += scale * coo_val[nz] * b(col, rhs); } } // handle row overlap with following thread: add to memory #pragma unroll for (size_type rhs = 0; rhs < num_rhs; rhs++) { const auto row = last; - atomic_add(c->at(row, rhs), partial_sum[rhs]); + atomic_add(c(row, rhs), partial_sum[rhs]); } } } @@ -271,10 +268,10 @@ void spmv2_small_rhs(std::shared_ptr exec, template void generic_spmv2(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c, ValueType scale) + matrix::view::dense b, + matrix::view::dense c, ValueType scale) { - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; if (num_rhs <= 0) { return; } @@ -301,7 +298,8 @@ void generic_spmv2(std::shared_ptr exec, template void spmv2(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { generic_spmv2(exec, a, b, c, one()); } @@ -311,12 +309,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { - generic_spmv2(exec, a, b, c, alpha->at(0, 0)); + generic_spmv2(exec, a, b, c, alpha(0, 0)); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 06d44433b27..e9280608c09 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -85,8 +85,8 @@ template void merge_spmv(std::shared_ptr exec, const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, AlphaOp alpha_op, + matrix::view::dense b, + matrix::view::dense c, AlphaOp alpha_op, BetaOp beta_op) { using arithmetic_type = @@ -118,7 +118,7 @@ void merge_spmv(std::shared_ptr exec, auto value_carry_over_ptr = value_carry_over.get_data(); // TODO: parallelize with number of cols, too. - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { // TODO: It uses static from the observation of the previous // experiments. Check it with different system and different kinds of // schedule. @@ -173,8 +173,8 @@ template void classical_spmv(std::shared_ptr exec, const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c, Function lambda) + matrix::view::dense b, + matrix::view::dense c, Function lambda) { using arithmetic_type = highest_precision; @@ -190,7 +190,7 @@ void classical_spmv(std::shared_ptr exec, #pragma omp parallel for for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto sum = zero(); for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { @@ -208,12 +208,12 @@ template void spmv(std::shared_ptr exec, const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { using arithmetic_type = highest_precision; - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do } else if (a->get_strategy()->get_name() == "merge_path") { merge_spmv( @@ -231,17 +231,17 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using arithmetic_type = highest_precision; - auto valpha = static_cast(alpha->at(0, 0)); - auto vbeta = static_cast(beta->at(0, 0)); - if (c->get_size()[0] == 0 || c->get_size()[1] == 0) { + auto valpha = static_cast(alpha(0, 0)); + auto vbeta = static_cast(beta(0, 0)); + if (c.size[0] == 0 || c.size[1] == 0) { // empty output: nothing to do } else if (a->get_strategy()->get_name() == "merge_path") { merge_spmv( @@ -519,16 +519,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, matrix::Csr* c) { auto num_rows = a->get_size()[0]; - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); constexpr auto sentinel = std::numeric_limits::max(); // first sweep: count nnz for each row @@ -688,10 +688,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spgemm_reuse(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, const matrix::csr::lookup_data& c_lookup, matrix::Csr* c) @@ -709,8 +709,8 @@ void advanced_spgemm_reuse(std::shared_ptr exec, const auto b_vals = b->get_const_values(); const auto c_vals = c->get_values(); const auto d_vals = d->get_const_values(); - const auto valpha = alpha->at(0, 0); - const auto vbeta = beta->at(0, 0); + const auto valpha = alpha(0, 0); + const auto vbeta = beta(0, 0); const auto lookup_storage_offsets = c_lookup.storage_offsets.get_const_data(); const auto lookup_storage = c_lookup.storage.get_const_data(); @@ -762,15 +762,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { auto num_rows = a->get_size()[0]; - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); // first sweep: count nnz for each row auto c_row_ptrs = c->get_row_ptrs(); @@ -811,14 +811,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void spgeam_numeric(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); auto c_row_ptrs = c->get_const_row_ptrs(); auto c_col_idxs = c->get_const_col_idxs(); auto c_vals = c->get_values(); @@ -840,7 +840,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Csr* source, - matrix::Dense* result) + matrix::view::dense result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -852,7 +852,7 @@ void fill_in_dense(std::shared_ptr exec, for (size_type row = 0; row < num_rows; ++row) { for (size_type i = row_ptrs[row]; i < static_cast(row_ptrs[row + 1]); ++i) { - result->at(row, col_idxs[i]) = vals[i]; + result(row, col_idxs[i]) = vals[i]; } } } @@ -1006,17 +1006,17 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, array* row_nnz) + const span& col_span, array& row_nnz) { const auto row_ptrs = source->get_const_row_ptrs(); const auto col_idxs = source->get_const_col_idxs(); #pragma omp parallel for for (size_type row = row_span.begin; row < row_span.end; ++row) { - row_nnz->get_data()[row - row_span.begin] = zero(); + row_nnz.get_data()[row - row_span.begin] = zero(); for (auto nnz = row_ptrs[row]; nnz < row_ptrs[row + 1]; ++nnz) { if (col_idxs[nnz] >= col_span.begin && col_idxs[nnz] < col_span.end) { - row_nnz->get_data()[row - row_span.begin]++; + row_nnz.get_data()[row - row_span.begin]++; } } } @@ -1461,7 +1461,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr* to_check, bool* is_sorted) + const matrix::Csr* to_check, bool& is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); @@ -1479,7 +1479,7 @@ void is_sorted_by_column_index( } } } - *is_sorted = local_is_sorted; + is_sorted = local_is_sorted; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1541,15 +1541,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* beta, + matrix::view::dense alpha, + matrix::view::dense beta, matrix::Csr* mtx) { const auto nrows = static_cast(mtx->get_size()[0]); const auto row_ptrs = mtx->get_const_row_ptrs(); const auto vals = mtx->get_values(); - const auto beta_val = beta->get_const_values()[0]; - const auto alpha_val = alpha->get_const_values()[0]; + const auto beta_val = beta.values[0]; + const auto alpha_val = alpha.values[0]; #pragma omp parallel for for (IndexType row = 0; row < nrows; row++) { for (IndexType iz = row_ptrs[row]; iz < row_ptrs[row + 1]; iz++) { diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index fe1f58ef93d..bc604702844 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -38,9 +38,10 @@ namespace dense { template void compute_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, + array& tmp) { // OpenMP uses the unified kernel. compute_dot(exec, x, y, result, tmp); @@ -52,9 +53,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array& tmp) { compute_conj_dot(exec, x, y, result, tmp); @@ -65,10 +66,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, - array& tmp) +void compute_norm2_dispatch( + std::shared_ptr exec, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { compute_norm2(exec, x, result, tmp); } @@ -79,22 +80,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void simple_apply(std::shared_ptr exec, - const matrix::Dense* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense c) { #pragma omp parallel for - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) = zero(); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) = zero(); } } #pragma omp parallel for - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type inner = 0; inner < a->get_size()[1]; ++inner) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) += a->at(row, inner) * b->at(inner, col); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type inner = 0; inner < a.size[1]; ++inner) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) += a(row, inner) * b(inner, col); } } } @@ -105,32 +106,33 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* a, const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* c) + matrix::view::dense alpha, + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { - if (is_nonzero(beta->at(0, 0))) { + if (is_nonzero(beta(0, 0))) { #pragma omp parallel for - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) *= beta->at(0, 0); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) *= beta(0, 0); } } } else { #pragma omp parallel for - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) = zero(); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) = zero(); } } } #pragma omp parallel for - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type inner = 0; inner < a->get_size()[1]; ++inner) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) += - alpha->at(0, 0) * a->at(row, inner) * b->at(inner, col); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type inner = 0; inner < a.size[1]; ++inner) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) += alpha(0, 0) * a(row, inner) * b(inner, col); } } } @@ -141,7 +143,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, const int64* row_ptrs, matrix::Coo* result) { @@ -157,7 +159,7 @@ void convert_to_coo(std::shared_ptr exec, for (size_type row = 0; row < num_rows; ++row) { auto idxs = row_ptrs[row]; for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { row_idxs[idxs] = row; col_idxs[idxs] = col; @@ -174,7 +176,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -189,7 +191,7 @@ void convert_to_csr(std::shared_ptr exec, for (size_type row = 0; row < num_rows; ++row) { auto cur_ptr = row_ptrs[row]; for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { col_idxs[cur_ptr] = col; values[cur_ptr] = val; @@ -205,7 +207,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Ell* result) { auto num_rows = result->get_size()[0]; @@ -222,7 +224,7 @@ void convert_to_ell(std::shared_ptr exec, for (size_type row = 0; row < num_rows; row++) { size_type col_idx = 0; for (size_type col = 0; col < num_cols; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { result->val_at(row, col_idx) = val; result->col_at(row, col_idx) = col; @@ -238,11 +240,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_fbcsr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Fbcsr* result) { - const auto num_rows = source->get_size()[0]; - const auto num_cols = source->get_size()[1]; + const auto num_rows = source.size[0]; + const auto num_cols = source.size[1]; const auto bs = result->get_block_size(); const auto nzbs = result->get_num_stored_blocks(); const auto num_block_rows = num_rows / bs; @@ -262,7 +264,7 @@ void convert_to_fbcsr(std::shared_ptr exec, for (int lcol = 0; lcol < bs; ++lcol) { const auto row = lrow + bs * brow; const auto col = lcol + bs * bcol; - block_nz = block_nz || is_nonzero(source->at(row, col)); + block_nz = block_nz || is_nonzero(source(row, col)); } } if (block_nz) { @@ -271,7 +273,7 @@ void convert_to_fbcsr(std::shared_ptr exec, for (int lcol = 0; lcol < bs; ++lcol) { const auto row = lrow + bs * brow; const auto col = lcol + bs * bcol; - blocks(block, lrow, lcol) = source->at(row, col); + blocks(block, lrow, lcol) = source(row, col); } } block++; @@ -286,7 +288,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, const int64* coo_row_ptrs, matrix::Hybrid* result) { @@ -302,7 +304,7 @@ void convert_to_hybrid(std::shared_ptr exec, size_type ell_count = 0; size_type col = 0; for (; col < num_cols && ell_count < ell_lim; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { result->ell_val_at(row, ell_count) = val; result->ell_col_at(row, ell_count) = col; @@ -315,7 +317,7 @@ void convert_to_hybrid(std::shared_ptr exec, } auto coo_idx = coo_row_ptrs[row]; for (; col < num_cols; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { coo_val[coo_idx] = val; coo_col[coo_idx] = col; @@ -332,7 +334,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Sellp* result) { const auto num_rows = result->get_size()[0]; @@ -353,7 +355,7 @@ void convert_to_sellp(std::shared_ptr exec, const auto sellp_end = slice_sets[slice + 1] * slice_size + local_row; for (size_type col = 0; col < num_cols; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { col_idxs[sellp_idx] = col; vals[sellp_idx] = val; @@ -374,7 +376,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::SparsityCsr* result) { auto num_rows = result->get_size()[0]; @@ -389,7 +391,7 @@ void convert_to_sparsity_csr(std::shared_ptr exec, for (size_type row = 0; row < num_rows; ++row) { auto cur_ptr = row_ptrs[row]; for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { col_idxs[cur_ptr] = col; ++cur_ptr; @@ -404,13 +406,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { #pragma omp parallel for - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - trans->at(j, i) = orig->at(i, j); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + trans(j, i) = orig(i, j); } } } @@ -420,13 +422,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { #pragma omp parallel for - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - trans->at(j, i) = conj(orig->at(i, j)); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + trans(j, i) = conj(orig(i, j)); } } } @@ -436,11 +438,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); template void count_nonzero_blocks_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, int bs, IndexType* result) { - const auto num_rows = source->get_size()[0]; - const auto num_cols = source->get_size()[1]; + const auto num_rows = source.size[0]; + const auto num_cols = source.size[1]; const auto num_block_rows = num_rows / bs; const auto num_block_cols = num_cols / bs; #pragma omp parallel for @@ -452,7 +454,7 @@ void count_nonzero_blocks_per_row(std::shared_ptr exec, for (int lcol = 0; lcol < bs; ++lcol) { const auto row = lrow + bs * brow; const auto col = lcol + bs * bcol; - block_nz = block_nz || is_nonzero(source->at(row, col)); + block_nz = block_nz || is_nonzero(source(row, col)); } } num_nonzero_blocks += block_nz ? 1 : 0; diff --git a/omp/matrix/ell_kernels.cpp b/omp/matrix/ell_kernels.cpp index 6ba6ddf196d..db1a121dd09 100644 --- a/omp/matrix/ell_kernels.cpp +++ b/omp/matrix/ell_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -32,10 +32,10 @@ template void spmv_small_rhs(std::shared_ptr exec, const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c, OutFn out) + matrix::view::dense b, + matrix::view::dense c, OutFn out) { - GKO_ASSERT(b->get_size()[1] == num_rhs); + GKO_ASSERT(b.size[1] == num_rhs); using arithmetic_type = highest_precision; using a_accessor = @@ -51,12 +51,10 @@ void spmv_small_rhs(std::shared_ptr exec, static_cast(num_stored_elements_per_row * stride)}, a->get_const_values()); const auto b_vals = gko::acc::range( - std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), - std::array{ - {static_cast(b->get_stride())}}); + std::array{{static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, + std::array{{static_cast(b.stride)}}); #pragma omp parallel for for (size_type row = 0; row < a->get_size()[0]; row++) { @@ -74,7 +72,7 @@ void spmv_small_rhs(std::shared_ptr exec, } #pragma unroll for (size_type j = 0; j < num_rhs; j++) { - [&] { c->at(row, j) = out(row, j, partial_sum[j]); }(); + [&] { c(row, j) = out(row, j, partial_sum[j]); }(); } } } @@ -84,10 +82,10 @@ template void spmv_blocked(std::shared_ptr exec, const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c, OutFn out) + matrix::view::dense b, + matrix::view::dense c, OutFn out) { - GKO_ASSERT(b->get_size()[1] > block_size); + GKO_ASSERT(b.size[1] > block_size); using arithmetic_type = highest_precision; using a_accessor = @@ -103,14 +101,12 @@ void spmv_blocked(std::shared_ptr exec, static_cast(num_stored_elements_per_row * stride)}, a->get_const_values()); const auto b_vals = gko::acc::range( - std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), - std::array{ - {static_cast(b->get_stride())}}); + std::array{{static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, + std::array{{static_cast(b.stride)}}); - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; const auto rounded_rhs = num_rhs / block_size * block_size; #pragma omp parallel for @@ -132,7 +128,7 @@ void spmv_blocked(std::shared_ptr exec, #pragma unroll for (size_type j = 0; j < block_size; j++) { const auto col = j + rhs_base; - [&] { c->at(row, col) = out(row, col, partial_sum[j]); }(); + [&] { c(row, col) = out(row, col, partial_sum[j]); }(); } } partial_sum.fill(zero()); @@ -146,9 +142,7 @@ void spmv_blocked(std::shared_ptr exec, } } for (size_type j = rounded_rhs; j < num_rhs; j++) { - [&] { - c->at(row, j) = out(row, j, partial_sum[j - rounded_rhs]); - }(); + [&] { c(row, j) = out(row, j, partial_sum[j - rounded_rhs]); }(); } } } @@ -158,10 +152,10 @@ template void spmv(std::shared_ptr exec, const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; if (num_rhs <= 0) { return; } @@ -192,24 +186,24 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Ell* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using arithmetic_type = highest_precision; - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; if (num_rhs <= 0) { return; } - const auto alpha_val = arithmetic_type{alpha->at(0, 0)}; - const auto beta_val = arithmetic_type{beta->at(0, 0)}; + const auto alpha_val = arithmetic_type{alpha(0, 0)}; + const auto beta_val = arithmetic_type{beta(0, 0)}; auto out = [&](auto i, auto j, auto value) { - return is_zero(beta_val) ? alpha_val * value - : alpha_val * value + - beta_val * arithmetic_type{c->at(i, j)}; + return is_zero(beta_val) + ? alpha_val * value + : alpha_val * value + beta_val * arithmetic_type{c(i, j)}; }; if (num_rhs == 1) { spmv_small_rhs<1>(exec, a, b, c, out); diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 874705169a9..6bfe16a69ba 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -40,10 +40,11 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, const matrix::Fbcsr* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const int bs = a->get_block_size(); - const auto nvecs = static_cast(b->get_size()[1]); + const auto nvecs = static_cast(b.size[1]); const IndexType nbrows = a->get_num_block_rows(); const size_type nbnz = a->get_num_stored_blocks(); auto row_ptrs = a->get_const_row_ptrs(); @@ -55,7 +56,7 @@ void spmv(std::shared_ptr exec, for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType row = ibrow * bs; row < (ibrow + 1) * bs; ++row) { for (IndexType rhs = 0; rhs < nvecs; rhs++) { - c->at(row, rhs) = zero(); + c(row, rhs) = zero(); } } for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; @@ -66,7 +67,7 @@ void spmv(std::shared_ptr exec, const auto val = avalues(inz, ib, jb); const auto col = col_idxs[inz] * bs + jb; for (size_type j = 0; j < nvecs; ++j) { - c->at(row, j) += val * b->at(col, j); + c(row, j) += val * b(col, j); } } } @@ -79,20 +80,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Fbcsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { const int bs = a->get_block_size(); - const auto nvecs = static_cast(b->get_size()[1]); + const auto nvecs = static_cast(b.size[1]); const IndexType nbrows = a->get_num_block_rows(); const size_type nbnz = a->get_num_stored_blocks(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); const acc::range> avalues{ to_std_array(nbnz, bs, bs), a->get_const_values()}; @@ -101,9 +102,9 @@ void advanced_spmv(std::shared_ptr exec, for (IndexType row = ibrow * bs; row < (ibrow + 1) * bs; ++row) { for (IndexType rhs = 0; rhs < nvecs; rhs++) { if (is_zero(vbeta)) { - c->at(row, rhs) = zero(vbeta); + c(row, rhs) = zero(vbeta); } else { - c->at(row, rhs) *= vbeta; + c(row, rhs) *= vbeta; } } } @@ -115,7 +116,7 @@ void advanced_spmv(std::shared_ptr exec, const auto val = avalues(inz, ib, jb); const auto col = col_idxs[inz] * bs + jb; for (size_type j = 0; j < nvecs; ++j) - c->at(row, j) += valpha * val * b->at(col, j); + c(row, j) += valpha * val * b(col, j); } } } @@ -187,7 +188,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Fbcsr* source, - matrix::Dense* result) + matrix::view::dense result) { const auto bs = source->get_block_size(); const auto nbrows = source->get_num_block_rows(); @@ -206,7 +207,7 @@ void fill_in_dense(std::shared_ptr exec, const auto row = block_row * bs + local_row; for (int local_col = 0; local_col < bs; local_col++) { const auto col = block_col * bs + local_col; - result->at(row, col) = values(block, local_row, local_col); + result(row, col) = values(block, local_row, local_col); } } } @@ -354,7 +355,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr* to_check, bool* is_sorted) + const matrix::Fbcsr* to_check, bool& is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); @@ -372,7 +373,7 @@ void is_sorted_by_column_index( } } } - *is_sorted = local_is_sorted; + is_sorted = local_is_sorted; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/omp/matrix/fft_kernels.cpp b/omp/matrix/fft_kernels.cpp index c4858b32334..8af4e3ab73b 100644 --- a/omp/matrix/fft_kernels.cpp +++ b/omp/matrix/fft_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,36 +23,37 @@ namespace fft { template -void bfly(const matrix::Dense* b, matrix::Dense* x, - int64 lo, int64 hi, OutValueType root) +void bfly(matrix::view::dense b, + matrix::view::dense x, int64 lo, int64 hi, + OutValueType root) { - for (size_type rhs = 0; rhs < x->get_size()[1]; rhs++) { - auto lo_val = b->at(lo, rhs); - auto hi_val = b->at(hi, rhs); - x->at(lo, rhs) = lo_val + hi_val; - x->at(hi, rhs) = (lo_val - hi_val) * root; + for (size_type rhs = 0; rhs < x.size[1]; rhs++) { + auto lo_val = b(lo, rhs); + auto hi_val = b(hi, rhs); + x(lo, rhs) = lo_val + hi_val; + x(hi, rhs) = (lo_val - hi_val) * root; } } template -void bfly(matrix::Dense* x, int64 lo, int64 hi, ValueType root) +void bfly(matrix::view::dense x, int64 lo, int64 hi, ValueType root) { - for (size_type rhs = 0; rhs < x->get_size()[1]; rhs++) { - auto lo_val = x->at(lo, rhs); - auto hi_val = x->at(hi, rhs); - x->at(lo, rhs) = lo_val + hi_val; - x->at(hi, rhs) = (lo_val - hi_val) * root; + for (size_type rhs = 0; rhs < x.size[1]; rhs++) { + auto lo_val = x(lo, rhs); + auto hi_val = x(hi, rhs); + x(lo, rhs) = lo_val + hi_val; + x(hi, rhs) = (lo_val - hi_val) * root; } } template -void bit_rev_swap(matrix::Dense* x, int64 i, int64 rev_i) +void bit_rev_swap(matrix::view::dense x, int64 i, int64 rev_i) { - for (size_type rhs = 0; rhs < x->get_size()[1]; rhs++) { + for (size_type rhs = 0; rhs < x.size[1]; rhs++) { if (i < rev_i) { - std::swap(x->at(i, rhs), x->at(rev_i, rhs)); + std::swap(x(i, rhs), x(rev_i, rhs)); } } } @@ -82,15 +83,15 @@ vector build_unit_roots(std::shared_ptr exec, template void fft(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, bool inverse, + matrix::view::dense> b, + matrix::view::dense> x, bool inverse, array& buffer) { using complex_type = std::complex; using real_type = ValueType; const int64 sign = inverse ? 1 : -1; - const auto nrhs = b->get_size()[1]; - const auto size = static_cast(b->get_size()[0]); + const auto nrhs = b.size[1]; + const auto size = static_cast(b.size[0]); GKO_ASSERT_IS_POWER_OF_TWO(size); auto roots = build_unit_roots(exec, size, sign); // first butterfly step @@ -124,14 +125,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); template void fft2(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, bool inverse, array& buffer) { using complex_type = std::complex; using real_type = ValueType; const int64 sign = inverse ? 1 : -1; - const auto nrhs = b->get_size()[1]; + const auto nrhs = b.size[1]; const auto ssize1 = static_cast(size1); const auto ssize2 = static_cast(size2); GKO_ASSERT_IS_POWER_OF_TWO(ssize1); @@ -195,14 +196,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); template void fft3(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, size_type size3, bool inverse, array& buffer) { using complex_type = std::complex; using real_type = ValueType; const int64 sign = inverse ? 1 : -1; - const auto nrhs = b->get_size()[1]; + const auto nrhs = b.size[1]; const auto ssize1 = static_cast(size1); const auto ssize2 = static_cast(size2); const auto ssize3 = static_cast(size3); diff --git a/omp/matrix/sellp_kernels.cpp b/omp/matrix/sellp_kernels.cpp index bd19897e992..47c2d73066e 100644 --- a/omp/matrix/sellp_kernels.cpp +++ b/omp/matrix/sellp_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -25,10 +25,10 @@ namespace sellp { template void spmv_small_rhs(std::shared_ptr exec, const matrix::Sellp* a, - const matrix::Dense* b, - matrix::Dense* c, OutFn out) + matrix::view::dense b, + matrix::view::dense c, OutFn out) { - GKO_ASSERT(b->get_size()[1] == num_rhs); + GKO_ASSERT(b.size[1] == num_rhs); auto slice_lengths = a->get_const_slice_lengths(); auto slice_sets = a->get_const_slice_sets(); auto slice_size = a->get_slice_size(); @@ -46,15 +46,14 @@ void spmv_small_rhs(std::shared_ptr exec, if (col != invalid_index()) { #pragma unroll for (size_type j = 0; j < num_rhs; j++) { - partial_sum[j] += val * b->at(col, j); + partial_sum[j] += val * b(col, j); } } } #pragma unroll for (size_type j = 0; j < num_rhs; j++) { [&] { - c->at(global_row, j) = - out(global_row, j, partial_sum[j]); + c(global_row, j) = out(global_row, j, partial_sum[j]); }(); } } @@ -67,14 +66,14 @@ template void spmv_blocked(std::shared_ptr exec, const matrix::Sellp* a, - const matrix::Dense* b, - matrix::Dense* c, OutFn out) + matrix::view::dense b, + matrix::view::dense c, OutFn out) { auto slice_lengths = a->get_const_slice_lengths(); auto slice_sets = a->get_const_slice_sets(); auto slice_size = a->get_slice_size(); auto slice_num = ceildiv(a->get_size()[0] + slice_size - 1, slice_size); - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; const auto rounded_rhs = num_rhs / block_size * block_size; #pragma omp parallel for collapse(2) for (size_type slice = 0; slice < slice_num; slice++) { @@ -91,15 +90,14 @@ void spmv_blocked(std::shared_ptr exec, if (col != invalid_index()) { #pragma unroll for (size_type j = 0; j < block_size; j++) { - partial_sum[j] += - val * b->at(col, j + rhs_base); + partial_sum[j] += val * b(col, j + rhs_base); } } } #pragma unroll for (size_type j = 0; j < block_size; j++) { [&] { - c->at(global_row, j + rhs_base) = + c(global_row, j + rhs_base) = out(global_row, j + rhs_base, partial_sum[j]); }(); } @@ -110,13 +108,13 @@ void spmv_blocked(std::shared_ptr exec, auto col = a->col_at(row, slice_sets[slice], i); if (col != invalid_index()) { for (size_type j = rounded_rhs; j < num_rhs; j++) { - partial_sum[j - rounded_rhs] += val * b->at(col, j); + partial_sum[j - rounded_rhs] += val * b(col, j); } } } for (size_type j = rounded_rhs; j < num_rhs; j++) { [&] { - c->at(global_row, j) = + c(global_row, j) = out(global_row, j, partial_sum[j - rounded_rhs]); }(); } @@ -129,9 +127,10 @@ void spmv_blocked(std::shared_ptr exec, template void spmv(std::shared_ptr exec, const matrix::Sellp* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; if (num_rhs <= 0) { return; } @@ -160,21 +159,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Sellp* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { - const auto num_rhs = b->get_size()[1]; + const auto num_rhs = b.size[1]; if (num_rhs <= 0) { return; } - const auto alpha_val = alpha->at(0, 0); - const auto beta_val = beta->at(0, 0); + const auto alpha_val = alpha(0, 0); + const auto beta_val = beta(0, 0); auto out = [&](auto i, auto j, auto value) { return is_zero(beta_val) ? alpha_val * value - : alpha_val * value + beta_val * c->at(i, j); + : alpha_val * value + beta_val * c(i, j); }; if (num_rhs == 1) { spmv_small_rhs<1>(exec, a, b, c, out); diff --git a/omp/matrix/sparsity_csr_kernels.cpp b/omp/matrix/sparsity_csr_kernels.cpp index 9a2b31e26cc..2e0c568961f 100644 --- a/omp/matrix/sparsity_csr_kernels.cpp +++ b/omp/matrix/sparsity_csr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -35,8 +35,8 @@ template void spmv(std::shared_ptr exec, const matrix::SparsityCsr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { using arithmetic_type = highest_precision; @@ -46,14 +46,14 @@ void spmv(std::shared_ptr exec, #pragma omp parallel for for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto temp_val = gko::zero(); for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { temp_val += - val * static_cast(b->at(col_idxs[k], j)); + val * static_cast(b(col_idxs[k], j)); } - c->at(row, j) = static_cast(temp_val); + c(row, j) = static_cast(temp_val); } } } @@ -65,33 +65,33 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::SparsityCsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using arithmetic_type = highest_precision; auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - const auto valpha = static_cast(alpha->at(0, 0)); - const auto vbeta = static_cast(beta->at(0, 0)); + const auto valpha = static_cast(alpha(0, 0)); + const auto vbeta = static_cast(beta(0, 0)); const auto val = static_cast(a->get_const_value()[0]); #pragma omp parallel for for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto temp_val = gko::zero(); for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { temp_val += - val * static_cast(b->at(col_idxs[k], j)); + val * static_cast(b(col_idxs[k], j)); } - c->at(row, j) = static_cast( + c(row, j) = static_cast( (is_zero(vbeta) ? zero(vbeta) - : vbeta * static_cast(c->at(row, j))) + + : vbeta * static_cast(c(row, j))) + valpha * temp_val); } } @@ -177,7 +177,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr* to_check, bool* is_sorted) + const matrix::SparsityCsr* to_check, bool& is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); @@ -199,7 +199,7 @@ void is_sorted_by_column_index( } } } - *is_sorted = local_is_sorted; + is_sorted = local_is_sorted; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/omp/preconditioner/isai_kernels.cpp b/omp/preconditioner/isai_kernels.cpp index 6f2fe4838d9..4fb38663d76 100644 --- a/omp/preconditioner/isai_kernels.cpp +++ b/omp/preconditioner/isai_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -335,7 +335,7 @@ void generate_excess_system(std::shared_ptr, const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, matrix::Csr* excess_system, - matrix::Dense* excess_rhs, + matrix::view::dense excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -344,11 +344,11 @@ void generate_excess_system(std::shared_ptr, const auto m_vals = input->get_const_values(); const auto i_row_ptrs = inverse->get_const_row_ptrs(); const auto i_cols = inverse->get_const_col_idxs(); - const auto e_dim = excess_rhs->get_size()[0]; + const auto e_dim = excess_rhs.size[0]; auto e_row_ptrs = excess_system->get_row_ptrs(); auto e_cols = excess_system->get_col_idxs(); auto e_vals = excess_system->get_values(); - auto e_rhs = excess_rhs->get_values(); + auto e_rhs = excess_rhs.values; #pragma omp parallel for for (size_type row = e_start; row < e_end; ++row) { @@ -395,10 +395,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, const IndexType* excess_block_ptrs, - matrix::Dense* excess_solution, + matrix::view::dense excess_solution, size_type e_start, size_type e_end) { - auto excess_values = excess_solution->get_values(); + auto excess_values = excess_solution.values; auto offset = excess_block_ptrs[e_start]; #pragma omp parallel for for (size_type row = e_start; row < e_end; ++row) { @@ -420,13 +420,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void scatter_excess_solution(std::shared_ptr, - const IndexType* excess_block_ptrs, - const matrix::Dense* excess_solution, - matrix::Csr* inverse, - size_type e_start, size_type e_end) +void scatter_excess_solution( + std::shared_ptr, const IndexType* excess_block_ptrs, + matrix::view::dense excess_solution, + matrix::Csr* inverse, size_type e_start, + size_type e_end) { - auto excess_values = excess_solution->get_const_values(); + auto excess_values = excess_solution.values; auto values = inverse->get_values(); auto row_ptrs = inverse->get_const_row_ptrs(); auto offset = excess_block_ptrs[e_start]; diff --git a/omp/preconditioner/jacobi_kernels.cpp b/omp/preconditioner/jacobi_kernels.cpp index 76224f97a2f..9cab02c5c03 100644 --- a/omp/preconditioner/jacobi_kernels.cpp +++ b/omp/preconditioner/jacobi_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -489,9 +489,10 @@ void apply(std::shared_ptr exec, size_type num_blocks, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* alpha, - const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -499,18 +500,17 @@ void apply(std::shared_ptr exec, size_type num_blocks, for (size_type i = 0; i < num_blocks; ++i) { const auto group = blocks.get_const_data() + storage_scheme.get_group_offset(i); - const auto block_b = b->get_const_values() + b->get_stride() * ptrs[i]; - const auto block_x = x->get_values() + x->get_stride() * ptrs[i]; + const auto block_b = b.values + b.stride * ptrs[i]; + const auto block_x = x.values + x.stride * ptrs[i]; const auto block_size = ptrs[i + 1] - ptrs[i]; const auto p = prec ? prec[i] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, - apply_block(block_size, b->get_size()[1], + apply_block(block_size, b.size[1], reinterpret_cast(group) + storage_scheme.get_block_offset(i), - storage_scheme.get_stride(), alpha->at(0, 0), block_b, - b->get_stride(), beta->at(0, 0), block_x, - x->get_stride())); + storage_scheme.get_stride(), alpha(0, 0), block_b, + b.stride, beta(0, 0), block_x, x.stride)); } } @@ -525,7 +525,7 @@ void simple_apply( storage_scheme, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* b, matrix::Dense* x) + matrix::view::dense b, matrix::view::dense x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -533,18 +533,17 @@ void simple_apply( for (size_type i = 0; i < num_blocks; ++i) { const auto group = blocks.get_const_data() + storage_scheme.get_group_offset(i); - const auto block_b = b->get_const_values() + b->get_stride() * ptrs[i]; - const auto block_x = x->get_values() + x->get_stride() * ptrs[i]; + const auto block_b = b.values + b.stride * ptrs[i]; + const auto block_x = x.values + x.stride * ptrs[i]; const auto block_size = ptrs[i + 1] - ptrs[i]; const auto p = prec ? prec[i] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, - apply_block(block_size, b->get_size()[1], + apply_block(block_size, b.size[1], reinterpret_cast(group) + storage_scheme.get_block_offset(i), storage_scheme.get_stride(), one(), block_b, - b->get_stride(), zero(), block_x, - x->get_stride())); + b.stride, zero(), block_x, x.stride)); } } diff --git a/omp/solver/cb_gmres_kernels.cpp b/omp/solver/cb_gmres_kernels.cpp index aeffc3202d6..1de576807d3 100644 --- a/omp/solver/cb_gmres_kernels.cpp +++ b/omp/solver/cb_gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -30,20 +30,20 @@ namespace { template -void finish_arnoldi_CGS(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - Accessor3d krylov_bases, - matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, const stopping_status* stop_status) +void finish_arnoldi_CGS( + std::shared_ptr exec, + matrix::view::dense next_krylov_basis, Accessor3d krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, size_type iter, + const stopping_status* stop_status) { using rc_vtype = remove_complex; constexpr bool has_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; const rc_vtype eta = 1.0 / sqrt(2.0); - for (size_type i = 0; i < next_krylov_basis->get_size()[1]; ++i) { + for (size_type i = 0; i < next_krylov_basis.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } @@ -54,28 +54,27 @@ void finish_arnoldi_CGS(std::shared_ptr exec, [](auto row, auto col, auto next_krylov_basis) { return squared_norm(next_krylov_basis(row, col)); }, - GKO_KERNEL_REDUCE_SUM(rc_vtype), &nrm, - next_krylov_basis->get_size()[0], static_cast(i), - next_krylov_basis); - arnoldi_norm->at(0, i) = eta * sqrt(nrm); + GKO_KERNEL_REDUCE_SUM(rc_vtype), &nrm, next_krylov_basis.size[0], + static_cast(i), next_krylov_basis); + arnoldi_norm(0, i) = eta * sqrt(nrm); // nrmP = norm(next_krylov_basis) #pragma omp parallel for for (size_type k = 0; k < iter + 1; ++k) { ValueType hessenberg_iter_entry = zero(); - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { hessenberg_iter_entry += - next_krylov_basis->at(j, i) * conj(krylov_bases(k, j, i)); + next_krylov_basis(j, i) * conj(krylov_bases(k, j, i)); } - hessenberg_iter->at(k, i) = hessenberg_iter_entry; + hessenberg_iter(k, i) = hessenberg_iter_entry; } // for i in 1:iter // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) // end for (size_type k = 0; k < iter + 1; ++k) { #pragma omp parallel for - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - next_krylov_basis->at(j, i) -= - hessenberg_iter->at(k, i) * krylov_bases(k, j, i); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + next_krylov_basis(j, i) -= + hessenberg_iter(k, i) * krylov_bases(k, j, i); } } // for i in 1:iter @@ -95,39 +94,36 @@ void finish_arnoldi_CGS(std::shared_ptr exec, std::max(a.second, b.second)); }, [](auto a) { return a; }, std::make_pair(rc_vtype{}, rc_vtype{}), - &result_pair, next_krylov_basis->get_size()[0], - static_cast(i), next_krylov_basis); + &result_pair, next_krylov_basis.size[0], static_cast(i), + next_krylov_basis); nrm = result_pair.first; inf = result_pair.second; - arnoldi_norm->at(1, i) = sqrt(nrm); + arnoldi_norm(1, i) = sqrt(nrm); if (has_scalar) { - arnoldi_norm->at(2, i) = inf; + arnoldi_norm(2, i) = inf; } for (size_type l = 1; - (arnoldi_norm->at(1, i)) < (arnoldi_norm->at(0, i)) && l < 3; - l++) { - arnoldi_norm->at(0, i) = eta * arnoldi_norm->at(1, i); + (arnoldi_norm(1, i)) < (arnoldi_norm(0, i)) && l < 3; l++) { + arnoldi_norm(0, i) = eta * arnoldi_norm(1, i); // nrmP = nrmN #pragma omp parallel for for (size_type k = 0; k < iter + 1; ++k) { ValueType hessenberg_iter_entry = zero(); - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; - ++j) { - hessenberg_iter_entry += next_krylov_basis->at(j, i) * - conj(krylov_bases(k, j, i)); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + hessenberg_iter_entry += + next_krylov_basis(j, i) * conj(krylov_bases(k, j, i)); } - buffer_iter->at(k, i) = hessenberg_iter_entry; + buffer_iter(k, i) = hessenberg_iter_entry; } // for i in 1:iter // buffer(iter, i) = next_krylov_basis' * krylov_bases(:, i) // end for (size_type k = 0; k < iter + 1; ++k) { #pragma omp parallel for - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; - ++j) { - next_krylov_basis->at(j, i) -= - buffer_iter->at(k, i) * conj(krylov_bases(k, j, i)); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + next_krylov_basis(j, i) -= + buffer_iter(k, i) * conj(krylov_bases(k, j, i)); } } // for i in 1:iter @@ -147,27 +143,26 @@ void finish_arnoldi_CGS(std::shared_ptr exec, }, [](auto a) { return a; }, std::make_pair(rc_vtype{}, rc_vtype{}), &result_pair, - next_krylov_basis->get_size()[0], static_cast(i), + next_krylov_basis.size[0], static_cast(i), next_krylov_basis); nrm = result_pair.first; inf = result_pair.second; - arnoldi_norm->at(1, i) = sqrt(nrm); + arnoldi_norm(1, i) = sqrt(nrm); if (has_scalar) { - arnoldi_norm->at(2, i) = inf; + arnoldi_norm(2, i) = inf; } // nrmN = norm(next_krylov_basis) // nrmI = infnorm(next_krylov_basis) } gko::cb_gmres::helper_functions_accessor::write_scalar( - krylov_bases, iter + 1, i, - arnoldi_norm->at(2, i) / arnoldi_norm->at(1, i)); + krylov_bases, iter + 1, i, arnoldi_norm(2, i) / arnoldi_norm(1, i)); // reorthogonalization - hessenberg_iter->at(iter + 1, i) = (arnoldi_norm->at(1, i)); + hessenberg_iter(iter + 1, i) = (arnoldi_norm(1, i)); // hessenberg(iter, iter + 1) = norm(next_krylov_basis) #pragma omp parallel for - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - next_krylov_basis->at(j, i) /= hessenberg_iter->at(iter + 1, i); - krylov_bases(iter + 1, j, i) = next_krylov_basis->at(j, i); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + next_krylov_basis(j, i) /= hessenberg_iter(iter + 1, i); + krylov_bases(iter + 1, j, i) = next_krylov_basis(j, i); } // next_krylov_basis /= hessenberg(iter, iter + 1) // krylov_bases(:, iter + 1) = next_krylov_basis @@ -177,45 +172,45 @@ void finish_arnoldi_CGS(std::shared_ptr exec, template -void calculate_sin_and_cos(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, +void calculate_sin_and_cos(matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, size_type iter, const size_type rhs) { - if (is_zero(hessenberg_iter->at(iter, rhs))) { - givens_cos->at(iter, rhs) = zero(); - givens_sin->at(iter, rhs) = one(); + if (is_zero(hessenberg_iter(iter, rhs))) { + givens_cos(iter, rhs) = zero(); + givens_sin(iter, rhs) = one(); } else { - auto this_hess = hessenberg_iter->at(iter, rhs); - auto next_hess = hessenberg_iter->at(iter + 1, rhs); + auto this_hess = hessenberg_iter(iter, rhs); + auto next_hess = hessenberg_iter(iter + 1, rhs); const auto scale = abs(this_hess) + abs(next_hess); const auto hypotenuse = scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + abs(next_hess / scale) * abs(next_hess / scale)); - givens_cos->at(iter, rhs) = conj(this_hess) / hypotenuse; - givens_sin->at(iter, rhs) = conj(next_hess) / hypotenuse; + givens_cos(iter, rhs) = conj(this_hess) / hypotenuse; + givens_sin(iter, rhs) = conj(next_hess) / hypotenuse; } } template -void givens_rotation(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, size_type iter, - const stopping_status* stop_status) +void givens_rotation(matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, + size_type iter, const stopping_status* stop_status) { #pragma omp parallel for - for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { + for (size_type i = 0; i < hessenberg_iter.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } for (size_type j = 0; j < iter; ++j) { - auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + - givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j + 1, i) = - -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + - conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j, i) = temp; + auto temp = givens_cos(j, i) * hessenberg_iter(j, i) + + givens_sin(j, i) * hessenberg_iter(j + 1, i); + hessenberg_iter(j + 1, i) = + -conj(givens_sin(j, i)) * hessenberg_iter(j, i) + + conj(givens_cos(j, i)) * hessenberg_iter(j + 1, i); + hessenberg_iter(j, i) = temp; // temp = cos(j)*hessenberg(j) + // sin(j)*hessenberg(j+1) // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + @@ -225,10 +220,10 @@ void givens_rotation(matrix::Dense* givens_sin, calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); - hessenberg_iter->at(iter, i) = - givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + - givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); - hessenberg_iter->at(iter + 1, i) = zero(); + hessenberg_iter(iter, i) = + givens_cos(iter, i) * hessenberg_iter(iter, i) + + givens_sin(iter, i) * hessenberg_iter(iter + 1, i); + hessenberg_iter(iter + 1, i) = zero(); // hessenberg(iter) = cos(iter)*hessenberg(iter) + // sin(iter)*hessenberg(iter + 1) // hessenberg(iter+1) = 0 @@ -238,46 +233,43 @@ void givens_rotation(matrix::Dense* givens_sin, template void calculate_next_residual_norm( - matrix::Dense* givens_sin, matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, size_type iter, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, size_type iter, const stopping_status* stop_status) { #pragma omp parallel for - for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { + for (size_type i = 0; i < residual_norm.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } - residual_norm_collection->at(iter + 1, i) = - -conj(givens_sin->at(iter, i)) * - residual_norm_collection->at(iter, i); - residual_norm_collection->at(iter, i) = - givens_cos->at(iter, i) * residual_norm_collection->at(iter, i); - residual_norm->at(0, i) = - abs(residual_norm_collection->at(iter + 1, i)); + residual_norm_collection(iter + 1, i) = + -conj(givens_sin(iter, i)) * residual_norm_collection(iter, i); + residual_norm_collection(iter, i) = + givens_cos(iter, i) * residual_norm_collection(iter, i); + residual_norm(0, i) = abs(residual_norm_collection(iter + 1, i)); } } template void solve_upper_triangular( - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, matrix::Dense* y, - const size_type* final_iter_nums) + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg, + matrix::view::dense y, const size_type* final_iter_nums) { #pragma omp parallel for - for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { + for (size_type k = 0; k < residual_norm_collection.size[1]; ++k) { for (int64 i = final_iter_nums[k] - 1; i >= 0; --i) { - auto temp = residual_norm_collection->at(i, k); + auto temp = residual_norm_collection(i, k); for (size_type j = i + 1; j < final_iter_nums[k]; ++j) { temp -= - hessenberg->at( - i, j * residual_norm_collection->get_size()[1] + k) * - y->at(j, k); + hessenberg(i, j * residual_norm_collection.size[1] + k) * + y(j, k); } - y->at(i, k) = - temp / hessenberg->at( - i, i * residual_norm_collection->get_size()[1] + k); + y(i, k) = + temp / hessenberg(i, i * residual_norm_collection.size[1] + k); } } } @@ -285,17 +277,16 @@ void solve_upper_triangular( template void calculate_qy(ConstAccessor3d krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, + matrix::view::dense y, + matrix::view::dense before_preconditioner, const size_type* final_iter_nums) { #pragma omp parallel for - for (size_type i = 0; i < before_preconditioner->get_size()[0]; ++i) { - for (size_type k = 0; k < before_preconditioner->get_size()[1]; ++k) { - before_preconditioner->at(i, k) = zero(); + for (size_type i = 0; i < before_preconditioner.size[0]; ++i) { + for (size_type k = 0; k < before_preconditioner.size[1]; ++k) { + before_preconditioner(i, k) = zero(); for (size_type j = 0; j < final_iter_nums[k]; ++j) { - before_preconditioner->at(i, k) += - krylov_bases(j, i, k) * y->at(j, k); + before_preconditioner(i, k) += krylov_bases(j, i, k) * y(j, k); } } } @@ -307,26 +298,26 @@ void calculate_qy(ConstAccessor3d krylov_bases, template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - array* stop_status, size_type krylov_dim) + matrix::view::dense b, + matrix::view::dense residual, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + array& stop_status, size_type krylov_dim) { using rc_vtype = remove_complex; - for (size_type j = 0; j < b->get_size()[1]; ++j) { + for (size_type j = 0; j < b.size[1]; ++j) { #pragma omp parallel for - for (size_type i = 0; i < b->get_size()[0]; ++i) { - residual->at(i, j) = b->at(i, j); + for (size_type i = 0; i < b.size[0]; ++i) { + residual(i, j) = b(i, j); } #pragma omp parallel for for (size_type i = 0; i < krylov_dim; ++i) { - givens_sin->at(i, j) = zero(); - givens_cos->at(i, j) = zero(); + givens_sin(i, j) = zero(); + givens_cos(i, j) = zero(); } - stop_status->get_data()[j].reset(); + stop_status.get_data()[j].reset(); } } @@ -336,20 +327,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE( template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense>* arnoldi_norm, + matrix::view::dense residual, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense> arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense* next_krylov_basis, - array* final_iter_nums, array&, + matrix::view::dense next_krylov_basis, + array& final_iter_nums, array&, size_type krylov_dim) { using rc_vtype = remove_complex; constexpr bool has_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; - for (size_type j = 0; j < residual->get_size()[1]; ++j) { + for (size_type j = 0; j < residual.size[1]; ++j) { // Calculate residual norm auto res_norm = zero(); auto res_inf = zero(); @@ -366,44 +357,43 @@ void restart(std::shared_ptr exec, std::max(a.second, b.second)); }, [](auto a) { return a; }, std::make_pair(rc_vtype{}, rc_vtype{}), - &result_pair, next_krylov_basis->get_size()[0], - static_cast(j), residual); + &result_pair, next_krylov_basis.size[0], static_cast(j), + residual); res_norm = result_pair.first; res_inf = result_pair.second; - residual_norm->at(0, j) = sqrt(res_norm); + residual_norm(0, j) = sqrt(res_norm); if (has_scalar) { - arnoldi_norm->at(2, j) = res_inf; + arnoldi_norm(2, j) = res_inf; } gko::cb_gmres::helper_functions_accessor::write_scalar( - krylov_bases, {0}, j, - arnoldi_norm->at(2, j) / residual_norm->at(0, j)); + krylov_bases, {0}, j, arnoldi_norm(2, j) / residual_norm(0, j)); #pragma omp parallel for for (size_type i = 0; i < krylov_dim + 1; ++i) { if (i == 0) { - residual_norm_collection->at(i, j) = residual_norm->at(0, j); + residual_norm_collection(i, j) = residual_norm(0, j); } else { - residual_norm_collection->at(i, j) = zero(); + residual_norm_collection(i, j) = zero(); } } #pragma omp parallel for - for (size_type i = 0; i < residual->get_size()[0]; ++i) { - auto value = residual->at(i, j) / residual_norm->at(0, j); + for (size_type i = 0; i < residual.size[0]; ++i) { + auto value = residual(i, j) / residual_norm(0, j); krylov_bases(0, i, j) = value; - next_krylov_basis->at(i, j) = value; + next_krylov_basis(i, j) = value; } - final_iter_nums->get_data()[j] = 0; + final_iter_nums.get_data()[j] = 0; } #pragma omp parallel for for (size_type k = 1; k < krylov_dim + 1; ++k) { - for (size_type j = 0; j < residual->get_size()[1]; ++j) { + for (size_type j = 0; j < residual.size[1]; ++j) { gko::cb_gmres::helper_functions_accessor::write_scalar( krylov_bases, k, j, one()); } - for (size_type i = 0; i < residual->get_size()[0]; ++i) { - for (size_type j = 0; j < residual->get_size()[1]; ++j) { + for (size_type i = 0; i < residual.size[0]; ++i) { + for (size_type j = 0; j < residual.size[1]; ++j) { krylov_bases(k, i, j) = zero(); } } @@ -415,32 +405,33 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_RESTART_KERNEL); template void arnoldi(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, array* final_iter_nums, - const array* stop_status, array*, - array*) + matrix::view::dense next_krylov_basis, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + Accessor3d krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, + size_type iter, array& final_iter_nums, + const array& stop_status, array&, + array&) { #pragma omp parallel for - for (size_type i = 0; i < final_iter_nums->get_size(); ++i) { - final_iter_nums->get_data()[i] += + for (size_type i = 0; i < final_iter_nums.get_size(); ++i) { + final_iter_nums.get_data()[i] += (1 - static_cast( - stop_status->get_const_data()[i].has_stopped())); + stop_status.get_const_data()[i].has_stopped())); } finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter, buffer_iter, arnoldi_norm, iter, - stop_status->get_const_data()); + stop_status.get_const_data()); givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, - stop_status->get_const_data()); + stop_status.get_const_data()); calculate_next_residual_norm(givens_sin, givens_cos, residual_norm, residual_norm_collection, iter, - stop_status->get_const_data()); + stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); @@ -448,17 +439,17 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); template void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, + matrix::view::dense residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense* hessenberg, - matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array* final_iter_nums) + matrix::view::dense hessenberg, + matrix::view::dense y, + matrix::view::dense before_preconditioner, + const array& final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, - final_iter_nums->get_const_data()); - calculate_qy(krylov_bases, y, before_preconditioner, - final_iter_nums->get_const_data()); + final_iter_nums.get_const_data()); + calculate_qy(krylov_bases, y.as_const(), before_preconditioner, + final_iter_nums.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( diff --git a/omp/solver/idr_kernels.cpp b/omp/solver/idr_kernels.cpp index 9218f48c859..0bdcecbc4bb 100644 --- a/omp/solver/idr_kernels.cpp +++ b/omp/solver/idr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -33,23 +33,23 @@ namespace { template void solve_lower_triangular(const size_type nrhs, - const matrix::Dense* m, - const matrix::Dense* f, - matrix::Dense* c, - const array* stop_status) + matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense c, + const array& stop_status) { #pragma omp parallel for - for (size_type i = 0; i < f->get_size()[1]; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + for (size_type i = 0; i < f.size[1]; i++) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } - for (size_type row = 0; row < m->get_size()[0]; row++) { - auto temp = f->at(row, i); + for (size_type row = 0; row < m.size[0]; row++) { + auto temp = f(row, i); for (size_type col = 0; col < row; col++) { - temp -= m->at(row, col * nrhs + i) * c->at(col, i); + temp -= m(row, col * nrhs + i) * c(col, i); } - c->at(row, i) = temp / m->at(row, row * nrhs + i); + c(row, i) = temp / m(row, row * nrhs + i); } } } @@ -57,33 +57,34 @@ void solve_lower_triangular(const size_type nrhs, template void update_g_and_u(const size_type nrhs, const size_type k, - const matrix::Dense* p, - const matrix::Dense* m, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense m, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, + const array& stop_status) { #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } for (size_type j = 0; j < k; j++) { auto alpha = zero(); - for (size_type ind = 0; ind < p->get_size()[1]; ind++) { - alpha += p->at(j, ind) * g_k->at(ind, i); + for (size_type ind = 0; ind < p.size[1]; ind++) { + alpha += p(j, ind) * g_k(ind, i); } - alpha /= m->at(j, j * nrhs + i); + alpha /= m(j, j * nrhs + i); - for (size_type row = 0; row < g->get_size()[0]; row++) { - g_k->at(row, i) -= alpha * g->at(row, j * nrhs + i); - u->at(row, k * nrhs + i) -= alpha * u->at(row, j * nrhs + i); + for (size_type row = 0; row < g.size[0]; row++) { + g_k(row, i) -= alpha * g(row, j * nrhs + i); + u(row, k * nrhs + i) -= alpha * u(row, j * nrhs + i); } } - for (size_type row = 0; row < g->get_size()[0]; row++) { - g->at(row, k * nrhs + i) = g_k->at(row, i); + for (size_type row = 0; row < g.size[0]; row++) { + g(row, k * nrhs + i) = g_k(row, i); } } } @@ -112,9 +113,9 @@ get_rand_value(Distribution&& dist, Generator&& gen) template void initialize(std::shared_ptr exec, const size_type nrhs, - matrix::Dense* m, - matrix::Dense* subspace_vectors, bool deterministic, - array* stop_status) + matrix::view::dense m, + matrix::view::dense subspace_vectors, + bool deterministic, array& stop_status) { if (nrhs == 0) { return; @@ -123,27 +124,27 @@ void initialize(std::shared_ptr exec, const size_type nrhs, // Initialize M #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { - stop_status->get_data()[i].reset(); + stop_status.get_data()[i].reset(); } #pragma omp parallel for - for (size_type row = 0; row < m->get_size()[0]; row++) { - for (size_type col = 0; col < m->get_size()[1]; col++) { - m->at(row, col) = + for (size_type row = 0; row < m.size[0]; row++) { + for (size_type col = 0; col < m.size[1]; col++) { + m(row, col) = (row == col / nrhs) ? one() : zero(); } } // Initialize and Orthonormalize P - const auto num_rows = subspace_vectors->get_size()[0]; - const auto num_cols = subspace_vectors->get_size()[1]; + const auto num_rows = subspace_vectors.size[0]; + const auto num_cols = subspace_vectors.size[1]; auto dist = std::normal_distribution<>(0.0, 1.0); auto seed = std::random_device{}(); auto gen = std::default_random_engine(seed); for (size_type row = 0; row < num_rows; row++) { if (!deterministic) { for (size_type col = 0; col < num_cols; col++) { - subspace_vectors->at(row, col) = + subspace_vectors(row, col) = get_rand_value(dist, gen); } } @@ -161,8 +162,7 @@ void initialize(std::shared_ptr exec, const size_type nrhs, subspace_vectors); #pragma omp parallel for for (size_type j = 0; j < num_cols; j++) { - subspace_vectors->at(row, j) -= - dot * subspace_vectors->at(i, j); + subspace_vectors(row, j) -= dot * subspace_vectors(i, j); } } @@ -179,7 +179,7 @@ void initialize(std::shared_ptr exec, const size_type nrhs, #pragma omp parallel for for (size_type j = 0; j < num_cols; j++) { - subspace_vectors->at(row, j) /= norm; + subspace_vectors(row, j) /= norm; } } } @@ -189,31 +189,31 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* m, - const matrix::Dense* f, - const matrix::Dense* residual, - const matrix::Dense* g, matrix::Dense* c, - matrix::Dense* v, - const array* stop_status) + const size_type k, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense residual, + matrix::view::dense g, + matrix::view::dense c, matrix::view::dense v, + const array& stop_status) { - const auto m_size = m->get_size(); + const auto m_size = m.size; // Compute c = M \ f solve_lower_triangular(nrhs, m, f, c, stop_status); for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } // v = residual - c_k * g_k - ... - c_s * g_s #pragma omp parallel for - for (size_type row = 0; row < v->get_size()[0]; row++) { - auto temp = residual->at(row, i); - for (size_type j = k; j < m->get_size()[0]; j++) { - temp -= c->at(j, i) * g->at(row, j * nrhs + i); + for (size_type row = 0; row < v.size[0]; row++) { + auto temp = residual(row, i); + for (size_type j = k; j < m.size[0]; j++) { + temp -= c(j, i) * g(row, j * nrhs + i); } - v->at(row, i) = temp; + v(row, i) = temp; } } } @@ -223,23 +223,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* omega, - const matrix::Dense* preconditioned_vector, - const matrix::Dense* c, matrix::Dense* u, - const array* stop_status) + const size_type k, matrix::view::dense omega, + matrix::view::dense preconditioned_vector, + matrix::view::dense c, + matrix::view::dense u, + const array& stop_status) { for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } #pragma omp parallel for - for (size_type row = 0; row < u->get_size()[0]; row++) { - auto temp = omega->at(0, i) * preconditioned_vector->at(row, i); - for (size_type j = k; j < c->get_size()[0]; j++) { - temp += c->at(j, i) * u->at(row, j * nrhs + i); + for (size_type row = 0; row < u.size[0]; row++) { + auto temp = omega(0, i) * preconditioned_vector(row, i); + for (size_type j = k; j < c.size[0]; j++) { + temp += c(j, i) * u(row, j * nrhs + i); } - u->at(row, k * nrhs + i) = temp; + u(row, k * nrhs + i) = temp; } } } @@ -249,42 +250,45 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* p, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, matrix::Dense* m, - matrix::Dense* f, matrix::Dense*, - matrix::Dense* residual, matrix::Dense* x, - const array* stop_status) + const size_type k, matrix::view::dense p, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense alpha, + matrix::view::dense residual, + matrix::view::dense x, + const array& stop_status) { - update_g_and_u(nrhs, k, p, m, g, g_k, u, stop_status); + update_g_and_u(nrhs, k, p, m.as_const(), g, g_k, u, stop_status); for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } #pragma omp parallel for - for (size_type j = k; j < m->get_size()[0]; j++) { + for (size_type j = k; j < m.size[0]; j++) { auto temp = zero(); - for (size_type ind = 0; ind < p->get_size()[1]; ind++) { - temp += p->at(j, ind) * g->at(ind, k * nrhs + i); + for (size_type ind = 0; ind < p.size[1]; ind++) { + temp += p(j, ind) * g(ind, k * nrhs + i); } - m->at(j, k * nrhs + i) = temp; + m(j, k * nrhs + i) = temp; } - auto beta = f->at(k, i) / m->at(k, k * nrhs + i); + auto beta = f(k, i) / m(k, k * nrhs + i); #pragma omp parallel for - for (size_type row = 0; row < g->get_size()[0]; row++) { - residual->at(row, i) -= beta * g->at(row, k * nrhs + i); - x->at(row, i) += beta * u->at(row, k * nrhs + i); + for (size_type row = 0; row < g.size[0]; row++) { + residual(row, i) -= beta * g(row, k * nrhs + i); + x(row, i) += beta * u(row, k * nrhs + i); } - if (k + 1 < f->get_size()[0]) { - f->at(k, i) = zero(); + if (k + 1 < f.size[0]) { + f(k, i) = zero(); #pragma omp parallel for - for (size_type j = k + 1; j < f->get_size()[0]; j++) { - f->at(j, i) -= beta * m->at(j, k * nrhs + i); + for (size_type j = k + 1; j < f.size[0]; j++) { + f(j, i) -= beta * m(j, k * nrhs + i); } } } @@ -296,27 +300,29 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense* tht, - const matrix::Dense>* residual_norm, - matrix::Dense* omega, const array* stop_status) + const remove_complex kappa, + matrix::view::dense tht, + matrix::view::dense> residual_norm, + matrix::view::dense omega, + const array& stop_status) { #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } - auto thr = omega->at(0, i); - auto normt = sqrt(real(tht->at(0, i))); + auto thr = omega(0, i); + auto normt = sqrt(real(tht(0, i))); if (normt == zero>()) { - omega->at(0, i) = 0; + omega(0, i) = 0; continue; } - omega->at(0, i) /= tht->at(0, i); - auto absrho = abs(thr / (normt * residual_norm->at(0, i))); + omega(0, i) /= tht(0, i); + auto absrho = abs(thr / (normt * residual_norm(0, i))); if (absrho < kappa) { - omega->at(0, i) *= kappa / absrho; + omega(0, i) *= kappa / absrho; } } } diff --git a/omp/solver/lower_trs_kernels.cpp b/omp/solver/lower_trs_kernels.cpp index 6dac6b46078..b8d5c4259a1 100644 --- a/omp/solver/lower_trs_kernels.cpp +++ b/omp/solver/lower_trs_kernels.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/lower_trs_kernels.hpp" #include +#include #include @@ -60,29 +61,31 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); auto vals = matrix->get_const_values(); #pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { + for (size_type j = 0; j < b.size[1]; ++j) { for (size_type row = 0; row < matrix->get_size()[0]; ++row) { auto diag = one(); - x->at(row, j) = b->at(row, j); + x(row, j) = b(row, j); for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) { auto col = col_idxs[k]; if (col < row) { - x->at(row, j) -= vals[k] * x->at(col, j); + x(row, j) -= vals[k] * x(col, j); } if (col == row) { diag = vals[k]; } } if (!unit_diag) { - x->at(row, j) /= diag; + x(row, j) /= diag; } } } diff --git a/omp/solver/multigrid_kernels.cpp b/omp/solver/multigrid_kernels.cpp index 2203187383d..4709b05d27a 100644 --- a/omp/solver/multigrid_kernels.cpp +++ b/omp/solver/multigrid_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,23 +23,24 @@ namespace multigrid { template void kcycle_step_1(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* v, - matrix::Dense* g, matrix::Dense* d, - matrix::Dense* e) + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense v, + matrix::view::dense g, + matrix::view::dense d, + matrix::view::dense e) { - const auto nrows = g->get_size()[0]; - const auto nrhs = g->get_size()[1]; + const auto nrows = g.size[0]; + const auto nrhs = g.size[1]; #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { - auto temp = alpha->at(0, i) / rho->at(0, i); + auto temp = alpha(0, i) / rho(0, i); for (size_type j = 0; j < nrows; j++) { if (is_finite(temp)) { - g->at(j, i) -= temp * v->at(j, i); - e->at(j, i) *= temp; + g(j, i) -= temp * v(j, i); + e(j, i) *= temp; } - d->at(j, i) = e->at(j, i); + d(j, i) = e(j, i); } } } @@ -49,26 +50,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL); template void kcycle_step_2(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* gamma, - const matrix::Dense* beta, - const matrix::Dense* zeta, - const matrix::Dense* d, - matrix::Dense* e) + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense gamma, + matrix::view::dense beta, + matrix::view::dense zeta, + matrix::view::dense d, + matrix::view::dense e) { - const auto nrows = e->get_size()[0]; - const auto nrhs = e->get_size()[1]; + const auto nrows = e.size[0]; + const auto nrhs = e.size[1]; #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { - auto scalar_d = zeta->at(0, i) / - (beta->at(0, i) - - gamma->at(0, i) * gamma->at(0, i) / rho->at(0, i)); - auto scalar_e = - one() - gamma->at(0, i) / alpha->at(0, i) * scalar_d; + auto scalar_d = + zeta(0, i) / (beta(0, i) - gamma(0, i) * gamma(0, i) / rho(0, i)); + auto scalar_e = one() - gamma(0, i) / alpha(0, i) * scalar_d; if (is_finite(scalar_d) && is_finite(scalar_e)) { for (size_type j = 0; j < nrows; j++) { - e->at(j, i) = scalar_e * e->at(j, i) + scalar_d * d->at(j, i); + e(j, i) = scalar_e * e(j, i) + scalar_d * d(j, i); } } } @@ -79,14 +78,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL); template void kcycle_check_stop(std::shared_ptr exec, - const matrix::Dense* old_norm, - const matrix::Dense* new_norm, + matrix::view::dense old_norm, + matrix::view::dense new_norm, const ValueType rel_tol, bool& is_stop) { is_stop = true; #pragma omp parallel for shared(is_stop) - for (size_type i = 0; i < old_norm->get_size()[1]; i++) { - if (new_norm->at(0, i) > rel_tol * old_norm->at(0, i)) { + for (size_type i = 0; i < old_norm.size[1]; i++) { + if (new_norm(0, i) > rel_tol * old_norm(0, i)) { #pragma omp atomic write is_stop = false; } diff --git a/omp/solver/upper_trs_kernels.cpp b/omp/solver/upper_trs_kernels.cpp index ea05cabeb63..a24e251f578 100644 --- a/omp/solver/upper_trs_kernels.cpp +++ b/omp/solver/upper_trs_kernels.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/upper_trs_kernels.hpp" #include +#include #include @@ -60,31 +61,33 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); auto vals = matrix->get_const_values(); #pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { + for (size_type j = 0; j < b.size[1]; ++j) { for (size_type inv_row = 0; inv_row < matrix->get_size()[0]; ++inv_row) { auto row = matrix->get_size()[0] - 1 - inv_row; auto diag = one(); - x->at(row, j) = b->at(row, j); + x(row, j) = b(row, j); for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) { auto col = col_idxs[k]; if (col > row) { - x->at(row, j) -= vals[k] * x->at(col, j); + x(row, j) -= vals[k] * x(col, j); } if (col == row) { diag = vals[k]; } } if (!unit_diag) { - x->at(row, j) /= diag; + x(row, j) /= diag; } } } diff --git a/omp/stop/criterion_kernels.cpp b/omp/stop/criterion_kernels.cpp index 65d880515d9..fc514a1776c 100644 --- a/omp/stop/criterion_kernels.cpp +++ b/omp/stop/criterion_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -19,11 +19,11 @@ namespace set_all_statuses { void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, - bool setFinalized, array* stop_status) + bool setFinalized, array& stop_status) { #pragma omp parallel for - for (int i = 0; i < stop_status->get_size(); i++) { - stop_status->get_data()[i].stop(stoppingId, setFinalized); + for (int i = 0; i < stop_status.get_size(); i++) { + stop_status.get_data()[i].stop(stoppingId, setFinalized); } } diff --git a/omp/stop/residual_norm_kernels.cpp b/omp/stop/residual_norm_kernels.cpp index 0ec4395a16b..f6b478e2c93 100644 --- a/omp/stop/residual_norm_kernels.cpp +++ b/omp/stop/residual_norm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,34 +23,34 @@ namespace residual_norm { template void residual_norm(std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense* orig_tau, + matrix::view::dense tau, + matrix::view::dense orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, - bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, + bool& one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); bool local_one_changed = false; #pragma omp parallel for reduction(|| : local_one_changed) - for (size_type i = 0; i < tau->get_size()[1]; ++i) { - if (tau->at(i) <= rel_residual_goal * orig_tau->at(i)) { - stop_status->get_data()[i].converge(stoppingId, setFinalized); + for (size_type i = 0; i < tau.size[1]; ++i) { + if (tau(0, i) <= rel_residual_goal * orig_tau(0, i)) { + stop_status.get_data()[i].converge(stoppingId, setFinalized); local_one_changed = true; } } - *one_changed = local_one_changed; + one_changed = local_one_changed; // No early stopping here because one cannot use break with parallel for // But it's parallel so does it matter? bool local_all_converged = true; #pragma omp parallel for reduction(&& : local_all_converged) - for (size_type i = 0; i < stop_status->get_size(); ++i) { - if (!stop_status->get_const_data()[i].has_stopped()) { + for (size_type i = 0; i < stop_status.get_size(); ++i) { + if (!stop_status.get_const_data()[i].has_stopped()) { local_all_converged = false; } } - *all_converged = local_all_converged; + all_converged = local_all_converged; } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( @@ -71,31 +71,31 @@ namespace implicit_residual_norm { template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense>* orig_tau, + matrix::view::dense tau, + matrix::view::dense> orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, bool& one_changed) { bool local_one_changed = false; #pragma omp parallel for reduction(|| : local_one_changed) - for (size_type i = 0; i < tau->get_size()[1]; ++i) { - if (sqrt(abs(tau->at(i))) <= rel_residual_goal * orig_tau->at(i)) { - stop_status->get_data()[i].converge(stoppingId, setFinalized); + for (size_type i = 0; i < tau.size[1]; ++i) { + if (sqrt(abs(tau(0, i))) <= rel_residual_goal * orig_tau(0, i)) { + stop_status.get_data()[i].converge(stoppingId, setFinalized); local_one_changed = true; } } - *one_changed = local_one_changed; + one_changed = local_one_changed; // No early stopping here because one cannot use break with parallel for // But it's parallel so does it matter? bool local_all_converged = true; #pragma omp parallel for reduction(&& : local_all_converged) - for (size_type i = 0; i < stop_status->get_size(); ++i) { - if (!stop_status->get_const_data()[i].has_stopped()) { + for (size_type i = 0; i < stop_status.get_size(); ++i) { + if (!stop_status.get_const_data()[i].has_stopped()) { local_all_converged = false; } } - *all_converged = local_all_converged; + all_converged = local_all_converged; } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); diff --git a/omp/test/base/index_set.cpp b/omp/test/base/index_set.cpp index 98a11bb8720..0d9818f2ab6 100644 --- a/omp/test/base/index_set.cpp +++ b/omp/test/base/index_set.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -71,11 +71,11 @@ TYPED_TEST(index_set, PopulateSubsetsIsEquivalentToReferenceForUnsortedInput) auto omp_superset_comp = gko::array{this->omp}; gko::kernels::reference::idx_set::populate_subsets( - this->ref, TypeParam(520), &rand_arr, &ref_begin_comp, &ref_end_comp, - &ref_superset_comp, false); + this->ref, TypeParam(520), rand_arr, ref_begin_comp, ref_end_comp, + ref_superset_comp, false); gko::kernels::omp::idx_set::populate_subsets( - this->omp, TypeParam(520), &rand_arr, &omp_begin_comp, &omp_end_comp, - &omp_superset_comp, false); + this->omp, TypeParam(520), rand_arr, omp_begin_comp, omp_end_comp, + omp_superset_comp, false); GKO_ASSERT_ARRAY_EQ(ref_begin_comp, omp_begin_comp); GKO_ASSERT_ARRAY_EQ(ref_end_comp, omp_end_comp); @@ -95,11 +95,11 @@ TYPED_TEST(index_set, PopulateSubsetsIsEquivalentToReferenceForSortedInput) auto omp_superset_comp = gko::array{this->omp}; gko::kernels::reference::idx_set::populate_subsets( - this->ref, TypeParam(520), &rand_arr, &ref_begin_comp, &ref_end_comp, - &ref_superset_comp, false); + this->ref, TypeParam(520), rand_arr, ref_begin_comp, ref_end_comp, + ref_superset_comp, false); gko::kernels::omp::idx_set::populate_subsets( - this->omp, TypeParam(520), &rand_arr, &omp_begin_comp, &omp_end_comp, - &omp_superset_comp, false); + this->omp, TypeParam(520), rand_arr, omp_begin_comp, omp_end_comp, + omp_superset_comp, false); GKO_ASSERT_ARRAY_EQ(ref_begin_comp, omp_begin_comp); GKO_ASSERT_ARRAY_EQ(ref_end_comp, omp_end_comp); @@ -116,11 +116,11 @@ TYPED_TEST(index_set, IndicesContainsIsEquivalentToReference) auto ref_indices_arr = this->setup_random_indices(73); auto ref_validity_arr = gko::array(this->omp, 73); gko::kernels::reference::idx_set::compute_validity( - this->ref, &ref_indices_arr, &ref_validity_arr); + this->ref, ref_indices_arr, ref_validity_arr); auto omp_indices_arr = gko::array(this->omp, ref_indices_arr); auto omp_validity_arr = gko::array(this->omp, 73); - gko::kernels::omp::idx_set::compute_validity(this->omp, &omp_indices_arr, - &omp_validity_arr); + gko::kernels::omp::idx_set::compute_validity(this->omp, omp_indices_arr, + omp_validity_arr); GKO_ASSERT_ARRAY_EQ(ref_validity_arr, omp_validity_arr); } diff --git a/omp/test/base/kernel_launch.cpp b/omp/test/base/kernel_launch.cpp index 5644d67caee..2072349a3fe 100644 --- a/omp/test/base/kernel_launch.cpp +++ b/omp/test/base/kernel_launch.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -138,7 +138,7 @@ TEST_F(KernelLaunch, Runs1DDense) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; bool accessors_2d_correct = &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && @@ -151,8 +151,9 @@ TEST_F(KernelLaunch, Runs1DDense) d(i / 4, i % 4) = 0; } }, - 16, zero_dense2.get(), - static_cast*>(zero_dense2.get()), + 16, zero_dense2->get_device_view(), + static_cast*>(zero_dense2.get()) + ->get_const_device_view(), zero_dense2->get_const_values(), move_only_val); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); @@ -215,8 +216,8 @@ TEST_F(KernelLaunch, Runs2DDense) static_assert(is_same::value, "type"); static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && - d3.data == d2_ptr && d4 == d3_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr && + d3.values == d2_ptr && d4 == d3_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5 && d3.stride == 6; bool accessors_2d_correct = @@ -232,11 +233,12 @@ TEST_F(KernelLaunch, Runs2DDense) d(i, j) = 0; } }, - dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2.get(), - static_cast*>(zero_dense2.get()), + dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2->get_device_view(), + static_cast*>(zero_dense2.get()) + ->get_const_device_view(), zero_dense2->get_const_values(), - gko::kernels::omp::default_stride(zero_dense.get()), - gko::kernels::omp::row_vector(vec_dense.get()), + gko::kernels::omp::default_stride(zero_dense->get_device_view()), + gko::kernels::omp::row_vector(vec_dense->get_device_view()), zero_dense->get_values(), vec_dense->get_values(), move_only_val); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 6f769472a6f..bd7c26a7bfb 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -36,13 +36,13 @@ namespace idx_set { template void compute_validity(std::shared_ptr exec, - const array* local_indices, - array* validity_array) + const array& local_indices, + array& validity_array) { - auto num_elems = local_indices->get_size(); + auto num_elems = local_indices.get_size(); for (size_type i = 0; i < num_elems; ++i) { - validity_array->get_data()[i] = - local_indices->get_const_data()[i] != invalid_index(); + validity_array.get_data()[i] = + local_indices.get_const_data()[i] != invalid_index(); } } @@ -74,13 +74,13 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void populate_subsets(std::shared_ptr exec, const IndexType index_space_size, - const array* indices, - array* subset_begin, - array* subset_end, - array* superset_indices, const bool is_sorted) + const array& indices, + array& subset_begin, + array& subset_end, + array& superset_indices, const bool is_sorted) { - auto num_indices = indices->get_size(); - auto tmp_indices = gko::array(*indices); + auto num_indices = indices.get_size(); + auto tmp_indices = indices; // Sort the indices if not sorted. if (!is_sorted) { std::sort(tmp_indices.get_data(), tmp_indices.get_data() + num_indices); @@ -116,13 +116,13 @@ void populate_subsets(std::shared_ptr exec, GKO_ASSERT(tmp_subset_begin.size() == tmp_subset_end.size()); GKO_ASSERT((tmp_subset_begin.size() + 1) == tmp_subset_superset_index.size()); - *subset_begin = std::move(gko::array( + subset_begin = std::move(gko::array( exec, tmp_subset_begin.data(), tmp_subset_begin.data() + tmp_subset_begin.size())); - *subset_end = std::move( + subset_end = std::move( gko::array(exec, tmp_subset_end.data(), tmp_subset_end.data() + tmp_subset_end.size())); - *superset_indices = std::move(gko::array( + superset_indices = std::move(gko::array( exec, tmp_subset_superset_index.data(), tmp_subset_superset_index.data() + tmp_subset_superset_index.size())); } diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index e306aba7343..8b301572740 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -142,14 +142,14 @@ void has_ordered_parts( std::shared_ptr exec, const experimental::distributed::Partition* partition, - bool* result) + bool& result) { - *result = true; + result = true; auto part_ids = partition->get_part_ids(); for (comm_index_type i = 1; i < partition->get_num_ranges(); ++i) { if (part_ids[i] < part_ids[i - 1]) { - *result = false; + result = false; return; } } diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp index 95ae679f055..c275d3b8697 100644 --- a/reference/distributed/vector_kernels.cpp +++ b/reference/distributed/vector_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,7 +20,7 @@ void build_local( const device_matrix_data& input, const experimental::distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx) + comm_index_type local_part, matrix::view::dense local_mtx) { auto row_idxs = input.get_const_row_idxs(); auto col_idxs = input.get_const_col_idxs(); @@ -34,8 +34,8 @@ void build_local( auto part_id = range_parts[range_id]; // skip non-local rows if (part_id == local_part) { - local_mtx->at(map_to_local(row_idxs[i], partition, range_id), - static_cast(col_idxs[i])) = values[i]; + local_mtx(map_to_local(row_idxs[i], partition, range_id), + static_cast(col_idxs[i])) = values[i]; } } } diff --git a/reference/matrix/coo_kernels.cpp b/reference/matrix/coo_kernels.cpp index 6ebc4a282d6..7bf990be27f 100644 --- a/reference/matrix/coo_kernels.cpp +++ b/reference/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -32,7 +32,8 @@ namespace coo { template void spmv(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -43,11 +44,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -60,15 +61,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, const matrix::Coo* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { auto coo_val = a->get_const_values(); auto coo_col = a->get_const_col_idxs(); auto coo_row = a->get_const_row_idxs(); - auto num_cols = b->get_size()[1]; + auto num_cols = b.size[1]; for (size_type i = 0; i < a->get_num_stored_elements(); i++) { for (size_type j = 0; j < num_cols; j++) { - c->at(coo_row[i], j) += coo_val[i] * b->at(coo_col[i], j); + c(coo_row[i], j) += coo_val[i] * b(coo_col[i], j); } } } @@ -78,20 +80,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Coo* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { auto coo_val = a->get_const_values(); auto coo_col = a->get_const_col_idxs(); auto coo_row = a->get_const_row_idxs(); - auto alpha_val = alpha->at(0, 0); - auto num_cols = b->get_size()[1]; + auto alpha_val = alpha(0, 0); + auto num_cols = b.size[1]; for (size_type i = 0; i < a->get_num_stored_elements(); i++) { for (size_type j = 0; j < num_cols; j++) { - c->at(coo_row[i], j) += - alpha_val * coo_val[i] * b->at(coo_col[i], j); + c(coo_row[i], j) += alpha_val * coo_val[i] * b(coo_col[i], j); } } } @@ -103,13 +104,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Coo* source, - matrix::Dense* result) + matrix::view::dense result) { auto coo_val = source->get_const_values(); auto coo_col = source->get_const_col_idxs(); auto coo_row = source->get_const_row_idxs(); for (size_type i = 0; i < source->get_num_stored_elements(); i++) { - result->at(coo_row[i], coo_col[i]) += coo_val[i]; + result(coo_row[i], coo_col[i]) += coo_val[i]; } } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 8d6a102e8f6..dfbee93c841 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -48,8 +48,8 @@ template void spmv(std::shared_ptr exec, const matrix::Csr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { using arithmetic_type = highest_precision; @@ -64,7 +64,7 @@ void spmv(std::shared_ptr exec, auto c_vals = acc::helper::build_rrm_accessor(c); for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto sum = zero(); for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { @@ -84,19 +84,19 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using arithmetic_type = highest_precision; auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - auto valpha = static_cast(alpha->at(0, 0)); - auto vbeta = static_cast(beta->at(0, 0)); + auto valpha = static_cast(alpha(0, 0)); + auto vbeta = static_cast(beta(0, 0)); const auto a_vals = acc::helper::build_const_rrm_accessor(a); @@ -104,7 +104,7 @@ void advanced_spmv(std::shared_ptr exec, acc::helper::build_const_rrm_accessor(b); auto c_vals = acc::helper::build_rrm_accessor(c); for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto sum = is_zero(vbeta) ? zero(vbeta) : c_vals(row, j) * vbeta; for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { @@ -246,16 +246,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, matrix::Csr* c) { auto num_rows = a->get_size()[0]; - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); // first sweep: count nnz for each row auto c_row_ptrs = c->get_row_ptrs(); @@ -353,10 +353,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spgemm_reuse(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, const matrix::Csr* b, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* d, const matrix::csr::lookup_data& c_lookup, matrix::Csr* c) @@ -374,8 +374,8 @@ void advanced_spgemm_reuse(std::shared_ptr exec, const auto b_vals = b->get_const_values(); const auto c_vals = c->get_values(); const auto d_vals = d->get_const_values(); - const auto valpha = alpha->at(0, 0); - const auto vbeta = beta->at(0, 0); + const auto valpha = alpha(0, 0); + const auto vbeta = beta(0, 0); const auto lookup_storage_offsets = c_lookup.storage_offsets.get_const_data(); const auto lookup_storage = c_lookup.storage.get_const_data(); @@ -423,15 +423,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { auto num_rows = a->get_size()[0]; - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); // first sweep: count nnz for each row auto c_row_ptrs = c->get_row_ptrs(); @@ -472,14 +472,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void spgeam_numeric(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Csr* a, - const matrix::Dense* beta, + matrix::view::dense beta, const matrix::Csr* b, matrix::Csr* c) { - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); auto c_row_ptrs = c->get_const_row_ptrs(); auto c_col_idxs = c->get_const_col_idxs(); auto c_vals = c->get_values(); @@ -505,7 +505,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Csr* source, - matrix::Dense* result) + matrix::view::dense result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -516,7 +516,7 @@ void fill_in_dense(std::shared_ptr exec, for (size_type row = 0; row < num_rows; ++row) { for (size_type i = row_ptrs[row]; i < static_cast(row_ptrs[row + 1]); ++i) { - result->at(row, col_idxs[i]) = vals[i]; + result(row, col_idxs[i]) = vals[i]; } } } @@ -748,16 +748,16 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, array* row_nnz) + const span& col_span, array& row_nnz) { size_type res_row = 0; for (auto row = row_span.begin; row < row_span.end; ++row) { - row_nnz->get_data()[res_row] = zero(); + row_nnz.get_data()[res_row] = zero(); for (auto nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { if (source->get_const_col_idxs()[nnz] < col_span.end && source->get_const_col_idxs()[nnz] >= col_span.begin) { - row_nnz->get_data()[res_row]++; + row_nnz.get_data()[res_row]++; } } res_row++; @@ -1294,21 +1294,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr* to_check, bool* is_sorted) + const matrix::Csr* to_check, bool& is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); const auto size = to_check->get_size(); + is_sorted = true; for (size_type i = 0; i < size[0]; ++i) { for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { if (col_idxs[idx - 1] > col_idxs[idx]) { - *is_sorted = false; + is_sorted = false; return; } } } - *is_sorted = true; - return; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1341,14 +1340,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); template void scale(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, matrix::Csr* to_scale) { const auto nnz = to_scale->get_num_stored_elements(); auto values = to_scale->get_values(); for (size_type idx = 0; idx < nnz; idx++) { - values[idx] *= alpha->at(0, 0); + values[idx] *= alpha(0, 0); } } @@ -1357,14 +1356,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SCALE_KERNEL); template void inv_scale(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, matrix::Csr* to_scale) { const auto nnz = to_scale->get_num_stored_elements(); auto values = to_scale->get_values(); for (size_type idx = 0; idx < nnz; idx++) { - values[idx] /= alpha->at(0, 0); + values[idx] /= alpha(0, 0); } } @@ -1400,8 +1399,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* beta, + matrix::view::dense alpha, + matrix::view::dense beta, matrix::Csr* mtx) { const auto nrows = static_cast(mtx->get_size()[0]); @@ -1409,9 +1408,9 @@ void add_scaled_identity(std::shared_ptr exec, const auto vals = mtx->get_values(); for (IndexType row = 0; row < nrows; row++) { for (IndexType iz = row_ptrs[row]; iz < row_ptrs[row + 1]; iz++) { - vals[iz] *= beta->get_const_values()[0]; + vals[iz] *= beta.values[0]; if (row == mtx->get_const_col_idxs()[iz]) { - vals[iz] += alpha->get_const_values()[0]; + vals[iz] += alpha.values[0]; } } } diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 7c36d9101d5..dc79d2b739c 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -37,20 +37,20 @@ namespace dense { template void simple_apply(std::shared_ptr exec, - const matrix::Dense* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense c) { - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) = zero(); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) = zero(); } } - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type inner = 0; inner < a->get_size()[1]; ++inner) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) += a->at(row, inner) * b->at(inner, col); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type inner = 0; inner < a.size[1]; ++inner) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) += a(row, inner) * b(inner, col); } } } @@ -61,29 +61,30 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* a, const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* c) + matrix::view::dense alpha, + matrix::view::dense a, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { - if (is_nonzero(beta->at(0, 0))) { - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) *= beta->at(0, 0); + if (is_nonzero(beta(0, 0))) { + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) *= beta(0, 0); } } } else { - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) = zero(); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) = zero(); } } } - for (size_type row = 0; row < c->get_size()[0]; ++row) { - for (size_type inner = 0; inner < a->get_size()[1]; ++inner) { - for (size_type col = 0; col < c->get_size()[1]; ++col) { - c->at(row, col) += - alpha->at(0, 0) * a->at(row, inner) * b->at(inner, col); + for (size_type row = 0; row < c.size[0]; ++row) { + for (size_type inner = 0; inner < a.size[1]; ++inner) { + for (size_type col = 0; col < c.size[1]; ++col) { + c(row, col) += alpha(0, 0) * a(row, inner) * b(inner, col); } } } @@ -94,13 +95,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void copy(std::shared_ptr exec, - const matrix::Dense* input, - matrix::Dense* output) + matrix::view::dense input, + matrix::view::dense output) { - for (size_type row = 0; row < input->get_size()[0]; ++row) { - for (size_type col = 0; col < input->get_size()[1]; ++col) { - output->at(row, col) = - static_cast(input->at(row, col)); + for (size_type row = 0; row < input.size[0]; ++row) { + for (size_type col = 0; col < input.size[1]; ++col) { + output(row, col) = static_cast(input(row, col)); } } } @@ -111,11 +111,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( template void fill(std::shared_ptr exec, - matrix::Dense* mat, ValueType value) + matrix::view::dense mat, ValueType value) { - for (size_type row = 0; row < mat->get_size()[0]; ++row) { - for (size_type col = 0; col < mat->get_size()[1]; ++col) { - mat->at(row, col) = value; + for (size_type row = 0; row < mat.size[0]; ++row) { + for (size_type col = 0; col < mat.size[1]; ++col) { + mat(row, col) = value; } } } @@ -125,22 +125,23 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); template void scale(std::shared_ptr exec, - const matrix::Dense* alpha, matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense x) { - if (alpha->get_size()[1] == 1) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (is_zero(alpha->at(0, 0))) { - x->at(i, j) = zero(); + if (alpha.size[1] == 1) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (is_zero(alpha(0, 0))) { + x(i, j) = zero(); } else { - x->at(i, j) *= alpha->at(0, 0); + x(i, j) *= alpha(0, 0); } } } } else { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - x->at(i, j) *= alpha->at(0, j); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + x(i, j) *= alpha(0, j); } } } @@ -151,19 +152,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); template void inv_scale(std::shared_ptr exec, - const matrix::Dense* alpha, - matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense x) { - if (alpha->get_size()[1] == 1) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - x->at(i, j) /= alpha->at(0, 0); + if (alpha.size[1] == 1) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + x(i, j) /= alpha(0, 0); } } } else { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - x->at(i, j) /= alpha->at(0, j); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + x(i, j) /= alpha(0, j); } } } @@ -175,21 +176,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void add_scaled(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* x, matrix::Dense* y) + matrix::view::dense alpha, + matrix::view::dense x, + matrix::view::dense y) { - if (alpha->get_size()[1] == 1) { - if (is_nonzero(alpha->at(0, 0))) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - y->at(i, j) += alpha->at(0, 0) * x->at(i, j); + if (alpha.size[1] == 1) { + if (is_nonzero(alpha(0, 0))) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + y(i, j) += alpha(0, 0) * x(i, j); } } } } else { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - y->at(i, j) += alpha->at(0, j) * x->at(i, j); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + y(i, j) += alpha(0, j) * x(i, j); } } } @@ -201,21 +203,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void sub_scaled(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* x, matrix::Dense* y) + matrix::view::dense alpha, + matrix::view::dense x, + matrix::view::dense y) { - if (alpha->get_size()[1] == 1) { - if (is_nonzero(alpha->at(0, 0))) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - y->at(i, j) -= alpha->at(0, 0) * x->at(i, j); + if (alpha.size[1] == 1) { + if (is_nonzero(alpha(0, 0))) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + y(i, j) -= alpha(0, 0) * x(i, j); } } } } else { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - y->at(i, j) -= alpha->at(0, j) * x->at(i, j); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + y(i, j) -= alpha(0, j) * x(i, j); } } } @@ -227,14 +230,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void add_scaled_diag(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Diagonal* x, - matrix::Dense* y) + matrix::view::dense y) { const auto diag_values = x->get_const_values(); - if (is_nonzero(alpha->at(0, 0))) { + if (is_nonzero(alpha(0, 0))) { for (size_type i = 0; i < x->get_size()[0]; i++) { - y->at(i, i) += alpha->at(0, 0) * diag_values[i]; + y(i, i) += alpha(0, 0) * diag_values[i]; } } } @@ -244,14 +247,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); template void sub_scaled_diag(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Diagonal* x, - matrix::Dense* y) + matrix::view::dense y) { const auto diag_values = x->get_const_values(); - if (is_nonzero(alpha->at(0, 0))) { + if (is_nonzero(alpha(0, 0))) { for (size_type i = 0; i < x->get_size()[0]; i++) { - y->at(i, i) -= alpha->at(0, 0) * diag_values[i]; + y(i, i) -= alpha(0, 0) * diag_values[i]; } } } @@ -261,16 +264,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array&) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array&) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero(); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = zero(); } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) += x->at(i, j) * y->at(i, j); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) += x(i, j) * y(i, j); } } } @@ -280,9 +283,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template void compute_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array& tmp) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, + array& tmp) { compute_dot(exec, x, y, result, tmp); } @@ -293,16 +297,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, array&) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array&) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero(); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = zero(); } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) += conj(x->at(i, j)) * y->at(i, j); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) += conj(x(i, j)) * y(i, j); } } } @@ -312,9 +316,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template void compute_conj_dot_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result, + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense result, array& tmp) { compute_conj_dot(exec, x, y, result, tmp); @@ -326,20 +330,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_norm2(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, + matrix::view::dense x, + matrix::view::dense> result, array&) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero>(); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = zero>(); } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) += squared_norm(x->at(i, j)); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) += squared_norm(x(i, j)); } } - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = sqrt(result->at(0, j)); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = sqrt(result(0, j)); } } @@ -347,10 +351,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); template -void compute_norm2_dispatch(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, - array& tmp) +void compute_norm2_dispatch( + std::shared_ptr exec, + matrix::view::dense x, + matrix::view::dense> result, array& tmp) { compute_norm2(exec, x, result, tmp); } @@ -361,16 +365,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_norm1(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, + matrix::view::dense x, + matrix::view::dense> result, array&) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero>(); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = zero>(); } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) += abs(x->at(i, j)); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) += abs(x(i, j)); } } } @@ -380,21 +384,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); template void compute_mean(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense* result, array&) + matrix::view::dense x, + matrix::view::dense result, array&) { using ValueType_nc = gko::remove_complex; - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero(); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = zero(); } - if (x->get_size()[0] == 0) return; + if (x.size[0] == 0) return; - for (size_type i = 0; i < x->get_size()[1]; ++i) { - for (size_type j = 0; j < x->get_size()[0]; ++j) { - result->at(0, i) += x->at(j, i); + for (size_type i = 0; i < x.size[1]; ++i) { + for (size_type j = 0; j < x.size[0]; ++j) { + result(0, i) += x(j, i); } - result->at(0, i) /= static_cast(x->get_size()[0]); + result(0, i) /= static_cast(x.size[0]); } } @@ -404,10 +408,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL); template void fill_in_matrix_data(std::shared_ptr exec, const device_matrix_data& data, - matrix::Dense* output) + matrix::view::dense output) { for (size_type i = 0; i < data.get_num_stored_elements(); i++) { - output->at(data.get_const_row_idxs()[i], data.get_const_col_idxs()[i]) = + output(data.get_const_row_idxs()[i], data.get_const_col_idxs()[i]) = data.get_const_values()[i]; } } @@ -417,17 +421,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void compute_squared_norm2(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result, - array&) +void compute_squared_norm2( + std::shared_ptr exec, + matrix::view::dense x, + matrix::view::dense> result, array&) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero>(); + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) = zero>(); } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) += squared_norm(x->at(i, j)); + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + result(0, j) += squared_norm(x(i, j)); } } } @@ -438,11 +442,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_sqrt(std::shared_ptr exec, - matrix::Dense* data) + matrix::view::dense data) { - for (size_type i = 0; i < data->get_size()[0]; ++i) { - for (size_type j = 0; j < data->get_size()[1]; ++j) { - data->at(i, j) = sqrt(data->at(i, j)); + for (size_type i = 0; i < data.size[0]; ++i) { + for (size_type j = 0; j < data.size[1]; ++j) { + data(i, j) = sqrt(data(i, j)); } } } @@ -453,7 +457,7 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense* source, const int64*, + matrix::view::dense source, const int64*, matrix::Coo* result) { auto num_rows = result->get_size()[0]; @@ -467,7 +471,7 @@ void convert_to_coo(std::shared_ptr exec, size_type idxs = 0; for (size_type row = 0; row < num_rows; ++row) { for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { row_idxs[idxs] = row; col_idxs[idxs] = col; @@ -484,7 +488,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -499,7 +503,7 @@ void convert_to_csr(std::shared_ptr exec, row_ptrs[0] = cur_ptr; for (size_type row = 0; row < num_rows; ++row) { for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { col_idxs[cur_ptr] = col; values[cur_ptr] = val; @@ -516,7 +520,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Ell* result) { auto num_rows = result->get_size()[0]; @@ -532,7 +536,7 @@ void convert_to_ell(std::shared_ptr exec, for (size_type row = 0; row < num_rows; row++) { col_idx = 0; for (size_type col = 0; col < num_cols; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { result->val_at(row, col_idx) = val; result->col_at(row, col_idx) = col; @@ -548,11 +552,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_fbcsr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Fbcsr* result) { - const auto num_rows = source->get_size()[0]; - const auto num_cols = source->get_size()[1]; + const auto num_rows = source.size[0]; + const auto num_cols = source.size[1]; const auto bs = result->get_block_size(); const auto nzbs = result->get_num_stored_blocks(); const auto num_block_rows = num_rows / bs; @@ -571,7 +575,7 @@ void convert_to_fbcsr(std::shared_ptr exec, for (int lcol = 0; lcol < bs; ++lcol) { const auto row = lrow + bs * brow; const auto col = lcol + bs * bcol; - block_nz = block_nz || is_nonzero(source->at(row, col)); + block_nz = block_nz || is_nonzero(source(row, col)); } } if (block_nz) { @@ -580,7 +584,7 @@ void convert_to_fbcsr(std::shared_ptr exec, for (int lcol = 0; lcol < bs; ++lcol) { const auto row = lrow + bs * brow; const auto col = lcol + bs * bcol; - blocks(block, lrow, lcol) = source->at(row, col); + blocks(block, lrow, lcol) = source(row, col); } } block++; @@ -595,7 +599,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense* source, const int64*, + matrix::view::dense source, + const int64*, matrix::Hybrid* result) { auto num_rows = result->get_size()[0]; @@ -619,7 +624,7 @@ void convert_to_hybrid(std::shared_ptr exec, size_type col = 0; for (size_type col_idx = 0; col < num_cols && col_idx < ell_lim; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { result->ell_val_at(row, col_idx) = val; result->ell_col_at(row, col_idx) = col; @@ -627,7 +632,7 @@ void convert_to_hybrid(std::shared_ptr exec, } } for (; col < num_cols; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { coo_val[coo_idx] = val; coo_col[coo_idx] = col; @@ -644,7 +649,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::Sellp* result) { auto num_rows = result->get_size()[0]; @@ -660,7 +665,7 @@ void convert_to_sellp(std::shared_ptr exec, auto sellp_ind = slice_sets[slice] * slice_size + local_row; const auto sellp_end = slice_sets[slice + 1] * slice_size + local_row; for (size_type col = 0; col < num_cols; col++) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { col_idxs[sellp_ind] = col; vals[sellp_ind] = val; @@ -680,7 +685,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, matrix::SparsityCsr* result) { auto num_rows = result->get_size()[0]; @@ -694,7 +699,7 @@ void convert_to_sparsity_csr(std::shared_ptr exec, row_ptrs[0] = cur_ptr; for (size_type row = 0; row < num_rows; ++row) { for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(row, col); + auto val = source(row, col); if (is_nonzero(val)) { col_idxs[cur_ptr] = col; ++cur_ptr; @@ -710,16 +715,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, size_type& result) { - auto num_rows = source->get_size()[0]; - auto num_cols = source->get_size()[1]; + auto num_rows = source.size[0]; + auto num_cols = source.size[1]; result = 0; for (size_type row = 0; row < num_rows; ++row) { size_type num_nonzeros = 0; for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += is_nonzero(source->at(row, col)); + num_nonzeros += is_nonzero(source(row, col)); } result = std::max(num_nonzeros, result); } @@ -731,12 +736,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_slice_sets(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, size_type slice_size, size_type stride_factor, size_type* slice_sets, size_type* slice_lengths) { - const auto num_rows = source->get_size()[0]; - const auto num_cols = source->get_size()[1]; + const auto num_rows = source.size[0]; + const auto num_cols = source.size[1]; const auto num_slices = ceildiv(num_rows, slice_size); for (size_type slice = 0; slice < num_slices; slice++) { size_type slice_length = 0; @@ -745,7 +750,7 @@ void compute_slice_sets(std::shared_ptr exec, size_type row_nnz{}; if (row < num_rows) { for (size_type col = 0; col < num_cols; col++) { - row_nnz += is_nonzero(source->at(row, col)); + row_nnz += is_nonzero(source(row, col)); } } slice_length = std::max( @@ -763,15 +768,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void count_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, IndexType* result) { - auto num_rows = source->get_size()[0]; - auto num_cols = source->get_size()[1]; + auto num_rows = source.size[0]; + auto num_cols = source.size[1]; for (size_type row = 0; row < num_rows; ++row) { IndexType num_nonzeros{}; for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += is_nonzero(source->at(row, col)); + num_nonzeros += is_nonzero(source(row, col)); } result[row] = num_nonzeros; } @@ -785,11 +790,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void count_nonzero_blocks_per_row(std::shared_ptr exec, - const matrix::Dense* source, + matrix::view::dense source, int bs, IndexType* result) { - const auto num_rows = source->get_size()[0]; - const auto num_cols = source->get_size()[1]; + const auto num_rows = source.size[0]; + const auto num_cols = source.size[1]; const auto num_block_rows = num_rows / bs; const auto num_block_cols = num_cols / bs; for (size_type brow = 0; brow < num_block_rows; ++brow) { @@ -800,7 +805,7 @@ void count_nonzero_blocks_per_row(std::shared_ptr exec, for (int lcol = 0; lcol < bs; ++lcol) { const auto row = lrow + bs * brow; const auto col = lcol + bs * bcol; - block_nz = block_nz || is_nonzero(source->at(row, col)); + block_nz = block_nz || is_nonzero(source(row, col)); } } num_nonzero_blocks += block_nz ? 1 : 0; @@ -815,12 +820,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - trans->at(j, i) = orig->at(i, j); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + trans(j, i) = orig(i, j); } } } @@ -830,12 +835,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense* orig, - matrix::Dense* trans) + matrix::view::dense orig, + matrix::view::dense trans) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - trans->at(j, i) = conj(orig->at(i, j)); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + trans(j, i) = conj(orig(i, j)); } } } @@ -845,13 +850,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); template void symm_permute(std::shared_ptr exec, - const IndexType* perm, const matrix::Dense* orig, - matrix::Dense* permuted) + const IndexType* perm, + matrix::view::dense orig, + matrix::view::dense permuted) { - auto size = orig->get_size()[0]; + auto size = orig.size[0]; for (size_type i = 0; i < size; ++i) { for (size_type j = 0; j < size; ++j) { - permuted->at(i, j) = orig->at(perm[i], perm[j]); + permuted(i, j) = orig(perm[i], perm[j]); } } } @@ -863,13 +869,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - auto size = orig->get_size()[0]; + auto size = orig.size[0]; for (size_type i = 0; i < size; ++i) { for (size_type j = 0; j < size; ++j) { - permuted->at(perm[i], perm[j]) = orig->at(i, j); + permuted(perm[i], perm[j]) = orig(i, j); } } } @@ -881,12 +887,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void nonsymm_permute(std::shared_ptr exec, const IndexType* row_perm, const IndexType* col_perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - permuted->at(i, j) = orig->at(row_perm[i], col_perm[j]); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + permuted(i, j) = orig(row_perm[i], col_perm[j]); } } } @@ -898,12 +904,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_nonsymm_permute(std::shared_ptr exec, const IndexType* row_perm, const IndexType* col_perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - permuted->at(row_perm[i], col_perm[j]) = orig->at(i, j); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + permuted(row_perm[i], col_perm[j]) = orig(i, j); } } } @@ -914,12 +920,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_gather(std::shared_ptr exec, - const IndexType* rows, const matrix::Dense* orig, - matrix::Dense* row_collection) + const IndexType* rows, + matrix::view::dense orig, + matrix::view::dense row_collection) { - for (size_type i = 0; i < row_collection->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - row_collection->at(i, j) = orig->at(rows[i], j); + for (size_type i = 0; i < row_collection.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + row_collection(i, j) = orig(rows[i], j); } } } @@ -930,21 +937,21 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( template void advanced_row_gather(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const IndexType* rows, - const matrix::Dense* orig, - const matrix::Dense* beta, - matrix::Dense* row_collection) + matrix::view::dense orig, + matrix::view::dense beta, + matrix::view::dense row_collection) { using type = highest_precision; - auto scalar_alpha = alpha->at(0, 0); - auto scalar_beta = beta->at(0, 0); - for (size_type i = 0; i < row_collection->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - row_collection->at(i, j) = - static_cast(scalar_alpha * orig->at(rows[i], j)) + + auto scalar_alpha = alpha(0, 0); + auto scalar_beta = beta(0, 0); + for (size_type i = 0; i < row_collection.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + row_collection(i, j) = + static_cast(scalar_alpha * orig(rows[i], j)) + static_cast(scalar_beta) * - static_cast(row_collection->at(i, j)); + static_cast(row_collection(i, j)); } } } @@ -955,12 +962,13 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( template void col_permute(std::shared_ptr exec, - const IndexType* perm, const matrix::Dense* orig, - matrix::Dense* col_permuted) + const IndexType* perm, + matrix::view::dense orig, + matrix::view::dense col_permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - col_permuted->at(i, j) = orig->at(i, perm[j]); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + col_permuted(i, j) = orig(i, perm[j]); } } } @@ -972,12 +980,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_row_permute(std::shared_ptr exec, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* row_permuted) + matrix::view::dense orig, + matrix::view::dense row_permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - row_permuted->at(perm[i], j) = orig->at(i, j); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + row_permuted(perm[i], j) = orig(i, j); } } } @@ -989,12 +997,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_col_permute(std::shared_ptr exec, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* col_permuted) + matrix::view::dense orig, + matrix::view::dense col_permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - col_permuted->at(i, perm[j]) = orig->at(i, j); + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { + col_permuted(i, perm[j]) = orig(i, j); } } } @@ -1006,14 +1014,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void symm_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto row = perm[i]; const auto col = perm[j]; - permuted->at(i, j) = scale[row] * scale[col] * orig->at(row, col); + permuted(i, j) = scale[row] * scale[col] * orig(row, col); } } } @@ -1025,14 +1033,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto row = perm[i]; const auto col = perm[j]; - permuted->at(row, col) = orig->at(i, j) / (scale[row] * scale[col]); + permuted(row, col) = orig(i, j) / (scale[row] * scale[col]); } } } @@ -1047,15 +1055,14 @@ void nonsymm_scale_permute(std::shared_ptr exec, const IndexType* row_perm, const ValueType* col_scale, const IndexType* col_perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto row = row_perm[i]; const auto col = col_perm[j]; - permuted->at(i, j) = - row_scale[row] * col_scale[col] * orig->at(row, col); + permuted(i, j) = row_scale[row] * col_scale[col] * orig(row, col); } } } @@ -1070,15 +1077,14 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, const IndexType* row_perm, const ValueType* col_scale, const IndexType* col_perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto row = row_perm[i]; const auto col = col_perm[j]; - permuted->at(row, col) = - orig->at(i, j) / (row_scale[row] * col_scale[col]); + permuted(row, col) = orig(i, j) / (row_scale[row] * col_scale[col]); } } } @@ -1090,13 +1096,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto row = perm[i]; - permuted->at(i, j) = scale[row] * orig->at(row, j); + permuted(i, j) = scale[row] * orig(row, j); } } } @@ -1108,13 +1114,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_row_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto row = perm[i]; - permuted->at(row, j) = orig->at(i, j) / scale[row]; + permuted(row, j) = orig(i, j) / scale[row]; } } } @@ -1126,13 +1132,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void col_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto col = perm[j]; - permuted->at(i, j) = scale[col] * orig->at(i, col); + permuted(i, j) = scale[col] * orig(i, col); } } } @@ -1144,13 +1150,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_col_scale_permute(std::shared_ptr exec, const ValueType* scale, const IndexType* perm, - const matrix::Dense* orig, - matrix::Dense* permuted) + matrix::view::dense orig, + matrix::view::dense permuted) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig.size[0]; ++i) { + for (size_type j = 0; j < orig.size[1]; ++j) { const auto col = perm[j]; - permuted->at(i, col) = orig->at(i, j) / scale[col]; + permuted(i, col) = orig(i, j) / scale[col]; } } } @@ -1161,12 +1167,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Dense* orig, + matrix::view::dense orig, matrix::Diagonal* diag) { auto diag_values = diag->get_values(); for (size_type i = 0; i < diag->get_size()[0]; ++i) { - diag_values[i] = orig->at(i, i); + diag_values[i] = orig(i, i); } } @@ -1175,12 +1181,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); template void inplace_absolute_dense(std::shared_ptr exec, - matrix::Dense* source) + matrix::view::dense source) { - auto dim = source->get_size(); + auto dim = source.size; for (size_type row = 0; row < dim[0]; row++) { for (size_type col = 0; col < dim[1]; col++) { - source->at(row, col) = abs(source->at(row, col)); + source(row, col) = abs(source(row, col)); } } } @@ -1189,14 +1195,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); template -void outplace_absolute_dense(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) +void outplace_absolute_dense( + std::shared_ptr exec, + matrix::view::dense source, + matrix::view::dense> result) { - auto dim = source->get_size(); + auto dim = source.size; for (size_type row = 0; row < dim[0]; row++) { for (size_type col = 0; col < dim[1]; col++) { - result->at(row, col) = abs(source->at(row, col)); + result(row, col) = abs(source(row, col)); } } } @@ -1206,13 +1213,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); template void make_complex(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) + matrix::view::dense source, + matrix::view::dense> result) { - auto dim = source->get_size(); + auto dim = source.size; for (size_type row = 0; row < dim[0]; row++) { for (size_type col = 0; col < dim[1]; col++) { - result->at(row, col) = to_complex{source->at(row, col)}; + result(row, col) = to_complex{source(row, col)}; } } } @@ -1222,13 +1229,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); template void get_real(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) + matrix::view::dense source, + matrix::view::dense> result) { - auto dim = source->get_size(); + auto dim = source.size; for (size_type row = 0; row < dim[0]; row++) { for (size_type col = 0; col < dim[1]; col++) { - result->at(row, col) = real(source->at(row, col)); + result(row, col) = real(source(row, col)); } } } @@ -1238,13 +1245,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); template void get_imag(std::shared_ptr exec, - const matrix::Dense* source, - matrix::Dense>* result) + matrix::view::dense source, + matrix::view::dense> result) { - auto dim = source->get_size(); + auto dim = source.size; for (size_type row = 0; row < dim[0]; row++) { for (size_type col = 0; col < dim[1]; col++) { - result->at(row, col) = imag(source->at(row, col)); + result(row, col) = imag(source(row, col)); } } } @@ -1254,16 +1261,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_IMAG_KERNEL); template void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* beta, - matrix::Dense* mtx) + matrix::view::dense alpha, + matrix::view::dense beta, + matrix::view::dense mtx) { - const auto dim = mtx->get_size(); + const auto dim = mtx.size; for (size_type row = 0; row < dim[0]; row++) { for (size_type col = 0; col < dim[1]; col++) { - mtx->at(row, col) = beta->get_const_values()[0] * mtx->at(row, col); + mtx(row, col) = beta.values[0] * mtx(row, col); if (row == col) { - mtx->at(row, row) += alpha->get_const_values()[0]; + mtx(row, row) += alpha.values[0]; } } } diff --git a/reference/matrix/diagonal_kernels.cpp b/reference/matrix/diagonal_kernels.cpp index 028b7685c2b..aa016ae89bd 100644 --- a/reference/matrix/diagonal_kernels.cpp +++ b/reference/matrix/diagonal_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,15 +22,15 @@ namespace diagonal { template void apply_to_dense(std::shared_ptr exec, const matrix::Diagonal* a, - const matrix::Dense* b, - matrix::Dense* c, bool inverse) + matrix::view::dense b, + matrix::view::dense c, bool inverse) { const auto diag_values = a->get_const_values(); for (size_type row = 0; row < a->get_size()[0]; row++) { const auto scal = inverse ? one() / diag_values[row] : diag_values[row]; - for (size_type col = 0; col < b->get_size()[1]; col++) { - c->at(row, col) = b->at(row, col) * scal; + for (size_type col = 0; col < b.size[1]; col++) { + c(row, col) = b(row, col) * scal; } } } @@ -41,13 +41,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); template void right_apply_to_dense(std::shared_ptr exec, const matrix::Diagonal* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const auto diag_values = a->get_const_values(); - for (size_type row = 0; row < b->get_size()[0]; row++) { + for (size_type row = 0; row < b.size[0]; row++) { for (size_type col = 0; col < a->get_size()[1]; col++) { - c->at(row, col) = b->at(row, col) * diag_values[col]; + c(row, col) = b(row, col) * diag_values[col]; } } } diff --git a/reference/matrix/ell_kernels.cpp b/reference/matrix/ell_kernels.cpp index a6ecd1f99df..b8788b7188e 100644 --- a/reference/matrix/ell_kernels.cpp +++ b/reference/matrix/ell_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -28,8 +28,8 @@ template void spmv(std::shared_ptr exec, const matrix::Ell* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { using arithmetic_type = highest_precision; @@ -46,14 +46,12 @@ void spmv(std::shared_ptr exec, static_cast(num_stored_elements_per_row * stride)}, a->get_const_values()); const auto b_vals = gko::acc::range( - std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), - std::array{ - {static_cast(b->get_stride())}}); + std::array{{static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, + std::array{{static_cast(b.stride)}}); - for (size_type j = 0; j < c->get_size()[1]; j++) { + for (size_type j = 0; j < c.size[1]; j++) { for (size_type row = 0; row < a->get_size()[0]; row++) { arithmetic_type result{}; for (size_type i = 0; i < num_stored_elements_per_row; i++) { @@ -63,7 +61,7 @@ void spmv(std::shared_ptr exec, result += val * b_vals(col, j); } } - c->at(row, j) = result; + c(row, j) = result; } } } @@ -75,11 +73,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Ell* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using arithmetic_type = highest_precision; @@ -96,21 +94,19 @@ void advanced_spmv(std::shared_ptr exec, static_cast(num_stored_elements_per_row * stride)}, a->get_const_values()); const auto b_vals = gko::acc::range( - std::array{ - {static_cast(b->get_size()[0]), - static_cast(b->get_size()[1])}}, - b->get_const_values(), - std::array{ - {static_cast(b->get_stride())}}); - const auto alpha_val = arithmetic_type{alpha->at(0, 0)}; - const auto beta_val = arithmetic_type{beta->at(0, 0)}; - - for (size_type j = 0; j < c->get_size()[1]; j++) { + std::array{{static_cast(b.size[0]), + static_cast(b.size[1])}}, + b.values, + std::array{{static_cast(b.stride)}}); + const auto alpha_val = arithmetic_type{alpha(0, 0)}; + const auto beta_val = arithmetic_type{beta(0, 0)}; + + for (size_type j = 0; j < c.size[1]; j++) { for (size_type row = 0; row < a->get_size()[0]; row++) { arithmetic_type result = is_zero(beta_val) ? zero() - : beta_val * static_cast(c->at(row, j)); + : beta_val * static_cast(c(row, j)); for (size_type i = 0; i < num_stored_elements_per_row; i++) { arithmetic_type val = a_vals(row + i * stride); auto col = a->col_at(row, i); @@ -118,7 +114,7 @@ void advanced_spmv(std::shared_ptr exec, result += alpha_val * val * b_vals(col, j); } } - c->at(row, j) = result; + c(row, j) = result; } } } @@ -170,7 +166,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Ell* source, - matrix::Dense* result) + matrix::view::dense result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -181,7 +177,7 @@ void fill_in_dense(std::shared_ptr exec, for (size_type i = 0; i < num_stored_elements_per_row; i++) { const auto col = source->col_at(row, i); if (col != invalid_index()) { - result->at(row, col) = source->val_at(row, i); + result(row, col) = source->val_at(row, i); } } } diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 67aea37997a..f4006e041eb 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -41,10 +41,11 @@ namespace fbcsr { template void spmv(const std::shared_ptr, const matrix::Fbcsr* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { const int bs = a->get_block_size(); - const auto nvecs = static_cast(b->get_size()[1]); + const auto nvecs = static_cast(b.size[1]); const IndexType nbrows = a->get_num_block_rows(); const size_type nbnz = a->get_num_stored_blocks(); auto row_ptrs = a->get_const_row_ptrs(); @@ -55,7 +56,7 @@ void spmv(const std::shared_ptr, for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType row = ibrow * bs; row < (ibrow + 1) * bs; ++row) { for (IndexType rhs = 0; rhs < nvecs; rhs++) { - c->at(row, rhs) = zero(); + c(row, rhs) = zero(); } } for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; @@ -66,7 +67,7 @@ void spmv(const std::shared_ptr, const auto val = avalues(inz, ib, jb); const auto col = col_idxs[inz] * bs + jb; for (size_type j = 0; j < nvecs; ++j) { - c->at(row, j) += val * b->at(col, j); + c(row, j) += val * b(col, j); } } } @@ -79,20 +80,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(const std::shared_ptr, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Fbcsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { const int bs = a->get_block_size(); - const auto nvecs = static_cast(b->get_size()[1]); + const auto nvecs = static_cast(b.size[1]); const IndexType nbrows = a->get_num_block_rows(); const size_type nbnz = a->get_num_stored_blocks(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); const acc::range> avalues{ to_std_array(nbnz, bs, bs), a->get_const_values()}; @@ -100,9 +101,9 @@ void advanced_spmv(const std::shared_ptr, for (IndexType row = ibrow * bs; row < (ibrow + 1) * bs; ++row) { for (IndexType rhs = 0; rhs < nvecs; rhs++) { if (is_zero(vbeta)) { - c->at(row, rhs) = zero(vbeta); + c(row, rhs) = zero(vbeta); } else { - c->at(row, rhs) *= vbeta; + c(row, rhs) *= vbeta; } } } @@ -115,7 +116,7 @@ void advanced_spmv(const std::shared_ptr, const auto val = avalues(inz, ib, jb); const auto col = col_idxs[inz] * bs + jb; for (size_type j = 0; j < nvecs; ++j) - c->at(row, j) += valpha * val * b->at(col, j); + c(row, j) += valpha * val * b(col, j); } } } @@ -187,7 +188,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(const std::shared_ptr, const matrix::Fbcsr* source, - matrix::Dense* result) + matrix::view::dense result) { const int bs = source->get_block_size(); const IndexType nbrows = source->get_num_block_rows(); @@ -208,7 +209,7 @@ void fill_in_dense(const std::shared_ptr, for (int ib = 0; ib < bs; ib++) { const IndexType row = brow * bs + ib; for (int jb = 0; jb < bs; jb++) { - result->at(row, col_idxs[ibnz] * bs + jb) = + result(row, col_idxs[ibnz] * bs + jb) = values(ibnz, ib, jb); } } @@ -377,22 +378,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr, - const matrix::Fbcsr* to_check, bool* is_sorted) + const matrix::Fbcsr* to_check, bool& is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); const size_type nbrows = to_check->get_num_block_rows(); + is_sorted = true; for (size_type i = 0; i < nbrows; ++i) { for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { if (col_idxs[idx - 1] > col_idxs[idx]) { - *is_sorted = false; + is_sorted = false; return; } } } - *is_sorted = true; - return; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/reference/matrix/fft_kernels.cpp b/reference/matrix/fft_kernels.cpp index 2fc27cdbe3a..4d5204e341e 100644 --- a/reference/matrix/fft_kernels.cpp +++ b/reference/matrix/fft_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,36 +23,37 @@ namespace fft { template -void bfly(const matrix::Dense* b, matrix::Dense* x, - int64 lo, int64 hi, OutValueType root) +void bfly(matrix::view::dense b, + matrix::view::dense x, int64 lo, int64 hi, + OutValueType root) { - for (size_type rhs = 0; rhs < x->get_size()[1]; rhs++) { - auto lo_val = b->at(lo, rhs); - auto hi_val = b->at(hi, rhs); - x->at(lo, rhs) = lo_val + hi_val; - x->at(hi, rhs) = (lo_val - hi_val) * root; + for (size_type rhs = 0; rhs < x.size[1]; rhs++) { + auto lo_val = b(lo, rhs); + auto hi_val = b(hi, rhs); + x(lo, rhs) = lo_val + hi_val; + x(hi, rhs) = (lo_val - hi_val) * root; } } template -void bfly(matrix::Dense* x, int64 lo, int64 hi, ValueType root) +void bfly(matrix::view::dense x, int64 lo, int64 hi, ValueType root) { - for (size_type rhs = 0; rhs < x->get_size()[1]; rhs++) { - auto lo_val = x->at(lo, rhs); - auto hi_val = x->at(hi, rhs); - x->at(lo, rhs) = lo_val + hi_val; - x->at(hi, rhs) = (lo_val - hi_val) * root; + for (size_type rhs = 0; rhs < x.size[1]; rhs++) { + auto lo_val = x(lo, rhs); + auto hi_val = x(hi, rhs); + x(lo, rhs) = lo_val + hi_val; + x(hi, rhs) = (lo_val - hi_val) * root; } } template -void bit_rev_swap(matrix::Dense* x, int64 i, int64 rev_i) +void bit_rev_swap(matrix::view::dense x, int64 i, int64 rev_i) { - for (size_type rhs = 0; rhs < x->get_size()[1]; rhs++) { + for (size_type rhs = 0; rhs < x.size[1]; rhs++) { if (i < rev_i) { - std::swap(x->at(i, rhs), x->at(rev_i, rhs)); + std::swap(x(i, rhs), x(rev_i, rhs)); } } } @@ -82,15 +83,15 @@ vector build_unit_roots(std::shared_ptr exec, template void fft(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, bool inverse, + matrix::view::dense> b, + matrix::view::dense> x, bool inverse, array& buffer) { using complex_type = std::complex; using real_type = ValueType; const int64 sign = inverse ? 1 : -1; - const auto nrhs = b->get_size()[1]; - const auto size = static_cast(b->get_size()[0]); + const auto nrhs = b.size[1]; + const auto size = static_cast(b.size[0]); GKO_ASSERT_IS_POWER_OF_TWO(size); auto roots = build_unit_roots(exec, size, sign); // first butterfly step @@ -121,14 +122,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT_KERNEL); template void fft2(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, bool inverse, array& buffer) { using complex_type = std::complex; using real_type = ValueType; const int64 sign = inverse ? 1 : -1; - const auto nrhs = b->get_size()[1]; + const auto nrhs = b.size[1]; const auto ssize1 = static_cast(size1); const auto ssize2 = static_cast(size2); GKO_ASSERT_IS_POWER_OF_TWO(ssize1); @@ -188,14 +189,14 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_BASE(GKO_DECLARE_FFT2_KERNEL); template void fft3(std::shared_ptr exec, - const matrix::Dense>* b, - matrix::Dense>* x, size_type size1, + matrix::view::dense> b, + matrix::view::dense> x, size_type size1, size_type size2, size_type size3, bool inverse, array& buffer) { using complex_type = std::complex; using real_type = ValueType; const int64 sign = inverse ? 1 : -1; - const auto nrhs = b->get_size()[1]; + const auto nrhs = b.size[1]; const auto ssize1 = static_cast(size1); const auto ssize2 = static_cast(size2); const auto ssize3 = static_cast(size3); diff --git a/reference/matrix/sellp_kernels.cpp b/reference/matrix/sellp_kernels.cpp index dbe0ea24315..9a1dc9b12d9 100644 --- a/reference/matrix/sellp_kernels.cpp +++ b/reference/matrix/sellp_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -26,7 +26,8 @@ namespace sellp { template void spmv(std::shared_ptr exec, const matrix::Sellp* a, - const matrix::Dense* b, matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { auto col_idxs = a->get_const_col_idxs(); auto slice_lengths = a->get_const_slice_lengths(); @@ -39,15 +40,15 @@ void spmv(std::shared_ptr exec, if (global_row >= a->get_size()[0]) { break; } - for (size_type j = 0; j < c->get_size()[1]; j++) { - c->at(global_row, j) = zero(); + for (size_type j = 0; j < c.size[1]; j++) { + c(global_row, j) = zero(); } for (size_type i = 0; i < slice_lengths[slice]; i++) { auto val = a->val_at(row, slice_sets[slice], i); auto col = a->col_at(row, slice_sets[slice], i); if (col != invalid_index()) { - for (size_type j = 0; j < c->get_size()[1]; j++) { - c->at(global_row, j) += val * b->at(col, j); + for (size_type j = 0; j < c.size[1]; j++) { + c(global_row, j) += val * b(col, j); } } } @@ -60,11 +61,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::Sellp* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { auto vals = a->get_const_values(); auto col_idxs = a->get_const_col_idxs(); @@ -72,27 +73,27 @@ void advanced_spmv(std::shared_ptr exec, auto slice_sets = a->get_const_slice_sets(); auto slice_size = a->get_slice_size(); auto slice_num = ceildiv(a->get_size()[0] + slice_size - 1, slice_size); - auto valpha = alpha->at(0, 0); - auto vbeta = beta->at(0, 0); + auto valpha = alpha(0, 0); + auto vbeta = beta(0, 0); for (size_type slice = 0; slice < slice_num; slice++) { for (size_type row = 0; row < slice_size; row++) { size_type global_row = slice * slice_size + row; if (global_row >= a->get_size()[0]) { break; } - for (size_type j = 0; j < c->get_size()[1]; j++) { + for (size_type j = 0; j < c.size[1]; j++) { if (is_nonzero(vbeta)) { - c->at(global_row, j) *= vbeta; + c(global_row, j) *= vbeta; } else { - c->at(global_row, j) = zero(); + c(global_row, j) = zero(); } } for (size_type i = 0; i < slice_lengths[slice]; i++) { auto val = a->val_at(row, slice_sets[slice], i); auto col = a->col_at(row, slice_sets[slice], i); if (col != invalid_index()) { - for (size_type j = 0; j < c->get_size()[1]; j++) { - c->at(global_row, j) += valpha * val * b->at(col, j); + for (size_type j = 0; j < c.size[1]; j++) { + c(global_row, j) += valpha * val * b(col, j); } } } @@ -174,7 +175,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::Sellp* source, - matrix::Dense* result) + matrix::view::dense result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -195,7 +196,7 @@ void fill_in_dense(std::shared_ptr exec, i++) { const auto col = col_idxs[row + i * slice_size]; if (col != invalid_index()) { - result->at(global_row, col) = vals[row + i * slice_size]; + result(global_row, col) = vals[row + i * slice_size]; } } } diff --git a/reference/matrix/sparsity_csr_kernels.cpp b/reference/matrix/sparsity_csr_kernels.cpp index 0a06871b411..b09583f6ec1 100644 --- a/reference/matrix/sparsity_csr_kernels.cpp +++ b/reference/matrix/sparsity_csr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -33,8 +33,8 @@ template void spmv(std::shared_ptr exec, const matrix::SparsityCsr* a, - const matrix::Dense* b, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense c) { using arithmetic_type = highest_precision; @@ -43,14 +43,14 @@ void spmv(std::shared_ptr exec, const auto val = static_cast(a->get_const_value()[0]); for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto temp_val = gko::zero(); for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { temp_val += - val * static_cast(b->at(col_idxs[k], j)); + val * static_cast(b(col_idxs[k], j)); } - c->at(row, j) = static_cast(temp_val); + c(row, j) = static_cast(temp_val); } } } @@ -62,33 +62,33 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense* alpha, + matrix::view::dense alpha, const matrix::SparsityCsr* a, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* c) + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense c) { using arithmetic_type = highest_precision; auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - const auto valpha = static_cast(alpha->at(0, 0)); - const auto vbeta = static_cast(beta->at(0, 0)); + const auto valpha = static_cast(alpha(0, 0)); + const auto vbeta = static_cast(beta(0, 0)); const auto val = static_cast(a->get_const_value()[0]); for (size_type row = 0; row < a->get_size()[0]; ++row) { - for (size_type j = 0; j < c->get_size()[1]; ++j) { + for (size_type j = 0; j < c.size[1]; ++j) { auto temp_val = gko::zero(); for (size_type k = row_ptrs[row]; k < static_cast(row_ptrs[row + 1]); ++k) { temp_val += - val * static_cast(b->at(col_idxs[k], j)); + val * static_cast(b(col_idxs[k], j)); } - c->at(row, j) = static_cast( + c(row, j) = static_cast( (is_zero(vbeta) ? zero(vbeta) - : vbeta * static_cast(c->at(row, j))) + + : vbeta * static_cast(c(row, j))) + valpha * temp_val); } } @@ -101,7 +101,7 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void fill_in_dense(std::shared_ptr exec, const matrix::SparsityCsr* input, - matrix::Dense* output) + matrix::view::dense output) { auto row_ptrs = input->get_const_row_ptrs(); auto col_idxs = input->get_const_col_idxs(); @@ -110,7 +110,7 @@ void fill_in_dense(std::shared_ptr exec, for (size_type row = 0; row < input->get_size()[0]; ++row) { for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) { auto col = col_idxs[k]; - output->at(row, col) = val; + output(row, col) = val; } } } @@ -254,21 +254,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr* to_check, bool* is_sorted) + const matrix::SparsityCsr* to_check, bool& is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); const auto size = to_check->get_size(); + is_sorted = true; for (size_type i = 0; i < size[0]; ++i) { for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { if (col_idxs[idx - 1] > col_idxs[idx]) { - *is_sorted = false; + is_sorted = false; return; } } } - *is_sorted = true; - return; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/reference/preconditioner/isai_kernels.cpp b/reference/preconditioner/isai_kernels.cpp index 55f56b5705e..4c80a70f1af 100644 --- a/reference/preconditioner/isai_kernels.cpp +++ b/reference/preconditioner/isai_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -325,7 +325,7 @@ void generate_excess_system(std::shared_ptr, const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, matrix::Csr* excess_system, - matrix::Dense* excess_rhs, + matrix::view::dense excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -334,11 +334,11 @@ void generate_excess_system(std::shared_ptr, const auto m_vals = input->get_const_values(); const auto i_row_ptrs = inverse->get_const_row_ptrs(); const auto i_cols = inverse->get_const_col_idxs(); - const auto e_dim = excess_rhs->get_size()[0]; + const auto e_dim = excess_rhs.size[0]; auto e_row_ptrs = excess_system->get_row_ptrs(); auto e_cols = excess_system->get_col_idxs(); auto e_vals = excess_system->get_values(); - auto e_rhs = excess_rhs->get_values(); + auto e_rhs = excess_rhs.values; for (size_type row = e_start; row < e_end; ++row) { const auto i_begin = i_row_ptrs[row]; @@ -384,10 +384,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, const IndexType* excess_block_ptrs, - matrix::Dense* excess_solution, + matrix::view::dense excess_solution, size_type e_start, size_type e_end) { - auto excess_values = excess_solution->get_values(); + auto excess_values = excess_solution.values; IndexType block_start = 0; IndexType block_end = 0; auto offset = excess_block_ptrs[e_start]; @@ -410,13 +410,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void scatter_excess_solution(std::shared_ptr, - const IndexType* excess_block_ptrs, - const matrix::Dense* excess_solution, - matrix::Csr* inverse, - size_type e_start, size_type e_end) +void scatter_excess_solution( + std::shared_ptr, const IndexType* excess_block_ptrs, + matrix::view::dense excess_solution, + matrix::Csr* inverse, size_type e_start, + size_type e_end) { - auto excess_values = excess_solution->get_const_values(); + auto excess_values = excess_solution.values; auto values = inverse->get_values(); auto row_ptrs = inverse->get_const_row_ptrs(); auto offset = excess_block_ptrs[e_start]; diff --git a/reference/preconditioner/jacobi_kernels.cpp b/reference/preconditioner/jacobi_kernels.cpp index 66fb208ad9a..fdbcb121cf5 100644 --- a/reference/preconditioner/jacobi_kernels.cpp +++ b/reference/preconditioner/jacobi_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -470,27 +470,27 @@ void apply(std::shared_ptr exec, size_type num_blocks, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* alpha, - const matrix::Dense* b, - const matrix::Dense* beta, matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); for (size_type i = 0; i < num_blocks; ++i) { const auto group = blocks.get_const_data() + storage_scheme.get_group_offset(i); - const auto block_b = b->get_const_values() + b->get_stride() * ptrs[i]; - const auto block_x = x->get_values() + x->get_stride() * ptrs[i]; + const auto block_b = b.values + b.stride * ptrs[i]; + const auto block_x = x.values + x.stride * ptrs[i]; const auto block_size = ptrs[i + 1] - ptrs[i]; const auto p = prec ? prec[i] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, - apply_block(block_size, b->get_size()[1], + apply_block(block_size, b.size[1], reinterpret_cast(group) + storage_scheme.get_block_offset(i), - storage_scheme.get_stride(), alpha->at(0, 0), block_b, - b->get_stride(), beta->at(0, 0), block_x, - x->get_stride())); + storage_scheme.get_stride(), alpha(0, 0), block_b, + b.stride, beta(0, 0), block_x, x.stride)); } } @@ -505,25 +505,24 @@ void simple_apply( storage_scheme, const array& block_precisions, const array& block_pointers, const array& blocks, - const matrix::Dense* b, matrix::Dense* x) + matrix::view::dense b, matrix::view::dense x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); for (size_type i = 0; i < num_blocks; ++i) { const auto group = blocks.get_const_data() + storage_scheme.get_group_offset(i); - const auto block_b = b->get_const_values() + b->get_stride() * ptrs[i]; - const auto block_x = x->get_values() + x->get_stride() * ptrs[i]; + const auto block_b = b.values + b.stride * ptrs[i]; + const auto block_x = x.values + x.stride * ptrs[i]; const auto block_size = ptrs[i + 1] - ptrs[i]; const auto p = prec ? prec[i] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, - apply_block(block_size, b->get_size()[1], + apply_block(block_size, b.size[1], reinterpret_cast(group) + storage_scheme.get_block_offset(i), storage_scheme.get_stride(), one(), block_b, - b->get_stride(), zero(), block_x, - x->get_stride())); + b.stride, zero(), block_x, x.stride)); } } @@ -534,15 +533,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scalar_apply(std::shared_ptr exec, const array& diag, - const matrix::Dense* alpha, - const matrix::Dense* b, - const matrix::Dense* beta, - matrix::Dense* x) + matrix::view::dense alpha, + matrix::view::dense b, + matrix::view::dense beta, + matrix::view::dense x) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - x->at(i, j) = beta->at(0) * x->at(i, j) + - alpha->at(0) * b->at(i, j) * diag.get_const_data()[i]; + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + x(i, j) = beta(0, 0) * x(i, j) + + alpha(0, 0) * b(i, j) * diag.get_const_data()[i]; } } } @@ -553,12 +552,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); template void simple_scalar_apply(std::shared_ptr exec, const array& diag, - const matrix::Dense* b, - matrix::Dense* x) + matrix::view::dense b, + matrix::view::dense x) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - x->at(i, j) = b->at(i, j) * diag.get_const_data()[i]; + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + x(i, j) = b(i, j) * diag.get_const_data()[i]; } } } @@ -669,14 +668,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scalar_convert_to_dense(std::shared_ptr exec, const array& blocks, - matrix::Dense* result) + matrix::view::dense result) { - auto matrix_size = result->get_size(); + auto matrix_size = result.size; for (size_type i = 0; i < matrix_size[0]; ++i) { for (size_type j = 0; j < matrix_size[1]; ++j) { - result->at(i, j) = zero(); + result(i, j) = zero(); if (i == j) { - result->at(i, j) = blocks.get_const_data()[i]; + result(i, j) = blocks.get_const_data()[i]; } } } diff --git a/reference/solver/bicg_kernels.cpp b/reference/solver/bicg_kernels.cpp index dee2d30b8dc..4af07ab00d2 100644 --- a/reference/solver/bicg_kernels.cpp +++ b/reference/solver/bicg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,26 +22,26 @@ namespace bicg { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* z, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* prev_rho, - matrix::Dense* rho, matrix::Dense* r2, - matrix::Dense* z2, matrix::Dense* p2, - matrix::Dense* q2, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense z, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense prev_rho, + matrix::view::dense rho, matrix::view::dense r2, + matrix::view::dense z2, matrix::view::dense p2, + matrix::view::dense q2, array& stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < b.size[1]; ++j) { + rho(0, j) = zero(); + prev_rho(0, j) = one(); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - r2->at(i, j) = b->at(i, j); - z->at(i, j) = p->at(i, j) = q->at(i, j) = zero(); - z2->at(i, j) = p2->at(i, j) = q2->at(i, j) = zero(); + for (size_type i = 0; i < b.size[0]; ++i) { + for (size_type j = 0; j < b.size[1]; ++j) { + r(i, j) = b(i, j); + r2(i, j) = b(i, j); + z(i, j) = p(i, j) = q(i, j) = zero(); + z2(i, j) = p2(i, j) = q2(i, j) = zero(); } } } @@ -51,24 +51,26 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* p, const matrix::Dense* z, - matrix::Dense* p2, const matrix::Dense* z2, - const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense z, + matrix::view::dense p2, + matrix::view::dense z2, + matrix::view::dense rho, + matrix::view::dense prev_rho, + const array& stop_status) { - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < p.size[0]; ++i) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_zero(prev_rho->at(j))) { - p->at(i, j) = z->at(i, j); - p2->at(i, j) = z2->at(i, j); + if (is_zero(prev_rho(0, j))) { + p(i, j) = z(i, j); + p2(i, j) = z2(i, j); } else { - auto tmp = rho->at(j) / prev_rho->at(j); - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); - p2->at(i, j) = z2->at(i, j) + tmp * p2->at(i, j); + auto tmp = rho(0, j) / prev_rho(0, j); + p(i, j) = z(i, j) + tmp * p(i, j); + p2(i, j) = z2(i, j) + tmp * p2(i, j); } } } @@ -79,24 +81,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - matrix::Dense* r2, const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* q2, - const matrix::Dense* beta, - const matrix::Dense* rho, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense r2, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense q2, + matrix::view::dense beta, + matrix::view::dense rho, + const array& stop_status) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(beta->at(j))) { - auto tmp = rho->at(j) / beta->at(j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); - r2->at(i, j) -= tmp * q2->at(i, j); + if (is_nonzero(beta(0, j))) { + auto tmp = rho(0, j) / beta(0, j); + x(i, j) += tmp * p(i, j); + r(i, j) -= tmp * q(i, j); + r2(i, j) -= tmp * q2(i, j); } } } diff --git a/reference/solver/bicgstab_kernels.cpp b/reference/solver/bicgstab_kernels.cpp index 31955a59c53..0eb29cae81b 100644 --- a/reference/solver/bicgstab_kernels.cpp +++ b/reference/solver/bicgstab_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,36 +23,36 @@ namespace bicgstab { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* rr, matrix::Dense* y, - matrix::Dense* s, matrix::Dense* t, - matrix::Dense* z, matrix::Dense* v, - matrix::Dense* p, matrix::Dense* prev_rho, - matrix::Dense* rho, matrix::Dense* alpha, - matrix::Dense* beta, matrix::Dense* gamma, - matrix::Dense* omega, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense rr, matrix::view::dense y, + matrix::view::dense s, matrix::view::dense t, + matrix::view::dense z, matrix::view::dense v, + matrix::view::dense p, matrix::view::dense prev_rho, + matrix::view::dense rho, matrix::view::dense alpha, + matrix::view::dense beta, matrix::view::dense gamma, + matrix::view::dense omega, array& stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = one(); - prev_rho->at(j) = one(); - alpha->at(j) = one(); - beta->at(j) = one(); - gamma->at(j) = one(); - omega->at(j) = one(); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < b.size[1]; ++j) { + rho(0, j) = one(); + prev_rho(0, j) = one(); + alpha(0, j) = one(); + beta(0, j) = one(); + gamma(0, j) = one(); + omega(0, j) = one(); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - rr->at(i, j) = zero(); - z->at(i, j) = zero(); - v->at(i, j) = zero(); - s->at(i, j) = zero(); - t->at(i, j) = zero(); - y->at(i, j) = zero(); - p->at(i, j) = zero(); + for (size_type i = 0; i < b.size[0]; ++i) { + for (size_type j = 0; j < b.size[1]; ++j) { + r(i, j) = b(i, j); + rr(i, j) = zero(); + z(i, j) = zero(); + v(i, j) = zero(); + s(i, j) = zero(); + t(i, j) = zero(); + y(i, j) = zero(); + p(i, j) = zero(); } } } @@ -62,26 +62,26 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* p, - const matrix::Dense* v, - const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const matrix::Dense* alpha, - const matrix::Dense* omega, - const array* stop_status) + matrix::view::dense r, + matrix::view::dense p, + matrix::view::dense v, + matrix::view::dense rho, + matrix::view::dense prev_rho, + matrix::view::dense alpha, + matrix::view::dense omega, + const array& stop_status) { - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < p.size[0]; ++i) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(prev_rho->at(j) * omega->at(j))) { + if (is_nonzero(prev_rho(0, j) * omega(0, j))) { const auto tmp = - rho->at(j) / prev_rho->at(j) * alpha->at(j) / omega->at(j); - p->at(i, j) = r->at(i, j) + - tmp * (p->at(i, j) - omega->at(j) * v->at(i, j)); + rho(0, j) / prev_rho(0, j) * alpha(0, j) / omega(0, j); + p(i, j) = r(i, j) + tmp * (p(i, j) - omega(0, j) * v(i, j)); } else { - p->at(i, j) = r->at(i, j); + p(i, j) = r(i, j); } } } @@ -92,24 +92,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* s, - const matrix::Dense* v, - const matrix::Dense* rho, - matrix::Dense* alpha, - const matrix::Dense* beta, - const array* stop_status) + matrix::view::dense r, + matrix::view::dense s, + matrix::view::dense v, + matrix::view::dense rho, + matrix::view::dense alpha, + matrix::view::dense beta, + const array& stop_status) { - for (size_type i = 0; i < s->get_size()[0]; ++i) { - for (size_type j = 0; j < s->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < s.size[0]; ++i) { + for (size_type j = 0; j < s.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(beta->at(j))) { - alpha->at(j) = rho->at(j) / beta->at(j); - s->at(i, j) = r->at(i, j) - alpha->at(j) * v->at(i, j); + if (is_nonzero(beta(0, j))) { + alpha(0, j) = rho(0, j) / beta(0, j); + s(i, j) = r(i, j) - alpha(0, j) * v(i, j); } else { - alpha->at(j) = zero(); - s->at(i, j) = r->at(i, j); + alpha(0, j) = zero(); + s(i, j) = r(i, j); } } } @@ -119,32 +120,35 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); template -void step_3( - std::shared_ptr exec, matrix::Dense* x, - matrix::Dense* r, const matrix::Dense* s, - const matrix::Dense* t, const matrix::Dense* y, - const matrix::Dense* z, const matrix::Dense* alpha, - const matrix::Dense* beta, const matrix::Dense* gamma, - matrix::Dense* omega, const array* stop_status) +void step_3(std::shared_ptr exec, + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense s, + matrix::view::dense t, + matrix::view::dense y, + matrix::view::dense z, + matrix::view::dense alpha, + matrix::view::dense beta, + matrix::view::dense gamma, + matrix::view::dense omega, + const array& stop_status) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(beta->at(j))) { - omega->at(j) = gamma->at(j) / beta->at(j); + if (is_nonzero(beta(0, j))) { + omega(0, j) = gamma(0, j) / beta(0, j); } else { - omega->at(j) = zero(); + omega(0, j) = zero(); } } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - x->at(i, j) += - alpha->at(j) * y->at(i, j) + omega->at(j) * z->at(i, j); - r->at(i, j) = s->at(i, j) - omega->at(j) * t->at(i, j); + x(i, j) += alpha(0, j) * y(i, j) + omega(0, j) * z(i, j); + r(i, j) = s(i, j) - omega(0, j) * t(i, j); } } } @@ -154,16 +158,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); template void finalize(std::shared_ptr exec, - matrix::Dense* x, const matrix::Dense* y, - const matrix::Dense* alpha, - array* stop_status) + matrix::view::dense x, + matrix::view::dense y, + matrix::view::dense alpha, + array& stop_status) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped() && - !stop_status->get_const_data()[j].is_finalized()) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - x->at(i, j) += alpha->at(j) * y->at(i, j); - stop_status->get_data()[j].finalize(); + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped() && + !stop_status.get_const_data()[j].is_finalized()) { + for (size_type i = 0; i < x.size[0]; ++i) { + x(i, j) += alpha(0, j) * y(i, j); + stop_status.get_data()[j].finalize(); } } } diff --git a/reference/solver/cb_gmres_kernels.cpp b/reference/solver/cb_gmres_kernels.cpp index c5acb41e45b..2cc9847d6cb 100644 --- a/reference/solver/cb_gmres_kernels.cpp +++ b/reference/solver/cb_gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -28,12 +28,12 @@ namespace { template -void finish_arnoldi_CGS(matrix::Dense* next_krylov_basis, - Accessor3d krylov_bases, - matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, const stopping_status* stop_status) +void finish_arnoldi_CGS( + matrix::view::dense next_krylov_basis, Accessor3d krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, size_type iter, + const stopping_status* stop_status) { static_assert( std::is_same* next_krylov_basis, using rc_vtype = remove_complex; const rc_vtype eta = 1.0 / sqrt(2.0); - for (size_type i = 0; i < next_krylov_basis->get_size()[1]; ++i) { - arnoldi_norm->at(0, i) = zero(); - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - arnoldi_norm->at(0, i) += squared_norm(next_krylov_basis->at(j, i)); + for (size_type i = 0; i < next_krylov_basis.size[1]; ++i) { + arnoldi_norm(0, i) = zero(); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + arnoldi_norm(0, i) += squared_norm(next_krylov_basis(j, i)); } - arnoldi_norm->at(0, i) = eta * sqrt(arnoldi_norm->at(0, i)); - // arnoldi_norm->at(0, i) = norm(next_krylov_basis) + arnoldi_norm(0, i) = eta * sqrt(arnoldi_norm(0, i)); + // arnoldi_norm(0, i) = norm(next_krylov_basis) if (stop_status[i].has_stopped()) { continue; } for (size_type k = 0; k < iter + 1; ++k) { - hessenberg_iter->at(k, i) = zero(); - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - hessenberg_iter->at(k, i) += - next_krylov_basis->at(j, i) * conj(krylov_bases(k, j, i)); + hessenberg_iter(k, i) = zero(); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + hessenberg_iter(k, i) += + next_krylov_basis(j, i) * conj(krylov_bases(k, j, i)); } } // for i in 1:iter // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) // end for (size_type k = 0; k < iter + 1; ++k) { - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - next_krylov_basis->at(j, i) -= - hessenberg_iter->at(k, i) * krylov_bases(k, j, i); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + next_krylov_basis(j, i) -= + hessenberg_iter(k, i) * krylov_bases(k, j, i); } } // for i in 1:iter // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) // end - arnoldi_norm->at(1, i) = zero(); + arnoldi_norm(1, i) = zero(); if (has_scalar) { - arnoldi_norm->at(2, i) = zero(); + arnoldi_norm(2, i) = zero(); } - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - arnoldi_norm->at(1, i) += squared_norm(next_krylov_basis->at(j, i)); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + arnoldi_norm(1, i) += squared_norm(next_krylov_basis(j, i)); if (has_scalar) { - arnoldi_norm->at(2, i) = - (arnoldi_norm->at(2, i) >= abs(next_krylov_basis->at(j, i))) - ? arnoldi_norm->at(2, i) - : abs(next_krylov_basis->at(j, i)); + arnoldi_norm(2, i) = + (arnoldi_norm(2, i) >= abs(next_krylov_basis(j, i))) + ? arnoldi_norm(2, i) + : abs(next_krylov_basis(j, i)); } } - arnoldi_norm->at(1, i) = sqrt(arnoldi_norm->at(1, i)); + arnoldi_norm(1, i) = sqrt(arnoldi_norm(1, i)); for (size_type l = 1; - (arnoldi_norm->at(1, i)) < (arnoldi_norm->at(0, i)) && l < 3; - l++) { - arnoldi_norm->at(0, i) = eta * arnoldi_norm->at(1, i); + (arnoldi_norm(1, i)) < (arnoldi_norm(0, i)) && l < 3; l++) { + arnoldi_norm(0, i) = eta * arnoldi_norm(1, i); for (size_type k = 0; k < iter + 1; ++k) { - buffer_iter->at(k, i) = zero(); - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; - ++j) { - buffer_iter->at(k, i) += next_krylov_basis->at(j, i) * - conj(krylov_bases(k, j, i)); + buffer_iter(k, i) = zero(); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + buffer_iter(k, i) += + next_krylov_basis(j, i) * conj(krylov_bases(k, j, i)); } } // for i in 1:iter // buffer(iter, i) = next_krylov_basis' * krylov_bases(:, i) // end for (size_type k = 0; k < iter + 1; ++k) { - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; - ++j) { - next_krylov_basis->at(j, i) -= - buffer_iter->at(k, i) * conj(krylov_bases(k, j, i)); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + next_krylov_basis(j, i) -= + buffer_iter(k, i) * conj(krylov_bases(k, j, i)); } - hessenberg_iter->at(k, i) += buffer_iter->at(k, i); + hessenberg_iter(k, i) += buffer_iter(k, i); } // for i in 1:iter // next_krylov_basis -= buffer(iter, i) * krylov_bases(:, i) // hessenberg(iter, i) += buffer(iter, i) // end - arnoldi_norm->at(1, i) = zero(); - arnoldi_norm->at(2, i) = zero(); - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - arnoldi_norm->at(1, i) += - squared_norm(next_krylov_basis->at(j, i)); - arnoldi_norm->at(2, i) = - (arnoldi_norm->at(2, i) >= abs(next_krylov_basis->at(j, i))) - ? arnoldi_norm->at(2, i) - : abs(next_krylov_basis->at(j, i)); + arnoldi_norm(1, i) = zero(); + arnoldi_norm(2, i) = zero(); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + arnoldi_norm(1, i) += squared_norm(next_krylov_basis(j, i)); + arnoldi_norm(2, i) = + (arnoldi_norm(2, i) >= abs(next_krylov_basis(j, i))) + ? arnoldi_norm(2, i) + : abs(next_krylov_basis(j, i)); } - arnoldi_norm->at(1, i) = sqrt(arnoldi_norm->at(1, i)); + arnoldi_norm(1, i) = sqrt(arnoldi_norm(1, i)); // nrmN = norm(next_krylov_basis) } gko::cb_gmres::helper_functions_accessor::write_scalar( - krylov_bases, iter + 1, i, - arnoldi_norm->at(2, i) / arnoldi_norm->at(1, i)); - hessenberg_iter->at(iter + 1, i) = arnoldi_norm->at(1, i); + krylov_bases, iter + 1, i, arnoldi_norm(2, i) / arnoldi_norm(1, i)); + hessenberg_iter(iter + 1, i) = arnoldi_norm(1, i); // hessenberg(iter, iter + 1) = norm(next_krylov_basis) - for (size_type j = 0; j < next_krylov_basis->get_size()[0]; ++j) { - next_krylov_basis->at(j, i) /= hessenberg_iter->at(iter + 1, i); - krylov_bases(iter + 1, j, i) = next_krylov_basis->at(j, i); + for (size_type j = 0; j < next_krylov_basis.size[0]; ++j) { + next_krylov_basis(j, i) /= hessenberg_iter(iter + 1, i); + krylov_bases(iter + 1, j, i) = next_krylov_basis(j, i); } // next_krylov_basis /= hessenberg(iter, iter + 1) // krylov_bases(:, iter + 1) = next_krylov_basis @@ -145,44 +140,44 @@ void finish_arnoldi_CGS(matrix::Dense* next_krylov_basis, template -void calculate_sin_and_cos(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, +void calculate_sin_and_cos(matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, size_type iter, const size_type rhs) { - if (is_zero(hessenberg_iter->at(iter, rhs))) { - givens_cos->at(iter, rhs) = zero(); - givens_sin->at(iter, rhs) = one(); + if (is_zero(hessenberg_iter(iter, rhs))) { + givens_cos(iter, rhs) = zero(); + givens_sin(iter, rhs) = one(); } else { - auto this_hess = hessenberg_iter->at(iter, rhs); - auto next_hess = hessenberg_iter->at(iter + 1, rhs); + auto this_hess = hessenberg_iter(iter, rhs); + auto next_hess = hessenberg_iter(iter + 1, rhs); const auto scale = abs(this_hess) + abs(next_hess); const auto hypotenuse = scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + abs(next_hess / scale) * abs(next_hess / scale)); - givens_cos->at(iter, rhs) = conj(this_hess) / hypotenuse; - givens_sin->at(iter, rhs) = conj(next_hess) / hypotenuse; + givens_cos(iter, rhs) = conj(this_hess) / hypotenuse; + givens_sin(iter, rhs) = conj(next_hess) / hypotenuse; } } template -void givens_rotation(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, size_type iter, - const stopping_status* stop_status) +void givens_rotation(matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, + size_type iter, const stopping_status* stop_status) { - for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { + for (size_type i = 0; i < hessenberg_iter.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } for (size_type j = 0; j < iter; ++j) { - auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + - givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j + 1, i) = - -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + - conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j, i) = temp; + auto temp = givens_cos(j, i) * hessenberg_iter(j, i) + + givens_sin(j, i) * hessenberg_iter(j + 1, i); + hessenberg_iter(j + 1, i) = + -conj(givens_sin(j, i)) * hessenberg_iter(j, i) + + conj(givens_cos(j, i)) * hessenberg_iter(j + 1, i); + hessenberg_iter(j, i) = temp; // temp = cos(j)*hessenberg(j) + // sin(j)*hessenberg(j+1) // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + @@ -192,10 +187,10 @@ void givens_rotation(matrix::Dense* givens_sin, calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); - hessenberg_iter->at(iter, i) = - givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + - givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); - hessenberg_iter->at(iter + 1, i) = zero(); + hessenberg_iter(iter, i) = + givens_cos(iter, i) * hessenberg_iter(iter, i) + + givens_sin(iter, i) * hessenberg_iter(iter + 1, i); + hessenberg_iter(iter + 1, i) = zero(); // hessenberg(iter) = cos(iter)*hessenberg(iter) + // sin(iter)*hessenberg(iter + 1) // hessenberg(iter+1) = 0 @@ -205,44 +200,41 @@ void givens_rotation(matrix::Dense* givens_sin, template void calculate_next_residual_norm( - matrix::Dense* givens_sin, matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, size_type iter, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, size_type iter, const stopping_status* stop_status) { - for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { + for (size_type i = 0; i < residual_norm.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } - residual_norm_collection->at(iter + 1, i) = - -conj(givens_sin->at(iter, i)) * - residual_norm_collection->at(iter, i); - residual_norm_collection->at(iter, i) = - givens_cos->at(iter, i) * residual_norm_collection->at(iter, i); - residual_norm->at(0, i) = - abs(residual_norm_collection->at(iter + 1, i)); + residual_norm_collection(iter + 1, i) = + -conj(givens_sin(iter, i)) * residual_norm_collection(iter, i); + residual_norm_collection(iter, i) = + givens_cos(iter, i) * residual_norm_collection(iter, i); + residual_norm(0, i) = abs(residual_norm_collection(iter + 1, i)); } } template void solve_upper_triangular( - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, matrix::Dense* y, - const size_type* final_iter_nums) + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg, + matrix::view::dense y, const size_type* final_iter_nums) { - for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { + for (size_type k = 0; k < residual_norm_collection.size[1]; ++k) { for (int64 i = final_iter_nums[k] - 1; i >= 0; --i) { - auto temp = residual_norm_collection->at(i, k); + auto temp = residual_norm_collection(i, k); for (size_type j = i + 1; j < final_iter_nums[k]; ++j) { temp -= - hessenberg->at( - i, j * residual_norm_collection->get_size()[1] + k) * - y->at(j, k); + hessenberg(i, j * residual_norm_collection.size[1] + k) * + y(j, k); } - y->at(i, k) = - temp / hessenberg->at( - i, i * residual_norm_collection->get_size()[1] + k); + y(i, k) = + temp / hessenberg(i, i * residual_norm_collection.size[1] + k); } } } @@ -250,8 +242,8 @@ void solve_upper_triangular( template void calculate_qy(ConstAccessor3d krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, + matrix::view::dense y, + matrix::view::dense before_preconditioner, const size_type* final_iter_nums) { static_assert( @@ -259,12 +251,11 @@ void calculate_qy(ConstAccessor3d krylov_bases, ValueType, typename ConstAccessor3d::accessor::arithmetic_type>::value, "ValueType must match arithmetic_type of accessor!"); - for (size_type k = 0; k < before_preconditioner->get_size()[1]; ++k) { - for (size_type i = 0; i < before_preconditioner->get_size()[0]; ++i) { - before_preconditioner->at(i, k) = zero(); + for (size_type k = 0; k < before_preconditioner.size[1]; ++k) { + for (size_type i = 0; i < before_preconditioner.size[0]; ++i) { + before_preconditioner(i, k) = zero(); for (size_type j = 0; j < final_iter_nums[k]; ++j) { - before_preconditioner->at(i, k) += - krylov_bases(j, i, k) * y->at(j, k); + before_preconditioner(i, k) += krylov_bases(j, i, k) * y(j, k); } } } @@ -276,21 +267,21 @@ void calculate_qy(ConstAccessor3d krylov_bases, template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - array* stop_status, size_type krylov_dim) + matrix::view::dense b, + matrix::view::dense residual, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + array& stop_status, size_type krylov_dim) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - for (size_type i = 0; i < b->get_size()[0]; ++i) { - residual->at(i, j) = b->at(i, j); + for (size_type j = 0; j < b.size[1]; ++j) { + for (size_type i = 0; i < b.size[0]; ++i) { + residual(i, j) = b(i, j); } for (size_type i = 0; i < krylov_dim; ++i) { - givens_sin->at(i, j) = zero(); - givens_cos->at(i, j) = zero(); + givens_sin(i, j) = zero(); + givens_cos(i, j) = zero(); } - stop_status->get_data()[j].reset(); + stop_status.get_data()[j].reset(); } } @@ -300,13 +291,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE( template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense>* arnoldi_norm, + matrix::view::dense residual, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense> arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense* next_krylov_basis, - array* final_iter_nums, array&, + matrix::view::dense next_krylov_basis, + array& final_iter_nums, array&, size_type krylov_dim) { static_assert( @@ -317,47 +308,43 @@ void restart(std::shared_ptr exec, constexpr bool has_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; - for (size_type j = 0; j < residual->get_size()[1]; ++j) { + for (size_type j = 0; j < residual.size[1]; ++j) { // Calculate residual norm - residual_norm->at(0, j) = zero(); + residual_norm(0, j) = zero(); if (has_scalar) { - arnoldi_norm->at(2, j) = zero(); + arnoldi_norm(2, j) = zero(); } - for (size_type i = 0; i < residual->get_size()[0]; ++i) { - residual_norm->at(0, j) += squared_norm(residual->at(i, j)); + for (size_type i = 0; i < residual.size[0]; ++i) { + residual_norm(0, j) += squared_norm(residual(i, j)); if (has_scalar) { - arnoldi_norm->at(2, j) = - (arnoldi_norm->at(2, j) >= abs(residual->at(i, j))) - ? arnoldi_norm->at(2, j) - : abs(residual->at(i, j)); + arnoldi_norm(2, j) = (arnoldi_norm(2, j) >= abs(residual(i, j))) + ? arnoldi_norm(2, j) + : abs(residual(i, j)); } } - residual_norm->at(0, j) = sqrt(residual_norm->at(0, j)); + residual_norm(0, j) = sqrt(residual_norm(0, j)); gko::cb_gmres::helper_functions_accessor::write_scalar( - krylov_bases, {0}, j, - arnoldi_norm->at(2, j) / residual_norm->at(0, j)); + krylov_bases, {0}, j, arnoldi_norm(2, j) / residual_norm(0, j)); for (size_type i = 0; i < krylov_dim + 1; ++i) { if (i == 0) { - residual_norm_collection->at(i, j) = residual_norm->at(0, j); + residual_norm_collection(i, j) = residual_norm(0, j); } else { - residual_norm_collection->at(i, j) = zero(); + residual_norm_collection(i, j) = zero(); } } - for (size_type i = 0; i < residual->get_size()[0]; ++i) { - krylov_bases(0, i, j) = - residual->at(i, j) / residual_norm->at(0, j); - next_krylov_basis->at(i, j) = - residual->at(i, j) / residual_norm->at(0, j); + for (size_type i = 0; i < residual.size[0]; ++i) { + krylov_bases(0, i, j) = residual(i, j) / residual_norm(0, j); + next_krylov_basis(i, j) = residual(i, j) / residual_norm(0, j); } - final_iter_nums->get_data()[j] = 0; + final_iter_nums.get_data()[j] = 0; } for (size_type k = 1; k < krylov_dim + 1; ++k) { - for (size_type j = 0; j < residual->get_size()[1]; ++j) { + for (size_type j = 0; j < residual.size[1]; ++j) { gko::cb_gmres::helper_functions_accessor::write_scalar( krylov_bases, k, j, one()); - for (size_type i = 0; i < residual->get_size()[0]; ++i) { + for (size_type i = 0; i < residual.size[0]; ++i) { krylov_bases(k, i, j) = zero(); } } @@ -369,35 +356,36 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_RESTART_KERNEL); template void arnoldi(std::shared_ptr exec, - matrix::Dense* next_krylov_basis, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, - matrix::Dense* buffer_iter, - matrix::Dense>* arnoldi_norm, - size_type iter, array* final_iter_nums, - const array* stop_status, array*, - array*) + matrix::view::dense next_krylov_basis, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + Accessor3d krylov_bases, + matrix::view::dense hessenberg_iter, + matrix::view::dense buffer_iter, + matrix::view::dense> arnoldi_norm, + size_type iter, array& final_iter_nums, + const array& stop_status, array&, + array&) { static_assert( std::is_same::value, "ValueType must match arithmetic_type of accessor!"); - for (size_type i = 0; i < final_iter_nums->get_size(); ++i) { - final_iter_nums->get_data()[i] += + for (size_type i = 0; i < final_iter_nums.get_size(); ++i) { + final_iter_nums.get_data()[i] += (1 - static_cast( - stop_status->get_const_data()[i].has_stopped())); + stop_status.get_const_data()[i].has_stopped())); } finish_arnoldi_CGS(next_krylov_basis, krylov_bases, hessenberg_iter, buffer_iter, arnoldi_norm, iter, - stop_status->get_const_data()); + stop_status.get_const_data()); givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, - stop_status->get_const_data()); + stop_status.get_const_data()); calculate_next_residual_norm(givens_sin, givens_cos, residual_norm, residual_norm_collection, iter, - stop_status->get_const_data()); + stop_status.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); @@ -405,17 +393,17 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); template void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, + matrix::view::dense residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense* hessenberg, - matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array* final_iter_nums) + matrix::view::dense hessenberg, + matrix::view::dense y, + matrix::view::dense before_preconditioner, + const array& final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, - final_iter_nums->get_const_data()); - calculate_qy(krylov_bases, y, before_preconditioner, - final_iter_nums->get_const_data()); + final_iter_nums.get_const_data()); + calculate_qy(krylov_bases, y.as_const(), before_preconditioner, + final_iter_nums.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( diff --git a/reference/solver/cg_kernels.cpp b/reference/solver/cg_kernels.cpp index 5af15692414..a8abfb3be01 100644 --- a/reference/solver/cg_kernels.cpp +++ b/reference/solver/cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -22,22 +22,22 @@ namespace cg { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* z, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* prev_rho, - matrix::Dense* rho, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense z, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense prev_rho, + matrix::view::dense rho, array& stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < b.size[1]; ++j) { + rho(0, j) = zero(); + prev_rho(0, j) = one(); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - z->at(i, j) = p->at(i, j) = q->at(i, j) = zero(); + for (size_type i = 0; i < b.size[0]; ++i) { + for (size_type j = 0; j < b.size[1]; ++j) { + r(i, j) = b(i, j); + z(i, j) = p(i, j) = q(i, j) = zero(); } } } @@ -47,21 +47,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* p, const matrix::Dense* z, - const matrix::Dense* rho, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense z, + matrix::view::dense rho, + matrix::view::dense prev_rho, + const array& stop_status) { - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < p.size[0]; ++i) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_zero(prev_rho->at(j))) { - p->at(i, j) = z->at(i, j); + if (is_zero(prev_rho(0, j))) { + p(i, j) = z(i, j); } else { - auto tmp = rho->at(j) / prev_rho->at(j); - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); + auto tmp = rho(0, j) / prev_rho(0, j); + p(i, j) = z(i, j) + tmp * p(i, j); } } } @@ -72,22 +73,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* beta, - const matrix::Dense* rho, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense beta, + matrix::view::dense rho, + const array& stop_status) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(beta->at(j))) { - auto tmp = rho->at(j) / beta->at(j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); + if (is_nonzero(beta(0, j))) { + auto tmp = rho(0, j) / beta(0, j); + x(i, j) += tmp * p(i, j); + r(i, j) -= tmp * q(i, j); } } } diff --git a/reference/solver/cgs_kernels.cpp b/reference/solver/cgs_kernels.cpp index a5a5f8c5862..bd07bc81cb9 100644 --- a/reference/solver/cgs_kernels.cpp +++ b/reference/solver/cgs_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,32 +21,31 @@ namespace cgs { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* r_tld, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* u, - matrix::Dense* u_hat, - matrix::Dense* v_hat, matrix::Dense* t, - matrix::Dense* alpha, matrix::Dense* beta, - matrix::Dense* gamma, - matrix::Dense* rho_prev, - matrix::Dense* rho, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense r_tld, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense u, + matrix::view::dense u_hat, matrix::view::dense v_hat, + matrix::view::dense t, matrix::view::dense alpha, + matrix::view::dense beta, matrix::view::dense gamma, + matrix::view::dense rho_prev, matrix::view::dense rho, + array& stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - rho_prev->at(j) = one(); - alpha->at(j) = one(); - beta->at(j) = one(); - gamma->at(j) = one(); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < b.size[1]; ++j) { + rho(0, j) = zero(); + rho_prev(0, j) = one(); + alpha(0, j) = one(); + beta(0, j) = one(); + gamma(0, j) = one(); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - r_tld->at(i, j) = b->at(i, j); - u->at(i, j) = u_hat->at(i, j) = p->at(i, j) = q->at(i, j) = - v_hat->at(i, j) = t->at(i, j) = zero(); + for (size_type i = 0; i < b.size[0]; ++i) { + for (size_type j = 0; j < b.size[1]; ++j) { + r(i, j) = b(i, j); + r_tld(i, j) = b(i, j); + u(i, j) = u_hat(i, j) = p(i, j) = q(i, j) = v_hat(i, j) = t(i, j) = + zero(); } } } @@ -56,29 +55,29 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* u, - matrix::Dense* p, const matrix::Dense* q, - matrix::Dense* beta, const matrix::Dense* rho, - const matrix::Dense* rho_prev, - const array* stop_status) + matrix::view::dense r, + matrix::view::dense u, matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense beta, + matrix::view::dense rho, + matrix::view::dense rho_prev, + const array& stop_status) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(rho_prev->at(j))) { - beta->at(j) = rho->at(j) / rho_prev->at(j); + if (is_nonzero(rho_prev(0, j))) { + beta(0, j) = rho(0, j) / rho_prev(0, j); } } - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < p.size[0]; ++i) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - u->at(i, j) = r->at(i, j) + beta->at(j) * q->at(i, j); - p->at(i, j) = - u->at(i, j) + - beta->at(j) * (q->at(i, j) + beta->at(j) * p->at(i, j)); + u(i, j) = r(i, j) + beta(0, j) * q(i, j); + p(i, j) = u(i, j) + beta(0, j) * (q(i, j) + beta(0, j) * p(i, j)); } } } @@ -88,28 +87,29 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense* u, - const matrix::Dense* v_hat, matrix::Dense* q, - matrix::Dense* t, matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* gamma, - const array* stop_status) + matrix::view::dense u, + matrix::view::dense v_hat, + matrix::view::dense q, matrix::view::dense t, + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense gamma, + const array& stop_status) { - for (size_type j = 0; j < u->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type j = 0; j < u.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(gamma->at(j))) { - alpha->at(j) = rho->at(j) / gamma->at(j); + if (is_nonzero(gamma(0, j))) { + alpha(0, j) = rho(0, j) / gamma(0, j); } } - for (size_type i = 0; i < u->get_size()[0]; ++i) { - for (size_type j = 0; j < u->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < u.size[0]; ++i) { + for (size_type j = 0; j < u.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - q->at(i, j) = u->at(i, j) - alpha->at(j) * v_hat->at(i, j); - t->at(i, j) = u->at(i, j) + q->at(i, j); + q(i, j) = u(i, j) - alpha(0, j) * v_hat(i, j); + t(i, j) = u(i, j) + q(i, j); } } } @@ -119,18 +119,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, - const matrix::Dense* t, - const matrix::Dense* u_hat, matrix::Dense* r, - matrix::Dense* x, const matrix::Dense* alpha, - const array* stop_status) + matrix::view::dense t, + matrix::view::dense u_hat, + matrix::view::dense r, matrix::view::dense x, + matrix::view::dense alpha, + const array& stop_status) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - x->at(i, j) += alpha->at(j) * u_hat->at(i, j); - r->at(i, j) -= alpha->at(j) * t->at(i, j); + x(i, j) += alpha(0, j) * u_hat(i, j); + r(i, j) -= alpha(0, j) * t(i, j); } } } diff --git a/reference/solver/chebyshev_kernels.cpp b/reference/solver/chebyshev_kernels.cpp index b4f17309d86..275a1728444 100644 --- a/reference/solver/chebyshev_kernels.cpp +++ b/reference/solver/chebyshev_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -16,20 +16,20 @@ namespace chebyshev { template void init_update(std::shared_ptr exec, const solver::detail::coeff_type alpha, - const matrix::Dense* inner_sol, - matrix::Dense* update_sol, - matrix::Dense* output) + matrix::view::dense inner_sol, + matrix::view::dense update_sol, + matrix::view::dense output) { // the coeff_type always be the highest precision, so we need // to cast the others from ValueType to this precision. using arithmetic_type = solver::detail::coeff_type; - for (size_t row = 0; row < output->get_size()[0]; row++) { - for (size_t col = 0; col < output->get_size()[1]; col++) { + for (size_t row = 0; row < output.size[0]; row++) { + for (size_t col = 0; col < output.size[1]; col++) { const auto inner_val = - static_cast(inner_sol->at(row, col)); - update_sol->at(row, col) = static_cast(inner_val); - output->at(row, col) = static_cast( - static_cast(output->at(row, col)) + + static_cast(inner_sol(row, col)); + update_sol(row, col) = static_cast(inner_val); + output(row, col) = static_cast( + static_cast(output(row, col)) + alpha * inner_val); } } @@ -42,23 +42,22 @@ template void update(std::shared_ptr exec, const solver::detail::coeff_type alpha, const solver::detail::coeff_type beta, - matrix::Dense* inner_sol, - matrix::Dense* update_sol, - matrix::Dense* output) + matrix::view::dense inner_sol, + matrix::view::dense update_sol, + matrix::view::dense output) { // the coeff_type always be the highest precision, so we need // to cast the others from ValueType to this precision. using arithmetic_type = solver::detail::coeff_type; - for (size_t row = 0; row < output->get_size()[0]; row++) { - for (size_t col = 0; col < output->get_size()[1]; col++) { + for (size_t row = 0; row < output.size[0]; row++) { + for (size_t col = 0; col < output.size[1]; col++) { const auto val = - static_cast(inner_sol->at(row, col)) + - beta * static_cast(update_sol->at(row, col)); - inner_sol->at(row, col) = static_cast(val); - update_sol->at(row, col) = static_cast(val); - output->at(row, col) = static_cast( - static_cast(output->at(row, col)) + - alpha * val); + static_cast(inner_sol(row, col)) + + beta * static_cast(update_sol(row, col)); + inner_sol(row, col) = static_cast(val); + update_sol(row, col) = static_cast(val); + output(row, col) = static_cast( + static_cast(output(row, col)) + alpha * val); } } } diff --git a/reference/solver/common_gmres_kernels.cpp b/reference/solver/common_gmres_kernels.cpp index 4ba091e03ae..e86e7c16fd4 100644 --- a/reference/solver/common_gmres_kernels.cpp +++ b/reference/solver/common_gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -27,44 +27,44 @@ namespace { template -void calculate_sin_and_cos(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, +void calculate_sin_and_cos(matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, size_type iter, const size_type rhs) { - if (is_zero(hessenberg_iter->at(iter, rhs))) { - givens_cos->at(iter, rhs) = zero(); - givens_sin->at(iter, rhs) = one(); + if (is_zero(hessenberg_iter(iter, rhs))) { + givens_cos(iter, rhs) = zero(); + givens_sin(iter, rhs) = one(); } else { - auto this_hess = hessenberg_iter->at(iter, rhs); - auto next_hess = hessenberg_iter->at(iter + 1, rhs); + auto this_hess = hessenberg_iter(iter, rhs); + auto next_hess = hessenberg_iter(iter + 1, rhs); const auto scale = abs(this_hess) + abs(next_hess); const auto hypotenuse = scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + abs(next_hess / scale) * abs(next_hess / scale)); - givens_cos->at(iter, rhs) = conj(this_hess) / hypotenuse; - givens_sin->at(iter, rhs) = conj(next_hess) / hypotenuse; + givens_cos(iter, rhs) = conj(this_hess) / hypotenuse; + givens_sin(iter, rhs) = conj(next_hess) / hypotenuse; } } template -void givens_rotation(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, size_type iter, - const stopping_status* stop_status) +void givens_rotation(matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense hessenberg_iter, + size_type iter, const stopping_status* stop_status) { - for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { + for (size_type i = 0; i < hessenberg_iter.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } for (size_type j = 0; j < iter; ++j) { - auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + - givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j + 1, i) = - -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + - conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j, i) = temp; + auto temp = givens_cos(j, i) * hessenberg_iter(j, i) + + givens_sin(j, i) * hessenberg_iter(j + 1, i); + hessenberg_iter(j + 1, i) = + -conj(givens_sin(j, i)) * hessenberg_iter(j, i) + + conj(givens_cos(j, i)) * hessenberg_iter(j + 1, i); + hessenberg_iter(j, i) = temp; // temp = cos(j)*hessenberg(j) + // sin(j)*hessenberg(j+1) // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + @@ -74,10 +74,10 @@ void givens_rotation(matrix::Dense* givens_sin, calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); - hessenberg_iter->at(iter, i) = - givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + - givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); - hessenberg_iter->at(iter + 1, i) = zero(); + hessenberg_iter(iter, i) = + givens_cos(iter, i) * hessenberg_iter(iter, i) + + givens_sin(iter, i) * hessenberg_iter(iter + 1, i); + hessenberg_iter(iter + 1, i) = zero(); // hessenberg(iter) = cos(iter)*hessenberg(iter) + // sin(iter)*hessenberg(iter + 1) // hessenberg(iter+1) = 0 @@ -87,22 +87,21 @@ void givens_rotation(matrix::Dense* givens_sin, template void calculate_next_residual_norm( - matrix::Dense* givens_sin, matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, size_type iter, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, size_type iter, const stopping_status* stop_status) { - for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { + for (size_type i = 0; i < residual_norm.size[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } - residual_norm_collection->at(iter + 1, i) = - -conj(givens_sin->at(iter, i)) * - residual_norm_collection->at(iter, i); - residual_norm_collection->at(iter, i) = - givens_cos->at(iter, i) * residual_norm_collection->at(iter, i); - residual_norm->at(0, i) = - abs(residual_norm_collection->at(iter + 1, i)); + residual_norm_collection(iter + 1, i) = + -conj(givens_sin(iter, i)) * residual_norm_collection(iter, i); + residual_norm_collection(iter, i) = + givens_cos(iter, i) * residual_norm_collection(iter, i); + residual_norm(0, i) = abs(residual_norm_collection(iter + 1, i)); } } @@ -112,21 +111,21 @@ void calculate_next_residual_norm( template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, + matrix::view::dense b, + matrix::view::dense residual, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, stopping_status* stop_status) { - const auto krylov_dim = givens_sin->get_size()[0]; + const auto krylov_dim = givens_sin.size[0]; using NormValueType = remove_complex; - for (size_type j = 0; j < b->get_size()[1]; ++j) { - for (size_type i = 0; i < b->get_size()[0]; ++i) { - residual->at(i, j) = b->at(i, j); + for (size_type j = 0; j < b.size[1]; ++j) { + for (size_type i = 0; i < b.size[0]; ++i) { + residual(i, j) = b(i, j); } for (size_type i = 0; i < krylov_dim; ++i) { - givens_sin->at(i, j) = zero(); - givens_cos->at(i, j) = zero(); + givens_sin(i, j) = zero(); + givens_cos(i, j) = zero(); } stop_status[j].reset(); } @@ -137,15 +136,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL); template void hessenberg_qr(std::shared_ptr exec, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense* hessenberg_iter, size_type iter, - size_type* final_iter_nums, + matrix::view::dense givens_sin, + matrix::view::dense givens_cos, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg_iter, + size_type iter, size_type* final_iter_nums, const stopping_status* stop_status) { - for (size_type i = 0; i < givens_sin->get_size()[1]; ++i) { + for (size_type i = 0; i < givens_sin.size[1]; ++i) { if (!stop_status[i].has_stopped()) { final_iter_nums[i]++; } @@ -162,26 +161,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, - matrix::Dense* y, const size_type* final_iter_nums, + matrix::view::dense residual_norm_collection, + matrix::view::dense hessenberg, + matrix::view::dense y, + const size_type* final_iter_nums, const stopping_status* stop_status) { - for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { + for (size_type k = 0; k < residual_norm_collection.size[1]; ++k) { if (stop_status[k].is_finalized()) { continue; } for (int i = final_iter_nums[k] - 1; i >= 0; --i) { - auto temp = residual_norm_collection->at(i, k); + auto temp = residual_norm_collection(i, k); for (size_type j = i + 1; j < final_iter_nums[k]; ++j) { temp -= - hessenberg->at( - j, i * residual_norm_collection->get_size()[1] + k) * - y->at(j, k); + hessenberg(j, i * residual_norm_collection.size[1] + k) * + y(j, k); } - y->at(i, k) = - temp / hessenberg->at( - i, i * residual_norm_collection->get_size()[1] + k); + y(i, k) = + temp / hessenberg(i, i * residual_norm_collection.size[1] + k); } } } diff --git a/reference/solver/fcg_kernels.cpp b/reference/solver/fcg_kernels.cpp index 65b6bf27698..71b1adb2758 100644 --- a/reference/solver/fcg_kernels.cpp +++ b/reference/solver/fcg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,24 +21,24 @@ namespace fcg { template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, matrix::Dense* r, - matrix::Dense* z, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* t, - matrix::Dense* prev_rho, - matrix::Dense* rho, matrix::Dense* rho_t, - array* stop_status) +void initialize( + std::shared_ptr exec, + matrix::view::dense b, matrix::view::dense r, + matrix::view::dense z, matrix::view::dense p, + matrix::view::dense q, matrix::view::dense t, + matrix::view::dense prev_rho, matrix::view::dense rho, + matrix::view::dense rho_t, array& stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - rho_t->at(j) = one(); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < b.size[1]; ++j) { + rho(0, j) = zero(); + prev_rho(0, j) = one(); + rho_t(0, j) = one(); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - t->at(i, j) = r->at(i, j) = b->at(i, j); - z->at(i, j) = p->at(i, j) = q->at(i, j) = zero(); + for (size_type i = 0; i < b.size[0]; ++i) { + for (size_type j = 0; j < b.size[1]; ++j) { + t(i, j) = r(i, j) = b(i, j); + z(i, j) = p(i, j) = q(i, j) = zero(); } } } @@ -48,21 +48,22 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* p, const matrix::Dense* z, - const matrix::Dense* rho_t, - const matrix::Dense* prev_rho, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense z, + matrix::view::dense rho_t, + matrix::view::dense prev_rho, + const array& stop_status) { - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < p.size[0]; ++i) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_zero(prev_rho->at(j))) { - p->at(i, j) = z->at(i, j); + if (is_zero(prev_rho(0, j))) { + p(i, j) = z(i, j); } else { - auto tmp = rho_t->at(j) / prev_rho->at(j); - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); + auto tmp = rho_t(0, j) / prev_rho(0, j); + p(i, j) = z(i, j) + tmp * p(i, j); } } } @@ -73,24 +74,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - matrix::Dense* t, const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* beta, - const matrix::Dense* rho, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense t, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense beta, + matrix::view::dense rho, + const array& stop_status) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(beta->at(j))) { - auto tmp = rho->at(j) / beta->at(j); - auto prev_r = r->at(i, j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); - t->at(i, j) = r->at(i, j) - prev_r; + if (is_nonzero(beta(0, j))) { + auto tmp = rho(0, j) / beta(0, j); + auto prev_r = r(i, j); + x(i, j) += tmp * p(i, j); + r(i, j) -= tmp * q(i, j); + t(i, j) = r(i, j) - prev_r; } } } diff --git a/reference/solver/gcr_kernels.cpp b/reference/solver/gcr_kernels.cpp index 531814c641e..69cb154b405 100644 --- a/reference/solver/gcr_kernels.cpp +++ b/reference/solver/gcr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -25,13 +25,13 @@ namespace gcr { template void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, + matrix::view::dense b, + matrix::view::dense residual, stopping_status* stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - for (size_type i = 0; i < b->get_size()[0]; ++i) { - residual->at(i, j) = b->at(i, j); + for (size_type j = 0; j < b.size[1]; ++j) { + for (size_type i = 0; i < b.size[0]; ++i) { + residual(i, j) = b(i, j); } stop_status[j].reset(); } @@ -42,15 +42,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR_INITIALIZE_KERNEL); template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - const matrix::Dense* A_residual, - matrix::Dense* p_bases, - matrix::Dense* Ap_bases, size_type* final_iter_nums) + matrix::view::dense residual, + matrix::view::dense A_residual, + matrix::view::dense p_bases, + matrix::view::dense Ap_bases, + size_type* final_iter_nums) { - for (size_type j = 0; j < residual->get_size()[1]; ++j) { - for (size_type i = 0; i < residual->get_size()[0]; ++i) { - p_bases->at(i, j) = residual->at(i, j); - Ap_bases->at(i, j) = A_residual->at(i, j); + for (size_type j = 0; j < residual.size[1]; ++j) { + for (size_type i = 0; i < residual.size[0]; ++i) { + p_bases(i, j) = residual(i, j); + Ap_bases(i, j) = A_residual(i, j); } final_iter_nums[j] = 0; } @@ -61,22 +62,23 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GCR_RESTART_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* residual, - const matrix::Dense* p, - const matrix::Dense* Ap, - const matrix::Dense>* Ap_norm, - const matrix::Dense* rAp, + matrix::view::dense x, + matrix::view::dense residual, + matrix::view::dense p, + matrix::view::dense Ap, + matrix::view::dense> Ap_norm, + matrix::view::dense rAp, const stopping_status* stop_status) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { if (stop_status[j].has_stopped()) { continue; } - if (Ap_norm->at(j) != zero()) { - auto tmp = rAp->at(j) / Ap_norm->at(j); - x->at(i, j) += tmp * p->at(i, j); - residual->at(i, j) -= tmp * Ap->at(i, j); + if (Ap_norm(0, j) != zero()) { + auto tmp = rAp(0, j) / Ap_norm(0, j); + x(i, j) += tmp * p(i, j); + residual(i, j) -= tmp * Ap(i, j); } } } diff --git a/reference/solver/gmres_kernels.cpp b/reference/solver/gmres_kernels.cpp index a7f5a751a3b..7b062f8512f 100644 --- a/reference/solver/gmres_kernels.cpp +++ b/reference/solver/gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -25,16 +25,16 @@ namespace gmres { template void restart(std::shared_ptr exec, - const matrix::Dense* residual, - const matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense* krylov_bases, size_type* final_iter_nums) + matrix::view::dense residual, + matrix::view::dense> residual_norm, + matrix::view::dense residual_norm_collection, + matrix::view::dense krylov_bases, + size_type* final_iter_nums) { - for (size_type j = 0; j < residual->get_size()[1]; ++j) { - residual_norm_collection->at(0, j) = residual_norm->at(0, j); - for (size_type i = 0; i < residual->get_size()[0]; ++i) { - krylov_bases->at(i, j) = - residual->at(i, j) / residual_norm->at(0, j); + for (size_type j = 0; j < residual.size[1]; ++j) { + residual_norm_collection(0, j) = residual_norm(0, j); + for (size_type i = 0; i < residual.size[0]; ++i) { + krylov_bases(i, j) = residual(i, j) / residual_norm(0, j); } final_iter_nums[j] = 0; } @@ -45,22 +45,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_RESTART_KERNEL); template void multi_axpy(std::shared_ptr exec, - const matrix::Dense* krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, + matrix::view::dense krylov_bases, + matrix::view::dense y, + matrix::view::dense before_preconditioner, const size_type* final_iter_nums, stopping_status* stop_status) { - const auto krylov_bases_rowoffset = before_preconditioner->get_size()[0]; - for (size_type k = 0; k < before_preconditioner->get_size()[1]; ++k) { + const auto krylov_bases_rowoffset = before_preconditioner.size[0]; + for (size_type k = 0; k < before_preconditioner.size[1]; ++k) { if (stop_status[k].is_finalized()) { continue; } - for (size_type i = 0; i < before_preconditioner->get_size()[0]; ++i) { - before_preconditioner->at(i, k) = zero(); + for (size_type i = 0; i < before_preconditioner.size[0]; ++i) { + before_preconditioner(i, k) = zero(); for (size_type j = 0; j < final_iter_nums[k]; ++j) { - before_preconditioner->at(i, k) += - krylov_bases->at(i + j * krylov_bases_rowoffset, k) * - y->at(j, k); + before_preconditioner(i, k) += + krylov_bases(i + j * krylov_bases_rowoffset, k) * y(j, k); } } if (stop_status[k].has_stopped()) { @@ -73,19 +72,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); template void multi_dot(std::shared_ptr exec, - const matrix::Dense* krylov_bases, - const matrix::Dense* next_krylov, - matrix::Dense* hessenberg_col) + matrix::view::dense krylov_bases, + matrix::view::dense next_krylov, + matrix::view::dense hessenberg_col) { - auto num_rhs = next_krylov->get_size()[1]; - auto krylov_bases_rowoffset = next_krylov->get_size()[0]; - for (size_type i = 0; i < hessenberg_col->get_size()[0] - 1; ++i) { + auto num_rhs = next_krylov.size[1]; + auto krylov_bases_rowoffset = next_krylov.size[0]; + for (size_type i = 0; i < hessenberg_col.size[0] - 1; ++i) { for (size_type k = 0; k < num_rhs; ++k) { - hessenberg_col->at(i, k) = zero(); + hessenberg_col(i, k) = zero(); for (size_type j = 0; j < krylov_bases_rowoffset; ++j) { - hessenberg_col->at(i, k) += - conj(krylov_bases->at(i * krylov_bases_rowoffset + j, k)) * - next_krylov->at(j, k); + hessenberg_col(i, k) += + conj(krylov_bases(i * krylov_bases_rowoffset + j, k)) * + next_krylov(j, k); } } } diff --git a/reference/solver/idr_kernels.cpp b/reference/solver/idr_kernels.cpp index ef13cd08325..dc2f79488a1 100644 --- a/reference/solver/idr_kernels.cpp +++ b/reference/solver/idr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -29,22 +29,22 @@ namespace { template void solve_lower_triangular(const size_type nrhs, - const matrix::Dense* m, - const matrix::Dense* f, - matrix::Dense* c, - const array* stop_status) + matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense c, + const array& stop_status) { - for (size_type i = 0; i < f->get_size()[1]; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + for (size_type i = 0; i < f.size[1]; i++) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } - for (size_type row = 0; row < m->get_size()[0]; row++) { - auto temp = f->at(row, i); + for (size_type row = 0; row < m.size[0]; row++) { + auto temp = f(row, i); for (size_type col = 0; col < row; col++) { - temp -= m->at(row, col * nrhs + i) * c->at(col, i); + temp -= m(row, col * nrhs + i) * c(col, i); } - c->at(row, i) = temp / m->at(row, row * nrhs + i); + c(row, i) = temp / m(row, row * nrhs + i); } } } @@ -52,31 +52,32 @@ void solve_lower_triangular(const size_type nrhs, template void update_g_and_u(const size_type nrhs, const size_type k, - const matrix::Dense* p, - const matrix::Dense* m, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, - const array* stop_status) + matrix::view::dense p, + matrix::view::dense m, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, + const array& stop_status) { for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } for (size_type j = 0; j < k; j++) { auto alpha = zero(); - for (size_type ind = 0; ind < p->get_size()[1]; ind++) { - alpha += p->at(j, ind) * g_k->at(ind, i); + for (size_type ind = 0; ind < p.size[1]; ind++) { + alpha += p(j, ind) * g_k(ind, i); } - alpha /= m->at(j, j * nrhs + i); - for (size_type row = 0; row < g->get_size()[0]; row++) { - g_k->at(row, i) -= alpha * g->at(row, j * nrhs + i); - u->at(row, k * nrhs + i) -= alpha * u->at(row, j * nrhs + i); + alpha /= m(j, j * nrhs + i); + for (size_type row = 0; row < g.size[0]; row++) { + g_k(row, i) -= alpha * g(row, j * nrhs + i); + u(row, k * nrhs + i) -= alpha * u(row, j * nrhs + i); } } - for (size_type row = 0; row < g->get_size()[0]; row++) { - g->at(row, k * nrhs + i) = g_k->at(row, i); + for (size_type row = 0; row < g.size[0]; row++) { + g(row, k * nrhs + i) = g_k(row, i); } } } @@ -105,32 +106,32 @@ get_rand_value(Distribution&& dist, Generator&& gen) template void initialize(std::shared_ptr exec, - const size_type nrhs, matrix::Dense* m, - matrix::Dense* subspace_vectors, bool deterministic, - array* stop_status) + const size_type nrhs, matrix::view::dense m, + matrix::view::dense subspace_vectors, + bool deterministic, array& stop_status) { // Initialize M for (size_type i = 0; i < nrhs; i++) { - stop_status->get_data()[i].reset(); + stop_status.get_data()[i].reset(); } - for (size_type row = 0; row < m->get_size()[0]; row++) { - for (size_type col = 0; col < m->get_size()[1]; col++) { - m->at(row, col) = + for (size_type row = 0; row < m.size[0]; row++) { + for (size_type col = 0; col < m.size[1]; col++) { + m(row, col) = (row == col / nrhs) ? one() : zero(); } } // Initialize and Orthonormalize P - const auto num_rows = subspace_vectors->get_size()[0]; - const auto num_cols = subspace_vectors->get_size()[1]; + const auto num_rows = subspace_vectors.size[0]; + const auto num_cols = subspace_vectors.size[1]; auto dist = std::normal_distribution<>(0.0, 1.0); auto seed = std::random_device{}(); auto gen = std::default_random_engine(seed); for (size_type row = 0; row < num_rows; row++) { if (!deterministic) { for (size_type col = 0; col < num_cols; col++) { - subspace_vectors->at(row, col) = + subspace_vectors(row, col) = get_rand_value(dist, gen); } } @@ -138,24 +139,22 @@ void initialize(std::shared_ptr exec, for (size_type i = 0; i < row; i++) { auto dot = zero(); for (size_type j = 0; j < num_cols; j++) { - dot += subspace_vectors->at(row, j) * - conj(subspace_vectors->at(i, j)); + dot += subspace_vectors(row, j) * conj(subspace_vectors(i, j)); } for (size_type j = 0; j < num_cols; j++) { - subspace_vectors->at(row, j) -= - dot * subspace_vectors->at(i, j); + subspace_vectors(row, j) -= dot * subspace_vectors(i, j); } } auto norm = zero(); for (size_type j = 0; j < num_cols; j++) { - norm += squared_norm(subspace_vectors->at(row, j)); + norm += squared_norm(subspace_vectors(row, j)); } norm = sqrt(norm); for (size_type j = 0; j < num_cols; j++) { - subspace_vectors->at(row, j) /= norm; + subspace_vectors(row, j) /= norm; } } } @@ -165,27 +164,27 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* m, - const matrix::Dense* f, - const matrix::Dense* residual, - const matrix::Dense* g, matrix::Dense* c, - matrix::Dense* v, - const array* stop_status) + const size_type k, matrix::view::dense m, + matrix::view::dense f, + matrix::view::dense residual, + matrix::view::dense g, + matrix::view::dense c, matrix::view::dense v, + const array& stop_status) { // Compute c = M \ f solve_lower_triangular(nrhs, m, f, c, stop_status); for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } // v = residual - c_k * g_k - ... - c_s * g_s - for (size_type row = 0; row < v->get_size()[0]; row++) { - auto temp = residual->at(row, i); - for (size_type j = k; j < m->get_size()[0]; j++) { - temp -= c->at(j, i) * g->at(row, j * nrhs + i); + for (size_type row = 0; row < v.size[0]; row++) { + auto temp = residual(row, i); + for (size_type j = k; j < m.size[0]; j++) { + temp -= c(j, i) * g(row, j * nrhs + i); } - v->at(row, i) = temp; + v(row, i) = temp; } } } @@ -195,22 +194,23 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* omega, - const matrix::Dense* preconditioned_vector, - const matrix::Dense* c, matrix::Dense* u, - const array* stop_status) + const size_type k, matrix::view::dense omega, + matrix::view::dense preconditioned_vector, + matrix::view::dense c, + matrix::view::dense u, + const array& stop_status) { for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } - for (size_type row = 0; row < u->get_size()[0]; row++) { - auto temp = omega->at(0, i) * preconditioned_vector->at(row, i); - for (size_type j = k; j < c->get_size()[0]; j++) { - temp += c->at(j, i) * u->at(row, j * nrhs + i); + for (size_type row = 0; row < u.size[0]; row++) { + auto temp = omega(0, i) * preconditioned_vector(row, i); + for (size_type j = k; j < c.size[0]; j++) { + temp += c(j, i) * u(row, j * nrhs + i); } - u->at(row, k * nrhs + i) = temp; + u(row, k * nrhs + i) = temp; } } } @@ -220,39 +220,41 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense* p, - matrix::Dense* g, matrix::Dense* g_k, - matrix::Dense* u, matrix::Dense* m, - matrix::Dense* f, matrix::Dense*, - matrix::Dense* residual, matrix::Dense* x, - const array* stop_status) + const size_type k, matrix::view::dense p, + matrix::view::dense g, + matrix::view::dense g_k, + matrix::view::dense u, matrix::view::dense m, + matrix::view::dense f, matrix::view::dense, + matrix::view::dense residual, + matrix::view::dense x, + const array& stop_status) { - update_g_and_u(nrhs, k, p, m, g, g_k, u, stop_status); + update_g_and_u(nrhs, k, p, m.as_const(), g, g_k, u, stop_status); for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } - for (size_type j = k; j < m->get_size()[0]; j++) { + for (size_type j = k; j < m.size[0]; j++) { auto temp = zero(); - for (size_type ind = 0; ind < p->get_size()[1]; ind++) { - temp += p->at(j, ind) * g->at(ind, k * nrhs + i); + for (size_type ind = 0; ind < p.size[1]; ind++) { + temp += p(j, ind) * g(ind, k * nrhs + i); } - m->at(j, k * nrhs + i) = temp; + m(j, k * nrhs + i) = temp; } - auto beta = f->at(k, i) / m->at(k, k * nrhs + i); + auto beta = f(k, i) / m(k, k * nrhs + i); - for (size_type row = 0; row < g->get_size()[0]; row++) { - residual->at(row, i) -= beta * g->at(row, k * nrhs + i); - x->at(row, i) += beta * u->at(row, k * nrhs + i); + for (size_type row = 0; row < g.size[0]; row++) { + residual(row, i) -= beta * g(row, k * nrhs + i); + x(row, i) += beta * u(row, k * nrhs + i); } - if (k + 1 < f->get_size()[0]) { - f->at(k, i) = zero(); - for (size_type j = k + 1; j < f->get_size()[0]; j++) { - f->at(j, i) -= beta * m->at(j, k * nrhs + i); + if (k + 1 < f.size[0]) { + f(k, i) = zero(); + for (size_type j = k + 1; j < f.size[0]; j++) { + f(j, i) -= beta * m(j, k * nrhs + i); } } } @@ -264,24 +266,26 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense* tht, - const matrix::Dense>* residual_norm, - matrix::Dense* omega, const array* stop_status) + const remove_complex kappa, + matrix::view::dense tht, + matrix::view::dense> residual_norm, + matrix::view::dense omega, + const array& stop_status) { for (size_type i = 0; i < nrhs; i++) { - if (stop_status->get_const_data()[i].has_stopped()) { + if (stop_status.get_const_data()[i].has_stopped()) { continue; } - auto thr = omega->at(0, i); - auto normt = sqrt(real(tht->at(0, i))); - omega->at(0, i) /= tht->at(0, i); - auto absrho = abs(thr / (normt * residual_norm->at(0, i))); + auto thr = omega(0, i); + auto normt = sqrt(real(tht(0, i))); + omega(0, i) /= tht(0, i); + auto absrho = abs(thr / (normt * residual_norm(0, i))); if (absrho < kappa) { - omega->at(0, i) *= kappa / absrho; + omega(0, i) *= kappa / absrho; } if (normt == zero>()) { - omega->at(0, i) = 0; + omega(0, i) = 0; } } } diff --git a/reference/solver/ir_kernels.cpp b/reference/solver/ir_kernels.cpp index 1f0f83bfe49..c2c4fd7999a 100644 --- a/reference/solver/ir_kernels.cpp +++ b/reference/solver/ir_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -17,10 +17,10 @@ namespace ir { void initialize(std::shared_ptr exec, - array* stop_status) + array& stop_status) { - for (size_type j = 0; j < stop_status->get_size(); ++j) { - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < stop_status.get_size(); ++j) { + stop_status.get_data()[j].reset(); } } diff --git a/reference/solver/lower_trs_kernels.cpp b/reference/solver/lower_trs_kernels.cpp index ba02c9c838c..c6a66a23993 100644 --- a/reference/solver/lower_trs_kernels.cpp +++ b/reference/solver/lower_trs_kernels.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/lower_trs_kernels.hpp" #include +#include #include #include @@ -58,22 +59,24 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense*, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); auto vals = matrix->get_const_values(); - for (size_type j = 0; j < b->get_size()[1]; ++j) { + for (size_type j = 0; j < b.size[1]; ++j) { for (size_type row = 0; row < matrix->get_size()[0]; ++row) { auto diag = one(); bool found_diag = false; - x->at(row, j) = b->at(row, j); + x(row, j) = b(row, j); for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) { auto col = col_idxs[k]; if (col < row) { - x->at(row, j) -= vals[k] * x->at(col, j); + x(row, j) -= vals[k] * x(col, j); } if (col == row) { diag = vals[k]; @@ -82,7 +85,7 @@ void solve(std::shared_ptr exec, } if (!unit_diag) { GKO_ASSERT(found_diag); - x->at(row, j) /= diag; + x(row, j) /= diag; } } } diff --git a/reference/solver/minres_kernels.cpp b/reference/solver/minres_kernels.cpp index 7ffb26535f7..8619e85935d 100644 --- a/reference/solver/minres_kernels.cpp +++ b/reference/solver/minres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -26,29 +26,29 @@ namespace minres { template void initialize( std::shared_ptr exec, - const matrix::Dense* r, matrix::Dense* z, - matrix::Dense* p, matrix::Dense* p_prev, - matrix::Dense* q, matrix::Dense* q_prev, - matrix::Dense* q_tilde, matrix::Dense* beta, - matrix::Dense* gamma, matrix::Dense* delta, - matrix::Dense* cos_prev, matrix::Dense* cos, - matrix::Dense* sin_prev, matrix::Dense* sin, - matrix::Dense* eta_next, matrix::Dense* eta, - array* stop_status) + matrix::view::dense r, matrix::view::dense z, + matrix::view::dense p, matrix::view::dense p_prev, + matrix::view::dense q, matrix::view::dense q_prev, + matrix::view::dense q_tilde, matrix::view::dense beta, + matrix::view::dense gamma, matrix::view::dense delta, + matrix::view::dense cos_prev, matrix::view::dense cos, + matrix::view::dense sin_prev, matrix::view::dense sin, + matrix::view::dense eta_next, matrix::view::dense eta, + array& stop_status) { - for (size_type j = 0; j < r->get_size()[1]; ++j) { - delta->at(j) = gamma->at(j) = cos_prev->at(j) = sin_prev->at(j) = - sin->at(j) = zero(); - cos->at(j) = one(); - eta_next->at(j) = eta->at(j) = beta->at(j) = sqrt(beta->at(j)); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < r.size[1]; ++j) { + delta(0, j) = gamma(0, j) = cos_prev(0, j) = sin_prev(0, j) = + sin(0, j) = zero(); + cos(0, j) = one(); + eta_next(0, j) = eta(0, j) = beta(0, j) = sqrt(beta(0, j)); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < r->get_size()[0]; ++i) { - for (size_type j = 0; j < r->get_size()[1]; ++j) { - q->at(i, j) = safe_divide(r->at(i, j), beta->at(j)); - z->at(i, j) = safe_divide(z->at(i, j), beta->at(j)); - p->at(i, j) = p_prev->at(i, j) = q_prev->at(i, j) = - q_tilde->at(i, j) = zero(); + for (size_type i = 0; i < r.size[0]; ++i) { + for (size_type j = 0; j < r.size[1]; ++j) { + q(i, j) = safe_divide(r(i, j), beta(0, j)); + z(i, j) = safe_divide(z(i, j), beta(0, j)); + p(i, j) = p_prev(i, j) = q_prev(i, j) = q_tilde(i, j) = + zero(); } } } @@ -76,36 +76,35 @@ void update_givens_rotation(ValueType& alpha, const ValueType& beta, template -void step_1(std::shared_ptr exec, - matrix::Dense* alpha, matrix::Dense* beta, - matrix::Dense* gamma, matrix::Dense* delta, - matrix::Dense* cos_prev, matrix::Dense* cos, - matrix::Dense* sin_prev, matrix::Dense* sin, - matrix::Dense* eta, matrix::Dense* eta_next, - matrix::Dense* tau, - const array* stop_status) +void step_1( + std::shared_ptr exec, + matrix::view::dense alpha, matrix::view::dense beta, + matrix::view::dense gamma, matrix::view::dense delta, + matrix::view::dense cos_prev, matrix::view::dense cos, + matrix::view::dense sin_prev, matrix::view::dense sin, + matrix::view::dense eta, matrix::view::dense eta_next, + matrix::view::dense tau, + const array& stop_status) { - for (size_type j = 0; j < alpha->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type j = 0; j < alpha.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - beta->at(j) = sqrt(beta->at(j)); - delta->at(j) = sin_prev->at(j) * gamma->at(j); - auto tmp_d = gamma->at(j); - auto tmp_a = alpha->at(j); - gamma->at(j) = - cos_prev->at(j) * cos->at(j) * tmp_d + sin->at(j) * tmp_a; - alpha->at(j) = - -conj(sin->at(j)) * cos_prev->at(j) * tmp_d + cos->at(j) * tmp_a; - - std::swap(cos->at(j), cos_prev->at(j)); - std::swap(sin->at(j), sin_prev->at(j)); - update_givens_rotation(alpha->at(j), beta->at(j), cos->at(j), - sin->at(j)); - - tau->at(j) = sin->at(j) * sin->at(j) * tau->at(j); - eta->at(j) = eta_next->at(j); - eta_next->at(j) = -conj(sin->at(j)) * eta->at(j); + beta(0, j) = sqrt(beta(0, j)); + delta(0, j) = sin_prev(0, j) * gamma(0, j); + auto tmp_d = gamma(0, j); + auto tmp_a = alpha(0, j); + gamma(0, j) = cos_prev(0, j) * cos(0, j) * tmp_d + sin(0, j) * tmp_a; + alpha(0, j) = + -conj(sin(0, j)) * cos_prev(0, j) * tmp_d + cos(0, j) * tmp_a; + + std::swap(cos(0, j), cos_prev(0, j)); + std::swap(sin(0, j), sin_prev(0, j)); + update_givens_rotation(alpha(0, j), beta(0, j), cos(0, j), sin(0, j)); + + tau(0, j) = sin(0, j) * sin(0, j) * tau(0, j); + eta(0, j) = eta_next(0, j); + eta_next(0, j) = -conj(sin(0, j)) * eta(0, j); } } @@ -114,34 +113,36 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MINRES_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* p, - const matrix::Dense* p_prev, matrix::Dense* z, - const matrix::Dense* z_tilde, - matrix::Dense* q, matrix::Dense* q_prev, - matrix::Dense* v, const matrix::Dense* alpha, - const matrix::Dense* beta, - const matrix::Dense* gamma, - const matrix::Dense* delta, - const matrix::Dense* cos, - const matrix::Dense* eta, - const array* stop_status) + matrix::view::dense x, matrix::view::dense p, + matrix::view::dense p_prev, + matrix::view::dense z, + matrix::view::dense z_tilde, + matrix::view::dense q, + matrix::view::dense q_prev, + matrix::view::dense v, + matrix::view::dense alpha, + matrix::view::dense beta, + matrix::view::dense gamma, + matrix::view::dense delta, + matrix::view::dense cos, + matrix::view::dense eta, + const array& stop_status) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < x.size[0]; ++i) { + for (size_type j = 0; j < x.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - p->at(i, j) = - safe_divide(z->at(i, j) - gamma->at(j) * p_prev->at(i, j) - - delta->at(j) * p->at(i, j), - alpha->at(j)); - x->at(i, j) = x->at(i, j) + cos->at(j) * eta->at(j) * p->at(i, j); - - q_prev->at(i, j) = v->at(i, j); - const auto tmp = q->at(i, j); - q->at(i, j) = safe_divide(v->at(i, j), beta->at(j)); - v->at(i, j) = tmp * beta->at(j); - z->at(i, j) = safe_divide(z_tilde->at(i, j), beta->at(j)); + p(i, j) = safe_divide( + z(i, j) - gamma(0, j) * p_prev(i, j) - delta(0, j) * p(i, j), + alpha(0, j)); + x(i, j) = x(i, j) + cos(0, j) * eta(0, j) * p(i, j); + + q_prev(i, j) = v(i, j); + const auto tmp = q(i, j); + q(i, j) = safe_divide(v(i, j), beta(0, j)); + v(i, j) = tmp * beta(0, j); + z(i, j) = safe_divide(z_tilde(i, j), beta(0, j)); } } } diff --git a/reference/solver/multigrid_kernels.cpp b/reference/solver/multigrid_kernels.cpp index b08c9857d3a..5feb12a8634 100644 --- a/reference/solver/multigrid_kernels.cpp +++ b/reference/solver/multigrid_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,22 +23,23 @@ namespace multigrid { template void kcycle_step_1(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* v, - matrix::Dense* g, matrix::Dense* d, - matrix::Dense* e) + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense v, + matrix::view::dense g, + matrix::view::dense d, + matrix::view::dense e) { - const auto nrows = g->get_size()[0]; - const auto nrhs = g->get_size()[1]; + const auto nrows = g.size[0]; + const auto nrhs = g.size[1]; for (size_type i = 0; i < nrhs; i++) { - auto temp = alpha->at(0, i) / rho->at(0, i); + auto temp = alpha(0, i) / rho(0, i); for (size_type j = 0; j < nrows; j++) { if (is_finite(temp)) { - g->at(j, i) -= temp * v->at(j, i); - e->at(j, i) *= temp; + g(j, i) -= temp * v(j, i); + e(j, i) *= temp; } - d->at(j, i) = e->at(j, i); + d(j, i) = e(j, i); } } } @@ -48,25 +49,23 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL); template void kcycle_step_2(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Dense* rho, - const matrix::Dense* gamma, - const matrix::Dense* beta, - const matrix::Dense* zeta, - const matrix::Dense* d, - matrix::Dense* e) + matrix::view::dense alpha, + matrix::view::dense rho, + matrix::view::dense gamma, + matrix::view::dense beta, + matrix::view::dense zeta, + matrix::view::dense d, + matrix::view::dense e) { - const auto nrows = e->get_size()[0]; - const auto nrhs = e->get_size()[1]; + const auto nrows = e.size[0]; + const auto nrhs = e.size[1]; for (size_type i = 0; i < nrhs; i++) { - auto scalar_d = zeta->at(0, i) / - (beta->at(0, i) - - gamma->at(0, i) * gamma->at(0, i) / rho->at(0, i)); - auto scalar_e = - one() - gamma->at(0, i) / alpha->at(0, i) * scalar_d; + auto scalar_d = + zeta(0, i) / (beta(0, i) - gamma(0, i) * gamma(0, i) / rho(0, i)); + auto scalar_e = one() - gamma(0, i) / alpha(0, i) * scalar_d; if (is_finite(scalar_d) && is_finite(scalar_e)) { for (size_type j = 0; j < nrows; j++) { - e->at(j, i) = scalar_e * e->at(j, i) + scalar_d * d->at(j, i); + e(j, i) = scalar_e * e(j, i) + scalar_d * d(j, i); } } } @@ -77,13 +76,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL); template void kcycle_check_stop(std::shared_ptr exec, - const matrix::Dense* old_norm, - const matrix::Dense* new_norm, + matrix::view::dense old_norm, + matrix::view::dense new_norm, const ValueType rel_tol, bool& is_stop) { is_stop = true; - for (size_type i = 0; i < old_norm->get_size()[1]; i++) { - if (new_norm->at(0, i) > rel_tol * old_norm->at(0, i)) { + for (size_type i = 0; i < old_norm.size[1]; i++) { + if (new_norm(0, i) > rel_tol * old_norm(0, i)) { is_stop = false; } } diff --git a/reference/solver/pipe_cg_kernels.cpp b/reference/solver/pipe_cg_kernels.cpp index 9cfad74df73..53be95c9c0a 100644 --- a/reference/solver/pipe_cg_kernels.cpp +++ b/reference/solver/pipe_cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,18 +23,18 @@ namespace pipe_cg { template void initialize_1(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* r, - matrix::Dense* prev_rho, - array* stop_status) + matrix::view::dense b, + matrix::view::dense r, + matrix::view::dense prev_rho, + array& stop_status) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - prev_rho->at(j) = one(); - stop_status->get_data()[j].reset(); + for (size_type j = 0; j < b.size[1]; ++j) { + prev_rho(0, j) = one(); + stop_status.get_data()[j].reset(); } - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); + for (size_type i = 0; i < b.size[0]; ++i) { + for (size_type j = 0; j < b.size[1]; ++j) { + r(i, j) = b(i, j); } } } @@ -44,29 +44,31 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - matrix::Dense* p, matrix::Dense* q, - matrix::Dense* f, matrix::Dense* g, - matrix::Dense* beta, - const matrix::Dense* z, - const matrix::Dense* w, - const matrix::Dense* m, - const matrix::Dense* n, - const matrix::Dense* delta) + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense f, + matrix::view::dense g, + matrix::view::dense beta, + matrix::view::dense z, + matrix::view::dense w, + matrix::view::dense m, + matrix::view::dense n, + matrix::view::dense delta) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { + for (size_type j = 0; j < p.size[1]; ++j) { // beta = delta - beta->at(j) = delta->at(j); + beta(0, j) = delta(0, j); } - for (size_type i = 0; i < p->get_size()[0]; ++i) { + for (size_type i = 0; i < p.size[0]; ++i) { // p = z // q = w // f = m // g = n - for (size_type j = 0; j < p->get_size()[1]; ++j) { - p->at(i, j) = z->at(i, j); - q->at(i, j) = w->at(i, j); - f->at(i, j) = m->at(i, j); - g->at(i, j) = n->at(i, j); + for (size_type j = 0; j < p.size[1]; ++j) { + p(i, j) = z(i, j); + q(i, j) = w(i, j); + f(i, j) = m(i, j); + g(i, j) = n(i, j); } } } @@ -76,33 +78,34 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_INITIALIZE_2_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense* x, matrix::Dense* r, - matrix::Dense* z1, matrix::Dense* z2, - matrix::Dense* w, const matrix::Dense* p, - const matrix::Dense* q, - const matrix::Dense* f, - const matrix::Dense* g, - const matrix::Dense* rho, - const matrix::Dense* beta, - const array* stop_status) + matrix::view::dense x, matrix::view::dense r, + matrix::view::dense z1, + matrix::view::dense z2, matrix::view::dense w, + matrix::view::dense p, + matrix::view::dense q, + matrix::view::dense f, + matrix::view::dense g, + matrix::view::dense rho, + matrix::view::dense beta, + const array& stop_status) { // tmp = rho / beta // x = x + tmp * p // r = r - tmp * q // z = z - tmp * f // w = w - tmp * g - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type i = 0; i < p.size[0]; ++i) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(beta->at(j))) { - auto tmp = rho->at(j) / beta->at(j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); - z1->at(i, j) -= tmp * f->at(i, j); - z2->at(i, j) = z1->at(i, j); - w->at(i, j) -= tmp * g->at(i, j); + if (is_nonzero(beta(0, j))) { + auto tmp = rho(0, j) / beta(0, j); + x(i, j) += tmp * p(i, j); + r(i, j) -= tmp * q(i, j); + z1(i, j) -= tmp * f(i, j); + z2(i, j) = z1(i, j); + w(i, j) -= tmp * g(i, j); } } } @@ -113,16 +116,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_PIPE_CG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense* beta, matrix::Dense* p, - matrix::Dense* q, matrix::Dense* f, - matrix::Dense* g, const matrix::Dense* z, - const matrix::Dense* w, - const matrix::Dense* m, - const matrix::Dense* n, - const matrix::Dense* prev_rho, - const matrix::Dense* rho, - const matrix::Dense* delta, - const array* stop_status) + matrix::view::dense beta, + matrix::view::dense p, matrix::view::dense q, + matrix::view::dense f, matrix::view::dense g, + matrix::view::dense z, + matrix::view::dense w, + matrix::view::dense m, + matrix::view::dense n, + matrix::view::dense prev_rho, + matrix::view::dense rho, + matrix::view::dense delta, + const array& stop_status) { // tmp = rho / prev_rho // beta = delta - |tmp|^2 * beta @@ -130,31 +134,31 @@ void step_2(std::shared_ptr exec, // q = w + tmp * q // f = m + tmp * f // g = n + tmp * g - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { + for (size_type j = 0; j < p.size[1]; ++j) { + if (stop_status.get_const_data()[j].has_stopped()) { continue; } - if (is_nonzero(prev_rho->at(j))) { - auto tmp = rho->at(j) / prev_rho->at(j); + if (is_nonzero(prev_rho(0, j))) { + auto tmp = rho(0, j) / prev_rho(0, j); auto abs_tmp = abs(tmp); - beta->at(j) = delta->at(j) - abs_tmp * abs_tmp * beta->at(j); - if (is_zero(beta->at(j))) { - beta->at(j) = delta->at(j); + beta(0, j) = delta(0, j) - abs_tmp * abs_tmp * beta(0, j); + if (is_zero(beta(0, j))) { + beta(0, j) = delta(0, j); } - for (size_type i = 0; i < p->get_size()[0]; ++i) { - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); - q->at(i, j) = w->at(i, j) + tmp * q->at(i, j); - f->at(i, j) = m->at(i, j) + tmp * f->at(i, j); - g->at(i, j) = n->at(i, j) + tmp * g->at(i, j); + for (size_type i = 0; i < p.size[0]; ++i) { + p(i, j) = z(i, j) + tmp * p(i, j); + q(i, j) = w(i, j) + tmp * q(i, j); + f(i, j) = m(i, j) + tmp * f(i, j); + g(i, j) = n(i, j) + tmp * g(i, j); } } else { - beta->at(j) = delta->at(j); - for (size_type i = 0; i < p->get_size()[0]; ++i) { - p->at(i, j) = z->at(i, j); - q->at(i, j) = w->at(i, j); - f->at(i, j) = m->at(i, j); - g->at(i, j) = n->at(i, j); + beta(0, j) = delta(0, j); + for (size_type i = 0; i < p.size[0]; ++i) { + p(i, j) = z(i, j); + q(i, j) = w(i, j); + f(i, j) = m(i, j); + g(i, j) = n(i, j); } } } diff --git a/reference/solver/upper_trs_kernels.cpp b/reference/solver/upper_trs_kernels.cpp index f0c23a9c4cc..1030d5cb4e9 100644 --- a/reference/solver/upper_trs_kernels.cpp +++ b/reference/solver/upper_trs_kernels.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/solver/upper_trs_kernels.hpp" #include +#include #include #include @@ -58,24 +59,26 @@ void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, - matrix::Dense* trans_b, matrix::Dense* trans_x, - const matrix::Dense* b, matrix::Dense* x) + std::optional> trans_b, + std::optional> trans_x, + matrix::view::dense b, + matrix::view::dense x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); auto vals = matrix->get_const_values(); - for (size_type j = 0; j < b->get_size()[1]; ++j) { + for (size_type j = 0; j < b.size[1]; ++j) { for (size_type inv_row = 0; inv_row < matrix->get_size()[0]; ++inv_row) { auto row = matrix->get_size()[0] - 1 - inv_row; auto diag = one(); bool found_diag = false; - x->at(row, j) = b->at(row, j); + x(row, j) = b(row, j); for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) { auto col = col_idxs[k]; if (col > row) { - x->at(row, j) -= vals[k] * x->at(col, j); + x(row, j) -= vals[k] * x(col, j); } if (col == row) { diag = vals[k]; @@ -84,7 +87,7 @@ void solve(std::shared_ptr exec, } if (!unit_diag) { GKO_ASSERT(found_diag); - x->at(row, j) /= diag; + x(row, j) /= diag; } } } diff --git a/reference/stop/criterion_kernels.cpp b/reference/stop/criterion_kernels.cpp index 4a91429d784..98a16413088 100644 --- a/reference/stop/criterion_kernels.cpp +++ b/reference/stop/criterion_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,10 +20,10 @@ namespace set_all_statuses { void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, - array* stop_status) + array& stop_status) { - for (int i = 0; i < stop_status->get_size(); i++) { - stop_status->get_data()[i].stop(stoppingId, setFinalized); + for (int i = 0; i < stop_status.get_size(); i++) { + stop_status.get_data()[i].stop(stoppingId, setFinalized); } } diff --git a/reference/stop/residual_norm_kernels.cpp b/reference/stop/residual_norm_kernels.cpp index ba2672edc28..31c1d14895b 100644 --- a/reference/stop/residual_norm_kernels.cpp +++ b/reference/stop/residual_norm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -25,26 +25,26 @@ namespace residual_norm { template void residual_norm(std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense* orig_tau, + matrix::view::dense tau, + matrix::view::dense orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, - bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, + bool& one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); - *all_converged = true; - *one_changed = false; - for (size_type i = 0; i < tau->get_size()[1]; ++i) { - if (tau->at(i) <= rel_residual_goal * orig_tau->at(i)) { - stop_status->get_data()[i].converge(stoppingId, setFinalized); - *one_changed = true; + all_converged = true; + one_changed = false; + for (size_type i = 0; i < tau.size[1]; ++i) { + if (tau(0, i) <= rel_residual_goal * orig_tau(0, i)) { + stop_status.get_data()[i].converge(stoppingId, setFinalized); + one_changed = true; } } - for (size_type i = 0; i < stop_status->get_size(); ++i) { - if (!stop_status->get_const_data()[i].has_stopped()) { - *all_converged = false; + for (size_type i = 0; i < stop_status.get_size(); ++i) { + if (!stop_status.get_const_data()[i].has_stopped()) { + all_converged = false; break; } } @@ -68,23 +68,23 @@ namespace implicit_residual_norm { template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense* tau, - const matrix::Dense>* orig_tau, + matrix::view::dense tau, + matrix::view::dense> orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, array* stop_status, - array* device_storage, bool* all_converged, bool* one_changed) + bool setFinalized, array& stop_status, + array& device_storage, bool& all_converged, bool& one_changed) { - *all_converged = true; - *one_changed = false; - for (size_type i = 0; i < tau->get_size()[1]; ++i) { - if (sqrt(abs(tau->at(i))) <= rel_residual_goal * orig_tau->at(i)) { - stop_status->get_data()[i].converge(stoppingId, setFinalized); - *one_changed = true; + all_converged = true; + one_changed = false; + for (size_type i = 0; i < tau.size[1]; ++i) { + if (sqrt(abs(tau(0, i))) <= rel_residual_goal * orig_tau(0, i)) { + stop_status.get_data()[i].converge(stoppingId, setFinalized); + one_changed = true; } } - for (size_type i = 0; i < stop_status->get_size(); ++i) { - if (!stop_status->get_const_data()[i].has_stopped()) { - *all_converged = false; + for (size_type i = 0; i < stop_status.get_size(); ++i) { + if (!stop_status.get_const_data()[i].has_stopped()) { + all_converged = false; break; } } diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index 4e03cc6995e..7424ef458b1 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -60,7 +60,7 @@ class Vector : public ::testing::Test { output->fill(gko::zero()); gko::kernels::reference::distributed_vector::build_local( - ref, input, partition.get(), part, output.get()); + ref, input, partition.get(), part, output->get_device_view()); GKO_ASSERT_MTX_NEAR(output, ref_outputs[part], 0); } diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 2ce7b023a1c..93e2cff43f5 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -672,8 +672,8 @@ TYPED_TEST(Dense, ComputesNorm2Squared) auto result = NormVector::create(this->exec, gko::dim<2>{1, 2}); gko::kernels::reference::dense::compute_squared_norm2( - gko::as(this->exec), mtx.get(), result.get(), - tmp); + gko::as(this->exec), + mtx->get_const_device_view(), result->get_device_view(), tmp); EXPECT_EQ(result->at(0, 0), T_nc{9.0}); EXPECT_EQ(result->at(0, 1), T_nc{25.0}); @@ -689,7 +689,7 @@ TYPED_TEST(Dense, ComputesSqrt) auto mtx(gko::initialize(I>{{9.0, 25.0}}, this->exec)); gko::kernels::reference::dense::compute_sqrt( - gko::as(this->exec), mtx.get()); + gko::as(this->exec), mtx->get_device_view()); EXPECT_EQ(mtx->at(0, 0), T_nc{3.0}); EXPECT_EQ(mtx->at(0, 1), T_nc{5.0}); diff --git a/reference/test/preconditioner/isai_kernels.cpp b/reference/test/preconditioner/isai_kernels.cpp index 4c32a1f3279..ecb48f2b972 100644 --- a/reference/test/preconditioner/isai_kernels.cpp +++ b/reference/test/preconditioner/isai_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -477,7 +477,7 @@ TYPED_TEST(Isai, KernelGenerateExcessALongrow) gko::kernels::reference::isai::generate_excess_system( this->exec, this->a_csr_longrow.get(), this->a_csr_longrow.get(), a1.get_const_data(), a2.get_const_data(), result.get(), - result_rhs.get(), 0, num_rows); + result_rhs->get_device_view(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(result, this->a_csr_longrow_e); GKO_ASSERT_MTX_NEAR(result, this->a_csr_longrow_e, 0); @@ -680,7 +680,7 @@ TYPED_TEST(Isai, KernelGenerateExcessLLongrow) gko::kernels::reference::isai::generate_excess_system( this->exec, this->l_csr_longrow.get(), this->l_csr_longrow.get(), a1.get_const_data(), a2.get_const_data(), result.get(), - result_rhs.get(), 0, num_rows); + result_rhs->get_device_view(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(result, this->l_csr_longrow_e); GKO_ASSERT_MTX_NEAR(result, this->l_csr_longrow_e, 0); @@ -877,7 +877,7 @@ TYPED_TEST(Isai, KernelGenerateExcessULongrow) gko::kernels::reference::isai::generate_excess_system( this->exec, this->u_csr_longrow.get(), this->u_csr_longrow.get(), a1.get_const_data(), a2.get_const_data(), result.get(), - result_rhs.get(), 0, num_rows); + result_rhs->get_device_view(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(result, this->u_csr_longrow_e); GKO_ASSERT_MTX_NEAR(result, this->u_csr_longrow_e, 0); @@ -985,7 +985,8 @@ TYPED_TEST(Isai, KernelGenerateExcessSpdLongrow) gko::kernels::reference::isai::generate_excess_system( this->exec, this->spd_csr_longrow.get(), this->spd_csr_longrow_inv_partial.get(), a1.get_const_data(), - a2.get_const_data(), result.get(), result_rhs.get(), 0, num_rows); + a2.get_const_data(), result.get(), result_rhs->get_device_view(), 0, + num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(result, this->spd_csr_longrow_e); GKO_ASSERT_MTX_NEAR(result, this->spd_csr_longrow_e, 0); @@ -1017,7 +1018,8 @@ TYPED_TEST(Isai, KernelScatterExcessSolution) gko::array{this->exec, {11, 12, 13, 14, 15, 16, 17}}, 1); gko::kernels::reference::isai::scatter_excess_solution( - this->exec, ptrs.get_const_data(), sol.get(), mtx.get(), 0, 6); + this->exec, ptrs.get_const_data(), sol->get_const_device_view(), + mtx.get(), 0, 6); GKO_ASSERT_MTX_NEAR(mtx, expect, 0); } diff --git a/reference/test/solver/bicg_kernels.cpp b/reference/test/solver/bicg_kernels.cpp index 22b37683e0d..8045e1a8c12 100644 --- a/reference/test/solver/bicg_kernels.cpp +++ b/reference/test/solver/bicg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -139,11 +139,13 @@ TYPED_TEST(Bicg, KernelInitialize) this->stopped); gko::kernels::reference::bicg::initialize( - this->exec, this->small_b.get(), this->small_r.get(), - this->small_z.get(), this->small_p.get(), this->small_q.get(), - this->small_prev_rho.get(), this->small_rho.get(), this->small_r2.get(), - this->small_z2.get(), this->small_p2.get(), this->small_q2.get(), - &this->small_stop); + this->exec, this->small_b->get_const_device_view(), + this->small_r->get_device_view(), this->small_z->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_prev_rho->get_device_view(), + this->small_rho->get_device_view(), this->small_r2->get_device_view(), + this->small_z2->get_device_view(), this->small_p2->get_device_view(), + this->small_q2->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_z, this->small_zero, 0); @@ -173,9 +175,12 @@ TYPED_TEST(Bicg, KernelStep1) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::bicg::step_1( - this->exec, this->small_p.get(), this->small_z.get(), - this->small_p2.get(), this->small_z2.get(), this->small_rho.get(), - this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_p->get_device_view(), + this->small_z->get_const_device_view(), + this->small_p2->get_device_view(), + this->small_z2->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-1.25, 3.0}, {-1.25, 3.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_p2, l({{-1.25, 3.0}, {-1.25, 3.0}}), 0); @@ -192,9 +197,12 @@ TYPED_TEST(Bicg, KernelStep1DivByZero) this->small_prev_rho->fill(0); gko::kernels::reference::bicg::step_1( - this->exec, this->small_p.get(), this->small_z.get(), - this->small_p2.get(), this->small_z2.get(), this->small_rho.get(), - this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_p->get_device_view(), + this->small_z->get_const_device_view(), + this->small_p2->get_device_view(), + this->small_z2->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_p2, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); @@ -216,10 +224,13 @@ TYPED_TEST(Bicg, KernelStep2) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::bicg::step_2( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_r2.get(), this->small_p.get(), this->small_q.get(), - this->small_q2.get(), this->small_beta.get(), this->small_rho.get(), - &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), this->small_r2->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_q2->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-1.25, -2.0}, {-1.25, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{5.25, 4.0}, {5.25, 4.0}}), 0); @@ -239,10 +250,13 @@ TYPED_TEST(Bicg, KernelStep2DivByZero) this->small_beta->fill(0); gko::kernels::reference::bicg::step_2( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_r2.get(), this->small_p.get(), this->small_q.get(), - this->small_q2.get(), this->small_beta.get(), this->small_rho.get(), - &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), this->small_r2->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_q2->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{4.0, 4.0}, {4.0, 4.0}}), 0); diff --git a/reference/test/solver/bicgstab_kernels.cpp b/reference/test/solver/bicgstab_kernels.cpp index d483234e668..c061ff58eaa 100644 --- a/reference/test/solver/bicgstab_kernels.cpp +++ b/reference/test/solver/bicgstab_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -145,12 +145,17 @@ TYPED_TEST(Bicgstab, KernelInitialize) this->stopped); gko::kernels::reference::bicgstab::initialize( - this->exec, this->small_b.get(), this->small_r.get(), - this->small_rr.get(), this->small_y.get(), this->small_s.get(), - this->small_t.get(), this->small_z.get(), this->small_v.get(), - this->small_p.get(), this->small_prev_rho.get(), this->small_rho.get(), - this->small_alpha.get(), this->small_beta.get(), - this->small_gamma.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_b->get_const_device_view(), + this->small_r->get_device_view(), this->small_rr->get_device_view(), + this->small_y->get_device_view(), this->small_s->get_device_view(), + this->small_t->get_device_view(), this->small_z->get_device_view(), + this->small_v->get_device_view(), this->small_p->get_device_view(), + this->small_prev_rho->get_device_view(), + this->small_rho->get_device_view(), + this->small_alpha->get_device_view(), + this->small_beta->get_device_view(), + this->small_gamma->get_device_view(), + this->small_omega->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_rr, this->small_zero, 0); @@ -187,9 +192,13 @@ TYPED_TEST(Bicgstab, KernelStep1) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::bicgstab::step_1( - this->exec, this->small_r.get(), this->small_p.get(), - this->small_v.get(), this->small_rho.get(), this->small_prev_rho.get(), - this->small_alpha.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_p->get_device_view(), + this->small_v->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_alpha->get_const_device_view(), + this->small_omega->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-3.25, 3.0}, {-3.25, 3.0}}), 0); } @@ -206,9 +215,13 @@ TYPED_TEST(Bicgstab, KernelStep1DivRhoZero) this->small_omega->fill(1); gko::kernels::reference::bicgstab::step_1( - this->exec, this->small_r.get(), this->small_p.get(), - this->small_v.get(), this->small_rho.get(), this->small_prev_rho.get(), - this->small_alpha.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_p->get_device_view(), + this->small_v->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_alpha->get_const_device_view(), + this->small_omega->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); } @@ -225,9 +238,13 @@ TYPED_TEST(Bicgstab, KernelStep1DivOmegaZero) this->small_omega->fill(0); gko::kernels::reference::bicgstab::step_1( - this->exec, this->small_r.get(), this->small_p.get(), - this->small_v.get(), this->small_rho.get(), this->small_prev_rho.get(), - this->small_alpha.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_p->get_device_view(), + this->small_v->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_alpha->get_const_device_view(), + this->small_omega->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); } @@ -244,9 +261,13 @@ TYPED_TEST(Bicgstab, KernelStep1DivBothZero) this->small_omega->fill(0); gko::kernels::reference::bicgstab::step_1( - this->exec, this->small_r.get(), this->small_p.get(), - this->small_v.get(), this->small_rho.get(), this->small_prev_rho.get(), - this->small_alpha.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_p->get_device_view(), + this->small_v->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_alpha->get_const_device_view(), + this->small_omega->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); } @@ -265,9 +286,12 @@ TYPED_TEST(Bicgstab, KernelStep2) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::bicgstab::step_2( - this->exec, this->small_r.get(), this->small_s.get(), - this->small_v.get(), this->small_rho.get(), this->small_alpha.get(), - this->small_beta.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_s->get_device_view(), + this->small_v->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_alpha->get_device_view(), + this->small_beta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_s, l({{-2.25, 5.0}, {-2.25, 5.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_alpha, l({{0.25, 0.0}}), 0); @@ -284,9 +308,12 @@ TYPED_TEST(Bicgstab, KernelStep2DivZero) this->small_beta->fill(0); gko::kernels::reference::bicgstab::step_2( - this->exec, this->small_r.get(), this->small_s.get(), - this->small_v.get(), this->small_rho.get(), this->small_alpha.get(), - this->small_beta.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_s->get_device_view(), + this->small_v->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_alpha->get_device_view(), + this->small_beta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_s, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_alpha, l({{0.0, 0.0}}), 0); @@ -311,10 +338,16 @@ TYPED_TEST(Bicgstab, KernelStep3) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::bicgstab::step_3( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_s.get(), this->small_t.get(), this->small_y.get(), - this->small_z.get(), this->small_alpha.get(), this->small_beta.get(), - this->small_gamma.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), + this->small_s->get_const_device_view(), + this->small_t->get_const_device_view(), + this->small_y->get_const_device_view(), + this->small_z->get_const_device_view(), + this->small_alpha->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_gamma->get_const_device_view(), + this->small_omega->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-15.0, 5.0}, {-15.0, 5.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{-27.0, -2.0}, {-27.0, -2.0}}), 0); @@ -338,10 +371,16 @@ TYPED_TEST(Bicgstab, KernelStep3DivZero) this->small_alpha->at(1) = -2; gko::kernels::reference::bicgstab::step_3( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_s.get(), this->small_t.get(), this->small_y.get(), - this->small_z.get(), this->small_alpha.get(), this->small_beta.get(), - this->small_gamma.get(), this->small_omega.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), + this->small_s->get_const_device_view(), + this->small_t->get_const_device_view(), + this->small_y->get_const_device_view(), + this->small_z->get_const_device_view(), + this->small_alpha->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_gamma->get_const_device_view(), + this->small_omega->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{9.0, -3.0}, {9.0, -3.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_omega, l({{0.0, 0.0}}), 0); @@ -358,8 +397,9 @@ TYPED_TEST(Bicgstab, KernelFinalize) this->small_stop.get_data()[1] = this->finalized; gko::kernels::reference::bicgstab::finalize( - this->exec, this->small_x.get(), this->small_y.get(), - this->small_alpha.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_y->get_const_device_view(), + this->small_alpha->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{9.0, 5.0}, {9.0, 5.0}}), 0); ASSERT_EQ(this->small_stop.get_data()[0], this->finalized); diff --git a/reference/test/solver/cg_kernels.cpp b/reference/test/solver/cg_kernels.cpp index c4987bb5b17..282a5298535 100644 --- a/reference/test/solver/cg_kernels.cpp +++ b/reference/test/solver/cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -123,9 +123,11 @@ TYPED_TEST(Cg, KernelInitialize) this->stopped); gko::kernels::reference::cg::initialize( - this->exec, this->small_b.get(), this->small_r.get(), - this->small_z.get(), this->small_p.get(), this->small_q.get(), - this->small_prev_rho.get(), this->small_rho.get(), &this->small_stop); + this->exec, this->small_b->get_const_device_view(), + this->small_r->get_device_view(), this->small_z->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_prev_rho->get_device_view(), + this->small_rho->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_z, this->small_zero, 0); @@ -149,8 +151,10 @@ TYPED_TEST(Cg, KernelStep1) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::cg::step_1( - this->exec, this->small_p.get(), this->small_z.get(), - this->small_rho.get(), this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_p->get_device_view(), + this->small_z->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-1.25, 3.0}, {-1.25, 3.0}}), 0); } @@ -164,8 +168,10 @@ TYPED_TEST(Cg, KernelStep1DivByZero) this->small_prev_rho->fill(0); gko::kernels::reference::cg::step_1( - this->exec, this->small_p.get(), this->small_z.get(), - this->small_rho.get(), this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_p->get_device_view(), + this->small_z->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); } @@ -184,9 +190,12 @@ TYPED_TEST(Cg, KernelStep2) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::cg::step_2( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_p.get(), this->small_q.get(), this->small_beta.get(), - this->small_rho.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-1.25, -2.0}, {-1.25, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{5.25, 4.0}, {5.25, 4.0}}), 0); @@ -203,9 +212,12 @@ TYPED_TEST(Cg, KernelStep2DivByZero) this->small_beta->fill(0); gko::kernels::reference::cg::step_2( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_p.get(), this->small_q.get(), this->small_beta.get(), - this->small_rho.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{4.0, 4.0}, {4.0, 4.0}}), 0); diff --git a/reference/test/solver/cgs_kernels.cpp b/reference/test/solver/cgs_kernels.cpp index f83040a4857..eddc1d61939 100644 --- a/reference/test/solver/cgs_kernels.cpp +++ b/reference/test/solver/cgs_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -142,12 +142,16 @@ TYPED_TEST(Cgs, KernelInitialize) this->stopped); gko::kernels::reference::cgs::initialize( - this->exec, this->small_b.get(), this->small_r.get(), - this->small_r_tld.get(), this->small_p.get(), this->small_q.get(), - this->small_u.get(), this->small_u_hat.get(), this->small_v_hat.get(), - this->small_t.get(), this->small_alpha.get(), this->small_beta.get(), - this->small_gamma.get(), this->small_prev_rho.get(), - this->small_rho.get(), &this->small_stop); + this->exec, this->small_b->get_const_device_view(), + this->small_r->get_device_view(), this->small_r_tld->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_u->get_device_view(), this->small_u_hat->get_device_view(), + this->small_v_hat->get_device_view(), this->small_t->get_device_view(), + this->small_alpha->get_device_view(), + this->small_beta->get_device_view(), + this->small_gamma->get_device_view(), + this->small_prev_rho->get_device_view(), + this->small_rho->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_r_tld, this->small_b, 0); @@ -181,9 +185,12 @@ TYPED_TEST(Cgs, KernelStep1) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::cgs::step_1( - this->exec, this->small_r.get(), this->small_u.get(), - this->small_p.get(), this->small_q.get(), this->small_beta.get(), - this->small_rho.get(), this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_u->get_device_view(), this->small_p->get_device_view(), + this->small_q->get_const_device_view(), + this->small_beta->get_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_u, l({{-5.0, -4.0}, {-5.0, -4.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-19.0, -2.0}, {-19.0, -2.0}}), 0); @@ -202,9 +209,12 @@ TYPED_TEST(Cgs, KernelStep1DivZero) this->small_rho->fill(3); gko::kernels::reference::cgs::step_1( - this->exec, this->small_r.get(), this->small_u.get(), - this->small_p.get(), this->small_q.get(), this->small_beta.get(), - this->small_rho.get(), this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_u->get_device_view(), this->small_p->get_device_view(), + this->small_q->get_const_device_view(), + this->small_beta->get_device_view(), + this->small_rho->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_u, l({{7.0, 7.0}, {7.0, 7.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_p, l({{5.0, 5.0}, {5.0, 5.0}}), 0); @@ -226,9 +236,12 @@ TYPED_TEST(Cgs, KernelStep2) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::cgs::step_2( - this->exec, this->small_u.get(), this->small_v_hat.get(), - this->small_q.get(), this->small_t.get(), this->small_alpha.get(), - this->small_rho.get(), this->small_gamma.get(), &this->small_stop); + this->exec, this->small_u->get_const_device_view(), + this->small_v_hat->get_const_device_view(), + this->small_q->get_device_view(), this->small_t->get_device_view(), + this->small_alpha->get_device_view(), + this->small_rho->get_const_device_view(), + this->small_gamma->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_q, l({{4.0, 1.0}, {4.0, 1.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_t, l({{2.0, -4.0}, {2.0, -4.0}}), 0); @@ -247,9 +260,12 @@ TYPED_TEST(Cgs, KernelStep2DivZero) this->small_rho->fill(-3); gko::kernels::reference::cgs::step_2( - this->exec, this->small_u.get(), this->small_v_hat.get(), - this->small_q.get(), this->small_t.get(), this->small_alpha.get(), - this->small_rho.get(), this->small_gamma.get(), &this->small_stop); + this->exec, this->small_u->get_const_device_view(), + this->small_v_hat->get_const_device_view(), + this->small_q->get_device_view(), this->small_t->get_device_view(), + this->small_alpha->get_device_view(), + this->small_rho->get_const_device_view(), + this->small_gamma->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_q, l({{-8.0, -8.0}, {-8.0, -8.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_t, l({{-10.0, -10.0}, {-10.0, -10.0}}), 0); @@ -269,9 +285,10 @@ TYPED_TEST(Cgs, KernelStep3) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::cgs::step_3( - this->exec, this->small_t.get(), this->small_u_hat.get(), - this->small_r.get(), this->small_x.get(), this->small_alpha.get(), - &this->small_stop); + this->exec, this->small_t->get_const_device_view(), + this->small_u_hat->get_const_device_view(), + this->small_r->get_device_view(), this->small_x->get_device_view(), + this->small_alpha->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, l({{5.0, 1.0}, {5.0, 1.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-5.0, 3.0}, {-5.0, 3.0}}), 0); diff --git a/reference/test/solver/chebyshev_kernels.cpp b/reference/test/solver/chebyshev_kernels.cpp index 457555bb640..232ea1d461d 100644 --- a/reference/test/solver/chebyshev_kernels.cpp +++ b/reference/test/solver/chebyshev_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -69,8 +69,8 @@ TYPED_TEST(Chebyshev, KernelInitUpdate) using Mtx = typename TestFixture::Mtx; gko::kernels::reference::chebyshev::init_update( - this->exec, this->alpha, this->inner_sol.get(), this->update_sol.get(), - this->output.get()); + this->exec, this->alpha, this->inner_sol->get_const_device_view(), + this->update_sol->get_device_view(), this->output->get_device_view()); GKO_ASSERT_MTX_NEAR(this->update_sol, this->inner_sol, 0); GKO_ASSERT_MTX_NEAR(this->output, @@ -88,8 +88,8 @@ TYPED_TEST(Chebyshev, KernelUpdate) using Mtx = typename TestFixture::Mtx; gko::kernels::reference::chebyshev::update( - this->exec, this->alpha, this->beta, this->inner_sol.get(), - this->update_sol.get(), this->output.get()); + this->exec, this->alpha, this->beta, this->inner_sol->get_device_view(), + this->update_sol->get_device_view(), this->output->get_device_view()); GKO_ASSERT_MTX_NEAR(this->update_sol, this->inner_sol, 0); GKO_ASSERT_MTX_NEAR( @@ -115,7 +115,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystem) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), r::value * 1e1); } @@ -129,7 +129,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemMixed) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), (r_mixed()) * 1e1); @@ -148,7 +148,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemComplex) {value_type{0.0, 0.0}, value_type{0.0, 0.0}, value_type{0.0, 0.0}}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({value_type{1.0, -2.0}, value_type{3.0, -6.0}, @@ -172,7 +172,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemMixedComplex) mixed_complex_type{0.0, 0.0}}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR( x, @@ -204,7 +204,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemWithIterativeInnerSolver) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver_factory->generate(this->mtx)->apply(b.get(), x.get()); + solver_factory->generate(this->mtx)->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), r::value * 1e1); } @@ -221,7 +221,7 @@ TYPED_TEST(Chebyshev, SolvesMultipleTriangularSystems) auto x = gko::initialize( {I{0.0, 0.0}, I{0.0, 0.0}, I{0.0, 0.0}}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({{1.0, 1.0}, {3.0, 1.0}, {2.0, 1.0}}), r::value * 1e1); @@ -238,7 +238,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemUsingAdvancedApply) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.5, 1.0, 2.0}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({1.5, 5.0, 2.0}), r::value * 1e1); } @@ -254,7 +254,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemUsingAdvancedApplyMixed) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.5, 1.0, 2.0}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({1.5, 5.0, 2.0}), (r_mixed()) * 1e1); @@ -276,7 +276,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemUsingAdvancedApplyComplex) {value_type{0.5, -1.0}, value_type{1.0, -2.0}, value_type{2.0, -4.0}}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({value_type{1.5, -3.0}, value_type{5.0, -10.0}, @@ -303,7 +303,7 @@ TYPED_TEST(Chebyshev, SolvesTriangularSystemUsingAdvancedApplyMixedComplex) mixed_complex_type{2.0, -4.0}}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR( x, @@ -326,7 +326,7 @@ TYPED_TEST(Chebyshev, SolvesMultipleStencilSystemsUsingAdvancedApply) auto x = gko::initialize( {I{0.5, 1.0}, I{1.0, 2.0}, I{2.0, 3.0}}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({{1.5, 1.0}, {5.0, 0.0}, {2.0, -1.0}}), r::value * 1e1); @@ -341,7 +341,7 @@ TYPED_TEST(Chebyshev, SolvesTransposedTriangularSystem) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver->transpose()->apply(b.get(), x.get()); + solver->transpose()->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), r::value * 1e1); } @@ -356,7 +356,7 @@ TYPED_TEST(Chebyshev, SolvesConjTransposedTriangularSystem) auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver->conj_transpose()->apply(b.get(), x.get()); + solver->conj_transpose()->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), r::value * 1e1); } diff --git a/reference/test/solver/fcg_kernels.cpp b/reference/test/solver/fcg_kernels.cpp index dca202f9fc8..2ba862f52f9 100644 --- a/reference/test/solver/fcg_kernels.cpp +++ b/reference/test/solver/fcg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -130,10 +130,13 @@ TYPED_TEST(Fcg, KernelInitialize) this->stopped); gko::kernels::reference::fcg::initialize( - this->exec, this->small_b.get(), this->small_r.get(), - this->small_z.get(), this->small_p.get(), this->small_q.get(), - this->small_t.get(), this->small_prev_rho.get(), this->small_rho.get(), - this->small_rho_t.get(), &this->small_stop); + this->exec, this->small_b->get_const_device_view(), + this->small_r->get_device_view(), this->small_z->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_t->get_device_view(), + this->small_prev_rho->get_device_view(), + this->small_rho->get_device_view(), + this->small_rho_t->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_t, this->small_b, 0); @@ -159,8 +162,10 @@ TYPED_TEST(Fcg, KernelStep1) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::fcg::step_1( - this->exec, this->small_p.get(), this->small_z.get(), - this->small_rho_t.get(), this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_p->get_device_view(), + this->small_z->get_const_device_view(), + this->small_rho_t->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-1.25, 3.0}, {-1.25, 3.0}}), 0); } @@ -174,8 +179,10 @@ TYPED_TEST(Fcg, KernelStep1DivByZero) this->small_prev_rho->fill(0); gko::kernels::reference::fcg::step_1( - this->exec, this->small_p.get(), this->small_z.get(), - this->small_rho_t.get(), this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_p->get_device_view(), + this->small_z->get_const_device_view(), + this->small_rho_t->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); } @@ -195,9 +202,12 @@ TYPED_TEST(Fcg, KernelStep2) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::fcg::step_2( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_t.get(), this->small_p.get(), this->small_q.get(), - this->small_beta.get(), this->small_rho.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), this->small_t->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-1.25, -2.0}, {-1.25, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{5.25, 4.0}, {5.25, 4.0}}), 0); @@ -216,9 +226,12 @@ TYPED_TEST(Fcg, KernelStep2DivByZero) this->small_beta->fill(0); gko::kernels::reference::fcg::step_2( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_t.get(), this->small_p.get(), this->small_q.get(), - this->small_beta.get(), this->small_rho.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), this->small_t->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_beta->get_const_device_view(), + this->small_rho->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{-2.0, -2.0}, {-2.0, -2.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{4.0, 4.0}, {4.0, 4.0}}), 0); diff --git a/reference/test/solver/gcr_kernels.cpp b/reference/test/solver/gcr_kernels.cpp index 701a6590ec1..93f7ab2d1e7 100644 --- a/reference/test/solver/gcr_kernels.cpp +++ b/reference/test/solver/gcr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -131,9 +131,9 @@ TYPED_TEST(Gcr, KernelInitialize) std::fill_n(this->small_stop.get_data(), this->small_stop.get_size(), this->stopped); - gko::kernels::reference::gcr::initialize(this->exec, this->small_b.get(), - this->small_residual.get(), - this->small_stop.get_data()); + gko::kernels::reference::gcr::initialize( + this->exec, this->small_b->get_const_device_view(), + this->small_residual->get_device_view(), this->small_stop.get_data()); GKO_ASSERT_MTX_NEAR(this->small_residual, this->small_b, 0); for (int i = 0; i < this->small_stop.get_size(); ++i) { @@ -148,8 +148,8 @@ TYPED_TEST(Gcr, KernelRestart) using Mtx = typename TestFixture::Mtx; const value_type nan = std::numeric_limits>::quiet_NaN(); - this->small_residual->copy_from(this->small_b.get()); - this->mtx->apply(this->small_residual.get(), this->small_A_residual.get()); + this->small_residual->copy_from(this->small_b); + this->mtx->apply(this->small_residual, this->small_A_residual); this->small_krylov_bases_p->fill(nan); this->small_mapped_krylov_bases_Ap->fill(nan); std::fill_n(this->small_final_iter_nums.get_data(), @@ -169,9 +169,10 @@ TYPED_TEST(Gcr, KernelRestart) } gko::kernels::reference::gcr::restart( - this->exec, this->small_residual.get(), this->small_A_residual.get(), - this->small_krylov_bases_p.get(), - this->small_mapped_krylov_bases_Ap.get(), + this->exec, this->small_residual->get_const_device_view(), + this->small_A_residual->get_const_device_view(), + this->small_krylov_bases_p->get_device_view(), + this->small_mapped_krylov_bases_Ap->get_device_view(), this->small_final_iter_nums.get_data()); ASSERT_EQ(this->small_final_iter_nums.get_size(), @@ -195,17 +196,19 @@ TYPED_TEST(Gcr, KernelStep1) this->small_residual->fill(nan); this->small_krylov_bases_p = gko::initialize( {I{0.5, -0.75}, I{1.25, 1.5}, I{-0.5, 1}}, this->exec); - this->mtx->apply(this->small_krylov_bases_p.get(), - this->small_mapped_krylov_bases_Ap.get()); - this->small_mapped_krylov_bases_Ap->compute_norm2( - this->small_Ap_norm.get()); + this->mtx->apply(this->small_krylov_bases_p, + this->small_mapped_krylov_bases_Ap); + this->small_mapped_krylov_bases_Ap->compute_norm2(this->small_Ap_norm); this->small_tmp_rAp = gko::initialize({13.0, 7.0, 1.0}, this->exec); gko::kernels::reference::gcr::step_1( - this->exec, this->small_x.get(), this->small_residual.get(), - this->small_krylov_bases_p.get(), - this->small_mapped_krylov_bases_Ap.get(), this->small_Ap_norm.get(), - this->small_tmp_rAp.get(), this->small_stop.get_data()); + this->exec, this->small_x->get_device_view(), + this->small_residual->get_device_view(), + this->small_krylov_bases_p->get_const_device_view(), + this->small_mapped_krylov_bases_Ap->get_const_device_view(), + this->small_Ap_norm->get_const_device_view(), + this->small_tmp_rAp->get_const_device_view(), + this->small_stop.get_data()); } @@ -217,7 +220,7 @@ TYPED_TEST(Gcr, SolvesStencilSystem) auto b = gko::initialize({13.0, 7.0, 1.0}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), r::value * 1e1); } @@ -231,7 +234,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemMixed) auto b = gko::initialize({13.0, 7.0, 1.0}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({1.0, 3.0, 2.0}), (r_mixed() * 1e1)); @@ -251,7 +254,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemComplex) {value_type{0.0, 0.0}, value_type{0.0, 0.0}, value_type{0.0, 0.0}}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({value_type{1.0, -2.0}, value_type{3.0, -6.0}, @@ -274,7 +277,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemMixedComplex) {value_type{0.0, 0.0}, value_type{0.0, 0.0}, value_type{0.0, 0.0}}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({value_type{1.0, -2.0}, value_type{3.0, -6.0}, @@ -294,7 +297,7 @@ TYPED_TEST(Gcr, SolvesMultipleStencilSystems) auto x = gko::initialize( {I{0.0, 0.0}, I{0.0, 0.0}, I{0.0, 0.0}}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({{1.0, 1.0}, {3.0, 1.0}, {2.0, 1.0}}), r::value * 1e1); @@ -311,7 +314,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemUsingAdvancedApply) auto b = gko::initialize({13.0, 7.0, 1.0}, this->exec); auto x = gko::initialize({0.5, 1.0, 2.0}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({1.5, 5.0, 2.0}), r::value * 1e2); } @@ -327,7 +330,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemUsingAdvancedApplyMixed) auto b = gko::initialize({13.0, 7.0, 1.0}, this->exec); auto x = gko::initialize({0.5, 1.0, 2.0}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({1.5, 5.0, 2.0}), (r_mixed() * 2e1)); @@ -350,7 +353,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemUsingAdvancedApplyComplex) {value_type{0.5, -1.0}, value_type{1.0, -2.0}, value_type{2.0, -4.0}}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({value_type{1.5, -3.0}, value_type{5.0, -10.0}, @@ -376,7 +379,7 @@ TYPED_TEST(Gcr, SolvesStencilSystemUsingAdvancedApplyMixedComplex) {value_type{0.5, -1.0}, value_type{1.0, -2.0}, value_type{2.0, -4.0}}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({value_type{1.5, -3.0}, value_type{5.0, -10.0}, @@ -398,7 +401,7 @@ TYPED_TEST(Gcr, SolvesMultipleStencilSystemsUsingAdvancedApply) auto x = gko::initialize( {I{0.5, 1.0}, I{1.0, 2.0}, I{2.0, 3.0}}, this->exec); - solver->apply(alpha.get(), b.get(), beta.get(), x.get()); + solver->apply(alpha, b, beta, x); GKO_ASSERT_MTX_NEAR(x, l({{1.5, 1.0}, {5.0, 0.0}, {2.0, -1.0}}), r::value * 1e2); @@ -417,7 +420,7 @@ TYPED_TEST(Gcr, SolvesBigDenseSystem1) this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({52.7, 85.4, 134.2, -250.0, -16.8, 35.3}), r::value * 1e3); @@ -436,7 +439,7 @@ TYPED_TEST(Gcr, SolvesBigDenseSystem2) this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({33.0, -56.0, 81.0, -30.0, 21.0, 40.0}), r::value * 1e3); @@ -455,7 +458,7 @@ TYPED_TEST(Gcr, SolveWithImplicitResNormCritIsDisabled) this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - ASSERT_THROW(solver->apply(b.get(), x.get()), gko::NotSupported); + ASSERT_THROW(solver->apply(b, x), gko::NotSupported); } @@ -502,9 +505,9 @@ TYPED_TEST(Gcr, SolvesMultipleDenseSystemForDivergenceCheck) xc->at(i, 1) = x2->at(i); } - solver->apply(b1.get(), x1.get()); - solver->apply(b2.get(), x2.get()); - solver->apply(bc.get(), xc.get()); + solver->apply(b1, x1); + solver->apply(b2, x2); + solver->apply(bc, xc); auto mergedRes = Mtx::create(this->exec, gko::dim<2>{b1->get_size()[0], 2}); for (size_t i = 0; i < mergedRes->get_size()[0]; ++i) { mergedRes->at(i, 0) = x1->at(i); @@ -518,9 +521,9 @@ TYPED_TEST(Gcr, SolvesMultipleDenseSystemForDivergenceCheck) auto residual2 = gko::clone(this->exec, b2); auto residualC = gko::clone(this->exec, bc); - this->mtx_big->apply(alpha.get(), x1.get(), beta.get(), residual1.get()); - this->mtx_big->apply(alpha.get(), x2.get(), beta.get(), residual2.get()); - this->mtx_big->apply(alpha.get(), xc.get(), beta.get(), residualC.get()); + this->mtx_big->apply(alpha, x1, beta, residual1); + this->mtx_big->apply(alpha, x2, beta, residual2); + this->mtx_big->apply(alpha, xc, beta, residualC); auto normS1 = infNorm(residual1.get()); auto normS2 = infNorm(residual2.get()); @@ -560,7 +563,7 @@ TYPED_TEST(Gcr, SolvesBigDenseSystem1WithRestart) {-13945.16, 11205.66, 16132.96, 24342.18, -10910.98}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({-140.20, -142.20, 48.80, -17.70, -19.60}), half_tol * 1e2); @@ -589,7 +592,7 @@ TYPED_TEST(Gcr, SolvesWithPreconditioner) this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - solver->apply(b.get(), x.get()); + solver->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({33.0, -56.0, 81.0, -30.0, 21.0, 40.0}), r::value * 1e3); @@ -608,7 +611,7 @@ TYPED_TEST(Gcr, SolvesTransposedBigDenseSystem) this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - solver->transpose()->apply(b.get(), x.get()); + solver->transpose()->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({52.7, 85.4, 134.2, -250.0, -16.8, 35.3}), r::value * 1e3); @@ -628,7 +631,7 @@ TYPED_TEST(Gcr, SolvesConjTransposedBigDenseSystem) this->exec); auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - solver->conj_transpose()->apply(b.get(), x.get()); + solver->conj_transpose()->apply(b, x); GKO_ASSERT_MTX_NEAR(x, l({52.7, 85.4, 134.2, -250.0, -16.8, 35.3}), r::value * 1e3); diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index 93d2d79e624..c980e58d947 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -154,9 +154,10 @@ TYPED_TEST(Gmres, KernelInitialize) expected_sin_cos->fill(gko::zero()); gko::kernels::reference::common_gmres::initialize( - this->exec, this->small_b.get(), this->small_residual.get(), - this->small_givens_sin.get(), this->small_givens_cos.get(), - this->small_stop.get_data()); + this->exec, this->small_b->get_const_device_view(), + this->small_residual->get_device_view(), + this->small_givens_sin->get_device_view(), + this->small_givens_cos->get_device_view(), this->small_stop.get_data()); GKO_ASSERT_MTX_NEAR(this->small_residual, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_givens_sin, expected_sin_cos, 0); @@ -192,9 +193,11 @@ TYPED_TEST(Gmres, KernelRestart) } gko::kernels::reference::gmres::restart( - this->exec, this->small_residual.get(), this->small_residual_norm.get(), - this->small_residual_norm_collection.get(), - this->small_krylov_bases.get(), this->small_final_iter_nums.get_data()); + this->exec, this->small_residual->get_const_device_view(), + this->small_residual_norm->get_const_device_view(), + this->small_residual_norm_collection->get_device_view(), + this->small_krylov_bases->get_device_view(), + this->small_final_iter_nums.get_data()); ASSERT_EQ(this->small_final_iter_nums.get_size(), this->small_residual_norm_collection->get_size()[1]); @@ -236,10 +239,12 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter0) this->small_hessenberg->get_values()), hessenberg_iter_cols); gko::kernels::reference::common_gmres::hessenberg_qr( - this->exec, this->small_givens_sin.get(), this->small_givens_cos.get(), - this->small_residual_norm.get(), - this->small_residual_norm_collection.get(), hessenberg_reshape.get(), - iteration, this->small_final_iter_nums.get_data(), + this->exec, this->small_givens_sin->get_device_view(), + this->small_givens_cos->get_device_view(), + this->small_residual_norm->get_device_view(), + this->small_residual_norm_collection->get_device_view(), + hessenberg_reshape->get_device_view(), iteration, + this->small_final_iter_nums.get_data(), this->small_stop.get_const_data()); ASSERT_EQ(this->small_final_iter_nums.get_data()[0], 1); @@ -288,10 +293,12 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter1) this->small_hessenberg->get_values()), hessenberg_iter_cols); gko::kernels::reference::common_gmres::hessenberg_qr( - this->exec, this->small_givens_sin.get(), this->small_givens_cos.get(), - this->small_residual_norm.get(), - this->small_residual_norm_collection.get(), hessenberg_reshape.get(), - iteration, this->small_final_iter_nums.get_data(), + this->exec, this->small_givens_sin->get_device_view(), + this->small_givens_cos->get_device_view(), + this->small_residual_norm->get_device_view(), + this->small_residual_norm_collection->get_device_view(), + hessenberg_reshape->get_device_view(), iteration, + this->small_final_iter_nums.get_data(), this->small_stop.get_const_data()); ASSERT_EQ(this->small_final_iter_nums.get_data()[0], 2); @@ -331,8 +338,10 @@ TYPED_TEST(Gmres, KernelSolveKrylov) gko::initialize({I{12, 3}, I{-3, 15}}, this->exec); gko::kernels::reference::common_gmres::solve_krylov( - this->exec, this->small_residual_norm_collection.get(), - this->small_hessenberg.get(), this->small_y.get(), + this->exec, + this->small_residual_norm_collection->get_const_device_view(), + this->small_hessenberg->get_const_device_view(), + this->small_y->get_device_view(), this->small_final_iter_nums.get_const_data(), this->small_stop.get_const_data()); @@ -369,8 +378,10 @@ TYPED_TEST(Gmres, KernelMultiAxpy) expected_stop.stop(7, true); gko::kernels::reference::gmres::multi_axpy( - this->exec, this->small_krylov_bases.get(), this->small_y.get(), - this->small_x.get(), this->small_final_iter_nums.get_const_data(), + this->exec, this->small_krylov_bases->get_const_device_view(), + this->small_y->get_const_device_view(), + this->small_x->get_device_view(), + this->small_final_iter_nums.get_const_data(), this->small_stop.get_data()); ASSERT_EQ(this->small_stop.get_const_data()[0], expected_stop); @@ -410,8 +421,9 @@ TYPED_TEST(Gmres, KernelMultiDot) }, this->exec); gko::kernels::reference::gmres::multi_dot( - this->exec, this->small_krylov_bases.get(), this->small_x.get(), - hessenberg_iter.get()); + this->exec, this->small_krylov_bases->get_const_device_view(), + this->small_x->get_const_device_view(), + hessenberg_iter->get_device_view()); GKO_ASSERT_MTX_NEAR(hessenberg_iter, l({{-3.8, -48.6}, {-23.6, -65.1}, {0.0, 0.0}}), diff --git a/reference/test/solver/ir_kernels.cpp b/reference/test/solver/ir_kernels.cpp index cb0ff5b5751..0f6f43083e7 100644 --- a/reference/test/solver/ir_kernels.cpp +++ b/reference/test/solver/ir_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -59,7 +59,7 @@ TYPED_TEST(Ir, KernelInitialize) non_stopped.reset(); std::fill_n(stop.get_data(), stop.get_size(), non_stopped); - gko::kernels::reference::ir::initialize(this->exec, &stop); + gko::kernels::reference::ir::initialize(this->exec, stop); ASSERT_EQ(stop.get_data()[0], non_stopped); ASSERT_EQ(stop.get_data()[1], non_stopped); diff --git a/reference/test/solver/minres_kernels.cpp b/reference/test/solver/minres_kernels.cpp index 9d6b5d3cae7..7a209ed9282 100644 --- a/reference/test/solver/minres_kernels.cpp +++ b/reference/test/solver/minres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -148,12 +148,15 @@ TYPED_TEST(Minres, KernelInitialize) this->stopped); gko::kernels::reference::minres::initialize( - this->exec, this->small_r.get(), this->small_z.get(), - this->small_p.get(), this->small_p_prev.get(), this->small_q.get(), - this->small_q_prev.get(), this->small_v.get(), this->beta.get(), - this->gamma.get(), this->delta.get(), this->cos_prev.get(), - this->cos.get(), this->sin_prev.get(), this->sin.get(), - this->eta_next.get(), this->eta.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_z->get_device_view(), this->small_p->get_device_view(), + this->small_p_prev->get_device_view(), this->small_q->get_device_view(), + this->small_q_prev->get_device_view(), this->small_v->get_device_view(), + this->beta->get_device_view(), this->gamma->get_device_view(), + this->delta->get_device_view(), this->cos_prev->get_device_view(), + this->cos->get_device_view(), this->sin_prev->get_device_view(), + this->sin->get_device_view(), this->eta_next->get_device_view(), + this->eta->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_q, l({{1. / 2, 2. / 5}, {3. / 2, 4. / 5}}), r::value); @@ -189,12 +192,15 @@ TYPED_TEST(Minres, KernelInitializeWithSafeDivide) this->small_q->fill(1); gko::kernels::reference::minres::initialize( - this->exec, this->small_r.get(), this->small_z.get(), - this->small_p.get(), this->small_p_prev.get(), this->small_q.get(), - this->small_q_prev.get(), this->small_v.get(), this->beta.get(), - this->gamma.get(), this->delta.get(), this->cos_prev.get(), - this->cos.get(), this->sin_prev.get(), this->sin.get(), - this->eta_next.get(), this->eta.get(), &this->small_stop); + this->exec, this->small_r->get_const_device_view(), + this->small_z->get_device_view(), this->small_p->get_device_view(), + this->small_p_prev->get_device_view(), this->small_q->get_device_view(), + this->small_q_prev->get_device_view(), this->small_v->get_device_view(), + this->beta->get_device_view(), this->gamma->get_device_view(), + this->delta->get_device_view(), this->cos_prev->get_device_view(), + this->cos->get_device_view(), this->sin_prev->get_device_view(), + this->sin->get_device_view(), this->eta_next->get_device_view(), + this->eta->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_q, l({{0.0, 0.0}, {0.0, 0.0}}), r::value); @@ -223,10 +229,13 @@ TYPED_TEST(Minres, KernelStep1) auto old_eta_next = gko::clone(this->eta_next); gko::kernels::reference::minres::step_1( - this->exec, this->alpha.get(), this->beta.get(), this->gamma.get(), - this->delta.get(), this->cos_prev.get(), this->cos.get(), - this->sin_prev.get(), this->sin.get(), this->eta.get(), - this->eta_next.get(), this->tau.get(), &this->small_stop); + this->exec, this->alpha->get_device_view(), + this->beta->get_device_view(), this->gamma->get_device_view(), + this->delta->get_device_view(), this->cos_prev->get_device_view(), + this->cos->get_device_view(), this->sin_prev->get_device_view(), + this->sin->get_device_view(), this->eta->get_device_view(), + this->eta_next->get_device_view(), this->tau->get_device_view(), + this->small_stop); GKO_ASSERT_MTX_NEAR(this->delta, l({{3 * 3., 7 * 6.}}), r::value); GKO_ASSERT_MTX_NEAR(this->gamma, @@ -279,12 +288,18 @@ TYPED_TEST(Minres, KernelStep2) old_small_z_tilde_scaled->inv_scale(this->beta.get()); gko::kernels::reference::minres::step_2( - this->exec, this->small_x.get(), this->small_p.get(), - this->small_p_prev.get(), this->small_z.get(), - this->small_z_tilde.get(), this->small_q.get(), - this->small_q_prev.get(), this->small_v.get(), this->alpha.get(), - this->beta.get(), this->gamma.get(), this->delta.get(), this->cos.get(), - this->eta.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_p->get_device_view(), + this->small_p_prev->get_const_device_view(), + this->small_z->get_device_view(), + this->small_z_tilde->get_const_device_view(), + this->small_q->get_device_view(), this->small_q_prev->get_device_view(), + this->small_v->get_device_view(), this->alpha->get_const_device_view(), + this->beta->get_const_device_view(), + this->gamma->get_const_device_view(), + this->delta->get_const_device_view(), + this->cos->get_const_device_view(), this->eta->get_const_device_view(), + this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_q_prev, old_small_v, 0.); GKO_ASSERT_MTX_NEAR(this->small_v, old_small_v_scaled, 0.); @@ -314,12 +329,18 @@ TYPED_TEST(Minres, KernelStep2WithSafeDivide) auto old_small_v_scaled = gko::clone(this->small_q); gko::kernels::reference::minres::step_2( - this->exec, this->small_x.get(), this->small_p.get(), - this->small_p_prev.get(), this->small_z.get(), - this->small_z_tilde.get(), this->small_q.get(), - this->small_q_prev.get(), this->small_v.get(), this->alpha.get(), - this->beta.get(), this->gamma.get(), this->delta.get(), this->cos.get(), - this->eta.get(), &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_p->get_device_view(), + this->small_p_prev->get_const_device_view(), + this->small_z->get_device_view(), + this->small_z_tilde->get_const_device_view(), + this->small_q->get_device_view(), this->small_q_prev->get_device_view(), + this->small_v->get_device_view(), this->alpha->get_const_device_view(), + this->beta->get_const_device_view(), + this->gamma->get_const_device_view(), + this->delta->get_const_device_view(), + this->cos->get_const_device_view(), this->eta->get_const_device_view(), + this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_q, l({{0.0, 0.0}, {0.0, 0.0}}), 0.); GKO_ASSERT_MTX_NEAR(this->small_z, l({{0.0, 0.0}, {0.0, 0.0}}), 0.); diff --git a/reference/test/solver/pipe_cg_kernels.cpp b/reference/test/solver/pipe_cg_kernels.cpp index e422d934324..b4d141a3600 100644 --- a/reference/test/solver/pipe_cg_kernels.cpp +++ b/reference/test/solver/pipe_cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -132,8 +132,9 @@ TYPED_TEST(PipeCg, KernelInitialize1) this->stopped); gko::kernels::reference::pipe_cg::initialize_1( - this->exec, this->small_b.get(), this->small_r.get(), - this->small_prev_rho.get(), &this->small_stop); + this->exec, this->small_b->get_const_device_view(), + this->small_r->get_device_view(), + this->small_prev_rho->get_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_r, this->small_b, 0); GKO_ASSERT_MTX_NEAR(this->small_prev_rho, l({{1.0, 1.0}}), 0); @@ -151,10 +152,14 @@ TYPED_TEST(PipeCg, KernelInitialize2) this->small_delta->fill(32); gko::kernels::reference::pipe_cg::initialize_2( - this->exec, this->small_p.get(), this->small_q.get(), - this->small_f.get(), this->small_g.get(), this->small_beta.get(), - this->small_z1.get(), this->small_w.get(), this->small_m.get(), - this->small_n.get(), this->small_delta.get()); + this->exec, this->small_p->get_device_view(), + this->small_q->get_device_view(), this->small_f->get_device_view(), + this->small_g->get_device_view(), this->small_beta->get_device_view(), + this->small_z1->get_const_device_view(), + this->small_w->get_const_device_view(), + this->small_m->get_const_device_view(), + this->small_n->get_const_device_view(), + this->small_delta->get_const_device_view()); GKO_ASSERT_MTX_NEAR(this->small_p, this->small_z1, 0); GKO_ASSERT_MTX_NEAR(this->small_q, this->small_w, 0); @@ -183,11 +188,15 @@ TYPED_TEST(PipeCg, KernelStep1) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::pipe_cg::step_1( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_z1.get(), this->small_z2.get(), this->small_w.get(), - this->small_p.get(), this->small_q.get(), this->small_f.get(), - this->small_g.get(), this->small_rho.get(), this->small_beta.get(), - &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), this->small_z1->get_device_view(), + this->small_z2->get_device_view(), this->small_w->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_f->get_const_device_view(), + this->small_g->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_beta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{2.0, 1.0}, {2.0, 1.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{1.25, 2.0}, {1.25, 2.0}}), 0); @@ -211,11 +220,15 @@ TYPED_TEST(PipeCg, KernelStep1DivByZero) this->small_beta->fill(0); gko::kernels::reference::pipe_cg::step_1( - this->exec, this->small_x.get(), this->small_r.get(), - this->small_z1.get(), this->small_z2.get(), this->small_w.get(), - this->small_p.get(), this->small_q.get(), this->small_f.get(), - this->small_g.get(), this->small_rho.get(), this->small_beta.get(), - &this->small_stop); + this->exec, this->small_x->get_device_view(), + this->small_r->get_device_view(), this->small_z1->get_device_view(), + this->small_z2->get_device_view(), this->small_w->get_device_view(), + this->small_p->get_const_device_view(), + this->small_q->get_const_device_view(), + this->small_f->get_const_device_view(), + this->small_g->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_beta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_x, l({{1.0, 1.0}, {1.0, 1.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_r, l({{2.0, 2.0}, {2.0, 2.0}}), 0); @@ -246,11 +259,16 @@ TYPED_TEST(PipeCg, KernelStep2) this->small_stop.get_data()[1] = this->stopped; gko::kernels::reference::pipe_cg::step_2( - this->exec, this->small_beta.get(), this->small_p.get(), - this->small_q.get(), this->small_f.get(), this->small_g.get(), - this->small_z1.get(), this->small_w.get(), this->small_m.get(), - this->small_n.get(), this->small_prev_rho.get(), this->small_rho.get(), - this->small_delta.get(), &this->small_stop); + this->exec, this->small_beta->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_f->get_device_view(), this->small_g->get_device_view(), + this->small_z1->get_const_device_view(), + this->small_w->get_const_device_view(), + this->small_m->get_const_device_view(), + this->small_n->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_delta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_beta, l({{4.5, 3.0}}), 0); GKO_ASSERT_MTX_NEAR(this->small_p, l({{-1.0, 4.0}, {-1.0, 4.0}}), 0); @@ -279,11 +297,16 @@ TYPED_TEST(PipeCg, KernelStep2DivByZero) this->small_delta->at(1) = 6; gko::kernels::reference::pipe_cg::step_2( - this->exec, this->small_beta.get(), this->small_p.get(), - this->small_q.get(), this->small_f.get(), this->small_g.get(), - this->small_z1.get(), this->small_w.get(), this->small_m.get(), - this->small_n.get(), this->small_prev_rho.get(), this->small_rho.get(), - this->small_delta.get(), &this->small_stop); + this->exec, this->small_beta->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_f->get_device_view(), this->small_g->get_device_view(), + this->small_z1->get_const_device_view(), + this->small_w->get_const_device_view(), + this->small_m->get_const_device_view(), + this->small_n->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_delta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_beta, this->small_delta, 0); GKO_ASSERT_MTX_NEAR(this->small_p, this->small_z1, 0); @@ -316,11 +339,16 @@ TYPED_TEST(PipeCg, KernelStep2BetaZero) this->small_stop.get_data()[1].reset(); gko::kernels::reference::pipe_cg::step_2( - this->exec, this->small_beta.get(), this->small_p.get(), - this->small_q.get(), this->small_f.get(), this->small_g.get(), - this->small_z1.get(), this->small_w.get(), this->small_m.get(), - this->small_n.get(), this->small_prev_rho.get(), this->small_rho.get(), - this->small_delta.get(), &this->small_stop); + this->exec, this->small_beta->get_device_view(), + this->small_p->get_device_view(), this->small_q->get_device_view(), + this->small_f->get_device_view(), this->small_g->get_device_view(), + this->small_z1->get_const_device_view(), + this->small_w->get_const_device_view(), + this->small_m->get_const_device_view(), + this->small_n->get_const_device_view(), + this->small_prev_rho->get_const_device_view(), + this->small_rho->get_const_device_view(), + this->small_delta->get_const_device_view(), this->small_stop); GKO_ASSERT_MTX_NEAR(this->small_beta, this->small_delta, 0); GKO_ASSERT_MTX_NEAR(this->small_p, l({{2.0, 1.5}, {2.0, 1.5}}), diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index 4ccf27aa7a4..93eadb8d573 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -163,7 +163,7 @@ void run1d(std::shared_ptr exec, Mtx* m) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; bool accessors_2d_correct = &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && @@ -176,8 +176,9 @@ void run1d(std::shared_ptr exec, Mtx* m) d(i / 4, i % 4) = 0; } }, - 16, m, static_cast(m), m->get_const_values(), - move_only_val); + 16, m->get_device_view(), + static_cast(m)->get_const_device_view(), + m->get_const_values(), move_only_val); } TEST_F(KernelLaunch, Runs1DDense) @@ -258,8 +259,8 @@ void run2d(std::shared_ptr exec, Mtx* m1, Mtx* m2, Mtx* m3) static_assert(is_same::value, "type"); static_assert(is_same::value, "dummy"); - bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && - d3.data == d2_ptr && d4 == d3_ptr; + bool pointers_correct = d.values == d_ptr && d2.values == d_ptr && + d3.values == d2_ptr && d4 == d3_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5 && d3.stride == 6; bool accessors_2d_correct = @@ -275,11 +276,12 @@ void run2d(std::shared_ptr exec, Mtx* m1, Mtx* m2, Mtx* m3) d(i, j) = 0; } }, - dim<2>{4, 4}, m2->get_stride(), m1, static_cast(m1), - m1->get_const_values(), - gko::kernels::GKO_DEVICE_NAMESPACE::default_stride(m2), - gko::kernels::GKO_DEVICE_NAMESPACE::row_vector(m3), m2->get_values(), - m3->get_values(), move_only_val); + dim<2>{4, 4}, m2->get_stride(), m1->get_device_view(), + m1->get_const_device_view(), m1->get_const_values(), + gko::kernels::GKO_DEVICE_NAMESPACE::default_stride( + m2->get_device_view()), + gko::kernels::GKO_DEVICE_NAMESPACE::row_vector(m3->get_device_view()), + m2->get_values(), m3->get_values(), move_only_val); } TEST_F(KernelLaunch, Runs2DDense) diff --git a/test/distributed/vector_kernels.cpp b/test/distributed/vector_kernels.cpp index 6259af78481..279a8594c52 100644 --- a/test/distributed/vector_kernels.cpp +++ b/test/distributed/vector_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -56,9 +56,10 @@ class Vector : public CommonTestFixture { auto d_output = gko::clone(exec, output); gko::kernels::reference::distributed_vector::build_local( - ref, input, partition.get(), part, output.get()); + ref, input, partition.get(), part, output->get_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::distributed_vector::build_local( - exec, d_input, d_partition.get(), part, d_output.get()); + exec, d_input, d_partition.get(), part, + d_output->get_device_view()); GKO_ASSERT_MTX_NEAR(output, d_output, 0); } diff --git a/test/matrix/csr_kernels2.cpp b/test/matrix/csr_kernels2.cpp index 0e635789c84..0c80dfb0b1c 100644 --- a/test/matrix/csr_kernels2.cpp +++ b/test/matrix/csr_kernels2.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -1662,9 +1662,9 @@ TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) auto drow_nnz = gko::array(this->exec, row_nnz); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( - this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + this->ref, this->mtx2.get(), rspan, cspan, row_nnz); gko::kernels::GKO_DEVICE_NAMESPACE::csr::calculate_nonzeros_per_row_in_span( - this->exec, this->dmtx2.get(), rspan, cspan, &drow_nnz); + this->exec, this->dmtx2.get(), rspan, cspan, drow_nnz); GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); } @@ -1680,7 +1680,7 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) auto row_nnz = gko::array(this->ref, rspan.length() + 1); row_nnz.fill(gko::zero()); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( - this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + this->ref, this->mtx2.get(), rspan, cspan, row_nnz); gko::kernels::reference::components::prefix_sum_nonnegative( this->ref, row_nnz.get_data(), row_nnz.get_size()); auto num_nnz = row_nnz.get_data()[rspan.length()]; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 727d6ee7d2f..98a6d6d4436 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -598,9 +598,9 @@ TEST_F(Dense, CalculateNNZPerRowIsEquivalentToRef) dnnz_per_row.resize_and_reset(dx->get_size()[0]); gko::kernels::reference::dense::count_nonzeros_per_row( - ref, x.get(), nnz_per_row.get_data()); + ref, x->get_const_device_view(), nnz_per_row.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::dense::count_nonzeros_per_row( - exec, dx.get(), dnnz_per_row.get_data()); + exec, dx->get_const_device_view(), dnnz_per_row.get_data()); auto tmp = gko::array(ref, dnnz_per_row); for (gko::size_type i = 0; i < nnz_per_row.get_size(); i++) { @@ -615,10 +615,10 @@ TEST_F(Dense, ComputeMaxNNZPerRowIsEquivalentToRef) gko::size_type max_nnz; gko::size_type dmax_nnz; - gko::kernels::reference::dense::compute_max_nnz_per_row(ref, x.get(), - max_nnz); + gko::kernels::reference::dense::compute_max_nnz_per_row( + ref, x->get_const_device_view(), max_nnz); gko::kernels::GKO_DEVICE_NAMESPACE::dense::compute_max_nnz_per_row( - exec, dx.get(), dmax_nnz); + exec, dx->get_const_device_view(), dmax_nnz); ASSERT_EQ(max_nnz, dmax_nnz); } @@ -2005,9 +2005,9 @@ TEST_F(Dense, ComputeNorm2SquaredIsEquivalentToRef) gko::array dtmp{exec}; gko::kernels::reference::dense::compute_squared_norm2( - ref, x.get(), norm_expected.get(), tmp); + ref, x->get_const_device_view(), norm_expected->get_device_view(), tmp); gko::kernels::GKO_DEVICE_NAMESPACE::dense::compute_squared_norm2( - exec, dx.get(), dnorm.get(), dtmp); + exec, dx->get_const_device_view(), dnorm->get_device_view(), dtmp); GKO_ASSERT_MTX_NEAR(dnorm, norm_expected, r::value); } @@ -2021,8 +2021,9 @@ TEST_F(Dense, ComputesSqrt) rand_engine, ref); auto dmtx = gko::clone(exec, mtx); - gko::kernels::reference::dense::compute_sqrt(ref, mtx.get()); - gko::kernels::GKO_DEVICE_NAMESPACE::dense::compute_sqrt(exec, dmtx.get()); + gko::kernels::reference::dense::compute_sqrt(ref, mtx->get_device_view()); + gko::kernels::GKO_DEVICE_NAMESPACE::dense::compute_sqrt( + exec, dmtx->get_device_view()); GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); } diff --git a/test/matrix/device_views.cpp b/test/matrix/device_views.cpp index 87c2cefa3bf..874e376b5e4 100644 --- a/test/matrix/device_views.cpp +++ b/test/matrix/device_views.cpp @@ -37,7 +37,7 @@ void assert_dense_view(std::shared_ptr exec) gko::matrix::view::dense view{gko::dim<2>{1, 2}, 3, values}; if (view.size == gko::dim<2>(1, 2) && view.stride == 3 && - view.data == values && &view(0, 0) == &values[0] && + view.values == values && &view(0, 0) == &values[0] && &view(1, 0) == &values[3] && &view(1, 1) == &values[4] && view(1, 1) == gko::one(view(1, 1))) { *correct = true; diff --git a/test/preconditioner/isai_kernels.cpp b/test/preconditioner/isai_kernels.cpp index 8ac1ad1e8ba..7c614ad8e76 100644 --- a/test/preconditioner/isai_kernels.cpp +++ b/test/preconditioner/isai_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -310,10 +310,11 @@ TEST_F(Isai, IsaiGenerateExcessLinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get(), 0, num_rows); + excess.get(), e_rhs->get_device_view(), 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::generate_excess_system( exec, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + da2.get_const_data(), dexcess.get(), de_rhs->get_device_view(), 0, + num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -341,10 +342,11 @@ TEST_F(Isai, IsaiGenerateExcessUinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get(), 0, num_rows); + excess.get(), e_rhs->get_device_view(), 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::generate_excess_system( exec, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + da2.get_const_data(), dexcess.get(), de_rhs->get_device_view(), 0, + num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -372,10 +374,11 @@ TEST_F(Isai, IsaiGenerateExcessAinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get(), 0, num_rows); + excess.get(), e_rhs->get_device_view(), 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::generate_excess_system( exec, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + da2.get_const_data(), dexcess.get(), de_rhs->get_device_view(), 0, + num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -403,10 +406,11 @@ TEST_F(Isai, IsaiGenerateExcessSpdinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get(), 0, num_rows); + excess.get(), e_rhs->get_device_view(), 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::generate_excess_system( exec, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + da2.get_const_data(), dexcess.get(), de_rhs->get_device_view(), 0, + num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -434,10 +438,11 @@ TEST_F(Isai, IsaiGeneratePartialExcessIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get(), 5u, 10u); + excess.get(), e_rhs->get_device_view(), 5u, 10u); gko::kernels::GKO_DEVICE_NAMESPACE::isai::generate_excess_system( exec, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get(), 5u, 10u); + da2.get_const_data(), dexcess.get(), de_rhs->get_device_view(), 5u, + 10u); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -462,9 +467,9 @@ TEST_F(Isai, IsaiScaleExcessSolutionIsEquivalentToRef) d_inverse->copy_from(inverse); gko::kernels::reference::isai::scale_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), 0, num_rows); + ref, a1.get_const_data(), e_rhs->get_device_view(), 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scale_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), 0, num_rows); + exec, da1.get_const_data(), de_rhs->get_device_view(), 0, num_rows); GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); } @@ -485,9 +490,9 @@ TEST_F(Isai, IsaiScalePartialExcessSolutionIsEquivalentToRef) auto de_rhs = gko::clone(exec, e_rhs); gko::kernels::reference::isai::scale_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), 5u, 10u); + ref, a1.get_const_data(), e_rhs->get_device_view(), 5u, 10u); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scale_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), 5u, 10u); + exec, da1.get_const_data(), de_rhs->get_device_view(), 5u, 10u); GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); } @@ -509,9 +514,11 @@ TEST_F(Isai, IsaiScatterExcessSolutionLIsEquivalentToRef) d_inverse->copy_from(inverse); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + ref, a1.get_const_data(), e_rhs->get_const_device_view(), inverse.get(), + 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scatter_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + exec, da1.get_const_data(), de_rhs->get_const_device_view(), + d_inverse.get(), 0, num_rows); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); @@ -535,9 +542,11 @@ TEST_F(Isai, IsaiScatterExcessSolutionUIsEquivalentToRef) d_inverse->copy_from(inverse); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + ref, a1.get_const_data(), e_rhs->get_const_device_view(), inverse.get(), + 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scatter_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + exec, da1.get_const_data(), de_rhs->get_const_device_view(), + d_inverse.get(), 0, num_rows); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); @@ -561,9 +570,11 @@ TEST_F(Isai, IsaiScatterExcessSolutionAIsEquivalentToRef) d_inverse->copy_from(inverse); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + ref, a1.get_const_data(), e_rhs->get_const_device_view(), inverse.get(), + 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scatter_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + exec, da1.get_const_data(), de_rhs->get_const_device_view(), + d_inverse.get(), 0, num_rows); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); @@ -587,9 +598,11 @@ TEST_F(Isai, IsaiScatterExcessSolutionSpdIsEquivalentToRef) d_inverse->copy_from(inverse); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + ref, a1.get_const_data(), e_rhs->get_const_device_view(), inverse.get(), + 0, num_rows); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scatter_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + exec, da1.get_const_data(), de_rhs->get_const_device_view(), + d_inverse.get(), 0, num_rows); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); @@ -613,9 +626,11 @@ TEST_F(Isai, IsaiScatterPartialExcessSolutionIsEquivalentToRef) d_inverse->copy_from(inverse); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 5u, 10u); + ref, a1.get_const_data(), e_rhs->get_const_device_view(), inverse.get(), + 5u, 10u); gko::kernels::GKO_DEVICE_NAMESPACE::isai::scatter_excess_solution( - exec, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 5u, 10u); + exec, da1.get_const_data(), de_rhs->get_const_device_view(), + d_inverse.get(), 5u, 10u); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); diff --git a/test/solver/bicg_kernels.cpp b/test/solver/bicg_kernels.cpp index 3f3b6a01ae1..5dab6b4930c 100644 --- a/test/solver/bicg_kernels.cpp +++ b/test/solver/bicg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -67,13 +67,12 @@ class Bicg : public CommonTestFixture { // check correct handling for zero values beta->at(2) = 0.0; prev_rho->at(2) = 0.0; - stop_status = - std::make_unique>(ref, n); - for (size_t i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, n); + for (size_t i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_b = gko::clone(exec, b); d_r = gko::clone(exec, r); @@ -88,8 +87,7 @@ class Bicg : public CommonTestFixture { d_beta = gko::clone(exec, beta); d_prev_rho = gko::clone(exec, prev_rho); d_rho = gko::clone(exec, rho); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -108,7 +106,7 @@ class Bicg : public CommonTestFixture { std::unique_ptr prev_rho; std::unique_ptr rho; std::shared_ptr mtx_ani; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_b; std::unique_ptr d_r; @@ -124,7 +122,7 @@ class Bicg : public CommonTestFixture { std::unique_ptr d_prev_rho; std::unique_ptr d_rho; std::shared_ptr d_mtx_ani; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -133,12 +131,17 @@ TEST_F(Bicg, BicgInitializeIsEquivalentToRef) initialize_data(); gko::kernels::reference::bicg::initialize( - ref, b.get(), r.get(), z.get(), p.get(), q.get(), prev_rho.get(), - rho.get(), r2.get(), z2.get(), p2.get(), q2.get(), stop_status.get()); + ref, b->get_const_device_view(), r->get_device_view(), + z->get_device_view(), p->get_device_view(), q->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), + r2->get_device_view(), z2->get_device_view(), p2->get_device_view(), + q2->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicg::initialize( - exec, d_b.get(), d_r.get(), d_z.get(), d_p.get(), d_q.get(), - d_prev_rho.get(), d_rho.get(), d_r2.get(), d_z2.get(), d_p2.get(), - d_q2.get(), d_stop_status.get()); + exec, d_b->get_const_device_view(), d_r->get_device_view(), + d_z->get_device_view(), d_p->get_device_view(), d_q->get_device_view(), + d_prev_rho->get_device_view(), d_rho->get_device_view(), + d_r2->get_device_view(), d_z2->get_device_view(), + d_p2->get_device_view(), d_q2->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -150,7 +153,7 @@ TEST_F(Bicg, BicgInitializeIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(d_q2, q2, ::r::value); GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, ::r::value); GKO_ASSERT_MTX_NEAR(d_rho, rho, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -158,12 +161,16 @@ TEST_F(Bicg, BicgStep1IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::bicg::step_1(ref, p.get(), z.get(), p2.get(), - z2.get(), rho.get(), prev_rho.get(), - stop_status.get()); + gko::kernels::reference::bicg::step_1( + ref, p->get_device_view(), z->get_const_device_view(), + p2->get_device_view(), z2->get_const_device_view(), + rho->get_const_device_view(), prev_rho->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicg::step_1( - exec, d_p.get(), d_z.get(), d_p2.get(), d_z2.get(), d_rho.get(), - d_prev_rho.get(), d_stop_status.get()); + exec, d_p->get_device_view(), d_z->get_const_device_view(), + d_p2->get_device_view(), d_z2->get_const_device_view(), + d_rho->get_const_device_view(), d_prev_rho->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_p, p, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -177,11 +184,16 @@ TEST_F(Bicg, BicgStep2IsEquivalentToRef) initialize_data(); gko::kernels::reference::bicg::step_2( - ref, x.get(), r.get(), r2.get(), p.get(), q.get(), q2.get(), beta.get(), - rho.get(), stop_status.get()); + ref, x->get_device_view(), r->get_device_view(), r2->get_device_view(), + p->get_const_device_view(), q->get_const_device_view(), + q2->get_const_device_view(), beta->get_const_device_view(), + rho->get_const_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicg::step_2( - exec, d_x.get(), d_r.get(), d_r2.get(), d_p.get(), d_q.get(), - d_q2.get(), d_beta.get(), d_rho.get(), d_stop_status.get()); + exec, d_x->get_device_view(), d_r->get_device_view(), + d_r2->get_device_view(), d_p->get_const_device_view(), + d_q->get_const_device_view(), d_q2->get_const_device_view(), + d_beta->get_const_device_view(), d_rho->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); diff --git a/test/solver/bicgstab_kernels.cpp b/test/solver/bicgstab_kernels.cpp index 9716acd86cb..2d95573b024 100644 --- a/test/solver/bicgstab_kernels.cpp +++ b/test/solver/bicgstab_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -91,13 +91,12 @@ class Bicgstab : public CommonTestFixture { beta->at(2) = 0.0; omega->at(2) = 0.0; omega->at(3) = 0.0; - stop_status = - std::make_unique>(ref, n); + stop_status = gko::array(ref, n); for (size_t i = 0; i < n; ++i) { - stop_status->get_data()[i].reset(); + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_x = gko::clone(exec, x); d_b = gko::clone(exec, b); @@ -115,8 +114,7 @@ class Bicgstab : public CommonTestFixture { d_beta = gko::clone(exec, beta); d_gamma = gko::clone(exec, gamma); d_omega = gko::clone(exec, omega); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -142,7 +140,7 @@ class Bicgstab : public CommonTestFixture { std::unique_ptr beta; std::unique_ptr gamma; std::unique_ptr omega; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_x; std::unique_ptr d_b; @@ -160,7 +158,7 @@ class Bicgstab : public CommonTestFixture { std::unique_ptr d_beta; std::unique_ptr d_gamma; std::unique_ptr d_omega; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -169,14 +167,21 @@ TEST_F(Bicgstab, BicgstabInitializeIsEquivalentToRef) initialize_data(); gko::kernels::reference::bicgstab::initialize( - ref, b.get(), r.get(), rr.get(), y.get(), s.get(), t.get(), z.get(), - v.get(), p.get(), prev_rho.get(), rho.get(), alpha.get(), beta.get(), - gamma.get(), omega.get(), stop_status.get()); + ref, b->get_const_device_view(), r->get_device_view(), + rr->get_device_view(), y->get_device_view(), s->get_device_view(), + t->get_device_view(), z->get_device_view(), v->get_device_view(), + p->get_device_view(), prev_rho->get_device_view(), + rho->get_device_view(), alpha->get_device_view(), + beta->get_device_view(), gamma->get_device_view(), + omega->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicgstab::initialize( - exec, d_b.get(), d_r.get(), d_rr.get(), d_y.get(), d_s.get(), d_t.get(), - d_z.get(), d_v.get(), d_p.get(), d_prev_rho.get(), d_rho.get(), - d_alpha.get(), d_beta.get(), d_gamma.get(), d_omega.get(), - d_stop_status.get()); + exec, d_b->get_const_device_view(), d_r->get_device_view(), + d_rr->get_device_view(), d_y->get_device_view(), d_s->get_device_view(), + d_t->get_device_view(), d_z->get_device_view(), d_v->get_device_view(), + d_p->get_device_view(), d_prev_rho->get_device_view(), + d_rho->get_device_view(), d_alpha->get_device_view(), + d_beta->get_device_view(), d_gamma->get_device_view(), + d_omega->get_device_view(), d_stop_status); GKO_EXPECT_MTX_NEAR(d_r, r, ::r::value); GKO_EXPECT_MTX_NEAR(d_z, z, ::r::value); @@ -192,7 +197,7 @@ TEST_F(Bicgstab, BicgstabInitializeIsEquivalentToRef) GKO_EXPECT_MTX_NEAR(d_beta, beta, ::r::value); GKO_EXPECT_MTX_NEAR(d_gamma, gamma, ::r::value); GKO_EXPECT_MTX_NEAR(d_omega, omega, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -201,11 +206,15 @@ TEST_F(Bicgstab, BicgstabStep1IsEquivalentToRef) initialize_data(); gko::kernels::reference::bicgstab::step_1( - ref, r.get(), p.get(), v.get(), rho.get(), prev_rho.get(), alpha.get(), - omega.get(), stop_status.get()); + ref, r->get_const_device_view(), p->get_device_view(), + v->get_const_device_view(), rho->get_const_device_view(), + prev_rho->get_const_device_view(), alpha->get_const_device_view(), + omega->get_const_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicgstab::step_1( - exec, d_r.get(), d_p.get(), d_v.get(), d_rho.get(), d_prev_rho.get(), - d_alpha.get(), d_omega.get(), d_stop_status.get()); + exec, d_r->get_const_device_view(), d_p->get_device_view(), + d_v->get_const_device_view(), d_rho->get_const_device_view(), + d_prev_rho->get_const_device_view(), d_alpha->get_const_device_view(), + d_omega->get_const_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_p, p, ::r::value); } @@ -215,12 +224,15 @@ TEST_F(Bicgstab, BicgstabStep2IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::bicgstab::step_2(ref, r.get(), s.get(), v.get(), - rho.get(), alpha.get(), - beta.get(), stop_status.get()); + gko::kernels::reference::bicgstab::step_2( + ref, r->get_const_device_view(), s->get_device_view(), + v->get_const_device_view(), rho->get_const_device_view(), + alpha->get_device_view(), beta->get_const_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicgstab::step_2( - exec, d_r.get(), d_s.get(), d_v.get(), d_rho.get(), d_alpha.get(), - d_beta.get(), d_stop_status.get()); + exec, d_r->get_const_device_view(), d_s->get_device_view(), + d_v->get_const_device_view(), d_rho->get_const_device_view(), + d_alpha->get_device_view(), d_beta->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_alpha, alpha, ::r::value); GKO_ASSERT_MTX_NEAR(d_s, s, ::r::value); @@ -232,12 +244,18 @@ TEST_F(Bicgstab, BicgstabStep3IsEquivalentToRef) initialize_data(); gko::kernels::reference::bicgstab::step_3( - ref, x.get(), r.get(), s.get(), t.get(), y.get(), z.get(), alpha.get(), - beta.get(), gamma.get(), omega.get(), stop_status.get()); + ref, x->get_device_view(), r->get_device_view(), + s->get_const_device_view(), t->get_const_device_view(), + y->get_const_device_view(), z->get_const_device_view(), + alpha->get_const_device_view(), beta->get_const_device_view(), + gamma->get_const_device_view(), omega->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicgstab::step_3( - exec, d_x.get(), d_r.get(), d_s.get(), d_t.get(), d_y.get(), d_z.get(), - d_alpha.get(), d_beta.get(), d_gamma.get(), d_omega.get(), - d_stop_status.get()); + exec, d_x->get_device_view(), d_r->get_device_view(), + d_s->get_const_device_view(), d_t->get_const_device_view(), + d_y->get_const_device_view(), d_z->get_const_device_view(), + d_alpha->get_const_device_view(), d_beta->get_const_device_view(), + d_gamma->get_const_device_view(), d_omega->get_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_omega, omega, ::r::value); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); @@ -266,24 +284,25 @@ TEST_F(Bicgstab, BicgstabFinalizeIsEquivalentToRefWithoutRaceCondition) d_x = x->clone(exec); d_y = y->clone(exec); d_alpha = alpha->clone(exec); - stop_status = std::make_unique>(ref, n); + stop_status = gko::array(ref, n); for (size_t i = 0; i < n; ++i) { - stop_status->get_data()[i].reset(); + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); // finalize only update the stopped one but not finished yet - stop_status->get_data()[0].stop(1, false); - d_stop_status = - std::make_unique>(exec, *stop_status); + stop_status.get_data()[0].stop(1, false); + d_stop_status = gko::array(exec, stop_status); - gko::kernels::reference::bicgstab::finalize(ref, x.get(), y.get(), - alpha.get(), stop_status.get()); + gko::kernels::reference::bicgstab::finalize( + ref, x->get_device_view(), y->get_const_device_view(), + alpha->get_const_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::bicgstab::finalize( - exec, d_x.get(), d_y.get(), d_alpha.get(), d_stop_status.get()); + exec, d_x->get_device_view(), d_y->get_const_device_view(), + d_alpha->get_const_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } diff --git a/test/solver/cb_gmres_kernels.cpp b/test/solver/cb_gmres_kernels.cpp index 98eb295091b..3ad67afc870 100644 --- a/test/solver/cb_gmres_kernels.cpp +++ b/test/solver/cb_gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -95,23 +95,21 @@ class CbGmres : public CommonTestFixture { residual_norm_collection = gen_mtx(default_krylov_dim_mixed + 1, n); givens_sin = gen_mtx(default_krylov_dim_mixed, n); givens_cos = gen_mtx(default_krylov_dim_mixed, n); - stop_status = - std::make_unique>(ref, n); - for (size_t i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, n); + for (size_t i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } - reorth_status = - std::make_unique>(ref, n); - for (size_t i = 0; i < reorth_status->get_size(); ++i) { - reorth_status->get_data()[i].reset(); + reorth_status = gko::array(ref, n); + for (size_t i = 0; i < reorth_status.get_size(); ++i) { + reorth_status.get_data()[i].reset(); } - final_iter_nums = std::make_unique>(ref, n); - for (size_t i = 0; i < final_iter_nums->get_size(); ++i) { - final_iter_nums->get_data()[i] = 5; + final_iter_nums = gko::array(ref, n); + for (size_t i = 0; i < final_iter_nums.get_size(); ++i) { + final_iter_nums.get_data()[i] = 5; } - num_reorth = std::make_unique>(ref, n); - for (size_t i = 0; i < num_reorth->get_size(); ++i) { - num_reorth->get_data()[i] = 5; + num_reorth = gko::array(ref, n); + for (size_t i = 0; i < num_reorth.get_size(); ++i) { + num_reorth.get_data()[i] = 5; } d_x = gko::clone(exec, x); @@ -130,14 +128,10 @@ class CbGmres : public CommonTestFixture { d_residual_norm_collection = gko::clone(exec, residual_norm_collection); d_givens_sin = gko::clone(exec, givens_sin); d_givens_cos = gko::clone(exec, givens_cos); - d_stop_status = std::make_unique>( - exec, *stop_status); - d_reorth_status = std::make_unique>( - exec, *reorth_status); - d_final_iter_nums = std::make_unique>( - exec, *final_iter_nums); - d_num_reorth = - std::make_unique>(exec, *num_reorth); + d_stop_status = gko::array(exec, stop_status); + d_reorth_status = gko::array(exec, reorth_status); + d_final_iter_nums = gko::array(exec, final_iter_nums); + d_num_reorth = gko::array(exec, num_reorth); } void assert_krylov_bases_near() @@ -171,10 +165,10 @@ class CbGmres : public CommonTestFixture { std::unique_ptr residual_norm_collection; std::unique_ptr givens_sin; std::unique_ptr givens_cos; - std::unique_ptr> stop_status; - std::unique_ptr> reorth_status; - std::unique_ptr> final_iter_nums; - std::unique_ptr> num_reorth; + gko::array stop_status; + gko::array reorth_status; + gko::array final_iter_nums; + gko::array num_reorth; std::unique_ptr d_x; std::unique_ptr d_before_preconditioner; @@ -191,10 +185,10 @@ class CbGmres : public CommonTestFixture { std::unique_ptr d_residual_norm_collection; std::unique_ptr d_givens_sin; std::unique_ptr d_givens_cos; - std::unique_ptr> d_stop_status; - std::unique_ptr> d_reorth_status; - std::unique_ptr> d_final_iter_nums; - std::unique_ptr> d_num_reorth; + gko::array d_stop_status; + gko::array d_reorth_status; + gko::array d_final_iter_nums; + gko::array d_num_reorth; }; @@ -203,16 +197,18 @@ TEST_F(CbGmres, CbGmresInitialize1IsEquivalentToRef) initialize_data(); gko::kernels::reference::cb_gmres::initialize( - ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), - stop_status.get(), default_krylov_dim_mixed); + ref, b->get_const_device_view(), residual->get_device_view(), + givens_sin->get_device_view(), givens_cos->get_device_view(), + stop_status, default_krylov_dim_mixed); gko::kernels::GKO_DEVICE_NAMESPACE::cb_gmres::initialize( - exec, d_b.get(), d_residual.get(), d_givens_sin.get(), - d_givens_cos.get(), d_stop_status.get(), default_krylov_dim_mixed); + exec, d_b->get_const_device_view(), d_residual->get_device_view(), + d_givens_sin->get_device_view(), d_givens_cos->get_device_view(), + d_stop_status, default_krylov_dim_mixed); GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } TEST_F(CbGmres, CbGmresInitialize2IsEquivalentToRef) @@ -222,22 +218,26 @@ TEST_F(CbGmres, CbGmresInitialize2IsEquivalentToRef) gko::array dtmp{exec}; gko::kernels::reference::cb_gmres::restart( - ref, residual.get(), residual_norm.get(), - residual_norm_collection.get(), arnoldi_norm.get(), - range_helper.get_range(), next_krylov_basis.get(), - final_iter_nums.get(), tmp, default_krylov_dim_mixed); + ref, residual->get_const_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), + arnoldi_norm->get_device_view(), range_helper.get_range(), + next_krylov_basis->get_device_view(), final_iter_nums, tmp, + default_krylov_dim_mixed); gko::kernels::GKO_DEVICE_NAMESPACE::cb_gmres::restart( - exec, d_residual.get(), d_residual_norm.get(), - d_residual_norm_collection.get(), d_arnoldi_norm.get(), - d_range_helper.get_range(), d_next_krylov_basis.get(), - d_final_iter_nums.get(), dtmp, default_krylov_dim_mixed); + exec, d_residual->get_const_device_view(), + d_residual_norm->get_device_view(), + d_residual_norm_collection->get_device_view(), + d_arnoldi_norm->get_device_view(), d_range_helper.get_range(), + d_next_krylov_basis->get_device_view(), d_final_iter_nums, dtmp, + default_krylov_dim_mixed); GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, r::value); GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, r::value); assert_krylov_bases_near(); - GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); + GKO_ASSERT_ARRAY_EQ(d_final_iter_nums, final_iter_nums); } TEST_F(CbGmres, CbGmresStep1IsEquivalentToRef) @@ -246,18 +246,21 @@ TEST_F(CbGmres, CbGmresStep1IsEquivalentToRef) int iter = 5; gko::kernels::reference::cb_gmres::arnoldi( - ref, next_krylov_basis.get(), givens_sin.get(), givens_cos.get(), - residual_norm.get(), residual_norm_collection.get(), - range_helper.get_range(), hessenberg_iter.get(), buffer_iter.get(), - arnoldi_norm.get(), iter, final_iter_nums.get(), stop_status.get(), - reorth_status.get(), num_reorth.get()); + ref, next_krylov_basis->get_device_view(), + givens_sin->get_device_view(), givens_cos->get_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), range_helper.get_range(), + hessenberg_iter->get_device_view(), buffer_iter->get_device_view(), + arnoldi_norm->get_device_view(), iter, final_iter_nums, stop_status, + reorth_status, num_reorth); gko::kernels::GKO_DEVICE_NAMESPACE::cb_gmres::arnoldi( - exec, d_next_krylov_basis.get(), d_givens_sin.get(), d_givens_cos.get(), - d_residual_norm.get(), d_residual_norm_collection.get(), - d_range_helper.get_range(), d_hessenberg_iter.get(), - d_buffer_iter.get(), d_arnoldi_norm.get(), iter, - d_final_iter_nums.get(), d_stop_status.get(), d_reorth_status.get(), - d_num_reorth.get()); + exec, d_next_krylov_basis->get_device_view(), + d_givens_sin->get_device_view(), d_givens_cos->get_device_view(), + d_residual_norm->get_device_view(), + d_residual_norm_collection->get_device_view(), + d_range_helper.get_range(), d_hessenberg_iter->get_device_view(), + d_buffer_iter->get_device_view(), d_arnoldi_norm->get_device_view(), + iter, d_final_iter_nums, d_stop_status, d_reorth_status, d_num_reorth); GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, r::value); GKO_ASSERT_MTX_NEAR(d_next_krylov_basis, next_krylov_basis, @@ -270,7 +273,7 @@ TEST_F(CbGmres, CbGmresStep1IsEquivalentToRef) GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, r::value); assert_krylov_bases_near(); - GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); + GKO_ASSERT_ARRAY_EQ(d_final_iter_nums, final_iter_nums); } TEST_F(CbGmres, CbGmresStep2IsEquivalentToRef) @@ -278,14 +281,15 @@ TEST_F(CbGmres, CbGmresStep2IsEquivalentToRef) initialize_data(); gko::kernels::reference::cb_gmres::solve_krylov( - ref, residual_norm_collection.get(), - range_helper.get_range().get_accessor().to_const(), hessenberg.get(), - y.get(), before_preconditioner.get(), final_iter_nums.get()); + ref, residual_norm_collection->get_const_device_view(), + range_helper.get_range().get_accessor().to_const(), + hessenberg->get_const_device_view(), y->get_device_view(), + before_preconditioner->get_device_view(), final_iter_nums); gko::kernels::GKO_DEVICE_NAMESPACE::cb_gmres::solve_krylov( - exec, d_residual_norm_collection.get(), + exec, d_residual_norm_collection->get_const_device_view(), d_range_helper.get_range().get_accessor().to_const(), - d_hessenberg.get(), d_y.get(), d_before_preconditioner.get(), - d_final_iter_nums.get()); + d_hessenberg->get_const_device_view(), d_y->get_device_view(), + d_before_preconditioner->get_device_view(), d_final_iter_nums); GKO_ASSERT_MTX_NEAR(d_y, y, r::value); GKO_ASSERT_MTX_NEAR(d_x, x, r::value); diff --git a/test/solver/cg_kernels.cpp b/test/solver/cg_kernels.cpp index 13e6905fa81..92d33669b78 100644 --- a/test/solver/cg_kernels.cpp +++ b/test/solver/cg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -56,13 +56,12 @@ class Cg : public CommonTestFixture { // check correct handling for zero values beta->at(2) = 0.0; prev_rho->at(2) = 0.0; - stop_status = - std::make_unique>(ref, n); - for (size_t i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, n); + for (size_t i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_b = gko::clone(exec, b); d_r = gko::clone(exec, r); @@ -73,8 +72,7 @@ class Cg : public CommonTestFixture { d_beta = gko::clone(exec, beta); d_prev_rho = gko::clone(exec, prev_rho); d_rho = gko::clone(exec, rho); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -88,7 +86,7 @@ class Cg : public CommonTestFixture { std::unique_ptr beta; std::unique_ptr prev_rho; std::unique_ptr rho; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_b; std::unique_ptr d_r; @@ -99,7 +97,7 @@ class Cg : public CommonTestFixture { std::unique_ptr d_beta; std::unique_ptr d_prev_rho; std::unique_ptr d_rho; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -107,12 +105,14 @@ TEST_F(Cg, CgInitializeIsEquivalentToRef) { initialize_data(); - gko::kernels::reference::cg::initialize(ref, b.get(), r.get(), z.get(), - p.get(), q.get(), prev_rho.get(), - rho.get(), stop_status.get()); + gko::kernels::reference::cg::initialize( + ref, b->get_const_device_view(), r->get_device_view(), + z->get_device_view(), p->get_device_view(), q->get_device_view(), + prev_rho->get_device_view(), rho->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cg::initialize( - exec, d_b.get(), d_r.get(), d_z.get(), d_p.get(), d_q.get(), - d_prev_rho.get(), d_rho.get(), d_stop_status.get()); + exec, d_b->get_const_device_view(), d_r->get_device_view(), + d_z->get_device_view(), d_p->get_device_view(), d_q->get_device_view(), + d_prev_rho->get_device_view(), d_rho->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -120,7 +120,7 @@ TEST_F(Cg, CgInitializeIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(d_q, q, ::r::value); GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, ::r::value); GKO_ASSERT_MTX_NEAR(d_rho, rho, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -128,11 +128,14 @@ TEST_F(Cg, CgStep1IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::cg::step_1(ref, p.get(), z.get(), rho.get(), - prev_rho.get(), stop_status.get()); + gko::kernels::reference::cg::step_1( + ref, p->get_device_view(), z->get_const_device_view(), + rho->get_const_device_view(), prev_rho->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cg::step_1( - exec, d_p.get(), d_z.get(), d_rho.get(), d_prev_rho.get(), - d_stop_status.get()); + exec, d_p->get_device_view(), d_z->get_const_device_view(), + d_rho->get_const_device_view(), d_prev_rho->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_p, p, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -142,12 +145,16 @@ TEST_F(Cg, CgStep1IsEquivalentToRef) TEST_F(Cg, CgStep2IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::cg::step_2(ref, x.get(), r.get(), p.get(), q.get(), - beta.get(), rho.get(), - stop_status.get()); + gko::kernels::reference::cg::step_2( + ref, x->get_device_view(), r->get_device_view(), + p->get_const_device_view(), q->get_const_device_view(), + beta->get_const_device_view(), rho->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cg::step_2( - exec, d_x.get(), d_r.get(), d_p.get(), d_q.get(), d_beta.get(), - d_rho.get(), d_stop_status.get()); + exec, d_x->get_device_view(), d_r->get_device_view(), + d_p->get_const_device_view(), d_q->get_const_device_view(), + d_beta->get_const_device_view(), d_rho->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); diff --git a/test/solver/cgs_kernels.cpp b/test/solver/cgs_kernels.cpp index f952e68170e..2d229913a9c 100644 --- a/test/solver/cgs_kernels.cpp +++ b/test/solver/cgs_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -85,13 +85,12 @@ class Cgs : public CommonTestFixture { // check correct handling for zero values gamma->at(2) = 0.0; rho_prev->at(2) = 0.0; - stop_status = - std::make_unique>(ref, n); - for (size_t i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, n); + for (size_t i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_b = gko::clone(exec, b); d_r = gko::clone(exec, r); @@ -108,8 +107,7 @@ class Cgs : public CommonTestFixture { d_gamma = gko::clone(exec, gamma); d_rho_prev = gko::clone(exec, rho_prev); d_rho = gko::clone(exec, rho); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -134,7 +132,7 @@ class Cgs : public CommonTestFixture { std::unique_ptr gamma; std::unique_ptr rho; std::unique_ptr rho_prev; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_b; std::unique_ptr d_r; @@ -151,7 +149,7 @@ class Cgs : public CommonTestFixture { std::unique_ptr d_gamma; std::unique_ptr d_rho; std::unique_ptr d_rho_prev; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -160,14 +158,21 @@ TEST_F(Cgs, CgsInitializeIsEquivalentToRef) initialize_data(); gko::kernels::reference::cgs::initialize( - ref, b.get(), r.get(), r_tld.get(), p.get(), q.get(), u.get(), - u_hat.get(), v_hat.get(), t.get(), alpha.get(), beta.get(), gamma.get(), - rho_prev.get(), rho.get(), stop_status.get()); + ref, b->get_const_device_view(), r->get_device_view(), + r_tld->get_device_view(), p->get_device_view(), q->get_device_view(), + u->get_device_view(), u_hat->get_device_view(), + v_hat->get_device_view(), t->get_device_view(), + alpha->get_device_view(), beta->get_device_view(), + gamma->get_device_view(), rho_prev->get_device_view(), + rho->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cgs::initialize( - exec, d_b.get(), d_r.get(), d_r_tld.get(), d_p.get(), d_q.get(), - d_u.get(), d_u_hat.get(), d_v_hat.get(), d_t.get(), d_alpha.get(), - d_beta.get(), d_gamma.get(), d_rho_prev.get(), d_rho.get(), - d_stop_status.get()); + exec, d_b->get_const_device_view(), d_r->get_device_view(), + d_r_tld->get_device_view(), d_p->get_device_view(), + d_q->get_device_view(), d_u->get_device_view(), + d_u_hat->get_device_view(), d_v_hat->get_device_view(), + d_t->get_device_view(), d_alpha->get_device_view(), + d_beta->get_device_view(), d_gamma->get_device_view(), + d_rho_prev->get_device_view(), d_rho->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); GKO_ASSERT_MTX_NEAR(d_r_tld, r_tld, ::r::value); @@ -182,7 +187,7 @@ TEST_F(Cgs, CgsInitializeIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(d_alpha, alpha, ::r::value); GKO_ASSERT_MTX_NEAR(d_beta, beta, ::r::value); GKO_ASSERT_MTX_NEAR(d_gamma, gamma, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -190,12 +195,16 @@ TEST_F(Cgs, CgsStep1IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::cgs::step_1(ref, r.get(), u.get(), p.get(), - q.get(), beta.get(), rho.get(), - rho_prev.get(), stop_status.get()); + gko::kernels::reference::cgs::step_1( + ref, r->get_const_device_view(), u->get_device_view(), + p->get_device_view(), q->get_const_device_view(), + beta->get_device_view(), rho->get_const_device_view(), + rho_prev->get_const_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cgs::step_1( - exec, d_r.get(), d_u.get(), d_p.get(), d_q.get(), d_beta.get(), - d_rho.get(), d_rho_prev.get(), d_stop_status.get()); + exec, d_r->get_const_device_view(), d_u->get_device_view(), + d_p->get_device_view(), d_q->get_const_device_view(), + d_beta->get_device_view(), d_rho->get_const_device_view(), + d_rho_prev->get_const_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_beta, beta, ::r::value); GKO_ASSERT_MTX_NEAR(d_u, u, ::r::value); @@ -207,12 +216,16 @@ TEST_F(Cgs, CgsStep2IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::cgs::step_2(ref, u.get(), v_hat.get(), q.get(), - t.get(), alpha.get(), rho.get(), - gamma.get(), stop_status.get()); + gko::kernels::reference::cgs::step_2( + ref, u->get_const_device_view(), v_hat->get_const_device_view(), + q->get_device_view(), t->get_device_view(), alpha->get_device_view(), + rho->get_const_device_view(), gamma->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cgs::step_2( - exec, d_u.get(), d_v_hat.get(), d_q.get(), d_t.get(), d_alpha.get(), - d_rho.get(), d_gamma.get(), d_stop_status.get()); + exec, d_u->get_const_device_view(), d_v_hat->get_const_device_view(), + d_q->get_device_view(), d_t->get_device_view(), + d_alpha->get_device_view(), d_rho->get_const_device_view(), + d_gamma->get_const_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_alpha, alpha, ::r::value); GKO_ASSERT_MTX_NEAR(d_t, t, ::r::value); @@ -224,12 +237,14 @@ TEST_F(Cgs, CgsStep3IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::cgs::step_3(ref, t.get(), u_hat.get(), r.get(), - x.get(), alpha.get(), - stop_status.get()); + gko::kernels::reference::cgs::step_3( + ref, t->get_const_device_view(), u_hat->get_const_device_view(), + r->get_device_view(), x->get_device_view(), + alpha->get_const_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::cgs::step_3( - exec, d_t.get(), d_u_hat.get(), d_r.get(), d_x.get(), d_alpha.get(), - d_stop_status.get()); + exec, d_t->get_const_device_view(), d_u_hat->get_const_device_view(), + d_r->get_device_view(), d_x->get_device_view(), + d_alpha->get_const_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); diff --git a/test/solver/chebyshev_kernels.cpp b/test/solver/chebyshev_kernels.cpp index 1447ad8cf4a..04232d2dca8 100644 --- a/test/solver/chebyshev_kernels.cpp +++ b/test/solver/chebyshev_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -55,9 +55,11 @@ TEST_F(Chebyshev, KernelInitUpdate) auto d_output = gko::clone(exec, output); gko::kernels::reference::chebyshev::init_update( - ref, alpha, inner_sol.get(), update_sol.get(), output.get()); + ref, alpha, inner_sol->get_const_device_view(), + update_sol->get_device_view(), output->get_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::chebyshev::init_update( - exec, alpha, d_inner_sol.get(), d_update_sol.get(), d_output.get()); + exec, alpha, d_inner_sol->get_const_device_view(), + d_update_sol->get_device_view(), d_output->get_device_view()); GKO_ASSERT_MTX_NEAR(d_update_sol, d_inner_sol, 0); GKO_ASSERT_MTX_NEAR(d_update_sol, update_sol, 0); @@ -78,10 +80,11 @@ TEST_F(Chebyshev, KernelUpdate) auto d_output = gko::clone(exec, output); gko::kernels::reference::chebyshev::update( - ref, alpha, beta, inner_sol.get(), update_sol.get(), output.get()); + ref, alpha, beta, inner_sol->get_device_view(), + update_sol->get_device_view(), output->get_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::chebyshev::update( - exec, alpha, beta, d_inner_sol.get(), d_update_sol.get(), - d_output.get()); + exec, alpha, beta, d_inner_sol->get_device_view(), + d_update_sol->get_device_view(), d_output->get_device_view()); GKO_ASSERT_MTX_NEAR(d_update_sol, d_inner_sol, 0); GKO_ASSERT_MTX_NEAR(d_inner_sol, inner_sol, r::value); diff --git a/test/solver/fcg_kernels.cpp b/test/solver/fcg_kernels.cpp index 194151f203e..72f006a2b82 100644 --- a/test/solver/fcg_kernels.cpp +++ b/test/solver/fcg_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -58,13 +58,12 @@ class Fcg : public CommonTestFixture { // check correct handling for zero values beta->at(2) = 0.0; prev_rho->at(2) = 0.0; - stop_status = - std::make_unique>(ref, n); - for (size_t i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, n); + for (size_t i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_b = gko::clone(exec, b); d_r = gko::clone(exec, r); @@ -77,8 +76,7 @@ class Fcg : public CommonTestFixture { d_prev_rho = gko::clone(exec, prev_rho); d_rho_t = gko::clone(exec, rho_t); d_rho = gko::clone(exec, rho); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -94,7 +92,7 @@ class Fcg : public CommonTestFixture { std::unique_ptr prev_rho; std::unique_ptr rho; std::unique_ptr rho_t; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_b; std::unique_ptr d_r; @@ -107,7 +105,7 @@ class Fcg : public CommonTestFixture { std::unique_ptr d_prev_rho; std::unique_ptr d_rho; std::unique_ptr d_rho_t; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -116,11 +114,15 @@ TEST_F(Fcg, FcgInitializeIsEquivalentToRef) initialize_data(); gko::kernels::reference::fcg::initialize( - ref, b.get(), r.get(), z.get(), p.get(), q.get(), t.get(), - prev_rho.get(), rho.get(), rho_t.get(), stop_status.get()); + ref, b->get_const_device_view(), r->get_device_view(), + z->get_device_view(), p->get_device_view(), q->get_device_view(), + t->get_device_view(), prev_rho->get_device_view(), + rho->get_device_view(), rho_t->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::fcg::initialize( - exec, d_b.get(), d_r.get(), d_z.get(), d_p.get(), d_q.get(), d_t.get(), - d_prev_rho.get(), d_rho.get(), d_rho_t.get(), d_stop_status.get()); + exec, d_b->get_const_device_view(), d_r->get_device_view(), + d_z->get_device_view(), d_p->get_device_view(), d_q->get_device_view(), + d_t->get_device_view(), d_prev_rho->get_device_view(), + d_rho->get_device_view(), d_rho_t->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); GKO_ASSERT_MTX_NEAR(d_t, t, ::r::value); @@ -130,7 +132,7 @@ TEST_F(Fcg, FcgInitializeIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, ::r::value); GKO_ASSERT_MTX_NEAR(d_rho, rho, ::r::value); GKO_ASSERT_MTX_NEAR(d_rho_t, rho_t, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -138,11 +140,14 @@ TEST_F(Fcg, FcgStep1IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::fcg::step_1(ref, p.get(), z.get(), rho_t.get(), - prev_rho.get(), stop_status.get()); + gko::kernels::reference::fcg::step_1( + ref, p->get_device_view(), z->get_const_device_view(), + rho_t->get_const_device_view(), prev_rho->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::fcg::step_1( - exec, d_p.get(), d_z.get(), d_rho_t.get(), d_prev_rho.get(), - d_stop_status.get()); + exec, d_p->get_device_view(), d_z->get_const_device_view(), + d_rho_t->get_const_device_view(), d_prev_rho->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_p, p, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -152,12 +157,16 @@ TEST_F(Fcg, FcgStep1IsEquivalentToRef) TEST_F(Fcg, FcgStep2IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::fcg::step_2(ref, x.get(), r.get(), t.get(), - p.get(), q.get(), beta.get(), - rho.get(), stop_status.get()); + gko::kernels::reference::fcg::step_2( + ref, x->get_device_view(), r->get_device_view(), t->get_device_view(), + p->get_const_device_view(), q->get_const_device_view(), + beta->get_const_device_view(), rho->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::fcg::step_2( - exec, d_x.get(), d_r.get(), d_t.get(), d_p.get(), d_q.get(), - d_beta.get(), d_rho.get(), d_stop_status.get()); + exec, d_x->get_device_view(), d_r->get_device_view(), + d_t->get_device_view(), d_p->get_const_device_view(), + d_q->get_const_device_view(), d_beta->get_const_device_view(), + d_rho->get_const_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); diff --git a/test/solver/gcr_kernels.cpp b/test/solver/gcr_kernels.cpp index 3095475538d..c849652ee25 100644 --- a/test/solver/gcr_kernels.cpp +++ b/test/solver/gcr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -147,10 +147,12 @@ TEST_F(Gcr, GcrKernelInitializeIsEquivalentToRef) { initialize_data(); - gko::kernels::reference::gcr::initialize(ref, b.get(), residual.get(), + gko::kernels::reference::gcr::initialize(ref, b->get_const_device_view(), + residual->get_device_view(), stop_status.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::gcr::initialize( - exec, d_b.get(), d_residual.get(), d_stop_status.get_data()); + exec, d_b->get_const_device_view(), d_residual->get_device_view(), + d_stop_status.get_data()); GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); @@ -163,10 +165,12 @@ TEST_F(Gcr, GcrKernelInitializeWithStrideIsEquivalentToRef) auto d_b_strided = Mtx::create(exec, b->get_size(), b->get_stride() + 2); d_b_strided->copy_from(d_b); - gko::kernels::reference::gcr::initialize(ref, b.get(), residual.get(), + gko::kernels::reference::gcr::initialize(ref, b->get_const_device_view(), + residual->get_device_view(), stop_status.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::gcr::initialize( - exec, d_b_strided.get(), d_residual.get(), d_stop_status.get_data()); + exec, d_b_strided->get_const_device_view(), + d_residual->get_device_view(), d_stop_status.get_data()); GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); @@ -177,12 +181,14 @@ TEST_F(Gcr, GcrKernelRestartIsEquivalentToRef) { initialize_data(); - gko::kernels::reference::gcr::restart(ref, residual.get(), A_residual.get(), - p_bases.get(), Ap_bases.get(), - final_iter_nums.get_data()); + gko::kernels::reference::gcr::restart( + ref, residual->get_const_device_view(), + A_residual->get_const_device_view(), p_bases->get_device_view(), + Ap_bases->get_device_view(), final_iter_nums.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::gcr::restart( - exec, d_residual.get(), d_A_residual.get(), d_p_bases.get(), - d_Ap_bases.get(), d_final_iter_nums.get_data()); + exec, d_residual->get_const_device_view(), + d_A_residual->get_const_device_view(), d_p_bases->get_device_view(), + d_Ap_bases->get_device_view(), d_final_iter_nums.get_data()); GKO_ASSERT_MTX_NEAR(d_A_residual, A_residual, r::value); GKO_ASSERT_MTX_NEAR(d_p, p, r::value); @@ -195,12 +201,16 @@ TEST_F(Gcr, GcrStep1IsEquivalentToRef) { initialize_data(); - gko::kernels::reference::gcr::step_1(ref, x.get(), residual.get(), p.get(), - Ap.get(), Ap_norm.get(), rAp.get(), - stop_status.get_data()); + gko::kernels::reference::gcr::step_1( + ref, x->get_device_view(), residual->get_device_view(), + p->get_const_device_view(), Ap->get_const_device_view(), + Ap_norm->get_const_device_view(), rAp->get_const_device_view(), + stop_status.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::gcr::step_1( - exec, d_x.get(), d_residual.get(), d_p.get(), d_Ap.get(), - d_Ap_norm.get(), d_rAp.get(), d_stop_status.get_data()); + exec, d_x->get_device_view(), d_residual->get_device_view(), + d_p->get_const_device_view(), d_Ap->get_const_device_view(), + d_Ap_norm->get_const_device_view(), d_rAp->get_const_device_view(), + d_stop_status.get_data()); GKO_ASSERT_MTX_NEAR(d_x, x, r::value); GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index 72cbc83b002..b9a763efaeb 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -154,11 +154,13 @@ TEST_F(Gmres, GmresKernelInitializeIsEquivalentToRef) initialize_data(); gko::kernels::reference::common_gmres::initialize( - ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), + ref, b->get_const_device_view(), residual->get_device_view(), + givens_sin->get_device_view(), givens_cos->get_device_view(), stop_status.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::common_gmres::initialize( - exec, d_b.get(), d_residual.get(), d_givens_sin.get(), - d_givens_cos.get(), d_stop_status.get_data()); + exec, d_b->get_const_device_view(), d_residual->get_device_view(), + d_givens_sin->get_device_view(), d_givens_cos->get_device_view(), + d_stop_status.get_data()); GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); @@ -174,13 +176,15 @@ TEST_F(Gmres, GmresKernelRestartIsEquivalentToRef) d_residual_norm->copy_from(residual_norm); gko::kernels::reference::gmres::restart( - ref, residual.get(), residual_norm.get(), - residual_norm_collection.get(), krylov_bases.get(), - final_iter_nums.get_data()); + ref, residual->get_const_device_view(), + residual_norm->get_const_device_view(), + residual_norm_collection->get_device_view(), + krylov_bases->get_device_view(), final_iter_nums.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::gmres::restart( - exec, d_residual.get(), d_residual_norm.get(), - d_residual_norm_collection.get(), d_krylov_bases.get(), - d_final_iter_nums.get_data()); + exec, d_residual->get_const_device_view(), + d_residual_norm->get_const_device_view(), + d_residual_norm_collection->get_device_view(), + d_krylov_bases->get_device_view(), d_final_iter_nums.get_data()); GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, @@ -196,12 +200,16 @@ TEST_F(Gmres, GmresKernelHessenbergQRIsEquivalentToRef) int iter = 5; gko::kernels::reference::common_gmres::hessenberg_qr( - ref, givens_sin.get(), givens_cos.get(), residual_norm.get(), - residual_norm_collection.get(), hessenberg_iter.get(), iter, - final_iter_nums.get_data(), stop_status.get_const_data()); + ref, givens_sin->get_device_view(), givens_cos->get_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), + hessenberg_iter->get_device_view(), iter, final_iter_nums.get_data(), + stop_status.get_const_data()); gko::kernels::GKO_DEVICE_NAMESPACE::common_gmres::hessenberg_qr( - exec, d_givens_sin.get(), d_givens_cos.get(), d_residual_norm.get(), - d_residual_norm_collection.get(), d_hessenberg_iter.get(), iter, + exec, d_givens_sin->get_device_view(), d_givens_cos->get_device_view(), + d_residual_norm->get_device_view(), + d_residual_norm_collection->get_device_view(), + d_hessenberg_iter->get_device_view(), iter, d_final_iter_nums.get_data(), d_stop_status.get_const_data()); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); @@ -222,12 +230,16 @@ TEST_F(Gmres, GmresKernelHessenbergQROnSingleRHSIsEquivalentToRef) int iter = 5; gko::kernels::reference::common_gmres::hessenberg_qr( - ref, givens_sin.get(), givens_cos.get(), residual_norm.get(), - residual_norm_collection.get(), hessenberg_iter.get(), iter, - final_iter_nums.get_data(), stop_status.get_const_data()); + ref, givens_sin->get_device_view(), givens_cos->get_device_view(), + residual_norm->get_device_view(), + residual_norm_collection->get_device_view(), + hessenberg_iter->get_device_view(), iter, final_iter_nums.get_data(), + stop_status.get_const_data()); gko::kernels::GKO_DEVICE_NAMESPACE::common_gmres::hessenberg_qr( - exec, d_givens_sin.get(), d_givens_cos.get(), d_residual_norm.get(), - d_residual_norm_collection.get(), d_hessenberg_iter.get(), iter, + exec, d_givens_sin->get_device_view(), d_givens_cos->get_device_view(), + d_residual_norm->get_device_view(), + d_residual_norm_collection->get_device_view(), + d_hessenberg_iter->get_device_view(), iter, d_final_iter_nums.get_data(), d_stop_status.get_const_data()); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); @@ -247,10 +259,12 @@ TEST_F(Gmres, GmresKernelSolveKrylovIsEquivalentToRef) initialize_data(); gko::kernels::reference::common_gmres::solve_krylov( - ref, residual_norm_collection.get(), hessenberg.get(), y.get(), + ref, residual_norm_collection->get_const_device_view(), + hessenberg->get_const_device_view(), y->get_device_view(), final_iter_nums.get_const_data(), stop_status.get_const_data()); gko::kernels::GKO_DEVICE_NAMESPACE::common_gmres::solve_krylov( - exec, d_residual_norm_collection.get(), d_hessenberg.get(), d_y.get(), + exec, d_residual_norm_collection->get_const_device_view(), + d_hessenberg->get_const_device_view(), d_y->get_device_view(), d_final_iter_nums.get_const_data(), d_stop_status.get_const_data()); GKO_ASSERT_MTX_NEAR(d_y, y, r::value); @@ -262,10 +276,13 @@ TEST_F(Gmres, GmresKernelMultiAxpyIsEquivalentToRef) initialize_data(); gko::kernels::reference::gmres::multi_axpy( - ref, krylov_bases.get(), y.get(), before_preconditioner.get(), + ref, krylov_bases->get_const_device_view(), y->get_const_device_view(), + before_preconditioner->get_device_view(), final_iter_nums.get_const_data(), stop_status.get_data()); gko::kernels::GKO_DEVICE_NAMESPACE::gmres::multi_axpy( - exec, d_krylov_bases.get(), d_y.get(), d_before_preconditioner.get(), + exec, d_krylov_bases->get_const_device_view(), + d_y->get_const_device_view(), + d_before_preconditioner->get_device_view(), d_final_iter_nums.get_const_data(), d_stop_status.get_data()); GKO_ASSERT_MTX_NEAR(d_before_preconditioner, before_preconditioner, @@ -296,10 +313,13 @@ TEST_F(Gmres, GmresKernelMultiDotIsEquivalentToRef) d_x->get_size()[0] * gko::solver::gmres_default_krylov_dim}, gko::span{0, d_x->get_size()[1]}); gko::kernels::reference::gmres::multi_dot( - ref, krylov_basis.get(), next_krylov.get(), hessenberg_iter.get()); + ref, krylov_basis->get_const_device_view(), + next_krylov->get_const_device_view(), + hessenberg_iter->get_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::gmres::multi_dot( - exec, d_krylov_basis.get(), d_next_krylov.get(), - d_hessenberg_iter.get()); + exec, d_krylov_basis->get_const_device_view(), + d_next_krylov->get_const_device_view(), + d_hessenberg_iter->get_device_view()); GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, r::value); diff --git a/test/solver/idr_kernels.cpp b/test/solver/idr_kernels.cpp index ed32a5494c8..db1abb03b6f 100644 --- a/test/solver/idr_kernels.cpp +++ b/test/solver/idr_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -83,10 +83,9 @@ class Idr : public CommonTestFixture { omega = gen_mtx(1, nrhs); tht = gen_mtx(1, nrhs); residual_norm = gen_mtx(1, nrhs); - stop_status = - std::make_unique>(ref, nrhs); + stop_status = gko::array(ref, nrhs); for (size_t i = 0; i < nrhs; ++i) { - stop_status->get_data()[i].reset(); + stop_status.get_data()[i].reset(); } d_mtx = gko::clone(exec, mtx); @@ -104,8 +103,7 @@ class Idr : public CommonTestFixture { d_omega = gko::clone(exec, omega); d_tht = gko::clone(exec, tht); d_residual_norm = gko::clone(exec, residual_norm); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -131,7 +129,7 @@ class Idr : public CommonTestFixture { std::unique_ptr omega; std::unique_ptr tht; std::unique_ptr residual_norm; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_x; std::unique_ptr d_b; @@ -147,7 +145,7 @@ class Idr : public CommonTestFixture { std::unique_ptr d_omega; std::unique_ptr d_tht; std::unique_ptr d_residual_norm; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -155,10 +153,12 @@ TEST_F(Idr, IdrInitializeIsEquivalentToRef) { initialize_data(); - gko::kernels::reference::idr::initialize(ref, nrhs, m.get(), p.get(), true, - stop_status.get()); + gko::kernels::reference::idr::initialize(ref, nrhs, m->get_device_view(), + p->get_device_view(), true, + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::idr::initialize( - exec, nrhs, d_m.get(), d_p.get(), true, d_stop_status.get()); + exec, nrhs, d_m->get_device_view(), d_p->get_device_view(), true, + d_stop_status); GKO_ASSERT_MTX_NEAR(m, d_m, rr::value); GKO_ASSERT_MTX_NEAR(p, d_p, rr::value); @@ -170,12 +170,15 @@ TEST_F(Idr, IdrStep1IsEquivalentToRef) initialize_data(); gko::size_type k = 2; - gko::kernels::reference::idr::step_1(ref, nrhs, k, m.get(), f.get(), - r.get(), g.get(), c.get(), v.get(), - stop_status.get()); + gko::kernels::reference::idr::step_1( + ref, nrhs, k, m->get_const_device_view(), f->get_const_device_view(), + r->get_const_device_view(), g->get_const_device_view(), + c->get_device_view(), v->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::idr::step_1( - exec, nrhs, k, d_m.get(), d_f.get(), d_r.get(), d_g.get(), d_c.get(), - d_v.get(), d_stop_status.get()); + exec, nrhs, k, d_m->get_const_device_view(), + d_f->get_const_device_view(), d_r->get_const_device_view(), + d_g->get_const_device_view(), d_c->get_device_view(), + d_v->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(c, d_c, rr::value); GKO_ASSERT_MTX_NEAR(v, d_v, rr::value); @@ -187,11 +190,14 @@ TEST_F(Idr, IdrStep2IsEquivalentToRef) initialize_data(); gko::size_type k = 2; - gko::kernels::reference::idr::step_2(ref, nrhs, k, omega.get(), v.get(), - c.get(), u.get(), stop_status.get()); + gko::kernels::reference::idr::step_2( + ref, nrhs, k, omega->get_const_device_view(), + v->get_const_device_view(), c->get_const_device_view(), + u->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::idr::step_2( - exec, nrhs, k, d_omega.get(), d_v.get(), d_c.get(), d_u.get(), - d_stop_status.get()); + exec, nrhs, k, d_omega->get_const_device_view(), + d_v->get_const_device_view(), d_c->get_const_device_view(), + d_u->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(u, d_u, rr::value); } @@ -203,11 +209,15 @@ TEST_F(Idr, IdrStep3IsEquivalentToRef) gko::size_type k = 2; gko::kernels::reference::idr::step_3( - ref, nrhs, k, p.get(), g.get(), v.get(), u.get(), m.get(), f.get(), - alpha.get(), r.get(), x.get(), stop_status.get()); + ref, nrhs, k, p->get_const_device_view(), g->get_device_view(), + v->get_device_view(), u->get_device_view(), m->get_device_view(), + f->get_device_view(), alpha->get_device_view(), r->get_device_view(), + x->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::idr::step_3( - exec, nrhs, k, d_p.get(), d_g.get(), d_v.get(), d_u.get(), d_m.get(), - d_f.get(), d_alpha.get(), d_r.get(), d_x.get(), d_stop_status.get()); + exec, nrhs, k, d_p->get_const_device_view(), d_g->get_device_view(), + d_v->get_device_view(), d_u->get_device_view(), d_m->get_device_view(), + d_f->get_device_view(), d_alpha->get_device_view(), + d_r->get_device_view(), d_x->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(g, d_g, 10 * rr::value); GKO_ASSERT_MTX_NEAR(v, d_v, 10 * rr::value); @@ -224,12 +234,14 @@ TEST_F(Idr, IdrComputeOmegaIsEquivalentToRef) initialize_data(); value_type kappa = 0.7; - gko::kernels::reference::idr::compute_omega(ref, nrhs, kappa, tht.get(), - residual_norm.get(), - omega.get(), stop_status.get()); + gko::kernels::reference::idr::compute_omega( + ref, nrhs, kappa, tht->get_const_device_view(), + residual_norm->get_const_device_view(), omega->get_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::idr::compute_omega( - exec, nrhs, kappa, d_tht.get(), d_residual_norm.get(), d_omega.get(), - d_stop_status.get()); + exec, nrhs, kappa, d_tht->get_const_device_view(), + d_residual_norm->get_const_device_view(), d_omega->get_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(omega, d_omega, rr::value); } diff --git a/test/solver/ir_kernels.cpp b/test/solver/ir_kernels.cpp index 31973e849b1..33de4eab1b3 100644 --- a/test/solver/ir_kernels.cpp +++ b/test/solver/ir_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -50,8 +50,8 @@ TEST_F(Ir, InitializeIsEquivalentToRef) } auto d_stop_status = gko::array(exec, stop_status); - gko::kernels::reference::ir::initialize(ref, &stop_status); - gko::kernels::GKO_DEVICE_NAMESPACE::ir::initialize(exec, &d_stop_status); + gko::kernels::reference::ir::initialize(ref, stop_status); + gko::kernels::GKO_DEVICE_NAMESPACE::ir::initialize(exec, d_stop_status); auto tmp = gko::array(ref, d_stop_status); for (int i = 0; i < stop_status.get_size(); ++i) { diff --git a/test/solver/minres_kernels.cpp b/test/solver/minres_kernels.cpp index 1f5bd149b27..cb466fe4480 100644 --- a/test/solver/minres_kernels.cpp +++ b/test/solver/minres_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -74,13 +74,12 @@ class Minres : public CommonTestFixture { tau = gen_mtx(1, n, n, false)->compute_absolute(); // check correct handling for zero values beta->at(2) = gko::zero(); - stop_status = - std::make_unique>(ref, n); - for (gko::size_type i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, n); + for (gko::size_type i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_x = gko::clone(exec, x); d_b = gko::clone(exec, b); @@ -103,8 +102,7 @@ class Minres : public CommonTestFixture { d_cos = gko::clone(exec, cos); d_sin_prev = gko::clone(exec, sin_prev); d_sin = gko::clone(exec, sin); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine{42}; @@ -153,8 +151,8 @@ class Minres : public CommonTestFixture { std::unique_ptr d_sin_prev; std::unique_ptr d_sin; - std::unique_ptr> stop_status; - std::unique_ptr> d_stop_status; + gko::array stop_status; + gko::array d_stop_status; }; TEST_F(Minres, MinresInitializeIsEquivalentToRef) @@ -162,15 +160,23 @@ TEST_F(Minres, MinresInitializeIsEquivalentToRef) initialize_data(); gko::kernels::reference::minres::initialize( - ref, r.get(), z.get(), p.get(), p_prev.get(), q.get(), q_prev.get(), - v.get(), beta.get(), gamma.get(), delta.get(), cos_prev.get(), - cos.get(), sin_prev.get(), sin.get(), eta_next.get(), eta.get(), - stop_status.get()); + ref, r->get_const_device_view(), z->get_device_view(), + p->get_device_view(), p_prev->get_device_view(), q->get_device_view(), + q_prev->get_device_view(), v->get_device_view(), + beta->get_device_view(), gamma->get_device_view(), + delta->get_device_view(), cos_prev->get_device_view(), + cos->get_device_view(), sin_prev->get_device_view(), + sin->get_device_view(), eta_next->get_device_view(), + eta->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::minres::initialize( - exec, d_r.get(), d_z.get(), d_p.get(), d_p_prev.get(), d_q.get(), - d_q_prev.get(), d_v.get(), d_beta.get(), d_gamma.get(), d_delta.get(), - d_cos_prev.get(), d_cos.get(), d_sin_prev.get(), d_sin.get(), - d_eta_next.get(), d_eta.get(), d_stop_status.get()); + exec, d_r->get_const_device_view(), d_z->get_device_view(), + d_p->get_device_view(), d_p_prev->get_device_view(), + d_q->get_device_view(), d_q_prev->get_device_view(), + d_v->get_device_view(), d_beta->get_device_view(), + d_gamma->get_device_view(), d_delta->get_device_view(), + d_cos_prev->get_device_view(), d_cos->get_device_view(), + d_sin_prev->get_device_view(), d_sin->get_device_view(), + d_eta_next->get_device_view(), d_eta->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -189,7 +195,7 @@ TEST_F(Minres, MinresInitializeIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(d_cos, cos, ::r::value); GKO_ASSERT_MTX_NEAR(d_sin_prev, sin_prev, ::r::value); GKO_ASSERT_MTX_NEAR(d_sin, sin, ::r::value); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -198,13 +204,19 @@ TEST_F(Minres, MinresStep1IsEquivalentToRef) initialize_data(); gko::kernels::reference::minres::step_1( - ref, alpha.get(), beta.get(), gamma.get(), delta.get(), cos_prev.get(), - cos.get(), sin_prev.get(), sin.get(), eta.get(), eta_next.get(), - tau.get(), stop_status.get()); + ref, alpha->get_device_view(), beta->get_device_view(), + gamma->get_device_view(), delta->get_device_view(), + cos_prev->get_device_view(), cos->get_device_view(), + sin_prev->get_device_view(), sin->get_device_view(), + eta->get_device_view(), eta_next->get_device_view(), + tau->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::minres::step_1( - exec, d_alpha.get(), d_beta.get(), d_gamma.get(), d_delta.get(), - d_cos_prev.get(), d_cos.get(), d_sin_prev.get(), d_sin.get(), - d_eta.get(), d_eta_next.get(), d_tau.get(), d_stop_status.get()); + exec, d_alpha->get_device_view(), d_beta->get_device_view(), + d_gamma->get_device_view(), d_delta->get_device_view(), + d_cos_prev->get_device_view(), d_cos->get_device_view(), + d_sin_prev->get_device_view(), d_sin->get_device_view(), + d_eta->get_device_view(), d_eta_next->get_device_view(), + d_tau->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_alpha, alpha, ::r::value); GKO_ASSERT_MTX_NEAR(d_beta, beta, ::r::value); @@ -225,14 +237,23 @@ TEST_F(Minres, MinresStep2IsEquivalentToRef) initialize_data(); gko::kernels::reference::minres::step_2( - ref, x.get(), p.get(), p_prev.get(), z.get(), z_tilde.get(), q.get(), - q_prev.get(), v.get(), alpha.get(), beta.get(), gamma.get(), - delta.get(), cos.get(), eta.get(), stop_status.get()); + ref, x->get_device_view(), p->get_device_view(), + p_prev->get_const_device_view(), z->get_device_view(), + z_tilde->get_const_device_view(), q->get_device_view(), + q_prev->get_device_view(), v->get_device_view(), + alpha->get_const_device_view(), beta->get_const_device_view(), + gamma->get_const_device_view(), delta->get_const_device_view(), + cos->get_const_device_view(), eta->get_const_device_view(), + stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::minres::step_2( - exec, d_x.get(), d_p.get(), d_p_prev.get(), d_z.get(), d_z_tilde.get(), - d_q.get(), d_q_prev.get(), d_v.get(), d_alpha.get(), d_beta.get(), - d_gamma.get(), d_delta.get(), d_cos.get(), d_eta.get(), - d_stop_status.get()); + exec, d_x->get_device_view(), d_p->get_device_view(), + d_p_prev->get_const_device_view(), d_z->get_device_view(), + d_z_tilde->get_const_device_view(), d_q->get_device_view(), + d_q_prev->get_device_view(), d_v->get_device_view(), + d_alpha->get_const_device_view(), d_beta->get_const_device_view(), + d_gamma->get_const_device_view(), d_delta->get_const_device_view(), + d_cos->get_const_device_view(), d_eta->get_const_device_view(), + d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); GKO_ASSERT_MTX_NEAR(d_z, z, ::r::value); @@ -270,8 +291,8 @@ TEST_F(Minres, ApplyIsEquivalentToRef) auto solver = minres_factory->generate(std::move(mtx)); auto d_solver = d_minres_factory->generate(std::move(d_mtx)); - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); + solver->apply(b, x); + d_solver->apply(d_b, d_x); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value * 100); } @@ -312,8 +333,8 @@ TEST_F(Minres, PreconditionedApplyIsEquivalentToRef) auto solver = minres_factory->generate(std::move(mtx)); auto d_solver = d_minres_factory->generate(std::move(d_mtx)); - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); + solver->apply(b, x); + d_solver->apply(d_b, d_x); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value * 100); } diff --git a/test/solver/multigrid_kernels.cpp b/test/solver/multigrid_kernels.cpp index 2efb7cf8158..0b85c9c86f4 100644 --- a/test/solver/multigrid_kernels.cpp +++ b/test/solver/multigrid_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -139,10 +139,13 @@ TEST_F(Multigrid, MultigridKCycleStep1IsEquivalentToRef) initialize_data(); gko::kernels::reference::multigrid::kcycle_step_1( - ref, alpha.get(), rho.get(), v.get(), g.get(), d.get(), e.get()); + ref, alpha->get_const_device_view(), rho->get_const_device_view(), + v->get_const_device_view(), g->get_device_view(), d->get_device_view(), + e->get_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::multigrid::kcycle_step_1( - exec, d_alpha.get(), d_rho.get(), d_v.get(), d_g.get(), d_d.get(), - d_e.get()); + exec, d_alpha->get_const_device_view(), d_rho->get_const_device_view(), + d_v->get_const_device_view(), d_g->get_device_view(), + d_d->get_device_view(), d_e->get_device_view()); GKO_ASSERT_MTX_NEAR(d_g, g, 1e-14); GKO_ASSERT_MTX_NEAR(d_d, d, 1e-14); @@ -155,11 +158,15 @@ TEST_F(Multigrid, MultigridKCycleStep2IsEquivalentToRef) initialize_data(); gko::kernels::reference::multigrid::kcycle_step_2( - ref, alpha.get(), rho.get(), gamma.get(), beta.get(), zeta.get(), - d.get(), e.get()); + ref, alpha->get_const_device_view(), rho->get_const_device_view(), + gamma->get_const_device_view(), beta->get_const_device_view(), + zeta->get_const_device_view(), d->get_const_device_view(), + e->get_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::multigrid::kcycle_step_2( - exec, d_alpha.get(), d_rho.get(), d_gamma.get(), d_beta.get(), - d_zeta.get(), d_d.get(), d_e.get()); + exec, d_alpha->get_const_device_view(), d_rho->get_const_device_view(), + d_gamma->get_const_device_view(), d_beta->get_const_device_view(), + d_zeta->get_const_device_view(), d_d->get_const_device_view(), + d_e->get_device_view()); GKO_ASSERT_MTX_NEAR(d_e, e, 1e-14); } @@ -174,13 +181,17 @@ TEST_F(Multigrid, MultigridKCycleCheckStopIsEquivalentToRef) bool d_is_stop_5; gko::kernels::reference::multigrid::kcycle_check_stop( - ref, old_norm.get(), new_norm.get(), 1.0, is_stop_10); + ref, old_norm->get_const_device_view(), + new_norm->get_const_device_view(), 1.0, is_stop_10); gko::kernels::GKO_DEVICE_NAMESPACE::multigrid::kcycle_check_stop( - exec, d_old_norm.get(), d_new_norm.get(), 1.0, d_is_stop_10); + exec, d_old_norm->get_const_device_view(), + d_new_norm->get_const_device_view(), 1.0, d_is_stop_10); gko::kernels::reference::multigrid::kcycle_check_stop( - ref, old_norm.get(), new_norm.get(), 0.5, is_stop_5); + ref, old_norm->get_const_device_view(), + new_norm->get_const_device_view(), 0.5, is_stop_5); gko::kernels::GKO_DEVICE_NAMESPACE::multigrid::kcycle_check_stop( - exec, d_old_norm.get(), d_new_norm.get(), 0.5, d_is_stop_5); + exec, d_old_norm->get_const_device_view(), + d_new_norm->get_const_device_view(), 0.5, d_is_stop_5); GKO_ASSERT_EQ(d_is_stop_10, is_stop_10); GKO_ASSERT_EQ(d_is_stop_10, true); diff --git a/test/solver/pipe_cg_kernels.cpp b/test/solver/pipe_cg_kernels.cpp index ea5fadd4464..f9e421f868b 100644 --- a/test/solver/pipe_cg_kernels.cpp +++ b/test/solver/pipe_cg_kernels.cpp @@ -64,13 +64,12 @@ class PipeCg : public CommonTestFixture { beta->at(2) = 0.0; delta->at(2) = 0.0; prev_rho->at(2) = 0.0; - stop_status = - std::make_unique>(ref, size_n); - for (size_t i = 0; i < stop_status->get_size(); ++i) { - stop_status->get_data()[i].reset(); + stop_status = gko::array(ref, size_n); + for (size_t i = 0; i < stop_status.get_size(); ++i) { + stop_status.get_data()[i].reset(); } // check correct handling for stopped columns - stop_status->get_data()[1].stop(1); + stop_status.get_data()[1].stop(1); d_b = gko::clone(exec, b); d_r = gko::clone(exec, r); @@ -88,8 +87,7 @@ class PipeCg : public CommonTestFixture { d_delta = gko::clone(exec, delta); d_prev_rho = gko::clone(exec, prev_rho); d_rho = gko::clone(exec, rho); - d_stop_status = std::make_unique>( - exec, *stop_status); + d_stop_status = gko::array(exec, stop_status); } std::default_random_engine rand_engine; @@ -110,7 +108,7 @@ class PipeCg : public CommonTestFixture { std::unique_ptr g; std::unique_ptr p; std::unique_ptr q; - std::unique_ptr> stop_status; + gko::array stop_status; std::unique_ptr d_prev_rho; std::unique_ptr d_beta; @@ -128,7 +126,7 @@ class PipeCg : public CommonTestFixture { std::unique_ptr d_g; std::unique_ptr d_p; std::unique_ptr d_q; - std::unique_ptr> d_stop_status; + gko::array d_stop_status; }; @@ -137,14 +135,16 @@ TEST_F(PipeCg, PipeCgInitialize1IsEquivalentToRef) initialize_data(); gko::kernels::reference::pipe_cg::initialize_1( - ref, b.get(), r.get(), prev_rho.get(), stop_status.get()); + ref, b->get_const_device_view(), r->get_device_view(), + prev_rho->get_device_view(), stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::pipe_cg::initialize_1( - exec, d_b.get(), d_r.get(), d_prev_rho.get(), d_stop_status.get()); + exec, d_b->get_const_device_view(), d_r->get_device_view(), + d_prev_rho->get_device_view(), d_stop_status); GKO_ASSERT_MTX_NEAR(d_r, r, 0); GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, 0); GKO_ASSERT_MTX_NEAR(d_rho, rho, 0); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); + GKO_ASSERT_ARRAY_EQ(d_stop_status, stop_status); } @@ -153,13 +153,18 @@ TEST_F(PipeCg, PipeCgInitialize2IsEquivalentToRef) initialize_data(); gko::kernels::reference::pipe_cg::initialize_2( - ref, this->p.get(), this->q.get(), this->f.get(), this->g.get(), - this->beta.get(), this->z1.get(), this->w.get(), this->m.get(), - this->n.get(), this->delta.get()); + ref, this->p->get_device_view(), this->q->get_device_view(), + this->f->get_device_view(), this->g->get_device_view(), + this->beta->get_device_view(), this->z1->get_const_device_view(), + this->w->get_const_device_view(), this->m->get_const_device_view(), + this->n->get_const_device_view(), this->delta->get_const_device_view()); gko::kernels::GKO_DEVICE_NAMESPACE::pipe_cg::initialize_2( - this->exec, this->d_p.get(), this->d_q.get(), this->d_f.get(), - this->d_g.get(), this->d_beta.get(), this->d_z1.get(), this->d_w.get(), - this->d_m.get(), this->d_n.get(), this->d_delta.get()); + this->exec, this->d_p->get_device_view(), this->d_q->get_device_view(), + this->d_f->get_device_view(), this->d_g->get_device_view(), + this->d_beta->get_device_view(), this->d_z1->get_const_device_view(), + this->d_w->get_const_device_view(), this->d_m->get_const_device_view(), + this->d_n->get_const_device_view(), + this->d_delta->get_const_device_view()); GKO_ASSERT_MTX_NEAR(d_p, p, 0); GKO_ASSERT_MTX_NEAR(d_q, q, 0); @@ -174,15 +179,20 @@ TEST_F(PipeCg, PipeCgStep1IsEquivalentToRef) initialize_data(); gko::kernels::reference::pipe_cg::step_1( - ref, this->x.get(), this->r.get(), this->z1.get(), this->z2.get(), - this->w.get(), this->p.get(), this->q.get(), this->f.get(), - this->g.get(), this->rho.get(), this->beta.get(), - this->stop_status.get()); + ref, this->x->get_device_view(), this->r->get_device_view(), + this->z1->get_device_view(), this->z2->get_device_view(), + this->w->get_device_view(), this->p->get_const_device_view(), + this->q->get_const_device_view(), this->f->get_const_device_view(), + this->g->get_const_device_view(), this->rho->get_const_device_view(), + this->beta->get_const_device_view(), this->stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::pipe_cg::step_1( - this->exec, this->d_x.get(), this->d_r.get(), this->d_z1.get(), - this->d_z2.get(), this->d_w.get(), this->d_p.get(), this->d_q.get(), - this->d_f.get(), this->d_g.get(), this->d_rho.get(), this->d_beta.get(), - this->d_stop_status.get()); + this->exec, this->d_x->get_device_view(), this->d_r->get_device_view(), + this->d_z1->get_device_view(), this->d_z2->get_device_view(), + this->d_w->get_device_view(), this->d_p->get_const_device_view(), + this->d_q->get_const_device_view(), this->d_f->get_const_device_view(), + this->d_g->get_const_device_view(), + this->d_rho->get_const_device_view(), + this->d_beta->get_const_device_view(), this->d_stop_status); GKO_ASSERT_MTX_NEAR(d_x, x, ::r::value); GKO_ASSERT_MTX_NEAR(d_r, r, ::r::value); @@ -197,15 +207,23 @@ TEST_F(PipeCg, PipeCgStep2IsEquivalentToRef) initialize_data(); gko::kernels::reference::pipe_cg::step_2( - ref, this->beta.get(), this->p.get(), this->q.get(), this->f.get(), - this->g.get(), this->z1.get(), this->w.get(), this->m.get(), - this->n.get(), this->prev_rho.get(), this->rho.get(), this->delta.get(), - this->stop_status.get()); + ref, this->beta->get_device_view(), this->p->get_device_view(), + this->q->get_device_view(), this->f->get_device_view(), + this->g->get_device_view(), this->z1->get_const_device_view(), + this->w->get_const_device_view(), this->m->get_const_device_view(), + this->n->get_const_device_view(), + this->prev_rho->get_const_device_view(), + this->rho->get_const_device_view(), + this->delta->get_const_device_view(), this->stop_status); gko::kernels::GKO_DEVICE_NAMESPACE::pipe_cg::step_2( - this->exec, this->d_beta.get(), this->d_p.get(), this->d_q.get(), - this->d_f.get(), this->d_g.get(), this->d_z1.get(), this->d_w.get(), - this->d_m.get(), this->d_n.get(), this->d_prev_rho.get(), - this->d_rho.get(), this->d_delta.get(), this->d_stop_status.get()); + this->exec, this->d_beta->get_device_view(), + this->d_p->get_device_view(), this->d_q->get_device_view(), + this->d_f->get_device_view(), this->d_g->get_device_view(), + this->d_z1->get_const_device_view(), this->d_w->get_const_device_view(), + this->d_m->get_const_device_view(), this->d_n->get_const_device_view(), + this->d_prev_rho->get_const_device_view(), + this->d_rho->get_const_device_view(), + this->d_delta->get_const_device_view(), this->d_stop_status); GKO_ASSERT_MTX_NEAR(d_beta, beta, ::r::value); GKO_ASSERT_MTX_NEAR(d_p, p, ::r::value);