fix function definitions that differ from their declarations in instantiation
yhmtsai committed Nov 14, 2024
1 parent 357b7d8 commit ea20a94
Showing 35 changed files with 225 additions and 260 deletions.
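
The pattern repeated throughout the diff is the removal of the top-level `const` qualifier from pointer parameters in kernel definitions (e.g. `const Type* const param` becomes `const Type* param`) so the definitions read exactly like their declarations. A minimal, self-contained sketch with hypothetical names (not Ginkgo code): a top-level `const` on a parameter is not part of the function type, so both spellings refer to the same function, and dropping it changes only the spelling.

```cpp
// Minimal sketch, hypothetical names (not Ginkgo code): the declaration below
// and a definition written with extra top-level `const` on its parameters
// (`const double* const alpha, double* const x`) denote the same function,
// because top-level cv-qualifiers on parameters are ignored in the function type.
#include <iostream>

// Declaration, as a kernel header or declaration macro would spell it:
void scale_example(const double* alpha, double* x);

int main()
{
    double a = 2.0;
    double v = 3.0;
    scale_example(&a, &v);
    std::cout << v << '\n';  // prints 6
}

// Definition, now spelled identically to the declaration:
void scale_example(const double* alpha, double* x) { *x *= *alpha; }
```

Matching the two spellings appears to be a consistency fix rather than a behavioral change, which would explain why the commit touches many files while only adjusting parameter lists.
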
14 changes: 7 additions & 7 deletions common/cuda_hip/base/batch_multi_vector_kernels.cpp
@@ -30,8 +30,8 @@ constexpr auto default_block_size = 256;

template <typename ValueType>
void scale(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* const alpha,
batch::MultiVector<ValueType>* const x)
const batch::MultiVector<ValueType>* alpha,
batch::MultiVector<ValueType>* x)
{
const auto num_blocks = x->get_num_batch_items();
const auto alpha_ub = get_batch_struct(alpha);
@@ -61,9 +61,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(

template <typename ValueType>
void add_scaled(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* const alpha,
const batch::MultiVector<ValueType>* const x,
batch::MultiVector<ValueType>* const y)
const batch::MultiVector<ValueType>* alpha,
const batch::MultiVector<ValueType>* x,
batch::MultiVector<ValueType>* y)
{
const auto num_blocks = x->get_num_batch_items();
const size_type nrhs = x->get_common_size()[1];
@@ -127,8 +127,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(

template <typename ValueType>
void compute_norm2(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* const x,
batch::MultiVector<remove_complex<ValueType>>* const result)
const batch::MultiVector<ValueType>* x,
batch::MultiVector<remove_complex<ValueType>>* result)
{
const auto num_blocks = x->get_num_batch_items();
const auto num_rhs = x->get_common_size()[1];
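
The `GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(` calls in the hunks above hint at why the exact spelling matters: a common pattern is to spell a kernel signature once in a declaration macro and reuse it both for the declaration and for the explicit instantiations, so a definition written with extra qualifiers drifts visibly from that single source of truth. The sketch below shows that pattern under stated assumptions; the `EXAMPLE_*` macros and `MultiVectorStub` type are illustrative stand-ins, not Ginkgo's actual macros or types.

```cpp
// Hypothetical sketch of a declare/instantiate macro pair; all names here are
// stand-ins, not Ginkgo's actual definitions.
#include <iostream>

template <typename ValueType>
struct MultiVectorStub {
    ValueType value;
};

// The kernel signature is spelled exactly once ...
#define EXAMPLE_DECLARE_SCALE(ValueType)                 \
    void scale(const MultiVectorStub<ValueType>* alpha,  \
               MultiVectorStub<ValueType>* x)

// ... and reused to emit one explicit instantiation per value type.
#define EXAMPLE_INSTANTIATE_FOR_EACH_VALUE_TYPE(_macro) \
    template _macro(float);                             \
    template _macro(double)

// Definition written through the same macro, so its parameter list cannot
// drift from the declaration (e.g. by adding a top-level `const`).
template <typename ValueType>
EXAMPLE_DECLARE_SCALE(ValueType)
{
    x->value *= alpha->value;
}

EXAMPLE_INSTANTIATE_FOR_EACH_VALUE_TYPE(EXAMPLE_DECLARE_SCALE);

int main()
{
    MultiVectorStub<double> alpha{2.0};
    MultiVectorStub<double> x{3.0};
    scale(&alpha, &x);  // resolves to the explicitly instantiated double version
    std::cout << x.value << '\n';  // prints 6
}
```

With this structure, any definition that disagrees with the macro-spelled signature stands out in review, which is consistent with the intent stated in the commit title.
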
12 changes: 6 additions & 6 deletions common/cuda_hip/matrix/csr_kernels.template.cpp
@@ -1823,9 +1823,9 @@ void extract_diagonal(std::shared_ptr<const DefaultExecutor> exec,


template <typename ValueType, typename IndexType>
void check_diagonal_entries_exist(
std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* const mtx, bool& has_all_diags)
void check_diagonal_entries_exist(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* mtx,
bool& has_all_diags)
{
const auto num_diag = static_cast<IndexType>(
std::min(mtx->get_size()[0], mtx->get_size()[1]));
@@ -1846,9 +1846,9 @@ void check_diagonal_entries_exist(

template <typename ValueType, typename IndexType>
void add_scaled_identity(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* const alpha,
const matrix::Dense<ValueType>* const beta,
matrix::Csr<ValueType, IndexType>* const mtx)
const matrix::Dense<ValueType>* alpha,
const matrix::Dense<ValueType>* beta,
matrix::Csr<ValueType, IndexType>* mtx)
{
const auto nrows = mtx->get_size()[0];
if (nrows == 0) {
38 changes: 18 additions & 20 deletions common/cuda_hip/matrix/fbcsr_kernels.template.cpp
@@ -294,8 +294,8 @@ __global__ void __launch_bounds__(default_block_size)

template <typename ValueType, typename IndexType>
void fallback_transpose(const std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const input,
matrix::Fbcsr<ValueType, IndexType>* const output)
const matrix::Fbcsr<ValueType, IndexType>* input,
matrix::Fbcsr<ValueType, IndexType>* output)
{
const auto in_num_row_blocks = input->get_num_block_rows();
const auto out_num_row_blocks = output->get_num_block_rows();
@@ -353,8 +353,8 @@ void fill_in_dense(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename IndexType>
void convert_to_csr(const std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const source,
matrix::Csr<ValueType, IndexType>* const result)
const matrix::Fbcsr<ValueType, IndexType>* source,
matrix::Csr<ValueType, IndexType>* result)
{
constexpr auto warps_per_block = default_block_size / config::warp_size;
const auto num_blocks =
@@ -373,8 +373,7 @@ void convert_to_csr(const std::shared_ptr<const DefaultExecutor> exec,
template <typename ValueType, typename IndexType>
void is_sorted_by_column_index(
std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const to_check,
bool* const is_sorted)
const matrix::Fbcsr<ValueType, IndexType>* to_check, bool* is_sorted)
{
*is_sorted = true;
auto gpu_array = array<bool>(exec, 1);
@@ -396,7 +395,7 @@ void is_sorted_by_column_index(

template <typename ValueType, typename IndexType>
void sort_by_column_index(const std::shared_ptr<const DefaultExecutor> exec,
matrix::Fbcsr<ValueType, IndexType>* const to_sort)
matrix::Fbcsr<ValueType, IndexType>* to_sort)
GKO_NOT_IMPLEMENTED;


@@ -412,8 +411,8 @@ namespace {
template <typename ValueType>
void dense_transpose(std::shared_ptr<const DefaultExecutor> exec,
const size_type nrows, const size_type ncols,
const size_type orig_stride, const ValueType* const orig,
const size_type trans_stride, ValueType* const trans)
const size_type orig_stride, const ValueType* orig,
const size_type trans_stride, ValueType* trans)
{
if (nrows == 0) {
return;
@@ -439,9 +438,9 @@ void dense_transpose(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename IndexType>
void spmv(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const a,
const matrix::Dense<ValueType>* const b,
matrix::Dense<ValueType>* const c)
const matrix::Fbcsr<ValueType, IndexType>* a,
const matrix::Dense<ValueType>* b, matrix::Dense<ValueType>* c)
{
if (c->get_size()[0] == 0 || c->get_size()[1] == 0) {
// empty output: nothing to do
@@ -494,11 +492,11 @@ void spmv(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename IndexType>
void advanced_spmv(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* const alpha,
const matrix::Fbcsr<ValueType, IndexType>* const a,
const matrix::Dense<ValueType>* const b,
const matrix::Dense<ValueType>* const beta,
matrix::Dense<ValueType>* const c)
const matrix::Dense<ValueType>* alpha,
const matrix::Fbcsr<ValueType, IndexType>* a,
const matrix::Dense<ValueType>* b,
const matrix::Dense<ValueType>* beta,
matrix::Dense<ValueType>* c)
{
if (c->get_size()[0] == 0 || c->get_size()[1] == 0) {
// empty output: nothing to do
@@ -556,7 +554,7 @@ namespace {
template <int mat_blk_sz, typename ValueType, typename IndexType>
void transpose_blocks_impl(syn::value_list<int, mat_blk_sz>,
std::shared_ptr<const DefaultExecutor> exec,
matrix::Fbcsr<ValueType, IndexType>* const mat)
matrix::Fbcsr<ValueType, IndexType>* mat)
{
constexpr int subwarp_size = config::warp_size;
const auto nbnz = mat->get_num_stored_blocks();
@@ -579,8 +577,8 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_transpose_blocks,

template <typename ValueType, typename IndexType>
void transpose(const std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const orig,
matrix::Fbcsr<ValueType, IndexType>* const trans)
const matrix::Fbcsr<ValueType, IndexType>* orig,
matrix::Fbcsr<ValueType, IndexType>* trans)
{
#ifdef GKO_COMPILING_CUDA
if (sparselib::is_supported<ValueType, IndexType>::value) {
8 changes: 3 additions & 5 deletions common/cuda_hip/reorder/rcm_kernels.cpp
@@ -614,11 +612,9 @@ void sort_levels(std::shared_ptr<const DefaultExecutor> exec,

template <typename IndexType>
void compute_permutation(std::shared_ptr<const DefaultExecutor> exec,
const IndexType num_rows,
const IndexType* const row_ptrs,
const IndexType* const col_idxs,
IndexType* const permutation,
IndexType* const inv_permutation,
const IndexType num_rows, const IndexType* row_ptrs,
const IndexType* col_idxs, IndexType* permutation,
IndexType* inv_permutation,
const gko::reorder::starting_strategy strategy)
{
if (num_rows == 0) {
6 changes: 3 additions & 3 deletions common/unified/matrix/dense_kernels.template.cpp
@@ -730,9 +730,9 @@ void get_imag(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename ScalarType>
void add_scaled_identity(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ScalarType>* const alpha,
const matrix::Dense<ScalarType>* const beta,
matrix::Dense<ValueType>* const mtx)
const matrix::Dense<ScalarType>* alpha,
const matrix::Dense<ScalarType>* beta,
matrix::Dense<ValueType>* mtx)
{
run_kernel(
exec,
4 changes: 2 additions & 2 deletions core/matrix/csr.cpp
@@ -1029,8 +1029,8 @@ void Csr<ValueType, IndexType>::inv_scale_impl(const LinOp* alpha)


template <typename ValueType, typename IndexType>
void Csr<ValueType, IndexType>::add_scaled_identity_impl(const LinOp* const a,
const LinOp* const b)
void Csr<ValueType, IndexType>::add_scaled_identity_impl(const LinOp* a,
const LinOp* b)
{
bool has_diags{false};
this->get_executor()->run(
3 changes: 1 addition & 2 deletions core/matrix/dense.cpp
@@ -1906,8 +1906,7 @@ void Dense<ValueType>::get_imag(ptr_param<real_type> result) const


template <typename ValueType>
void Dense<ValueType>::add_scaled_identity_impl(const LinOp* const a,
const LinOp* const b)
void Dense<ValueType>::add_scaled_identity_impl(const LinOp* a, const LinOp* b)
{
precision_dispatch_real_complex<ValueType>(
[this](auto dense_alpha, auto dense_beta, auto dense_x) {
27 changes: 11 additions & 16 deletions core/matrix/fbcsr.cpp
@@ -103,8 +103,7 @@ Fbcsr<ValueType, IndexType>::Fbcsr(Fbcsr&& other) : Fbcsr{other.get_executor()}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const b,
LinOp* const x) const
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* b, LinOp* x) const
{
if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) {
// if b is a FBCSR matrix, we need an SpGeMM
@@ -122,10 +121,8 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const b,


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha,
const LinOp* const b,
const LinOp* const beta,
LinOp* const x) const
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* alpha, const LinOp* b,
const LinOp* beta, LinOp* x) const
{
if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) {
// if b is a FBCSR matrix, we need an SpGeMM
@@ -148,7 +145,7 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha,

template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
Fbcsr<next_precision<ValueType>, IndexType>* const result) const
Fbcsr<next_precision<ValueType>, IndexType>* result) const
{
result->values_ = this->values_;
result->col_idxs_ = this->col_idxs_;
@@ -161,15 +158,14 @@ void Fbcsr<ValueType, IndexType>::convert_to(

template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(
Fbcsr<next_precision<ValueType>, IndexType>* const result)
Fbcsr<next_precision<ValueType>, IndexType>* result)
{
this->convert_to(result);
}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
Dense<ValueType>* const result) const
void Fbcsr<ValueType, IndexType>::convert_to(Dense<ValueType>* result) const
{
auto exec = this->get_executor();
auto tmp_result = make_temporary_output_clone(exec, result);
@@ -180,15 +176,15 @@ void Fbcsr<ValueType, IndexType>::convert_to(


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(Dense<ValueType>* const result)
void Fbcsr<ValueType, IndexType>::move_to(Dense<ValueType>* result)
{
this->convert_to(result);
}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
Csr<ValueType, IndexType>* const result) const
Csr<ValueType, IndexType>* result) const
{
auto exec = this->get_executor();
{
@@ -204,16 +200,15 @@ void Fbcsr<ValueType, IndexType>::convert_to(


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(
Csr<ValueType, IndexType>* const result)
void Fbcsr<ValueType, IndexType>::move_to(Csr<ValueType, IndexType>* result)
{
this->convert_to(result);
}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
SparsityCsr<ValueType, IndexType>* const result) const
SparsityCsr<ValueType, IndexType>* result) const
{
result->set_size(
gko::dim<2>{static_cast<size_type>(this->get_num_block_rows()),
@@ -227,7 +222,7 @@ void Fbcsr<ValueType, IndexType>::convert_to(

template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(
SparsityCsr<ValueType, IndexType>* const result)
SparsityCsr<ValueType, IndexType>* result)
{
this->convert_to(result);
}
22 changes: 10 additions & 12 deletions cuda/preconditioner/batch_jacobi_kernels.cu
@@ -45,8 +45,7 @@ using batch_jacobi_cuda_compiled_max_block_sizes =
template <typename IndexType>
void compute_cumulative_block_storage(
std::shared_ptr<const DefaultExecutor> exec, const size_type num_blocks,
const IndexType* const block_pointers,
IndexType* const blocks_cumulative_offsets)
const IndexType* block_pointers, IndexType* blocks_cumulative_offsets)
{
dim3 block(default_block_size);
dim3 grid(ceildiv(num_blocks, default_block_size));
@@ -66,8 +65,8 @@ GKO_INSTANTIATE_FOR_INT32_TYPE(
template <typename IndexType>
void find_row_block_map(std::shared_ptr<const DefaultExecutor> exec,
const size_type num_blocks,
const IndexType* const block_pointers,
IndexType* const map_block_to_row)
const IndexType* block_pointers,
IndexType* map_block_to_row)
{
dim3 block(default_block_size);
dim3 grid(ceildiv(num_blocks, default_block_size));
@@ -83,10 +82,10 @@ GKO_INSTANTIATE_FOR_INT32_TYPE(
template <typename ValueType, typename IndexType>
void extract_common_blocks_pattern(
std::shared_ptr<const DefaultExecutor> exec,
const gko::matrix::Csr<ValueType, IndexType>* const first_sys_csr,
const size_type num_blocks, const IndexType* const cumulative_block_storage,
const IndexType* const block_pointers,
const IndexType* const map_block_to_row, IndexType* const blocks_pattern)
const gko::matrix::Csr<ValueType, IndexType>* first_sys_csr,
const size_type num_blocks, const IndexType* cumulative_block_storage,
const IndexType* block_pointers, const IndexType* map_block_to_row,
IndexType* blocks_pattern)
{
const auto nrows = first_sys_csr->get_size()[0];
dim3 block(default_block_size);
@@ -143,11 +142,10 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_block_jacobi_helper,
template <typename ValueType, typename IndexType>
void compute_block_jacobi(
std::shared_ptr<const DefaultExecutor> exec,
const batch::matrix::Csr<ValueType, IndexType>* const sys_csr,
const batch::matrix::Csr<ValueType, IndexType>* sys_csr,
const uint32 max_block_size, const size_type num_blocks,
const IndexType* const cumulative_block_storage,
const IndexType* const block_pointers,
const IndexType* const blocks_pattern, ValueType* const blocks)
const IndexType* cumulative_block_storage, const IndexType* block_pointers,
const IndexType* blocks_pattern, ValueType* blocks)
{
select_compute_block_jacobi_helper(
batch_jacobi_cuda_compiled_max_block_sizes(),
7 changes: 3 additions & 4 deletions cuda/solver/batch_bicgstab_kernels.cu
@@ -217,10 +217,9 @@ private:
template <typename ValueType>
void apply(std::shared_ptr<const DefaultExecutor> exec,
const settings<remove_complex<ValueType>>& settings,
const batch::BatchLinOp* const mat,
const batch::BatchLinOp* const precon,
const batch::MultiVector<ValueType>* const b,
batch::MultiVector<ValueType>* const x,
const batch::BatchLinOp* mat, const batch::BatchLinOp* precon,
const batch::MultiVector<ValueType>* b,
batch::MultiVector<ValueType>* x,
batch::log::detail::log_data<remove_complex<ValueType>>& logdata)
{
using cu_value_type = cuda_type<ValueType>;
7 changes: 3 additions & 4 deletions cuda/solver/batch_cg_kernels.cu
@@ -197,10 +197,9 @@ private:
template <typename ValueType>
void apply(std::shared_ptr<const DefaultExecutor> exec,
const settings<remove_complex<ValueType>>& settings,
const batch::BatchLinOp* const mat,
const batch::BatchLinOp* const precon,
const batch::MultiVector<ValueType>* const b,
batch::MultiVector<ValueType>* const x,
const batch::BatchLinOp* mat, const batch::BatchLinOp* precon,
const batch::MultiVector<ValueType>* b,
batch::MultiVector<ValueType>* x,
batch::log::detail::log_data<remove_complex<ValueType>>& logdata)
{
using cu_value_type = cuda_type<ValueType>;