fix function definitions that differ from their declarations in instantiation
yhmtsai committed Nov 14, 2024
1 parent 357b7d8 commit ea20a94
Showing 35 changed files with 225 additions and 260 deletions.
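
The pattern repeated throughout the diff is the removal of the top-level `const` qualifier from pointer parameters in kernel definitions (e.g. `const Type* const param` becomes `const Type* param`) so the definitions read exactly like their declarations. A minimal, self-contained sketch with hypothetical names (not Ginkgo code): a top-level `const` on a parameter is not part of the function type, so both spellings refer to the same function, and dropping it changes only the spelling.

```cpp
// Minimal sketch, hypothetical names (not Ginkgo code): the declaration below
// and a definition written with extra top-level `const` on its parameters
// (`const double* const alpha, double* const x`) denote the same function,
// because top-level cv-qualifiers on parameters are ignored in the function type.
#include <iostream>

// Declaration, as a kernel header or declaration macro would spell it:
void scale_example(const double* alpha, double* x);

int main()
{
    double a = 2.0;
    double v = 3.0;
    scale_example(&a, &v);
    std::cout << v << '\n';  // prints 6
}

// Definition, now spelled identically to the declaration:
void scale_example(const double* alpha, double* x) { *x *= *alpha; }
```

Matching the two spellings appears to be a consistency fix rather than a behavioral change, which would explain why the commit touches many files while only adjusting parameter lists.
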
14 changes: 7 additions & 7 deletions common/cuda_hip/base/batch_multi_vector_kernels.cpp
@@ -30,8 +30,8 @@ constexpr auto default_block_size = 256;

template <typename ValueType>
void scale(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* const alpha,
batch::MultiVector<ValueType>* const x)
const batch::MultiVector<ValueType>* alpha,
batch::MultiVector<ValueType>* x)
{
const auto num_blocks = x->get_num_batch_items();
const auto alpha_ub = get_batch_struct(alpha);
@@ -61,9 +61,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(

template <typename ValueType>
void add_scaled(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* const alpha,
const batch::MultiVector<ValueType>* const x,
batch::MultiVector<ValueType>* const y)
const batch::MultiVector<ValueType>* alpha,
const batch::MultiVector<ValueType>* x,
batch::MultiVector<ValueType>* y)
{
const auto num_blocks = x->get_num_batch_items();
const size_type nrhs = x->get_common_size()[1];
@@ -127,8 +127,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(

template <typename ValueType>
void compute_norm2(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* const x,
batch::MultiVector<remove_complex<ValueType>>* const result)
const batch::MultiVector<ValueType>* x,
batch::MultiVector<remove_complex<ValueType>>* result)
{
const auto num_blocks = x->get_num_batch_items();
const auto num_rhs = x->get_common_size()[1];
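
The `GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(` calls in the hunks above hint at why the exact spelling matters: a common pattern is to spell a kernel signature once in a declaration macro and reuse it both for the declaration and for the explicit instantiations, so a definition written with extra qualifiers drifts visibly from that single source of truth. The sketch below shows that pattern under stated assumptions; the `EXAMPLE_*` macros and `MultiVectorStub` type are illustrative stand-ins, not Ginkgo's actual macros or types.

```cpp
// Hypothetical sketch of a declare/instantiate macro pair; all names here are
// stand-ins, not Ginkgo's actual definitions.
#include <iostream>

template <typename ValueType>
struct MultiVectorStub {
    ValueType value;
};

// The kernel signature is spelled exactly once ...
#define EXAMPLE_DECLARE_SCALE(ValueType)                 \
    void scale(const MultiVectorStub<ValueType>* alpha,  \
               MultiVectorStub<ValueType>* x)

// ... and reused to emit one explicit instantiation per value type.
#define EXAMPLE_INSTANTIATE_FOR_EACH_VALUE_TYPE(_macro) \
    template _macro(float);                             \
    template _macro(double)

// Definition written through the same macro, so its parameter list cannot
// drift from the declaration (e.g. by adding a top-level `const`).
template <typename ValueType>
EXAMPLE_DECLARE_SCALE(ValueType)
{
    x->value *= alpha->value;
}

EXAMPLE_INSTANTIATE_FOR_EACH_VALUE_TYPE(EXAMPLE_DECLARE_SCALE);

int main()
{
    MultiVectorStub<double> alpha{2.0};
    MultiVectorStub<double> x{3.0};
    scale(&alpha, &x);  // resolves to the explicitly instantiated double version
    std::cout << x.value << '\n';  // prints 6
}
```

With this structure, any definition that disagrees with the macro-spelled signature stands out in review, which is consistent with the intent stated in the commit title.
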
12 changes: 6 additions & 6 deletions common/cuda_hip/matrix/csr_kernels.template.cpp
@@ -1823,9 +1823,9 @@ void extract_diagonal(std::shared_ptr<const DefaultExecutor> exec,


template <typename ValueType, typename IndexType>
void check_diagonal_entries_exist(
std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* const mtx, bool& has_all_diags)
void check_diagonal_entries_exist(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* mtx,
bool& has_all_diags)
{
const auto num_diag = static_cast<IndexType>(
std::min(mtx->get_size()[0], mtx->get_size()[1]));
@@ -1846,9 +1846,9 @@ void check_diagonal_entries_exist(

template <typename ValueType, typename IndexType>
void add_scaled_identity(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* const alpha,
const matrix::Dense<ValueType>* const beta,
matrix::Csr<ValueType, IndexType>* const mtx)
const matrix::Dense<ValueType>* alpha,
const matrix::Dense<ValueType>* beta,
matrix::Csr<ValueType, IndexType>* mtx)
{
const auto nrows = mtx->get_size()[0];
if (nrows == 0) {
38 changes: 18 additions & 20 deletions common/cuda_hip/matrix/fbcsr_kernels.template.cpp
@@ -294,8 +294,8 @@ __global__ void __launch_bounds__(default_block_size)

template <typename ValueType, typename IndexType>
void fallback_transpose(const std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const input,
matrix::Fbcsr<ValueType, IndexType>* const output)
const matrix::Fbcsr<ValueType, IndexType>* input,
matrix::Fbcsr<ValueType, IndexType>* output)
{
const auto in_num_row_blocks = input->get_num_block_rows();
const auto out_num_row_blocks = output->get_num_block_rows();
@@ -353,8 +353,8 @@ void fill_in_dense(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename IndexType>
void convert_to_csr(const std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const source,
matrix::Csr<ValueType, IndexType>* const result)
const matrix::Fbcsr<ValueType, IndexType>* source,
matrix::Csr<ValueType, IndexType>* result)
{
constexpr auto warps_per_block = default_block_size / config::warp_size;
const auto num_blocks =
@@ -373,8 +373,7 @@ void convert_to_csr(const std::shared_ptr<const DefaultExecutor> exec,
template <typename ValueType, typename IndexType>
void is_sorted_by_column_index(
std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const to_check,
bool* const is_sorted)
const matrix::Fbcsr<ValueType, IndexType>* to_check, bool* is_sorted)
{
*is_sorted = true;
auto gpu_array = array<bool>(exec, 1);
@@ -396,7 +395,7 @@ void is_sorted_by_column_index(

template <typename ValueType, typename IndexType>
void sort_by_column_index(const std::shared_ptr<const DefaultExecutor> exec,
matrix::Fbcsr<ValueType, IndexType>* const to_sort)
matrix::Fbcsr<ValueType, IndexType>* to_sort)
GKO_NOT_IMPLEMENTED;


@@ -412,8 +411,8 @@ namespace {
template <typename ValueType>
void dense_transpose(std::shared_ptr<const DefaultExecutor> exec,
const size_type nrows, const size_type ncols,
const size_type orig_stride, const ValueType* const orig,
const size_type trans_stride, ValueType* const trans)
const size_type orig_stride, const ValueType* orig,
const size_type trans_stride, ValueType* trans)
{
if (nrows == 0) {
return;
@@ -439,9 +438,9 @@ void dense_transpose(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename IndexType>
void spmv(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const a,
const matrix::Dense<ValueType>* const b,
matrix::Dense<ValueType>* const c)
const matrix::Fbcsr<ValueType, IndexType>* a,
const matrix::Dense<ValueType>* b, matrix::Dense<ValueType>* c)
{
if (c->get_size()[0] == 0 || c->get_size()[1] == 0) {
// empty output: nothing to do
@@ -494,11 +492,11 @@ void spmv(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename IndexType>
void advanced_spmv(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* const alpha,
const matrix::Fbcsr<ValueType, IndexType>* const a,
const matrix::Dense<ValueType>* const b,
const matrix::Dense<ValueType>* const beta,
matrix::Dense<ValueType>* const c)
const matrix::Dense<ValueType>* alpha,
const matrix::Fbcsr<ValueType, IndexType>* a,
const matrix::Dense<ValueType>* b,
const matrix::Dense<ValueType>* beta,
matrix::Dense<ValueType>* c)
{
if (c->get_size()[0] == 0 || c->get_size()[1] == 0) {
// empty output: nothing to do
@@ -556,7 +554,7 @@ namespace {
template <int mat_blk_sz, typename ValueType, typename IndexType>
void transpose_blocks_impl(syn::value_list<int, mat_blk_sz>,
std::shared_ptr<const DefaultExecutor> exec,
matrix::Fbcsr<ValueType, IndexType>* const mat)
matrix::Fbcsr<ValueType, IndexType>* mat)
{
constexpr int subwarp_size = config::warp_size;
const auto nbnz = mat->get_num_stored_blocks();
@@ -579,8 +577,8 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_transpose_blocks,

template <typename ValueType, typename IndexType>
void transpose(const std::shared_ptr<const DefaultExecutor> exec,
const matrix::Fbcsr<ValueType, IndexType>* const orig,
matrix::Fbcsr<ValueType, IndexType>* const trans)
const matrix::Fbcsr<ValueType, IndexType>* orig,
matrix::Fbcsr<ValueType, IndexType>* trans)
{
#ifdef GKO_COMPILING_CUDA
if (sparselib::is_supported<ValueType, IndexType>::value) {
8 changes: 3 additions & 5 deletions common/cuda_hip/reorder/rcm_kernels.cpp
@@ -614,11 +612,9 @@ void sort_levels(std::shared_ptr<const DefaultExecutor> exec,

template <typename IndexType>
void compute_permutation(std::shared_ptr<const DefaultExecutor> exec,
const IndexType num_rows,
const IndexType* const row_ptrs,
const IndexType* const col_idxs,
IndexType* const permutation,
IndexType* const inv_permutation,
const IndexType num_rows, const IndexType* row_ptrs,
const IndexType* col_idxs, IndexType* permutation,
IndexType* inv_permutation,
const gko::reorder::starting_strategy strategy)
{
if (num_rows == 0) {
6 changes: 3 additions & 3 deletions common/unified/matrix/dense_kernels.template.cpp
@@ -730,9 +730,9 @@ void get_imag(std::shared_ptr<const DefaultExecutor> exec,

template <typename ValueType, typename ScalarType>
void add_scaled_identity(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ScalarType>* const alpha,
const matrix::Dense<ScalarType>* const beta,
matrix::Dense<ValueType>* const mtx)
const matrix::Dense<ScalarType>* alpha,
const matrix::Dense<ScalarType>* beta,
matrix::Dense<ValueType>* mtx)
{
run_kernel(
exec,
4 changes: 2 additions & 2 deletions core/matrix/csr.cpp
@@ -1029,8 +1029,8 @@ void Csr<ValueType, IndexType>::inv_scale_impl(const LinOp* alpha)


template <typename ValueType, typename IndexType>
void Csr<ValueType, IndexType>::add_scaled_identity_impl(const LinOp* const a,
const LinOp* const b)
void Csr<ValueType, IndexType>::add_scaled_identity_impl(const LinOp* a,
const LinOp* b)
{
bool has_diags{false};
this->get_executor()->run(
3 changes: 1 addition & 2 deletions core/matrix/dense.cpp
@@ -1906,8 +1906,7 @@ void Dense<ValueType>::get_imag(ptr_param<real_type> result) const


template <typename ValueType>
void Dense<ValueType>::add_scaled_identity_impl(const LinOp* const a,
const LinOp* const b)
void Dense<ValueType>::add_scaled_identity_impl(const LinOp* a, const LinOp* b)
{
precision_dispatch_real_complex<ValueType>(
[this](auto dense_alpha, auto dense_beta, auto dense_x) {
27 changes: 11 additions & 16 deletions core/matrix/fbcsr.cpp
@@ -103,8 +103,7 @@ Fbcsr<ValueType, IndexType>::Fbcsr(Fbcsr&& other) : Fbcsr{other.get_executor()}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const b,
LinOp* const x) const
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* b, LinOp* x) const
{
if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) {
// if b is a FBCSR matrix, we need an SpGeMM
@@ -122,10 +121,8 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const b,


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha,
const LinOp* const b,
const LinOp* const beta,
LinOp* const x) const
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* alpha, const LinOp* b,
const LinOp* beta, LinOp* x) const
{
if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) {
// if b is a FBCSR matrix, we need an SpGeMM
@@ -148,7 +145,7 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha,

template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
Fbcsr<next_precision<ValueType>, IndexType>* const result) const
Fbcsr<next_precision<ValueType>, IndexType>* result) const
{
result->values_ = this->values_;
result->col_idxs_ = this->col_idxs_;
@@ -161,15 +158,14 @@ void Fbcsr<ValueType, IndexType>::convert_to(

template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(
Fbcsr<next_precision<ValueType>, IndexType>* const result)
Fbcsr<next_precision<ValueType>, IndexType>* result)
{
this->convert_to(result);
}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
Dense<ValueType>* const result) const
void Fbcsr<ValueType, IndexType>::convert_to(Dense<ValueType>* result) const
{
auto exec = this->get_executor();
auto tmp_result = make_temporary_output_clone(exec, result);
@@ -180,15 +176,15 @@ void Fbcsr<ValueType, IndexType>::convert_to(


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(Dense<ValueType>* const result)
void Fbcsr<ValueType, IndexType>::move_to(Dense<ValueType>* result)
{
this->convert_to(result);
}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
Csr<ValueType, IndexType>* const result) const
Csr<ValueType, IndexType>* result) const
{
auto exec = this->get_executor();
{
@@ -204,16 +200,15 @@ void Fbcsr<ValueType, IndexType>::convert_to(


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(
Csr<ValueType, IndexType>* const result)
void Fbcsr<ValueType, IndexType>::move_to(Csr<ValueType, IndexType>* result)
{
this->convert_to(result);
}


template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::convert_to(
SparsityCsr<ValueType, IndexType>* const result) const
SparsityCsr<ValueType, IndexType>* result) const
{
result->set_size(
gko::dim<2>{static_cast<size_type>(this->get_num_block_rows()),
@@ -227,7 +222,7 @@ void Fbcsr<ValueType, IndexType>::convert_to(

template <typename ValueType, typename IndexType>
void Fbcsr<ValueType, IndexType>::move_to(
SparsityCsr<ValueType, IndexType>* const result)
SparsityCsr<ValueType, IndexType>* result)
{
this->convert_to(result);
}
22 changes: 10 additions & 12 deletions cuda/preconditioner/batch_jacobi_kernels.cu
@@ -45,8 +45,7 @@ using batch_jacobi_cuda_compiled_max_block_sizes =
template <typename IndexType>
void compute_cumulative_block_storage(
std::shared_ptr<const DefaultExecutor> exec, const size_type num_blocks,
const IndexType* const block_pointers,
IndexType* const blocks_cumulative_offsets)
const IndexType* block_pointers, IndexType* blocks_cumulative_offsets)
{
dim3 block(default_block_size);
dim3 grid(ceildiv(num_blocks, default_block_size));
@@ -66,8 +65,8 @@ GKO_INSTANTIATE_FOR_INT32_TYPE(
template <typename IndexType>
void find_row_block_map(std::shared_ptr<const DefaultExecutor> exec,
const size_type num_blocks,
const IndexType* const block_pointers,
IndexType* const map_block_to_row)
const IndexType* block_pointers,
IndexType* map_block_to_row)
{
dim3 block(default_block_size);
dim3 grid(ceildiv(num_blocks, default_block_size));
@@ -83,10 +82,10 @@ GKO_INSTANTIATE_FOR_INT32_TYPE(
template <typename ValueType, typename IndexType>
void extract_common_blocks_pattern(
std::shared_ptr<const DefaultExecutor> exec,
const gko::matrix::Csr<ValueType, IndexType>* const first_sys_csr,
const size_type num_blocks, const IndexType* const cumulative_block_storage,
const IndexType* const block_pointers,
const IndexType* const map_block_to_row, IndexType* const blocks_pattern)
const gko::matrix::Csr<ValueType, IndexType>* first_sys_csr,
const size_type num_blocks, const IndexType* cumulative_block_storage,
const IndexType* block_pointers, const IndexType* map_block_to_row,
IndexType* blocks_pattern)
{
const auto nrows = first_sys_csr->get_size()[0];
dim3 block(default_block_size);
@@ -143,11 +142,10 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_block_jacobi_helper,
template <typename ValueType, typename IndexType>
void compute_block_jacobi(
std::shared_ptr<const DefaultExecutor> exec,
const batch::matrix::Csr<ValueType, IndexType>* const sys_csr,
const batch::matrix::Csr<ValueType, IndexType>* sys_csr,
const uint32 max_block_size, const size_type num_blocks,
const IndexType* const cumulative_block_storage,
const IndexType* const block_pointers,
const IndexType* const blocks_pattern, ValueType* const blocks)
const IndexType* cumulative_block_storage, const IndexType* block_pointers,
const IndexType* blocks_pattern, ValueType* blocks)
{
select_compute_block_jacobi_helper(
batch_jacobi_cuda_compiled_max_block_sizes(),
7 changes: 3 additions & 4 deletions cuda/solver/batch_bicgstab_kernels.cu
@@ -217,10 +217,9 @@ private:
template <typename ValueType>
void apply(std::shared_ptr<const DefaultExecutor> exec,
const settings<remove_complex<ValueType>>& settings,
const batch::BatchLinOp* const mat,
const batch::BatchLinOp* const precon,
const batch::MultiVector<ValueType>* const b,
batch::MultiVector<ValueType>* const x,
const batch::BatchLinOp* mat, const batch::BatchLinOp* precon,
const batch::MultiVector<ValueType>* b,
batch::MultiVector<ValueType>* x,
batch::log::detail::log_data<remove_complex<ValueType>>& logdata)
{
using cu_value_type = cuda_type<ValueType>;
7 changes: 3 additions & 4 deletions cuda/solver/batch_cg_kernels.cu
@@ -197,10 +197,9 @@ private:
template <typename ValueType>
void apply(std::shared_ptr<const DefaultExecutor> exec,
const settings<remove_complex<ValueType>>& settings,
const batch::BatchLinOp* const mat,
const batch::BatchLinOp* const precon,
const batch::MultiVector<ValueType>* const b,
batch::MultiVector<ValueType>* const x,
const batch::BatchLinOp* mat, const batch::BatchLinOp* precon,
const batch::MultiVector<ValueType>* b,
batch::MultiVector<ValueType>* x,
batch::log::detail::log_data<remove_complex<ValueType>>& logdata)
{
using cu_value_type = cuda_type<ValueType>;