
Commit 694d371
changes so far
viclafargue committed Jan 29, 2025
1 parent c75cd87
Showing 13 changed files with 69 additions and 48 deletions.

cpp/include/raft/cluster/detail/connectivities.cuh (2 changes: 1 addition & 1 deletion)

@@ -95,7 +95,7 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::KNN_GRAPH, value_idx,
   });
 
   raft::sparse::convert::sorted_coo_to_csr(
-    knn_graph_coo.rows(), (value_idx)knn_graph_coo.nnz, indptr.data(), m + 1, stream);
+    knn_graph_coo.rows(), knn_graph_coo.nnz, indptr.data(), m + 1, stream);
 
   // TODO: Wouldn't need to copy here if we could compute knn
   // graph directly on the device uvectors

cpp/include/raft/cluster/detail/mst.cuh (2 changes: 1 addition & 1 deletion)

@@ -98,7 +98,7 @@ void connect_knn_graph(
 
   rmm::device_uvector<value_idx> indptr2(m + 1, stream);
   raft::sparse::convert::sorted_coo_to_csr(
-    connected_edges.rows(), (value_idx)connected_edges.nnz, indptr2.data(), m + 1, stream);
+    connected_edges.rows(), connected_edges.nnz, indptr2.data(), m + 1, stream);
 
   // On the second call, we hand the MST the original colors
   // and the new set of edges and let it restart the optimization process

cpp/include/raft/sparse/convert/csr.cuh (8 changes: 4 additions & 4 deletions)

@@ -54,10 +54,10 @@ void coo_to_csr(raft::resources const& handle,
  * @param m: number of rows in dense matrix
  * @param stream: cuda stream to use
  */
-template <typename T, typename outT>
-void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream)
+template <typename T, typename nnz_type, typename outT>
+void sorted_coo_to_csr(const T* rows, nnz_type nnz, outT* row_ind, int m, cudaStream_t stream)
 {
-  detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream);
+  detail::sorted_coo_to_csr(rows, (uint64_t)nnz, row_ind, m, stream);
 }
 
 /**
@@ -70,7 +70,7 @@ void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream
 template <typename T, typename outT>
 void sorted_coo_to_csr(COO<T>* coo, outT* row_ind, cudaStream_t stream)
 {
-  detail::sorted_coo_to_csr(coo->rows(), (outT)coo->nnz, row_ind, coo->n_rows, stream);
+  detail::sorted_coo_to_csr(coo->rows(), coo->safe_nnz, row_ind, coo->n_rows, stream);
 }
 
 /**

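A minimal calling sketch for the retemplated overload. The sizes and buffer names are illustrative and not part of the commit; the call mirrors the call sites above, which pass m + 1 (the indptr length) as the final size argument:

// Sketch only: nnz_type is deduced as uint64_t, outT as int.
#include <raft/sparse/convert/csr.cuh>
#include <rmm/device_uvector.hpp>

void convert_example(cudaStream_t stream)
{
  int m        = 4;                                // rows in the dense matrix
  uint64_t nnz = 6;                                // counts wider than int32 now flow through
  rmm::device_uvector<int> rows(nnz, stream);      // sorted COO row indices (T = int)
  rmm::device_uvector<int> indptr(m + 1, stream);  // CSR row offsets (outT = int)
  raft::sparse::convert::sorted_coo_to_csr(rows.data(), nnz, indptr.data(), m + 1, stream);
}
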
cpp/include/raft/sparse/convert/detail/csr.cuh (2 changes: 1 addition & 1 deletion)

@@ -85,7 +85,7 @@ void coo_to_csr(raft::resources const& handle,
  * @param stream: cuda stream to use
  */
 template <typename T, typename outT>
-void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream)
+void sorted_coo_to_csr(const T* rows, uint64_t nnz, outT* row_ind, int m, cudaStream_t stream)
 {
   rmm::device_uvector<outT> row_counts(m, stream);
   RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(outT), stream));

cpp/include/raft/sparse/detail/coo.cuh (67 changes: 44 additions & 23 deletions)

@@ -52,15 +52,22 @@ class COO {
   rmm::device_uvector<T> vals_arr;
 
  public:
-  uint64_t nnz;
+  Index_Type nnz;
+  uint64_t safe_nnz;
   Index_Type n_rows;
   Index_Type n_cols;
 
   /**
    * @param stream: CUDA stream to use
    */
   COO(cudaStream_t stream)
-    : rows_arr(0, stream), cols_arr(0, stream), vals_arr(0, stream), nnz(0), n_rows(0), n_cols(0)
+    : rows_arr(0, stream),
+      cols_arr(0, stream),
+      vals_arr(0, stream),
+      nnz(0),
+      n_rows(0),
+      n_cols(0),
+      safe_nnz(0)
   {
   }
 
@@ -72,13 +79,20 @@
    * @param n_rows: number of rows in the dense matrix
    * @param n_cols: number of cols in the dense matrix
    */
+  template <typename SafeNNZ_Type>
   COO(rmm::device_uvector<Index_Type>& rows,
       rmm::device_uvector<Index_Type>& cols,
       rmm::device_uvector<T>& vals,
-      uint64_t nnz,
+      SafeNNZ_Type nnz,
       Index_Type n_rows = 0,
       Index_Type n_cols = 0)
-    : rows_arr(rows), cols_arr(cols), vals_arr(vals), nnz(nnz), n_rows(n_rows), n_cols(n_cols)
+    : rows_arr(rows),
+      cols_arr(cols),
+      vals_arr(vals),
+      nnz((Index_Type)nnz),
+      n_rows(n_rows),
+      n_cols(n_cols),
+      safe_nnz((uint64_t)nnz)
   {
   }
 
@@ -89,8 +103,9 @@
    * @param n_cols: number of cols in the dense matrix
    * @param init: initialize arrays with zeros
    */
+  template <typename SafeNNZ_Type>
   COO(cudaStream_t stream,
-      uint64_t nnz,
+      SafeNNZ_Type nnz,
       Index_Type n_rows = 0,
       Index_Type n_cols = 0,
       bool init = true)
@@ -99,18 +114,19 @@
       vals_arr(nnz, stream),
       nnz(nnz),
       n_rows(n_rows),
-      n_cols(n_cols)
+      n_cols(n_cols),
+      safe_nnz(nnz)
   {
     if (init) init_arrays(stream);
   }
 
   void init_arrays(cudaStream_t stream)
   {
     RAFT_CUDA_TRY(
-      cudaMemsetAsync(this->rows_arr.data(), 0, this->nnz * sizeof(Index_Type), stream));
+      cudaMemsetAsync(this->rows_arr.data(), 0, this->safe_nnz * sizeof(Index_Type), stream));
     RAFT_CUDA_TRY(
-      cudaMemsetAsync(this->cols_arr.data(), 0, this->nnz * sizeof(Index_Type), stream));
-    RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->nnz * sizeof(T), stream));
+      cudaMemsetAsync(this->cols_arr.data(), 0, this->safe_nnz * sizeof(Index_Type), stream));
+    RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->safe_nnz * sizeof(T), stream));
   }
 
   ~COO() {}
@@ -121,7 +137,7 @@
    */
   bool validate_size() const
   {
-    if (this->nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false;
+    if (this->safe_nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false;
     return true;
   }
 
@@ -162,10 +178,10 @@
     cudaStream_t stream;
     RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 
-    out << raft::arr2Str(c.rows_arr.data(), c.nnz, "rows", stream) << std::endl;
-    out << raft::arr2Str(c.cols_arr.data(), c.nnz, "cols", stream) << std::endl;
-    out << raft::arr2Str(c.vals_arr.data(), c.nnz, "vals", stream) << std::endl;
-    out << "nnz=" << c.nnz << std::endl;
+    out << raft::arr2Str(c.rows_arr.data(), c.safe_nnz, "rows", stream) << std::endl;
+    out << raft::arr2Str(c.cols_arr.data(), c.safe_nnz, "cols", stream) << std::endl;
+    out << raft::arr2Str(c.vals_arr.data(), c.safe_nnz, "vals", stream) << std::endl;
+    out << "nnz=" << c.safe_nnz << std::endl;
     out << "n_rows=" << c.n_rows << std::endl;
     out << "n_cols=" << c.n_cols << std::endl;
 
@@ -204,7 +220,8 @@
    * @param init: should values be initialized to 0?
    * @param stream: CUDA stream to use
    */
-  void allocate(uint64_t nnz, bool init, cudaStream_t stream)
+  template <typename SafeNNZ_Type>
+  void allocate(SafeNNZ_Type nnz, bool init, cudaStream_t stream)
   {
     this->allocate(nnz, 0, init, stream);
   }
@@ -216,7 +233,8 @@
    * @param init: should values be initialized to 0?
    * @param stream: CUDA stream to use
    */
-  void allocate(uint64_t nnz, Index_Type size, bool init, cudaStream_t stream)
+  template <typename SafeNNZ_Type>
+  void allocate(SafeNNZ_Type nnz, Index_Type size, bool init, cudaStream_t stream)
   {
     this->allocate(nnz, size, size, init, stream);
   }
@@ -229,15 +247,18 @@
    * @param init: should values be initialized to 0?
    * @param stream: stream to use for init
    */
-  void allocate(uint64_t nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
+  template <typename SafeNNZ_Type>
+  void allocate(
+    SafeNNZ_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
   {
-    this->n_rows = n_rows;
-    this->n_cols = n_cols;
-    this->nnz    = nnz;
+    this->n_rows   = n_rows;
+    this->n_cols   = n_cols;
+    this->nnz      = (Index_Type)nnz;
+    this->safe_nnz = nnz;
 
-    this->rows_arr.resize(this->nnz, stream);
-    this->cols_arr.resize(this->nnz, stream);
-    this->vals_arr.resize(this->nnz, stream);
+    this->rows_arr.resize(nnz, stream);
+    this->cols_arr.resize(nnz, stream);
+    this->vals_arr.resize(nnz, stream);
 
     if (init) init_arrays(stream);
   }

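The net effect of these class changes: nnz keeps its old Index_Type width so existing call sites still compile, while the new safe_nnz member carries the full 64-bit count, and all internal sizing (memsets, resizes, printing, validation) now reads safe_nnz. A rough usage sketch under assumptions not in the diff (the raft/sparse/coo.hpp include path and all sizes are illustrative, and the large allocation presumes sufficient device memory):

// Sketch only: COO<T = float, Index_Type = int> with a count above INT32_MAX.
#include <raft/sparse/coo.hpp>  // assumed public header re-exporting detail/coo.cuh

void coo_example(cudaStream_t stream)
{
  raft::sparse::COO<float, int> c(stream);
  uint64_t big_nnz = 3'000'000'000ULL;  // does not fit in the Index_Type member
  c.allocate(big_nnz, /*n_rows=*/100000, /*n_cols=*/100000, /*init=*/false, stream);
  // c.nnz holds a truncating (Index_Type) copy; prefer safe_nnz for sizing math:
  size_t bytes = c.safe_nnz * sizeof(float);  // correct even past 2^31
  (void)bytes;
}
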
cpp/include/raft/sparse/linalg/degree.cuh (16 changes: 8 additions & 8 deletions)

@@ -33,10 +33,10 @@ namespace linalg {
  * @param results: output result array
  * @param stream: cuda stream to use
  */
-template <typename T = int, typename outT>
-void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream)
+template <typename T = int, typename nnz_type, typename outT>
+void coo_degree(const T* rows, nnz_type nnz, outT* results, cudaStream_t stream)
 {
-  detail::coo_degree<64, T>(rows, nnz, results, stream);
+  detail::coo_degree<64, T>(rows, (uint64_t)nnz, results, stream);
 }
 
 /**
@@ -50,7 +50,7 @@ void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream)
 template <typename T, typename outT>
 void coo_degree(COO<T>* in, outT* results, cudaStream_t stream)
 {
-  coo_degree(in->rows(), in->nnz, results, stream);
+  coo_degree(in->rows(), in->safe_nnz, results, stream);
 }
 
 /**
@@ -64,11 +64,11 @@ void coo_degree(COO<T>* in, outT* results, cudaStream_t stream)
  * @param results: output row counts
  * @param stream: cuda stream to use
  */
-template <typename T, typename outT>
+template <typename T, typename nnz_type, typename outT>
 void coo_degree_scalar(
-  const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results, cudaStream_t stream = 0)
+  const int* rows, const T* vals, nnz_type nnz, T scalar, outT* results, cudaStream_t stream = 0)
 {
-  detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream);
+  detail::coo_degree_scalar<64>(rows, vals, (uint64_t)nnz, scalar, results, stream);
 }
 
 /**
@@ -83,7 +83,7 @@ void coo_degree_scalar(
 template <typename T, typename outT>
 void coo_degree_scalar(COO<T>* in, T scalar, outT* results, cudaStream_t stream)
 {
-  coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream);
+  coo_degree_scalar(in->rows(), in->vals(), in->safe_nnz, scalar, results, stream);
 }
 
 /**

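Same pattern as the conversion header: the raw-pointer overloads accept any integral nnz_type and cast up to uint64_t before reaching the detail kernels, while the COO* overloads switch from in->nnz to in->safe_nnz. A short sketch with hypothetical buffers (not from the commit; the raft::sparse::linalg namespace is inferred from the file path and the "namespace linalg" context above):

// Sketch only: nnz_type deduced as uint64_t, outT as int.
#include <raft/sparse/linalg/degree.cuh>
#include <rmm/device_uvector.hpp>

void degree_example(cudaStream_t stream, int n_rows)
{
  uint64_t nnz = 1024;                               // 64-bit count
  rmm::device_uvector<int> rows(nnz, stream);        // COO row indices
  rmm::device_uvector<int> degrees(n_rows, stream);  // one counter per row
  raft::sparse::linalg::coo_degree(rows.data(), nnz, degrees.data(), stream);
}
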
[changed file whose path was not captured]

@@ -242,7 +242,7 @@ void perform_1nn(raft::resources const& handle,
   // the color components.
   auto colors_group_idxs = raft::make_device_vector<value_idx, value_idx>(handle, n_components + 1);
   raft::sparse::convert::sorted_coo_to_csr(
-    colors, (value_idx)n_rows, colors_group_idxs.data_handle(), n_components + 1, stream);
+    colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream);
 
   auto group_idxs_view = raft::make_device_vector_view<const value_idx, value_idx>(
     colors_group_idxs.data_handle() + 1, n_components);

cpp/include/raft/spatial/knn/detail/ball_cover.cuh (2 changes: 1 addition & 1 deletion)

@@ -161,7 +161,7 @@ void construct_landmark_1nn(raft::resources const& handle,
 
   // convert to CSR for fast lookup
   raft::sparse::convert::sorted_coo_to_csr(R_1nn_inds.data(),
-                                           (value_idx)index.m,
+                                           index.m,
                                            index.get_R_indptr().data_handle(),
                                            index.n_landmarks + 1,
                                            resource::get_cuda_stream(handle));

cpp/include/raft/spectral/detail/matrix_wrappers.hpp (8 changes: 4 additions & 4 deletions)

@@ -134,15 +134,15 @@ class vector_t {
   const thrust_exec_policy_t thrust_policy;
 };
 
-template <typename index_type, typename value_type>
+template <typename index_type, typename value_type, typename nnz_type = index_type>
 struct sparse_matrix_t {
   sparse_matrix_t(resources const& raft_handle,
                   index_type const* row_offsets,
                   index_type const* col_indices,
                   value_type const* values,
                   index_type const nrows,
                   index_type const ncols,
-                  uint64_t const nnz)
+                  nnz_type const nnz)
     : handle_(raft_handle),
       row_offsets_(row_offsets),
       col_indices_(col_indices),
@@ -158,7 +158,7 @@ struct sparse_matrix_t {
                   index_type const* col_indices,
                   value_type const* values,
                   index_type const nrows,
-                  uint64_t const nnz)
+                  nnz_type const nnz)
     : handle_(raft_handle),
       row_offsets_(row_offsets),
       col_indices_(col_indices),
@@ -311,7 +311,7 @@ struct sparse_matrix_t {
   value_type const* values_;
   index_type const nrows_;
   index_type const ncols_;
-  uint64_t const nnz_;
+  nnz_type const nnz_;
 };
 
 template <typename index_type, typename value_type>

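Because nnz_type defaults to index_type, existing two-parameter instantiations keep compiling unchanged, which is what lets the two test files below drop their static_casts. A sketch of both spellings (the raft::spectral::matrix namespace and the public wrapper header are assumptions, and all values are placeholders):

// Sketch only: default nnz_type vs. an explicitly widened one.
#include <raft/core/resources.hpp>
#include <raft/spectral/matrix_wrappers.hpp>  // assumed public wrapper for the detail header

void wrapper_example(
  raft::resources const& h, int const* ro, int const* ci, float const* vs, int nrows)
{
  using raft::spectral::matrix::sparse_matrix_t;
  int nnz = 0;
  sparse_matrix_t<int, float> sm{h, ro, ci, vs, nrows, nnz};  // nnz_type = int, as before
  sparse_matrix_t<int, float, uint64_t> sm64{
    h, ro, ci, vs, nrows, uint64_t{5'000'000'000}};  // 64-bit count opt-in
}
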
cpp/tests/linalg/eigen_solvers.cu (2 changes: 1 addition & 1 deletion)

@@ -46,7 +46,7 @@ TEST(Raft, EigenSolvers)
   index_type nnz   = 0;
   index_type nrows = 0;
 
-  sparse_matrix_t<index_type, value_type> sm1{h, ro, ci, vs, nrows, static_cast<uint64_t>(nnz)};
+  sparse_matrix_t<index_type, value_type> sm1{h, ro, ci, vs, nrows, nnz};
   ASSERT_EQ(nullptr, sm1.row_offsets_);
 
   index_type neigvs{10};

cpp/tests/sparse/solver/lanczos.cu (2 changes: 1 addition & 1 deletion)

@@ -173,7 +173,7 @@ class rmat_lanczos_tests
     raft::make_device_vector<IndexType, uint32_t, raft::row_major>(handle,
                                                                    symmetric_coo.n_rows + 1);
   raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(),
-                                           static_cast<int>(symmetric_coo.nnz),
+                                           symmetric_coo.nnz,
                                            row_indices.data_handle(),
                                            symmetric_coo.n_rows + 1,
                                            stream);

cpp/tests/sparse/spectral_matrix.cu (2 changes: 1 addition & 1 deletion)

@@ -55,7 +55,7 @@ TEST(Raft, SpectralMatrices)
   value_type* vs{nullptr};
   index_type nnz   = 0;
   index_type nrows = 0;
-  sparse_matrix_t<index_type, value_type> sm1{h, ro, ci, vs, nrows, static_cast<uint64_t>(nnz)};
+  sparse_matrix_t<index_type, value_type> sm1{h, ro, ci, vs, nrows, nnz};
   sparse_matrix_t<index_type, value_type> sm2{h, csr_v};
   ASSERT_EQ(nullptr, sm1.row_offsets_);
   ASSERT_EQ(nullptr, sm2.row_offsets_);

cpp/tests/sparse/symmetrize.cu (2 changes: 1 addition & 1 deletion)

@@ -109,7 +109,7 @@ class SparseSymmetrizeTest
     rmm::device_scalar<value_idx> sum(stream);
     sum.set_value_to_zero_async(stream);
 
-    assert_symmetry<<<raft::ceildiv(out.nnz, (uint64_t)256), 256, 0, stream>>>(
+    assert_symmetry<<<raft::ceildiv(out.nnz, 256), 256, 0, stream>>>(
       out.rows(), out.cols(), out.vals(), (value_idx)out.nnz, sum.data());
 
     sum_h = sum.value(stream);
