rapidsai · wphicks · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025
@@ -168,7 +168,7 @@ class sparse_matrix {
                 row_type n_rows,
                 col_type n_cols,
                 nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v<container_type>)
-    : structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {};
+    : structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, nnz)} {};
 
   // Constructor that owns the data but not the structure
   // This constructor is only callable with a `structure_type == *_structure_view`

@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/resources.hpp>
+
+#include <type_traits>
+
+namespace raft {
+namespace sparse {
+namespace linalg {
+namespace detail {
+
+/* Compute the graph Laplacian of an adjacency matrix
+ *
+ * This kernel implements the necessary logic for computing a graph
+ * Laplacian for an adjacency matrix in CSR format. A custom kernel is
+ * required because cusparse does not conveniently implement matrix subtraction with 64-bit
+ * indices. The custom kernel also allows the computation to be completed
+ * with no extra allocations or compute.
+ */
+template <typename ElementType, typename IndptrType, typename IndicesType>
+RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
+                                           IndicesType* output_indices,
+                                           IndptrType* output_indptr,
+                                           IndptrType dim,
+                                           ElementType const* adj_values,
+                                           IndicesType const* adj_indices,
+                                           IndptrType const* adj_indptr)
+{
+  /* The graph Laplacian L of an adjacency matrix A is given by:
+   * L = D - A
+   * where D is the degree matrix of A. The degree matrix is itself defined
+   * as the sum of each row of A and represents the degree of the node
+   * indicated by the index of the row. */
+
+  for (auto row = threadIdx.x + blockIdx.x * blockDim.x; row < dim; row += blockDim.x * gridDim.x) {
+    auto row_begin = adj_indptr[row];
+    auto row_end   = adj_indptr[row + 1];
+    // All output indexes will need to be offset by the row, since every row will
+    // gain exactly one new non-zero element. degree_output_index is the index
+    // where we will store the degree of each row
+    auto degree_output_index = row_begin + row;
+    auto degree_value        = ElementType{};
+    // value_index indicates the index of the current value in the original
+    // adjacency matrix
+    for (auto value_index = row_begin; value_index < row_end; ++value_index) {
+      auto col_index         = adj_indices[value_index];
+      auto is_lower_diagonal = col_index < row;
+      auto output_index      = value_index + row + !is_lower_diagonal;
+      auto input_value       = adj_values[value_index];
+      degree_value += input_value;
+      output_values[output_index]  = ElementType{-1} * input_value;
+      output_indices[output_index] = col_index;
+      // Increment the index where we will store the degree for every non-zero
+      // element before we reach the diagonal
+      degree_output_index += is_lower_diagonal;
+    }
+    output_values[degree_output_index]  = degree_value;
+    output_indices[degree_output_index] = row;
+    output_indptr[row]                  = row_begin + row;
+    output_indptr[row + 1]              = row_end + row + 1;
+  }
+}
+
+}  // namespace detail
+
+/** Given a CSR adjacency matrix, return the graph Laplacian
+ *
+ * Note that for non-symmetric matrices, the out-degree Laplacian is returned.
+ */
+template <typename ElementType, typename IndptrType, typename IndicesType, typename NZType>
+auto compute_graph_laplacian(
+  raft::resources const& res,
+  device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
+{
+  auto input_structure = input.structure_view();
+  auto dim             = input_structure.get_n_rows();
+  RAFT_EXPECTS(dim == input_structure.get_n_cols(),
+               "The graph Laplacian can only be computed on a square adjacency matrix");
+  auto result = make_device_csr_matrix<std::remove_const_t<ElementType>,
+                                       std::remove_const_t<IndptrType>,
+                                       std::remove_const_t<IndicesType>,
+                                       std::remove_const_t<NZType>>(
+    res,
+    dim,
+    dim,
+    /* The nnz for the result will be the dimension of the (square) input matrix plus the number of
+     * non-zero elements in the original matrix, since we introduce non-zero elements along the
+     * diagonal to represent the degree of each node. */
+    input_structure.get_nnz() + dim);
+  auto result_structure                         = result.structure_view();
+  auto static constexpr const threads_per_block = 256;
+  auto blocks = std::min(int((dim + threads_per_block - 1) / threads_per_block), 65535);
+  auto stream = resource::get_cuda_stream(res);
+  detail::compute_graph_laplacian_kernel<<<threads_per_block, blocks, 0, stream>>>(
+    result.get_elements().data(),
+    result_structure.get_indices().data(),
+    result_structure.get_indptr().data(),
+    dim,
+    input.get_elements().data(),
+    input_structure.get_indices().data(),
+    input_structure.get_indptr().data());
+  return result;
+}
+
+}  // namespace linalg
+}  // namespace sparse
+}  // namespace raft
@@ -15,6 +15,8 @@
  */
 #pragma once
 
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/device_span.hpp>
 #include <raft/core/resource/cublas_handle.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resource/cusparse_handle.hpp>
@@ -33,6 +35,7 @@
 #include <thrust/system/cuda/execution_policy.h>
 
 #include <algorithm>
+#include <cstddef>
 
 // =========================================================
 // Useful macros
@@ -181,6 +184,19 @@ struct sparse_matrix_t {
   {
   }
 
+  auto to_csr_matrix_view() const
+  {
+    // The usage of sparse_matrix_t prior to introduction of this method
+    // assumed that all data was strictly on device. We will make the same
+    // assumption for construction of the csr_matrix_view
+    return device_csr_matrix_view<value_type const, index_type const, index_type const, index_type>{
+      device_span<value_type const>{values_, std::uint64_t(nnz_)},
+      device_compressed_structure_view<index_type const, index_type const, index_type>{
+        device_span<index_type const>{row_offsets_, std::uint64_t(nrows_ + 1)},
+        device_span<index_type const>{col_indices_, std::uint64_t(nnz_)},
+        ncols_}};
+  }
+
   virtual ~sparse_matrix_t(void) =
     default;  // virtual because used as base for following matrix types
 

@@ -21,6 +21,7 @@
 #include <raft/spectral/cluster_solvers.cuh>
 #include <raft/spectral/detail/spectral_util.cuh>
 #include <raft/spectral/eigen_solvers.cuh>
+#include <raft/sparse/linalg/laplacian.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
 #include <cuda.h>
@@ -97,14 +98,15 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
   // Compute eigenvectors of Laplacian
 
   // Initialize Laplacian
-  /// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
-  spectral::matrix::laplacian_matrix_t<vertex_t, weight_t, nnz_t> L{handle, csr_m};
+  auto laplacian =
+    raft::sparse::linalg::compute_graph_laplacian(handle, csr_m.to_csr_matrix_view());
 
   auto eigen_config = eigen_solver.get_config();
   auto nEigVecs     = eigen_config.n_eigVecs;
 
   // Compute smallest eigenvalues and eigenvectors
-  std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs);
+  std::get<0>(stats) =
+    eigen_solver.solve_smallest_eigenvectors(handle, laplacian.view(), eigVals, eigVecs);
 
   // Whiten eigenvector matrix
   transform_eigen_matrix(handle, n, nEigVecs, eigVecs);

@@ -18,6 +18,7 @@
 
 #pragma once
 
+#include <raft/core/mdspan.hpp>
 #include <raft/sparse/solver/lanczos.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
@@ -49,28 +50,52 @@ struct lanczos_solver_t {
   {
   }
 
-  index_type_t solve_smallest_eigenvectors(
+  auto solve_smallest_eigenvectors(
     raft::resources const& handle,
     matrix::sparse_matrix_t<index_type_t, value_type_t, size_type_t> const& A,
     value_type_t* __restrict__ eigVals,
     value_type_t* __restrict__ eigVecs) const
+  {
+    auto csr_structure =
+      raft::make_device_compressed_structure_view<index_type_t, index_type_t, index_type_t>(
+        const_cast<index_type_t*>(A.row_offsets_),
+        const_cast<index_type_t*>(A.col_indices_),
+        A.nrows_,
+        A.ncols_,
+        A.nnz_);
+
+    auto csr_matrix =
+      raft::make_device_csr_matrix_view<value_type_t, index_type_t, index_type_t, index_type_t>(
+        const_cast<value_type_t*>(A.values_), csr_structure);
+    return solve_smallest_eigenvectors(handle, csr_matrix, eigVals, eigVecs);
+  }
+
+  auto solve_smallest_eigenvectors(
+    raft::resources const& handle,
+    device_csr_matrix_view<value_type_t, index_type_t, index_type_t, index_type_t> input,
+    value_type_t* __restrict__ eigVals,
+    value_type_t* __restrict__ eigVecs) const
   {
     RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
     RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
-    index_type_t iters{};
-    sparse::solver::computeSmallestEigenvectors(handle,
-                                                A,
-                                                config_.n_eigVecs,
-                                                config_.maxIter,
-                                                config_.restartIter,
-                                                config_.tol,
-                                                config_.reorthogonalize,
-                                                iters,
-                                                eigVals,
-                                                eigVecs,
-                                                config_.seed);
 
-    return iters;
+    auto lanczos_config = raft::sparse::solver::lanczos_solver_config<value_type_t>{
+      config_.n_eigVecs, config_.maxIter, config_.restartIter, config_.tol, config_.seed};
+    auto v0_opt = std::optional<raft::device_vector_view<value_type_t, uint32_t, raft::row_major>>{
+      std::nullopt};
+    auto input_structure = input.structure_view();
+
+    auto solver_iterations = sparse::solver::lanczos_compute_smallest_eigenvectors(
+      handle,
+      lanczos_config,
+      input,
+      v0_opt,
+      raft::make_device_vector_view<value_type_t, uint32_t, raft::col_major>(eigVals,
+                                                                             config_.n_eigVecs),
+      raft::make_device_matrix_view<value_type_t, uint32_t, raft::col_major>(
+        eigVecs, input_structure.get_n_rows(), config_.n_eigVecs));
+
+    return index_type_t{solver_iterations};
   }
 
   index_type_t solve_largest_eigenvectors(

@@ -259,6 +259,7 @@ if(BUILD_TESTS)
     sparse/csr_transpose.cu
     sparse/degree.cu
     sparse/filter.cu
+    sparse/laplacian.cu
     sparse/masked_matmul.cu
     sparse/norm.cu
     sparse/normalize.cu

@@ -14,12 +14,16 @@
  * limitations under the License.
  */
 
+#include <raft/core/handle.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/resource/device_id.hpp>
 #include <raft/core/resources.hpp>
 #include <raft/spectral/eigen_solvers.cuh>
+#include <raft/spectral/modularity_maximization.cuh>
 #include <raft/spectral/partition.cuh>
 
+#include <thrust/device_vector.h>
+
 #include <gtest/gtest.h>
 
 #include <cstddef>
@@ -117,5 +121,85 @@ TEST(Raft, SpectralSolvers)
   EXPECT_ANY_THROW(spectral::analyzePartition(h, sm, k, clusters, edgeCut, cost));
 }
 
+TEST(Raft, SpectralPartition)
+{
+  auto offsets = thrust::device_vector<int>(std::vector<int>{
+    0,  16, 25, 35, 41, 44, 48, 52,  56,  61,  63,  66,  67,  69,  74,  76,  78, 80,
+    82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156});
+  auto indices = thrust::device_vector<int>(std::vector<int>{
+    1,  2,  3,  4,  5,  6,  7,  8,  10, 11, 12, 13, 17, 19, 21, 31, 0,  2,  3,  7,  13, 17, 19,
+    21, 30, 0,  1,  3,  7,  8,  9,  13, 27, 28, 32, 0,  1,  2,  7,  12, 13, 0,  6,  10, 0,  6,
+    10, 16, 0,  4,  5,  16, 0,  1,  2,  3,  0,  2,  30, 32, 33, 2,  33, 0,  4,  5,  0,  0,  3,
+    0,  1,  2,  3,  33, 32, 33, 32, 33, 5,  6,  0,  1,  32, 33, 0,  1,  33, 32, 33, 0,  1,  32,
+    33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2,  23, 24, 33, 2,  31, 33, 23, 26,
+    32, 33, 1,  8,  32, 33, 0,  24, 25, 28, 32, 33, 2,  8,  14, 15, 18, 20, 22, 23, 29, 30, 31,
+    33, 8,  9,  13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32});
+  auto values  = thrust::device_vector<float>(std::vector<float>{
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0});
+
+  auto num_verts = int(offsets.size() - 1);
+  auto num_edges = indices.size();
+
+  auto result_v = thrust::device_vector<int>(std::vector<int>(num_verts, -1));
+
+  auto constexpr const n_clusters  = 8;
+  auto constexpr const n_eig_vects = std::uint64_t{8};
+
+  auto constexpr const evs_tolerance   = .00001f;
+  auto constexpr const kmean_tolerance = .00001f;
+  auto constexpr const evs_max_iter    = 100;
+  auto constexpr const kmean_max_iter  = 100;
+
+  auto eig_vals  = thrust::device_vector<float>(n_eig_vects);
+  auto eig_vects = thrust::device_vector<float>(n_eig_vects * num_verts);
+
+  auto handle = raft::handle_t{};
+
+  auto restartIter_lanczos = int{15 + n_eig_vects};
+
+  auto seed_eig_solver     = std::uint64_t{1234567};
+  auto seed_cluster_solver = std::uint64_t{12345678};
+
+  auto const adj_matrix = raft::spectral::matrix::sparse_matrix_t<int, float>{handle,
+                                                                              offsets.data().get(),
+                                                                              indices.data().get(),
+                                                                              values.data().get(),
+                                                                              num_verts,
+                                                                              num_verts,
+                                                                              num_edges};
+
+  auto eig_cfg = raft::spectral::eigen_solver_config_t<int, float>{
+    n_eig_vects, evs_max_iter, restartIter_lanczos, evs_tolerance, false, seed_eig_solver};
+  auto eigen_solver = raft::spectral::lanczos_solver_t<int, float>{eig_cfg};
+
+  auto clust_cfg = raft::spectral::cluster_solver_config_t<int, float>{
+    n_clusters, kmean_max_iter, kmean_tolerance, seed_cluster_solver};
+  auto cluster_solver = raft::spectral::kmeans_solver_t<int, float>{clust_cfg};
+
+  partition(handle,
+            adj_matrix,
+            eigen_solver,
+            cluster_solver,
+            result_v.data().get(),
+            eig_vals.data().get(),
+            eig_vects.data().get());
+
+  auto edge_cut = float{};
+  auto cost     = float{};
+
+  raft::spectral::analyzePartition(
+    handle, adj_matrix, n_clusters, result_v.data().get(), edge_cut, cost);
+
+  ASSERT_LT(edge_cut, 55.0);
+}
+
 }  // namespace spectral
 }  // namespace raft