Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Laplacian calculation in spectral partitioning #2568

Open
wants to merge 6 commits into
base: branch-25.04
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/include/raft/core/sparse_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class sparse_matrix {
row_type n_rows,
col_type n_cols,
nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v<container_type>)
: structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {};
: structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, nnz)} {};

// Constructor that owns the data but not the structure
// This constructor is only callable with a `structure_type == *_structure_view`
Expand Down
122 changes: 122 additions & 0 deletions cpp/include/raft/sparse/linalg/laplacian.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <raft/core/device_csr_matrix.hpp>
#include <raft/core/resources.hpp>

#include <type_traits>

namespace raft {
namespace sparse {
namespace linalg {
wphicks marked this conversation as resolved.
Show resolved Hide resolved
namespace detail {

/* Compute the graph Laplacian of an adjacency matrix
*
* This kernel implements the necessary logic for computing a graph
* Laplacian for an adjacency matrix in CSR format. A custom kernel is
* required because cusparse does not conveniently implement matrix subtraction with 64-bit
* indices. The custom kernel also allows the computation to be completed
* with no extra allocations or compute.
*/
template <typename ElementType, typename IndptrType, typename IndicesType>
RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
IndicesType* output_indices,
IndptrType* output_indptr,
IndptrType dim,
ElementType const* adj_values,
IndicesType const* adj_indices,
IndptrType const* adj_indptr)
{
/* The graph Laplacian L of an adjacency matrix A is given by:
* L = D - A
* where D is the degree matrix of A. The degree matrix is itself defined
* as the sum of each row of A and represents the degree of the node
* indicated by the index of the row. */

for (auto row = threadIdx.x + blockIdx.x * blockDim.x; row < dim; row += blockDim.x * gridDim.x) {
auto row_begin = adj_indptr[row];
auto row_end = adj_indptr[row + 1];
// All output indexes will need to be offset by the row, since every row will
// gain exactly one new non-zero element. degree_output_index is the index
// where we will store the degree of each row
auto degree_output_index = row_begin + row;
auto degree_value = ElementType{};
// value_index indicates the index of the current value in the original
// adjacency matrix
for (auto value_index = row_begin; value_index < row_end; ++value_index) {
auto col_index = adj_indices[value_index];
auto is_lower_diagonal = col_index < row;
auto output_index = value_index + row + !is_lower_diagonal;
auto input_value = adj_values[value_index];
degree_value += input_value;
output_values[output_index] = ElementType{-1} * input_value;
output_indices[output_index] = col_index;
// Increment the index where we will store the degree for every non-zero
// element before we reach the diagonal
degree_output_index += is_lower_diagonal;
}
output_values[degree_output_index] = degree_value;
output_indices[degree_output_index] = row;
output_indptr[row] = row_begin + row;
output_indptr[row + 1] = row_end + row + 1;
}
}

} // namespace detail

/** Given a CSR adjacency matrix, return the graph Laplacian
*
* Note that for non-symmetric matrices, the out-degree Laplacian is returned.
*/
template <typename ElementType, typename IndptrType, typename IndicesType, typename NZType>
auto compute_graph_laplacian(
raft::resources const& res,
device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
{
auto input_structure = input.structure_view();
auto dim = input_structure.get_n_rows();
RAFT_EXPECTS(dim == input_structure.get_n_cols(),
"The graph Laplacian can only be computed on a square adjacency matrix");
auto result = make_device_csr_matrix<std::remove_const_t<ElementType>,
std::remove_const_t<IndptrType>,
std::remove_const_t<IndicesType>,
std::remove_const_t<NZType>>(
res,
dim,
dim,
/* The nnz for the result will be the dimension of the (square) input matrix plus the number of
* non-zero elements in the original matrix, since we introduce non-zero elements along the
* diagonal to represent the degree of each node. */
input_structure.get_nnz() + dim);
auto result_structure = result.structure_view();
auto static constexpr const threads_per_block = 256;
auto blocks = std::min(int((dim + threads_per_block - 1) / threads_per_block), 65535);
auto stream = resource::get_cuda_stream(res);
detail::compute_graph_laplacian_kernel<<<threads_per_block, blocks, 0, stream>>>(
result.get_elements().data(),
result_structure.get_indices().data(),
result_structure.get_indptr().data(),
dim,
input.get_elements().data(),
input_structure.get_indices().data(),
input_structure.get_indptr().data());
return result;
}

} // namespace linalg
} // namespace sparse
} // namespace raft
16 changes: 16 additions & 0 deletions cpp/include/raft/spectral/detail/matrix_wrappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
#pragma once

#include <raft/core/device_csr_matrix.hpp>
#include <raft/core/device_span.hpp>
#include <raft/core/resource/cublas_handle.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resource/cusparse_handle.hpp>
Expand All @@ -33,6 +35,7 @@
#include <thrust/system/cuda/execution_policy.h>

#include <algorithm>
#include <cstddef>

// =========================================================
// Useful macros
Expand Down Expand Up @@ -181,6 +184,19 @@ struct sparse_matrix_t {
{
}

auto to_csr_matrix_view() const
{
// The usage of sparse_matrix_t prior to introduction of this method
// assumed that all data was strictly on device. We will make the same
// assumption for construction of the csr_matrix_view
return device_csr_matrix_view<value_type const, index_type const, index_type const, index_type>{
device_span<value_type const>{values_, std::uint64_t(nnz_)},
device_compressed_structure_view<index_type const, index_type const, index_type>{
device_span<index_type const>{row_offsets_, std::uint64_t(nrows_ + 1)},
device_span<index_type const>{col_indices_, std::uint64_t(nnz_)},
ncols_}};
}

virtual ~sparse_matrix_t(void) =
default; // virtual because used as base for following matrix types

Expand Down
8 changes: 5 additions & 3 deletions cpp/include/raft/spectral/detail/partition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <raft/spectral/cluster_solvers.cuh>
#include <raft/spectral/detail/spectral_util.cuh>
#include <raft/spectral/eigen_solvers.cuh>
#include <raft/sparse/linalg/laplacian.cuh>
#include <raft/spectral/matrix_wrappers.hpp>

#include <cuda.h>
Expand Down Expand Up @@ -97,14 +98,15 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
// Compute eigenvectors of Laplacian

// Initialize Laplacian
/// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
spectral::matrix::laplacian_matrix_t<vertex_t, weight_t, nnz_t> L{handle, csr_m};
auto laplacian =
raft::sparse::linalg::compute_graph_laplacian(handle, csr_m.to_csr_matrix_view());

auto eigen_config = eigen_solver.get_config();
auto nEigVecs = eigen_config.n_eigVecs;

// Compute smallest eigenvalues and eigenvectors
std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs);
std::get<0>(stats) =
eigen_solver.solve_smallest_eigenvectors(handle, laplacian.view(), eigVals, eigVecs);

// Whiten eigenvector matrix
transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
Expand Down
53 changes: 39 additions & 14 deletions cpp/include/raft/spectral/eigen_solvers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#pragma once

#include <raft/core/mdspan.hpp>
#include <raft/sparse/solver/lanczos.cuh>
#include <raft/spectral/matrix_wrappers.hpp>

Expand Down Expand Up @@ -49,28 +50,52 @@ struct lanczos_solver_t {
{
}

index_type_t solve_smallest_eigenvectors(
auto solve_smallest_eigenvectors(
raft::resources const& handle,
matrix::sparse_matrix_t<index_type_t, value_type_t, size_type_t> const& A,
value_type_t* __restrict__ eigVals,
value_type_t* __restrict__ eigVecs) const
{
auto csr_structure =
raft::make_device_compressed_structure_view<index_type_t, index_type_t, index_type_t>(
const_cast<index_type_t*>(A.row_offsets_),
const_cast<index_type_t*>(A.col_indices_),
wphicks marked this conversation as resolved.
Show resolved Hide resolved
A.nrows_,
A.ncols_,
A.nnz_);

auto csr_matrix =
raft::make_device_csr_matrix_view<value_type_t, index_type_t, index_type_t, index_type_t>(
const_cast<value_type_t*>(A.values_), csr_structure);
return solve_smallest_eigenvectors(handle, csr_matrix, eigVals, eigVecs);
}

auto solve_smallest_eigenvectors(
raft::resources const& handle,
device_csr_matrix_view<value_type_t, index_type_t, index_type_t, index_type_t> input,
value_type_t* __restrict__ eigVals,
value_type_t* __restrict__ eigVecs) const
{
RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
index_type_t iters{};
sparse::solver::computeSmallestEigenvectors(handle,
A,
config_.n_eigVecs,
config_.maxIter,
config_.restartIter,
config_.tol,
config_.reorthogonalize,
iters,
eigVals,
eigVecs,
config_.seed);

return iters;
auto lanczos_config = raft::sparse::solver::lanczos_solver_config<value_type_t>{
config_.n_eigVecs, config_.maxIter, config_.restartIter, config_.tol, config_.seed};
auto v0_opt = std::optional<raft::device_vector_view<value_type_t, uint32_t, raft::row_major>>{
std::nullopt};
auto input_structure = input.structure_view();

auto solver_iterations = sparse::solver::lanczos_compute_smallest_eigenvectors(
handle,
lanczos_config,
input,
v0_opt,
raft::make_device_vector_view<value_type_t, uint32_t, raft::col_major>(eigVals,
config_.n_eigVecs),
raft::make_device_matrix_view<value_type_t, uint32_t, raft::col_major>(
eigVecs, input_structure.get_n_rows(), config_.n_eigVecs));

return index_type_t{solver_iterations};
}

index_type_t solve_largest_eigenvectors(
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ if(BUILD_TESTS)
sparse/csr_transpose.cu
sparse/degree.cu
sparse/filter.cu
sparse/laplacian.cu
sparse/masked_matmul.cu
sparse/norm.cu
sparse/normalize.cu
Expand Down
84 changes: 84 additions & 0 deletions cpp/tests/linalg/eigen_solvers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@
* limitations under the License.
*/

#include <raft/core/handle.hpp>
#include <raft/core/nvtx.hpp>
#include <raft/core/resource/device_id.hpp>
#include <raft/core/resources.hpp>
#include <raft/spectral/eigen_solvers.cuh>
#include <raft/spectral/modularity_maximization.cuh>
#include <raft/spectral/partition.cuh>

#include <thrust/device_vector.h>

#include <gtest/gtest.h>

#include <cstddef>
Expand Down Expand Up @@ -117,5 +121,85 @@ TEST(Raft, SpectralSolvers)
EXPECT_ANY_THROW(spectral::analyzePartition(h, sm, k, clusters, edgeCut, cost));
}

TEST(Raft, SpectralPartition)
{
auto offsets = thrust::device_vector<int>(std::vector<int>{
0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, 67, 69, 74, 76, 78, 80,
82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156});
auto indices = thrust::device_vector<int>(std::vector<int>{
1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19,
21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6,
10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3,
0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32,
33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26,
32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31,
33, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32});
auto values = thrust::device_vector<float>(std::vector<float>{
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0});

auto num_verts = int(offsets.size() - 1);
auto num_edges = indices.size();

auto result_v = thrust::device_vector<int>(std::vector<int>(num_verts, -1));

auto constexpr const n_clusters = 8;
auto constexpr const n_eig_vects = std::uint64_t{8};

auto constexpr const evs_tolerance = .00001f;
auto constexpr const kmean_tolerance = .00001f;
auto constexpr const evs_max_iter = 100;
auto constexpr const kmean_max_iter = 100;

auto eig_vals = thrust::device_vector<float>(n_eig_vects);
auto eig_vects = thrust::device_vector<float>(n_eig_vects * num_verts);

auto handle = raft::handle_t{};

auto restartIter_lanczos = int{15 + n_eig_vects};

auto seed_eig_solver = std::uint64_t{1234567};
auto seed_cluster_solver = std::uint64_t{12345678};

auto const adj_matrix = raft::spectral::matrix::sparse_matrix_t<int, float>{handle,
offsets.data().get(),
indices.data().get(),
values.data().get(),
num_verts,
num_verts,
num_edges};

auto eig_cfg = raft::spectral::eigen_solver_config_t<int, float>{
n_eig_vects, evs_max_iter, restartIter_lanczos, evs_tolerance, false, seed_eig_solver};
auto eigen_solver = raft::spectral::lanczos_solver_t<int, float>{eig_cfg};

auto clust_cfg = raft::spectral::cluster_solver_config_t<int, float>{
n_clusters, kmean_max_iter, kmean_tolerance, seed_cluster_solver};
auto cluster_solver = raft::spectral::kmeans_solver_t<int, float>{clust_cfg};

partition(handle,
adj_matrix,
eigen_solver,
cluster_solver,
result_v.data().get(),
eig_vals.data().get(),
eig_vects.data().get());

auto edge_cut = float{};
auto cost = float{};

raft::spectral::analyzePartition(
handle, adj_matrix, n_clusters, result_v.data().get(), edge_cut, cost);

ASSERT_LT(edge_cut, 55.0);
}

} // namespace spectral
} // namespace raft
Loading
Loading