Normalize whitespace #593

Merged: 5 commits, Jan 25, 2025
7 changes: 6 additions & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,11 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.

 repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
   - repo: https://github.com/PyCQA/isort
     rev: 5.12.0
     hooks:
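The two hooks added above generate every other change in this PR: `trailing-whitespace` strips blanks at line ends and `end-of-file-fixer` guarantees a single final newline. A minimal sketch of reproducing the cleanup locally, assuming only that `pre-commit` itself is installed from PyPI:

```bash
# One-time setup: register the hooks from .pre-commit-config.yaml with git.
pip install pre-commit
pre-commit install

# Apply every configured hook to the whole tree, not just staged files.
pre-commit run --all-files

# Or run only the two hooks added in this PR.
pre-commit run trailing-whitespace --all-files
pre-commit run end-of-file-fixer --all-files
```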
12 changes: 6 additions & 6 deletions README.md
@@ -29,9 +29,9 @@ cuVS contains state-of-the-art implementations of several algorithms for running

Vector search is an information retrieval method that has been growing in popularity over the past few years, partly because of the rising importance of multimedia embeddings created from unstructured data and the need to perform semantic search on the embeddings to find items which are semantically similar to each other.

-Vector search is also used in _data mining and machine learning_ tasks and comprises an important step in many _clustering_ and _visualization_ algorithms like [UMAP](https://arxiv.org/abs/2008.00325), [t-SNE](https://lvdmaaten.github.io/tsne/), K-means, and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/how_hdbscan_works.html).
+Vector search is also used in _data mining and machine learning_ tasks and comprises an important step in many _clustering_ and _visualization_ algorithms like [UMAP](https://arxiv.org/abs/2008.00325), [t-SNE](https://lvdmaaten.github.io/tsne/), K-means, and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/how_hdbscan_works.html).

-Finally, faster vector search enables interactions between dense vectors and graphs. Converting a pile of dense vectors into nearest neighbors graphs unlocks the entire world of graph analysis algorithms, such as those found in [GraphBLAS](https://graphblas.org/) and [cuGraph](https://github.com/rapidsai/cugraph).
+Finally, faster vector search enables interactions between dense vectors and graphs. Converting a pile of dense vectors into nearest neighbors graphs unlocks the entire world of graph analysis algorithms, such as those found in [GraphBLAS](https://graphblas.org/) and [cuGraph](https://github.com/rapidsai/cugraph).

Below are some common use-cases for vector search

@@ -45,7 +45,7 @@ Below are some common use-cases for vector search
- Audio search
- Molecular search
- Model training


- ### Data mining
- Clustering algorithms
@@ -71,7 +71,7 @@ In addition to the items above, cuVS takes on the burden of keeping non-trivial

## cuVS Technology Stack

-cuVS is built on top of the RAPIDS RAFT library of high performance machine learning primitives and provides all the necessary routines for vector search and clustering on the GPU.
+cuVS is built on top of the RAPIDS RAFT library of high performance machine learning primitives and provides all the necessary routines for vector search and clustering on the GPU.

![cuVS is built on top of low-level CUDA libraries and provides many important routines that enable vector search and clustering on the GPU](img/tech_stack.png "cuVS Technology Stack")

@@ -103,7 +103,7 @@ pip install cuvs-cu11 --extra-index-url=https://pypi.nvidia.com
And CUDA 12 packages:
```bash
pip install cuvs-cu12 --extra-index-url=https://pypi.nvidia.com
-```
+```

### Nightlies
If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`:
@@ -240,7 +240,7 @@ If you are interested in contributing to the cuVS library, please read our [Cont

## References

-For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research.
+For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research.
- [CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search](https://arxiv.org/abs/2308.15136)
- [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062)
- [Fast K-NN Graph Construction by GPU Based NN-Descent](https://dl.acm.org/doi/abs/10.1145/3459637.3482344?casa_token=O_nan1B1F5cAAAAA:QHWDEhh0wmd6UUTLY9_Gv6c3XI-5DXM9mXVaUXOYeStlpxTPmV3nKvABRfoivZAaQ3n8FWyrkWw>)
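Every README change above is whitespace-only: each removed and added line differs solely in trailing blanks. A quick way to hunt for such lines with stock git, no extra tooling assumed:

```bash
# List tracked lines that end in spaces or tabs, with file and line number.
git grep -n -E '[[:blank:]]+$'

# Restrict the check to one file, e.g. the README touched here.
git grep -n -E '[[:blank:]]+$' -- README.md
```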
2 changes: 1 addition & 1 deletion cpp/.clang-format
@@ -27,7 +27,7 @@ AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
-BinPackArguments: false
+BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
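The `.clang-format` edit is likewise whitespace-only, but the file controls C++ layout repo-wide, so it is worth noting how it gets applied. A sketch with illustrative paths, not anything from this PR: `clang-format` walks up the directory tree from each input file and uses the nearest `.clang-format`, so formatting in place under `cpp/` picks up this config automatically:

```bash
# Reformat benchmark headers in place using the repo's .clang-format.
# Paths are examples only; adjust to the files you actually touched.
find cpp/bench -name '*.h' -o -name '*.cuh' | xargs clang-format -i
```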
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
@@ -137,4 +137,4 @@ void cuvs_mg_ivf_flat<T, IdxT>::search(
handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
}

-} // namespace cuvs::bench
+} // namespace cuvs::bench
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
@@ -136,4 +136,4 @@ void cuvs_mg_ivf_pq<T, IdxT>::search(
handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
}

-} // namespace cuvs::bench
+} // namespace cuvs::bench
2 changes: 1 addition & 1 deletion cpp/cmake/config.json
@@ -9,7 +9,7 @@
"VERSION": "?",
"GIT_SHALLOW": "?",
"OPTIONS": "*",
"FIND_PACKAGE_ARGUMENTS": "*"
"FIND_PACKAGE_ARGUMENTS": "*"
}
},
"ConfigureTest": {
5 changes: 2 additions & 3 deletions cpp/cmake/patches/cutlass/build-export.patch
@@ -20,8 +20,7 @@ index 7419bdf5e..545384d82 100755
- $<BUILD_INTERFACE:${cute_SOURCE_DIR}/include>
- $<BUILD_INTERFACE:${cute_SOURCE_DIR}/examples>
)

# Mark CTK headers as system to supress warnings from them
---
+--
2.34.1

2 changes: 1 addition & 1 deletion cpp/cmake/patches/faiss_override.json
@@ -6,4 +6,4 @@
"git_tag": "main"
}
}
-}
+}
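The change to this JSON file (and to the wrapper headers above) looks like a no-op because it most likely only adds the final newline that `end-of-file-fixer` enforces. A tiny POSIX-shell check for the same condition:

```bash
# $(tail -c 1 file) strips a trailing newline, so it is empty exactly when
# the file already ends with one; otherwise print a warning.
f=cpp/cmake/patches/faiss_override.json
[ -n "$(tail -c 1 "$f")" ] && echo "$f: missing trailing newline"
```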
79 changes: 39 additions & 40 deletions cpp/cmake/patches/ggnn.diff
@@ -3,11 +3,11 @@
@@ -62,7 +62,7 @@ struct SimpleKNNSymCache {
const ValueT dist_half)
: dist_query(dist_query), dist_half(dist_half) {}

- __device__ __forceinline__ DistQueryAndHalf() {}
+ DistQueryAndHalf() = default;
};

struct DistanceAndNorm {
@@ -98,8 +98,7 @@ struct SimpleKNNSymCache {
KeyT cache;
@@ -17,7 +17,7 @@
- __device__ __forceinline__ SyncTempStorage() {}
+ SyncTempStorage() = default;
};

public:
diff --git a/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh b/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh
index 8cbaf0d..6eb72ac 100644
@@ -28,23 +28,23 @@ index 8cbaf0d..6eb72ac 100644
#include "ggnn/utils/cuda_knn_utils.cuh"
#include "ggnn/utils/cuda_knn_constants.cuh"
-#include "ggnn/utils/cuda_knn_dataset.cuh"

template <typename ValueT>
__global__ void divide(ValueT* res, ValueT* input, ValueT N) {
@@ -98,9 +97,7 @@ struct GGNNGPUInstance {
typedef GGNNGraphDevice<KeyT, BaseT, ValueT> GGNNGraphDevice;
typedef GGNNGraphHost<KeyT, BaseT, ValueT> GGNNGraphHost;

- const Dataset<KeyT, BaseT, BAddrT>* dataset;
GGNNGraphBuffer<KeyT, ValueT>* ggnn_buffer {nullptr};
- GGNNQuery<KeyT, ValueT, BaseT> ggnn_query;

// Graph Shards resident on the GPU
std::vector<GGNNGraphDevice> ggnn_shards;
@@ -117,13 +114,12 @@ struct GGNNGPUInstance {
// number of shards that need to be processed by this instance
const int num_parts;

- GGNNGPUInstance(const int gpu_id, const Dataset<KeyT, BaseT, BAddrT>* dataset,
+ GGNNGPUInstance(const int gpu_id,
const int N_shard, const int L,
@@ -60,24 +60,24 @@ index 8cbaf0d..6eb72ac 100644
@@ -135,7 +131,6 @@ struct GGNNGPUInstance {
CHECK_EQ(current_gpu_id, gpu_id) << "cudaSetDevice() needs to be called in advance!";
}

- ggnn_query.loadQueriesAsync(dataset->h_query, 0);

computeGraphParameters();

@@ -186,7 +181,7 @@ struct GGNNGPUInstance {
}

GGNNGPUInstance(const GGNNGPUInstance& other)
- : dataset{nullptr}, ggnn_query{0, D, KQuery},
+ :
gpu_id{0}, N_shard{0}, num_parts{0} {
// this exists to allow using vector::emplace_back
// when it triggers a reallocation, this code will be called.
@@ -305,6 +300,7 @@ struct GGNNGPUInstance {

// io

+ /*
void waitForDiskIO(const int shard_id) {
auto& cpu_buffer = ggnn_cpu_buffers[shard_id%ggnn_cpu_buffers.size()];
@@ -87,41 +87,41 @@
CHECK_CUDA(cudaPeekAtLastError());
}
+ */

// graph operations

template <int BLOCK_DIM_X = 32, int MAX_ITERATIONS = 400, int CACHE_SIZE = 512, int SORTED_SIZE = 256, bool DIST_STATS = false>
- void queryLayer(const int shard_id = 0) const {
+ void queryLayer(const BaseT* d_query, int batch_size, KeyT* d_query_result_ids, ValueT* d_query_result_dists, const int shard_id = 0) const {
CHECK_CUDA(cudaSetDevice(gpu_id));
const auto& shard = ggnn_shards.at(shard_id%ggnn_shards.size());

@@ -482,21 +479,21 @@ struct GGNNGPUInstance {

int* m_dist_statistics = nullptr;
if (DIST_STATS)
- cudaMallocManaged(&m_dist_statistics, dataset->N_query * sizeof(int));
+ cudaMallocManaged(&m_dist_statistics, batch_size * sizeof(int));

QueryKernel query_kernel;
query_kernel.d_base = shard.d_base;
- query_kernel.d_query = ggnn_query.d_query;
+ query_kernel.d_query = d_query;

query_kernel.d_graph = shard.d_graph;
- query_kernel.d_query_results = ggnn_query.d_query_result_ids;
- query_kernel.d_query_results_dists = ggnn_query.d_query_result_dists;
+ query_kernel.d_query_results = d_query_result_ids;
+ query_kernel.d_query_results_dists = d_query_result_dists;

query_kernel.d_translation = shard.d_translation;

query_kernel.d_nn1_stats = shard.d_nn1_stats;

- query_kernel.N = dataset->N_query;
+ query_kernel.N = batch_size;
query_kernel.N_offset = 0;

query_kernel.d_dist_stats = m_dist_statistics;
@@ -771,6 +768,16 @@ struct GGNNGPUInstance {
sym(layer, shard_id);
@@ -138,19 +138,19 @@
+ ggnn_shards.at(0).d_base = dataset;
+ }
};

#endif // INCLUDE_GGNN_CUDA_KNN_GGNN_GPU_INSTANCE_CUH_
diff --git a/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh b/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh
index c94a8f1..781226d 100644
--- a/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh
+++ b/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh
@@ -50,7 +50,7 @@ struct GGNNGraphDevice {
ValueT* d_nn1_stats;

/// base data pointer for the shard.
- BaseT* d_base;
+ const BaseT* d_base;

/// combined memory pool
char* d_memory;
@@ -69,7 +69,9 @@ struct GGNNGraphDevice {
@@ -161,12 +161,12 @@
+ // base_size = align8(static_cast<size_t>(N) * D * sizeof(BaseT));
+ (void) N;
+ (void) D;

const size_t total_size = base_size+total_graph_size;

@@ -86,8 +88,7 @@ struct GGNNGraphDevice {
CHECK_CUDA(cudaMalloc(&d_memory, total_size));

size_t pos = 0;
- d_base = reinterpret_cast<BaseT*>(d_memory+pos);
- pos += base_size;
@@ -175,17 +175,17 @@
pos += graph_size;
d_translation = reinterpret_cast<KeyT*>(d_memory+pos);
@@ -99,14 +100,14 @@ struct GGNNGraphDevice {

CHECK_EQ(pos, total_size);

- CHECK_CUDA(cudaStreamCreate(&stream));
+ // CHECK_CUDA(cudaStreamCreate(&stream));

CHECK_CUDA(cudaPeekAtLastError());
CHECK_CUDA(cudaDeviceSynchronize());
CHECK_CUDA(cudaPeekAtLastError());
}

- GGNNGraphDevice(const GGNNGraphDevice& other) {
+ GGNNGraphDevice(const GGNNGraphDevice&) {
// this exists to allow using vector::emplace_back
@@ -194,20 +194,20 @@
@@ -116,7 +117,7 @@ struct GGNNGraphDevice {
~GGNNGraphDevice() {
cudaFree(d_memory);

- CHECK_CUDA(cudaStreamDestroy(stream));
+ // CHECK_CUDA(cudaStreamDestroy(stream));
}
};

diff --git a/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh b/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh
index 2055f9e..ef5843a 100644
--- a/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh
+++ b/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh
@@ -92,7 +92,7 @@ struct GGNNGraphHost {
CHECK_CUDA(cudaPeekAtLastError());
}

- GGNNGraphHost(const GGNNGraphHost& other) {
+ GGNNGraphHost(const GGNNGraphHost&) {
// this exists to allow using vector::emplace_back
@@ -220,11 +220,10 @@ index 49d76a1..eef69e6 100644
@@ -22,7 +22,6 @@ limitations under the License.
#include <cuda.h>
#include <cuda_runtime.h>

-#include <gflags/gflags.h>
#include <cub/cub.cuh>

#include "ggnn/utils/cuda_knn_constants.cuh"
---
+--
2.43.0

2 changes: 1 addition & 1 deletion cpp/cmake/patches/ggnn_override.json
@@ -13,4 +13,4 @@
]
}
}
-}
+}
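`ggnn_override.json` is the override file that points the build at `cpp/cmake/patches/ggnn.diff`, so the whitespace edits to that patch only matter if the patch still applies cleanly. A hedged sanity check using plain `git apply` against a ggnn checkout (the checkout path is an assumption, not part of this PR):

```bash
# From a ggnn source tree at the pinned revision: dry-run the patch.
git apply --check --verbose /path/to/cuvs/cpp/cmake/patches/ggnn.diff

# Summarize which files and how many lines the patch would touch.
git apply --stat /path/to/cuvs/cpp/cmake/patches/ggnn.diff
```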