From 441d2f1bcceb8f653a0fdaec5658c54c5201155b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Mon, 2 Dec 2024 17:34:08 -0500 Subject: [PATCH 1/3] HNSW CPU Hierarchy (#465) This PR adds an option to build the full HNSW hierarchy on the CPU when converting a CAGRA index to an hnswlib index. This lets us enable an `extend()` API. For hnswlib: 1. Update to `v0.7.0` 2. Remove dependency as symbols are compiled within DSO Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/465 --- .../bench_ann_cuda-118_arch-aarch64.yaml | 1 - .../bench_ann_cuda-118_arch-x86_64.yaml | 1 - .../bench_ann_cuda-125_arch-aarch64.yaml | 1 - .../bench_ann_cuda-125_arch-x86_64.yaml | 1 - cpp/CMakeLists.txt | 1 + cpp/bench/ann/CMakeLists.txt | 4 +- cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu | 34 +- .../ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h | 57 ++- cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h | 2 + .../ann/src/hnswlib/hnswlib_benchmark.cpp | 4 +- cpp/cmake/modules/ConfigureCUDA.cmake | 8 +- cpp/cmake/patches/hnswlib.diff | 327 ++++++------- cpp/cmake/patches/hnswlib_override.json | 28 +- cpp/cmake/thirdparty/get_hnswlib.cmake | 1 + cpp/include/cuvs/neighbors/hnsw.h | 279 ++++++++++- cpp/include/cuvs/neighbors/hnsw.hpp | 440 +++++++++++++++--- cpp/src/neighbors/detail/hnsw.hpp | 218 ++++++++- cpp/src/neighbors/hnsw.cpp | 57 ++- cpp/src/neighbors/hnsw_c.cpp | 157 ++++++- cpp/src/neighbors/iface/iface.hpp | 1 + cpp/test/neighbors/ann_hnsw_c.cu | 4 +- cpp/test/neighbors/hnsw.cu | 3 +- dependencies.yaml | 1 - docs/source/c_api/neighbors_hnsw_c.rst | 22 + docs/source/cpp_api/neighbors_hnsw.rst | 23 +- python/cuvs/cuvs/neighbors/hnsw/__init__.py | 15 +- python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd | 49 +- python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx | 276 ++++++++--- python/cuvs/cuvs/test/test_hnsw.py | 89 +++- .../config/algos/cuvs_cagra_hnswlib.yaml | 5 +- 30 files changed, 1683 insertions(+), 426 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 21cb98180..1e602ccf1 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -24,7 +24,6 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 432509bcb..b060e78c2 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -24,7 +24,6 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 0c5043ac2..485122273 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -25,7 +25,6 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index cbb22333c..d5f48dadb 
100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -25,7 +25,6 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb2e7c7a4..34b7cb898 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -577,6 +577,7 @@ if(BUILD_SHARED_LIBS) if(BUILD_CAGRA_HNSWLIB) target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib) + target_compile_definitions(cuvs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) endif() diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 0f6b42ae9..c161a68bc 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -225,9 +225,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA) endif() if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) - ConfigureAnnBench( - NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib - ) + ConfigureAnnBench(NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs) endif() if(CUVS_ANN_BENCH_USE_CUVS_MG) diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu index 558ba01e0..e45a3bd5a 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu @@ -24,12 +24,35 @@ namespace cuvs::bench { +template +void parse_build_param(const nlohmann::json& conf, + typename cuvs::bench::cuvs_cagra_hnswlib::build_param& param) +{ + if (conf.contains("hierarchy")) { + if (conf.at("hierarchy") == "none") { + param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::NONE; + } else if (conf.at("hierarchy") == "cpu") { + param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU; + } else { + THROW("Invalid value for hierarchy: %s", conf.at("hierarchy").get().c_str()); + } + } + if (conf.contains("ef_construction")) { + param.hnsw_index_params.ef_construction = conf.at("ef_construction"); + } + if (conf.contains("num_threads")) { + param.hnsw_index_params.num_threads = conf.at("num_threads"); + } +} + template void parse_search_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_cagra_hnswlib::search_param& param) { - param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } + param.hnsw_search_param.ef = conf.at("ef"); + if (conf.contains("num_threads")) { + param.hnsw_search_param.num_threads = conf.at("num_threads"); + } } template @@ -43,9 +66,10 @@ auto create_algo(const std::string& algo_name, if constexpr (std::is_same_v or std::is_same_v) { if (algo_name == "raft_cagra_hnswlib" || algo_name == "cuvs_cagra_hnswlib") { - typename cuvs::bench::cuvs_cagra_hnswlib::build_param param; - parse_build_param(conf, param); - a = std::make_unique>(metric, dim, param); + typename cuvs::bench::cuvs_cagra_hnswlib::build_param bparam; + ::parse_build_param(conf, bparam.cagra_build_param); + parse_build_param(conf, bparam); + a = std::make_unique>(metric, dim, bparam); } } diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h index 875fe0bba..e4169f6f8 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h @@ -15,8 +15,8 @@ */ #pragma once -#include "../hnswlib/hnswlib_wrapper.h" #include "cuvs_cagra_wrapper.h" 
+#include #include @@ -26,14 +26,20 @@ template class cuvs_cagra_hnswlib : public algo, public algo_gpu { public: using search_param_base = typename algo::search_param; - using build_param = typename cuvs_cagra::build_param; - using search_param = typename hnsw_lib::search_param; + + struct build_param { + typename cuvs_cagra::build_param cagra_build_param; + cuvs::neighbors::hnsw::index_params hnsw_index_params; + }; + + struct search_param : public search_param_base { + cuvs::neighbors::hnsw::search_params hnsw_search_param; + }; cuvs_cagra_hnswlib(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) : algo(metric, dim), - cagra_build_{metric, dim, param, concurrent_searches}, - // hnsw_lib param values don't matter since we don't build with hnsw_lib - hnswlib_search_{metric, dim, typename hnsw_lib::build_param{50, 100}} + build_param_{param}, + cagra_build_{metric, dim, param.cagra_build_param, concurrent_searches} { } @@ -69,40 +75,67 @@ class cuvs_cagra_hnswlib : public algo, public algo_gpu { } private: + raft::resources handle_{}; + build_param build_param_; + search_param search_param_; cuvs_cagra cagra_build_; - hnsw_lib hnswlib_search_; + std::shared_ptr> hnsw_index_; }; template void cuvs_cagra_hnswlib::build(const T* dataset, size_t nrow) { cagra_build_.build(dataset, nrow); + auto* cagra_index = cagra_build_.get_index(); + auto host_dataset_view = raft::make_host_matrix_view(dataset, nrow, this->dim_); + auto opt_dataset_view = + std::optional>(std::move(host_dataset_view)); + hnsw_index_ = cuvs::neighbors::hnsw::from_cagra( + handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view); } template void cuvs_cagra_hnswlib::set_search_param(const search_param_base& param_) { - hnswlib_search_.set_search_param(param_); + search_param_ = dynamic_cast(param_); } template void cuvs_cagra_hnswlib::save(const std::string& file) const { - cagra_build_.save_to_hnswlib(file); + cuvs::neighbors::hnsw::serialize(handle_, file, *(hnsw_index_.get())); } template void cuvs_cagra_hnswlib::load(const std::string& file) { - hnswlib_search_.load(file); - hnswlib_search_.set_base_layer_only(); + cuvs::neighbors::hnsw::index* idx = nullptr; + cuvs::neighbors::hnsw::deserialize(handle_, + build_param_.hnsw_index_params, + file, + this->dim_, + parse_metric_type(this->metric_), + &idx); + hnsw_index_ = std::shared_ptr>(idx); } template void cuvs_cagra_hnswlib::search( const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const { - hnswlib_search_.search(queries, batch_size, k, neighbors, distances); + // Only Latency mode is supported for now + auto queries_view = + raft::make_host_matrix_view(queries, batch_size, this->dim_); + auto neighbors_view = raft::make_host_matrix_view( + reinterpret_cast(neighbors), batch_size, k); + auto distances_view = raft::make_host_matrix_view(distances, batch_size, k); + + cuvs::neighbors::hnsw::search(handle_, + search_param_.hnsw_search_param, + *(hnsw_index_.get()), + queries_view, + neighbors_view, + distances_view); } } // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index b2ba35eee..f6d3d60fc 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -154,6 +154,8 @@ class cuvs_cagra : public algo, public algo_gpu { void save_to_hnswlib(const std::string& file) const; std::unique_ptr> copy() override; + auto get_index() const -> const 
cuvs::neighbors::cagra::index* { return index_.get(); } + private: // handle_ must go first to make sure it dies last and all memory allocated in pool configured_raft_resources handle_{}; diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index 755c7c8d6..6e219d2a7 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -33,7 +33,7 @@ void parse_build_param(const nlohmann::json& conf, { param.ef_construction = conf.at("efConstruction"); param.m = conf.at("M"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } + if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); } } template @@ -41,7 +41,7 @@ void parse_search_param(const nlohmann::json& conf, typename cuvs::bench::hnsw_lib::search_param& param) { param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } + if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); } } template class Algo> diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 74da25660..3e91d9995 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -22,8 +22,12 @@ endif() # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with # clang) if(CMAKE_COMPILER_IS_GNUCXX) - list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) - list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) + list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations + -Wno-reorder + ) + list(APPEND CUVS_CUDA_FLAGS + -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations,-Wno-reorder + ) # set warnings as errors if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) diff --git a/cpp/cmake/patches/hnswlib.diff b/cpp/cmake/patches/hnswlib.diff index e7f89a8cc..f20c27d91 100644 --- a/cpp/cmake/patches/hnswlib.diff +++ b/cpp/cmake/patches/hnswlib.diff @@ -1,188 +1,159 @@ +diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h +index bef0017..0ee7931 100644 --- a/hnswlib/hnswalg.h +++ b/hnswlib/hnswalg.h -@@ -3,6 +3,7 @@ - #include "visited_list_pool.h" - #include "hnswlib.h" - #include -+#include - #include - #include - #include -@@ -16,6 +17,8 @@ namespace hnswlib { - template - class HierarchicalNSW : public AlgorithmInterface { - public: -+ bool base_layer_only{false}; -+ int num_seeds=32; - static const tableint max_update_element_locks = 65536; - HierarchicalNSW(SpaceInterface *s) { - } -@@ -56,7 +59,7 @@ namespace hnswlib { - visited_list_pool_ = new VisitedListPool(1, max_elements); - - //initializations for special treatment of the first node -- enterpoint_node_ = -1; -+ enterpoint_node_ = std::numeric_limits::max(); - maxlevel_ = -1; - - linkLists_ = (char **) malloc(sizeof(void *) * max_elements_); -@@ -527,7 +530,7 @@ namespace hnswlib { - tableint *datal = (tableint *) (data + 1); - for (int i = 0; i < size; i++) { - tableint cand = datal[i]; -- if (cand < 0 || cand > max_elements_) -+ if (cand > max_elements_) - throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); - -@@ -1067,7 +1070,7 @@ namespace hnswlib { - tableint *datal = (tableint *) (data + 1); - for (int i = 0; i < size; i++) { - tableint cand = datal[i]; -- if (cand < 
0 || cand > max_elements_) -+ if (cand > max_elements_) - throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); - if (d < curdist) { -@@ -1119,28 +1122,41 @@ namespace hnswlib { - tableint currObj = enterpoint_node_; - dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); - -- for (int level = maxlevel_; level > 0; level--) { -- bool changed = true; -- while (changed) { -- changed = false; -- unsigned int *data; -+ if (base_layer_only) { -+ // You can increase the number of seeds when testing large-scale dataset, num_seeds = 48 for 100M-scale -+ for (int i = 0; i < num_seeds; i++) { -+ tableint obj = i * (max_elements_ / num_seeds); -+ dist_t dist = fstdistfunc_(query_data, getDataByInternalId(obj), dist_func_param_); -+ if (dist < curdist) { -+ curdist = dist; -+ currObj = obj; -+ } +@@ -16,6 +16,9 @@ typedef unsigned int linklistsizeint; + template + class HierarchicalNSW : public AlgorithmInterface { + public: ++ bool base_layer_only = false; ++ int num_seeds = 32; ++ bool base_layer_init = true; + static const tableint MAX_LABEL_OPERATION_LOCKS = 65536; + static const unsigned char DELETE_MARK = 0x01; + +@@ -1098,7 +1101,7 @@ class HierarchicalNSW : public AlgorithmInterface { + + std::unique_lock lock_el(link_list_locks_[cur_c]); + int curlevel = getRandomLevel(mult_); +- if (level > 0) ++ if (level > -1) + curlevel = level; + + element_levels_[cur_c] = curlevel; +@@ -1116,6 +1119,9 @@ class HierarchicalNSW : public AlgorithmInterface { + memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); + memcpy(getDataByInternalId(cur_c), data_point, data_size_); + ++ if (!base_layer_init && curlevel == 0) ++ return cur_c; ++ + if (curlevel) { + linkLists_[cur_c] = (char *) malloc(size_links_per_element_ * curlevel + 1); + if (linkLists_[cur_c] == nullptr) +@@ -1138,7 +1144,7 @@ class HierarchicalNSW : public AlgorithmInterface { + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; +- if (cand < 0 || cand > max_elements_) ++ if (static_cast(cand) < 0 || cand > max_elements_) + throw std::runtime_error("cand error"); + dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); + if (d < curdist) { +@@ -1188,28 +1194,41 @@ class HierarchicalNSW : public AlgorithmInterface { + tableint currObj = enterpoint_node_; + dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + +- for (int level = maxlevel_; level > 0; level--) { +- bool changed = true; +- while (changed) { +- changed = false; +- unsigned int *data; ++ if (base_layer_only) { ++ // You can increase the number of seeds when testing large-scale dataset, num_seeds = 48 for 100M-scale ++ for (int i = 0; i < num_seeds; i++) { ++ tableint obj = i * (max_elements_ / num_seeds); ++ dist_t dist = fstdistfunc_(query_data, getDataByInternalId(obj), dist_func_param_); ++ if (dist < curdist) { ++ curdist = dist; ++ currObj = obj; + } + } -+ else{ -+ for (int level = maxlevel_; level > 0; level--) { -+ bool changed = true; -+ while (changed) { -+ changed = false; -+ unsigned int *data; - -- data = (unsigned int *) get_linklist(currObj, level); -- int size = getListCount(data); -- metric_hops++; -- metric_distance_computations+=size; -+ data = (unsigned int *) get_linklist(currObj, level); -+ int size = getListCount(data); -+ metric_hops++; -+ metric_distance_computations+=size; - -- tableint *datal = (tableint 
*) (data + 1); -- for (int i = 0; i < size; i++) { -- tableint cand = datal[i]; -- if (cand < 0 || cand > max_elements_) -- throw std::runtime_error("cand error"); -- dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); -+ tableint *datal = (tableint *) (data + 1); -+ for (int i = 0; i < size; i++) { -+ tableint cand = datal[i]; -+ if (cand > max_elements_) -+ throw std::runtime_error("cand error"); -+ dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); - -- if (d < curdist) { -- curdist = d; -- currObj = cand; -- changed = true; -+ if (d < curdist) { -+ curdist = d; -+ currObj = cand; -+ changed = true; -+ } - } ++ } ++ else { ++ for (int level = maxlevel_; level > 0; level--) { ++ bool changed = true; ++ while (changed) { ++ changed = false; ++ unsigned int *data; + +- data = (unsigned int *) get_linklist(currObj, level); +- int size = getListCount(data); +- metric_hops++; +- metric_distance_computations+=size; ++ data = (unsigned int *) get_linklist(currObj, level); ++ int size = getListCount(data); ++ metric_hops++; ++ metric_distance_computations+=size; ++ ++ tableint *datal = (tableint *) (data + 1); ++ for (int i = 0; i < size; i++) { ++ tableint cand = datal[i]; ++ if (static_cast(cand) < 0 || cand > max_elements_) ++ throw std::runtime_error("cand error"); ++ dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + +- tableint *datal = (tableint *) (data + 1); +- for (int i = 0; i < size; i++) { +- tableint cand = datal[i]; +- if (cand < 0 || cand > max_elements_) +- throw std::runtime_error("cand error"); +- dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); +- +- if (d < curdist) { +- curdist = d; +- currObj = cand; +- changed = true; ++ if (d < curdist) { ++ curdist = d; ++ currObj = cand; ++ changed = true; ++ } } } + } diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h -index 4413537..c3240f3 100644 +index 834d19f..0c0af26 100644 --- a/hnswlib/space_l2.h +++ b/hnswlib/space_l2.h -@@ -252,13 +252,14 @@ namespace hnswlib { - ~L2Space() {} - }; - -+ template - static int - L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) { - - size_t qty = *((size_t *) qty_ptr); - int res = 0; -- unsigned char *a = (unsigned char *) pVect1; -- unsigned char *b = (unsigned char *) pVect2; -+ T *a = (T *) pVect1; -+ T *b = (T *) pVect2; - - qty = qty >> 2; - for (size_t i = 0; i < qty; i++) { -@@ -279,11 +280,12 @@ namespace hnswlib { - return (res); - } - -+ template - static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) { - size_t qty = *((size_t*)qty_ptr); - int res = 0; -- unsigned char* a = (unsigned char*)pVect1; -- unsigned char* b = (unsigned char*)pVect2; -+ T* a = (T*)pVect1; -+ T* b = (T*)pVect2; - - for(size_t i = 0; i < qty; i++) - { -@@ -294,6 +296,7 @@ namespace hnswlib { - return (res); - } - -+ template - class L2SpaceI : public SpaceInterface { - - DISTFUNC fstdistfunc_; -@@ -302,10 +305,10 @@ namespace hnswlib { - public: - L2SpaceI(size_t dim) { - if(dim % 4 == 0) { -- fstdistfunc_ = L2SqrI4x; -+ fstdistfunc_ = L2SqrI4x; - } - else { -- fstdistfunc_ = L2SqrI; -+ fstdistfunc_ = L2SqrI; - } - dim_ = dim; - data_size_ = dim * sizeof(unsigned char); -diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h -index 5e1a4a5..4195ebd 100644 ---- a/hnswlib/visited_list_pool.h -+++ b/hnswlib/visited_list_pool.h -@@ -3,6 +3,7 @@ - #include - 
#include - #include -+#include - - namespace hnswlib { - typedef unsigned short int vl_type; -@@ -14,7 +15,7 @@ namespace hnswlib { - unsigned int numelements; - - VisitedList(int numelements1) { -- curV = -1; -+ curV = std::numeric_limits::max(); - numelements = numelements1; - mass = new vl_type[numelements]; +@@ -252,12 +252,13 @@ class L2Space : public SpaceInterface { + ~L2Space() {} + }; + ++template + static int + L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) { + size_t qty = *((size_t *) qty_ptr); + int res = 0; +- unsigned char *a = (unsigned char *) pVect1; +- unsigned char *b = (unsigned char *) pVect2; ++ T *a = (T *) pVect1; ++ T *b = (T *) pVect2; + + qty = qty >> 2; + for (size_t i = 0; i < qty; i++) { +@@ -277,11 +278,12 @@ L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const voi + return (res); + } + ++template + static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) { + size_t qty = *((size_t*)qty_ptr); + int res = 0; +- unsigned char* a = (unsigned char*)pVect1; +- unsigned char* b = (unsigned char*)pVect2; ++ T* a = (T*)pVect1; ++ T* b = (T*)pVect2; + + for (size_t i = 0; i < qty; i++) { + res += ((*a) - (*b)) * ((*a) - (*b)); +@@ -291,6 +293,7 @@ static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, + return (res); + } + ++template + class L2SpaceI : public SpaceInterface { + DISTFUNC fstdistfunc_; + size_t data_size_; +@@ -299,9 +302,9 @@ class L2SpaceI : public SpaceInterface { + public: + L2SpaceI(size_t dim) { + if (dim % 4 == 0) { +- fstdistfunc_ = L2SqrI4x; ++ fstdistfunc_ = L2SqrI4x; + } else { +- fstdistfunc_ = L2SqrI; ++ fstdistfunc_ = L2SqrI; } --- -2.43.0 - + dim_ = dim; + data_size_ = dim * sizeof(unsigned char); diff --git a/cpp/cmake/patches/hnswlib_override.json b/cpp/cmake/patches/hnswlib_override.json index aef2da772..c50220e24 100644 --- a/cpp/cmake/patches/hnswlib_override.json +++ b/cpp/cmake/patches/hnswlib_override.json @@ -1,16 +1,16 @@ { - "packages" : { - "hnswlib" : { - "version": "0.6.2", - "git_url": "https://github.com/nmslib/hnswlib.git", - "git_tag": "v${version}", - "patches" : [ - { - "file" : "${current_json_dir}/hnswlib.diff", - "issue" : "Correct compilation issues", - "fixed_in" : "" - } - ] - } + "packages": { + "hnswlib": { + "version": "0.7.0", + "git_url": "https://github.com/nmslib/hnswlib.git", + "git_tag": "v${version}", + "patches": [ + { + "file": "${current_json_dir}/hnswlib.diff", + "issue": "Correct compilation issues", + "fixed_in": "" + } + ] } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_hnswlib.cmake b/cpp/cmake/thirdparty/get_hnswlib.cmake index 2e6c895e5..5b4d89aa2 100644 --- a/cpp/cmake/thirdparty/get_hnswlib.cmake +++ b/cpp/cmake/thirdparty/get_hnswlib.cmake @@ -15,6 +15,7 @@ #============================================================================= function(find_and_configure_hnswlib) + message(STATUS "Finding or building hnswlib") set(oneValueArgs) include(${rapids-cmake-dir}/cpm/package_override.cmake) diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h index 0495c574a..b7eda54b8 100644 --- a/cpp/include/cuvs/neighbors/hnsw.h +++ b/cpp/include/cuvs/neighbors/hnsw.h @@ -16,6 +16,8 @@ #pragma once +#include "cagra.h" + #include #include #include @@ -27,32 +29,51 @@ extern "C" { #endif /** - * @defgroup hnsw_c_search_params C API for hnswlib wrapper search 
params + * @defgroup hnsw_c_index_params C API for HNSW index params * @{ */ -struct cuvsHnswSearchParams { - int32_t ef; - int32_t numThreads; +/** + * @brief Hierarchy for HNSW index when converting from CAGRA index + * + * NOTE: When the value is `NONE`, the HNSW index is built as a base-layer-only index. + */ +enum cuvsHnswHierarchy { + /* Flat hierarchy, search is base-layer only */ + NONE, + /* Full hierarchy is built using the CPU */ + CPU }; -typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t; +struct cuvsHnswIndexParams { + /* hierarchy of the hnsw index */ + cuvsHnswHierarchy hierarchy; + /** Size of the candidate list during hierarchy construction when hierarchy is `CPU` */ + int ef_construction; + /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` + NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive + to parallelism, and increasing the number of threads can reduce the quality of the index. + */ + int num_threads; +}; + +typedef struct cuvsHnswIndexParams* cuvsHnswIndexParams_t; /** - * @brief Allocate HNSW search params, and populate with default values + * @brief Allocate HNSW Index params, and populate with default values * - * @param[in] params cuvsHnswSearchParams_t to allocate + * @param[in] params cuvsHnswIndexParams_t to allocate * @return cuvsError_t */ -cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params); +cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params); /** - * @brief De-allocate HNSW search params + * @brief De-allocate HNSW Index params * - * @param[in] params cuvsHnswSearchParams_t to de-allocate + * @param[in] params cuvsHnswIndexParams_t to de-allocate + * @return cuvsError_t */ -cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params); +cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params); /** * @} @@ -90,6 +111,184 @@ cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index); */ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); +/** + * @} + */ + +/** + * @defgroup hnsw_c_extend_params Parameters for extending HNSW index + * @{ + */ + +struct cuvsHnswExtendParams { + /** Number of CPU threads used to extend additional vectors */ + int num_threads; +}; + +typedef struct cuvsHnswExtendParams* cuvsHnswExtendParams_t; + +/** + * @brief Allocate HNSW extend params, and populate with default values + * + * @param[in] params cuvsHnswExtendParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params); + +/** + * @brief De-allocate HNSW extend params + * + * @param[in] params cuvsHnswExtendParams_t to de-allocate + * @return cuvsError_t + */ + +cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params); + +/** + * @} + */ + +/** + * @defgroup hnsw_c_index_load Load CAGRA index as hnswlib index + * @{ + */ + +/** + * @brief Convert a CAGRA index to an HNSW index. + * NOTE: When hierarchy is: + * 1. `NONE`: This method uses the filesystem to write the CAGRA index in + * `/tmp/.bin` before reading it as an hnswlib index, then deleting the temporary + * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as + * the format is not compatible with the original hnswlib. + * 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The + * serialized index is also compatible with the original hnswlib library. 
+ * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index + * @param[in] cagra_index cuvsCagraIndex_t to convert to HNSW index + * @param[out] hnsw_index cuvsHnswIndex_t to return the HNSW index + * + * @return cuvsError_t + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create a CAGRA index with `cuvsCagraBuild` + * + * // Convert the CAGRA index to an HNSW index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); + * + * // de-allocate `hnsw_params`, `hnsw_index` and `res` + * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); + * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + */ +cuvsError_t cuvsHnswFromCagra(cuvsResources_t res, + cuvsHnswIndexParams_t params, + cuvsCagraIndex_t cagra_index, + cuvsHnswIndex_t hnsw_index); + +/** + * @} + */ + +/** + * @defgroup hnsw_c_index_extend Extend HNSW index with additional vectors + * @{ + */ + +/** + * @brief Add new vectors to an HNSW index + * NOTE: The HNSW index can only be extended when the hierarchy is `CPU` + * when converting from a CAGRA index. + + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswExtendParams_t used to extend Hnsw index + * @param[in] additional_dataset DLManagedTensor* additional dataset to extend the index + * @param[inout] index cuvsHnswIndex_t to extend + * + * @return cuvsError_t + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * + * // Convert the CAGRA index to an HNSW index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); + * + * // Extend the HNSW index with additional vectors + * DLManagedTensor additional_dataset; + * cuvsHnswExtendParams_t extend_params; + * cuvsHnswExtendParamsCreate(&extend_params); + * cuvsHnswExtend(res, extend_params, additional_dataset, hnsw_index); + * + * // de-allocate `hnsw_params`, `hnsw_index`, `extend_params` and `res` + * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); + * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); + * cuvsError_t extend_params_destroy_status = cuvsHnswExtendParamsDestroy(extend_params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + */ + +cuvsError_t cuvsHnswExtend(cuvsResources_t res, + cuvsHnswExtendParams_t params, + DLManagedTensor* additional_dataset, + cuvsHnswIndex_t index); + +/** + * @} + */ + +/** + * @defgroup hnsw_c_search_params C API for hnswlib wrapper search params + * @{ + */ + +struct cuvsHnswSearchParams { + int32_t ef; + int32_t num_threads; +}; + +typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t; + +/** + * @brief Allocate HNSW search params, and populate with default values + * + * @param[in] params cuvsHnswSearchParams_t to allocate + * @return 
cuvsError_t + */ +cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params); + +/** + * @brief De-allocate HNSW search params + * + * @param[in] params cuvsHnswSearchParams_t to de-allocate + * @return cuvsError_t + */ +cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params); + /** * @} */ @@ -111,8 +310,8 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); * c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64` * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` - * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, - * as the format is not compatible with the original hnswlib. + * NOTE: When hierarchy is `NONE`, the HNSW index can only be searched by the hnswlib wrapper in + * cuVS, as the format is not compatible with the original hnswlib. * * @code {.c} * #include @@ -131,7 +330,7 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); * cuvsHnswSearchParams_t params; * cuvsError_t params_create_status = cuvsHnswSearchParamsCreate(¶ms); * - * // Search the `index` built using `cuvsHnswBuild` + * // Search the `index` built using `cuvsHnswFromCagra` * cuvsError_t search_status = cuvsHnswSearch(res, params, index, &queries, &neighbors, * &distances); * @@ -142,7 +341,7 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); * * @param[in] res cuvsResources_t opaque C handle * @param[in] params cuvsHnswSearchParams_t used to search Hnsw index - * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswBuild` + * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswFromCagra` * @param[in] queries DLManagedTensor* queries dataset to search * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries * @param[out] distances DLManagedTensor* output `k` distances for queries @@ -163,9 +362,50 @@ cuvsError_t cuvsHnswSearch(cuvsResources_t res, * @{ */ +/** + * @brief Serialize a CAGRA index to a file as an hnswlib index + * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. + * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib + * library. 
+ * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file to save the index + * @param[in] index cuvsHnswIndex_t to serialize + * @return cuvsError_t + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * + * // Convert the CAGRA index to an HNSW index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); + * + * // Serialize the HNSW index + * cuvsHnswSerialize(res, "/path/to/index", hnsw_index); + * + * // de-allocate `hnsw_params`, `hnsw_index` and `res` + * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); + * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + */ +cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHnswIndex_t index); + /** * Load hnswlib index from file which was serialized from a HNSW index. - * NOTE: The loaded hnswlib index is immutable, and only be read by the + * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable, and can only be read by the * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. * Experimental, both the API and the serialization format are subject to change. * @@ -185,17 +425,22 @@ cuvsError_t cuvsHnswSearch(cuvsResources_t res, * // The index should have the same dtype as the one used to build CAGRA the index * cuvsHnswIndex_t hnsw_index; * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * hnsw_params->hierarchy = NONE; * hnsw_index->dtype = index->dtype; - * cuvsCagraDeserialize(res, "/path/to/index", hnsw_index); + * cuvsHnswDeserialize(res, hnsw_params, "/path/to/index", dim, metric, hnsw_index); * @endcode * * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index * @param[in] filename the name of the file that stores the index * @param[in] dim the dimension of the vectors in the index * @param[in] metric the distance metric used to build the index * @param[out] index HNSW index loaded disk */ cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, + cuvsHnswIndexParams_t params, const char* filename, int dim, cuvsDistanceType metric, diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp index d5abd6d55..f0b433d8e 100644 --- a/cpp/include/cuvs/neighbors/hnsw.hpp +++ b/cpp/include/cuvs/neighbors/hnsw.hpp @@ -34,14 +34,30 @@ namespace cuvs::neighbors::hnsw { /** - * @defgroup hnsw_cpp_search_params Build CAGRA index and search with hnswlib + * @defgroup hnsw_cpp_index_params hnswlib index wrapper params * @{ */ -struct search_params : cuvs::neighbors::search_params { - int ef; // size of the candidate list - int num_threads = 0; // number of host threads to use for concurrent searches. Value of 0 - // automatically maximizes parallelism +/** + * @brief Hierarchy for HNSW index when converting from CAGRA index + * + * NOTE: When the value is `NONE`, the HNSW index is built as a base-layer-only index. 
+ */ +enum class HnswHierarchy { + NONE, // base-layer-only index + CPU // full index with CPU-built hierarchy +}; + +struct index_params : cuvs::neighbors::index_params { + /** Hierarchy build type for HNSW index when converting from CAGRA index */ + HnswHierarchy hierarchy = HnswHierarchy::NONE; + /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/ + int ef_construction = 200; + /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` + NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive + to parallelism, and increasing the number of threads can reduce the quality of the index. + */ + int num_threads = 2; }; /**@}*/ @@ -62,8 +78,12 @@ struct index : cuvs::neighbors::index { * * @param[in] dim dimensions of the training dataset * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct") + * @param[in] hierarchy hierarchy used for upper HNSW layers */ - index(int dim, cuvs::distance::DistanceType metric) : dim_{dim}, metric_{metric} {} + index(int dim, cuvs::distance::DistanceType metric, HnswHierarchy hierarchy = HnswHierarchy::NONE) + : dim_{dim}, metric_{metric}, hierarchy_{hierarchy} + { + } virtual ~index() {} @@ -76,6 +96,8 @@ struct index : cuvs::neighbors::index { auto metric() const -> cuvs::distance::DistanceType { return metric_; } + auto hierarchy() const -> HnswHierarchy { return hierarchy_; } + /** @brief Set ef for search */ @@ -84,24 +106,41 @@ struct index : cuvs::neighbors::index { private: int dim_; cuvs::distance::DistanceType metric_; + HnswHierarchy hierarchy_; }; /**@}*/ +/** + * @defgroup hnsw_cpp_extend_params HNSW index extend parameters + * @{ + */ + +struct extend_params { + /** Number of host threads to use to add additional vectors to the index. + Value of 0 automatically maximizes parallelism. */ + int num_threads = 0; +}; + /** * @defgroup hnsw_cpp_index_load Load CAGRA index as hnswlib index * @{ */ /** - * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index - * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/.bin` - * before reading it as an hnswlib index, then deleting the temporary file. The returned index - * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not - * compatible with the original hnswlib. + * @brief Construct an hnswlib index from a CAGRA index + * NOTE: When `hnsw::index_params.hierarchy` is: + * 1. `NONE`: This method uses the filesystem to write the CAGRA index in + * `/tmp/.bin` before reading it as an hnswlib index, then deleting the temporary + * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as + * the format is not compatible with the original hnswlib. + * 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The + * serialized index is also compatible with the original hnswlib library. 
* * @param[in] res raft resources + * @param[in] params hnsw index parameters * @param[in] cagra_index cagra index + * @param[in] dataset optional dataset to avoid extra memory copy when hierarchy is `CPU` * * Usage example: * @code{.cpp} @@ -110,24 +149,34 @@ struct index : cuvs::neighbors::index { * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); + * auto index = cagra::build(res, index_params, dataset); * - * // Load CAGRA index as base-layer-only hnswlib index - * auto hnsw_index = hnsw::from_cagra(res, index); + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); * @endcode */ std::unique_ptr> from_cagra( - raft::resources const& res, const cuvs::neighbors::cagra::index& cagra_index); + raft::resources const& res, + const index_params& params, + const cuvs::neighbors::cagra::index& cagra_index, + std::optional> dataset = + std::nullopt); /** - * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index - * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/.bin` - * before reading it as an hnswlib index, then deleting the temporary file. The returned index - * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not - * compatible with the original hnswlib. + * @brief Construct an hnswlib index from a CAGRA index + * NOTE: When `hnsw::index_params.hierarchy` is: + * 1. `NONE`: This method uses the filesystem to write the CAGRA index in + * `/tmp/.bin` before reading it as an hnswlib index, then deleting the temporary + * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as + * the format is not compatible with the original hnswlib. + * 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The + * serialized index is also compatible with the original hnswlib library. * * @param[in] res raft resources + * @param[in] params hnsw index parameters * @param[in] cagra_index cagra index + * @param[in] dataset optional dataset to avoid extra memory copy when hierarchy is `CPU` * * Usage example: * @code{.cpp} @@ -136,24 +185,34 @@ std::unique_ptr> from_cagra( * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); + * auto index = cagra::build(res, index_params, dataset); * - * // Load CAGRA index as base-layer-only hnswlib index - * auto hnsw_index = hnsw::from_cagra(res, index); + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); * @endcode */ std::unique_ptr> from_cagra( - raft::resources const& res, const cuvs::neighbors::cagra::index& cagra_index); + raft::resources const& res, + const index_params& params, + const cuvs::neighbors::cagra::index& cagra_index, + std::optional> dataset = + std::nullopt); /** - * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index - * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/.bin` - * before reading it as an hnswlib index, then deleting the temporary file. The returned index - * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not - * compatible with the original hnswlib. 
+ * @brief Construct an hnswlib index from a CAGRA index + * NOTE: When `hnsw::index_params.hierarchy` is: + * 1. `NONE`: This method uses the filesystem to write the CAGRA index in + * `/tmp/.bin` before reading it as an hnswlib index, then deleting the temporary + * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as + * the format is not compatible with the original hnswlib. + * 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The + * serialized index is also compatible with the original hnswlib library. * * @param[in] res raft resources + * @param[in] params hnsw index parameters * @param[in] cagra_index cagra index + * @param[in] dataset optional dataset to avoid extra memory copy when hierarchy is `CPU` * * Usage example: * @code{.cpp} @@ -162,14 +221,138 @@ std::unique_ptr> from_cagra( * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); + * auto index = cagra::build(res, index_params, dataset); * - * // Load CAGRA index as base-layer-only hnswlib index - * auto hnsw_index = hnsw::from_cagra(res, index); + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); * @endcode */ std::unique_ptr> from_cagra( - raft::resources const& res, const cuvs::neighbors::cagra::index& cagra_index); + raft::resources const& res, + const index_params& params, + const cuvs::neighbors::cagra::index& cagra_index, + std::optional> dataset = + std::nullopt); + +/**@}*/ + +/** + * @defgroup hnsw_cpp_index_extend Extend HNSW index with additional vectors + * @{ + */ + +/** + * @brief Add new vectors to an HNSW index + * NOTE: The HNSW index can only be extended when the `hnsw::index_params.hierarchy` is `CPU` + * when converting from a CAGRA index. + * + * @param[in] res raft resources + * @param[in] params configure the extend + * @param[in] additional_dataset a host matrix view to a row-major matrix [n_rows, index->dim()] + * @param[inout] idx HNSW index to extend + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace cuvs::neighbors; + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); + * + * // Extend the HNSW index with additional vectors + * auto additional_dataset = raft::make_host_matrix(res, add_size, index->dim()); + * hnsw::extend_params extend_params; + * hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get()); + * @endcode + */ +void extend(raft::resources const& res, + const extend_params& params, + raft::host_matrix_view additional_dataset, + index& idx); + +/** + * @brief Add new vectors to an HNSW index + * NOTE: The HNSW index can only be extended when the `hnsw::index_params.hierarchy` is `CPU` + * when converting from a CAGRA index. 
+ * + * @param[in] res raft resources + * @param[in] params configure the extend + * @param[in] additional_dataset a host matrix view to a row-major matrix [n_rows, index->dim()] + * @param[inout] idx HNSW index to extend + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace cuvs::neighbors; + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); + * + * // Extend the HNSW index with additional vectors + * auto additional_dataset = raft::make_host_matrix(res, add_size, index->dim()); + * hnsw::extend_params extend_params; + * hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get()); + * @endcode + */ +void extend(raft::resources const& res, + const extend_params& params, + raft::host_matrix_view additional_dataset, + index& idx); + +/** + * @brief Add new vectors to an HNSW index + * NOTE: The HNSW index can only be extended when the `hnsw::index_params.hierarchy` is `CPU` + * when converting from a CAGRA index. + * + * @param[in] res raft resources + * @param[in] params configure the extend + * @param[in] additional_dataset a host matrix view to a row-major matrix [n_rows, index->dim()] + * @param[inout] idx HNSW index to extend + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace cuvs::neighbors; + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); + * + * // Extend the HNSW index with additional vectors + * auto additional_dataset = raft::make_host_matrix(res, add_size, index->dim()); + * hnsw::extend_params extend_params; + * hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get()); + * @endcode + */ +void extend(raft::resources const& res, + const extend_params& params, + raft::host_matrix_view additional_dataset, + index& idx); + +/**@} */ + +/** + * @defgroup hnsw_cpp_search_params Build CAGRA index and search with hnswlib + * @{ + */ + +struct search_params : cuvs::neighbors::search_params { + int ef; // size of the candidate list + int num_threads = 0; // number of host threads to use for concurrent searches. Value of 0 + // automatically maximizes parallelism +}; /**@}*/ @@ -181,9 +364,9 @@ std::unique_ptr> from_cagra( */ /** - * @brief Search hnswlib base-layer-only index constructed from a CAGRA index - * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, - * as the format is not compatible with the original hnswlib. + * @brief Search HNSW index constructed from a CAGRA index + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS when the hierarchy is + * `NONE`, as the format is not compatible with the original hnswlib. 
* * @param[in] res raft resources * @param[in] params configure the search @@ -201,10 +384,11 @@ std::unique_ptr> from_cagra( * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); + * auto index = cagra::build(res, index_params, dataset); * - * // Load CAGRA index as a base-layer HNSW index using the filesystem - * auto hnsw_index = hnsw::from_cagra(res, index); + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); * * // Search K nearest neighbors as an hnswlib index * // using host threads for concurrency @@ -224,9 +408,9 @@ void search(raft::resources const& res, raft::host_matrix_view distances); /** - * @brief Search hnswlib base-layer-only index constructed from a CAGRA index - * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, - * as the format is not compatible with the original hnswlib. + * @brief Search HNSW index constructed from a CAGRA index + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS when the hierarchy is + * `NONE`, as the format is not compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] params configure the search @@ -244,10 +428,11 @@ void search(raft::resources const& res, * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); + * auto index = cagra::build(res, index_params, dataset); * - * // Load CAGRA index as a base-layer HNSW index using the filesystem - * auto hnsw_index = hnsw::from_cagra(res, index); + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); * * // Search K nearest neighbors as an hnswlib index * // using host threads for concurrency @@ -267,9 +452,9 @@ void search(raft::resources const& res, raft::host_matrix_view distances); /** - * @brief Search hnswlib base-layer-only index constructed from a CAGRA index - * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, - * as the format is not compatible with the original hnswlib. + * @brief Search HNSW index constructed from a CAGRA index + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS when the hierarchy is + * `NONE`, as the format is not compatible with the original hnswlib. 
* * @param[in] res raft resources * @param[in] params configure the search @@ -287,10 +472,11 @@ void search(raft::resources const& res, * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); + * auto index = cagra::build(res, index_params, dataset); * - * // Load CAGRA index as a base-layer HNSW index using the filesystem - * auto hnsw_index = hnsw::from_cagra(res, index); + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); * * // Search K nearest neighbors as an hnswlib index * // using host threads for concurrency @@ -312,16 +498,106 @@ void search(raft::resources const& res, /**@}*/ /** - * @defgroup hnsw_cpp_index_deserialize Deserialize CAGRA index as hnswlib index + * @defgroup hnsw_cpp_index_serialize Serialize and deserialize CAGRA index as hnswlib index * @{ */ +/** + * @brief Serialize a CAGRA index to a file as an hnswlib index + * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. + * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib + * library. + * + * @param[in] res raft resources + * @param[in] filename path to the file to save the serialized HNSW index + * @param[in] idx hnsw index + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); + * // Save the index + * hnsw::serialize(res, "index.bin", *hnsw_index); + * @endcode + */ +void serialize(raft::resources const& res, const std::string& filename, const index& idx); + +/** + * @brief Serialize a CAGRA index to a file as an hnswlib index + * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. + * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib + * library. 
+ * + * @param[in] res raft resources + * @param[in] filename path to the file to save the serialized HNSW index + * @param[in] idx hnsw index + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); + * // Save the index + * hnsw::serialize(res, "index.bin", *hnsw_index); + * @endcode + */ +void serialize(raft::resources const& res, const std::string& filename, const index& idx); + +/** + * @brief Serialize a CAGRA index to a file as an hnswlib index + * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. + * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib + * library. + * + * @param[in] res raft resources + * @param[in] filename path to the file to save the serialized HNSW index + * @param[in] idx hnsw index + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as an HNSW index + * hnsw::index_params hnsw_params; + * auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index); + * // Save the index + * hnsw::serialize(res, "index.bin", *hnsw_index); + * @endcode + */ +void serialize(raft::resources const& res, const std::string& filename, const index& idx); + /** * @brief De-serialize a CAGRA index saved to a file as an hnswlib index - * NOTE: The loaded hnswlib index is immutable, and only be read by the + * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. + * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib + * library. * * @param[in] res raft resources + * @param[in] params hnsw index parameters * @param[in] filename path to the file containing the serialized CAGRA index * @param[in] dim dimensions of the training dataset * @param[in] metric distance metric to search. 
 *                  Supported metrics ("L2Expanded", "InnerProduct")
@@ -334,19 +610,23 @@ void search(raft::resources const& res,
 *   // use default index parameters
 *   cagra::index_params index_params;
 *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<float, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
 *
- *   // save a CAGRA index to a file
- *   cagra::serialize(res, index, "index.bin");
- *   // De-serialize a CAGRA index as a base-layer HNSW index using the filesystem
- *   index<float>* hnsw_index = nullptr;
- *   hnsw::deserialize(res, "index.bin", index->dim(), index->metric(), &hnsw_index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto built_index = hnsw::from_cagra(res, hnsw_params, index);
+ *   // save the HNSW index to a file
+ *   hnsw::serialize(res, "index.bin", *built_index);
+ *   // De-serialize the HNSW index
+ *   index<float>* hnsw_index = nullptr;
+ *   hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &hnsw_index);
 *
 *   // Delete index after use
 *   delete hnsw_index;
 * @endcode
 */
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
@@ -354,10 +634,13 @@ void deserialize(raft::resources const& res,
 
 /**
  * @brief De-serialize a CAGRA index saved to a file as an hnswlib index
- * NOTE: The loaded hnswlib index is immutable, and only be read by the
+ * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable and can only be read by the
  * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the loaded hnswlib index is compatible with the original hnswlib
+ * library.
 *
 * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
 * @param[in] filename path to the file containing the serialized CAGRA index
 * @param[in] dim dimensions of the training dataset
 * @param[in] metric distance metric to search.
 *                  Supported metrics ("L2Expanded", "InnerProduct")
@@ -370,19 +653,23 @@ void deserialize(raft::resources const& res,
 *   // use default index parameters
 *   cagra::index_params index_params;
 *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<uint8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
 *
- *   // save a CAGRA index to a file
- *   cagra::serialize(res, index, "index.bin");
- *   // De-serialize a CAGRA index as a base-layer HNSW index using the filesystem
- *   index<uint8_t>* hnsw_index = nullptr;
- *   hnsw::deserialize(res, "index.bin", index->dim(), index->metric(), &hnsw_index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto built_index = hnsw::from_cagra(res, hnsw_params, index);
+ *   // save the HNSW index to a file
+ *   hnsw::serialize(res, "index.bin", *built_index);
+ *   // De-serialize the HNSW index
+ *   index<uint8_t>* hnsw_index = nullptr;
+ *   hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &hnsw_index);
 *
 *   // Delete index after use
 *   delete hnsw_index;
 * @endcode
 */
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
@@ -390,10 +677,13 @@ void deserialize(raft::resources const& res,
 
 /**
  * @brief De-serialize a CAGRA index saved to a file as an hnswlib index
- * NOTE: The loaded hnswlib index is immutable, and only be read by the
+ * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable and can only be read by the
  * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the loaded hnswlib index is compatible with the original hnswlib
+ * library.
 *
 * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
 * @param[in] filename path to the file containing the serialized CAGRA index
 * @param[in] dim dimensions of the training dataset
 * @param[in] metric distance metric to search.
 *                  Supported metrics ("L2Expanded", "InnerProduct")
@@ -406,19 +696,23 @@ void deserialize(raft::resources const& res,
 *   // use default index parameters
 *   cagra::index_params index_params;
 *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<int8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
 *
- *   // save a CAGRA index to a file
- *   cagra::serialize(res, index, "index.bin");
- *   // De-serialize a CAGRA index as a base-layer HNSW index using the filesystem
- *   index<int8_t>* hnsw_index = nullptr;
- *   hnsw::deserialize(res, "index.bin", index->dim(), index->metric(), &hnsw_index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto built_index = hnsw::from_cagra(res, hnsw_params, index);
+ *   // save the HNSW index to a file
+ *   hnsw::serialize(res, "index.bin", *built_index);
+ *   // De-serialize the HNSW index
+ *   index<int8_t>* hnsw_index = nullptr;
+ *   hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &hnsw_index);
 *
 *   // Delete index after use
 *   delete hnsw_index;
 * @endcode
 */
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp
index ce1e03264..e129d23e8 100644
--- a/cpp/src/neighbors/detail/hnsw.hpp
+++ b/cpp/src/neighbors/detail/hnsw.hpp
@@ -22,9 +22,63 @@
 #include
 #include
 #include
+#include <thread>
 
 namespace cuvs::neighbors::hnsw::detail {
 
+// Multithreaded executor
+// The helper function is copied from the hnswlib repository
+// because, for some reason, adding vectors to the hnswlib index does not
+// work well with omp parallel for
+template <class Function>
+inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn)
+{
+  if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); }
+
+  if (numThreads == 1) {
+    for (size_t id = start; id < end; id++) {
+      fn(id, 0);
+    }
+  } else {
+    std::vector<std::thread> threads;
+    std::atomic<size_t> current(start);
+
+    // keep track of exceptions in threads
+    // https://stackoverflow.com/a/32428427/1713196
+    std::exception_ptr lastException = nullptr;
+    std::mutex lastExceptMutex;
+
+    for (size_t threadId = 0; threadId < numThreads; ++threadId) {
+      threads.push_back(std::thread([&, threadId] {
+        while (true) {
+          size_t id = current.fetch_add(1);
+
+          if (id >= end) { break; }
+
+          try {
+            fn(id, threadId);
+          } catch (...) {
+            std::unique_lock<std::mutex> lastExcepLock(lastExceptMutex);
+            lastException = std::current_exception();
+            /*
+             * This will work even when current is the largest value that
+             * size_t can fit, because fetch_add returns the previous value
+             * before the increment (what will result in overflow
+             * and produce 0 instead of current + 1).
+             */
+            current = end;
+            break;
+          }
+        }
+      }));
+    }
+    for (auto& thread : threads) {
+      thread.join();
+    }
+    if (lastException) { std::rethrow_exception(lastException); }
+  }
+}
+
 template <typename T>
 struct hnsw_dist_t {
   using type = void;
 };
@@ -54,9 +108,10 @@ struct index_impl : index<T> {
-   * @param[in] filepath path to the index
    * @param[in] dim dimensions of the training dataset
    * @param[in] metric distance metric to search.
   *                  Supported metrics ("L2Expanded", "InnerProduct")
+   * @param[in] hierarchy hierarchy used for upper HNSW layers
    */
-  index_impl(const std::string& filepath, int dim, cuvs::distance::DistanceType metric)
-    : index<T>{dim, metric}
+  index_impl(int dim, cuvs::distance::DistanceType metric, HnswHierarchy hierarchy)
+    : index<T>{dim, metric, hierarchy}
   {
     if constexpr (std::is_same_v<T, float>) {
      if (metric == cuvs::distance::DistanceType::L2Expanded) {
@@ -71,11 +126,6 @@ struct index_impl : index<T> {
    }
 
    RAFT_EXPECTS(space_ != nullptr, "Unsupported metric type was used");
-
-    appr_alg_ = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
-      space_.get(), filepath);
-
-    appr_alg_->base_layer_only = true;
   }
 
   /**
@@ -88,14 +138,32 @@ struct index_impl : index<T> {
   */
  void set_ef(int ef) const override { appr_alg_->ef_ = ef; }
 
+  /**
+    @brief Set index
+   */
+  void set_index(std::unique_ptr<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>&& index)
+  {
+    appr_alg_ = std::move(index);
+  }
+
+  /**
+    @brief Get space
+   */
+  auto get_space() const -> hnswlib::SpaceInterface<typename hnsw_dist_t<T>::type>*
+  {
+    return space_.get();
+  }
+
 private:
  std::unique_ptr<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>> appr_alg_;
  std::unique_ptr<hnswlib::SpaceInterface<typename hnsw_dist_t<T>::type>> space_;
 };
 
-template <typename T>
-std::unique_ptr<index<T>> from_cagra(raft::resources const& res,
-                                     const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index)
+template <typename T, HnswHierarchy hierarchy>
+std::enable_if_t<hierarchy == HnswHierarchy::NONE, std::unique_ptr<index<T>>> from_cagra(
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index)
 {
  std::random_device dev;
  std::mt19937 rng(dev());
@@ -103,13 +171,125 @@ std::unique_ptr<index<T>> from_cagra(raft::resources const& res,
  auto uuid            = std::to_string(dist(rng));
  std::string filepath = "/tmp/" + uuid + ".bin";
  cuvs::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index);
+
  index<T>* hnsw_index = nullptr;
  cuvs::neighbors::hnsw::deserialize(
-    res, filepath, cagra_index.dim(), cagra_index.metric(), &hnsw_index);
+    res, params, filepath, cagra_index.dim(), cagra_index.metric(), &hnsw_index);
  std::filesystem::remove(filepath);
 
  return std::unique_ptr<index<T>>(hnsw_index);
 }
 
+template <typename T, HnswHierarchy hierarchy>
+std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> from_cagra(
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
+{
+  auto host_dataset = raft::make_host_matrix<T, int64_t>(0, 0);
+  raft::host_matrix_view<const T, int64_t> host_dataset_view(
+    host_dataset.data_handle(), host_dataset.extent(0), host_dataset.extent(1));
+  if (dataset.has_value()) {
+    host_dataset_view = dataset.value();
+  } else {
+    // move dataset to host, remove padding
+    auto cagra_dataset = cagra_index.dataset();
+    host_dataset =
+      raft::make_host_matrix<T, int64_t>(cagra_dataset.extent(0), cagra_dataset.extent(1));
+    RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
+                                    sizeof(T) * host_dataset.extent(1),
+                                    cagra_dataset.data_handle(),
+                                    sizeof(T) * cagra_dataset.stride(0),
+                                    sizeof(T) * host_dataset.extent(1),
+                                    cagra_dataset.extent(0),
+                                    cudaMemcpyDefault,
+                                    raft::resource::get_cuda_stream(res)));
+    raft::resource::sync_stream(res);
+    host_dataset_view = host_dataset.view();
+  }
+  // build upper layers of hnsw index
+  auto hnsw_index =
+    std::make_unique<index_impl<T>>(cagra_index.dim(), cagra_index.metric(), hierarchy);
+  auto appr_algo = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
+    hnsw_index->get_space(),
+    host_dataset_view.extent(0),
+    cagra_index.graph().extent(1) / 2,
+    params.ef_construction);
+  appr_algo->base_layer_init = false;  // tell hnswlib to build upper layers only
+  ParallelFor(0, host_dataset_view.extent(0), params.num_threads, [&](size_t i, size_t threadId) {
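+    // Sketch of this step: with base_layer_init set to false above, each
+    // worker claims the next vector id and inserts it, so hnswlib assigns
+    // levels and links the upper layers only; the base layer is filled in
+    // from the CAGRA graph right after this loop.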
+    appr_algo->addPoint((void*)(host_dataset_view.data_handle() + i * host_dataset_view.extent(1)),
+                        i);
+  });
+  appr_algo->base_layer_init = true;  // reset to true to allow addition of new points
+
+  // move cagra graph to host
+  auto graph = cagra_index.graph();
+  auto host_graph =
+    raft::make_host_matrix<uint32_t, int64_t, raft::row_major>(graph.extent(0), graph.extent(1));
+  raft::copy(host_graph.data_handle(),
+             graph.data_handle(),
+             graph.size(),
+             raft::resource::get_cuda_stream(res));
+  raft::resource::sync_stream(res);
+
+// copy cagra graph to hnswlib base layer
+#pragma omp parallel for
+  for (size_t i = 0; i < static_cast<size_t>(host_graph.extent(0)); ++i) {
+    auto ll_i = appr_algo->get_linklist0(i);
+    appr_algo->setListCount(ll_i, host_graph.extent(1));
+    auto* data = (uint32_t*)(ll_i + 1);
+    for (size_t j = 0; j < static_cast<size_t>(host_graph.extent(1)); ++j) {
+      data[j] = host_graph(i, j);
+    }
+  }
+
+  hnsw_index->set_index(std::move(appr_algo));
+  return hnsw_index;
+}
+
+template <typename T>
+std::unique_ptr<index<T>> from_cagra(
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
+{
+  if (params.hierarchy == HnswHierarchy::NONE) {
+    return from_cagra<T, HnswHierarchy::NONE>(res, params, cagra_index);
+  } else if (params.hierarchy == HnswHierarchy::CPU) {
+    return from_cagra<T, HnswHierarchy::CPU>(res, params, cagra_index, dataset);
+  } else {
+    RAFT_FAIL("Unsupported hierarchy type");
+  }
+}
+
+template <typename T>
+void extend(raft::resources const& res,
+            const extend_params& params,
+            raft::host_matrix_view<const T, int64_t, raft::row_major> additional_dataset,
+            index<T>& idx)
+{
+  auto* hnswlib_index = reinterpret_cast<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>*>(
+    const_cast<void*>(idx.get_index()));
+  auto current_element_count = hnswlib_index->getCurrentElementCount();
+  auto new_element_count     = additional_dataset.extent(0);
+  auto num_threads           = params.num_threads == 0 ?
+    std::thread::hardware_concurrency()
+    : static_cast<size_t>(params.num_threads);
+
+  hnswlib_index->resizeIndex(current_element_count + new_element_count);
+  ParallelFor(current_element_count,
+              current_element_count + new_element_count,
+              num_threads,
+              [&](size_t i, size_t threadId) {
+                hnswlib_index->addPoint(
+                  (void*)(additional_dataset.data_handle() +
+                          (i - current_element_count) * additional_dataset.extent(1)),
+                  i);
+              });
+}
+
 template <typename T>
 void get_search_knn_results(hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type> const* idx,
                             const T* query,
@@ -171,14 +351,28 @@ void search(raft::resources const& res,
   }
 }
 
+template <typename T>
+void serialize(raft::resources const& res, const std::string& filename, const index<T>& idx)
+{
+  auto* hnswlib_index = reinterpret_cast<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>*>(
+    const_cast<void*>(idx.get_index()));
+  hnswlib_index->saveIndex(filename);
+}
+
 template <typename T>
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
                  index<T>** idx)
 {
-  *idx = new detail::index_impl<T>(filename, dim, metric);
+  auto hnsw_index = std::make_unique<index_impl<T>>(dim, metric, params.hierarchy);
+  auto appr_algo  = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
+    hnsw_index->get_space(), filename);
+  if (params.hierarchy == HnswHierarchy::NONE) { appr_algo->base_layer_only = true; }
+  hnsw_index->set_index(std::move(appr_algo));
+  *idx = hnsw_index.release();
 }
 
 }  // namespace cuvs::neighbors::hnsw::detail
diff --git a/cpp/src/neighbors/hnsw.cpp b/cpp/src/neighbors/hnsw.cpp
index e6f3fbcc7..f165176ec 100644
--- a/cpp/src/neighbors/hnsw.cpp
+++ b/cpp/src/neighbors/hnsw.cpp
@@ -21,11 +21,14 @@
 
 namespace cuvs::neighbors::hnsw {
 
-#define CUVS_INST_HNSW_FROM_CAGRA(T)                                                           \
-  std::unique_ptr<index<T>> from_cagra(                                                        \
-    raft::resources const& res, const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index) \
-  {                                                                                            \
-    return detail::from_cagra(res, cagra_index);                                               \
+#define CUVS_INST_HNSW_FROM_CAGRA(T)                                                   \
+  std::unique_ptr<index<T>> from_cagra(                                                \
+    raft::resources const& res,                                                        \
+    const index_params& params,                                                        \
+    const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,                     \
+    std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)  \
+  {                                                                                    \
+    return detail::from_cagra(res, params, cagra_index, dataset);                      \
   }
 
 CUVS_INST_HNSW_FROM_CAGRA(float);
@@ -34,6 +37,21 @@ CUVS_INST_HNSW_FROM_CAGRA(int8_t);
 
 #undef CUVS_INST_HNSW_FROM_CAGRA
 
+#define CUVS_INST_HNSW_EXTEND(T)                                                             \
+  void extend(raft::resources const& res,                                                    \
+              const extend_params& params,                                                   \
+              raft::host_matrix_view<const T, int64_t, raft::row_major> additional_dataset,  \
+              index<T>& idx)                                                                 \
+  {                                                                                          \
+    detail::extend(res, params, additional_dataset, idx);                                    \
+  }
+
+CUVS_INST_HNSW_EXTEND(float);
+CUVS_INST_HNSW_EXTEND(uint8_t);
+CUVS_INST_HNSW_EXTEND(int8_t);
+
+#undef CUVS_INST_HNSW_EXTEND
+
 #define CUVS_INST_HNSW_SEARCH(T)                             \
   void search(raft::resources const& res,                    \
               const search_params& params,                   \
@@ -51,20 +69,25 @@ CUVS_INST_HNSW_SEARCH(int8_t);
 
 #undef CUVS_INST_HNSW_SEARCH
 
-#define CUVS_INST_HNSW_DESERIALIZE(T)                                                           \
-  void deserialize(raft::resources const& res,                                                  \
-                   const std::string& filename,                                                 \
-                   int dim,                                                                     \
-                   cuvs::distance::DistanceType metric,                                         \
-                   index<T>** idx)                                                              \
-  {                                                                                             \
-    detail::deserialize(res, filename, dim, metric, idx);                                       \
+#define CUVS_INST_HNSW_SERIALIZE(T)                                                             \
+  void serialize(raft::resources const& res, const std::string& filename, const index<T>& idx) \
+  {                                                                                             \
+    detail::serialize(res, filename, idx);                                                      \
+  }                                                                                             \
+  void deserialize(raft::resources const& res,                                                  \
+                   const index_params& params,                                                  \
+                   const std::string& filename,                                                 \
+                   int dim,                                                                     \
+                   cuvs::distance::DistanceType metric,                                         \
+                   index<T>** idx)                                                              \
+  {                                                                                             \
+    detail::deserialize(res, params, filename, dim, metric, idx);                               \
  }
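+// Sketch of how the functions instantiated in this file are typically used
+// together (illustrative only; `res`, `cagra_idx`, and `extra_vectors` are
+// assumptions and are not defined here):
+//
+//   index_params hnsw_params;
+//   hnsw_params.hierarchy = HnswHierarchy::CPU;
+//   auto hnsw_idx = from_cagra(res, hnsw_params, cagra_idx, std::nullopt);
+//   extend(res, extend_params{}, extra_vectors, *hnsw_idx);
+//   serialize(res, "index.bin", *hnsw_idx);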
-CUVS_INST_HNSW_DESERIALIZE(float);
-CUVS_INST_HNSW_DESERIALIZE(uint8_t);
-CUVS_INST_HNSW_DESERIALIZE(int8_t);
+CUVS_INST_HNSW_SERIALIZE(float);
+CUVS_INST_HNSW_SERIALIZE(uint8_t);
+CUVS_INST_HNSW_SERIALIZE(int8_t);
 
-#undef CUVS_INST_HNSW_DESERIALIZE
+#undef CUVS_INST_HNSW_SERIALIZE
 
 }  // namespace cuvs::neighbors::hnsw
diff --git a/cpp/src/neighbors/hnsw_c.cpp b/cpp/src/neighbors/hnsw_c.cpp
index a19875641..0233a510a 100644
--- a/cpp/src/neighbors/hnsw_c.cpp
+++ b/cpp/src/neighbors/hnsw_c.cpp
@@ -31,6 +31,44 @@
 #include
 
 namespace {
+
+template <typename T>
+void _from_cagra(cuvsResources_t res,
+                 cuvsHnswIndexParams_t params,
+                 cuvsCagraIndex_t cagra_index,
+                 cuvsHnswIndex_t hnsw_index)
+{
+  auto res_ptr    = reinterpret_cast<raft::resources*>(res);
+  auto index      = reinterpret_cast<cuvs::neighbors::cagra::index<T, uint32_t>*>(cagra_index->addr);
+  auto cpp_params = cuvs::neighbors::hnsw::index_params();
+  cpp_params.hierarchy       = static_cast<cuvs::neighbors::hnsw::HnswHierarchy>(params->hierarchy);
+  cpp_params.ef_construction = params->ef_construction;
+  cpp_params.num_threads     = params->num_threads;
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset = std::nullopt;
+
+  auto hnsw_index_unique_ptr =
+    cuvs::neighbors::hnsw::from_cagra(*res_ptr, cpp_params, *index, dataset);
+  auto hnsw_index_ptr = hnsw_index_unique_ptr.release();
+  hnsw_index->addr    = reinterpret_cast<uintptr_t>(hnsw_index_ptr);
+}
+
+template <typename T>
+void _extend(cuvsResources_t res,
+             cuvsHnswExtendParams_t params,
+             DLManagedTensor* additional_dataset,
+             cuvsHnswIndex index)
+{
+  auto res_ptr    = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr  = reinterpret_cast<cuvs::neighbors::hnsw::index<T>*>(index.addr);
+  auto cpp_params = cuvs::neighbors::hnsw::extend_params();
+  cpp_params.num_threads = params->num_threads;
+
+  using additional_dataset_mdspan_type = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+  auto additional_dataset_mds =
+    cuvs::core::from_dlpack<additional_dataset_mdspan_type>(additional_dataset);
+  cuvs::neighbors::hnsw::extend(*res_ptr, cpp_params, additional_dataset_mds, *index_ptr);
+}
+
 template <typename T>
 void _search(cuvsResources_t res,
              cuvsHnswSearchParams params,
@@ -44,7 +82,7 @@ void _search(cuvsResources_t res,
 
   auto search_params        = cuvs::neighbors::hnsw::search_params();
   search_params.ef          = params.ef;
-  search_params.num_threads = params.numThreads;
+  search_params.num_threads = params.num_threads;
 
   using queries_mdspan_type   = raft::host_matrix_view<T const, int64_t, raft::row_major>;
   using neighbors_mdspan_type = raft::host_matrix_view<uint64_t, int64_t, raft::row_major>;
@@ -57,26 +95,42 @@ void _search(cuvsResources_t res,
 }
 
 template <typename T>
-void* _deserialize(cuvsResources_t res, const char* filename, int dim, cuvsDistanceType metric)
+void _serialize(cuvsResources_t res, const char* filename, cuvsHnswIndex index)
+{
+  auto res_ptr   = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::hnsw::index<T>*>(index.addr);
+  cuvs::neighbors::hnsw::serialize(*res_ptr, std::string(filename), *index_ptr);
+}
+
+template <typename T>
+void* _deserialize(cuvsResources_t res,
+                   cuvsHnswIndexParams_t params,
+                   const char* filename,
+                   int dim,
+                   cuvsDistanceType metric)
 {
   auto res_ptr = reinterpret_cast<raft::resources*>(res);
   cuvs::neighbors::hnsw::index<T>* index = nullptr;
-  cuvs::neighbors::hnsw::deserialize(*res_ptr, std::string(filename), dim, metric, &index);
+  auto cpp_params      = cuvs::neighbors::hnsw::index_params();
+  cpp_params.hierarchy = static_cast<cuvs::neighbors::hnsw::HnswHierarchy>(params->hierarchy);
+  cuvs::neighbors::hnsw::deserialize(
+    *res_ptr, cpp_params, std::string(filename), dim, metric, &index);
   return index;
 }
 
 }  // namespace
 
-extern "C" cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params)
+extern "C" cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
 {
-  return cuvs::core::translate_exceptions(
-    [=] { *params = new cuvsHnswSearchParams{.ef = 200, .numThreads = 0};
-  });
+  return cuvs::core::translate_exceptions([=] {
+    *params = new cuvsHnswIndexParams{
+      .hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 2};
+  });
 }
 
-extern "C" cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params)
+extern "C" cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params)
 {
   return cuvs::core::translate_exceptions([=] { delete params; });
 }
-
 extern "C" cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index)
 {
   return cuvs::core::translate_exceptions([=] { *index = new cuvsHnswIndex{}; });
@@ -101,6 +155,66 @@ extern "C" cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index_c_ptr)
   });
 }
 
+extern "C" cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsHnswExtendParams{.num_threads = 0}; });
+}
+
+extern "C" cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
+extern "C" cuvsError_t cuvsHnswFromCagra(cuvsResources_t res,
+                                         cuvsHnswIndexParams_t params,
+                                         cuvsCagraIndex_t cagra_index,
+                                         cuvsHnswIndex_t hnsw_index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index        = *cagra_index;
+    hnsw_index->dtype = index.dtype;
+    if (index.dtype.code == kDLFloat) {
+      _from_cagra<float>(res, params, cagra_index, hnsw_index);
+    } else if (index.dtype.code == kDLUInt) {
+      _from_cagra<uint8_t>(res, params, cagra_index, hnsw_index);
+    } else if (index.dtype.code == kDLInt) {
+      _from_cagra<int8_t>(res, params, cagra_index, hnsw_index);
+    } else {
+      RAFT_FAIL("Unsupported dtype: %d", index.dtype.code);
+    }
+  });
+}
+
+extern "C" cuvsError_t cuvsHnswExtend(cuvsResources_t res,
+                                      cuvsHnswExtendParams_t params,
+                                      DLManagedTensor* additional_dataset,
+                                      cuvsHnswIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    if (index->dtype.code == kDLFloat) {
+      _extend<float>(res, params, additional_dataset, *index);
+    } else if (index->dtype.code == kDLUInt) {
+      _extend<uint8_t>(res, params, additional_dataset, *index);
+    } else if (index->dtype.code == kDLInt) {
+      _extend<int8_t>(res, params, additional_dataset, *index);
+    } else {
+      RAFT_FAIL("Unsupported dtype: %d", index->dtype.code);
+    }
+  });
+}
+
+extern "C" cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsHnswSearchParams{.ef = 200, .num_threads = 0}; });
+}
+
+extern "C" cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
 extern "C" cuvsError_t cuvsHnswSearch(cuvsResources_t res,
                                       cuvsHnswSearchParams_t params,
                                       cuvsHnswIndex_t index_c_ptr,
@@ -140,7 +254,25 @@ extern "C" cuvsError_t cuvsHnswSearch(cuvsResources_t res,
   });
 }
 
+extern "C" cuvsError_t cuvsHnswSerialize(cuvsResources_t res,
+                                         const char* filename,
+                                         cuvsHnswIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    if (index->dtype.code == kDLFloat) {
+      _serialize<float>(res, filename, *index);
+    } else if (index->dtype.code == kDLInt) {
+      _serialize<int8_t>(res, filename, *index);
+    } else if (index->dtype.code == kDLUInt) {
+      _serialize<uint8_t>(res, filename, *index);
+    } else {
+      RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits);
+    }
+  });
+}
+
 extern "C" cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
+                                           cuvsHnswIndexParams_t params,
                                            const char* filename,
                                            int dim,
                                            cuvsDistanceType metric,
@@ -148,11 +280,14 @@ extern "C" cuvsError_t
cuvsHnswDeserialize(cuvsResources_t res,
 {
   return cuvs::core::translate_exceptions([=] {
     if (index->dtype.code == kDLFloat && index->dtype.bits == 32) {
-      index->addr = reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename, dim, metric));
+      index->addr =
+        reinterpret_cast<uintptr_t>(_deserialize<float>(res, params, filename, dim, metric));
     } else if (index->dtype.code == kDLUInt && index->dtype.bits == 8) {
-      index->addr = reinterpret_cast<uintptr_t>(_deserialize<uint8_t>(res, filename, dim, metric));
+      index->addr =
+        reinterpret_cast<uintptr_t>(_deserialize<uint8_t>(res, params, filename, dim, metric));
     } else if (index->dtype.code == kDLInt && index->dtype.bits == 8) {
-      index->addr = reinterpret_cast<uintptr_t>(_deserialize<int8_t>(res, filename, dim, metric));
+      index->addr =
+        reinterpret_cast<uintptr_t>(_deserialize<int8_t>(res, params, filename, dim, metric));
     } else {
       RAFT_FAIL("Unsupported dtype in file %s", filename);
     }
diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp
index 9b3da75a4..98ef3fdd3 100644
--- a/cpp/src/neighbors/iface/iface.hpp
+++ b/cpp/src/neighbors/iface/iface.hpp
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <cuvs/neighbors/hnsw.hpp>
 #include
 #include
 
diff --git a/cpp/test/neighbors/ann_hnsw_c.cu b/cpp/test/neighbors/ann_hnsw_c.cu
index fc740b924..2a6401b1d 100644
--- a/cpp/test/neighbors/ann_hnsw_c.cu
+++ b/cpp/test/neighbors/ann_hnsw_c.cu
@@ -111,7 +111,9 @@ TEST(CagraHnswC, BuildSearch)
   cuvsHnswIndex_t hnsw_index;
   cuvsHnswIndexCreate(&hnsw_index);
   hnsw_index->dtype = index->dtype;
-  cuvsHnswDeserialize(res, "/tmp/cagra_hnswlib.index", 2, L2Expanded, hnsw_index);
+  cuvsHnswIndexParams_t hnsw_params;
+  cuvsHnswIndexParamsCreate(&hnsw_params);
+  cuvsHnswDeserialize(res, hnsw_params, "/tmp/cagra_hnswlib.index", 2, L2Expanded, hnsw_index);
 
   // search index
   cuvsHnswSearchParams_t search_params;
diff --git a/cpp/test/neighbors/hnsw.cu b/cpp/test/neighbors/hnsw.cu
index 9fb88be05..20ee83a11 100644
--- a/cpp/test/neighbors/hnsw.cu
+++ b/cpp/test/neighbors/hnsw.cu
@@ -108,7 +108,8 @@ class AnnHNSWTest : public ::testing::TestWithParam<AnnHNSWInputs<T>> {
 
     cuvs::neighbors::hnsw::search_params search_params;
     search_params.ef = ps.ef;
-    auto hnsw_index  = cuvs::neighbors::hnsw::from_cagra(handle_, index);
+    cuvs::neighbors::hnsw::index_params hnsw_params;
+    auto hnsw_index = cuvs::neighbors::hnsw::from_cagra(handle_, hnsw_params, index);
     auto queries_HNSW_view =
       raft::make_host_matrix_view<const T, int64_t>(queries_h.data(), ps.n_queries, ps.dim);
     auto indices_HNSW_view =
diff --git a/dependencies.yaml b/dependencies.yaml
index e909ad0dc..80a7d2024 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -470,7 +470,6 @@ dependencies:
   common:
     - output_types: [conda, pyproject, requirements]
       packages:
-          - hnswlib=0.6.2
          - nlohmann_json>=3.11.2
          - glog>=0.6.0
          - h5py>=3.8.0
diff --git a/docs/source/c_api/neighbors_hnsw_c.rst b/docs/source/c_api/neighbors_hnsw_c.rst
index 988e5b6f3..22ffc236d 100644
--- a/docs/source/c_api/neighbors_hnsw_c.rst
+++ b/docs/source/c_api/neighbors_hnsw_c.rst
@@ -26,6 +26,28 @@ Index
    :members:
    :content-only:
 
+Index extend parameters
+-----------------------
+
+.. doxygengroup:: hnsw_c_extend_params
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index extend
+------------
+.. doxygengroup:: hnsw_c_index_extend
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index load
+----------
+
+..
doxygengroup:: hnsw_c_index_load + :project: cuvs + :members: + :content-only: + Index search ------------ diff --git a/docs/source/cpp_api/neighbors_hnsw.rst b/docs/source/cpp_api/neighbors_hnsw.rst index b0af88af0..00dd3a213 100644 --- a/docs/source/cpp_api/neighbors_hnsw.rst +++ b/docs/source/cpp_api/neighbors_hnsw.rst @@ -27,10 +27,25 @@ Index :members: :content-only: -Index load +Index extend parameters +----------------------- + +.. doxygengroup:: hnsw_cpp_extend_params + :project: cuvs + :members: + :content-only: + +Index extend ------------ +.. doxygengroup:: hnsw_cpp_index_extend + :project: cuvs + :members: + :content-only: -.. doxygengroup:: hnsw_cpp_index_search +Index load +---------- + +.. doxygengroup:: hnsw_cpp_index_load :project: cuvs :members: :content-only: @@ -43,10 +58,10 @@ Index search :members: :content-only: -Index deserialize +Index serialize --------------- -.. doxygengroup:: hnsw_cpp_index_deserialize +.. doxygengroup:: hnsw_cpp_index_serialize :project: cuvs :members: :content-only: diff --git a/python/cuvs/cuvs/neighbors/hnsw/__init__.py b/python/cuvs/cuvs/neighbors/hnsw/__init__.py index 5efcdf68b..fafff7d03 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/__init__.py +++ b/python/cuvs/cuvs/neighbors/hnsw/__init__.py @@ -13,10 +13,23 @@ # limitations under the License. -from .hnsw import Index, SearchParams, from_cagra, load, save, search +from .hnsw import ( + ExtendParams, + Index, + IndexParams, + SearchParams, + extend, + from_cagra, + load, + save, + search, +) __all__ = [ + "IndexParams", "Index", + "ExtendParams", + "extend", "SearchParams", "load", "save", diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd index 1cdc97406..e0c517933 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd @@ -20,14 +20,25 @@ from libc.stdint cimport int32_t, uintptr_t from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor from cuvs.distance_type cimport cuvsDistanceType +from cuvs.neighbors.cagra.cagra cimport cuvsCagraIndex_t cdef extern from "cuvs/neighbors/hnsw.h" nogil: - ctypedef struct cuvsHnswSearchParams: - int32_t ef - int32_t numThreads - ctypedef cuvsHnswSearchParams* cuvsHnswSearchParams_t + ctypedef enum cuvsHnswHierarchy: + NONE + CPU + + ctypedef struct cuvsHnswIndexParams: + cuvsHnswHierarchy hierarchy + int32_t ef_construction + int32_t num_threads + + ctypedef cuvsHnswIndexParams* cuvsHnswIndexParams_t + + cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params) + + cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params) ctypedef struct cuvsHnswIndex: uintptr_t addr @@ -39,6 +50,31 @@ cdef extern from "cuvs/neighbors/hnsw.h" nogil: cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index) + ctypedef struct cuvsHnswExtendParams: + int32_t num_threads + + ctypedef cuvsHnswExtendParams* cuvsHnswExtendParams_t + + cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params) + + cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params) + + cuvsError_t cuvsHnswFromCagra(cuvsResources_t res, + cuvsHnswIndexParams_t params, + cuvsCagraIndex_t cagra_index, + cuvsHnswIndex_t hnsw_index) except + + + cuvsError_t cuvsHnswExtend(cuvsResources_t res, + cuvsHnswExtendParams_t params, + DLManagedTensor* data, + cuvsHnswIndex_t index) except + + + ctypedef struct cuvsHnswSearchParams: + int32_t ef + int32_t num_threads + + ctypedef cuvsHnswSearchParams* 
cuvsHnswSearchParams_t + cuvsError_t cuvsHnswSearch(cuvsResources_t res, cuvsHnswSearchParams* params, cuvsHnswIndex_t index, @@ -46,7 +82,12 @@ cdef extern from "cuvs/neighbors/hnsw.h" nogil: DLManagedTensor* neighbors, DLManagedTensor* distances) except + + cuvsError_t cuvsHnswSerialize(cuvsResources_t res, + const char * filename, + cuvsHnswIndex_t index) except + + cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, + cuvsHnswIndexParams_t params, const char * filename, int32_t dim, cuvsDistanceType metric, diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx index bcfaf167e..4c44350e8 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx @@ -39,41 +39,63 @@ from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -cdef class SearchParams: +cdef class IndexParams: """ - HNSW search parameters + Parameters to build index for HNSW nearest neighbor search Parameters ---------- - ef: int, default = 200 - Maximum number of candidate list size used during search. - num_threads: int, default = 0 - Number of CPU threads used to increase search parallelism. - When set to 0, the number of threads is automatically determined - using OpenMP's `omp_get_max_threads()`. + hierarchy : string, default = "none" (optional) + The hierarchy of the HNSW index. Valid values are ["none", "cpu"]. + - "none": No hierarchy is built. + - "cpu": Hierarchy is built using CPU. + ef_construction : int, default = 200 (optional) + Maximum number of candidate list size used during construction + when hierarchy is `cpu`. + num_threads : int, default = 2 (optional) + Number of CPU threads used to increase construction parallelism + when hierarchy is `cpu`. + NOTE: Constructing the hierarchy when converting from a CAGRA graph + is highly sensitive to parallelism, and increasing the number of + threads can reduce the quality of the index. """ - cdef cuvsHnswSearchParams params + cdef cuvsHnswIndexParams* params + + def __cinit__(self): + check_cuvs(cuvsHnswIndexParamsCreate(&self.params)) + + def __dealloc__(self): + check_cuvs(cuvsHnswIndexParamsDestroy(self.params)) def __init__(self, *, - ef=200, - num_threads=0): - self.params.ef = ef - self.params.numThreads = num_threads + hierarchy="none", + ef_construction=200, + num_threads=2): + if hierarchy == "none": + self.params.hierarchy = cuvsHnswHierarchy.NONE + elif hierarchy == "cpu": + self.params.hierarchy = cuvsHnswHierarchy.CPU + else: + raise ValueError("Invalid hierarchy type." 
+ " Valid values are 'none' and 'cpu'.") + self.params.ef_construction = ef_construction + self.params.num_threads = num_threads - def __repr__(self): - attr_str = [attr + "=" + str(getattr(self, attr)) - for attr in [ - "ef", "num_threads"]] - return "SearchParams(type=HNSW, " + (", ".join(attr_str)) + ")" + @property + def hierarchy(self): + if self.params.hierarchy == cuvsHnswHierarchy.NONE: + return "none" + elif self.params.hierarchy == cuvsHnswHierarchy.CPU: + return "cpu" @property - def ef(self): - return self.params.ef + def ef_construction(self): + return self.params.ef_construction @property def num_threads(self): - return self.params.numThreads + return self.params.num_threads cdef class Index: @@ -103,13 +125,44 @@ cdef class Index: return "Index(type=HNSW, metric=L2" + (", ".join(attr_str)) + ")" +cdef class ExtendParams: + """ + Parameters to extend the HNSW index with new data + + Parameters + ---------- + num_threads : int, default = 0 (optional) + Number of CPU threads used to increase construction parallelism. + When set to 0, the number of threads is automatically determined. + """ + + cdef cuvsHnswExtendParams* params + + def __cinit__(self): + check_cuvs(cuvsHnswExtendParamsCreate(&self.params)) + + def __dealloc__(self): + check_cuvs(cuvsHnswExtendParamsDestroy(self.params)) + + def __init__(self, *, + num_threads=0): + self.params.num_threads = num_threads + + @property + def num_threads(self): + return self.params.num_threads + + @auto_sync_resources -def save(filename, cagra.Index index, resources=None): +def save(filename, Index index, resources=None): """ Saves the CAGRA index to a file as an hnswlib index. - The saved index is immutable and can only be searched by the hnswlib - wrapper in cuVS, as the format is not compatible with the original - hnswlib. + If the index was constructed with `hnsw.IndexParams(hierarchy="none")`, + then the saved index is immutable and can only be searched by the hnswlib + wrapper in cuVS, as the format is not compatible with the original hnswlib. + However, if the index was constructed with + `hnsw.IndexParams(hierarchy="cpu")`, then the saved index is mutable and + compatible with the original hnswlib. Saving / loading the index is experimental. The serialization format is subject to change. @@ -119,7 +172,7 @@ def save(filename, cagra.Index index, resources=None): filename : string Name of the file. index : Index - Trained CAGRA index. + Trained HNSW index. {resources_docstring} Examples @@ -131,23 +184,28 @@ def save(filename, cagra.Index index, resources=None): >>> dataset = cp.random.random_sample((n_samples, n_features), ... 
dtype=cp.float32) >>> # Build index - >>> index = cagra.build(cagra.IndexParams(), dataset) + >>> cagra_index = cagra.build(cagra.IndexParams(), dataset) >>> # Serialize and deserialize the cagra index built - >>> hnsw.save("my_index.bin", index) + >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(), cagra_index) + >>> hnsw.save("my_index.bin", hnsw_index) """ cdef string c_filename = filename.encode('utf-8') cdef cuvsResources_t res = resources.get_c_obj() - check_cuvs(cagra.cuvsCagraSerializeToHnswlib(res, - c_filename.c_str(), - index.index)) + check_cuvs(cuvsHnswSerialize(res, + c_filename.c_str(), + index.index)) @auto_sync_resources -def load(filename, dim, dtype, metric="sqeuclidean", resources=None): +def load(IndexParams index_params, filename, dim, dtype, metric="sqeuclidean", + resources=None): """ - Loads base-layer-only hnswlib index from file, which was originally - saved as a built CAGRA index. The loaded index is immutable and can only - be searched by the hnswlib wrapper in cuVS, as the format is not + Loads an HNSW index. + If the index was constructed with `hnsw.IndexParams(hierarchy="none")`, + then the loaded index is immutable and can only be searched by the hnswlib + wrapper in cuVS, as the format is not compatible with the original hnswlib. + However, if the index was constructed with + `hnsw.IndexParams(hierarchy="cpu")`, then the loaded index is mutable and compatible with the original hnswlib. Saving / loading the index is experimental. The serialization format is @@ -156,6 +214,8 @@ def load(filename, dim, dtype, metric="sqeuclidean", resources=None): Parameters ---------- + index_params : IndexParams + Parameters that were used to convert CAGRA index to HNSW index. filename : string Name of the file. dim : int @@ -214,6 +274,7 @@ def load(filename, dim, dtype, metric="sqeuclidean", resources=None): check_cuvs(cuvsHnswDeserialize( res, + index_params.params, c_filename.c_str(), dim, distance_type, @@ -224,26 +285,30 @@ def load(filename, dim, dtype, metric="sqeuclidean", resources=None): @auto_sync_resources -def from_cagra(cagra.Index index, temporary_index_path=None, resources=None): +def from_cagra(IndexParams index_params, cagra.Index cagra_index, + temporary_index_path=None, resources=None): """ - Returns an hnsw base-layer-only index from a CAGRA index. - - NOTE: This method uses the filesystem to write the CAGRA index in - `/tmp/.bin` or the parameter `temporary_index_path` - if not None before reading it as an hnsw index, - then deleting the temporary file. The returned index is immutable - and can only be searched by the hnsw wrapper in cuVS, as the - format is not compatible with the original hnswlib library. - By `base_layer_only`, we mean that the hnsw index is created - without the additional layers that are used for the hierarchical - search in hnswlib. Instead, the base layer is used for the search. + Returns an HNSW index from a CAGRA index. + + NOTE: When `index_params.hierarchy` is: + 1. `NONE`: This method uses the filesystem to write the CAGRA index + in `/tmp/.bin` before reading it as an + hnswlib index, then deleting the temporary file. The + returned index is immutable and can only be searched by + the hnswlib wrapper in cuVS, as the format is not + compatible with the original hnswlib. + 2. `CPU`: The returned index is mutable and can be extended with + additional vectors. The serialized index is also compatible + with the original hnswlib library. Saving / loading the index is experimental. The serialization format is subject to change. 
     Parameters
     ----------
-    index : Index
+    index_params : IndexParams
+        Parameters to convert the CAGRA index to HNSW index.
+    cagra_index : cagra.Index
         Trained CAGRA index.
     temporary_index_path : string, default = None
         Path to save the temporary index file. If None, the temporary file
@@ -262,18 +327,107 @@
     >>> # Build index
     >>> index = cagra.build(cagra.IndexParams(), dataset)
     >>> # Serialize the CAGRA index to hnswlib base layer only index format
-    >>> hnsw_index = hnsw.from_cagra(index)
+    >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(), index)
     """
-    uuid_num = uuid.uuid4()
-    filename = temporary_index_path if temporary_index_path else \
-        f"/tmp/{uuid_num}.bin"
-    save(filename, index, resources=resources)
-    hnsw_index = load(filename, index.dim, np.dtype(index.active_index_type),
-                      "sqeuclidean", resources=resources)
-    os.remove(filename)
+
+    cdef Index hnsw_index = Index()
+    cdef cuvsResources_t res = resources.get_c_obj()
+    check_cuvs(cuvsHnswFromCagra(
+        res,
+        index_params.params,
+        cagra_index.index,
+        hnsw_index.index
+    ))
+
+    hnsw_index.trained = True
     return hnsw_index
 
 
+@auto_sync_resources
+def extend(ExtendParams extend_params, Index index, data, resources=None):
+    """
+    Extends the HNSW index with new data.
+
+    Parameters
+    ----------
+    extend_params : ExtendParams
+        Parameters used while adding the new vectors.
+    index : Index
+        Trained HNSW index.
+    data : Host array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float32, int8, uint8]
+    {resources_docstring}
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from cuvs.neighbors import hnsw, cagra
+    >>>
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = np.random.random_sample(
    ...     (n_samples, n_features)).astype(np.float32)
+    >>>
+    >>> # Build index
+    >>> index = cagra.build(cagra.IndexParams(), dataset)
+    >>> # Convert the CAGRA index to an HNSW index with a CPU hierarchy
+    >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(hierarchy="cpu"), index)
+    >>> # Extend the index with new data
+    >>> new_data = np.random.random_sample(
    ...     (n_samples, n_features)).astype(np.float32)
+    >>> hnsw.extend(hnsw.ExtendParams(), hnsw_index, new_data)
+    """
+
+    data_ai = wrap_array(data)
+    _check_input_array(data_ai, [np.dtype('float32'),
+                                 np.dtype('uint8'),
+                                 np.dtype('int8')])
+
+    cdef cydlpack.DLManagedTensor* data_dlpack = cydlpack.dlpack_c(data_ai)
+    cdef cuvsResources_t res = resources.get_c_obj()
+
+    check_cuvs(cuvsHnswExtend(
+        res,
+        extend_params.params,
+        data_dlpack,
+        index.index
+    ))
+
+
+cdef class SearchParams:
+    """
+    HNSW search parameters
+
+    Parameters
+    ----------
+    ef: int, default = 200
+        Maximum number of candidate list size used during search.
+    num_threads: int, default = 0
+        Number of CPU threads used to increase search parallelism.
+        When set to 0, the number of threads is automatically determined
+        using OpenMP's `omp_get_max_threads()`.
+    """
+
+    cdef cuvsHnswSearchParams params
+
+    def __init__(self, *,
+                 ef=200,
+                 num_threads=0):
+        self.params.ef = ef
+        self.params.num_threads = num_threads
+
+    def __repr__(self):
+        attr_str = [attr + "=" + str(getattr(self, attr))
+                    for attr in [
+                        "ef", "num_threads"]]
+        return "SearchParams(type=HNSW, " + (", ".join(attr_str)) + ")"
+
+    @property
+    def ef(self):
+        return self.params.ef
+
+    @property
+    def num_threads(self):
+        return self.params.num_threads
+
+
 @auto_sync_resources
 @auto_convert_output
 def search(SearchParams search_params,
@@ -290,15 +444,15 @@ def search(SearchParams search_params,
     ----------
     search_params : SearchParams
     index : Index
-        Trained CAGRA index.
- queries : CUDA array interface compliant matrix shape (n_samples, dim) + Trained HNSW index. + queries : CPU array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int] k : int The number of neighbors. - neighbors : Optional CUDA array interface compliant matrix shape + neighbors : Optional CPU array interface compliant matrix shape (n_queries, k), dtype uint64_t. If supplied, neighbor indices will be written here in-place. (default None) - distances : Optional CUDA array interface compliant matrix shape + distances : Optional CPU array interface compliant matrix shape (n_queries, k) If supplied, the distances to the neighbors will be written here in-place. (default None) {resources_docstring} @@ -323,7 +477,7 @@ def search(SearchParams search_params, ... num_threads=0 ... ) >>> # Convert CAGRA index to HNSW - >>> hnsw_index = hnsw.from_cagra(index) + >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(), index) >>> # Using a pooling allocator reduces overhead of temporary array >>> # creation during search. This is useful if multiple searches >>> # are performed with same query size. diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 20a35401e..20f583ae8 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -32,6 +32,7 @@ def run_hnsw_build_search_test( build_algo="ivf_pq", intermediate_graph_degree=128, graph_degree=64, + hierarchy="none", search_params={}, ): dataset = generate_data((n_rows, n_cols), dtype) @@ -53,7 +54,8 @@ def run_hnsw_build_search_test( assert index.trained - hnsw_index = hnsw.from_cagra(index) + hnsw_params = hnsw.IndexParams(hierarchy=hierarchy, num_threads=1) + hnsw_index = hnsw.from_cagra(hnsw_params, index) queries = generate_data((n_queries, n_cols), dtype) @@ -83,10 +85,93 @@ def run_hnsw_build_search_test( @pytest.mark.parametrize("num_threads", [2, 4]) @pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) -def test_hnsw(dtype, k, ef, num_threads, metric, build_algo): +@pytest.mark.parametrize("hierarchy", ["none", "cpu"]) +def test_hnsw(dtype, k, ef, num_threads, metric, build_algo, hierarchy): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. 
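+    # "hierarchy" is exercised both ways: "none" yields the base-layer-only
+    # index, while "cpu" builds the full HNSW hierarchy on the host; both are
+    # expected to clear the same recall bar.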
run_hnsw_build_search_test( + dtype=dtype, + k=k, + metric=metric, + build_algo=build_algo, + hierarchy=hierarchy, + search_params={"ef": ef, "num_threads": num_threads}, + ) + + +def run_hnsw_extend_test( + n_rows=10000, + add_rows=2000, + n_cols=10, + n_queries=100, + k=10, + dtype=np.float32, + metric="sqeuclidean", + build_algo="ivf_pq", + intermediate_graph_degree=128, + graph_degree=64, + search_params={}, +): + dataset = generate_data((n_rows, n_cols), dtype) + add_dataset = generate_data((add_rows, n_cols), dtype) + if metric == "inner_product": + dataset = normalize(dataset, norm="l2", axis=1) + add_dataset = normalize(add_dataset, norm="l2", axis=1) + if dtype in [np.int8, np.uint8]: + pytest.skip( + "inner_product metric is not supported for int8/uint8 data" + ) + if build_algo == "nn_descent": + pytest.skip("inner_product metric is not supported for nn_descent") + + build_params = cagra.IndexParams( + metric=metric, + intermediate_graph_degree=intermediate_graph_degree, + graph_degree=graph_degree, + build_algo=build_algo, + ) + + index = cagra.build(build_params, dataset) + + assert index.trained + + hnsw_params = hnsw.IndexParams(hierarchy="cpu", num_threads=1) + hnsw_index = hnsw.from_cagra(hnsw_params, index) + hnsw.extend(hnsw.ExtendParams(), hnsw_index, add_dataset) + + queries = generate_data((n_queries, n_cols), dtype) + + search_params = hnsw.SearchParams(**search_params) + + out_dist, out_idx = hnsw.search(search_params, hnsw_index, queries, k) + + # Calculate reference values with sklearn + skl_metric = { + "sqeuclidean": "sqeuclidean", + "inner_product": "cosine", + "euclidean": "euclidean", + }[metric] + nn_skl = NearestNeighbors( + n_neighbors=k, algorithm="brute", metric=skl_metric + ) + nn_skl.fit(np.vstack([dataset, add_dataset])) + skl_dist, skl_idx = nn_skl.kneighbors(queries, return_distance=True) + + recall = calc_recall(out_idx, skl_idx) + print(recall) + assert recall > 0.95 + + +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) +@pytest.mark.parametrize("k", [10, 20]) +@pytest.mark.parametrize("ef", [30, 40]) +@pytest.mark.parametrize("num_threads", [2, 4]) +@pytest.mark.parametrize("metric", ["sqeuclidean"]) +@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) +def test_hnsw_extend(dtype, k, ef, num_threads, metric, build_algo): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only sqeuclidean metric here. 
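+    # This test converts with hierarchy="cpu", inserts `add_rows` extra
+    # vectors through hnsw.extend, and checks recall against brute force
+    # over the combined dataset.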
+ run_hnsw_extend_test( dtype=dtype, k=k, metric=metric, diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml index f1a7f272c..90a561bca 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml @@ -4,8 +4,11 @@ constraints: groups: base: build: - graph_degree: [32, 64, 128, 256] + graph_degree: [32, 64, 96, 128] intermediate_graph_degree: [32, 64, 96, 128] graph_build_algo: ["NN_DESCENT"] + hierarchy: ["none", "cpu"] + ef_construction: [64, 128, 256, 512] + num_threads: [2, 5, 10] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] From e0aebfd0c499189585319f5a5fbf46dfb9ce04f9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 3 Dec 2024 14:41:47 -0600 Subject: [PATCH 2/3] add a README for wheels (#504) Wheel-building CI jobs are failing like this: > Checking final_dist/cuvs_cu12-25.2.0a26-cp310-cp310-manylinux_2_28_aarch64.whl: FAILED due to warnings > WARNING `long_description` missing. > Error: Process completed with exit code 1. ([build link](https://github.com/rapidsai/cuvs/actions/runs/12133882036)) Looks like the root cause is a combination of the following: * there was a new `twine` release (6.x) 3 days ago: https://pypi.org/project/twine/#history * it contains https://github.com/pypa/twine/pull/1168, which makes `twine check --strict` fail if the wheel's `long_description` is empty * the `cuvs` wheel README (used as the wheel `long_description`) is empty This proposes adding a small README, with just 2 sentences copied from the project's root-level README, to get past that check. ## Notes for Reviewers The `long_description` becomes the project homepage when a project is hosted on PyPI. The wheels produced from this repo aren't currently being published to pypi.org so this change won't be seen there, but a more user-friendly README should be added if/when we decide to publish `cuvs-cu{11,12}` to pypi.org. ref: https://github.com/rapidsai/build-planning/issues/70 --- python/cuvs/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cuvs/README.md b/python/cuvs/README.md index e69de29bb..27b494811 100644 --- a/python/cuvs/README.md +++ b/python/cuvs/README.md @@ -0,0 +1,3 @@ +# cuVS + +cuVS contains state-of-the-art implementations of several algorithms for running approximate nearest neighbors and clustering on the GPU. It can be used directly or through the various databases and other libraries that have integrated it. The primary goal of cuVS is to simplify the use of GPUs for vector similarity search and clustering. From fbbca0570db27d476b500ef021c03482b0d989e2 Mon Sep 17 00:00:00 2001 From: Micka Date: Wed, 4 Dec 2024 00:57:52 +0100 Subject: [PATCH 3/3] Add Question Retrieval notebook using Milvus (#451) This notebook is adapting the Question Retrieval nb to use Milvus. It can serve as a good example on how to do Bulk ingest, how to use cuVS, and especially CAGRA+HNSW on Milvus Authors: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/451 --- ...ectorSearch_QuestionRetrieval_Milvus.ipynb | 732 ++++++++++++++++++ 1 file changed, 732 insertions(+) create mode 100644 notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb diff --git a/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb b/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb new file mode 100644 index 000000000..09a6cca43 --- /dev/null +++ b/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb @@ -0,0 +1,732 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f5499b54", + "metadata": {}, + "source": [ + "\n", + "# Similar Questions Retrieval - Milvus - CAGRA-HNSW\n", + "\n", + "This notebook is inspired by the [similar search example of Sentence-Transformers](https://www.sbert.net/examples/applications/semantic-search/README.html#similar-questions-retrieval), and adapted to be used with [Milvus](https://milvus.io) and [cuVS](https://rapids.ai/cuvs/).\n", + "\n", + "The model was pre-trained on the [Natural Questions dataset](https://ai.google.com/research/NaturalQuestions). It consists of about 100k real Google search queries, together with an annotated passage from Wikipedia that provides the answer. It is an example of an asymmetric search task. As corpus, we use the smaller [Simple English Wikipedia](http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz) so that it fits easily into memory.\n", + "\n", + "The steps to install the latest Milvus package are available in the [Milvus documentation](https://milvus.io/docs/quickstart.md)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8d55ede", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:47:21.149465Z", + "iopub.status.busy": "2024-11-08T14:47:21.149218Z", + "iopub.status.idle": "2024-11-08T14:47:23.440275Z", + "shell.execute_reply": "2024-11-08T14:47:23.439436Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install sentence_transformers torch pymilvus pymilvus[bulk_writer] dask dask[distributed]\n", + "\n", + "# Note: if you have a Hopper based GPU, like an H100, use these to install:\n", + "# pip install torch --index-url https://download.pytorch.org/whl/cu118\n", + "# pip install sentence_transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb1e81c3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:47:23.444058Z", + "iopub.status.busy": "2024-11-08T14:47:23.443683Z", + "iopub.status.idle": "2024-11-08T14:47:24.219903Z", + "shell.execute_reply": "2024-11-08T14:47:24.219228Z" + } + }, + "outputs": [], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee4c5cc0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:47:24.223131Z", + "iopub.status.busy": "2024-11-08T14:47:24.222874Z", + "iopub.status.idle": "2024-11-08T14:47:34.024085Z", + "shell.execute_reply": "2024-11-08T14:47:34.023435Z" + } + }, + "outputs": [], + "source": [ + "import dask.array as da\n", + "import gzip\n", + "import json\n", + "import math\n", + "import numpy as np\n", + "import os\n", + "import pymilvus\n", + "import time\n", + "import torch\n", + "\n", + "from minio import Minio\n", + "from multiprocessing import Process\n", + "from sentence_transformers import SentenceTransformer, CrossEncoder, util\n", + "from typing import List\n", + "\n", + "\n", + "from pymilvus import (\n", + " connections, utility\n", + ")\n", + "from pymilvus.bulk_writer 
import LocalBulkWriter, BulkFileType # pip install pymilvus[bulk_writer]\n", + "\n", + "if not torch.cuda.is_available():\n", + " print(\"Warning: No GPU found. Please add GPU to your notebook\")" + ] + }, + { + "cell_type": "markdown", + "id": "47cabaca", + "metadata": {}, + "source": [ + "# Setup Milvus Collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fcd259c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:47:34.027677Z", + "iopub.status.busy": "2024-11-08T14:47:34.027288Z", + "iopub.status.idle": "2024-11-08T14:47:34.109212Z", + "shell.execute_reply": "2024-11-08T14:47:34.108609Z" + } + }, + "outputs": [], + "source": [ + "DIM = 768\n", + "MILVUS_PORT = 30004\n", + "MILVUS_HOST = f\"http://localhost:{MILVUS_PORT}\"\n", + "ID_FIELD=\"id\"\n", + "EMBEDDING_FIELD=\"embedding\"\n", + "\n", + "collection_name = \"simple_wiki\"\n", + "\n", + "def get_milvus_client():\n", + " return pymilvus.MilvusClient(uri=MILVUS_HOST)\n", + "\n", + "client = get_milvus_client()\n", + "\n", + "fields = [\n", + " pymilvus.FieldSchema(name=ID_FIELD, dtype=pymilvus.DataType.INT64, is_primary=True),\n", + " pymilvus.FieldSchema(name=EMBEDDING_FIELD, dtype=pymilvus.DataType.FLOAT_VECTOR, dim=DIM)\n", + "]\n", + "\n", + "schema = pymilvus.CollectionSchema(fields)\n", + "schema.verify()\n", + "\n", + "if collection_name in client.list_collections():\n", + " print(f\"Collection '{collection_name}' already exists. Deleting collection...\")\n", + " client.drop_collection(collection_name)\n", + "\n", + "client.create_collection(collection_name, schema=schema, dimension=DIM, vector_field_name=EMBEDDING_FIELD)\n", + "collection = pymilvus.Collection(name=collection_name, using=client._using)\n", + "collection.release()\n", + "collection.drop_index()\n" + ] + }, + { + "cell_type": "markdown", + "id": "00bd20f5", + "metadata": {}, + "source": [ + "# Setup Sentence Transformer model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a1a6307", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:47:34.111782Z", + "iopub.status.busy": "2024-11-08T14:47:34.111556Z", + "iopub.status.idle": "2024-11-08T14:47:39.654323Z", + "shell.execute_reply": "2024-11-08T14:47:39.653386Z" + } + }, + "outputs": [], + "source": [ + "# We use the Bi-Encoder to encode all passages, so that we can use it with semantic search\n", + "model_name = 'nq-distilbert-base-v1'\n", + "bi_encoder = SentenceTransformer(model_name)\n", + "\n", + "# As dataset, we use Simple English Wikipedia. Compared to the full English wikipedia, it has only\n", + "# about 170k articles. 
We split these articles into paragraphs and encode them with the bi-encoder\n", + "\n", + "wikipedia_filepath = 'data/simplewiki-2020-11-01.jsonl.gz'\n", + "\n", + "if not os.path.exists(wikipedia_filepath):\n", + " util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz', wikipedia_filepath)\n", + "\n", + "passages = []\n", + "with gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:\n", + " for line in fIn:\n", + " data = json.loads(line.strip())\n", + " for paragraph in data['paragraphs']:\n", + " # We encode the passages as [title, text]\n", + " passages.append([data['title'], paragraph])\n", + "\n", + "# If you like, you can also limit the number of passages you want to use\n", + "print(\"Passages:\", len(passages))\n", + "\n", + "# To speed things up, pre-computed embeddings are downloaded.\n", + "# The provided file encoded the passages with the model 'nq-distilbert-base-v1'\n", + "if model_name == 'nq-distilbert-base-v1':\n", + " embeddings_filepath = 'simplewiki-2020-11-01-nq-distilbert-base-v1.pt'\n", + " if not os.path.exists(embeddings_filepath):\n", + " util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01-nq-distilbert-base-v1.pt', embeddings_filepath)\n", + "\n", + " corpus_embeddings = torch.load(embeddings_filepath, map_location='cpu', weights_only=True).float() # Convert embedding file to float\n", + " #if torch.cuda.is_available():\n", + " # corpus_embeddings = corpus_embeddings.to('cuda')\n", + "else: # Here, we compute the corpus_embeddings from scratch (which can take a while depending on the GPU)\n", + " corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True).to('cpu')" + ] + }, + { + "cell_type": "markdown", + "id": "1f4e9b9d", + "metadata": {}, + "source": [ + "# Vector Search using Milvus and RAPIDS cuVS \n", + "Now that our embeddings are ready to be indexed and that the model has been loaded, we can use Milvus and RAPIDS cuVS to do our vector search.\n", + "\n", + "This is done in 3 steps: First we ingest all the vectors in the Milvus collection, then we build the Milvus index, to finally search it." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "563751c1",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:47:39.658832Z",
+     "iopub.status.busy": "2024-11-08T14:47:39.658374Z",
+     "iopub.status.idle": "2024-11-08T14:49:47.244768Z",
+     "shell.execute_reply": "2024-11-08T14:49:47.244162Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# MinIO connection settings\n",
+    "MINIO_PORT = 30009\n",
+    "MINIO_URL = f\"localhost:{MINIO_PORT}\"\n",
+    "MINIO_SECRET_KEY = \"minioadmin\"\n",
+    "MINIO_ACCESS_KEY = \"minioadmin\"\n",
+    "\n",
+    "def upload_to_minio(file_paths: List[List[str]], remote_paths: List[List[str]], bucket_name=\"milvus-bucket\"):\n",
+    "    minio_client = Minio(endpoint=MINIO_URL, access_key=MINIO_ACCESS_KEY, secret_key=MINIO_SECRET_KEY, secure=False)\n",
+    "    if not minio_client.bucket_exists(bucket_name):\n",
+    "        minio_client.make_bucket(bucket_name)\n",
+    "\n",
+    "    for local_batch, remote_batch in zip(file_paths, remote_paths):\n",
+    "        for local_file, remote_file in zip(local_batch, remote_batch):\n",
+    "            minio_client.fput_object(bucket_name,\n",
+    "                                     object_name=remote_file,\n",
+    "                                     file_path=local_file,\n",
+    "                                     part_size=512 * 1024 * 1024,\n",
+    "                                     num_parallel_uploads=5)\n",
+    "\n",
+    "\n",
+    "def ingest_data_bulk(collection_name, vectors, schema: pymilvus.CollectionSchema, log_times=True, bulk_writer_type=\"milvus\", debug=False):\n",
+    "    print(f\"- Ingesting {len(vectors) // 1000}k vectors, Bulk\")\n",
+    "    tic = time.perf_counter()\n",
+    "    collection = pymilvus.Collection(collection_name, using=get_milvus_client()._using)\n",
+    "    remote_path = None\n",
+    "\n",
+    "    if bulk_writer_type == 'milvus':\n",
+    "        # Prepare source data for faster ingestion\n",
+    "        writer = LocalBulkWriter(\n",
+    "            schema=schema,\n",
+    "            local_path='bulk_data',\n",
+    "            segment_size=512 * 1024 * 1024,  # Default value\n",
+    "            file_type=BulkFileType.NPY\n",
+    "        )\n",
+    "        for row_id, vec in enumerate(vectors):\n",
+    "            writer.append_row({ID_FIELD: row_id, EMBEDDING_FIELD: vec})\n",
+    "\n",
+    "        if debug:\n",
+    "            print(writer.batch_files)\n",
+    "        def callback(file_list):\n",
+    "            if debug:\n",
+    "                print(\" - Commit successful\")\n",
+    "                print(file_list)\n",
+    "        writer.commit(call_back=callback)\n",
+    "        files_to_upload = writer.batch_files\n",
+    "    elif bulk_writer_type == 'dask':\n",
+    "        # Prepare source data for faster ingestion\n",
+    "        if not os.path.isdir(\"bulk_data\"):\n",
+    "            os.mkdir(\"bulk_data\")\n",
+    "\n",
+    "        from dask.distributed import Client, LocalCluster\n",
+    "        cluster = LocalCluster(n_workers=1, threads_per_worker=1)\n",
+    "        client = Client(cluster)\n",
+    "\n",
+    "        chunk_size = 100000\n",
+    "        da_vectors = da.from_array(vectors, chunks=(chunk_size, vectors.shape[1]))\n",
+    "        da_ids = da.arange(len(vectors), chunks=(chunk_size,))\n",
+    "        da.to_npy_stack(\"bulk_data/da_embedding/\", da_vectors)\n",
+    "        da.to_npy_stack(\"bulk_data/da_id/\", da_ids)\n",
+    "        files_to_upload = []\n",
+    "        remote_path = []\n",
+    "        for chunk_nb in range(math.ceil(len(vectors) / chunk_size)):\n",
+    "            files_to_upload.append([f\"bulk_data/da_embedding/{chunk_nb}.npy\", f\"bulk_data/da_id/{chunk_nb}.npy\"])\n",
+    "            remote_path.append([f\"bulk_data/da_{chunk_nb}/embedding.npy\", f\"bulk_data/da_{chunk_nb}/id.npy\"])\n",
+    "\n",
+    "    elif bulk_writer_type == 'numpy':\n",
+    "        # Directly save NPY files\n",
+    "        np.save(\"bulk_data/embedding.npy\", vectors)\n",
+    "        np.save(\"bulk_data/id.npy\", np.arange(len(vectors)))\n",
+    "        files_to_upload = [[\"bulk_data/embedding.npy\", \"bulk_data/id.npy\"]]\n",
+    "    else:\n",
+    "        raise ValueError(\"Invalid bulk writer type\")\n",
+    "\n",
+    "    toc = time.perf_counter()\n",
+    "    if log_times:\n",
+    "        print(f\" - File save time: {toc - tic:.2f} seconds\")\n",
+    "    # Import data\n",
+    "    if remote_path is None:\n",
+    "        remote_path = files_to_upload\n",
+    "    upload_to_minio(files_to_upload, remote_path)\n",
+    "\n",
+    "    job_ids = [utility.do_bulk_insert(collection_name, batch, using=get_milvus_client()._using) for batch in remote_path]\n",
+    "\n",
+    "    while True:\n",
+    "        tasks = [utility.get_bulk_insert_state(job_id, using=get_milvus_client()._using) for job_id in job_ids]\n",
+    "        success = all(task.state_name == \"Completed\" for task in tasks)\n",
+    "        failure = any(task.state_name == \"Failed\" for task in tasks)\n",
+    "        for i, task in enumerate(tasks):\n",
+    "            if debug:\n",
+    "                print(f\" - Task {i}/{len(tasks)} state: {task.state_name}, Progress percent: {task.infos['progress_percent']}, Imported row count: {task.row_count}\")\n",
+    "            if task.state_name == \"Failed\":\n",
+    "                print(task)\n",
+    "        if success or failure:\n",
+    "            break\n",
+    "        time.sleep(2)\n",
+    "\n",
+    "    added_entities = sum(task.row_count for task in tasks)\n",
+    "    failure = failure or added_entities != len(vectors)\n",
+    "    if failure:\n",
+    "        print(f\"- Ingestion failed. Added entities: {added_entities}\")\n",
+    "    toc = time.perf_counter()\n",
+    "    if log_times:\n",
+    "        datasize = vectors.nbytes / 1024 / 1024\n",
+    "        print(f\"- Ingestion time: {toc - tic:.2f} seconds. ({(datasize / (toc - tic)):.2f}MB/s)\")\n",
+    "\n",
+    "ingest_data_bulk(collection_name, np.array(corpus_embeddings), schema, bulk_writer_type='dask', log_times=True)\n",
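+    "\n",
+    "# Optional sanity check (illustrative): after a successful bulk insert, the row\n",
+    "# count reported by Milvus should match the number of ingested vectors.\n",
+    "print(f\"Entities in collection: {collection.num_entities} (expected: {len(corpus_embeddings)})\")"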
+ " raise ValueError(\"Invalid bulk writer type\")\n", + " \n", + " toc = time.perf_counter()\n", + " if log_times:\n", + " print(f\" - File save time: {toc - tic:.2f} seconds\")\n", + " # Import data\n", + " if remote_path is None:\n", + " remote_path = files_to_upload\n", + " upload_to_minio(files_to_upload, remote_path)\n", + " \n", + " job_ids = [utility.do_bulk_insert(collection_name, batch, using=get_milvus_client()._using) for batch in remote_path]\n", + "\n", + " while True:\n", + " tasks = [utility.get_bulk_insert_state(job_id, using=get_milvus_client()._using) for job_id in job_ids]\n", + " success = all(task.state_name == \"Completed\" for task in tasks)\n", + " failure = any(task.state_name == \"Failed\" for task in tasks)\n", + " for i in range(len(tasks)):\n", + " task = tasks[i]\n", + " if debug:\n", + " print(f\" - Task {i}/{len(tasks)} state: {task.state_name}, Progress percent: {task.infos['progress_percent']}, Imported row count: {task.row_count}\")\n", + " if task.state_name == \"Failed\":\n", + " print(task)\n", + " if success or failure:\n", + " break\n", + " time.sleep(2)\n", + "\n", + " added_entities = str(sum([task.row_count for task in tasks]))\n", + " failure = failure or added_entities != str(len(vectors))\n", + " if failure:\n", + " print(f\"- Ingestion failed. Added entities: {added_entities}\")\n", + " toc = time.perf_counter()\n", + " if log_times:\n", + " datasize = vectors.nbytes / 1024 / 1024\n", + " print(f\"- Ingestion time: {toc - tic:.2f} seconds. ({(datasize / (toc-tic)):.2f}MB/s)\")\n", + "\n", + "ingest_data_bulk(collection_name, np.array(corpus_embeddings), schema, bulk_writer_type='dask', log_times=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad90b4be", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:49:47.247498Z", + "iopub.status.busy": "2024-11-08T14:49:47.247268Z", + "iopub.status.idle": "2024-11-08T14:50:00.737502Z", + "shell.execute_reply": "2024-11-08T14:50:00.736808Z" + } + }, + "outputs": [], + "source": [ + "# Setups the IVFPQ index\n", + "\n", + "index_params = dict(\n", + " index_type=\"GPU_IVF_PQ\",\n", + " metric_type=\"L2\",\n", + " params={\"nlist\": 150, # Number of clusters\n", + " \"m\": 96}) # Product Quantization dimension\n", + "\n", + "# Drop the index if it exists\n", + "if collection.has_index():\n", + " collection.release()\n", + " collection.drop_index()\n", + "\n", + "# Create the index\n", + "tic = time.perf_counter()\n", + "collection.create_index(field_name=EMBEDDING_FIELD, index_params=index_params)\n", + "collection.load()\n", + "toc = time.perf_counter()\n", + "print(f\"- Index creation time: {toc - tic:.4f} seconds. 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c75acea7",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:50:00.740443Z",
+     "iopub.status.busy": "2024-11-08T14:50:00.740142Z",
+     "iopub.status.idle": "2024-11-08T14:50:00.745403Z",
+     "shell.execute_reply": "2024-11-08T14:50:00.744672Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Search the index\n",
+    "def search_cuvs_pq(query, top_k=5, n_probe=30):\n",
+    "    # Encode the query using the bi-encoder and find potentially relevant passages\n",
+    "    question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n",
+    "\n",
+    "    search_params = {\"nprobe\": n_probe}\n",
+    "    tic = time.perf_counter()\n",
+    "    hits = collection.search(\n",
+    "        data=np.array(question_embedding[None].cpu()), anns_field=EMBEDDING_FIELD, param=search_params, limit=top_k\n",
+    "    )\n",
+    "    toc = time.perf_counter()\n",
+    "\n",
+    "    # Output the top-k hits\n",
+    "    print(\"Input question:\", query)\n",
+    "    print(\"Results (after {:.3f} ms):\".format((toc - tic) * 1000))\n",
+    "    for k in range(top_k):\n",
+    "        print(\"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07935bca",
+   "metadata": {},
+   "source": [
+    "The IVF-PQ algorithm is ideal when the memory footprint needs to be reduced while keeping good accuracy. The sketch below illustrates the compression arithmetic."
+   ]
+  },
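+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f9e2b7a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative arithmetic (a sketch, not a measurement): with 8-bit PQ codes,\n",
+    "# each vector is stored as roughly m bytes instead of DIM * 4 bytes of float32.\n",
+    "raw_bytes_per_vector = DIM * 4  # 768 floats * 4 bytes = 3072 bytes\n",
+    "pq_bytes_per_vector = 96        # m = 96 sub-quantizers, 1 byte each\n",
+    "print(f\"Approximate compression of the vector data: {raw_bytes_per_vector / pq_bytes_per_vector:.0f}x\")"
+   ]
+  },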
"toc = time.perf_counter()\n", + "print(f\"- Index creation time: {toc - tic:.4f} seconds. ({index_params})\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2d6017ed", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:50:15.972764Z", + "iopub.status.busy": "2024-11-08T14:50:15.972368Z", + "iopub.status.idle": "2024-11-08T14:50:15.977806Z", + "shell.execute_reply": "2024-11-08T14:50:15.977064Z" + } + }, + "outputs": [], + "source": [ + "def search_cuvs_flat(query, top_k = 5, n_probe = 30):\n", + " # Encode the query using the bi-encoder and find potentially relevant passages\n", + " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", + " \n", + " search_params = {\"nprobe\": n_probe}\n", + " tic = time.perf_counter()\n", + " hits = collection.search(\n", + " data=np.array(question_embedding[None].cpu()), anns_field=EMBEDDING_FIELD, param=search_params, limit=top_k\n", + " )\n", + " toc = time.perf_counter()\n", + "\n", + " # Output of top-k hits\n", + " print(\"Input question:\", query)\n", + " print(\"Results (after {:.3f} ms):\".format((toc - tic)*1000))\n", + " for k in range(top_k):\n", + " print(\"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5cfb644", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:50:15.980796Z", + "iopub.status.busy": "2024-11-08T14:50:15.980408Z", + "iopub.status.idle": "2024-11-08T14:50:16.009271Z", + "shell.execute_reply": "2024-11-08T14:50:16.008579Z" + } + }, + "outputs": [], + "source": [ + "search_cuvs_flat(query=\"Who was Grace Hopper?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5694d00", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:50:16.012253Z", + "iopub.status.busy": "2024-11-08T14:50:16.011924Z", + "iopub.status.idle": "2024-11-08T14:50:16.043432Z", + "shell.execute_reply": "2024-11-08T14:50:16.042751Z" + } + }, + "outputs": [], + "source": [ + "search_cuvs_flat(query=\"Who was Alan Turing?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcfc3c5b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-08T14:50:16.046439Z", + "iopub.status.busy": "2024-11-08T14:50:16.046093Z", + "iopub.status.idle": "2024-11-08T14:50:16.071322Z", + "shell.execute_reply": "2024-11-08T14:50:16.070614Z" + } + }, + "outputs": [], + "source": [ + "search_cuvs_flat(query = \"What is creating tides?\")" + ] + }, + { + "cell_type": "markdown", + "id": "a59d7b32-0832-4c3a-864e-aeb2e6e7fe1f", + "metadata": {}, + "source": [ + "## Using CAGRA: Hybrid GPU-CPU graph-based Vector Search\n", + "\n", + "CAGRA is a graph-based nearest neighbors implementation with state-of-the art performance for both small- and large-batch sized vector searches. \n", + "\n", + "CAGRA follows the same steps as IVF-FLAT and IVF-PQ in Milvus, but is also able to be adapted for querying on CPU.\n", + "This means that CAGRA is able to profit from a high training speed on GPU, as well as a low inference time on CPU, that minimize latency even on the smallest queries." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5ce4dab",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:50:16.074449Z",
+     "iopub.status.busy": "2024-11-08T14:50:16.074128Z",
+     "iopub.status.idle": "2024-11-08T14:50:30.479027Z",
+     "shell.execute_reply": "2024-11-08T14:50:30.478265Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Drop the current index if it exists\n",
+    "if collection.has_index():\n",
+    "    collection.release()\n",
+    "    collection.drop_index()\n",
+    "\n",
+    "# Create the CAGRA index (built on the GPU, adapted for CPU search)\n",
+    "index_params = dict(\n",
+    "    index_type=\"GPU_CAGRA\",\n",
+    "    metric_type=\"L2\",\n",
+    "    params={\"graph_degree\": 64, \"intermediate_graph_degree\": 128, \"build_algo\": \"NN_DESCENT\", \"adapt_for_cpu\": True})\n",
+    "tic = time.perf_counter()\n",
+    "collection.create_index(field_name=EMBEDDING_FIELD, index_params=index_params)\n",
+    "collection.load()\n",
+    "toc = time.perf_counter()\n",
+    "print(f\"- Index creation time: {toc - tic:.4f} seconds. ({index_params})\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "df229e21-f6b6-4d6c-ad54-2724f8738934",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:50:30.481748Z",
+     "iopub.status.busy": "2024-11-08T14:50:30.481474Z",
+     "iopub.status.idle": "2024-11-08T14:50:30.486324Z",
+     "shell.execute_reply": "2024-11-08T14:50:30.485696Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def search_cuvs_cagra(query, top_k=5, itopk=32):\n",
+    "    # Encode the query using the bi-encoder and find potentially relevant passages\n",
+    "    question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n",
+    "\n",
+    "    search_params = {\"params\": {\"itopk\": itopk, \"ef\": 35}}\n",
+    "    tic = time.perf_counter()\n",
+    "    hits = collection.search(\n",
+    "        data=np.array(question_embedding[None].cpu()), anns_field=EMBEDDING_FIELD, param=search_params, limit=top_k\n",
+    "    )\n",
+    "    toc = time.perf_counter()\n",
+    "\n",
+    "    # Output the top-k hits\n",
+    "    print(\"Input question:\", query)\n",
+    "    print(\"Results (after {:.3f} ms):\".format((toc - tic) * 1000))\n",
+    "    for k in range(top_k):\n",
+    "        print(\"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5e862fd-b7e5-4423-8fbf-36918f02c8f3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:50:30.489077Z",
+     "iopub.status.busy": "2024-11-08T14:50:30.488790Z",
+     "iopub.status.idle": "2024-11-08T14:50:30.513998Z",
+     "shell.execute_reply": "2024-11-08T14:50:30.513319Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "search_cuvs_cagra(query=\"Who was Grace Hopper?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb8a5b7b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:50:30.516748Z",
+     "iopub.status.busy": "2024-11-08T14:50:30.516521Z",
+     "iopub.status.idle": "2024-11-08T14:50:30.538982Z",
+     "shell.execute_reply": "2024-11-08T14:50:30.538269Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "search_cuvs_cagra(query=\"Who was Alan Turing?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c89810a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-08T14:50:30.541508Z",
+     "iopub.status.busy": "2024-11-08T14:50:30.541287Z",
+     "iopub.status.idle": "2024-11-08T14:50:30.562722Z",
+     "shell.execute_reply": "2024-11-08T14:50:30.562085Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "search_cuvs_cagra(query=\"What is creating tides?\")\n",
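+    "\n",
+    "# Illustrative sweep (a sketch, assuming the CAGRA index above is still loaded):\n",
+    "# larger itopk values generally improve recall at the cost of higher search latency.\n",
+    "for itopk in [32, 64, 128]:\n",
+    "    search_cuvs_cagra(query=\"What is creating tides?\", itopk=itopk)"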
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}