diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h index e4169f6f8..6670ed892 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h @@ -17,7 +17,9 @@ #include "cuvs_cagra_wrapper.h" #include +#include +#include #include namespace cuvs::bench { @@ -90,8 +92,13 @@ void cuvs_cagra_hnswlib::build(const T* dataset, size_t nrow) auto host_dataset_view = raft::make_host_matrix_view(dataset, nrow, this->dim_); auto opt_dataset_view = std::optional>(std::move(host_dataset_view)); - hnsw_index_ = cuvs::neighbors::hnsw::from_cagra( + const auto start_clock = std::chrono::system_clock::now(); + hnsw_index_ = cuvs::neighbors::hnsw::from_cagra( handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view); + int time = + std::chrono::duration_cast(std::chrono::system_clock::now() - start_clock) + .count(); + RAFT_LOG_DEBUG("Graph saved to HNSW format in %d:%d min", time / 60, time % 60); } template diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index a4684ce26..597e186d7 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -1599,11 +1599,16 @@ void deserialize(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] os output stream * @param[in] index CAGRA index + * @param[in] dataset [optional] host array that stores the dataset, required if the index + * does not contain the dataset. * */ -void serialize_to_hnswlib(raft::resources const& handle, - std::ostream& os, - const cuvs::neighbors::cagra::index& index); +void serialize_to_hnswlib( + raft::resources const& handle, + std::ostream& os, + const cuvs::neighbors::cagra::index& index, + std::optional> dataset = + std::nullopt); /** * Save a CAGRA build index in hnswlib base-layer-only serialized format @@ -1628,11 +1633,16 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] filename the file name for saving the index * @param[in] index CAGRA index + * @param[in] dataset [optional] host array that stores the dataset, required if the index + * does not contain the dataset. * */ -void serialize_to_hnswlib(raft::resources const& handle, - const std::string& filename, - const cuvs::neighbors::cagra::index& index); +void serialize_to_hnswlib( + raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::cagra::index& index, + std::optional> dataset = + std::nullopt); /** * Write the CAGRA built index as a base layer HNSW index to an output stream @@ -1656,11 +1666,16 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] os output stream * @param[in] index CAGRA index + * @param[in] dataset [optional] host array that stores the dataset, required if the index + * does not contain the dataset. * */ -void serialize_to_hnswlib(raft::resources const& handle, - std::ostream& os, - const cuvs::neighbors::cagra::index& index); +void serialize_to_hnswlib( + raft::resources const& handle, + std::ostream& os, + const cuvs::neighbors::cagra::index& index, + std::optional> dataset = + std::nullopt); /** * Save a CAGRA build index in hnswlib base-layer-only serialized format @@ -1685,11 +1700,16 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] filename the file name for saving the index * @param[in] index CAGRA index + * @param[in] dataset [optional] host array that stores the dataset, required if the index + * does not contain the dataset. * */ -void serialize_to_hnswlib(raft::resources const& handle, - const std::string& filename, - const cuvs::neighbors::cagra::index& index); +void serialize_to_hnswlib( + raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::cagra::index& index, + std::optional> dataset = + std::nullopt); /** * Write the CAGRA built index as a base layer HNSW index to an output stream @@ -1713,11 +1733,16 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] os output stream * @param[in] index CAGRA index + * @param[in] dataset [optional] host array that stores the dataset, required if the index + * does not contain the dataset. * */ -void serialize_to_hnswlib(raft::resources const& handle, - std::ostream& os, - const cuvs::neighbors::cagra::index& index); +void serialize_to_hnswlib( + raft::resources const& handle, + std::ostream& os, + const cuvs::neighbors::cagra::index& index, + std::optional> dataset = + std::nullopt); /** * Save a CAGRA build index in hnswlib base-layer-only serialized format @@ -1742,11 +1767,16 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] filename the file name for saving the index * @param[in] index CAGRA index + * @param[in] dataset [optional] host array that stores the dataset, required if the index + * does not contain the dataset. * */ -void serialize_to_hnswlib(raft::resources const& handle, - const std::string& filename, - const cuvs::neighbors::cagra::index& index); +void serialize_to_hnswlib( + raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::cagra::index& index, + std::optional> dataset = + std::nullopt); /** * @} diff --git a/cpp/src/neighbors/cagra_serialize.cuh b/cpp/src/neighbors/cagra_serialize.cuh index e193c0630..1b153b2ce 100644 --- a/cpp/src/neighbors/cagra_serialize.cuh +++ b/cpp/src/neighbors/cagra_serialize.cuh @@ -20,51 +20,56 @@ namespace cuvs::neighbors::cagra { -#define CUVS_INST_CAGRA_SERIALIZE(DTYPE) \ - void serialize(raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::cagra::index& index, \ - bool include_dataset) \ - { \ - cuvs::neighbors::cagra::detail::serialize( \ - handle, filename, index, include_dataset); \ - }; \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& filename, \ - cuvs::neighbors::cagra::index* index) \ - { \ - cuvs::neighbors::cagra::detail::deserialize(handle, filename, index); \ - }; \ - void serialize(raft::resources const& handle, \ - std::ostream& os, \ - const cuvs::neighbors::cagra::index& index, \ - bool include_dataset) \ - { \ - cuvs::neighbors::cagra::detail::serialize( \ - handle, os, index, include_dataset); \ - } \ - \ - void deserialize(raft::resources const& handle, \ - std::istream& is, \ - cuvs::neighbors::cagra::index* index) \ - { \ - cuvs::neighbors::cagra::detail::deserialize(handle, is, index); \ - } \ - \ - void serialize_to_hnswlib(raft::resources const& handle, \ - std::ostream& os, \ - const cuvs::neighbors::cagra::index& index) \ - { \ - cuvs::neighbors::cagra::detail::serialize_to_hnswlib(handle, os, index); \ - } \ - \ - void serialize_to_hnswlib(raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::cagra::index& index) \ - { \ - cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ - handle, filename, index); \ +#define CUVS_INST_CAGRA_SERIALIZE(DTYPE) \ + void serialize(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::cagra::index& index, \ + bool include_dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize( \ + handle, filename, index, include_dataset); \ + }; \ + \ + void deserialize(raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::cagra::index* index) \ + { \ + cuvs::neighbors::cagra::detail::deserialize(handle, filename, index); \ + }; \ + void serialize(raft::resources const& handle, \ + std::ostream& os, \ + const cuvs::neighbors::cagra::index& index, \ + bool include_dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize( \ + handle, os, index, include_dataset); \ + } \ + \ + void deserialize(raft::resources const& handle, \ + std::istream& is, \ + cuvs::neighbors::cagra::index* index) \ + { \ + cuvs::neighbors::cagra::detail::deserialize(handle, is, index); \ + } \ + \ + void serialize_to_hnswlib( \ + raft::resources const& handle, \ + std::ostream& os, \ + const cuvs::neighbors::cagra::index& index, \ + std::optional> dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ + handle, os, index, dataset); \ + } \ + \ + void serialize_to_hnswlib( \ + raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::cagra::index& index, \ + std::optional> dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ + handle, filename, index, dataset); \ } } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index c83da7bb1..4bd761dc6 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -30,6 +30,7 @@ #include #include #include +#include #include namespace cuvs::neighbors::cagra::detail { @@ -96,16 +97,19 @@ void serialize(raft::resources const& res, } template -void serialize_to_hnswlib(raft::resources const& res, - std::ostream& os, - const cuvs::neighbors::cagra::index& index_) +void serialize_to_hnswlib( + raft::resources const& res, + std::ostream& os, + const cuvs::neighbors::cagra::index& index_, + std::optional> dataset) { // static_assert(std::is_same_v or std::is_same_v, // "An hnswlib index can only be trained with int32 or uint32 IdxT"); + int dim = (dataset) ? dataset->extent(1) : index_.dim(); raft::common::nvtx::range fun_scope("cagra::serialize"); RAFT_LOG_DEBUG("Saving CAGRA index to hnswlib format, size %zu, dim %u", static_cast(index_.size()), - index_.dim()); + dim); // offset_level_0 std::size_t offset_level_0 = 0; @@ -119,8 +123,8 @@ void serialize_to_hnswlib(raft::resources const& res, // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, // labeltype: size_t size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + // dim * sizeof(T) + sizeof(labeltype) - auto size_data_per_element = static_cast(index_.graph_degree() * sizeof(IdxT) + 4 + - index_.dim() * sizeof(T) + 8); + auto size_data_per_element = + static_cast(index_.graph_degree() * sizeof(IdxT) + 4 + dim * sizeof(T) + 8); os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); // label_offset std::size_t label_offset = size_data_per_element - 8; @@ -150,19 +154,29 @@ void serialize_to_hnswlib(raft::resources const& res, std::size_t efConstruction = 500; os.write(reinterpret_cast(&efConstruction), sizeof(std::size_t)); - auto dataset = index_.dataset(); // Remove padding before saving the dataset - auto host_dataset = raft::make_host_matrix(dataset.extent(0), dataset.extent(1)); - RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), - sizeof(T) * host_dataset.extent(1), - dataset.data_handle(), - sizeof(T) * dataset.stride(0), - sizeof(T) * host_dataset.extent(1), - dataset.extent(0), - cudaMemcpyDefault, - raft::resource::get_cuda_stream(res))); - raft::resource::sync_stream(res); - + raft::host_matrix host_dataset = raft::make_host_matrix(0, 0); + raft::host_matrix_view host_dataset_view; + if (dataset) { + host_dataset_view = *dataset; + } else { + auto dataset = index_.dataset(); + RAFT_EXPECTS(dataset.size() > 0, + "Invalid CAGRA dataset of size 0 during serialization, shape %zux%zu", + static_cast(dataset.extent(0)), + static_cast(dataset.extent(1))); + host_dataset = raft::make_host_matrix(dataset.extent(0), dataset.extent(1)); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), + sizeof(T) * host_dataset.extent(1), + dataset.data_handle(), + sizeof(T) * dataset.stride(0), + sizeof(T) * host_dataset.extent(1), + dataset.extent(0), + cudaMemcpyDefault, + raft::resource::get_cuda_stream(res))); + raft::resource::sync_stream(res); + host_dataset_view = raft::make_const_mdspan(host_dataset.view()); + } auto graph = index_.graph(); auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); @@ -172,23 +186,48 @@ void serialize_to_hnswlib(raft::resources const& res, raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); + size_t d_report_offset = index_.size() / 10; // Report progress in 10% steps. + size_t next_report_offset = d_report_offset; + const auto start_clock = std::chrono::system_clock::now(); // Write one dataset and graph row at a time + RAFT_EXPECTS(host_graph.stride(1) == 1, "serialize_to_hnswlib expects row_major graph"); + RAFT_EXPECTS(host_dataset_view.stride(1) == 1, "serialize_to_hnswlib expects row_major dataset"); + + size_t bytes_written = 0; + float GiB = 1 << 30; for (std::size_t i = 0; i < index_.size(); i++) { auto graph_degree = static_cast(index_.graph_degree()); os.write(reinterpret_cast(&graph_degree), sizeof(int)); - for (std::size_t j = 0; j < index_.graph_degree(); ++j) { - auto graph_elem = host_graph(i, j); - os.write(reinterpret_cast(&graph_elem), sizeof(IdxT)); - } - - auto data_row = host_dataset.data_handle() + (index_.dim() * i); - for (std::size_t j = 0; j < index_.dim(); ++j) { - auto data_elem = static_cast(host_dataset(i, j)); - os.write(reinterpret_cast(&data_elem), sizeof(T)); - } + IdxT* graph_row = &host_graph(i, 0); + os.write(reinterpret_cast(graph_row), sizeof(IdxT) * index_.graph_degree()); + const T* data_row = &host_dataset_view(i, 0); + os.write(reinterpret_cast(data_row), sizeof(T) * dim); os.write(reinterpret_cast(&i), sizeof(std::size_t)); + + bytes_written += + dim * sizeof(T) + index_.graph_degree() * sizeof(IdxT) + sizeof(int) + sizeof(size_t); + const auto end_clock = std::chrono::system_clock::now(); + if (!os.good()) { RAFT_FAIL("Error writing HNSW file, row %zu", i); } + if (i > next_report_offset) { + next_report_offset += d_report_offset; + const auto time = + std::chrono::duration_cast(end_clock - start_clock).count() * + 1e-6; + float throughput = bytes_written / GiB / time; + float rows_throughput = i / time; + float ETA = (index_.size() - i) / rows_throughput; + RAFT_LOG_DEBUG( + "# Writing rows %12lu / %12lu (%3.2f %%), %3.2f GiB/sec, ETA %d:%3.1f, written %3.2f GiB\r", + i, + index_.size(), + i / static_cast(index_.size()) * 100, + throughput, + int(ETA / 60), + std::fmod(ETA, 60.0f), + bytes_written / GiB); + } } for (std::size_t i = 0; i < index_.size(); i++) { @@ -199,14 +238,16 @@ void serialize_to_hnswlib(raft::resources const& res, } template -void serialize_to_hnswlib(raft::resources const& res, - const std::string& filename, - const cuvs::neighbors::cagra::index& index_) +void serialize_to_hnswlib( + raft::resources const& res, + const std::string& filename, + const cuvs::neighbors::cagra::index& index_, + std::optional> dataset) { std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } - detail::serialize_to_hnswlib(res, of, index_); + detail::serialize_to_hnswlib(res, of, index_, dataset); of.close(); if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index e129d23e8..5447ae07a 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -163,14 +164,15 @@ template std::enable_if_t>> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index) + const cuvs::neighbors::cagra::index& cagra_index, + std::optional> dataset) { std::random_device dev; std::mt19937 rng(dev()); std::uniform_int_distribution dist(0); auto uuid = std::to_string(dist(rng)); std::string filepath = "/tmp/" + uuid + ".bin"; - cuvs::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index); + cuvs::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index, dataset); index* hnsw_index = nullptr; cuvs::neighbors::hnsw::deserialize( @@ -195,6 +197,10 @@ std::enable_if_t>> fro } else { // move dataset to host, remove padding auto cagra_dataset = cagra_index.dataset(); + RAFT_EXPECTS(cagra_dataset.size() > 0, + "Invalid CAGRA dataset of size 0, shape %zux%zu", + static_cast(cagra_dataset.extent(0)), + static_cast(cagra_dataset.extent(1))); host_dataset = raft::make_host_matrix(cagra_dataset.extent(0), cagra_dataset.extent(1)); RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), @@ -209,9 +215,9 @@ std::enable_if_t>> fro host_dataset_view = host_dataset.view(); } // build upper layers of hnsw index - auto hnsw_index = - std::make_unique>(cagra_index.dim(), cagra_index.metric(), hierarchy); - auto appr_algo = std::make_unique::type>>( + int dim = host_dataset_view.extent(1); + auto hnsw_index = std::make_unique>(dim, cagra_index.metric(), hierarchy); + auto appr_algo = std::make_unique::type>>( hnsw_index->get_space(), host_dataset_view.extent(0), cagra_index.graph().extent(1) / 2, @@ -256,7 +262,7 @@ std::unique_ptr> from_cagra( std::optional> dataset) { if (params.hierarchy == HnswHierarchy::NONE) { - return from_cagra(res, params, cagra_index); + return from_cagra(res, params, cagra_index, dataset); } else if (params.hierarchy == HnswHierarchy::CPU) { return from_cagra(res, params, cagra_index, dataset); }