Commit 79e14f7

Merge branch 'update-cagra-graph-extend' of github.com:enp1s0/cuvs into update-cagra-graph-extend
enp1s0 committed Jan 20, 2025
2 parents 5e2d306 + 54334d7 commit 79e14f7
Showing 17 changed files with 199 additions and 245 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -67,7 +67,7 @@ There are several benefits to using cuVS and GPUs for vector search, including
6. Multiple language support
7. Building blocks for composing new or accelerating existing algorithms

-In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a deslightful development experimence, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale.
+In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a delightful development experience, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale.

## cuVS Technology Stack

2 changes: 1 addition & 1 deletion ci/run_cuvs_pytests.sh
@@ -6,4 +6,4 @@ set -euo pipefail
# Support invoking run_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuvs/cuvs

-pytest --cache-clear --verbose "$@" tests
+pytest --cache-clear --verbose "$@" test
33 changes: 32 additions & 1 deletion cpp/include/cuvs/core/detail/interop.hpp
@@ -86,7 +86,6 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
RAFT_EXPECTS(to_data_type.lanes == tensor.dtype.lanes,
"lanes mismatch between return mdspan and DLTensor");
RAFT_EXPECTS(tensor.dtype.lanes == 1, "More than 1 DLTensor lanes not supported");
-  RAFT_EXPECTS(tensor.strides == nullptr, "Strided memory layout for DLTensor not supported");

auto to_device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
if (to_device.device_type == kDLCUDA) {
@@ -110,4 +109,36 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
return MdspanType{reinterpret_cast<typename MdspanType::data_handle_type>(tensor.data), exts};
}

+inline bool is_f_contiguous(DLManagedTensor* managed_tensor)
+{
+  auto tensor = managed_tensor->dl_tensor;
+
+  if (!tensor.strides) { return false; }
+  int64_t expected_stride = 1;
+  for (int64_t i = 0; i < tensor.ndim; ++i) {
+    if (tensor.strides[i] != expected_stride) { return false; }
+    expected_stride *= tensor.shape[i];
+  }
+
+  return true;
+}
+
+inline bool is_c_contiguous(DLManagedTensor* managed_tensor)
+{
+  auto tensor = managed_tensor->dl_tensor;
+
+  if (!tensor.strides) {
+    // no stride information indicates a row-major tensor according to the dlpack spec
+    return true;
+  }
+
+  int64_t expected_stride = 1;
+  for (int64_t i = tensor.ndim - 1; i >= 0; --i) {
+    if (tensor.strides[i] != expected_stride) { return false; }
+    expected_stride *= tensor.shape[i];
+  }
+
+  return true;
+}

} // namespace cuvs::core::detail
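
For intuition, the two new checks encode the usual dense-layout stride rule: a tensor is f-contiguous (column-major) when strides grow outward from the first dimension, and c-contiguous (row-major) when they grow backward from the last. Below is a minimal standalone sketch of the same test, not part of this commit, with a worked 2x3 example:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Standalone re-implementation of the stride test above (illustrative only).
bool f_contig(const std::vector<int64_t>& shape, const std::vector<int64_t>& strides)
{
  int64_t expected = 1;
  for (std::size_t i = 0; i < shape.size(); ++i) {  // first dim varies fastest
    if (strides[i] != expected) { return false; }
    expected *= shape[i];
  }
  return true;
}

bool c_contig(const std::vector<int64_t>& shape, const std::vector<int64_t>& strides)
{
  int64_t expected = 1;
  for (std::size_t i = shape.size(); i-- > 0;) {  // last dim varies fastest
    if (strides[i] != expected) { return false; }
    expected *= shape[i];
  }
  return true;
}

int main()
{
  // A 2x3 matrix: row-major strides are {3, 1}, column-major strides are {1, 2}.
  std::printf("{3,1}: c=%d f=%d\n", c_contig({2, 3}, {3, 1}), f_contig({2, 3}, {3, 1}));  // c=1 f=0
  std::printf("{1,2}: c=%d f=%d\n", c_contig({2, 3}, {1, 2}), f_contig({2, 3}, {1, 2}));  // c=0 f=1
}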
18 changes: 17 additions & 1 deletion cpp/include/cuvs/core/interop.hpp
@@ -51,9 +51,25 @@ inline bool is_dlpack_host_compatible(DLTensor tensor)
return detail::is_dlpack_host_compatible(tensor);
}

+/**
+ * @brief Check if DLManagedTensor has a row-major (c-contiguous) layout
+ *
+ * @param tensor DLManagedTensor object to check
+ * @return bool
+ */
+inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_contiguous(tensor); }
+
+/**
+ * @brief Check if DLManagedTensor has a col-major (f-contiguous) layout
+ *
+ * @param tensor DLManagedTensor object to check
+ * @return bool
+ */
+inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); }

/**
* @brief Convert a DLManagedTensor to an mdspan
- * NOTE: This function only supports compact row-major layouts.
+ * NOTE: This function only supports compact row-major and col-major layouts.
*
* @code {.cpp}
* #include <raft/core/device_mdspan.hpp>
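
Taken together with from_dlpack, these checks let a caller pick the mdspan layout at runtime. A hedged usage sketch (the helper below is illustrative and not an API from this diff, though is_c_contiguous, is_f_contiguous, and from_dlpack are):

#include <dlpack/dlpack.h>
#include <cuvs/core/interop.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/error.hpp>

// Illustrative caller: branch on the runtime layout before fixing the
// compile-time mdspan layout for from_dlpack.
inline void consume_tensor(DLManagedTensor* tensor)
{
  using row_mds = raft::device_matrix_view<float const, int64_t, raft::row_major>;
  using col_mds = raft::device_matrix_view<float const, int64_t, raft::col_major>;

  if (cuvs::core::is_c_contiguous(tensor)) {
    auto mds = cuvs::core::from_dlpack<row_mds>(tensor);  // row-major view
    // ... use mds
  } else if (cuvs::core::is_f_contiguous(tensor)) {
    auto mds = cuvs::core::from_dlpack<col_mds>(tensor);  // col-major view
    // ... use mds
  } else {
    RAFT_FAIL("strided (non-contiguous) DLPack tensors are not supported");
  }
}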
73 changes: 60 additions & 13 deletions cpp/src/distance/pairwise_distance_c.cpp
@@ -29,7 +29,7 @@

namespace {

-template <typename T, typename DistT>
+template <typename T, typename DistT, typename LayoutT = raft::row_major>
void _pairwise_distance(cuvsResources_t res,
DLManagedTensor* x_tensor,
DLManagedTensor* y_tensor,
@@ -39,8 +39,8 @@ void _pairwise_distance(cuvsResources_t res,
{
auto res_ptr = reinterpret_cast<raft::resources*>(res);

-  using mdspan_type           = raft::device_matrix_view<T const, int64_t, raft::row_major>;
-  using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, raft::row_major>;
+  using mdspan_type           = raft::device_matrix_view<T const, int64_t, LayoutT>;
+  using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, LayoutT>;

auto x_mds = cuvs::core::from_dlpack<mdspan_type>(x_tensor);
auto y_mds = cuvs::core::from_dlpack<mdspan_type>(y_tensor);
@@ -70,17 +70,64 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res,
RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype");
}

-  if (x_dt.bits == 32) {
-    _pairwise_distance<float, float>(
-      res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
-  } else if (x_dt.bits == 16) {
-    _pairwise_distance<half, float>(
-      res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
-  } else if (x_dt.bits == 64) {
-    _pairwise_distance<double, double>(
-      res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+  bool x_row_major;
+  if (cuvs::core::is_c_contiguous(x_tensor)) {
+    x_row_major = true;
+  } else if (cuvs::core::is_f_contiguous(x_tensor)) {
+    x_row_major = false;
   } else {
-    RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
+    RAFT_FAIL("X input to cuvsPairwiseDistance must be contiguous (non-strided)");
   }

+  bool y_row_major;
+  if (cuvs::core::is_c_contiguous(y_tensor)) {
+    y_row_major = true;
+  } else if (cuvs::core::is_f_contiguous(y_tensor)) {
+    y_row_major = false;
+  } else {
+    RAFT_FAIL("Y input to cuvsPairwiseDistance must be contiguous (non-strided)");
+  }
+
+  bool distances_row_major;
+  if (cuvs::core::is_c_contiguous(distances_tensor)) {
+    distances_row_major = true;
+  } else if (cuvs::core::is_f_contiguous(distances_tensor)) {
+    distances_row_major = false;
+  } else {
+    RAFT_FAIL("distances input to cuvsPairwiseDistance must be contiguous (non-strided)");
+  }
+
+  if ((x_row_major != y_row_major) || (x_row_major != distances_row_major)) {
+    RAFT_FAIL(
+      "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major)");
+  }
+
+  if (x_row_major) {
+    if (x_dt.bits == 32) {
+      _pairwise_distance<float, float>(
+        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    } else if (x_dt.bits == 16) {
+      _pairwise_distance<half, float>(
+        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    } else if (x_dt.bits == 64) {
+      _pairwise_distance<double, double>(
+        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    } else {
+      RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
+    }
+  } else {
+    if (x_dt.bits == 32) {
+      _pairwise_distance<float, float, raft::col_major>(
+        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    } else if (x_dt.bits == 16) {
+      _pairwise_distance<half, float, raft::col_major>(
+        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    } else if (x_dt.bits == 64) {
+      _pairwise_distance<double, double, raft::col_major>(
+        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    } else {
+      RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
+    }
+  }
});
}
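
With these checks in place, the C API accepts column-major inputs end to end, provided x, y, and distances all share one layout. A hedged end-to-end sketch follows (error handling and memory cleanup elided; the header paths, cuvsResourcesCreate, and the L2Expanded metric value are assumptions about the wider cuvs C API rather than things shown in this diff; the DLPack struct fields come from dlpack.h):

#include <cuda_runtime.h>
#include <dlpack/dlpack.h>

#include <cuvs/core/c_api.h>                  // assumed: cuvsResources_t, cuvsResourcesCreate
#include <cuvs/distance/pairwise_distance.h>  // assumed: cuvsPairwiseDistance, cuvsDistanceType

// Wrap device memory in a 2-D f-contiguous (col-major) float32 CUDA tensor view.
static DLManagedTensor make_col_major(float* data, int64_t* shape, int64_t* strides)
{
  DLManagedTensor t{};
  t.dl_tensor.data               = data;
  t.dl_tensor.device.device_type = kDLCUDA;
  t.dl_tensor.ndim               = 2;
  t.dl_tensor.dtype.code         = kDLFloat;
  t.dl_tensor.dtype.bits         = 32;
  t.dl_tensor.dtype.lanes        = 1;
  t.dl_tensor.shape              = shape;
  t.dl_tensor.strides            = strides;  // {1, n_rows}: first dim varies fastest
  return t;
}

int main()
{
  cuvsResources_t res;
  cuvsResourcesCreate(&res);

  int64_t n = 4, m = 3, k = 8;
  float *x, *y, *d;
  cudaMalloc(reinterpret_cast<void**>(&x), n * k * sizeof(float));
  cudaMalloc(reinterpret_cast<void**>(&y), m * k * sizeof(float));
  cudaMalloc(reinterpret_cast<void**>(&d), n * m * sizeof(float));

  int64_t x_shape[] = {n, k}, x_strides[] = {1, n};
  int64_t y_shape[] = {m, k}, y_strides[] = {1, m};
  int64_t d_shape[] = {n, m}, d_strides[] = {1, n};
  DLManagedTensor xt = make_col_major(x, x_shape, x_strides);
  DLManagedTensor yt = make_col_major(y, y_shape, y_strides);
  DLManagedTensor dt = make_col_major(d, d_shape, d_strides);

  // All three views are f-contiguous, so the col-major branch above is taken.
  cuvsPairwiseDistance(res, &xt, &yt, &dt, L2Expanded, 2.0f);
}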
28 changes: 21 additions & 7 deletions cpp/src/neighbors/brute_force_c.cpp
@@ -33,15 +33,15 @@

namespace {

-template <typename T>
+template <typename T, typename LayoutT = raft::row_major>
void* _build(cuvsResources_t res,
DLManagedTensor* dataset_tensor,
cuvsDistanceType metric,
T metric_arg)
{
auto res_ptr = reinterpret_cast<raft::resources*>(res);

-  using mdspan_type = raft::device_matrix_view<T const, int64_t, raft::row_major>;
+  using mdspan_type = raft::device_matrix_view<T const, int64_t, LayoutT>;
auto mds = cuvs::core::from_dlpack<mdspan_type>(dataset_tensor);

cuvs::neighbors::brute_force::index_params params;
@@ -53,7 +53,7 @@ void* _build(cuvsResources_t res,
return index_on_heap;
}

-template <typename T>
+template <typename T, typename QueriesLayoutT = raft::row_major>
void _search(cuvsResources_t res,
cuvsBruteForceIndex index,
DLManagedTensor* queries_tensor,
Expand All @@ -64,7 +64,7 @@ void _search(cuvsResources_t res,
auto res_ptr = reinterpret_cast<raft::resources*>(res);
auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<T>*>(index.addr);

-  using queries_mdspan_type = raft::device_matrix_view<T const, int64_t, raft::row_major>;
+  using queries_mdspan_type = raft::device_matrix_view<T const, int64_t, QueriesLayoutT>;
using neighbors_mdspan_type = raft::device_matrix_view<int64_t, int64_t, raft::row_major>;
using distances_mdspan_type = raft::device_matrix_view<float, int64_t, raft::row_major>;
using prefilter_mds_type = raft::device_vector_view<const uint32_t, int64_t>;
@@ -150,8 +150,15 @@ extern "C" cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
auto dataset = dataset_tensor->dl_tensor;

if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
-    index->addr =
-      reinterpret_cast<uintptr_t>(_build<float>(res, dataset_tensor, metric, metric_arg));
+    if (cuvs::core::is_c_contiguous(dataset_tensor)) {
+      index->addr =
+        reinterpret_cast<uintptr_t>(_build<float>(res, dataset_tensor, metric, metric_arg));
+    } else if (cuvs::core::is_f_contiguous(dataset_tensor)) {
+      index->addr = reinterpret_cast<uintptr_t>(
+        _build<float, raft::col_major>(res, dataset_tensor, metric, metric_arg));
+    } else {
+      RAFT_FAIL("dataset input to cuvsBruteForceBuild must be contiguous (non-strided)");
+    }
index->dtype = dataset.dtype;
} else {
RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
@@ -189,7 +196,14 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res,
RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries");

if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
-    _search<float>(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter);
+    if (cuvs::core::is_c_contiguous(queries_tensor)) {
+      _search<float>(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter);
+    } else if (cuvs::core::is_f_contiguous(queries_tensor)) {
+      _search<float, raft::col_major>(
+        res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter);
+    } else {
+      RAFT_FAIL("queries input to cuvsBruteForceSearch must be contiguous (non-strided)");
+    }
} else {
RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d",
queries.dtype.code,
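
The same contiguity-to-template dispatch now appears in cuvsPairwiseDistance, cuvsBruteForceBuild, and cuvsBruteForceSearch. As a design note, one hedged sketch of how such repetition is often factored; dispatch_layout is hypothetical and not part of this commit:

#include <dlpack/dlpack.h>
#include <cuvs/core/interop.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/error.hpp>
#include <utility>

// Hypothetical helper: invoke a callable with the layout as a compile-time
// tag chosen from the runtime contiguity check.
template <typename Fn>
void dispatch_layout(DLManagedTensor* tensor, Fn&& fn)
{
  if (cuvs::core::is_c_contiguous(tensor)) {
    std::forward<Fn>(fn)(raft::row_major{});
  } else if (cuvs::core::is_f_contiguous(tensor)) {
    std::forward<Fn>(fn)(raft::col_major{});
  } else {
    RAFT_FAIL("input must be contiguous (non-strided)");
  }
}

// Sketch of use inside cuvsBruteForceSearch:
//   dispatch_layout(queries_tensor, [&](auto layout) {
//     _search<float, decltype(layout)>(
//       res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter);
//   });

Whether that indirection reads better than the explicit branches is a judgment call; the commit keeps the branches explicit.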
6 changes: 3 additions & 3 deletions cpp/src/neighbors/detail/dynamic_batching.cuh
@@ -238,8 +238,8 @@ enum struct slot_state : int32_t {
struct batch_token {
uint64_t value = 0;

-  constexpr inline batch_token() {}
-  explicit constexpr inline batch_token(uint32_t buffer_id) { id() = buffer_id; }
+  constexpr inline batch_token() = default;
+  RAFT_INLINE_FUNCTION explicit batch_token(uint32_t buffer_id) { id() = buffer_id; }

/**
* Sequential id of the batch in the array of batches.
@@ -492,7 +492,7 @@ struct batch_queue_t {
* NB: "round" is the number of times the queue counters went over the whole ring buffer.
* It's used to avoid the ABA problem for atomic token updates.
*/
-  static constexpr inline auto make_empty_token(seq_order_id seq_id) noexcept -> batch_token
+  static inline auto make_empty_token(seq_order_id seq_id) noexcept -> batch_token
{
// Modify the seq_id to identify that the token slot is empty
auto empty_round = static_cast<uint32_t>(slot_state::kEmptyPast) * kSize;
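
For background on the comment above: without the round counter, a compare-and-swap could succeed against a token slot that was freed and reused in between, because the stale and fresh values compare equal (the ABA problem). A minimal hedged sketch of the round-counter idea, not cuVS code:

#include <atomic>
#include <cstdint>

// Illustrative only: fold a monotonically growing 'round' into the token so
// that a recycled slot never compares equal to a stale expected value.
constexpr uint64_t pack(uint32_t round, uint32_t id)
{
  return (uint64_t{round} << 32) | id;
}

constexpr uint32_t kEmpty = 0xFFFFFFFFu;

bool try_claim(std::atomic<uint64_t>& slot, uint32_t round, uint32_t buffer_id)
{
  // Expect "empty at this round". If the slot was emptied and refilled for a
  // later round in the meantime, its round field differs and the CAS fails
  // instead of silently succeeding on a reused slot.
  uint64_t expected = pack(round, kEmpty);
  return slot.compare_exchange_strong(expected, pack(round, buffer_id));
}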
2 changes: 1 addition & 1 deletion cpp/test/neighbors/ann_ivf_pq.cuh
@@ -879,7 +879,7 @@ inline auto enum_variety_ip() -> test_cases_t
// InnerProduct score is signed,
// thus we're forced to used signed 8-bit representation,
// thus we have one bit less precision
-      y.min_recall = y.min_recall.value() * 0.90;
+      y.min_recall = y.min_recall.value() * 0.88;
} else {
// In other cases it seems to perform a little bit better, still worse than L2
y.min_recall = y.min_recall.value() * 0.94;
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -198,7 +198,7 @@ def setup(app):
linkcode_resolve = make_linkcode_resolve(
    "cuvs",
    "https://github.com/rapidsai/cuvs/"
-    "blob/{revision}/python/cuvs/cuvs/"
+    "blob/{revision}/python/cuvs/"
"{package}/{path}#L{lineno}",
)
