Skip to content

Commit

Permalink
documentation, formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
ajit283 committed Jan 8, 2025
1 parent d7725db commit 9a64945
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 25 deletions.
2 changes: 1 addition & 1 deletion python/cuvs/cuvs/neighbors/cagra/cagra.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ from libcpp cimport bool

from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
from cuvs.neighbors.filters.filters cimport cuvsFilter
from cuvs.distance_type cimport cuvsDistanceType
from cuvs.neighbors.filters.filters cimport cuvsFilter


cdef extern from "cuvs/neighbors/cagra.h" nogil:
Expand Down
4 changes: 3 additions & 1 deletion python/cuvs/cuvs/neighbors/cagra/cagra.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ from libc.stdint cimport (
)

from cuvs.common.exceptions import check_cuvs

from cuvs.neighbors.filters import no_filter


Expand Down Expand Up @@ -506,6 +505,9 @@ def search(SearchParams search_params,
distances : Optional CUDA array interface compliant matrix shape
(n_queries, k) If supplied, the distances to the
neighbors will be written here in-place. (default None)
filter : Optional cuvs.neighbors.cuvsFilter that can be used to filter
neighbors based on a given bitset.
(default None)
{resources_docstring}
Examples
Expand Down
22 changes: 10 additions & 12 deletions python/cuvs/cuvs/neighbors/filters/filters.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ from libc.stdint cimport uintptr_t
from cuvs.common cimport cydlpack
from cuvs.neighbors.common import _check_input_array

from .filters cimport BITMAP, NO_FILTER, cuvsFilter
from .filters cimport BITMAP, BITSET, NO_FILTER, cuvsFilter

from pylibraft.common.cai_wrapper import wrap_array

Expand Down Expand Up @@ -102,21 +102,19 @@ def from_bitset(bitset):
Parameters
----------
bitmap : numpy.ndarray
bitset : numpy.ndarray
An array with type of `uint32` where each bit in the array corresponds
to if a sample and query pair is greenlit (not filtered) or filtered.
The array is row-major, meaning the bits are ordered by rows first.
Each bit in a `uint32` element represents a different sample-query
pair.
to whether a sample is greenlit (not filtered) or filtered.
Each bit in a `uint32` element represents a different sample of the dataset.
- Bit value of 1: The sample-query pair is greenlit (allowed).
- Bit value of 0: The sample-query pair is filtered.
- Bit value of 1: The sample is greenlit (allowed).
- Bit value of 0: The sample is filtered.
Returns
-------
filter : cuvs.neighbors.filters.Prefilter
An instance of `Prefilter` that can be used to filter neighbors
based on the given bitmap.
based on the given bitset.
{resources_docstring}
Examples
Expand All @@ -129,9 +127,9 @@ def from_bitset(bitset):
>>> n_samples = 50000
>>> n_queries = 1000
>>>
>>> n_bitmap = np.ceil(n_samples * n_queries / 32).astype(int)
>>> bitmap = cp.random.randint(1, 100, size=(n_bitmap,), dtype=cp.uint32)
>>> prefilter = filters.from_bitmap(bitmap)
>>> n_bitset = np.ceil(n_samples / 32).astype(int)
>>> bitset = cp.random.randint(1, 100, size=(n_bitset,), dtype=cp.uint32)
>>> prefilter = filters.from_bitset(bitset)
"""
bitset_cai = wrap_array(bitset)
_check_input_array(bitset_cai, [np.dtype('uint32')])
Expand Down
23 changes: 12 additions & 11 deletions python/cuvs/cuvs/test/test_cagra.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
import numpy as np
import pytest
from pylibraft.common import device_ndarray
from scipy.spatial.distance import cdist
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize
from scipy.spatial.distance import cdist

from cuvs.neighbors import cagra, filters
from cuvs.test.ann_utils import calc_recall, generate_data
Expand Down Expand Up @@ -169,11 +169,9 @@ def test_cagra_dataset_dtype_host_device(
},
],
)


def create_sparse_bitset(n_size, sparsity):
bits_per_uint32 = 32
num_bits = n_size
num_bits = n_size
num_uint32s = (num_bits + bits_per_uint32 - 1) // bits_per_uint32
num_ones = int(num_bits * sparsity)

Expand Down Expand Up @@ -257,14 +255,16 @@ def test_filtered_cagra(
# Get actual results
actual_indices = out_idx_device.copy_to_host()
actual_distances = out_dist_device.copy_to_host()

filtered_idx_map = np.cumsum(~bool_filter) - 1 # -1 because cumsum starts at 1

filtered_idx_map = (
np.cumsum(~bool_filter) - 1
) # -1 because cumsum starts at 1

# Map CAGRA indices to filtered space
mapped_actual_indices = np.take(filtered_idx_map,
actual_indices,
mode='clip')
mapped_actual_indices = np.take(
filtered_idx_map, actual_indices, mode="clip"
)

# Verify filtering - no filtered indices should be in results
filtered_indices = np.where(bool_filter)[0]
for i in range(n_queries):
Expand All @@ -274,7 +274,8 @@ def test_filtered_cagra(
recall = calc_recall(mapped_actual_indices, skl_idx)

assert recall > 0.7



def test_cagra_index_params(params):
# Note that inner_product tests use normalized input which we cannot
# represent in int8, therefore we test only sqeuclidean metric here.
Expand Down

0 comments on commit 9a64945

Please sign in to comment.