From 6f2db2069dc9be70e3b393b07babc0134b43d71d Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Fri, 15 Nov 2024 09:31:54 -0500 Subject: [PATCH 01/39] DOC v25.02 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 6 ++--- .devcontainer/cuda11.8-pip/devcontainer.json | 8 +++--- .../cuda12.5-conda/devcontainer.json | 6 ++--- .devcontainer/cuda12.5-pip/devcontainer.json | 8 +++--- .github/workflows/build.yaml | 14 +++++----- .github/workflows/pr.yaml | 26 +++++++++---------- .github/workflows/test.yaml | 8 +++--- README.md | 2 +- VERSION | 2 +- .../all_cuda-118_arch-aarch64.yaml | 4 +-- .../all_cuda-118_arch-x86_64.yaml | 4 +-- .../all_cuda-125_arch-aarch64.yaml | 4 +-- .../all_cuda-125_arch-x86_64.yaml | 4 +-- .../bench_ann_cuda-118_arch-aarch64.yaml | 4 +-- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +-- .../bench_ann_cuda-125_arch-aarch64.yaml | 4 +-- .../bench_ann_cuda-125_arch-x86_64.yaml | 4 +-- dependencies.yaml | 12 ++++----- docs/source/developer_guide.md | 6 ++--- examples/cmake/thirdparty/fetch_rapids.cmake | 2 +- python/cuvs/pyproject.toml | 2 +- rust/Cargo.toml | 2 +- rust/cuvs/Cargo.toml | 2 +- 23 files changed, 69 insertions(+), 69 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 05f11c005..f03ec7b19 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index b4c507f86..a59c499d3 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index 4f8d628c2..39852cec1 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - 
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 8e6ba4de8..d84966656 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "12.5", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 7ac02e365..e93b7a694 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: rust-build: needs: cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -50,7 +50,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -70,7 +70,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -82,7 +82,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -92,7 +92,7 @@ jobs: wheel-publish-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e18e82df0..a62b4e00a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,13 +25,13 @@ jobs: - wheel-tests-cuvs - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -64,27 +64,27 @@ jobs: - '!thirdparty/LICENSES/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true @@ -92,20 +92,20 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -115,7 +115,7 @@ jobs: rust-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -125,21 +125,21 @@ jobs: wheel-build-cuvs: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_cuvs.sh wheel-tests-cuvs: needs: [wheel-build-cuvs, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: 
fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cuvs.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5f60c0a34..2645e5d5d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (void (thrust::|cub::)|raft_cutlass) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 572e8d098..8b7e529af 100755 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ pip install cuvs-cu12 
--extra-index-url=https://pypi.nvidia.com If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`: ```bash -conda install -c conda-forge -c nvidia -c rapidsai-nightly cuvs=24.12 +conda install -c conda-forge -c nvidia -c rapidsai-nightly cuvs=25.02 ``` cuVS also has `pip` wheel packages that can be installed. Please see the [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build/) for more information on installing the available cuVS packages and building from source. diff --git a/VERSION b/VERSION index af28c42b5..72eefaf7c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 80bfb0c24..1daf668b1 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -45,7 +45,7 @@ dependencies: - openblas - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 07937726c..098156397 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -45,7 +45,7 @@ dependencies: - openblas - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - 
rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index b7fd6fcfa..b94b44749 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -41,7 +41,7 @@ dependencies: - openblas - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 83a457465..10e30a8c2 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -41,7 +41,7 @@ dependencies: - openblas - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 21cb98180..2a1d80aaa 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -41,7 +41,7 @@ dependencies: - nvcc_linux-aarch64=11.8 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - 
setuptools diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 432509bcb..6507f55cc 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -41,7 +41,7 @@ dependencies: - nvcc_linux-64=11.8 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 0c5043ac2..e53606a06 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -30,14 +30,14 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index cbb22333c..e37c507c7 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -30,14 +30,14 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/dependencies.yaml 
b/dependencies.yaml index e909ad0dc..a7be191d6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -488,7 +488,7 @@ dependencies: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==24.12.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -501,18 +501,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==24.12.*,>=0.0.0a0 + - librmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - librmm-cu11==24.12.*,>=0.0.0a0 + - librmm-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -525,10 +525,10 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==24.12.*,>=0.0.0a0 + - pylibraft-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibraft-cu11==24.12.*,>=0.0.0a0 + - pylibraft-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed]} diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index 7702f80b3..4fdd6405e 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/.clang-format). 
+ The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -230,7 +230,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. 
### Usage ```cpp diff --git a/examples/cmake/thirdparty/fetch_rapids.cmake b/examples/cmake/thirdparty/fetch_rapids.cmake index 6f4c627ed..23c8490b4 100644 --- a/examples/cmake/thirdparty/fetch_rapids.cmake +++ b/examples/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.12") +set(RAPIDS_VERSION "25.02") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index d40026776..894b8820f 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", - "pylibraft==24.12.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 79aa5756a..ddb8b32cd 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -6,7 +6,7 @@ members = [ resolver = "2" [workspace.package] -version = "24.12.0" +version = "25.2.0" edition = "2021" repository = "https://github.com/rapidsai/cuvs" homepage = "https://github.com/rapidsai/cuvs" diff --git a/rust/cuvs/Cargo.toml b/rust/cuvs/Cargo.toml index 13cc658e3..1095b1fea 100644 --- a/rust/cuvs/Cargo.toml +++ b/rust/cuvs/Cargo.toml @@ -9,7 +9,7 @@ authors.workspace = true license.workspace = true [dependencies] -ffi = { package = "cuvs-sys", path = "../cuvs-sys", version = "24.12.0" } +ffi = { package = "cuvs-sys", path = "../cuvs-sys", version = "25.2.0" } ndarray = "0.15" [dev-dependencies] From aba3fa7bb38690d394082aff65531c5858836450 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Nov 2024 08:55:39 -0600 Subject: [PATCH 02/39] Update example code fetching rapids-cmake to use CUVS instead of RAFT (#493) Small update to CMake example code to use cuVS instead of RAFT. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/cuvs/pull/493 --- examples/cmake/thirdparty/fetch_rapids.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/cmake/thirdparty/fetch_rapids.cmake b/examples/cmake/thirdparty/fetch_rapids.cmake index 23c8490b4..3c5510b8b 100644 --- a/examples/cmake/thirdparty/fetch_rapids.cmake +++ b/examples/cmake/thirdparty/fetch_rapids.cmake @@ -11,11 +11,11 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. 
-# Use this variable to update RAPIDS and RAFT versions +# Use this variable to update RAPIDS and cuVS versions set(RAPIDS_VERSION "25.02") -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) + ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) endif() -include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +include(${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) From 6e5c0c8b0ce57b4b2069cbe5255619f210420792 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Nov 2024 11:24:01 -0600 Subject: [PATCH 03/39] Remove RAFT BUILD_ANN_BENCH option (#497) This cleans up a reference to RAFT's `BUILD_ANN_BENCH` CMake option which no longer exists. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/497 --- cpp/cmake/thirdparty/get_raft.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 7640fbfa6..5def74f4b 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -50,7 +50,6 @@ function(find_and_configure_raft) OPTIONS "BUILD_TESTS OFF" "BUILD_PRIMS_BENCH OFF" - "BUILD_ANN_BENCH OFF" "RAFT_NVTX ${PKG_ENABLE_NVTX}" "RAFT_COMPILE_LIBRARY OFF" ) From bd0620df0c143711352c9f5e312268aaa1801cbd Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:28:58 -0600 Subject: [PATCH 04/39] Add breaking change workflow trigger (#442) Adds a workflow that triggers a second workflow which sends a notification to a designated Slack channel on every PR labelled with breaking, whenever any of the following events are triggered on the PR: - closed - reopened - labeled - unlabeled Depends on 
https://github.com/rapidsai/shared-workflows/pull/257 --- .../trigger-breaking-change-alert.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/trigger-breaking-change-alert.yaml diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml new file mode 100644 index 000000000..3b972f31c --- /dev/null +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -0,0 +1,26 @@ +name: Trigger Breaking Change Notifications + +on: + pull_request_target: + types: + - closed + - reopened + - labeled + - unlabeled + +jobs: + trigger-notifier: + if: contains(github.event.pull_request.labels.*.name, 'breaking') + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12 + with: + sender_login: ${{ github.event.sender.login }} + sender_avatar: ${{ github.event.sender.avatar_url }} + repo: ${{ github.repository }} + pr_number: ${{ github.event.pull_request.number }} + pr_title: "${{ github.event.pull_request.title }}" + pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" + pr_base_ref: ${{ github.event.pull_request.base.ref }} + pr_author: ${{ github.event.pull_request.user.login }} + event_action: ${{ github.event.action }} + pr_merged: ${{ github.event.pull_request.merged }} From ae6816c9be622fe18f1d80f3b32b70ac9f5566fe Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Nov 2024 16:32:11 -0600 Subject: [PATCH 05/39] Require approval to run CI on draft PRs (#498) By default, CI runs on draft PRs. This leads to many CI runs that may be unnecessary. With this PR's change to `.github/copy-pr-bot.yaml`, an `/ok to test` comment from a trusted user is required to trigger CI on draft PRs. Non-draft PRs will run CI by default, assuming that all commits are signed by trusted users. 
Otherwise an `/ok to test` is required (as before) -- see the `copy-pr-bot` docs at https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ for more information. Part of https://github.com/rapidsai/build-planning/issues/123. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/498 --- .github/copy-pr-bot.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 895ba83ee..e0ea775aa 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -2,3 +2,4 @@ # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ enabled: true +auto_sync_draft: false From 31c59ce0cae2505c89e8e4cdd8d77fd29256df4a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 29 Nov 2024 16:42:44 -0800 Subject: [PATCH 06/39] Adapt to rmm logger changes (#499) This PR adapts to breaking changes in rmm in https://github.com/rapidsai/rmm/pull/1722. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/499 --- cpp/CMakeLists.txt | 9 +++++---- cpp/bench/ann/CMakeLists.txt | 8 +++++++- cpp/test/CMakeLists.txt | 4 ++++ examples/cpp/CMakeLists.txt | 13 ++++++++----- python/cuvs/CMakeLists.txt | 3 +++ python/cuvs/cuvs/common/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/distance/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/neighbors/CMakeLists.txt | 4 ++++ .../cuvs/cuvs/neighbors/brute_force/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/neighbors/filters/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt | 4 ++++ python/cuvs/cuvs/test/conftest.py | 5 +++++ 15 files changed, 68 insertions(+), 10 deletions(-) create mode 
100644 python/cuvs/cuvs/test/conftest.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb2e7c7a4..3c8ef69fd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -480,12 +480,13 @@ if(BUILD_SHARED_LIBS) "$<$:${CUVS_CUDA_FLAGS}>" ) target_link_libraries( - cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES} + cuvs_objs PUBLIC raft::raft rmm::rmm rmm::rmm_logger ${CUVS_CTK_MATH_DEPENDENCIES} $ + PRIVATE rmm::rmm_logger_impl ) - add_library(cuvs SHARED $) - add_library(cuvs_static STATIC $) + add_library(cuvs SHARED $,EXCLUDE,rmm.*logger>) + add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) target_compile_options( cuvs INTERFACE $<$:--expt-extended-lambda @@ -696,7 +697,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB target_link_libraries( cuvs_c PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE raft::raft + PRIVATE raft::raft rmm::rmm_logger_impl ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 0f6b42ae9..572b792a7 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -129,6 +129,7 @@ function(ConfigureAnnBench) $<$:CUDA::cudart_static> $ $ + $ ) set_target_properties( @@ -174,6 +175,11 @@ function(ConfigureAnnBench) add_dependencies(CUVS_ANN_BENCH_ALL ${BENCH_NAME}) endfunction() +if(CUVS_FAISS_ENABLE_GPU) + add_library(cuvs_bench_rmm_logger OBJECT) + target_link_libraries(cuvs_bench_rmm_logger PRIVATE rmm::rmm_logger_impl) +endif() + # ################################################################################################## # * Configure benchmark targets ------------------------------------------------------------- @@ -300,7 +306,7 @@ if(CUVS_ANN_BENCH_SINGLE_EXE) target_link_libraries( ANN_BENCH PRIVATE raft::raft nlohmann_json::nlohmann_json benchmark::benchmark dl fmt::fmt-header-only - spdlog::spdlog_header_only $<$:CUDA::nvtx3> + 
spdlog::spdlog_header_only $<$:CUDA::nvtx3> rmm::rmm_logger_impl ) set_target_properties( ANN_BENCH diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 286d721d7..16663ba08 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -49,6 +49,7 @@ function(ConfigureTest) PRIVATE cuvs cuvs::cuvs raft::raft + test_rmm_logger GTest::gtest GTest::gtest_main Threads::Threads @@ -87,6 +88,9 @@ function(ConfigureTest) ) endfunction() +add_library(test_rmm_logger OBJECT) +target_link_libraries(test_rmm_logger PRIVATE rmm::rmm_logger_impl) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 092b65ed9..48815b870 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -44,10 +44,13 @@ add_executable(VAMANA_EXAMPLE src/vamana_example.cu) # `$` is a generator expression that ensures that targets are # installed in a conda environment, if one exists -target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $) +add_library(rmm_logger OBJECT) +target_link_libraries(rmm_logger PRIVATE rmm::rmm_logger_impl) + +target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) target_link_libraries( - CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger ) -target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $) -target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $) -target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $) +target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) +target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) +target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs 
$ rmm_logger) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index feb3bd58c..c0990995f 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -110,6 +110,9 @@ endif() rapids_cython_init() +add_library(cuvs_rmm_logger OBJECT) +target_link_libraries(cuvs_rmm_logger PRIVATE rmm::rmm_logger_impl) + add_subdirectory(cuvs/common) add_subdirectory(cuvs/distance) add_subdirectory(cuvs/neighbors) diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index 202919e01..361f2fafc 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt index 363778a9c..514b08c43 100644 --- a/python/cuvs/cuvs/distance/CMakeLists.txt +++ b/python/cuvs/cuvs/distance/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index f68bbea53..031fd485e 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -29,3 +29,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_refine_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() 
diff --git a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt index 4806fb9fc..61eda649c 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt @@ -23,3 +23,7 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_brute_force_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt index 87e6597fe..1f40daab2 100644 --- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt index c90615feb..a678852d9 100644 --- a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_prefilter_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt index 1f9c422ca..8351916e6 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES 
"${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_hnsw_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt index 09bd8f422..f5663cdaa 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_ivf_flat_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt index 97c3a1824..a24320ded 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_pq_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/test/conftest.py b/python/cuvs/cuvs/test/conftest.py new file mode 100644 index 000000000..d84de5d21 --- /dev/null +++ b/python/cuvs/cuvs/test/conftest.py @@ -0,0 +1,5 @@ +# arm tests sporadically run into +# https://bugzilla.redhat.com/show_bug.cgi?id=1722181. +# This is a workaround to ensure that OpenMP gets the TLS that it needs. 
+ +import sklearn # noqa: F401 From 121588259b3f48381c6c154556e78e46b3119eb4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 2 Dec 2024 14:18:23 -0600 Subject: [PATCH 07/39] prefer system install of UCX in devcontainers (#501) Contributes to https://github.com/rapidsai/build-planning/issues/118 Proposes the following changes for pip devcontainers: * prefer system installation of ucx to the one provided by the `libucx-cu{11,12}` wheels (ref: https://github.com/rapidsai/devcontainers/pull/421#issuecomment-2502324982) And some other related changes noticed while doing that: * update lingering `24.*` references to `25.02` ## Notes for Reviewers ### How I tested this Relying on CI for most things. Double-checked that `update-version.sh` would have caught the one lingering `24.12` reference like this: ```shell ./ci/release/update-version.sh '25.02.00' git grep -E '24\.' ``` Similar to https://github.com/rapidsai/cuml/pull/6149 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/501 --- .devcontainer/Dockerfile | 1 + .github/workflows/trigger-breaking-change-alert.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 594ba8c3c..77b90fa20 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,6 +13,7 @@ RUN apt update -y \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; ENV DEFAULT_VIRTUAL_ENV=rapids +ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true FROM ${BASE} as conda-base diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 3b972f31c..01dd2436b 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: 
inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} From 69199c2297e5a6012dd0f26a491550066cecdd4b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 4 Dec 2024 18:19:19 -0600 Subject: [PATCH 08/39] Remove upper bounds on cuda-python to allow 12.6.2 and 11.8.5 (#508) Now that some upstream bugs have been fixed, we can allow cuda-python 12.6.2 and 11.8.5. See https://github.com/NVIDIA/cuda-python/issues/226#issuecomment-2472355738 for more information. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/508 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cuvs/meta.yaml | 8 ++++---- dependencies.yaml | 4 ++-- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 1daf668b1..b27e9d341 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git 
a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 098156397..2a2791824 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index b94b44749..800d8c5cc 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 10e30a8c2..15addf9da 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index dd7499c78..ced5176e9 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml 
b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index f12e01c60..d8e49519f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 89134093c..5e6373ad1 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 88b4c859a..fece589d5 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index 560c95feb..f799d9b0b 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.7.1,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.0,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -61,10 +61,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% 
if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.7.1,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.0,<13.0a0 {% endif %} - pylibraft {{ minor_version }} - libcuvs {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index ee5155489..28ded3671 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -213,11 +213,11 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0,<=12.6.0 + - &cuda_python12 cuda-python>=12.0,<13.0a0 - matrix: cuda: "11.*" packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0,<=11.8.3 + - &cuda_python11 cuda-python>=11.7.1,<12.0a0 - matrix: packages: - &cuda_python cuda-python From e1a57084a61aeffdd5b45e35dcff11e418527c1d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sat, 7 Dec 2024 00:34:59 -0600 Subject: [PATCH 09/39] Update cuda-python lower bounds to 12.6.2 / 11.8.5 (#524) We require a newer cuda-python lower bound for new features and to use the new layout. This will fix a number of errors observed when the runtime version of cuda-python is older than the version used to build packages using Cython features from cuda-python. See https://github.com/rapidsai/build-planning/issues/117#issuecomment-2524250915 for details. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/524 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cuvs/meta.yaml | 8 ++++---- dependencies.yaml | 4 ++-- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index b27e9d341..50aa3fe7e 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 2a2791824..8f15b6164 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 800d8c5cc..f194c01a3 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - 
cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 15addf9da..912d1629b 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index ced5176e9..bb85af66f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index d8e49519f..225340fbb 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 5e6373ad1..a541db0ca 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- 
cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index fece589d5..25de38443 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index f799d9b0b..ad7ffe756 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.8.5,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.6.2,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -61,10 +61,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - pylibraft {{ minor_version }} - libcuvs {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index 28ded3671..7adb38d2a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -213,11 +213,11 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0 + - &cuda_python12 cuda-python>=12.6.2,<13.0a0 - matrix: cuda: "11.*" packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - &cuda_python11 cuda-python>=11.8.5,<12.0a0 - matrix: packages: - &cuda_python cuda-python From ef16a9e7fa7af418019e8cc7bcdd33828aee9f67 Mon Sep 17 00:00:00 2001 From: "Artem M. 
Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 12 Dec 2024 00:04:37 +0100 Subject: [PATCH 10/39] Fix Grace-specific issues in CAGRA (#527) Fix Grace-specific test failures: 1. Add stream syncs at the places where host-allocated memory may be destructed while still being accessed by GPU to avoid relying on stream-ordered semantics of memory allocations. 2. A bug in tests: CAGRA index produced by `cagra::build` is not guaranteed to be owning. The tests assumed otherwise; when the host dataset is passed, and it's accessible on the device (it's the case with Grace), the created index ended up non-owning. The lifetime of the host dataset in the tests was smaller than of the index, which led to invalid host accesses from the device. 3. A bug in `dataset_serialize.hpp`: `deserialize_strided` function constructed a non-owning strided dataset, because the host data was accessible by the GPU. The current fix is to add a move-semantics overload of `make_strided_dataset` that always owns the passed data (either via moving the mdarray or by copying the data). Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/527 --- cpp/include/cuvs/neighbors/common.hpp | 78 ++++++++++++++++++- cpp/src/neighbors/detail/ann_utils.cuh | 11 +++ cpp/src/neighbors/detail/cagra/utils.hpp | 16 +++- .../neighbors/detail/dataset_serialize.hpp | 2 +- cpp/test/neighbors/ann_cagra.cuh | 23 +++--- 5 files changed, 115 insertions(+), 15 deletions(-) diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 60b8cc122..bd9ea4834 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -264,6 +264,77 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t return std::make_unique(std::move(out_array), out_layout); } +/** + * @brief Construct a strided matrix from any mdarray.
+ * + * This function constructs an owning device matrix and copies the data. + * When the data is copied, padding elements are filled with zeroes. + * + * @tparam DataT + * @tparam IdxT + * @tparam LayoutPolicy + * @tparam ContainerPolicy + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] required_stride the leading dimension (in elements) + * @return owning current-device-accessible strided matrix + */ +template +auto make_strided_dataset( + const raft::resources& res, + raft::mdarray, LayoutPolicy, ContainerPolicy>&& src, + uint32_t required_stride) -> std::unique_ptr> +{ + using value_type = DataT; + using index_type = IdxT; + using layout_type = LayoutPolicy; + using container_policy_type = ContainerPolicy; + static_assert(std::is_same_v || + std::is_same_v> || + std::is_same_v, + "The input must be row-major"); + RAFT_EXPECTS(src.extent(1) <= required_stride, + "The input row length must be not larger than the desired stride."); + const uint32_t src_stride = src.stride(0) > 0 ? 
src.stride(0) : src.extent(1); + const bool stride_matches = required_stride == src_stride; + + auto out_layout = + raft::make_strided_layout(src.extents(), std::array{required_stride, 1}); + + using out_mdarray_type = raft::device_matrix; + using out_layout_type = typename out_mdarray_type::layout_type; + using out_container_policy_type = typename out_mdarray_type::container_policy_type; + using out_owning_type = + owning_dataset; + + if constexpr (std::is_same_v && + std::is_same_v) { + if (stride_matches) { + // Everything matches, we can own the mdarray + return std::make_unique(std::move(src), out_layout); + } + } + // Something is wrong: have to make a copy and produce an owning dataset + auto out_array = + raft::make_device_matrix(res, src.extent(0), required_stride); + + RAFT_CUDA_TRY(cudaMemsetAsync(out_array.data_handle(), + 0, + out_array.size() * sizeof(value_type), + raft::resource::get_cuda_stream(res))); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(), + sizeof(value_type) * required_stride, + src.data_handle(), + sizeof(value_type) * src_stride, + sizeof(value_type) * src.extent(1), + src.extent(0), + cudaMemcpyDefault, + raft::resource::get_cuda_stream(res))); + + return std::make_unique(std::move(out_array), out_layout); +} + /** * @brief Contstruct a strided matrix from any mdarray or mdspan. 
* @@ -278,14 +349,15 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t * @return maybe owning current-device-accessible strided matrix */ template -auto make_aligned_dataset(const raft::resources& res, const SrcT& src, uint32_t align_bytes = 16) +auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_bytes = 16) -> std::unique_ptr> { - using value_type = typename SrcT::value_type; + using source_type = std::remove_cv_t>; + using value_type = typename source_type::value_type; constexpr size_t kSize = sizeof(value_type); uint32_t required_stride = raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; - return make_strided_dataset(res, src, required_stride); + return make_strided_dataset(res, std::forward(src), required_stride); } /** * @brief VPQ compressed dataset. diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index 652d41c85..529356351 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -403,6 +403,17 @@ struct batch_load_iterator { /** A single batch of data residing in device memory. */ struct batch { + ~batch() noexcept + { + /* + If there's no copy, there's no allocation owned by the batch. + If there's no allocation, there's no guarantee that the device pointer is stream-ordered. + If there's no stream order guarantee, we must synchronize with the stream before the batch is + destroyed to make sure all GPU operations in that stream finish earlier. + */ + if (!does_copy()) { RAFT_CUDA_TRY_NO_THROW(cudaStreamSynchronize(stream_)); } + } + /** Logical width of a single row in a batch, in elements of type `T`. 
*/ [[nodiscard]] auto row_width() const -> size_type { return row_width_; } /** Logical offset of the batch, in rows (`row_width()`) */ diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 0f8309328..9f95c5b1c 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -179,7 +179,7 @@ class device_matrix_view_from_host { public: device_matrix_view_from_host(raft::resources const& res, raft::host_matrix_view host_view) - : host_view_(host_view) + : res_(res), host_view_(host_view) { cudaPointerAttributes attr; RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, host_view.data_handle())); @@ -199,6 +199,17 @@ class device_matrix_view_from_host { } } + ~device_matrix_view_from_host() noexcept + { + /* + If there's no copy, there's no allocation owned by this struct. + If there's no allocation, there's no guarantee that the device pointer is stream-ordered. + If there's no stream order guarantee, we must synchronize with the stream before the struct is + destroyed to make sure all GPU operations in that stream finish earlier. 
+ */ + if (!allocated_memory()) { raft::resource::sync_stream(res_); } + } + raft::device_matrix_view view() { return raft::make_device_matrix_view( @@ -207,9 +218,10 @@ class device_matrix_view_from_host { T* data_handle() { return device_ptr; } - bool allocated_memory() const { return device_mem_.has_value(); } + [[nodiscard]] bool allocated_memory() const { return device_mem_.has_value(); } private: + const raft::resources& res_; std::optional> device_mem_; raft::host_matrix_view host_view_; T* device_ptr; diff --git a/cpp/src/neighbors/detail/dataset_serialize.hpp b/cpp/src/neighbors/detail/dataset_serialize.hpp index 40d9df930..0ecc2cf5d 100644 --- a/cpp/src/neighbors/detail/dataset_serialize.hpp +++ b/cpp/src/neighbors/detail/dataset_serialize.hpp @@ -140,7 +140,7 @@ auto deserialize_strided(raft::resources const& res, std::istream& is) auto stride = raft::deserialize_scalar(res, is); auto host_array = raft::make_host_matrix(n_rows, dim); raft::deserialize_mdspan(res, is, host_array.view()); - return make_strided_dataset(res, host_array, stride); + return make_strided_dataset(res, std::move(host_array), stride); } template diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 8d5701439..c1cd3ca09 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -389,12 +389,13 @@ class AnnCagraTest : public ::testing::TestWithParam { (const DataT*)database.data(), ps.n_rows, ps.dim); { + std::optional> database_host{std::nullopt}; cagra::index index(handle_, index_params.metric); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, 
ps.dim); + (const DataT*)database_host->data_handle(), ps.n_rows, ps.dim); index = cagra::build(handle_, index_params, database_host_view); } else { @@ -567,13 +568,16 @@ class AnnCagraAddNodesTest : public ::testing::TestWithParam { auto initial_database_view = raft::make_device_matrix_view( (const DataT*)database.data(), initial_database_size, ps.dim); + std::optional> database_host{std::nullopt}; cagra::index index(handle_); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy( - database_host.data_handle(), database.data(), initial_database_view.size(), stream_); + database_host->data_handle(), database.data(), initial_database_view.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), initial_database_size, ps.dim); + (const DataT*)database_host->data_handle(), initial_database_size, ps.dim); + // NB: database_host must live no less than the index, because the index _may_be_ + // non-owning index = cagra::build(handle_, index_params, database_host_view); } else { index = cagra::build(handle_, index_params, initial_database_view); @@ -763,12 +767,13 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); + std::optional> database_host{std::nullopt}; cagra::index index(handle_); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + (const DataT*)database_host->data_handle(), ps.n_rows, ps.dim); index
= cagra::build(handle_, index_params, database_host_view); } else { index = cagra::build(handle_, index_params, database_view); From b859bc5edd41009604219416356cb6c96b189c2f Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:30:32 +0900 Subject: [PATCH 11/39] Fix cagra::extend error message (#532) When extending a CAGRA index that is built with `index_param.attach_dataset_on_build = false`, an error message "Only uncompressed dataset is supported" is displayed even if the dataset used to build the graph is not compressed. This problem occurs since the extend function does not check whether the dataset is empty. This PR fixes it. Authors: - tsuki (https://github.com/enp1s0) Approvers: - Artem M. Chirkin (https://github.com/achirkin) URL: https://github.com/rapidsai/cuvs/pull/532 --- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index b03b8214b..952039130 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -432,8 +432,14 @@ void extend_core( } else { index.update_graph(handle, raft::make_const_mdspan(updated_graph.view())); } + } else if (dynamic_cast*>(&index.data()) != + nullptr) { + RAFT_FAIL( + "cagra::extend only supports an index to which the dataset is attached. 
Please check if the " + "index was built with index_param.attach_dataset_on_build = true, or if a dataset was " + "attached after the build."); } else { - RAFT_FAIL("Only uncompressed dataset is supported"); + RAFT_FAIL("cagra::extend only supports an uncompressed dataset index"); } } } // namespace cuvs::neighbors::cagra From b3ce774d39e149d4e34c401068f24136eac44e13 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 17 Dec 2024 17:45:35 -0800 Subject: [PATCH 12/39] Fix CI for python cuvs_bench (#541) I'm seeing CI failures due to a missing 'setuptools' on a recent PR https://github.com/rapidsai/cuvs/actions/runs/12363479650/job/34545223389?pr=536 in building the cuvs_bench code. This seems to be because we were missing some requires in dependencies.yaml. Fix Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/541 --- conda/recipes/cuvs-bench/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/cuvs-bench/meta.yaml b/conda/recipes/cuvs-bench/meta.yaml index 0681a1038..d77aee8ce 100644 --- a/conda/recipes/cuvs-bench/meta.yaml +++ b/conda/recipes/cuvs-bench/meta.yaml @@ -79,6 +79,7 @@ requirements: - python - rapids-build-backend>=0.3.0,<0.4.0.dev0 - rmm ={{ minor_version }} + - setuptools>=64.0.0 run: - benchmark From 660a2caa64f864e38e0e7bd19df86556d25aa7db Mon Sep 17 00:00:00 2001 From: Tarang Jain <40517122+tarang-jain@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:29:55 -0800 Subject: [PATCH 13/39] Additional Distances for CAGRA C and Python API (#546) Add InnerProduct metric to CAGRA C and Python API + updates to CAGRA pytests. 
Closes https://github.com/rapidsai/cuvs/issues/545 Authors: - Tarang Jain (https://github.com/tarang-jain) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/cuvs/pull/546 --- cpp/include/cuvs/neighbors/cagra.h | 3 +++ cpp/src/neighbors/cagra_c.cpp | 6 ++++-- python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 2 ++ python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 20 ++++++++++++-------- python/cuvs/cuvs/test/test_cagra.py | 8 +++++--- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 14331ebbc..f7f58a19c 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -87,6 +88,8 @@ typedef struct cuvsCagraCompressionParams* cuvsCagraCompressionParams_t; * */ struct cuvsCagraIndexParams { + /** Distance type. */ + cuvsDistanceType metric; /** Degree of input graph for pruning. */ size_t intermediate_graph_degree; /** Degree of output graph. 
*/ diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 326a89665..02b7a566e 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -41,7 +41,8 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* auto res_ptr = reinterpret_cast(res); auto index = new cuvs::neighbors::cagra::index(*res_ptr); - auto index_params = cuvs::neighbors::cagra::index_params(); + auto index_params = cuvs::neighbors::cagra::index_params(); + index_params.metric = static_cast((int)params.metric), index_params.intermediate_graph_degree = params.intermediate_graph_degree; index_params.graph_degree = params.graph_degree; @@ -252,7 +253,8 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) { return cuvs::core::translate_exceptions([=] { - *params = new cuvsCagraIndexParams{.intermediate_graph_degree = 128, + *params = new cuvsCagraIndexParams{.metric = L2Expanded, + .intermediate_graph_degree = 128, .graph_degree = 64, .build_algo = IVF_PQ, .nn_descent_niter = 20}; diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index bba5a91a8..a0f811480 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -28,6 +28,7 @@ from libcpp cimport bool from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType cdef extern from "cuvs/neighbors/cagra.h" nogil: @@ -47,6 +48,7 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: ctypedef cuvsCagraCompressionParams* cuvsCagraCompressionParams_t ctypedef struct cuvsCagraIndexParams: + cuvsDistanceType metric size_t intermediate_graph_degree size_t graph_degree cuvsCagraGraphBuildAlgo build_algo diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx 
b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 752aef741..fd55905cf 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -28,11 +28,13 @@ from libcpp cimport bool, cast from libcpp.string cimport string from cuvs.common cimport cydlpack +from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible +from cuvs.distance import DISTANCE_TYPES from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( @@ -131,9 +133,11 @@ cdef class IndexParams: Parameters ---------- metric : string denoting the metric type, default="sqeuclidean" - Valid values for metric: ["sqeuclidean"], where + Valid values for metric: ["sqeuclidean", "inner_product"], where - sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2 + - inner_product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. 
intermediate_graph_degree : int, default = 128 graph_degree : int, default = 64 @@ -151,6 +155,7 @@ cdef class IndexParams: """ cdef cuvsCagraIndexParams* params + cdef object _metric # hold on to a reference to the compression, to keep from being GC'ed cdef public object compression @@ -170,10 +175,8 @@ cdef class IndexParams: nn_descent_niter=20, compression=None): - # todo (dgd): enable once other metrics are present - # and exposed in cuVS C API - # self.params.metric = _get_metric(metric) - # self.params.metric_arg = 0 + self._metric = metric + self.params.metric = DISTANCE_TYPES[metric] self.params.intermediate_graph_degree = intermediate_graph_degree self.params.graph_degree = graph_degree if build_algo == "ivf_pq": @@ -186,9 +189,9 @@ cdef class IndexParams: self.params.compression = \ compression.get_handle() - # @property - # def metric(self): - # return self.params.metric + @property + def metric(self): + return self._metric @property def intermediate_graph_degree(self): @@ -247,6 +250,7 @@ def build(IndexParams index_params, dataset, resources=None): The following distance metrics are supported: - L2 + - InnerProduct Parameters ---------- diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 56e132c23..d3b03a5d0 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -29,7 +29,7 @@ def run_cagra_build_search_test( n_queries=100, k=10, dtype=np.float32, - metric="euclidean", + metric="sqeuclidean", intermediate_graph_degree=128, graph_degree=64, build_algo="ivf_pq", @@ -42,6 +42,8 @@ def run_cagra_build_search_test( ): dataset = generate_data((n_rows, n_cols), dtype) if metric == "inner_product": + if dtype in [np.int8, np.uint8]: + pytest.skip("skip normalization for int8/uint8 data") dataset = normalize(dataset, norm="l2", axis=1) dataset_device = device_ndarray(dataset) @@ -122,7 +124,7 @@ def run_cagra_build_search_test( @pytest.mark.parametrize("dtype", [np.float32, 
np.int8, np.uint8]) @pytest.mark.parametrize("array_type", ["device", "host"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) -@pytest.mark.parametrize("metric", ["euclidean"]) +@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) def test_cagra_dataset_dtype_host_device( dtype, array_type, inplace, build_algo, metric ): @@ -145,7 +147,7 @@ def test_cagra_dataset_dtype_host_device( "graph_degree": 32, "add_data_on_build": True, "k": 1, - "metric": "euclidean", + "metric": "sqeuclidean", "build_algo": "ivf_pq", }, { From 89ebf15150223f4bce4a08bf3a6a4089380a1d0a Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Thu, 19 Dec 2024 19:34:09 -0800 Subject: [PATCH 14/39] Use nvidia-sphinx-theme for docs (#528) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/528 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 6 ++++-- conda/environments/all_cuda-118_arch-x86_64.yaml | 6 ++++-- conda/environments/all_cuda-125_arch-aarch64.yaml | 6 ++++-- conda/environments/all_cuda-125_arch-x86_64.yaml | 6 ++++-- dependencies.yaml | 8 +++++--- docs/source/conf.py | 2 +- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 50aa3fe7e..a6d98ea3b 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -7,7 +7,6 @@ channels: - conda-forge - nvidia dependencies: -- breathe - c-compiler - clang - clang-tools=16.0.6 @@ -44,7 +43,6 @@ dependencies: - nvcc_linux-aarch64=11.8 - openblas - pre-commit -- pydata-sphinx-theme - pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* @@ -55,5 +53,9 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-aarch64==2.17 +- pip: + - 
breathe>=4.35.0 + - nvidia-sphinx-theme name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 8f15b6164..1063e4d6c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -7,7 +7,6 @@ channels: - conda-forge - nvidia dependencies: -- breathe - c-compiler - clang - clang-tools=16.0.6 @@ -44,7 +43,6 @@ dependencies: - nvcc_linux-64=11.8 - openblas - pre-commit -- pydata-sphinx-theme - pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* @@ -55,5 +53,9 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-64==2.17 +- pip: + - breathe>=4.35.0 + - nvidia-sphinx-theme name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index f194c01a3..ee7b37695 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -7,7 +7,6 @@ channels: - conda-forge - nvidia dependencies: -- breathe - c-compiler - clang - clang-tools=16.0.6 @@ -40,7 +39,6 @@ dependencies: - numpydoc - openblas - pre-commit -- pydata-sphinx-theme - pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* @@ -51,5 +49,9 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-aarch64==2.17 +- pip: + - breathe>=4.35.0 + - nvidia-sphinx-theme name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 912d1629b..7c8e1fd99 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -7,7 +7,6 @@ channels: - conda-forge - nvidia dependencies: -- breathe - c-compiler - clang - clang-tools=16.0.6 @@ -40,7 +39,6 @@ dependencies: - numpydoc - openblas - 
pre-commit -- pydata-sphinx-theme - pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* @@ -51,5 +49,9 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-64==2.17 +- pip: + - breathe>=4.35.0 + - nvidia-sphinx-theme name: all_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index eca97d2f5..a73fe7b8f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -394,22 +394,24 @@ dependencies: common: - output_types: [conda] packages: - - breathe - doxygen>=1.8.20 - graphviz - ipython - numpydoc - - pydata-sphinx-theme - recommonmark + - sphinx>=8.0.0 - sphinx-copybutton - sphinx-markdown-tables + - pip: + - nvidia-sphinx-theme + - breathe>=4.35.0 rust: common: - output_types: [conda] packages: - make - rust - # clang/liblclang only needed for bindgen support + # clang/libclang only needed for bindgen support - clang - libclang build_wheels: diff --git a/docs/source/conf.py b/docs/source/conf.py index 0d667833a..c14919568 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -99,7 +99,7 @@ # a list of builtin themes. # -html_theme = "pydata_sphinx_theme" +html_theme = "nvidia_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme From f48e9aab593232b72f74fd79ad256ed51b997b43 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Thu, 19 Dec 2024 19:39:29 -0800 Subject: [PATCH 15/39] Add support for float16 to the python pairwise distance api (#547) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/547 --- cpp/src/distance/pairwise_distance_c.cpp | 13 +++++++++---- python/cuvs/cuvs/distance/distance.pyx | 7 +++++-- python/cuvs/cuvs/test/test_distance.py | 13 ++++++++++--- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp index d457198a2..061adaa2c 100644 --- a/cpp/src/distance/pairwise_distance_c.cpp +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -29,7 +29,7 @@ namespace { -template +template void _pairwise_distance(cuvsResources_t res, DLManagedTensor* x_tensor, DLManagedTensor* y_tensor, @@ -40,7 +40,7 @@ void _pairwise_distance(cuvsResources_t res, auto res_ptr = reinterpret_cast(res); using mdspan_type = raft::device_matrix_view; - using distances_mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; auto x_mds = cuvs::core::from_dlpack(x_tensor); auto y_mds = cuvs::core::from_dlpack(y_tensor); @@ -71,9 +71,14 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, } if (x_dt.bits == 32) { - _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); } else if (x_dt.bits == 64) { - _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); } else { RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); } diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx index eb34366e4..187532bfe 100644 --- a/python/cuvs/cuvs/distance/distance.pyx +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -100,7 +100,10 @@ def pairwise_distance(X, Y, 
out=None, metric="euclidean", metric_arg=2.0, n = y_cai.shape[0] if out is None: - out = device_ndarray.empty((m, n), dtype=y_cai.dtype) + output_dtype = y_cai.dtype + if np.issubdtype(y_cai.dtype, np.float16): + output_dtype = np.float32 + out = device_ndarray.empty((m, n), dtype=output_dtype) out_cai = wrap_array(out) x_k = x_cai.shape[1] @@ -119,7 +122,7 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, y_dt = y_cai.dtype d_dt = out_cai.dtype - if x_dt != y_dt or x_dt != d_dt: + if x_dt != y_dt: raise ValueError("Inputs must have the same dtypes") cdef cydlpack.DLManagedTensor* x_dlpack = \ diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index 681217fc8..f466c2743 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ -40,7 +40,7 @@ ], ) @pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) def test_distance(n_rows, n_cols, inplace, metric, dtype): input1 = np.random.random_sample((n_rows, n_cols)) input1 = np.asarray(input1).astype(dtype) @@ -55,7 +55,10 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype): norm = np.sum(input1, axis=1) input1 = (input1.T / norm).T - output = np.zeros((n_rows, n_rows), dtype=dtype) + output_dtype = dtype + if np.issubdtype(dtype, np.float16): + output_dtype = np.float32 + output = np.zeros((n_rows, n_rows), dtype=output_dtype) if metric == "inner_product": expected = np.matmul(input1, input1.T) @@ -76,4 +79,8 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype): actual = output_device.copy_to_host() - assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3) + tol = 1e-3 + if np.issubdtype(dtype, np.float16): + tol = 1e-1 + + assert np.allclose(expected, actual, atol=tol, rtol=tol) From ac49c414254cb448efce02d7a7b08190e43584e8 Mon Sep 17 00:00:00 2001 From: Vyas 
Ramasubramani Date: Mon, 30 Dec 2024 11:44:25 -0800 Subject: [PATCH 16/39] Check if nightlies have succeeded recently enough (#548) Contributes to https://github.com/rapidsai/build-planning/issues/127 This PR cannot be merged unless nightly CI has passed within the past 7 days, so if it remains unmerged that will itself be an indication that nightly CI needs fixing. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/548 --- .github/workflows/pr.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4c3b4d06a..91f51bd90 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - check-nightly-ci - changed-files - checks - conda-cpp-build @@ -29,6 +30,18 @@ jobs: if: always() with: needs: ${{ toJSON(needs) }} + check-nightly-ci: + # Switch to ubuntu-latest once it defaults to a version of Ubuntu that + # provides at least Python 3.11 (see + # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) + runs-on: ubuntu-24.04 + env: + RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Check if nightly CI is passing + uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + with: + repo: cuvs changed-files: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 From a57227310a54b42481e20aaece72d0879f4c5b96 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 30 Dec 2024 16:09:03 -0800 Subject: [PATCH 17/39] Update for raft logger changes (#540) This PR updates cuvs to use raft's updated logger implementation using [rapids-logger](https://github.com/rapidsai/rapids-logger). It is a breaking change because it changes the kmeans `base_params` verbosity type from an int to a `raft::level_enum`. 
This PR requires https://github.com/rapidsai/raft/pull/2530. Contributes to https://github.com/rapidsai/build-planning/issues/104 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/540 --- cpp/CMakeLists.txt | 11 ++++--- cpp/bench/ann/CMakeLists.txt | 21 +++++++++---- cpp/bench/ann/src/common/benchmark.hpp | 31 +++++++------------ cpp/include/cuvs/cluster/kmeans.hpp | 3 +- cpp/src/cluster/detail/kmeans.cuh | 12 +++---- cpp/src/cluster/detail/kmeans_auto_find_k.cuh | 4 +-- cpp/src/cluster/detail/kmeans_balanced.cuh | 4 +-- cpp/src/cluster/detail/kmeans_common.cuh | 2 +- .../detail/sparse/coo_spmv_kernel.cuh | 2 ++ cpp/src/neighbors/detail/ann_utils.cuh | 2 +- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 2 -- .../neighbors/detail/cagra/cagra_build.cuh | 4 +-- .../detail/cagra/cagra_serialize.cuh | 4 +-- .../detail/cagra/compute_distance.hpp | 2 +- .../detail/cagra/search_multi_cta.cuh | 2 +- .../cagra/search_multi_cta_kernel-inl.cuh | 2 +- .../detail/cagra/search_multi_kernel.cuh | 2 +- .../detail/cagra/search_single_cta.cuh | 2 +- .../cagra/search_single_cta_kernel-inl.cuh | 3 +- .../neighbors/detail/dataset_serialize.hpp | 2 +- cpp/src/neighbors/detail/dynamic_batching.cuh | 2 -- .../neighbors/detail/vamana/vamana_build.cuh | 4 +-- .../detail/vamana/vamana_serialize.cuh | 2 +- .../detail/vamana/vamana_structs.cuh | 2 +- cpp/src/neighbors/detail/vpq_dataset.cuh | 2 +- cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 3 +- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 2 +- .../neighbors/ivf_flat/ivf_flat_search.cuh | 3 +- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 3 +- cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh | 2 +- cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 2 +- cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh | 2 +- cpp/src/neighbors/mg/omp_checks.cpp | 1 - cpp/test/CMakeLists.txt | 4 +-- 
cpp/test/neighbors/ann_ivf_pq.cuh | 2 -- cpp/test/neighbors/ann_utils.cuh | 2 -- cpp/test/neighbors/brute_force.cu | 2 -- 37 files changed, 71 insertions(+), 86 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 79e50c1c1..26c0b82d3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -486,13 +486,14 @@ if(BUILD_SHARED_LIBS) "$<$:${CUVS_CUDA_FLAGS}>" ) target_link_libraries( - cuvs_objs PUBLIC raft::raft rmm::rmm rmm::rmm_logger ${CUVS_CTK_MATH_DEPENDENCIES} - $ - PRIVATE rmm::rmm_logger_impl + cuvs_objs + PUBLIC raft::raft rmm::rmm rmm::rmm_logger ${CUVS_CTK_MATH_DEPENDENCIES} + $ + PRIVATE rmm::rmm_logger_impl raft::raft_logger_impl ) add_library(cuvs SHARED $,EXCLUDE,rmm.*logger>) - add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) + add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) target_compile_options( cuvs INTERFACE $<$:--expt-extended-lambda @@ -704,7 +705,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB target_link_libraries( cuvs_c PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE raft::raft rmm::rmm_logger_impl + PRIVATE raft::raft rmm::rmm_logger_impl raft::raft_logger_impl ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 144cd3048..200b52ab3 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -126,10 +126,11 @@ function(ConfigureAnnBench) PRIVATE ${ConfigureAnnBench_LINKS} nlohmann_json::nlohmann_json Threads::Threads + $ $<$:CUDA::cudart_static> $ $ - $ + $ ) set_target_properties( @@ -175,9 +176,11 @@ function(ConfigureAnnBench) add_dependencies(CUVS_ANN_BENCH_ALL ${BENCH_NAME}) endfunction() -if(CUVS_FAISS_ENABLE_GPU) - add_library(cuvs_bench_rmm_logger OBJECT) - target_link_libraries(cuvs_bench_rmm_logger PRIVATE rmm::rmm_logger_impl) +if(CUVS_FAISS_ENABLE_GPU OR CUVS_ANN_BENCH_SINGLE_EXE) + add_library(cuvs_bench_logger OBJECT) + 
target_link_libraries( + cuvs_bench_logger PRIVATE rmm::rmm_logger_impl $ + ) endif() # ################################################################################################## @@ -303,8 +306,14 @@ if(CUVS_ANN_BENCH_SINGLE_EXE) target_link_libraries( ANN_BENCH - PRIVATE raft::raft nlohmann_json::nlohmann_json benchmark::benchmark dl fmt::fmt-header-only - spdlog::spdlog_header_only $<$:CUDA::nvtx3> rmm::rmm_logger_impl + PRIVATE raft::raft + nlohmann_json::nlohmann_json + benchmark::benchmark + dl + fmt::fmt-header-only + spdlog::spdlog_header_only + $<$:CUDA::nvtx3> + cuvs_bench_logger ) set_target_properties( ANN_BENCH diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 06e1e27af..49be78673 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -597,18 +597,16 @@ inline auto parse_string_flag(const char* arg, const char* pat, std::string& res inline auto run_main(int argc, char** argv) -> int { - bool force_overwrite = false; - bool build_mode = false; - bool search_mode = false; - bool no_lap_sync = false; - std::string data_prefix = "data"; - std::string index_prefix = "index"; - std::string new_override_kv = ""; - std::string mode = "latency"; - std::string threads_arg_txt = ""; - std::vector threads = {1, -1}; // min_thread, max_thread - std::string log_level_str = ""; - [[maybe_unused]] int raft_log_level = 0; // raft::logger::get(RAFT_NAME).get_level(); + bool force_overwrite = false; + bool build_mode = false; + bool search_mode = false; + bool no_lap_sync = false; + std::string data_prefix = "data"; + std::string index_prefix = "index"; + std::string new_override_kv = ""; + std::string mode = "latency"; + std::string threads_arg_txt = ""; + std::vector threads = {1, -1}; // min_thread, max_thread kv_series override_kv{}; char arg0_default[] = "benchmark"; // NOLINT @@ -639,12 +637,7 @@ inline auto run_main(int argc, char** argv) -> int 
parse_string_flag(argv[i], "--index_prefix", index_prefix) || parse_string_flag(argv[i], "--mode", mode) || parse_string_flag(argv[i], "--override_kv", new_override_kv) || - parse_string_flag(argv[i], "--threads", threads_arg_txt) || - parse_string_flag(argv[i], "--raft_log_level", log_level_str)) { - if (!log_level_str.empty()) { - raft_log_level = std::stoi(log_level_str); - log_level_str = ""; - } + parse_string_flag(argv[i], "--threads", threads_arg_txt)) { if (!threads_arg_txt.empty()) { auto threads_arg = split(threads_arg_txt, ':'); threads[0] = std::stoi(threads_arg[0]); @@ -673,8 +666,6 @@ inline auto run_main(int argc, char** argv) -> int } } - // raft::logger::get(RAFT_NAME).set_level(raft_log_level); - Mode metric_objective = Mode::kLatency; if (mode == "throughput") { metric_objective = Mode::kThroughput; } diff --git a/cpp/include/cuvs/cluster/kmeans.hpp b/cpp/include/cuvs/cluster/kmeans.hpp index 89b3acc24..cb8d36b10 100644 --- a/cpp/include/cuvs/cluster/kmeans.hpp +++ b/cpp/include/cuvs/cluster/kmeans.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -85,7 +86,7 @@ struct params : base_params { /** * verbosity level. */ - int verbosity = RAFT_LEVEL_INFO; + raft::level_enum verbosity = raft::level_enum::info; /** * Seed to the random number generator. 
diff --git a/cpp/src/cluster/detail/kmeans.cuh b/cpp/src/cluster/detail/kmeans.cuh index 3d054f0fd..e943b8afc 100644 --- a/cpp/src/cluster/detail/kmeans.cuh +++ b/cpp/src/cluster/detail/kmeans.cuh @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,8 +56,6 @@ namespace cuvs::cluster::kmeans::detail { -// TODO(cjnolet): RAFT_NAME needs to be removed and the raft::logger fixed to not require it -static const std::string RAFT_NAME = "raft"; static const std::string CUVS_NAME = "cuvs"; // ========================================================= @@ -373,7 +371,7 @@ void kmeans_fit_main(raft::resources const& handle, rmm::device_uvector& workspace) { raft::common::nvtx::range fun_scope("kmeans_fit_main"); - raft::logger::get(RAFT_NAME).set_level(params.verbosity); + raft::default_logger().set_level(params.verbosity); cudaStream_t stream = raft::resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); @@ -879,7 +877,7 @@ void kmeans_fit(raft::resources const& handle, pams.n_clusters); } - raft::logger::get(RAFT_NAME).set_level(pams.verbosity); + raft::default_logger().set_level(pams.verbosity); // Allocate memory rmm::device_uvector workspace(0, stream); @@ -1025,7 +1023,7 @@ void kmeans_predict(raft::resources const& handle, RAFT_EXPECTS(centroids.extent(1) == n_features, "invalid parameter (centroids.extent(1) != n_features)"); - raft::logger::get(RAFT_NAME).set_level(pams.verbosity); + raft::default_logger().set_level(pams.verbosity); auto metric = pams.metric; // Allocate memory @@ -1218,7 +1216,7 @@ void kmeans_transform(raft::resources const& handle, raft::device_matrix_view X_new) { raft::common::nvtx::range fun_scope("kmeans_transform"); - raft::logger::get(RAFT_NAME).set_level(pams.verbosity); + raft::default_logger().set_level(pams.verbosity); cudaStream_t stream = raft::resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); diff --git 
a/cpp/src/cluster/detail/kmeans_auto_find_k.cuh b/cpp/src/cluster/detail/kmeans_auto_find_k.cuh index 6441f7ad5..797b33bca 100644 --- a/cpp/src/cluster/detail/kmeans_auto_find_k.cuh +++ b/cpp/src/cluster/detail/kmeans_auto_find_k.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -230,4 +230,4 @@ void find_k(raft::resources const& handle, n_iter); } } -} // namespace cuvs::cluster::kmeans::detail \ No newline at end of file +} // namespace cuvs::cluster::kmeans::detail diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh index 3f1ad2334..ba4cabbde 100644 --- a/cpp/src/cluster/detail/kmeans_balanced.cuh +++ b/cpp/src/cluster/detail/kmeans_balanced.cuh @@ -25,7 +25,8 @@ #include #include -#include +#include +#include #include #include #include @@ -59,7 +60,6 @@ namespace cuvs::cluster::kmeans::detail { -static const std::string RAFT_NAME = "raft"; constexpr static inline float kAdjustCentersWeight = 7.0f; /** diff --git a/cpp/src/cluster/detail/kmeans_common.cuh b/cpp/src/cluster/detail/kmeans_common.cuh index eec71b5d2..03db08bd1 100644 --- a/cpp/src/cluster/detail/kmeans_common.cuh +++ b/cpp/src/cluster/detail/kmeans_common.cuh @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh index 1f4b19af4..e44edc68a 100644 --- a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh +++ b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index 529356351..149eea3f1 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git 
a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 952039130..358b7643e 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -31,8 +31,6 @@ namespace cuvs::neighbors::cagra { -static const std::string RAFT_NAME = "raft"; - template void add_node_core( raft::resources const& handle, diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index b7fec724b..340986448 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -46,8 +46,6 @@ namespace cuvs::neighbors::cagra::detail { -static const std::string RAFT_NAME = "raft"; - template void write_to_graph(raft::host_matrix_view knn_graph, raft::host_matrix_view neighbors_host_view, diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index 0f6cf852f..c83da7bb1 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -34,8 +34,6 @@ namespace cuvs::neighbors::cagra::detail { -static const std::string RAFT_NAME = "raft"; - constexpr int serialization_version = 4; /** diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 7eb798459..2227e4f9e 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include // TODO: This shouldn't be invoking spatial/knn diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh index ecfd856f1..9cb432bcb 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 9fa9d5894..7535ff217 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -26,7 +26,7 @@ #include "utils.hpp" #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index c6fe21642..469c80a08 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -23,7 +23,7 @@ #include "utils.hpp" #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh index fa71dbaf9..161aa8c4a 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -26,7 +26,7 @@ #include "utils.hpp" #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 678ed0cb4..188862fbb 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include @@ -64,7 +64,6 @@ namespace cuvs::neighbors::cagra::detail { namespace single_cta_search { -using raft::RAFT_NAME; // TODO: this is required for RAFT_LOG_XXX messages. 
// #define _CLK_BREAKDOWN diff --git a/cpp/src/neighbors/detail/dataset_serialize.hpp b/cpp/src/neighbors/detail/dataset_serialize.hpp index 0ecc2cf5d..ba3090b59 100644 --- a/cpp/src/neighbors/detail/dataset_serialize.hpp +++ b/cpp/src/neighbors/detail/dataset_serialize.hpp @@ -21,7 +21,7 @@ #include #include -#include +#include #include diff --git a/cpp/src/neighbors/detail/dynamic_batching.cuh b/cpp/src/neighbors/detail/dynamic_batching.cuh index 5c6b1654e..cb8e08ef5 100644 --- a/cpp/src/neighbors/detail/dynamic_batching.cuh +++ b/cpp/src/neighbors/detail/dynamic_batching.cuh @@ -50,8 +50,6 @@ namespace cuvs::neighbors::dynamic_batching::detail { -using raft::RAFT_NAME; // TODO: a workaround for RAFT_LOG_XXX macros - /** * A helper to make the requester threads more cooperative when busy-spinning. * It is used in the wait loops across this file to reduce the CPU usage. diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh index da24decb3..ec75c99c1 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -52,8 +52,6 @@ namespace cuvs::neighbors::experimental::vamana::detail { * @{ */ -static const std::string RAFT_NAME = "raft"; - static const int blockD = 32; static const int maxBlocks = 10000; diff --git a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh index a554464f6..c360ae19a 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh index 86cb4e1f8..f6f0279f7 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh +++ 
b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh index d85bad920..0d7882b4b 100644 --- a/cpp/src/neighbors/detail/vpq_dataset.cuh +++ b/cpp/src/neighbors/detail/vpq_dataset.cuh @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index d6ffc1218..f594343c7 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -27,7 +27,8 @@ #include "../../cluster/kmeans_balanced.cuh" #include "../detail/ann_utils.cuh" #include -#include +#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index f5a4267cd..79b4f1a18 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -23,7 +23,7 @@ #include "../detail/ann_utils.cuh" #include -#include // RAFT_LOG_TRACE +#include #include #include #include // RAFT_CUDA_TRY diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 032b6a8ff..2df6f4f0e 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -27,7 +27,8 @@ #include // is_min_close, DistanceType #include // cuvs::selection::select_k #include -#include // RAFT_LOG_TRACE +#include +#include #include #include // raft::resources #include // raft::linalg::gemm diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index 1d4acea1e..44a1b11fa 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -30,7 +30,7 
@@ #include "../../cluster/kmeans_balanced.cuh" #include -#include +#include #include #include #include @@ -68,7 +68,6 @@ #include namespace cuvs::neighbors::ivf_pq::detail { -using raft::RAFT_NAME; // TODO: this is required for RAFT_LOG_XXX messages. using namespace cuvs::spatial::knn::detail; // NOLINT using internal_extents_t = int64_t; // The default mdspan extent type used internally. diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh index 5b41e5f3d..1b098ac5c 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index db8f9fbd3..05bb99353 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh index 5eaebe69d..4af9dbb8e 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/mg/omp_checks.cpp b/cpp/src/neighbors/mg/omp_checks.cpp index e09182dfe..c8cc27414 100644 --- a/cpp/src/neighbors/mg/omp_checks.cpp +++ b/cpp/src/neighbors/mg/omp_checks.cpp @@ -18,7 +18,6 @@ #include namespace cuvs::neighbors::mg { -using raft::RAFT_NAME; void check_omp_threads(const int requirements) { diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 4d13daaed..cca061455 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -89,7 +89,7 @@ function(ConfigureTest) endfunction() add_library(test_rmm_logger OBJECT) -target_link_libraries(test_rmm_logger 
PRIVATE rmm::rmm_logger_impl) +target_link_libraries(test_rmm_logger PRIVATE rmm::rmm_logger_impl raft::raft_logger_impl) # ################################################################################################## # test sources ################################################################################## @@ -236,7 +236,7 @@ if(BUILD_TESTS) NAME SPARSE_TEST PATH sparse/cluster/cluster_solvers.cu sparse/cluster/eigen_solvers.cu sparse/cluster/spectral.cu GPUS 1 PERCENT 100 ) - + ConfigureTest( NAME PREPROCESSING_TEST PATH preprocessing/scalar_quantization.cu GPUS 1 PERCENT 100 ) diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 3a92b5e3d..01efd804e 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -31,8 +31,6 @@ namespace cuvs::neighbors::ivf_pq { -using raft::RAFT_NAME; // For logging - struct test_ivf_sample_filter { static constexpr unsigned offset = 300; }; diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 94bccade2..ded8cb5af 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -38,8 +38,6 @@ namespace cuvs::neighbors { -using raft::RAFT_NAME; // For logging - struct print_dtype { cudaDataType_t value; }; diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/test/neighbors/brute_force.cu index 8c354baa9..2cefb1098 100644 --- a/cpp/test/neighbors/brute_force.cu +++ b/cpp/test/neighbors/brute_force.cu @@ -76,11 +76,9 @@ class KNNTest : public ::testing::TestWithParam> { protected: void testBruteForce() { - // #if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG) raft::print_device_vector("Input array: ", input_.data(), rows_ * cols_, std::cout); std::cout << "K: " << k_ << std::endl; raft::print_device_vector("Labels array: ", search_labels_.data(), rows_, std::cout); - // #endif auto index = raft::make_device_matrix_view( (const T*)(input_.data()), rows_, cols_); From 55c5a7f0f9c3e103a33264a913dbd17b059eff78 
Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 30 Dec 2024 18:48:13 -0800 Subject: [PATCH 18/39] Get Breathe from conda again (#554) As part of https://github.com/rapidsai/cuvs/pull/528 cuvs's doc builds were modified to pull Breathe from pip. That was necessary because the nvidia-sphinx-theme requires Sphinx 8 but [the conda-forge Breathe package was not compatible with that Sphinx version](https://github.com/conda-forge/breathe-feedstock/issues/63). I fixed that in https://github.com/conda-forge/breathe-feedstock/pull/64, so now we can go back to using Breathe from conda to avoid mixing pip and conda for dependency management in the same environment. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/554 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index a6d98ea3b..01853da84 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -7,6 +7,7 @@ channels: - conda-forge - nvidia dependencies: +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -56,6 +57,5 @@ dependencies: - sphinx>=8.0.0 - sysroot_linux-aarch64==2.17 - pip: - - breathe>=4.35.0 - nvidia-sphinx-theme name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 1063e4d6c..a1ad68d7f 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -7,6 +7,7 @@ channels: - conda-forge - nvidia dependencies: 
+- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -56,6 +57,5 @@ dependencies: - sphinx>=8.0.0 - sysroot_linux-64==2.17 - pip: - - breathe>=4.35.0 - nvidia-sphinx-theme name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index ee7b37695..ee0213fff 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -7,6 +7,7 @@ channels: - conda-forge - nvidia dependencies: +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -52,6 +53,5 @@ dependencies: - sphinx>=8.0.0 - sysroot_linux-aarch64==2.17 - pip: - - breathe>=4.35.0 - nvidia-sphinx-theme name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7c8e1fd99..d93dcaf7a 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -7,6 +7,7 @@ channels: - conda-forge - nvidia dependencies: +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -52,6 +53,5 @@ dependencies: - sphinx>=8.0.0 - sysroot_linux-64==2.17 - pip: - - breathe>=4.35.0 - nvidia-sphinx-theme name: all_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index a73fe7b8f..a11e59e31 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -394,6 +394,7 @@ dependencies: common: - output_types: [conda] packages: + - breathe>=4.35.0 - doxygen>=1.8.20 - graphviz - ipython @@ -404,7 +405,6 @@ dependencies: - sphinx-markdown-tables - pip: - nvidia-sphinx-theme - - breathe>=4.35.0 rust: common: - output_types: [conda] From 0e735ea025f8e1e24e8e9b3d3f2ac502711f5387 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 7 Jan 2025 12:32:02 -0600 Subject: [PATCH 19/39] remove setup.cfg files, other packaging cleanup (#544) Similar to https://github.com/rapidsai/raft/pull/2532, this proposes some small packaging 
cleanup. * removes `setup.cfg` files - *these are currently being ignored by tools, in favor of identical configuration in `pyproject.toml` and `.flake8` files* - e.g. https://github.com/rapidsai/cuvs/blob/b3ce774d39e149d4e34c401068f24136eac44e13/.pre-commit-config.yaml#L31-L35 * alphabetizes dependency lists in `dependencies.yaml` * changes `cupy:` group in `dependencies.yaml` to `depends_on_cupy:` (for consistency with other dependencies) Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/cuvs/pull/544 --- .pre-commit-config.yaml | 3 +-- dependencies.yaml | 40 +++++++++++++++--------------- pyproject.toml | 4 +-- python/cuvs/setup.cfg | 39 ----------------------------- setup.cfg | 55 ----------------------------------------- 5 files changed, 23 insertions(+), 118 deletions(-) delete mode 100644 python/cuvs/setup.cfg delete mode 100644 setup.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e53abd92..fcfc7e1fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -108,8 +108,7 @@ repos: [.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs)$| CMakeLists[.]txt$| CMakeLists_standalone[.]txt$| - meta[.]yaml$| - setup[.]cfg$ + meta[.]yaml$ exclude: | (?x) docs/source/sphinxext/github_link\.py| diff --git a/dependencies.yaml b/dependencies.yaml index a11e59e31..fbd1d8372 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -7,39 +7,39 @@ files: arch: [x86_64, aarch64] includes: - build - - rapids_build - build_py_cuvs + - build_wheels + - checks - cuda - cuda_version - - depends_on_pylibraft + - depends_on_cupy - depends_on_librmm + - depends_on_pylibraft - develop - - checks - - build_wheels - - test_libcuvs - docs + - rapids_build - run_py_cuvs + - rust + - test_libcuvs - test_python_common - test_py_cuvs - - cupy - - rust bench_ann: output: conda matrix: cuda: ["11.8", "12.5"] arch: [x86_64, aarch64] 
includes: - - rapids_build + - bench + - bench_python - build_py_cuvs - cuda - cuda_version + - depends_on_cupy - depends_on_pylibraft - depends_on_librmm - develop - - bench - - bench_python + - rapids_build - rapids_build_setuptools - - cupy test_cpp: output: none includes: @@ -49,10 +49,10 @@ files: output: none includes: - cuda_version + - depends_on_cupy - py_version - test_python_common - test_py_cuvs - - cupy checks: output: none includes: @@ -61,19 +61,19 @@ files: docs: output: none includes: + - cuda - cuda_version - - cupy + - depends_on_cupy - docs - py_version - - rust - rapids_build - - cuda + - rust rust: output: none includes: + - cuda - cuda_version - rapids_build - - cuda - rust py_build_cuvs: output: pyproject @@ -89,8 +89,8 @@ files: table: tool.rapids-build-backend key: requires includes: - - rapids_build - build_py_cuvs + - rapids_build py_run_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -98,8 +98,8 @@ files: table: project includes: - cuda_wheels - - run_py_cuvs - depends_on_pylibraft + - run_py_cuvs py_test_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -107,9 +107,9 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_cupy - test_python_common - test_py_cuvs - - cupy py_build_cuvs_bench: output: pyproject pyproject_dir: python/cuvs_bench @@ -368,7 +368,7 @@ dependencies: - nvidia-cusolver - nvidia-cusparse - cupy: + depends_on_cupy: common: - output_types: conda packages: diff --git a/pyproject.toml b/pyproject.toml index fbf4cf41f..417514466 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ force-exclude = ''' # unlike the match option above this match-dir will have no effect when # pydocstyle is invoked from pre-commit. Therefore this exclusion list must # also be maintained in the pre-commit config file. 
-match-dir = "^(?!(ci|cpp|conda|docs)).*$" +match-dir = "^(?!(ci|cpp|conda|docs|notebooks)).*$" select = "D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418" # Would like to enable the following rules in the future: # D200, D202, D205, D400 @@ -42,6 +42,6 @@ follow_imports = "skip" skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild" # ignore short words, and typename parameters like OffsetT ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" -ignore-words-list = "inout,numer" +ignore-words-list = "inout,unparseable,numer" builtin = "clear" quiet-level = 3 diff --git a/python/cuvs/setup.cfg b/python/cuvs/setup.cfg deleted file mode 100644 index 57b4954bc..000000000 --- a/python/cuvs/setup.cfg +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - cuvs - nvtext - cudf - cuml - raft - cugraph - dask_cudf - rmm -known_first_party= - cuvs -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index e64641d05..000000000 --- a/setup.cfg +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
- -[flake8] -filename = *.py, *.pyx, *.pxd, *.pxi -exclude = __init__.py, *.egg, build, docs, .git -force-check = True -ignore = - # line break before binary operator - W503, - # whitespace before : - E203 -per-file-ignores = - # Rules ignored only in Cython: - # E211: whitespace before '(' (used in multi-line imports) - # E225: Missing whitespace around operators (breaks cython casting syntax like ) - # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) - # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) - # E275: Missing whitespace after keyword (Doesn't work with Cython except?) - # E402: invalid syntax (works for Python, not Cython) - # E999: invalid syntax (works for Python, not Cython) - # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 - -[pydocstyle] -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather -# than include using match-dir. Note that as discussed in -# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, -# unlike the match option above this match-dir will have no effect when -# pydocstyle is invoked from pre-commit. Therefore this exclusion list must -# also be maintained in the pre-commit config file. 
-match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks)).*$ -# Allow missing docstrings for docutils -ignore-decorators = .*(docutils|doc_apply|copy_docstring).* -select = - D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418 - # Would like to enable the following rules in the future: - # D200, D202, D205, D400 - -[mypy] -ignore_missing_imports = True -# If we don't specify this, then mypy will check excluded files if -# they are imported by a checked file. -follow_imports = skip - -[codespell] -# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - -# this is only to allow you to run codespell interactively -skip = ./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild -# ignore short words, and typename parameters like OffsetT -ignore-regex = \b(.{1,4}|[A-Z]\w*T)\b -ignore-words-list = inout,unparseable,numer -builtin = clear -quiet-level = 3 From e3244123a3021f52f1374fdafedbd8f37546d112 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 7 Jan 2025 11:55:06 -0800 Subject: [PATCH 20/39] Support raft's logger targets (#557) rapidsai/raft#2530 added new targets that we need to make global in cuvs's CMake as well. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/557 --- cpp/cmake/thirdparty/get_raft.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 5def74f4b..2e57df84e 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -39,7 +39,7 @@ function(find_and_configure_raft) # Invoke CPM find_package() #----------------------------------------------------- rapids_cpm_find(raft ${PKG_VERSION} - GLOBAL_TARGETS raft::raft + GLOBAL_TARGETS raft::raft raft::raft_logger raft::raft_logger_impl BUILD_EXPORT_SET cuvs-exports INSTALL_EXPORT_SET cuvs-exports COMPONENTS ${RAFT_COMPONENTS} From 2a10353fcaa1e1da8429267a542f2c5a6e3412b1 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 7 Jan 2025 16:45:22 -0800 Subject: [PATCH 21/39] Change brute_force api to match ivf*/cagra (#536) This changes the brute_force knn api to match that of ivf-* and cagra , by adding a search_params and index_params structure to the relevant calls. This allows us to use the dynamic batching code on brute_force knn, as well as provide a more standardized API for our users. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/536 --- cpp/include/cuvs/neighbors/brute_force.hpp | 139 +++++++++++++----- cpp/src/neighbors/brute_force.cu | 116 ++++++++++----- cpp/src/neighbors/brute_force_c.cpp | 15 +- cpp/src/neighbors/dynamic_batching.cu | 3 + cpp/test/CMakeLists.txt | 1 + cpp/test/neighbors/brute_force.cu | 34 +++-- .../dynamic_batching/test_brute_force.cu | 40 +++++ 7 files changed, 259 insertions(+), 89 deletions(-) create mode 100644 cpp/test/neighbors/dynamic_batching/test_brute_force.cu diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index d040e03db..8fca9da83 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -16,7 +16,6 @@ #pragma once -#include "common.hpp" #include #include #include @@ -28,6 +27,10 @@ namespace cuvs::neighbors::brute_force { +struct index_params : cuvs::neighbors::index_params {}; + +struct search_params : cuvs::neighbors::search_params {}; + /** * @defgroup bruteforce_cpp_index Bruteforce index * @{ @@ -41,6 +44,11 @@ namespace cuvs::neighbors::brute_force { */ template struct index : cuvs::neighbors::index { + using index_params_type = brute_force::index_params; + using search_params_type = brute_force::search_params; + using index_type = int64_t; + using value_type = T; + public: index(const index&) = delete; index(index&&) = default; @@ -181,20 +189,26 @@ struct index : cuvs::neighbors::index { * @code{.cpp} * using namespace cuvs::neighbors; * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] - * @param[in] metric 
cuvs::distance::DistanceType - * @param[in] metric_arg metric argument * * @return the constructed brute-force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; /** * @brief Build the index from the dataset for efficient search. * @@ -202,62 +216,78 @@ auto build(raft::resources const& handle, * @code{.cpp} * using namespace cuvs::neighbors; * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument * - * @return the constructed ivf-flat index + * @return the constructed brute force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + 
cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + /** * @brief Build the index from the dataset for efficient search. * * Usage example: * @code{.cpp} - * using namespace cuvs::neighbors; - * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle - * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] * - * @return the constructed bruteforce index + * @return the constructed brute force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + /** * @brief Build the index from the dataset for efficient search.
* * Usage example: * @code{.cpp} - * using namespace cuvs::neighbors; - * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle - * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] * - * @return the constructed bruteforce index + * @return the constructed brute force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; /** * @} */ @@ -286,6 +316,7 @@ auto build(raft::resources const& handle, * @endcode * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index brute-force constructed index * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -296,6 +327,7 @@ auto build(raft::resources const& handle, * `index->size()` bits to indicate whether queries[0] should compute the distance with dataset.
*/ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, @@ -303,6 +335,14 @@ void search(raft::resources const& handle, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + /** * @brief Search ANN using the constructed index. * @@ -323,6 +363,7 @@ void search(raft::resources const& handle, * @endcode * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index ivf-flat constructed index * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -332,18 +373,28 @@ void search(raft::resources const& handle, * given */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); + +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); /** * @brief Search ANN 
using the constructed index. * * See the [brute_force::build](#brute_force::build) documentation for a usage example. * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index bruteforce constructed index * @param[in] queries a device pointer to a col-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -353,18 +404,28 @@ void search(raft::resources const& handle, * given query */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); + +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); /** * @brief Search ANN using the constructed index. * * See the [brute_force::build](#brute_force::build) documentation for a usage example. 
* * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index bruteforce constructed index * @param[in] queries a device pointer to a col-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -374,12 +435,21 @@ void search(raft::resources const& handle, * given query */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); + +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); /** * @} */ @@ -472,6 +542,7 @@ struct sparse_search_params { * @brief Search the sparse bruteforce index for nearest neighbors * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index Sparse brute-force constructed index * @param[in] queries a sparse CSR matrix on the device to query * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index d534676e3..a9980a390 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -160,45 +160,81 @@ void index::update_dataset( dataset_view_ = raft::make_const_mdspan(dataset_.view()); } -#define CUVS_INST_BFKNN(T, DistT) \ - auto build(raft::resources const& res, \ - raft::device_matrix_view dataset, \ - cuvs::distance::DistanceType metric, \ - DistT 
metric_arg) \ - ->cuvs::neighbors::brute_force::index \ - { \ - return detail::build(res, dataset, metric, metric_arg); \ - } \ - auto build(raft::resources const& res, \ - raft::device_matrix_view dataset, \ - cuvs::distance::DistanceType metric, \ - DistT metric_arg) \ - ->cuvs::neighbors::brute_force::index \ - { \ - return detail::build(res, dataset, metric, metric_arg); \ - } \ - \ - void search(raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ - const cuvs::neighbors::filtering::base_filter& sample_filter) \ - { \ - detail::search( \ - res, idx, queries, neighbors, distances, sample_filter); \ - } \ - void search(raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ - const cuvs::neighbors::filtering::base_filter& sample_filter) \ - { \ - detail::search( \ - res, idx, queries, neighbors, distances, sample_filter); \ - } \ - \ +#define CUVS_INST_BFKNN(T, DistT) \ + auto build(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index_params& index_params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ + } \ + auto build(raft::resources const& res, \ + raft::device_matrix_view dataset, \ + cuvs::distance::DistanceType metric, \ + DistT metric_arg) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, metric, metric_arg); \ + } \ + auto build(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index_params& index_params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, index_params.metric, 
index_params.metric_arg); \ + } \ + auto build(raft::resources const& res, \ + raft::device_matrix_view dataset, \ + cuvs::distance::DistanceType metric, \ + DistT metric_arg) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, metric, metric_arg); \ + } \ + \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::search_params& params, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::search_params& params, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + \ template struct cuvs::neighbors::brute_force::index; CUVS_INST_BFKNN(float, 
float); @@ -206,4 +242,4 @@ CUVS_INST_BFKNN(half, float); #undef CUVS_INST_BFKNN -} // namespace cuvs::neighbors::brute_force \ No newline at end of file +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp index f1a8c995d..2b8980863 100644 --- a/cpp/src/neighbors/brute_force_c.cpp +++ b/cpp/src/neighbors/brute_force_c.cpp @@ -44,10 +44,12 @@ void* _build(cuvsResources_t res, using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - auto index_on_stack = cuvs::neighbors::brute_force::build( - *res_ptr, mds, static_cast((int)metric), metric_arg); - auto index_on_heap = new cuvs::neighbors::brute_force::index(std::move(index_on_stack)); + cuvs::neighbors::brute_force::index_params params; + params.metric = metric; + params.metric_arg = metric_arg; + auto index_on_stack = cuvs::neighbors::brute_force::build(*res_ptr, params, mds); + auto index_on_heap = new cuvs::neighbors::brute_force::index(std::move(index_on_stack)); return index_on_heap; } @@ -72,8 +74,11 @@ void _search(cuvsResources_t res, auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + cuvs::neighbors::brute_force::search_params params; + if (prefilter.type == NO_FILTER) { cuvs::neighbors::brute_force::search(*res_ptr, + params, *index_ptr, queries_mds, neighbors_mds, @@ -87,7 +92,7 @@ void _search(cuvsResources_t res, queries_mds.extent(0), index_ptr->dataset().extent(0))); cuvs::neighbors::brute_force::search( - *res_ptr, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter_view); + *res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter_view); } else { RAFT_FAIL("Unsupported prefilter type: BITSET"); } @@ -226,4 +231,4 @@ extern "C" cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res, RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, 
index->dtype.bits); } }); -} \ No newline at end of file +} diff --git a/cpp/src/neighbors/dynamic_batching.cu b/cpp/src/neighbors/dynamic_batching.cu index 6be70353b..84c8a2cf1 100644 --- a/cpp/src/neighbors/dynamic_batching.cu +++ b/cpp/src/neighbors/dynamic_batching.cu @@ -16,6 +16,7 @@ #include "detail/dynamic_batching.cuh" +#include #include #include #include @@ -53,6 +54,8 @@ namespace cuvs::neighbors::dynamic_batching { return index.runner->search(res, params, queries, neighbors, distances); \ } +CUVS_INST_DYNAMIC_BATCHING_INDEX(float, int64_t, cuvs::neighbors::brute_force, index); + CUVS_INST_DYNAMIC_BATCHING_INDEX(float, uint32_t, cuvs::neighbors::cagra, index); CUVS_INST_DYNAMIC_BATCHING_INDEX(half, uint32_t, cuvs::neighbors::cagra, index); CUVS_INST_DYNAMIC_BATCHING_INDEX(int8_t, uint32_t, cuvs::neighbors::cagra, index); diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index cca061455..9aa596a6e 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -183,6 +183,7 @@ if(BUILD_TESTS) NAME NEIGHBORS_DYNAMIC_BATCHING_TEST PATH + neighbors/dynamic_batching/test_brute_force.cu neighbors/dynamic_batching/test_cagra.cu neighbors/dynamic_batching/test_ivf_flat.cu neighbors/dynamic_batching/test_ivf_pq.cu diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/test/neighbors/brute_force.cu index 2cefb1098..b1c819a26 100644 --- a/cpp/test/neighbors/brute_force.cu +++ b/cpp/test/neighbors/brute_force.cu @@ -89,10 +89,18 @@ class KNNTest : public ::testing::TestWithParam> { auto distances = raft::make_device_matrix_view(distances_.data(), rows_, k_); - auto metric = cuvs::distance::DistanceType::L2Unexpanded; - auto idx = cuvs::neighbors::brute_force::build(handle, index, metric); - cuvs::neighbors::brute_force::search( - handle, idx, search, indices, distances, cuvs::neighbors::filtering::none_sample_filter{}); + cuvs::neighbors::brute_force::index_params index_params; + index_params.metric = cuvs::distance::DistanceType::L2Unexpanded; + + 
auto idx = cuvs::neighbors::brute_force::build(handle, index_params, index); + cuvs::neighbors::brute_force::search_params search_params; + cuvs::neighbors::brute_force::search(handle, + search_params, + idx, + search, + indices, + distances, + cuvs::neighbors::filtering::none_sample_filter{}); build_actual_output<<>>( actual_labels_.data(), rows_, k_, search_labels_.data(), indices_.data()); @@ -385,16 +393,22 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam auto distances = raft::make_device_matrix_view( cuvs_distances_.data(), params_.num_queries, params_.k); + cuvs::neighbors::brute_force::index_params index_params; + index_params.metric = metric; + index_params.metric_arg = metric_arg; + + cuvs::neighbors::brute_force::search_params search_params; + if (params_.row_major) { auto idx = cuvs::neighbors::brute_force::build(handle_, + index_params, raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim), - metric, - metric_arg); + database.data(), params_.num_db_vecs, params_.dim)); cuvs::neighbors::brute_force::search( handle_, + search_params, idx, raft::make_device_matrix_view( search_queries.data(), params_.num_queries, params_.dim), @@ -404,13 +418,13 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam } else { auto idx = cuvs::neighbors::brute_force::build( handle_, + index_params, raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim), - metric, - metric_arg); + database.data(), params_.num_db_vecs, params_.dim)); cuvs::neighbors::brute_force::search( handle_, + search_params, idx, raft::make_device_matrix_view( search_queries.data(), params_.num_queries, params_.dim), diff --git a/cpp/test/neighbors/dynamic_batching/test_brute_force.cu b/cpp/test/neighbors/dynamic_batching/test_brute_force.cu new file mode 100644 index 000000000..11f468374 --- /dev/null +++ b/cpp/test/neighbors/dynamic_batching/test_brute_force.cu @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../dynamic_batching.cuh" + +#include + +namespace cuvs::neighbors::dynamic_batching { + +using brute_force_float32 = dynamic_batching_test, + brute_force::build, + brute_force::search>; + +TEST_P(brute_force_float32, defaults) +{ + build_all(); + search_all(); + check_neighbors(); +} + +INSTANTIATE_TEST_CASE_P(dynamic_batching, brute_force_float32, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::dynamic_batching From 1e548f8c3a773452ce69556f4db72fc712efae02 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 10 Jan 2025 07:38:42 -0800 Subject: [PATCH 22/39] Allow brute_force::build to work on host matrix dataset (#562) Closes #538 Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/cuvs/pull/562 --- cpp/include/cuvs/neighbors/brute_force.hpp | 28 ++++ cpp/src/neighbors/brute_force.cu | 7 + cpp/src/neighbors/detail/knn_brute_force.cuh | 22 ++- cpp/test/neighbors/brute_force.cu | 142 ++++++++++++------- 4 files changed, 140 insertions(+), 59 deletions(-) diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index 8fca9da83..99581469f 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -204,6 +204,20 @@ auto build(raft::resources const& handle, 
raft::device_matrix_view dataset) -> cuvs::neighbors::brute_force::index; +/** + * @brief Build the index from the dataset for efficient search. + * + * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a host pointer to a row-major matrix [n_rows, dim] + * + * @return the constructed brute-force index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::brute_force::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + [[deprecated]] auto build( raft::resources const& handle, raft::device_matrix_view dataset, @@ -231,6 +245,20 @@ auto build(raft::resources const& handle, raft::device_matrix_view dataset) -> cuvs::neighbors::brute_force::index; +/** + * @brief Build the index from the dataset for efficient search. + * + * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a host pointer to a row-major matrix [n_rows, dim] + * + * @return the constructed brute-force index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::brute_force::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + [[deprecated]] auto build( raft::resources const& handle, raft::device_matrix_view dataset, diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index a9980a390..d54a75879 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -168,6 +168,13 @@ void index::update_dataset( { \ return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ } \ + auto build(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index_params& index_params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ + } \ auto 
build(raft::resources const& res, \ raft::device_matrix_view dataset, \ cuvs::distance::DistanceType metric, \ diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index e5eeecbc9..f1976e002 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -28,6 +28,7 @@ #include "./knn_utils.cuh" #include +#include #include #include #include @@ -750,10 +751,10 @@ void search(raft::resources const& res, } } -template +template cuvs::neighbors::brute_force::index build( raft::resources const& res, - raft::device_matrix_view dataset, + mdspan, LayoutT, AccessorT> dataset, cuvs::distance::DistanceType metric, DistT metric_arg) { @@ -764,18 +765,31 @@ cuvs::neighbors::brute_force::index build( if (metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded) { + auto dataset_storage = std::optional>{}; + auto dataset_view = [&res, &dataset_storage, dataset]() { + if constexpr (std::is_same_v>) { + return dataset; + } else { + dataset_storage = + make_device_matrix(res, dataset.extent(0), dataset.extent(1)); + raft::copy(res, dataset_storage->view(), dataset); + return raft::make_const_mdspan(dataset_storage->view()); + } + }(); + norms = raft::make_device_vector(res, dataset.extent(0)); // cosine needs the l2norm, where as l2 distances needs the squared norm if (metric == cuvs::distance::DistanceType::CosineExpanded) { raft::linalg::norm(res, - dataset, + dataset_view, norms->view(), raft::linalg::NormType::L2Norm, raft::linalg::Apply::ALONG_ROWS, raft::sqrt_op{}); } else { raft::linalg::norm(res, - dataset, + dataset_view, norms->view(), raft::linalg::NormType::L2Norm, raft::linalg::Apply::ALONG_ROWS); diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/test/neighbors/brute_force.cu index b1c819a26..a9ad4bf1c 100644 --- a/cpp/test/neighbors/brute_force.cu +++ 
b/cpp/test/neighbors/brute_force.cu @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -210,6 +211,7 @@ struct RandomKNNInputs { int k; cuvs::distance::DistanceType metric; bool row_major; + bool host_dataset; }; std::ostream& operator<<(std::ostream& os, const RandomKNNInputs& input) @@ -217,7 +219,7 @@ std::ostream& operator<<(std::ostream& os, const RandomKNNInputs& input) return os << "num_queries:" << input.num_queries << " num_vecs:" << input.num_db_vecs << " dim:" << input.dim << " k:" << input.k << " metric:" << cuvs::neighbors::print_metric{input.metric} - << " row_major:" << input.row_major; + << " row_major:" << input.row_major << " host_dataset:" << input.host_dataset; } template @@ -399,12 +401,15 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam cuvs::neighbors::brute_force::search_params search_params; - if (params_.row_major) { - auto idx = - cuvs::neighbors::brute_force::build(handle_, - index_params, - raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim)); + if (params_.host_dataset) { + // test building from a dataset in host memory + auto host_database = + raft::make_host_matrix(params_.num_db_vecs, params_.dim); + raft::copy( + host_database.data_handle(), database.data(), params_.num_db_vecs * params_.dim, stream_); + + auto idx = cuvs::neighbors::brute_force::build( + handle_, index_params, raft::make_const_mdspan(host_database.view())); cuvs::neighbors::brute_force::search( handle_, @@ -416,21 +421,39 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam distances, cuvs::neighbors::filtering::none_sample_filter{}); } else { - auto idx = cuvs::neighbors::brute_force::build( - handle_, - index_params, - raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim)); + if (params_.row_major) { + auto idx = + cuvs::neighbors::brute_force::build(handle_, + index_params, + raft::make_device_matrix_view( + database.data(), 
params_.num_db_vecs, params_.dim)); - cuvs::neighbors::brute_force::search( - handle_, - search_params, - idx, - raft::make_device_matrix_view( - search_queries.data(), params_.num_queries, params_.dim), - indices, - distances, - cuvs::neighbors::filtering::none_sample_filter{}); + cuvs::neighbors::brute_force::search( + handle_, + search_params, + idx, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + cuvs::neighbors::filtering::none_sample_filter{}); + } else { + auto idx = cuvs::neighbors::brute_force::build( + handle_, + index_params, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim)); + + cuvs::neighbors::brute_force::search( + handle_, + search_params, + idx, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + cuvs::neighbors::filtering::none_sample_filter{}); + } } ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(ref_indices_.data(), @@ -480,42 +503,51 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam const std::vector random_inputs = { // test each distance metric on a small-ish input, with row-major inputs - {100, 256, 2, 65, cuvs::distance::DistanceType::L2Expanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L1, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, true}, - 
{256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, true}, + {100, 256, 2, 65, cuvs::distance::DistanceType::L2Expanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L1, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, true, false}, // test each distance metric with col-major inputs - {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L1, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, false}, - {256, 512, 
16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, false}, + {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L1, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, false, false}, // larger tests on different sized data / k values - {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded, false}, - {345, 1023, 16, 128, cuvs::distance::DistanceType::CosineExpanded, true}, - {789, 20516, 64, 256, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, true}, - {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, false}, - {1000, 5000, 128, 128, cuvs::distance::DistanceType::LpUnexpanded, true}, - {1000, 5000, 128, 128, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 5000, 128, 128, cuvs::distance::DistanceType::InnerProduct, false}}; + {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded, false, false}, + {345, 1023, 16, 128, cuvs::distance::DistanceType::CosineExpanded, true, false}, + {789, 20516, 64, 
256, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, true, false}, + {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, false, false}, + {1000, 5000, 128, 128, cuvs::distance::DistanceType::LpUnexpanded, true, false}, + {1000, 5000, 128, 128, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {1000, 5000, 128, 128, cuvs::distance::DistanceType::InnerProduct, false, false}, + // test with datasets on host memory + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Expanded, true, true}, + {256, 512, 32, 16, cuvs::distance::DistanceType::L2Unexpanded, true, true}, + {256, 512, 8, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true, true}, + {256, 128, 32, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true, true}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L1, true, true}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, true, true}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, true, true}, + {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, true, true}}; typedef RandomBruteForceKNNTest RandomBruteForceKNNTestF; TEST_P(RandomBruteForceKNNTestF, BruteForce) { this->testBruteForce(); } From 28d9990821e26b9bef7b452d9f797bec2972a92e Mon Sep 17 00:00:00 2001 From: Micka Date: Mon, 13 Jan 2025 20:38:03 +0100 Subject: [PATCH 23/39] Add support for refinement with `uint32_t` index type (#563) Closes #537. Needed change for the transition from Raft to cuVS. 
Authors: - Micka (https://github.com/lowener) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/563 --- cpp/include/cuvs/neighbors/refine.hpp | 45 +++++++++++++++++++ cpp/src/neighbors/ivf_flat_index.cpp | 1 + .../detail/refine_device_float_float.cu | 1 + cpp/src/neighbors/refine/refine_device.cuh | 13 +++--- 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/cpp/include/cuvs/neighbors/refine.hpp b/cpp/include/cuvs/neighbors/refine.hpp index 19fbd30bb..5e60ff537 100644 --- a/cpp/include/cuvs/neighbors/refine.hpp +++ b/cpp/include/cuvs/neighbors/refine.hpp @@ -76,6 +76,51 @@ void refine(raft::resources const& handle, raft::device_matrix_view distances, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. 
+ * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset device matrix that stores the dataset [n_rows, dims] + * @param[in] queries device matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where + * n_candidates >= k + * @param[out] indices device matrix that stores the refined indices [n_queries, k] + * @param[out] distances device matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + /** * @brief Refine nearest neighbor search. 
* diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index 6f7d11e50..c16dc47aa 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -226,6 +226,7 @@ void index::check_consistency() "inconsistent number of lists (clusters)"); } +template struct index; // Used for refine function template struct index; template struct index; template struct index; diff --git a/cpp/src/neighbors/refine/detail/refine_device_float_float.cu b/cpp/src/neighbors/refine/detail/refine_device_float_float.cu index 25bad201b..76b792d1c 100644 --- a/cpp/src/neighbors/refine/detail/refine_device_float_float.cu +++ b/cpp/src/neighbors/refine/detail/refine_device_float_float.cu @@ -43,5 +43,6 @@ } instantiate_cuvs_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_cuvs_neighbors_refine_d(uint32_t, float, float, int64_t); #undef instantiate_cuvs_neighbors_refine_d diff --git a/cpp/src/neighbors/refine/refine_device.cuh b/cpp/src/neighbors/refine/refine_device.cuh index 6184e540b..a5491be0d 100644 --- a/cpp/src/neighbors/refine/refine_device.cuh +++ b/cpp/src/neighbors/refine/refine_device.cuh @@ -84,12 +84,13 @@ void refine_device( cuvs::neighbors::ivf_flat::index refinement_index( handle, cuvs::distance::DistanceType(metric), n_queries, false, true, dim); - cuvs::neighbors::ivf_flat::detail::fill_refinement_index(handle, - &refinement_index, - dataset.data_handle(), - neighbor_candidates.data_handle(), - n_queries, - n_candidates); + cuvs::neighbors::ivf_flat::detail::fill_refinement_index( + handle, + &refinement_index, + dataset.data_handle(), + neighbor_candidates.data_handle(), + static_cast(n_queries), + static_cast(n_candidates)); uint32_t grid_dim_x = 1; // the neighbor ids will be computed in uint32_t as offset From 898ccfb588cb3013cd4de90fccefd406668e7dce Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 15 Jan 2025 08:46:50 -0800 Subject: [PATCH 24/39] Expose col-major pairwise 
distances to python (#572) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/572 --- cpp/include/cuvs/core/detail/interop.hpp | 33 ++++++++++- cpp/include/cuvs/core/interop.hpp | 18 +++++- cpp/src/distance/pairwise_distance_c.cpp | 73 +++++++++++++++++++----- python/cuvs/cuvs/common/cydlpack.pyx | 13 ++++- python/cuvs/cuvs/distance/distance.pyx | 4 +- python/cuvs/cuvs/test/test_distance.py | 7 ++- 6 files changed, 128 insertions(+), 20 deletions(-) diff --git a/cpp/include/cuvs/core/detail/interop.hpp b/cpp/include/cuvs/core/detail/interop.hpp index 2ed0b330d..19e4a922c 100644 --- a/cpp/include/cuvs/core/detail/interop.hpp +++ b/cpp/include/cuvs/core/detail/interop.hpp @@ -86,7 +86,6 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor) RAFT_EXPECTS(to_data_type.lanes == tensor.dtype.lanes, "lanes mismatch between return mdspan and DLTensor"); RAFT_EXPECTS(tensor.dtype.lanes == 1, "More than 1 DLTensor lanes not supported"); - RAFT_EXPECTS(tensor.strides == nullptr, "Strided memory layout for DLTensor not supported"); auto to_device = accessor_type_to_DLDevice(); if (to_device.device_type == kDLCUDA) { @@ -110,4 +109,36 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor) return MdspanType{reinterpret_cast(tensor.data), exts}; } +inline bool is_f_contiguous(DLManagedTensor* managed_tensor) +{ + auto tensor = managed_tensor->dl_tensor; + + if (!tensor.strides) { return false; } + int64_t expected_stride = 1; + for (int64_t i = 0; i < tensor.ndim; ++i) { + if (tensor.strides[i] != expected_stride) { return false; } + expected_stride *= tensor.shape[i]; + } + + return true; +} + +inline bool is_c_contiguous(DLManagedTensor* managed_tensor) +{ + auto tensor = managed_tensor->dl_tensor; + + if (!tensor.strides) { + // no stride information indicates a row-major tensor according to the dlpack spec + return true; + } + + int64_t 
expected_stride = 1; + for (int64_t i = tensor.ndim - 1; i >= 0; --i) { + if (tensor.strides[i] != expected_stride) { return false; } + expected_stride *= tensor.shape[i]; + } + + return true; +} + } // namespace cuvs::core::detail diff --git a/cpp/include/cuvs/core/interop.hpp b/cpp/include/cuvs/core/interop.hpp index 2462f02ec..096885f2f 100644 --- a/cpp/include/cuvs/core/interop.hpp +++ b/cpp/include/cuvs/core/interop.hpp @@ -51,9 +51,25 @@ inline bool is_dlpack_host_compatible(DLTensor tensor) return detail::is_dlpack_host_compatible(tensor); } +/** + * @brief Check if DLManagedTensor has a row-major (c-contiguous) layout + * + * @param tensor DLManagedTensor object to check + * @return bool + */ +inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_contiguous(tensor); } + +/** + * @brief Check if DLManagedTensor has a col-major (f-contiguous) layout + * + * @param tensor DLManagedTensor object to check + * @return bool + */ +inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); } + /** * @brief Convert a DLManagedTensor to an mdspan - * NOTE: This function only supports compact row-major layouts. + * NOTE: This function only supports compact row-major and col-major layouts. 
* * @code {.cpp} * #include diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp index 061adaa2c..5344a554c 100644 --- a/cpp/src/distance/pairwise_distance_c.cpp +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -29,7 +29,7 @@ namespace { -template +template void _pairwise_distance(cuvsResources_t res, DLManagedTensor* x_tensor, DLManagedTensor* y_tensor, @@ -39,8 +39,8 @@ void _pairwise_distance(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); - using mdspan_type = raft::device_matrix_view; - using distances_mdspan_type = raft::device_matrix_view; + using mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; auto x_mds = cuvs::core::from_dlpack(x_tensor); auto y_mds = cuvs::core::from_dlpack(y_tensor); @@ -70,17 +70,64 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype"); } - if (x_dt.bits == 32) { - _pairwise_distance( - res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); - } else if (x_dt.bits == 16) { - _pairwise_distance( - res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); - } else if (x_dt.bits == 64) { - _pairwise_distance( - res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + bool x_row_major; + if (cuvs::core::is_c_contiguous(x_tensor)) { + x_row_major = true; + } else if (cuvs::core::is_f_contiguous(x_tensor)) { + x_row_major = false; } else { - RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + RAFT_FAIL("X input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + bool y_row_major; + if (cuvs::core::is_c_contiguous(y_tensor)) { + y_row_major = true; + } else if (cuvs::core::is_f_contiguous(y_tensor)) { + y_row_major = false; + } else { + RAFT_FAIL("Y input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + bool distances_row_major; + if 
(cuvs::core::is_c_contiguous(distances_tensor)) { + distances_row_major = true; + } else if (cuvs::core::is_f_contiguous(distances_tensor)) { + distances_row_major = false; + } else { + RAFT_FAIL("distances input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + if ((x_row_major != y_row_major) || (x_row_major != distances_row_major)) { + RAFT_FAIL( + "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major"); + } + + if (x_row_major) { + if (x_dt.bits == 32) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } + } else { + if (x_dt.bits == 32) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } } }); } diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index 79f88cddc..bee8d9afa 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -25,6 +25,8 @@ cdef void deleter(DLManagedTensor* tensor) noexcept: if tensor.manager_ctx is NULL: return stdlib.free(tensor.dl_tensor.shape) + if tensor.dl_tensor.strides is not NULL: + stdlib.free(tensor.dl_tensor.strides) tensor.manager_ctx = NULL stdlib.free(tensor) @@ -95,11 +97,20 @@ cdef DLManagedTensor* dlpack_c(ary): tensor.data = tensor_ptr tensor.device = dev 
tensor.dtype = dtype - tensor.strides = NULL tensor.ndim = ndim tensor.shape = shape tensor.byte_offset = 0 + if ary.c_contiguous: + tensor.strides = NULL + elif ary.f_contiguous: + tensor.strides = stdlib.malloc(ndim * sizeof(int64_t)) + tensor.strides[0] = 1 + for i in range(1, ndim): + tensor.strides[i] = tensor.strides[i-1] * tensor.shape[i-1] + else: + raise ValueError("Input data must be contiguous") + dlm.dl_tensor = tensor dlm.manager_ctx = NULL dlm.deleter = deleter diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx index 187532bfe..d00e6b1b1 100644 --- a/python/cuvs/cuvs/distance/distance.pyx +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -103,7 +103,9 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, output_dtype = y_cai.dtype if np.issubdtype(y_cai.dtype, np.float16): output_dtype = np.float32 - out = device_ndarray.empty((m, n), dtype=output_dtype) + + order = "C" if getattr(X, "flags", X).c_contiguous else "F" + out = device_ndarray.empty((m, n), dtype=output_dtype, order=order) out_cai = wrap_array(out) x_k = x_cai.shape[1] diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index f466c2743..9f206064c 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ -40,10 +40,11 @@ ], ) @pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_distance(n_rows, n_cols, inplace, metric, dtype): +def test_distance(n_rows, n_cols, inplace, order, metric, dtype): input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1).astype(dtype) + input1 = np.asarray(input1, order=order).astype(dtype) # RussellRao expects boolean arrays if metric == "russellrao": @@ -58,7 +59,7 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype): output_dtype = dtype if 
np.issubdtype(dtype, np.float16): output_dtype = np.float32 - output = np.zeros((n_rows, n_rows), dtype=output_dtype) + output = np.zeros((n_rows, n_rows), dtype=output_dtype, order=order) if metric == "inner_product": expected = np.matmul(input1, input1.T) From 47d71c391453f002f4070512056075e37c8fcd3e Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 16 Jan 2025 05:24:14 +0100 Subject: [PATCH 25/39] Reduce the recall threshold for IVF-PQ low-precision LUT inner product tests (#573) IVF-PQ allows using low precision for the lookup table during search to improve QPS. When used for the inner product distance, this takes an extra toll on recall. This PR reduces our expectation of the recall in this case as an answer to occasional test failures in CI. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/573 --- cpp/test/neighbors/ann_ivf_pq.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 01efd804e..6c0fdc608 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -879,7 +879,7 @@ inline auto enum_variety_ip() -> test_cases_t // InnerProduct score is signed, // thus we're forced to used signed 8-bit representation, // thus we have one bit less precision - y.min_recall = y.min_recall.value() * 0.90; + y.min_recall = y.min_recall.value() * 0.88; } else { // In other cases it seems to perform a little bit better, still worse than L2 y.min_recall = y.min_recall.value() * 0.94; From c49ba7bf9b26633f08b254968ae33bb74039a104 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 15 Jan 2025 20:26:05 -0800 Subject: [PATCH 26/39] expose col-major bfknn to python (#575) Follow on to #572 - Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J.
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/575 --- cpp/src/distance/pairwise_distance_c.cpp | 2 +- cpp/src/neighbors/brute_force_c.cpp | 28 ++++++++++++++----- .../neighbors/brute_force/brute_force.pyx | 4 +-- python/cuvs/cuvs/neighbors/common.py | 6 ++-- python/cuvs/cuvs/test/test_brute_force.py | 9 ++++-- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp index 5344a554c..121574880 100644 --- a/cpp/src/distance/pairwise_distance_c.cpp +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -99,7 +99,7 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, if ((x_row_major != y_row_major) || (x_row_major != distances_row_major)) { RAFT_FAIL( - "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major"); + "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major)"); } if (x_row_major) { diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp index 2b8980863..1693ac930 100644 --- a/cpp/src/neighbors/brute_force_c.cpp +++ b/cpp/src/neighbors/brute_force_c.cpp @@ -33,7 +33,7 @@ namespace { -template +template void* _build(cuvsResources_t res, DLManagedTensor* dataset_tensor, cuvsDistanceType metric, @@ -41,7 +41,7 @@ void* _build(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); - using mdspan_type = raft::device_matrix_view; + using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); cuvs::neighbors::brute_force::index_params params; @@ -53,7 +53,7 @@ void* _build(cuvsResources_t res, return index_on_heap; } -template +template void _search(cuvsResources_t res, cuvsBruteForceIndex index, DLManagedTensor* queries_tensor, @@ -64,7 +64,7 @@ void _search(cuvsResources_t res, auto res_ptr = reinterpret_cast(res); auto index_ptr = reinterpret_cast*>(index.addr); - using queries_mdspan_type = 
raft::device_matrix_view; + using queries_mdspan_type = raft::device_matrix_view; using neighbors_mdspan_type = raft::device_matrix_view; using distances_mdspan_type = raft::device_matrix_view; using prefilter_mds_type = raft::device_vector_view; @@ -150,8 +150,15 @@ extern "C" cuvsError_t cuvsBruteForceBuild(cuvsResources_t res, auto dataset = dataset_tensor->dl_tensor; if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - index->addr = - reinterpret_cast(_build(res, dataset_tensor, metric, metric_arg)); + if (cuvs::core::is_c_contiguous(dataset_tensor)) { + index->addr = + reinterpret_cast(_build(res, dataset_tensor, metric, metric_arg)); + } else if (cuvs::core::is_f_contiguous(dataset_tensor)) { + index->addr = reinterpret_cast( + _build(res, dataset_tensor, metric, metric_arg)); + } else { + RAFT_FAIL("dataset input to cuvsBruteForceBuild must be contiguous (non-strided)"); + } index->dtype = dataset.dtype; } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", @@ -189,7 +196,14 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { - _search(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); + if (cuvs::core::is_c_contiguous(queries_tensor)) { + _search(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); + } else if (cuvs::core::is_f_contiguous(queries_tensor)) { + _search( + res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); + } else { + RAFT_FAIL("queries input to cuvsBruteForceSearch must be contiguous (non-strided)"); + } } else { RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", queries.dtype.code, diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 9d43bfb29..f71acd086 100644 --- 
a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -102,7 +102,7 @@ def build(dataset, metric="sqeuclidean", metric_arg=2.0, resources=None): """ dataset_ai = wrap_array(dataset) - _check_input_array(dataset_ai, [np.dtype('float32')]) + _check_input_array(dataset_ai, [np.dtype('float32')], exp_row_major=False) cdef cuvsResources_t res = resources.get_c_obj() @@ -218,7 +218,7 @@ def search(Index index, cdef cuvsResources_t res = resources.get_c_obj() queries_cai = wrap_array(queries) - _check_input_array(queries_cai, [np.dtype('float32')]) + _check_input_array(queries_cai, [np.dtype('float32')], exp_row_major=False) cdef uint32_t n_queries = queries_cai.shape[0] diff --git a/python/cuvs/cuvs/neighbors/common.py b/python/cuvs/cuvs/neighbors/common.py index c14b9f8c9..f49d9eb1f 100644 --- a/python/cuvs/cuvs/neighbors/common.py +++ b/python/cuvs/cuvs/neighbors/common.py @@ -14,11 +14,13 @@ # limitations under the License. 
-def _check_input_array(cai, exp_dt, exp_rows=None, exp_cols=None): +def _check_input_array( + cai, exp_dt, exp_rows=None, exp_cols=None, exp_row_major=True +): if cai.dtype not in exp_dt: raise TypeError("dtype %s not supported" % cai.dtype) - if not cai.c_contiguous: + if exp_row_major and not cai.c_contiguous: raise ValueError("Row major input is expected") if exp_cols is not None and cai.shape[1] != exp_cols: diff --git a/python/cuvs/cuvs/test/test_brute_force.py b/python/cuvs/cuvs/test/test_brute_force.py index acf347ec3..0b37ad885 100644 --- a/python/cuvs/cuvs/test/test_brute_force.py +++ b/python/cuvs/cuvs/test/test_brute_force.py @@ -40,12 +40,15 @@ ], ) @pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("dtype", [np.float32]) def test_brute_force_knn( - n_index_rows, n_query_rows, n_cols, k, inplace, metric, dtype + n_index_rows, n_query_rows, n_cols, k, inplace, order, metric, dtype ): - index = np.random.random_sample((n_index_rows, n_cols)).astype(dtype) - queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype) + index = np.random.random_sample((n_index_rows, n_cols)) + index = np.asarray(index, order=order).astype(dtype) + queries = np.random.random_sample((n_query_rows, n_cols)) + queries = np.asarray(queries, order=order).astype(dtype) # RussellRao expects boolean arrays if metric == "russellrao": From 8aae7069141ffe413f03cd4cd036860394fc0c44 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 15 Jan 2025 23:32:54 -0500 Subject: [PATCH 27/39] Small fixes to docs and pairwise distances (#570) Authors: - Corey J. 
Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/570 --- docs/source/cuvs_bench/index.rst | 24 ++++++++++++++---------- python/cuvs/cuvs/distance/distance.pyx | 6 +++--- python/cuvs/cuvs/test/test_distance.py | 6 ++---- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/source/cuvs_bench/index.rst b/docs/source/cuvs_bench/index.rst index 81fb7537c..820c44c4f 100644 --- a/docs/source/cuvs_bench/index.rst +++ b/docs/source/cuvs_bench/index.rst @@ -24,7 +24,7 @@ This tool offers several benefits, including * `Docker`_ -- `How to run the benchmarks`_ +- `How benchmarks are run`_ * `Step 1: Prepare the dataset`_ @@ -93,32 +93,36 @@ We provide images for GPU enabled systems, as well as systems without a GPU. The - `cuvs-bench-datasets`: Contains the GPU and CPU benchmarks with million-scale datasets already included in the container. Best suited for users that want to run multiple million scale datasets already included in the image. - `cuvs-bench-cpu`: Contains only CPU benchmarks with minimal size. Best suited for users that want the smallest containers to reproduce benchmarks on systems without a GPU. -Nightly images are located in `dockerhub `_, meanwhile release (stable) versions are located in `NGC `_, starting with release 24.10. +Nightly images are located in `dockerhub `_. -The following command pulls the nightly container for Python version 3.10, CUDA version 12.0, and cuVS version 24.10: +The following command pulls the nightly container for Python version 3.10, CUDA version 12.5, and cuVS version 24.12: .. code-block:: bash - docker pull rapidsai/cuvs-bench:24.10a-cuda12.0-py3.10 #substitute cuvs-bench for the exact desired container. + docker pull rapidsai/cuvs-bench:24.12a-cuda12.5-py3.10 #substitute cuvs-bench for the exact desired container. 
The CUDA and python versions can be changed for the supported values: -- Supported CUDA versions: 11.4 and 12.x -- Supported Python versions: 3.9 and 3.10. +- Supported CUDA versions: 11.8 and 12.5 +- Supported Python versions: 3.10 and 3.11. You can see the exact versions as well in the dockerhub site: - `cuVS bench images `_ -- `cuVS bench with datasets preloaded images `_ +- `cuVS bench with pre-loaded million-scale datasets images `_ - `cuVS bench CPU only images `_ **Note:** GPU containers use the CUDA toolkit from inside the container, the only requirement is a driver installed on the host machine that supports that version. So, for example, CUDA 11.8 containers can run in systems with a CUDA 12.x capable driver. Please also note that the Nvidia-Docker runtime from the `Nvidia Container Toolkit `_ is required to use GPUs inside docker containers. -How to run the benchmarks -========================= +How benchmarks are run +====================== + +The `cuvs-bench` package contains lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. -We provide a collection of lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. #. Prepare Dataset + #. Build Index and Search Index + #. Data Export + #. 
Plot Results Step 1: Prepare the dataset diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx index d00e6b1b1..d50fc152f 100644 --- a/python/cuvs/cuvs/distance/distance.pyx +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -56,7 +56,7 @@ SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", @auto_sync_resources @auto_convert_output -def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, +def pairwise_distance(X, Y, out=None, metric="euclidean", p=2.0, resources=None): """ Compute pairwise distances between X and Y @@ -74,7 +74,7 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, Y : CUDA array interface compliant matrix shape (n, k) out : Optional writable CUDA array interface matrix shape (m, n) metric : string denoting the metric type (default="euclidean") - metric_arg : metric parameter (currently used only for "minkowski") + p : metric parameter (currently used only for "minkowski") {resources_docstring} Examples @@ -139,6 +139,6 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, y_dlpack, out_dlpack, distance_type, - metric_arg)) + p)) return out diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index 9f206064c..483d5d201 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ -35,6 +35,7 @@ "jensenshannon", "russellrao", "cosine", + "minkowski", "sqeuclidean", "inner_product", ], @@ -70,10 +71,7 @@ def test_distance(n_rows, n_cols, inplace, order, metric, dtype): output_device = device_ndarray(output) if inplace else None ret_output = pairwise_distance( - input1_device, - input1_device, - output_device, - metric, + input1_device, input1_device, output_device, metric, p=2.0 ) output_device = ret_output if not inplace else output_device From 3ffb29ff06ff0b8fffa140780750cf1999626f7e Mon Sep 17 00:00:00 2001 From: Micka Date: Thu, 16 Jan 2025 
07:14:47 +0100 Subject: [PATCH 28/39] Fix broken link to python doc (#564) Apply the same change as https://github.com/rapidsai/cuml/pull/6202 to fix Python links to source code. Closes #533 Authors: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/564 --- docs/source/conf.py | 2 +- docs/source/sphinxext/github_link.py | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index c14919568..ca7330279 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -198,7 +198,7 @@ def setup(app): linkcode_resolve = make_linkcode_resolve( "cuvs", "https://github.com/rapidsai/cuvs/" - "blob/{revision}/python/cuvs/cuvs/" + "blob/{revision}/python/cuvs/" "{package}/{path}#L{lineno}", ) diff --git a/docs/source/sphinxext/github_link.py b/docs/source/sphinxext/github_link.py index 2c52488ca..75acfbd6e 100644 --- a/docs/source/sphinxext/github_link.py +++ b/docs/source/sphinxext/github_link.py @@ -1,5 +1,20 @@ # This contains code with copyright by the scikit-learn project, subject to the # license in /thirdparty/LICENSES/LICENSE.scikit_learn +# +# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import inspect import os @@ -101,10 +116,9 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): else: return else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) + if fn.endswith(".pyx"): + sp_path = next(x for x in sys.path if re.match(".*site-packages$", x)) + fn = fn.replace("/opt/conda/conda-bld/work/python/cuvs", sp_path) # Convert to relative from module root fn = os.path.relpath(fn, From 86b4ee8c6244b9ee2f1a718dad8dea0b1476ed15 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 15 Jan 2025 23:15:31 -0700 Subject: [PATCH 29/39] Fix typos in README (#543) Spotted while learning about the project. Authors: - Nathan VanBenschoten (https://github.com/nvanbenschoten) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/543 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 47f094039..dac71c881 100755 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ There are several benefits to using cuVS and GPUs for vector search, including 6. Multiple language support 7. Building blocks for composing new or accelerating existing algorithms -In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a deslightful development experimence, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale. +In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. 
This provides a delightful development experience, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale. ## cuVS Technology Stack From b9f71fe346af926de55223b313979a5c85a01b1b Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:40:52 +0100 Subject: [PATCH 30/39] Fix the use of constexpr in the dynamic batching header (#582) Remove the `constexpr` in a function that is non-constexpr according to the C++17 rules. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/582 --- cpp/src/neighbors/detail/dynamic_batching.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/neighbors/detail/dynamic_batching.cuh b/cpp/src/neighbors/detail/dynamic_batching.cuh index cb8e08ef5..23c5c07f6 100644 --- a/cpp/src/neighbors/detail/dynamic_batching.cuh +++ b/cpp/src/neighbors/detail/dynamic_batching.cuh @@ -238,8 +238,8 @@ enum struct slot_state : int32_t { struct batch_token { uint64_t value = 0; - constexpr inline batch_token() {} - explicit constexpr inline batch_token(uint32_t buffer_id) { id() = buffer_id; } + constexpr inline batch_token() = default; + RAFT_INLINE_FUNCTION explicit batch_token(uint32_t buffer_id) { id() = buffer_id; } /** * Sequential id of the batch in the array of batches. @@ -492,7 +492,7 @@ struct batch_queue_t { * NB: "round" is the number of times the queue counters went over the whole ring buffer. * It's used to avoid the ABA problem for atomic token updates. 
*/ - static constexpr inline auto make_empty_token(seq_order_id seq_id) noexcept -> batch_token + static inline auto make_empty_token(seq_order_id seq_id) noexcept -> batch_token { // Modify the seq_id to identify that the token slot is empty auto empty_round = static_cast(slot_state::kEmptyPast) * kSize; From 6371aa32e957b6e8df0cae3347b94ef5604e6e7b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 16 Jan 2025 20:00:54 -0500 Subject: [PATCH 31/39] run_cuvs_pytests.sh uses proper test dir (#584) Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/584 --- ci/run_cuvs_pytests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_cuvs_pytests.sh b/ci/run_cuvs_pytests.sh index 4de8927b1..57df9af94 100755 --- a/ci/run_cuvs_pytests.sh +++ b/ci/run_cuvs_pytests.sh @@ -6,4 +6,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuvs/cuvs -pytest --cache-clear --verbose "$@" tests +pytest --cache-clear --verbose "$@" test From bd603a97ea7c095dd109c802387b33dc1d591b54 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 17 Jan 2025 13:16:16 -0500 Subject: [PATCH 32/39] Fixing small typo in cuvs bench docs (#586) Authors: - Corey J. 
Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/586 --- docs/source/cuvs_bench/index.rst | 195 +------------------------------ 1 file changed, 3 insertions(+), 192 deletions(-) diff --git a/docs/source/cuvs_bench/index.rst b/docs/source/cuvs_bench/index.rst index 820c44c4f..c15aa41c1 100644 --- a/docs/source/cuvs_bench/index.rst +++ b/docs/source/cuvs_bench/index.rst @@ -24,16 +24,6 @@ This tool offers several benefits, including * `Docker`_ -- `How benchmarks are run`_ - - * `Step 1: Prepare the dataset`_ - - * `Step 2: Build and search index`_ - - * `Step 3: Data export`_ - - * `Step 4: Plot the results`_ - - `Running the benchmarks`_ * `End-to-end: smaller-scale benchmarks (<1M to 10M)`_ @@ -75,7 +65,7 @@ Conda conda activate cuvs_benchmarks # to install GPU package: - conda install -c rapidsai -c conda-forge -c nvidia cuvs-ann-bench= cuda-version=11.8* + conda install -c rapidsai -c conda-forge -c nvidia cuvs-bench= cuda-version=11.8* # to install CPU package for usage in CPU-only systems: conda install -c rapidsai -c conda-forge cuvs-bench-cpu @@ -99,7 +89,7 @@ The following command pulls the nightly container for Python version 3.10, CUDA .. code-block:: bash - docker pull rapidsai/cuvs-bench:24.12a-cuda12.5-py3.10 #substitute cuvs-bench for the exact desired container. + docker pull rapidsai/cuvs-bench:24.12a-cuda12.5-py3.10 # substitute cuvs-bench for the exact desired container. The CUDA and python versions can be changed for the supported values: - Supported CUDA versions: 11.8 and 12.5 @@ -112,185 +102,6 @@ You can see the exact versions as well in the dockerhub site: **Note:** GPU containers use the CUDA toolkit from inside the container, the only requirement is a driver installed on the host machine that supports that version. So, for example, CUDA 11.8 containers can run in systems with a CUDA 12.x capable driver. 
Please also note that the Nvidia-Docker runtime from the `Nvidia Container Toolkit `_ is required to use GPUs inside docker containers. -How benchmarks are run -====================== - -The `cuvs-bench` package contains lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. - -#. Prepare Dataset - -#. Build Index and Search Index - -#. Data Export - -#. Plot Results - -Step 1: Prepare the dataset ---------------------------- - -The script `cuvs_bench.get_dataset` will download and unpack the dataset in directory that the user provides. As of now, only million-scale datasets are supported by this script. For more information on :doc:`datasets and formats `. - -The usage of this script is: - -.. code-block:: bash - - usage: get_dataset.py [-h] [--name NAME] [--dataset-path DATASET_PATH] [--normalize] - - options: - -h, --help show this help message and exit - --dataset DATASET dataset to download (default: glove-100-angular) - --dataset-path DATASET_PATH - path to download dataset (default: ${RAPIDS_DATASET_ROOT_DIR}) - --normalize normalize cosine distance to inner product (default: False) - -When option `normalize` is provided to the script, any dataset that has cosine distances -will be normalized to inner product. So, for example, the dataset `glove-100-angular` -will be written at location `datasets/glove-100-inner/`. - -Step 2: Build and search index ------------------------------- - -The script `cuvs_bench.run` will build and search indices for a given dataset and its -specified configuration. - -The usage of the script `cuvs_bench.run` is: - -.. 
code-block:: bash - - usage: __main__.py [-h] [--subset-size SUBSET_SIZE] [-k COUNT] [-bs BATCH_SIZE] [--dataset-configuration DATASET_CONFIGURATION] [--configuration CONFIGURATION] [--dataset DATASET] - [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] [-f] [-m SEARCH_MODE] - - options: - -h, --help show this help message and exit - --subset-size SUBSET_SIZE - the number of subset rows of the dataset to build the index (default: None) - -k COUNT, --count COUNT - the number of nearest neighbors to search for (default: 10) - -bs BATCH_SIZE, --batch-size BATCH_SIZE - number of query vectors to use in each query trial (default: 10000) - --dataset-configuration DATASET_CONFIGURATION - path to YAML configuration file for datasets (default: None) - --configuration CONFIGURATION - path to YAML configuration file or directory for algorithms Any run groups found in the specified file/directory will automatically override groups of the same name - present in the default configurations, including `base` (default: None) - --dataset DATASET name of dataset (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets subdirectory from the calling directory (default: - os.getcwd()/datasets/) - --build - --search - --algorithms ALGORITHMS - run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, then group `base` is run by default (default: None) - --groups GROUPS run only comma separated groups of parameters (default: base) - --algo-groups ALGO_GROUPS - add comma separated . to run. 
Example usage: "--algo-groups=cuvs_cagra.large,hnswlib.large" (default: None) - -f, --force re-run algorithms even if their results already exist (default: False) - -m SEARCH_MODE, --search-mode SEARCH_MODE - run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput) - -t SEARCH_THREADS, --search-threads SEARCH_THREADS - specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is - specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None) - -r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but - will not actually execute the command. (default: False) - -`dataset`: name of the dataset to be searched in `datasets.yaml`_ - -`dataset-configuration`: optional filepath to custom dataset YAML config which has an entry for arg `dataset` - -`configuration`: optional filepath to YAML configuration for an algorithm or to directory that contains YAML configurations for several algorithms. Refer to `Dataset.yaml config`_ for more info. - -`algorithms`: runs all algorithms that it can find in YAML configs found by `configuration`. By default, only `base` group will be run. - -`groups`: run only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group - -`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to run the benchmark for in addition to all the arguments from `algorithms` and `groups`. 
It is of the format `.`, or for example, `cuvs_cagra.large` - -For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/<{algo},{group}.json>` -and an index search statistics JSON file in `/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`. - -For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/<{algo},{group}.json>` -and an index search statistics JSON file in `/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`. - -`dataset-path` : -#. data is read from `/` -#. indices are built in `//index` -#. build/search results are stored in `//result` - -`build` and `search` : if both parameters are not supplied to the script then it is assumed both are `True`. - -`indices` and `algorithms` : these parameters ensure that the algorithm specified for an index is available in `algos.yaml` and not disabled, as well as having an associated executable. - -Step 3: Data export -------------------- - -The script `cuvs_bench.data_export` will convert the intermediate JSON outputs produced by `cuvs_bench.run` to more easily readable CSV files, which are needed to build charts made by `cuvs_bench.plot`. - -.. code-block:: bash - - usage: data_export.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] - - options: - -h, --help show this help message and exit - --dataset DATASET dataset to download (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) - -Build statistics CSV file is stored in `/result/build/<{algo},{group}.csv>` -and index search statistics CSV file in `/result/search/<{algo},{group},k{k},bs{batch_size},{suffix}.csv>`, where suffix has three values: -#. `raw`: All search results are exported -#. 
`throughput`: Pareto frontier of throughput results is exported -#. `latency`: Pareto frontier of latency results is exported - -Step 4: Plot the results ------------------------- - -The script `cuvs_bench.plot` will plot results for all algorithms found in index search statistics CSV files `/result/search/*.csv`. - -The usage of this script is: - -.. code-block:: bash - - usage: [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filepath OUTPUT_FILEPATH] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] - [-k COUNT] [-bs BATCH_SIZE] [--build] [--search] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--x-start X_START] [--mode {throughput,latency}] - [--time-unit {s,ms,us}] [--raw] - - options: - -h, --help show this help message and exit - --dataset DATASET dataset to plot (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder (default: /home/coder/cuvs/datasets/) - --output-filepath OUTPUT_FILEPATH - directory for PNG to be saved (default: /home/coder/cuvs) - --algorithms ALGORITHMS - plot only comma separated list of named algorithms. If parameters `groups` and `algo-groups are both undefined, then group `base` is plot by default - (default: None) - --groups GROUPS plot only comma separated groups of parameters (default: base) - --algo-groups ALGO_GROUPS, --algo-groups ALGO_GROUPS - add comma separated . to plot. Example usage: "--algo-groups=cuvs_cagra.large,hnswlib.large" (default: None) - -k COUNT, --count COUNT - the number of nearest neighbors to search for (default: 10) - -bs BATCH_SIZE, --batch-size BATCH_SIZE - number of query vectors to use in each query trial (default: 10000) - --build - --search - --x-scale X_SCALE Scale to use when drawing the X-axis. 
Typically linear, logit or a2 (default: linear) - --y-scale {linear,log,symlog,logit} - Scale to use when drawing the Y-axis (default: linear) - --x-start X_START Recall values to start the x-axis from (default: 0.8) - --mode {throughput,latency} - search mode whose Pareto frontier is used on the y-axis (default: throughput) - --time-unit {s,ms,us} - time unit to plot when mode is latency (default: ms) - --raw Show raw results (not just Pareto frontier) of mode arg (default: False) - -`mode`: plots pareto frontier of `throughput` or `latency` results exported in the previous step - -`algorithms`: plots all algorithms that it can find results for the specified `dataset`. By default, only `base` group will be plotted. - -`groups`: plot only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group - -`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to plot results for in addition to all the arguments from `algorithms` and `groups`. It is of the format `.`, or for example, `cuvs_cagra.large` - Running the benchmarks ====================== @@ -576,7 +387,7 @@ Creating and customizing dataset configurations A single configuration will often define a set of algorithms, with associated index and search parameters, that can be generalize across datasets. We use YAML to define dataset specific and algorithm specific configurations. -A default `datasets.yaml` is provided by CUVS in `${CUVS_HOME}/python/cuvs-ann-bench/src/cuvs_bench/run/conf` with configurations available for several datasets. Here's a simple example entry for the `sift-128-euclidean` dataset: +A default `datasets.yaml` is provided by CUVS in `${CUVS_HOME}/python/cuvs_bench/src/cuvs_bench/run/conf` with configurations available for several datasets. Here's a simple example entry for the `sift-128-euclidean` dataset: .. 
code-block:: yaml From f1de1b287f776bcb03d20765fad85da3555969d6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 21 Jan 2025 11:30:32 -0600 Subject: [PATCH 33/39] Use GCC 13 in CUDA 12 conda builds. (#567) ## Description conda-forge is using GCC 13 for CUDA 12 builds. This PR updates CUDA 12 conda builds to use GCC 13, for alignment. These PRs should be merged in a specific order, see https://github.com/rapidsai/build-planning/issues/129 for details. Closes https://github.com/rapidsai/build-planning/issues/129. --- .../all_cuda-118_arch-aarch64.yaml | 2 +- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-aarch64.yaml | 4 ++-- .../all_cuda-125_arch-x86_64.yaml | 4 ++-- .../bench_ann_cuda-118_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 2 +- .../bench_ann_cuda-125_arch-aarch64.yaml | 4 ++-- .../bench_ann_cuda-125_arch-x86_64.yaml | 4 ++-- .../cuvs-bench-cpu/conda_build_config.yaml | 8 ++++--- .../cuvs-bench/conda_build_config.yaml | 14 +++++------ conda/recipes/cuvs-bench/meta.yaml | 6 ++--- conda/recipes/cuvs/conda_build_config.yaml | 14 +++++------ conda/recipes/cuvs/meta.yaml | 6 ++--- conda/recipes/libcuvs/conda_build_config.yaml | 14 +++++------ conda/recipes/libcuvs/meta.yaml | 24 +++++++------------ cpp/test/distance/masked_nn.cu | 4 ++-- dependencies.yaml | 18 ++++++++++++-- 17 files changed, 68 insertions(+), 64 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 01853da84..4c464ef4e 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -55,7 +55,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 
a1ad68d7f..71cbeeaf3 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -55,7 +55,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index ee0213fff..4fd08fa97 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -24,7 +24,7 @@ dependencies: - cython>=3.0.0 - dlpack>=0.8,<1.0 - doxygen>=1.8.20 -- gcc_linux-aarch64=11.* +- gcc_linux-aarch64=13.* - graphviz - ipython - libclang @@ -51,7 +51,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index d93dcaf7a..de5f341fa 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -24,7 +24,7 @@ dependencies: - cython>=3.0.0 - dlpack>=0.8,<1.0 - doxygen>=1.8.20 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - graphviz - ipython - libclang @@ -51,7 +51,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index a90dc03e7..fb69ac251 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -47,6 +47,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- 
sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - wheel name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index b7344c822..123033b08 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -47,6 +47,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - wheel name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index da7229004..fa2ae7955 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -24,7 +24,7 @@ dependencies: - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 -- gcc_linux-aarch64=11.* +- gcc_linux-aarch64=13.* - glog>=0.6.0 - h5py>=3.8.0 - libcublas-dev @@ -43,6 +43,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - wheel name: bench_ann_cuda-125_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 5d1dd8fc7..76b005e3c 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -24,7 +24,7 @@ dependencies: - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - glog>=0.6.0 - h5py>=3.8.0 - libcublas-dev @@ -43,6 +43,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - wheel name: bench_ann_cuda-125_arch-x86_64 diff --git a/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml 
b/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml index ed6f708e1..5407d7c17 100644 --- a/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml @@ -1,14 +1,16 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/cuvs-bench/conda_build_config.yaml b/conda/recipes/cuvs-bench/conda_build_config.yaml index 47bd730da..ccd7341d1 100644 --- a/conda/recipes/cuvs-bench/conda_build_config.yaml +++ b/conda/recipes/cuvs-bench/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/cuvs-bench/meta.yaml b/conda/recipes/cuvs-bench/meta.yaml index d77aee8ce..33b1745ec 100644 --- a/conda/recipes/cuvs-bench/meta.yaml +++ b/conda/recipes/cuvs-bench/meta.yaml @@ -37,10 +37,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% 
if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev {% endif %} @@ -50,7 +48,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/cuvs/conda_build_config.yaml b/conda/recipes/cuvs/conda_build_config.yaml index 001878ff2..83f5ebcb1 100644 --- a/conda/recipes/cuvs/conda_build_config.yaml +++ b/conda/recipes/cuvs/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index ad7ffe756..25fc204a8 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -20,10 +20,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -33,7 +31,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ 
compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/libcuvs/conda_build_config.yaml b/conda/recipes/libcuvs/conda_build_config.yaml index b8c49943e..72cc4415d 100644 --- a/conda/recipes/libcuvs/conda_build_config.yaml +++ b/conda/recipes/libcuvs/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index 46552c397..fd466cd22 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -39,10 +39,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -54,7 +52,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -106,10 +104,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ 
GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -121,7 +117,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -174,10 +170,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -189,7 +183,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -246,10 +240,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -261,7 +253,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/cpp/test/distance/masked_nn.cu b/cpp/test/distance/masked_nn.cu index a8f2f5163..a1c784669 100644 --- a/cpp/test/distance/masked_nn.cu +++ b/cpp/test/distance/masked_nn.cu @@ -314,8 +314,8 @@ template cudaStream_t stream = 0) { typedef typename raft::KeyValuePair KVP; - std::shared_ptr 
exp_h(new KVP[size]); - std::shared_ptr act_h(new KVP[size]); + std::shared_ptr exp_h(new KVP[size]); + std::shared_ptr act_h(new KVP[size]); raft::update_host(exp_h.get(), expected, size, stream); raft::update_host(act_h.get(), actual, size, stream); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/dependencies.yaml b/dependencies.yaml index fbd1d8372..d23c118c0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -166,14 +166,28 @@ dependencies: matrices: - matrix: arch: x86_64 + cuda: "11.*" packages: - gcc_linux-64=11.* - - sysroot_linux-64==2.17 + - sysroot_linux-64==2.28 - matrix: arch: aarch64 + cuda: "11.*" packages: - gcc_linux-aarch64=11.* - - sysroot_linux-aarch64==2.17 + - sysroot_linux-aarch64==2.28 + - matrix: + arch: x86_64 + cuda: "12.*" + packages: + - gcc_linux-64=13.* + - sysroot_linux-64==2.28 + - matrix: + arch: aarch64 + cuda: "12.*" + packages: + - gcc_linux-aarch64=13.* + - sysroot_linux-aarch64==2.28 - output_types: conda matrices: - matrix: {cuda: "12.*"} From 9b7bb975249f6863f72aa04147bf423d130a25c3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 22 Jan 2025 00:41:17 -0600 Subject: [PATCH 34/39] Temporarily skip CUDA 11 wheel CI (#599) Due to some failures coming from libraft C++ wheels, CUDA 11 wheel CI will not pass. This PR temporarily disables CUDA 11 wheel tests until those issues can be resolved. See https://github.com/rapidsai/build-planning/issues/137. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/599 --- .github/workflows/pr.yaml | 3 +++ .github/workflows/test.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 91f51bd90..ca85c5c2e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -150,6 +150,9 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cuvs.sh + # CUDA 11 wheel CI is disabled until + # https://github.com/rapidsai/build-planning/issues/137 is resolved. + matrix_filter: map(select(.CUDA_VER | startswith("11") | not)) devcontainer: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e3bf5d16f..cf081d579 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -49,3 +49,6 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cuvs.sh + # CUDA 11 wheel CI is disabled until + # https://github.com/rapidsai/build-planning/issues/137 is resolved. + matrix_filter: map(select(.CUDA_VER | startswith("11") | not)) From d6476f122e57d20e54cf1df6eb3a5b2baeefb175 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 22 Jan 2025 11:05:14 -0600 Subject: [PATCH 35/39] Use cuda.bindings layout. (#588) This PR updates cuVS to use the new cuda-python `cuda.bindings` layout. See https://github.com/rapidsai/build-planning/issues/117. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) - https://github.com/jakirkham URL: https://github.com/rapidsai/cuvs/pull/588 --- python/cuvs/cuvs/common/c_api.pxd | 2 +- python/cuvs/cuvs/common/resources.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuvs/cuvs/common/c_api.pxd b/python/cuvs/cuvs/common/c_api.pxd index f99fd5348..dae93d750 100644 --- a/python/cuvs/cuvs/common/c_api.pxd +++ b/python/cuvs/cuvs/common/c_api.pxd @@ -16,7 +16,7 @@ # cython: language_level=3 -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from libc.stdint cimport uintptr_t diff --git a/python/cuvs/cuvs/common/resources.pyx b/python/cuvs/cuvs/common/resources.pyx index c0b72ae34..0edf53fc1 100644 --- a/python/cuvs/cuvs/common/resources.pyx +++ b/python/cuvs/cuvs/common/resources.pyx @@ -17,7 +17,7 @@ import functools -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from cuvs.common.c_api cimport ( cuvsResources_t, From 43969ca29e054fb94820f518c047e8446e6730a0 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 22 Jan 2025 14:31:23 -0600 Subject: [PATCH 36/39] Revert "Temporarily skip CUDA 11 wheel CI" (#601) Reverts rapidsai/cuvs#599 now that https://github.com/rapidsai/raft/pull/2548 has landed. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuvs/pull/601 --- .github/workflows/pr.yaml | 3 --- .github/workflows/test.yaml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index ca85c5c2e..91f51bd90 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -150,9 +150,6 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cuvs.sh - # CUDA 11 wheel CI is disabled until - # https://github.com/rapidsai/build-planning/issues/137 is resolved. 
- matrix_filter: map(select(.CUDA_VER | startswith("11") | not)) devcontainer: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index cf081d579..e3bf5d16f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -49,6 +49,3 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cuvs.sh - # CUDA 11 wheel CI is disabled until - # https://github.com/rapidsai/build-planning/issues/137 is resolved. - matrix_filter: map(select(.CUDA_VER | startswith("11") | not)) From 1c91e1f6bf38321fb1f57250bc32980ea0674138 Mon Sep 17 00:00:00 2001 From: rhdong Date: Wed, 22 Jan 2025 13:33:10 -0800 Subject: [PATCH 37/39] [Fix] l2_exp random fail in half-float32 mixed precision on self-neighboring (#596) Authors: - rhdong (https://github.com/rhdong) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/596 --- .../distance/detail/distance_ops/l2_exp.cuh | 34 ++++++++++++------- python/cuvs/cuvs/test/test_distance.py | 5 ++- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/cpp/src/distance/detail/distance_ops/l2_exp.cuh b/cpp/src/distance/detail/distance_ops/l2_exp.cuh index 04817aa8b..f49771605 100644 --- a/cpp/src/distance/detail/distance_ops/l2_exp.cuh +++ b/cpp/src/distance/detail/distance_ops/l2_exp.cuh @@ -28,14 +28,14 @@ namespace cuvs::distance::detail::ops { * for round-off error tolerance. 
* @tparam DataT */ -template -__device__ constexpr DataT get_clamp_precision() +template +__device__ constexpr AccT get_clamp_precision() { switch (sizeof(DataT)) { - case 2: return 1e-3; - case 4: return 1e-6; - case 8: return 1e-15; - default: return 0; + case 2: return AccT{1e-3}; + case 4: return AccT{1e-6}; + case 8: return AccT{1e-15}; + default: return AccT{0}; } } @@ -46,19 +46,27 @@ struct l2_exp_cutlass_op { __device__ l2_exp_cutlass_op() noexcept : sqrt(false) {} __device__ l2_exp_cutlass_op(bool isSqrt) noexcept : sqrt(isSqrt) {} - inline __device__ AccT operator()(DataT aNorm, DataT bNorm, DataT accVal) const noexcept + inline __device__ AccT operator()(AccT aNorm, AccT bNorm, AccT accVal) const noexcept { - AccT outVal = aNorm + bNorm - DataT(2.0) * accVal; + AccT outVal = aNorm + bNorm - AccT(2.0) * accVal; /** * Self-neighboring points should have (aNorm == bNorm) == accVal and the dot product (accVal) * can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal instead. */ - outVal = outVal * AccT(!((outVal * outVal < get_clamp_precision()) * (aNorm == bNorm))); + outVal = + outVal * AccT(!((outVal * outVal < get_clamp_precision()) * (aNorm == bNorm))); return sqrt ? raft::sqrt(outVal * static_cast(outVal > AccT(0))) : outVal; } - __device__ AccT operator()(DataT aData) const noexcept { return aData; } + __device__ AccT operator()(DataT aData) const noexcept + { + if constexpr (std::is_same_v && std::is_same_v) { + return __half2float(aData); + } else { + return aData; + } + } }; /** @@ -121,9 +129,9 @@ struct l2_exp_distance_op { * (accVal) can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal * instead. 
*/ - acc[i][j] = - val * static_cast((val > AccT(0))) * - static_cast(!((val * val < get_clamp_precision()) * (regxn[i] == regyn[j]))); + acc[i][j] = val * static_cast((val > AccT(0))) * + static_cast( + !((val * val < get_clamp_precision()) * (regxn[i] == regyn[j]))); } } if (sqrt) { diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index 483d5d201..370dd773a 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ -21,6 +21,7 @@ from cuvs.distance import pairwise_distance +@pytest.mark.parametrize("times", range(20)) @pytest.mark.parametrize("n_rows", [50, 100]) @pytest.mark.parametrize("n_cols", [10, 50]) @pytest.mark.parametrize( @@ -43,7 +44,7 @@ @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_distance(n_rows, n_cols, inplace, order, metric, dtype): +def test_distance(n_rows, n_cols, inplace, order, metric, dtype, times): input1 = np.random.random_sample((n_rows, n_cols)) input1 = np.asarray(input1, order=order).astype(dtype) @@ -79,7 +80,5 @@ def test_distance(n_rows, n_cols, inplace, order, metric, dtype): actual = output_device.copy_to_host() tol = 1e-3 - if np.issubdtype(dtype, np.float16): - tol = 1e-1 assert np.allclose(expected, actual, atol=tol, rtol=tol) From b62b11aa73c5b39fe5ccd7328ccdcf468c4c2323 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 22 Jan 2025 21:53:22 -0600 Subject: [PATCH 38/39] introduce libcuvs wheels (#594) Contributes to https://github.com/rapidsai/build-planning/issues/33. Proposes packaging `libcuvs` as a wheel, which is then re-used by `cuvs-cu{11,12}`. Similar changes were recently made in RAFT: https://github.com/rapidsai/raft/pull/2531 As part of this, also proposes: * introducing a new CMake option, `CUVS_COMPILE_DYNAMIC_ONLY`, to allow building/installing only the dynamic shared library (i.e. 
skipping the static library) * enforcing `rapids-cmake`'s preferred CMake style (similar to https://github.com/rapidsai/raft/pull/2531#discussion_r1917039870) * standardizing `clang` pins across the project, and pinning to `clang` 16 for Rust builds (https://github.com/rapidsai/cuvs/pull/594#discussion_r1924484839) ## Notes for Reviewers ### Benefits of these changes * smaller wheels for projects that depend on cuVS (they can dynamically link against `libcuvs` instead of statically linking in the pieces they need) * fewer CI resources used for cuVS wheels (no more re-compiling for every Python minor version) * faster, cheaper cuML wheel builds (https://github.com/rapidsai/cuml/pull/6199#discussion_r1920830933) * other benefits mentioned in https://github.com/rapidsai/build-planning/issues/33 ### Wheel contents `libcuvs`: * `libcuvs.so` and `libcuvs_c.so` (shared libraries) * cuVS headers * vendored dependencies (hnswlib) `cuvs`: * `cuvs` Python / Cython code and compiled Cython extensions ### Size changes (CUDA 12, Python 3.12, x86_64) | wheel | num files (before) | num files (this PR) | size (before) | size (this PR) | |:---------------:|------------------:|-----------------:|--------------:|---------------:| | `libcuvs` | --- | 67 | --- | 843M | | `cuvs` | 88 | 84 | 845M | 2M | |**TOTAL** | **88** | **131** | **845M** | **845M** | *NOTES: size = compressed, "before" = 2025-01-22 nightlies*
how I calculated those (click me) * nightly commit = https://github.com/rapidsai/cuml/commit/7c715c494dff71274d0fdec774bdee12a7e78827 * PR = this PR ```shell docker run \ --rm \ --network host \ --env RAPIDS_NIGHTLY_DATE=2025-01-22 \ --env CUVS_NIGHTLY_SHA=f1de1b2 \ --env CUVS_PR="pull-request/594" \ --env CUVS_PR_SHA="97c56178cd0e07e4b6b138bb0904af78379f1bb3" \ --env RAPIDS_PY_CUDA_SUFFIX=cu12 \ --env WHEEL_DIR_BEFORE=/tmp/wheels-before \ --env WHEEL_DIR_AFTER=/tmp/wheels-after \ -it rapidsai/ci-wheel:cuda12.5.1-rockylinux8-py3.12 \ bash # --- nightly wheels --- # mkdir -p ./wheels-before export RAPIDS_BUILD_TYPE=branch export RAPIDS_REF_NAME="branch-25.02" # cuvs RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cuvs \ RAPIDS_SHA=${CUVS_NIGHTLY_SHA} \ rapids-download-wheels-from-s3 python ./wheels-before # --- wheels from CI --- # mkdir -p ./wheels-after export RAPIDS_BUILD_TYPE="pull-request" # libcuvs RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cuvs \ RAPIDS_REF_NAME="${CUVS_PR}" \ RAPIDS_SHA="${CUVS_PR_SHA}" \ rapids-download-wheels-from-s3 cpp ./wheels-after # cuvs RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cuvs \ RAPIDS_REF_NAME="${CUVS_PR}" \ RAPIDS_SHA="${CUVS_PR_SHA}" \ rapids-download-wheels-from-s3 python ./wheels-after pip install pydistcheck pydistcheck \ --inspect \ --select 'distro-too-large-compressed' \ ./wheels-before/*.whl \ | grep -E '^checking|files: | compressed' \ > ./before.txt # get more exact sizes du -sh ./wheels-before/* pydistcheck \ --inspect \ --select 'distro-too-large-compressed' \ ./wheels-after/*.whl \ | grep -E '^checking|files: | compressed' \ > ./after.txt # get more exact sizes du -sh ./wheels-after/* ```
### How I tested this * https://github.com/rapidsai/devcontainers/pull/440 * https://github.com/rapidsai/cuml/pull/6199 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/594 --- .github/workflows/build.yaml | 24 ++++ .github/workflows/pr.yaml | 12 +- .pre-commit-config.yaml | 2 +- build.sh | 8 +- ci/build_wheel.sh | 34 +++--- ci/build_wheel_cuvs.sh | 23 ++-- ci/build_wheel_libcuvs.sh | 32 +++++ ci/check_style.sh | 7 ++ ci/release/update-version.sh | 4 +- ci/test_wheel_cuvs.sh | 9 +- ci/validate_wheel.sh | 12 -- .../all_cuda-118_arch-aarch64.yaml | 5 +- .../all_cuda-118_arch-x86_64.yaml | 5 +- .../all_cuda-125_arch-aarch64.yaml | 5 +- .../all_cuda-125_arch-x86_64.yaml | 5 +- .../bench_ann_cuda-118_arch-aarch64.yaml | 3 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 3 +- .../bench_ann_cuda-125_arch-aarch64.yaml | 3 +- .../bench_ann_cuda-125_arch-x86_64.yaml | 3 +- cpp/CMakeLists.txt | 77 +++++++----- cpp/cmake/thirdparty/get_raft.cmake | 1 + dependencies.yaml | 110 +++++++++++++++--- examples/cpp/CMakeLists.txt | 22 +++- python/cuvs/CMakeLists.txt | 70 +---------- python/cuvs/cuvs/__init__.py | 11 ++ python/cuvs/cuvs/common/CMakeLists.txt | 2 +- python/cuvs/cuvs/distance/CMakeLists.txt | 2 +- python/cuvs/cuvs/neighbors/CMakeLists.txt | 2 +- .../cuvs/neighbors/brute_force/CMakeLists.txt | 3 +- .../cuvs/cuvs/neighbors/cagra/CMakeLists.txt | 2 +- .../cuvs/neighbors/filters/CMakeLists.txt | 2 +- .../cuvs/cuvs/neighbors/hnsw/CMakeLists.txt | 2 +- .../cuvs/neighbors/ivf_flat/CMakeLists.txt | 2 +- .../cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt | 2 +- python/cuvs/pyproject.toml | 19 ++- python/libcuvs/CMakeLists.txt | 69 +++++++++++ python/libcuvs/LICENSE | 1 + python/libcuvs/README.md | 1 + python/libcuvs/libcuvs/VERSION | 1 + python/libcuvs/libcuvs/__init__.py | 16 +++ python/libcuvs/libcuvs/_version.py | 33 ++++++ 
python/libcuvs/libcuvs/load.py | 100 ++++++++++++++++ python/libcuvs/pyproject.toml | 108 +++++++++++++++++ 43 files changed, 653 insertions(+), 204 deletions(-) create mode 100755 ci/build_wheel_libcuvs.sh create mode 100644 python/libcuvs/CMakeLists.txt create mode 120000 python/libcuvs/LICENSE create mode 120000 python/libcuvs/README.md create mode 120000 python/libcuvs/libcuvs/VERSION create mode 100644 python/libcuvs/libcuvs/__init__.py create mode 100644 python/libcuvs/libcuvs/_version.py create mode 100644 python/libcuvs/libcuvs/load.py create mode 100644 python/libcuvs/pyproject.toml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index e93b7a694..59b8e00de 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -80,7 +80,30 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" sha: ${{ inputs.sha }} + wheel-build-libcuvs: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libcuvs.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-publish-libcuvs: + needs: wheel-build-libcuvs + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: libcuvs + package-type: cpp wheel-build-cuvs: + needs: wheel-build-libcuvs secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: @@ -99,3 +122,4 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: cuvs + 
package-type: python diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 91f51bd90..843439f26 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - conda-python-tests - docs-build - rust-build + - wheel-build-libcuvs - wheel-build-cuvs - wheel-tests-cuvs - devcontainer @@ -135,10 +136,19 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_rust.sh" - wheel-build-cuvs: + wheel-build-libcuvs: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: pull-request + script: ci/build_wheel_libcuvs.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-build-cuvs: + needs: wheel-build-libcuvs + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_cuvs.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcfc7e1fa..240f82be6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -115,7 +115,7 @@ repos: cpp/cmake/modules/FindAVX\.cmake| - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.16.0 + rev: v1.17.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/build.sh b/build.sh index bd5fa649b..3b9a9a3a8 100755 --- a/build.sh +++ b/build.sh @@ -313,12 +313,6 @@ if [[ ${CMAKE_TARGET} == "" ]]; then CMAKE_TARGET="all" fi - -SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" -if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUVS_CPP"* ]]; then - SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS};-DFIND_CUVS_CPP=ON" -fi - # If clean given, run it prior to any other steps if (( ${CLEAN} == 1 )); then # If the dirs to clean are mounted dirs in a 
container, the @@ -434,7 +428,7 @@ fi # Build and (optionally) install the cuvs Python package if (( ${NUMARGS} == 0 )) || hasArg python; then - SKBUILD_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS}" \ + SKBUILD_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \ python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuvs fi diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 4994374a8..c6f1232b3 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,10 +1,11 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail package_name=$1 package_dir=$2 +package_type=$3 underscore_package_name=$(echo "${package_name}" | tr "-" "_") source rapids-configure-sccache @@ -16,21 +17,22 @@ rapids-generate-version > ./VERSION cd "${package_dir}" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXCLUDE_ARGS=( - --exclude "libcublas.so.12" - --exclude "libcublasLt.so.12" - --exclude "libcurand.so.10" - --exclude "libcusolver.so.11" - --exclude "libcusparse.so.12" - --exclude "libnvJitLink.so.12" +EXCLUDE_ARGS=( + --exclude "libraft.so" + --exclude "libcublas.so.*" + --exclude "libcublasLt.so.*" + --exclude "libcurand.so.*" + --exclude "libcusolver.so.*" + --exclude "libcusparse.so.*" + --exclude "libnvJitLink.so.*" +) + +if [[ "${package_dir}" != "python/libcuvs" ]]; then + EXCLUDE_ARGS+=( + --exclude "libcuvs_c.so" + --exclude "libcuvs.so" ) - ;; - 11.*) - EXCLUDE_ARGS=() - ;; -esac +fi rapids-logger "Building '${package_name}' wheel" @@ -48,4 +50,4 @@ sccache --show-adv-stats mkdir -p final_dist python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* -RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist +RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_type} final_dist diff --git 
a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index 444657cc0..fb40d1459 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -1,21 +1,20 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail package_dir="python/cuvs" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" - ;; - 11.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" - ;; -esac +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUVS_CPP=OFF${EXTRA_CMAKE_ARGS}" +# Downloads libcuvs wheels from this current build, +# then ensures 'cuvs' wheel builds always use the 'libcuvs' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. +RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuvs_dist +echo "libcuvs-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcuvs_dist/libcuvs_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -ci/build_wheel.sh cuvs ${package_dir} +ci/build_wheel.sh cuvs ${package_dir} python ci/validate_wheel.sh ${package_dir} final_dist diff --git a/ci/build_wheel_libcuvs.sh b/ci/build_wheel_libcuvs.sh new file mode 100755 index 000000000..148be89a2 --- /dev/null +++ b/ci/build_wheel_libcuvs.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_name="libcuvs" +package_dir="python/libcuvs" + +rapids-logger "Generating build requirements" +matrix_selectors="cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" + +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "${matrix_selectors}" \ +| tee /tmp/requirements-build.txt + +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt + +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +ci/build_wheel.sh libcuvs ${package_dir} cpp +ci/validate_wheel.sh ${package_dir} final_dist libcuvs diff --git a/ci/check_style.sh b/ci/check_style.sh index c22f3f9f0..952e94bf1 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -14,5 +14,12 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks +# get config for cmake-format checks +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json" +export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json +mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) +wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} + # Run pre-commit checks pre-commit run --all-files --show-diff-on-failure diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 4cf1f0617..7562035a9 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -44,8 +44,10 @@ echo "${NEXT_FULL_TAG}" > VERSION DEPENDENCIES=( dask-cuda 
cuvs - pylibraft + libcuvs + libraft librmm + pylibraft rmm rapids-dask-dependency ) diff --git a/ci/test_wheel_cuvs.sh b/ci/test_wheel_cuvs.sh index 7033003e9..862c69a3a 100755 --- a/ci/test_wheel_cuvs.sh +++ b/ci/test_wheel_cuvs.sh @@ -1,13 +1,16 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libcuvs-dep +RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cuvs*.whl)[test] +python -m pip install \ + ./local-libcuvs-dep/libcuvs*.whl \ + "$(echo ./dist/cuvs*.whl)[test]" python -m pytest ./python/cuvs/cuvs/test diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index f2b235765..19d413fa2 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -8,24 +8,12 @@ wheel_dir_relative_path=$2 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -# some packages are much larger on CUDA 11 than on CUDA 12 -if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then - PYDISTCHECK_ARGS=( - --max-allowed-size-compressed '1.4G' - ) -else - PYDISTCHECK_ARGS=( - --max-allowed-size-compressed '950M' - ) -fi - cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" pydistcheck \ --inspect \ - "${PYDISTCHECK_ARGS[@]}" \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validate packages with 'twine'" diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 4c464ef4e..123acb421 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ 
-9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 @@ -26,7 +25,7 @@ dependencies: - gcc_linux-aarch64=11.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 71cbeeaf3..c6a65e684 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 @@ -26,7 +25,7 @@ dependencies: - gcc_linux-64=11.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 4fd08fa97..b71f5ed43 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev @@ -27,7 +26,7 @@ dependencies: - gcc_linux-aarch64=13.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index de5f341fa..16cd595d3 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - 
cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev @@ -27,7 +26,7 @@ dependencies: - gcc_linux-64=13.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index fb69ac251..2e2ad8446 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -26,6 +26,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 123033b08..90243415c 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -26,6 +26,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index fa2ae7955..34e01aeea 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -27,6 +27,7 @@ dependencies: - gcc_linux-aarch64=13.* - glog>=0.6.0 - h5py>=3.8.0 +- 
libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 76b005e3c..dcfb54a22 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -27,6 +27,7 @@ dependencies: - gcc_linux-64=13.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 26c0b82d3..11f21db44 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -68,6 +68,7 @@ option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_l option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) +option(CUVS_COMPILE_DYNAMIC_ONLY "Only build the shared library and skip the static library." 
OFF) option(CUVS_NVTX "Enable nvtx markers" OFF) option(CUVS_RAFT_CLONE_ON_PIN "Explicitly clone RAFT branch when pinned to non-feature branch" ON) @@ -94,6 +95,7 @@ include(CMakeDependentOption) message(VERBOSE "cuVS: Build cuVS unit-tests: ${BUILD_TESTS}") message(VERBOSE "cuVS: Build CPU only components: ${BUILD_CPU_ONLY}") message(VERBOSE "cuVS: Build ANN benchmarks: ${BUILD_CUVS_BENCH}") +message(VERBOSE "cuVS: Build only the shared library: ${CUVS_COMPILE_DYNAMIC_ONLY}") message(VERBOSE "cuVS: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "cuVS: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "cuVS: Disable OpenMP: ${DISABLE_OPENMP}") @@ -493,7 +495,10 @@ if(BUILD_SHARED_LIBS) ) add_library(cuvs SHARED $,EXCLUDE,rmm.*logger>) - add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) + + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) + endif() target_compile_options( cuvs INTERFACE $<$:--expt-extended-lambda @@ -501,20 +506,23 @@ if(BUILD_SHARED_LIBS) ) add_library(cuvs::cuvs ALIAS cuvs) - add_library(cuvs::cuvs_static ALIAS cuvs_static) - set_target_properties( - cuvs_static - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - EXPORT_NAME cuvs_static - ) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + add_library(cuvs::cuvs_static ALIAS cuvs_static) - target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") + set_target_properties( + cuvs_static + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + EXPORT_NAME cuvs_static + ) + + target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") + endif() target_include_directories( cuvs_objs @@ -523,19 +531,21 @@ 
if(BUILD_SHARED_LIBS) INTERFACE "$" ) - target_include_directories( - cuvs_static - PUBLIC "$" - INTERFACE "$" - ) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + target_include_directories( + cuvs_static + PUBLIC "$" + INTERFACE "$" + ) - # ensure CUDA symbols aren't relocated to the middle of the debug build binaries - target_link_options(cuvs_static PRIVATE $) + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cuvs_static PRIVATE $) - target_include_directories( - cuvs_static PUBLIC "$" - "$" - ) + target_include_directories( + cuvs_static PUBLIC "$" + "$" + ) + endif() target_include_directories( cuvs PUBLIC "$" @@ -571,11 +581,13 @@ if(BUILD_SHARED_LIBS) cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY} ) - target_link_libraries( - cuvs_static - PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE nvidia::cutlass::cutlass $ - ) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + target_link_libraries( + cuvs_static + PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} + PRIVATE nvidia::cutlass::cutlass $ + ) + endif() endif() if(BUILD_MG_ALGOS) @@ -718,8 +730,13 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB include(GNUInstallDirs) include(CPack) + set(_cuvs_lib_targets cuvs) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + list(APPEND _cuvs_lib_targets cuvs_static) + endif() + install( - TARGETS cuvs cuvs_static + TARGETS ${_cuvs_lib_targets} DESTINATION ${lib_dir} COMPONENT cuvs EXPORT cuvs-exports diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 2e57df84e..845c7a833 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -44,6 +44,7 @@ function(find_and_configure_raft) INSTALL_EXPORT_SET cuvs-exports COMPONENTS ${RAFT_COMPONENTS} CPM_ARGS + EXCLUDE_FROM_ALL TRUE GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git GIT_TAG ${PKG_PINNED_TAG} SOURCE_SUBDIR cpp diff --git a/dependencies.yaml b/dependencies.yaml index 
d23c118c0..478b2acc2 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -7,15 +7,16 @@ files: arch: [x86_64, aarch64] includes: - build + - build_cython - build_py_cuvs - build_wheels - checks + - clang - cuda - cuda_version - depends_on_cupy - depends_on_librmm - depends_on_pylibraft - - develop - docs - rapids_build - run_py_cuvs @@ -31,13 +32,15 @@ files: includes: - bench - bench_python + - build_cython - build_py_cuvs + - clang - cuda - cuda_version - depends_on_cupy - depends_on_pylibraft + - depends_on_libcuvs - depends_on_librmm - - develop - rapids_build - rapids_build_setuptools test_cpp: @@ -61,6 +64,7 @@ files: docs: output: none includes: + - clang - cuda - cuda_version - depends_on_cupy @@ -71,10 +75,37 @@ files: rust: output: none includes: + # clang/libclang only needed for bindgen support + - clang - cuda - cuda_version - rapids_build - rust + py_build_libcuvs: + output: pyproject + pyproject_dir: python/libcuvs + extras: + table: build-system + includes: + - build + py_rapids_build_libcuvs: + output: pyproject + pyproject_dir: python/libcuvs + extras: + table: tool.rapids-build-backend + key: requires + includes: + - depends_on_libraft + - depends_on_librmm + - rapids_build + py_run_libcuvs: + output: pyproject + pyproject_dir: python/libcuvs + extras: + table: project + includes: + - cuda_wheels + - depends_on_libraft py_build_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -89,7 +120,11 @@ files: table: tool.rapids-build-backend key: requires includes: + - build_cython - build_py_cuvs + - depends_on_libcuvs + - depends_on_libraft + - depends_on_librmm - rapids_build py_run_cuvs: output: pyproject @@ -97,7 +132,6 @@ files: extras: table: project includes: - - cuda_wheels - depends_on_pylibraft - run_py_cuvs py_test_cuvs: @@ -149,12 +183,16 @@ dependencies: - output_types: [requirements, pyproject] packages: - scikit-build-core[pyproject]>=0.10.0 + build_cython: + common: + - output_types: [conda, requirements, pyproject] + packages: 
+ - cython>=3.0.0 rapids_build: common: - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - - cython>=3.0.0 - ninja - output_types: [conda] packages: @@ -241,12 +279,13 @@ dependencies: - output_types: [conda, requirements] packages: - pre-commit - develop: + clang: common: - output_types: conda packages: - clang==16.0.6 - - clang-tools=16.0.6 + - clang-tools==16.0.6 + - libclang==16.0.6 cuda_version: specific: - output_types: conda @@ -363,13 +402,14 @@ dependencies: - nvidia-curand-cu12 - nvidia-cusolver-cu12 - nvidia-cusparse-cu12 - # CUDA 11 does not provide wheels, so use the system libraries instead - matrix: cuda: "11.*" use_cuda_wheels: "true" packages: - # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels - # (e.g. for DLFW and pip devcontainers) + - nvidia-cublas-cu11 + - nvidia-curand-cu11 + - nvidia-cusolver-cu11 + - nvidia-cusparse-cu11 - matrix: use_cuda_wheels: "false" packages: @@ -425,9 +465,6 @@ dependencies: packages: - make - rust - # clang/libclang only needed for bindgen support - - clang - - libclang build_wheels: common: - output_types: [requirements, pyproject] @@ -492,7 +529,6 @@ dependencies: - h5py>=3.8.0 - benchmark>=1.8.2 - openblas - - libcuvs==25.2.*,>=0.0.0a0 bench_python: common: - output_types: [conda, pyproject, requirements] @@ -502,6 +538,54 @@ dependencies: - matplotlib - pandas - pyyaml + depends_on_libcuvs: + common: + - output_types: conda + packages: + - &libcuvs_unsuffixed libcuvs==25.2.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libcuvs-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: 
"true" + packages: + - libcuvs-cu11==25.2.*,>=0.0.0a0 + - {matrix: null, packages: [*libcuvs_unsuffixed]} + depends_on_libraft: + common: + - output_types: conda + packages: + - &libraft_unsuffixed libraft==25.2.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libraft-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libraft-cu11==25.2.*,>=0.0.0a0 + - {matrix: null, packages: [*libraft_unsuffixed]} depends_on_librmm: common: - output_types: conda diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 9554207bb..b0d0ae9ee 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -48,13 +48,23 @@ add_executable(VAMANA_EXAMPLE src/vamana_example.cu) add_library(rmm_logger OBJECT) target_link_libraries(rmm_logger PRIVATE rmm::rmm_logger_impl) -target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) target_link_libraries( - CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger + CAGRA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger ) target_link_libraries( - DYNAMIC_BATCHING_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger + CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + rmm_logger +) +target_link_libraries( + DYNAMIC_BATCHING_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + rmm_logger +) +target_link_libraries( + IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger +) +target_link_libraries( + IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger +) +target_link_libraries( + VAMANA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger ) -target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) 
-target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) -target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index c0990995f..f3feae9a7 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -31,18 +31,6 @@ project( C CXX CUDA ) -# ################################################################################################## -# * User Options -------------------------------------------------------------- - -option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulting to local files" - OFF -) -option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) - -message( - "CUVS_PY: Searching for existing cuVS C/C++ installations before defaulting to local files: ${FIND_CUVS_CPP}" -) - # ################################################################################################## # * Process User Options ------------------------------------------------------ @@ -54,56 +42,14 @@ include(rapids-find) rapids_cpm_init() -# If the user requested it we attempt to find CUVS. 
-if(FIND_CUVS_CPP) - find_package(cuvs "${RAPIDS_VERSION}" REQUIRED COMPONENTS c_api) - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cuvs_FOUND OFF) -endif() +# --- cuVS ---# +find_package(cuvs "${RAPIDS_VERSION}" REQUIRED COMPONENTS c_api) -if(NOT cuvs_FOUND) - find_package(CUDAToolkit REQUIRED) +# --- dlpack ---# +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) - set(BUILD_TESTS OFF) - set(BUILD_C_LIBRARY ON) - - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) - set(CUVS_USE_RAFT_STATIC ON) - - if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) - set(CUDA_STATIC_MATH_LIBRARIES OFF) - elseif(USE_CUDA_MATH_WHEELS) - message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") - endif() - - add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) - - if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) - set(rpaths - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" - ) - set_property( - TARGET cuvs - PROPERTY INSTALL_RPATH ${rpaths} - APPEND - ) - set_property( - TARGET cuvs_c - PROPERTY INSTALL_RPATH ${rpaths} - APPEND - ) - endif() - - set(cython_lib_dir cuvs) - install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) -endif() +# ensure Cython targets can find dlpack headers (these do not come installed with cuVS) +target_include_directories(cuvs::cuvs INTERFACE "$") # ################################################################################################## # * Build Cython artifacts ----------------------------------------------------- @@ -116,7 +62,3 @@ target_link_libraries(cuvs_rmm_logger PRIVATE rmm::rmm_logger_impl) add_subdirectory(cuvs/common) add_subdirectory(cuvs/distance) add_subdirectory(cuvs/neighbors) - -if(DEFINED cython_lib_dir) - rapids_cython_add_rpath_entries(TARGET cuvs PATHS "${cython_lib_dir}") -endif()
diff --git a/python/cuvs/cuvs/__init__.py b/python/cuvs/cuvs/__init__.py index 9f0481cb7..1a41f0d76 100644 --- a/python/cuvs/cuvs/__init__.py +++ b/python/cuvs/cuvs/__init__.py @@ -13,4 +13,15 @@ # limitations under the License. # +# If libcuvs was installed as a wheel, we must request it to load the library +# symbols. Otherwise, we assume that the library was installed in a system path that ld +# can find. +try: + import libcuvs +except ModuleNotFoundError: + pass +else: + libcuvs.load_library() + del libcuvs + from cuvs._version import __git_commit__, __version__ diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index 361f2fafc..b0e1cb335 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX common_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt index 514b08c43..ded07395c 100644 --- a/python/cuvs/cuvs/distance/CMakeLists.txt +++ b/python/cuvs/cuvs/distance/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX distance_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 031fd485e..b9161eefc 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -27,7 +27,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) 
rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_refine_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_refine_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt index 61eda649c..3c646f498 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt @@ -20,8 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX - neighbors_brute_force_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_brute_force_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt index 1f40daab2..6cf0956a2 100644 --- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_cagra_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt index a678852d9..43e008363 100644 --- a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs 
MODULE_PREFIX neighbors_prefilter_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_prefilter_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt index 8351916e6..c33313c3c 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_hnsw_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_hnsw_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt index f5663cdaa..eadb8934c 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_ivf_flat_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivf_flat_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt index a24320ded..df61793b8 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_pq_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_pq_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff 
--git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 155e454a8..30658623b 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -33,10 +33,6 @@ requires-python = ">=3.10" dependencies = [ "cuda-python", "numpy>=1.23,<3.0a0", - "nvidia-cublas", - "nvidia-curand", - "nvidia-cusolver", - "nvidia-cusparse", "pylibraft==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -59,12 +55,6 @@ test = [ Homepage = "https://github.com/rapidsai/cuvs" Documentation = "https://docs.rapids.ai/api/cuvs/stable/" -[tool.setuptools] -license-files = ["LICENSE"] - -[tool.setuptools.dynamic] -version = {file = "cuvs/VERSION"} - [tool.isort] line_length = 79 multi_line_output = 3 @@ -127,18 +117,23 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cuda-python", "cython>=3.0.0", + "libcuvs==25.2.*,>=0.0.0a0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +matrix-entry = "cuda_suffixed=true" [tool.pydistcheck] select = [ - # NOTE: size threshold is managed via CLI args in CI scripts "distro-too-large-compressed", ] +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] filterwarnings = [ "error", diff --git a/python/libcuvs/CMakeLists.txt b/python/libcuvs/CMakeLists.txt new file mode 100644 index 000000000..569652b71 --- /dev/null +++ b/python/libcuvs/CMakeLists.txt @@ -0,0 +1,69 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +include(rapids-cuda) +rapids_cuda_init_architectures(libcuvs-python) + +project( + libcuvs-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX CUDA +) + +# Check if cuVS is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python cuvs package. +find_package(cuvs "${RAPIDS_VERSION}") + +if(cuvs_FOUND) + return() +endif() + +unset(cuvs_FOUND) + +# --- CUDA --- # +set(CUDA_STATIC_RUNTIME ON) +set(CUDA_STATIC_MATH_LIBRARIES OFF) + +# --- RAFT ---# +set(CUVS_USE_RAFT_STATIC OFF) + +# --- cuVS ---# +set(BUILD_TESTS OFF) +set(BUILD_C_LIBRARY ON) +set(CUVS_COMPILE_DYNAMIC_ONLY ON) + +add_subdirectory(../../cpp cuvs-cpp) + +# assumes libcuvs.so is installed 2 levels deep, e.g. 
site-packages/libcuvs/lib64/libcuvs.so +set(rpaths + "$ORIGIN/../../nvidia/cublas/lib" + "$ORIGIN/../../nvidia/curand/lib" + "$ORIGIN/../../nvidia/cusolver/lib" + "$ORIGIN/../../nvidia/cusparse/lib" + "$ORIGIN/../../nvidia/nvjitlink/lib" +) +set_property( + TARGET cuvs + PROPERTY INSTALL_RPATH ${rpaths} + APPEND +) +set_property( + TARGET cuvs_c + PROPERTY INSTALL_RPATH ${rpaths} + APPEND +) diff --git a/python/libcuvs/LICENSE b/python/libcuvs/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/python/libcuvs/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libcuvs/README.md b/python/libcuvs/README.md new file mode 120000 index 000000000..fe8400541 --- /dev/null +++ b/python/libcuvs/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/libcuvs/libcuvs/VERSION b/python/libcuvs/libcuvs/VERSION new file mode 120000 index 000000000..d62dc733e --- /dev/null +++ b/python/libcuvs/libcuvs/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libcuvs/libcuvs/__init__.py b/python/libcuvs/libcuvs/__init__.py new file mode 100644 index 000000000..2d3a86015 --- /dev/null +++ b/python/libcuvs/libcuvs/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from libcuvs._version import __git_commit__, __version__ +from libcuvs.load import load_library diff --git a/python/libcuvs/libcuvs/_version.py b/python/libcuvs/libcuvs/_version.py new file mode 100644 index 000000000..530bf8bea --- /dev/null +++ b/python/libcuvs/libcuvs/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libcuvs/libcuvs/load.py b/python/libcuvs/libcuvs/load.py new file mode 100644 index 000000000..a9c6a9325 --- /dev/null +++ b/python/libcuvs/libcuvs/load.py @@ -0,0 +1,100 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + +# Loading with RTLD_LOCAL adds the library itself to the loader's +# loaded library cache without loading any symbols into the global +# namespace. This allows libraries that express a dependency on +# this library to be loaded later and successfully satisfy this dependency +# without polluting the global symbol table with symbols from +# libcuvs that could conflict with symbols from other DSOs. +PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL + + +def _load_system_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Raises ``OSError`` if library cannot be loaded. + """ + return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG) + + +def _load_wheel_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Returns ``None`` if the library cannot be loaded. + """ + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), "lib64", soname) + ): + return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG) + return None + + +def load_library(): + """Dynamically load libcuvs.so and its dependencies""" + try: + # libraft must be loaded before libcuvs because libcuvs + # references its symbols + import libraft + + libraft.load_library() + except ModuleNotFoundError: + # 'libcuvs' has a runtime dependency on 'libraft'. However, + # that dependency might be satisfied by the 'libraft' conda package + # (which does not have any Python modules), instead of the + # 'libraft' wheel. + # + # In that situation, assume that 'libraft.so' is in a place where + # the loader can find it. 
+ pass + + prefer_system_installation = ( + os.getenv("RAPIDS_LIBCUVS_PREFER_SYSTEM_LIBRARY", "false").lower() + != "false" + ) + + libs_to_return = [] + for soname in ["libcuvs.so", "libcuvs_c.so"]: + libcuvs_lib = None + if prefer_system_installation: + # Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, + # but if no other library is present use the one in the wheel. + try: + libcuvs_lib = _load_system_installation(soname) + except OSError: + libcuvs_lib = _load_wheel_installation(soname) + else: + # Prefer the libraries bundled in this package. If they aren't + # found (which might be the case in builds where the library was + # prebuilt before packaging the wheel), look for a system + # installation. + try: + libcuvs_lib = _load_wheel_installation(soname) + if libcuvs_lib is None: + libcuvs_lib = _load_system_installation(soname) + except OSError: + # If none of the searches above succeed, just silently return + # None and rely on other mechanisms (like RPATHs on other DSOs) + # to help the loader find the library. + pass + if libcuvs_lib: + libs_to_return.append(libcuvs_lib) + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libcuvs was loaded from. + return libs_to_return diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml new file mode 100644 index 000000000..28443b782 --- /dev/null +++ b/python/libcuvs/pyproject.toml @@ -0,0 +1,108 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +build-backend = "rapids_build_backend.build" + +[project] +name = "libcuvs" +dynamic = ["version"] +description = "cuVS: Vector Search on the GPU (C++)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +dependencies = [ + "libraft==25.2.*,>=0.0.0a0", + "nvidia-cublas", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+classifiers = [ + "Intended Audience :: Developers", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cuvs" +Documentation = "https://docs.rapids.ai/api/cuvs/stable/" + +[project.entry-points."cmake.prefix"] +libcuvs = "libcuvs" + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_first_party = [ + "libcuvs", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" +ninja.make-fallback = true +sdist.reproducible = true +wheel.install-dir = "libcuvs" +wheel.packages = ["libcuvs"] +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libcuvs/VERSION" +regex = "(?P.*)" + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", + "ninja", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" + +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# detect when package size grows significantly +max_allowed_size_compressed = '1.1G' From 80370a1220af0928d69ca96a66e6b6ff1220bab7 Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Fri, 24 Jan 2025 02:35:08 +0900 Subject: [PATCH 39/39] Improve the performance of CAGRA new vector addition with the default params (#569) This PR updates the default chunk size of the CAGRA graph extension and also adds a knob to control the batch size of the CAGRA searches run inside for better throughput. The default chunk size was set to 1 in the current implementation because there is a potential problem with low recall when the chunk size is large, because no edges are made between nodes in the same chunk. However, as I have investigated, the low recall problem rarely occurs with large chunk sizes. # Search performance The performance was measured after applying a bugfix https://github.com/rapidsai/cuvs/pull/565 ## degree = 32 ![extend-ir0 9-degree32](https://github.com/user-attachments/assets/a5bb2fb6-8c12-49ad-b96a-1b384d79a96b) (I don't know the reason the performance is unstable in NYTimes.) ## degree = 64 ![extend-ir0 9-degree64](https://github.com/user-attachments/assets/8e926e1c-d772-4682-9419-9cc027f09d3f) So I increase the default chunk size to the size of the new dataset vectors for better throughput in this PR. I also make public a knob to control the search batch size in the `extend` function to control the balance between throughput and memory consumption. Authors: - tsuki (https://github.com/enp1s0) - Corey J.
Nolet (https://github.com/cjnolet) - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/cuvs/pull/569 --- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 358b7643e..453928992 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -37,7 +37,8 @@ void add_node_core( const cuvs::neighbors::cagra::index& idx, raft::mdspan, raft::layout_stride, Accessor> additional_dataset_view, - raft::host_matrix_view updated_graph) + raft::host_matrix_view updated_graph, + const cuvs::neighbors::cagra::extend_params& extend_params) { using DistanceT = float; const std::size_t degree = idx.graph_degree(); @@ -68,7 +69,19 @@ void add_node_core( new_size, raft::resource::get_cuda_stream(handle)); - const std::size_t max_chunk_size = 1024; + std::size_t data_size_per_vector = + sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim; + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, additional_dataset_view.data_handle())); + if (attr.devicePointer == nullptr) { + // for batch_load_iterator + data_size_per_vector += sizeof(T) * dim; + } + + const std::size_t max_search_batch_size = + std::min(std::max(1lu, raft::resource::get_workspace_free_bytes(handle) / data_size_per_vector), + num_add); + RAFT_EXPECTS(max_search_batch_size > 0, "No enough working memory space is left."); cuvs::neighbors::cagra::search_params params; params.itopk_size = std::max(base_degree * 2lu, 256lu); @@ -77,24 +90,24 @@ void add_node_core( auto mr = raft::resource::get_workspace_resource(handle); auto neighbor_indices = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, base_degree)); + handle, mr, raft::make_extents(max_search_batch_size, base_degree)); auto 
neighbor_distances = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, base_degree)); + handle, mr, raft::make_extents(max_search_batch_size, base_degree)); auto queries = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, dim)); + handle, mr, raft::make_extents(max_search_batch_size, dim)); auto host_neighbor_indices = - raft::make_host_matrix(max_chunk_size, base_degree); + raft::make_host_matrix(max_search_batch_size, base_degree); cuvs::spatial::knn::detail::utils::batch_load_iterator additional_dataset_batch( additional_dataset_view.data_handle(), num_add, additional_dataset_view.stride(0), - max_chunk_size, + max_search_batch_size, raft::resource::get_cuda_stream(handle), - raft::resource::get_workspace_resource(handle)); + mr); for (const auto& batch : additional_dataset_batch) { // Step 1: Obtain K (=base_degree) nearest neighbors of the new vectors by CAGRA search // Create queries @@ -254,7 +267,8 @@ void add_graph_nodes( const std::size_t degree = index.graph_degree(); const std::size_t dim = index.dim(); const std::size_t stride = input_updated_dataset_view.stride(0); - const std::size_t max_chunk_size_ = params.max_chunk_size == 0 ? 1 : params.max_chunk_size; + const std::size_t max_chunk_size_ = + params.max_chunk_size == 0 ? new_dataset_size : params.max_chunk_size; raft::copy(updated_graph_view.data_handle(), index.graph().data_handle(), @@ -298,7 +312,7 @@ void add_graph_nodes( stride); neighbors::cagra::add_node_core( - handle, internal_index, additional_dataset_view, updated_graph); + handle, internal_index, additional_dataset_view, updated_graph, params); raft::resource::sync_stream(handle); } }