diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index e93b7a694..59b8e00de 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -80,7 +80,30 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" sha: ${{ inputs.sha }} + wheel-build-libcuvs: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libcuvs.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-publish-libcuvs: + needs: wheel-build-libcuvs + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: libcuvs + package-type: cpp wheel-build-cuvs: + needs: wheel-build-libcuvs secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: @@ -99,3 +122,4 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: cuvs + package-type: python diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 91f51bd90..843439f26 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - conda-python-tests - docs-build - rust-build + - wheel-build-libcuvs - wheel-build-cuvs - wheel-tests-cuvs - devcontainer @@ -135,10 +136,19 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_rust.sh" - wheel-build-cuvs: + wheel-build-libcuvs: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 
+ with: + build_type: pull-request + script: ci/build_wheel_libcuvs.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-build-cuvs: + needs: wheel-build-libcuvs + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_cuvs.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcfc7e1fa..240f82be6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -115,7 +115,7 @@ repos: cpp/cmake/modules/FindAVX\.cmake| - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.16.0 + rev: v1.17.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/build.sh b/build.sh index bd5fa649b..3b9a9a3a8 100755 --- a/build.sh +++ b/build.sh @@ -313,12 +313,6 @@ if [[ ${CMAKE_TARGET} == "" ]]; then CMAKE_TARGET="all" fi - -SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" -if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUVS_CPP"* ]]; then - SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS};-DFIND_CUVS_CPP=ON" -fi - # If clean given, run it prior to any other steps if (( ${CLEAN} == 1 )); then # If the dirs to clean are mounted dirs in a container, the @@ -434,7 +428,7 @@ fi # Build and (optionally) install the cuvs Python package if (( ${NUMARGS} == 0 )) || hasArg python; then - SKBUILD_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS}" \ + SKBUILD_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \ python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuvs fi diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 4994374a8..c6f1232b3 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,10 +1,11 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA 
CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail package_name=$1 package_dir=$2 +package_type=$3 underscore_package_name=$(echo "${package_name}" | tr "-" "_") source rapids-configure-sccache @@ -16,21 +17,22 @@ rapids-generate-version > ./VERSION cd "${package_dir}" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXCLUDE_ARGS=( - --exclude "libcublas.so.12" - --exclude "libcublasLt.so.12" - --exclude "libcurand.so.10" - --exclude "libcusolver.so.11" - --exclude "libcusparse.so.12" - --exclude "libnvJitLink.so.12" +EXCLUDE_ARGS=( + --exclude "libraft.so" + --exclude "libcublas.so.*" + --exclude "libcublasLt.so.*" + --exclude "libcurand.so.*" + --exclude "libcusolver.so.*" + --exclude "libcusparse.so.*" + --exclude "libnvJitLink.so.*" +) + +if [[ "${package_dir}" != "python/libcuvs" ]]; then + EXCLUDE_ARGS+=( + --exclude "libcuvs_c.so" + --exclude "libcuvs.so" ) - ;; - 11.*) - EXCLUDE_ARGS=() - ;; -esac +fi rapids-logger "Building '${package_name}' wheel" @@ -48,4 +50,4 @@ sccache --show-adv-stats mkdir -p final_dist python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* -RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist +RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_type} final_dist diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index 444657cc0..fb40d1459 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -1,21 +1,20 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail package_dir="python/cuvs" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" - ;; - 11.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" - ;; -esac +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Set up skbuild options. 
Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUVS_CPP=OFF${EXTRA_CMAKE_ARGS}" +# Downloads libcuvs wheels from this current build, +# then ensures 'cuvs' wheel builds always use the 'libcuvs' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. +RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuvs_dist +echo "libcuvs-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcuvs_dist/libcuvs_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -ci/build_wheel.sh cuvs ${package_dir} +ci/build_wheel.sh cuvs ${package_dir} python ci/validate_wheel.sh ${package_dir} final_dist diff --git a/ci/build_wheel_libcuvs.sh b/ci/build_wheel_libcuvs.sh new file mode 100755 index 000000000..148be89a2 --- /dev/null +++ b/ci/build_wheel_libcuvs.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_name="libcuvs" +package_dir="python/libcuvs" + +rapids-logger "Generating build requirements" +matrix_selectors="cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" + +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "${matrix_selectors}" \ +| tee /tmp/requirements-build.txt + +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt + +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +ci/build_wheel.sh libcuvs ${package_dir} cpp +ci/validate_wheel.sh ${package_dir} final_dist libcuvs diff --git a/ci/check_style.sh b/ci/check_style.sh index c22f3f9f0..952e94bf1 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -14,5 +14,12 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks +# get config for cmake-format checks +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json" +export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json +mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) +wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} + # Run pre-commit checks pre-commit run --all-files --show-diff-on-failure diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 4cf1f0617..7562035a9 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -44,8 +44,10 @@ echo "${NEXT_FULL_TAG}" > VERSION DEPENDENCIES=( dask-cuda 
cuvs - pylibraft + libcuvs + libraft librmm + pylibraft rmm rapids-dask-dependency ) diff --git a/ci/test_wheel_cuvs.sh b/ci/test_wheel_cuvs.sh index 7033003e9..862c69a3a 100755 --- a/ci/test_wheel_cuvs.sh +++ b/ci/test_wheel_cuvs.sh @@ -1,13 +1,16 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libcuvs-dep +RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cuvs*.whl)[test] +python -m pip install \ + ./local-libcuvs-dep/libcuvs*.whl \ + "$(echo ./dist/cuvs*.whl)[test]" python -m pytest ./python/cuvs/cuvs/test diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index f2b235765..19d413fa2 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -8,24 +8,12 @@ wheel_dir_relative_path=$2 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -# some packages are much larger on CUDA 11 than on CUDA 12 -if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then - PYDISTCHECK_ARGS=( - --max-allowed-size-compressed '1.4G' - ) -else - PYDISTCHECK_ARGS=( - --max-allowed-size-compressed '950M' - ) -fi - cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" pydistcheck \ --inspect \ - "${PYDISTCHECK_ARGS[@]}" \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validate packages with 'twine'" diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 01853da84..123acb421 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ 
-9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 @@ -26,7 +25,7 @@ dependencies: - gcc_linux-aarch64=11.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -55,7 +54,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index a1ad68d7f..c6a65e684 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 @@ -26,7 +25,7 @@ dependencies: - gcc_linux-64=11.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -55,7 +54,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index ee0213fff..b71f5ed43 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev @@ -24,10 +23,10 @@ dependencies: - cython>=3.0.0 - dlpack>=0.8,<1.0 - doxygen>=1.8.20 -- gcc_linux-aarch64=11.* +- gcc_linux-aarch64=13.* - graphviz 
- ipython -- libclang +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -51,7 +50,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index d93dcaf7a..16cd595d3 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -9,8 +9,7 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev @@ -24,10 +23,10 @@ dependencies: - cython>=3.0.0 - dlpack>=0.8,<1.0 - doxygen>=1.8.20 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - graphviz - ipython -- libclang +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -51,7 +50,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinx>=8.0.0 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - pip: - nvidia-sphinx-theme name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index a90dc03e7..2e2ad8446 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -26,6 +26,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -47,6 +48,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - wheel name: 
bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index b7344c822..90243415c 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -26,6 +26,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -47,6 +48,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - wheel name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index da7229004..34e01aeea 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -24,9 +24,10 @@ dependencies: - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 -- gcc_linux-aarch64=11.* +- gcc_linux-aarch64=13.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -43,6 +44,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - wheel name: bench_ann_cuda-125_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 5d1dd8fc7..dcfb54a22 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ 
b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -9,7 +9,7 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.6 +- clang-tools==16.0.6 - clang==16.0.6 - click - cmake>=3.26.4,!=3.30.0 @@ -24,9 +24,10 @@ dependencies: - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - glog>=0.6.0 - h5py>=3.8.0 +- libclang==16.0.6 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -43,6 +44,6 @@ dependencies: - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - wheel name: bench_ann_cuda-125_arch-x86_64 diff --git a/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml b/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml index ed6f708e1..5407d7c17 100644 --- a/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/cuvs-bench-cpu/conda_build_config.yaml @@ -1,14 +1,16 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/cuvs-bench/conda_build_config.yaml b/conda/recipes/cuvs-bench/conda_build_config.yaml index 47bd730da..ccd7341d1 100644 --- a/conda/recipes/cuvs-bench/conda_build_config.yaml +++ b/conda/recipes/cuvs-bench/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] 
cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/cuvs-bench/meta.yaml b/conda/recipes/cuvs-bench/meta.yaml index d77aee8ce..33b1745ec 100644 --- a/conda/recipes/cuvs-bench/meta.yaml +++ b/conda/recipes/cuvs-bench/meta.yaml @@ -37,10 +37,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev {% endif %} @@ -50,7 +48,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/cuvs/conda_build_config.yaml b/conda/recipes/cuvs/conda_build_config.yaml index 001878ff2..83f5ebcb1 100644 --- a/conda/recipes/cuvs/conda_build_config.yaml +++ b/conda/recipes/cuvs/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" 
cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index ad7ffe756..25fc204a8 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -20,10 +20,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -33,7 +31,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/libcuvs/conda_build_config.yaml b/conda/recipes/libcuvs/conda_build_config.yaml index b8c49943e..72cc4415d 100644 --- a/conda/recipes/libcuvs/conda_build_config.yaml +++ b/conda/recipes/libcuvs/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index 46552c397..fd466cd22 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -39,10 +39,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER 
}} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -54,7 +52,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -106,10 +104,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -121,7 +117,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -174,10 +170,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -189,7 +183,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -246,10 +240,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ 
compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -261,7 +253,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 26c0b82d3..11f21db44 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -68,6 +68,7 @@ option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_l option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) +option(CUVS_COMPILE_DYNAMIC_ONLY "Only build the shared library and skip the static library." OFF) option(CUVS_NVTX "Enable nvtx markers" OFF) option(CUVS_RAFT_CLONE_ON_PIN "Explicitly clone RAFT branch when pinned to non-feature branch" ON) @@ -94,6 +95,7 @@ include(CMakeDependentOption) message(VERBOSE "cuVS: Build cuVS unit-tests: ${BUILD_TESTS}") message(VERBOSE "cuVS: Build CPU only components: ${BUILD_CPU_ONLY}") message(VERBOSE "cuVS: Build ANN benchmarks: ${BUILD_CUVS_BENCH}") +message(VERBOSE "cuVS: Build only the shared library: ${CUVS_COMPILE_DYNAMIC_ONLY}") message(VERBOSE "cuVS: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "cuVS: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "cuVS: Disable OpenMP: ${DISABLE_OPENMP}") @@ -493,7 +495,10 @@ if(BUILD_SHARED_LIBS) ) add_library(cuvs SHARED $,EXCLUDE,rmm.*logger>) - add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) + + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) + endif() target_compile_options( cuvs INTERFACE $<$:--expt-extended-lambda @@ -501,20 +506,23 @@ 
if(BUILD_SHARED_LIBS) ) add_library(cuvs::cuvs ALIAS cuvs) - add_library(cuvs::cuvs_static ALIAS cuvs_static) - set_target_properties( - cuvs_static - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - EXPORT_NAME cuvs_static - ) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + add_library(cuvs::cuvs_static ALIAS cuvs_static) - target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") + set_target_properties( + cuvs_static + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + EXPORT_NAME cuvs_static + ) + + target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") + endif() target_include_directories( cuvs_objs @@ -523,19 +531,21 @@ if(BUILD_SHARED_LIBS) INTERFACE "$" ) - target_include_directories( - cuvs_static - PUBLIC "$" - INTERFACE "$" - ) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + target_include_directories( + cuvs_static + PUBLIC "$" + INTERFACE "$" + ) - # ensure CUDA symbols aren't relocated to the middle of the debug build binaries - target_link_options(cuvs_static PRIVATE $) + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cuvs_static PRIVATE $) - target_include_directories( - cuvs_static PUBLIC "$" - "$" - ) + target_include_directories( + cuvs_static PUBLIC "$" + "$" + ) + endif() target_include_directories( cuvs PUBLIC "$" @@ -571,11 +581,13 @@ if(BUILD_SHARED_LIBS) cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY} ) - target_link_libraries( - cuvs_static - PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE nvidia::cutlass::cutlass $ - ) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + target_link_libraries( + cuvs_static + PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} + PRIVATE nvidia::cutlass::cutlass $ + ) + endif() endif() 
if(BUILD_MG_ALGOS) @@ -718,8 +730,13 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB include(GNUInstallDirs) include(CPack) + set(_cuvs_lib_targets cuvs) + if(NOT CUVS_COMPILE_DYNAMIC_ONLY) + list(APPEND _cuvs_lib_targets cuvs_static) + endif() + install( - TARGETS cuvs cuvs_static + TARGETS ${_cuvs_lib_targets} DESTINATION ${lib_dir} COMPONENT cuvs EXPORT cuvs-exports diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 2e57df84e..845c7a833 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -44,6 +44,7 @@ function(find_and_configure_raft) INSTALL_EXPORT_SET cuvs-exports COMPONENTS ${RAFT_COMPONENTS} CPM_ARGS + EXCLUDE_FROM_ALL TRUE GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git GIT_TAG ${PKG_PINNED_TAG} SOURCE_SUBDIR cpp diff --git a/cpp/src/distance/detail/distance_ops/l2_exp.cuh b/cpp/src/distance/detail/distance_ops/l2_exp.cuh index 04817aa8b..f49771605 100644 --- a/cpp/src/distance/detail/distance_ops/l2_exp.cuh +++ b/cpp/src/distance/detail/distance_ops/l2_exp.cuh @@ -28,14 +28,14 @@ namespace cuvs::distance::detail::ops { * for round-off error tolerance. 
* @tparam DataT */ -template -__device__ constexpr DataT get_clamp_precision() +template +__device__ constexpr AccT get_clamp_precision() { switch (sizeof(DataT)) { - case 2: return 1e-3; - case 4: return 1e-6; - case 8: return 1e-15; - default: return 0; + case 2: return AccT{1e-3}; + case 4: return AccT{1e-6}; + case 8: return AccT{1e-15}; + default: return AccT{0}; } } @@ -46,19 +46,27 @@ struct l2_exp_cutlass_op { __device__ l2_exp_cutlass_op() noexcept : sqrt(false) {} __device__ l2_exp_cutlass_op(bool isSqrt) noexcept : sqrt(isSqrt) {} - inline __device__ AccT operator()(DataT aNorm, DataT bNorm, DataT accVal) const noexcept + inline __device__ AccT operator()(AccT aNorm, AccT bNorm, AccT accVal) const noexcept { - AccT outVal = aNorm + bNorm - DataT(2.0) * accVal; + AccT outVal = aNorm + bNorm - AccT(2.0) * accVal; /** * Self-neighboring points should have (aNorm == bNorm) == accVal and the dot product (accVal) * can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal instead. */ - outVal = outVal * AccT(!((outVal * outVal < get_clamp_precision()) * (aNorm == bNorm))); + outVal = + outVal * AccT(!((outVal * outVal < get_clamp_precision()) * (aNorm == bNorm))); return sqrt ? raft::sqrt(outVal * static_cast(outVal > AccT(0))) : outVal; } - __device__ AccT operator()(DataT aData) const noexcept { return aData; } + __device__ AccT operator()(DataT aData) const noexcept + { + if constexpr (std::is_same_v && std::is_same_v) { + return __half2float(aData); + } else { + return aData; + } + } }; /** @@ -121,9 +129,9 @@ struct l2_exp_distance_op { * (accVal) can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal * instead. 
*/ - acc[i][j] = - val * static_cast((val > AccT(0))) * - static_cast(!((val * val < get_clamp_precision()) * (regxn[i] == regyn[j]))); + acc[i][j] = val * static_cast((val > AccT(0))) * + static_cast( + !((val * val < get_clamp_precision()) * (regxn[i] == regyn[j]))); } } if (sqrt) { diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 63f5c51a6..913094e2a 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -37,7 +37,8 @@ void add_node_core( const cuvs::neighbors::cagra::index& idx, raft::mdspan, raft::layout_stride, Accessor> additional_dataset_view, - raft::host_matrix_view updated_graph) + raft::host_matrix_view updated_graph, + const cuvs::neighbors::cagra::extend_params& extend_params) { using DistanceT = float; const std::size_t degree = idx.graph_degree(); @@ -68,7 +69,19 @@ void add_node_core( new_size, raft::resource::get_cuda_stream(handle)); - const std::size_t max_chunk_size = 1024; + std::size_t data_size_per_vector = + sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim; + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, additional_dataset_view.data_handle())); + if (attr.devicePointer == nullptr) { + // for batch_load_iterator + data_size_per_vector += sizeof(T) * dim; + } + + const std::size_t max_search_batch_size = + std::min(std::max(1lu, raft::resource::get_workspace_free_bytes(handle) / data_size_per_vector), + num_add); + RAFT_EXPECTS(max_search_batch_size > 0, "No enough working memory space is left."); cuvs::neighbors::cagra::search_params params; params.itopk_size = std::max(base_degree * 2lu, 256lu); @@ -77,24 +90,24 @@ void add_node_core( auto mr = raft::resource::get_workspace_resource(handle); auto neighbor_indices = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, base_degree)); + handle, mr, raft::make_extents(max_search_batch_size, 
base_degree)); auto neighbor_distances = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, base_degree)); + handle, mr, raft::make_extents(max_search_batch_size, base_degree)); auto queries = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, dim)); + handle, mr, raft::make_extents(max_search_batch_size, dim)); auto host_neighbor_indices = - raft::make_host_matrix(max_chunk_size, base_degree); + raft::make_host_matrix(max_search_batch_size, base_degree); cuvs::spatial::knn::detail::utils::batch_load_iterator additional_dataset_batch( additional_dataset_view.data_handle(), num_add, additional_dataset_view.stride(0), - max_chunk_size, + max_search_batch_size, raft::resource::get_cuda_stream(handle), - raft::resource::get_workspace_resource(handle)); + mr); for (const auto& batch : additional_dataset_batch) { // Step 1: Obtain K (=base_degree) nearest neighbors of the new vectors by CAGRA search // Create queries @@ -298,7 +311,8 @@ void add_graph_nodes( const std::size_t degree = index.graph_degree(); const std::size_t dim = index.dim(); const std::size_t stride = input_updated_dataset_view.stride(0); - const std::size_t max_chunk_size_ = params.max_chunk_size == 0 ? 1 : params.max_chunk_size; + const std::size_t max_chunk_size_ = + params.max_chunk_size == 0 ? 
new_dataset_size : params.max_chunk_size; raft::copy(updated_graph_view.data_handle(), index.graph().data_handle(), @@ -342,7 +356,7 @@ void add_graph_nodes( stride); neighbors::cagra::add_node_core( - handle, internal_index, additional_dataset_view, updated_graph); + handle, internal_index, additional_dataset_view, updated_graph, params); raft::resource::sync_stream(handle); } } diff --git a/cpp/test/distance/masked_nn.cu b/cpp/test/distance/masked_nn.cu index a8f2f5163..a1c784669 100644 --- a/cpp/test/distance/masked_nn.cu +++ b/cpp/test/distance/masked_nn.cu @@ -314,8 +314,8 @@ template cudaStream_t stream = 0) { typedef typename raft::KeyValuePair KVP; - std::shared_ptr exp_h(new KVP[size]); - std::shared_ptr act_h(new KVP[size]); + std::shared_ptr exp_h(new KVP[size]); + std::shared_ptr act_h(new KVP[size]); raft::update_host(exp_h.get(), expected, size, stream); raft::update_host(act_h.get(), actual, size, stream); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/dependencies.yaml b/dependencies.yaml index fbd1d8372..478b2acc2 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -7,15 +7,16 @@ files: arch: [x86_64, aarch64] includes: - build + - build_cython - build_py_cuvs - build_wheels - checks + - clang - cuda - cuda_version - depends_on_cupy - depends_on_librmm - depends_on_pylibraft - - develop - docs - rapids_build - run_py_cuvs @@ -31,13 +32,15 @@ files: includes: - bench - bench_python + - build_cython - build_py_cuvs + - clang - cuda - cuda_version - depends_on_cupy - depends_on_pylibraft + - depends_on_libcuvs - depends_on_librmm - - develop - rapids_build - rapids_build_setuptools test_cpp: @@ -61,6 +64,7 @@ files: docs: output: none includes: + - clang - cuda - cuda_version - depends_on_cupy @@ -71,10 +75,37 @@ files: rust: output: none includes: + # clang/libclang only needed for bindgen support + - clang - cuda - cuda_version - rapids_build - rust + py_build_libcuvs: + output: pyproject + pyproject_dir: python/libcuvs + extras: + 
table: build-system + includes: + - build + py_rapids_build_libcuvs: + output: pyproject + pyproject_dir: python/libcuvs + extras: + table: tool.rapids-build-backend + key: requires + includes: + - depends_on_libraft + - depends_on_librmm + - rapids_build + py_run_libcuvs: + output: pyproject + pyproject_dir: python/libcuvs + extras: + table: project + includes: + - cuda_wheels + - depends_on_libraft py_build_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -89,7 +120,11 @@ files: table: tool.rapids-build-backend key: requires includes: + - build_cython - build_py_cuvs + - depends_on_libcuvs + - depends_on_libraft + - depends_on_librmm - rapids_build py_run_cuvs: output: pyproject @@ -97,7 +132,6 @@ files: extras: table: project includes: - - cuda_wheels - depends_on_pylibraft - run_py_cuvs py_test_cuvs: @@ -149,12 +183,16 @@ dependencies: - output_types: [requirements, pyproject] packages: - scikit-build-core[pyproject]>=0.10.0 + build_cython: + common: + - output_types: [conda, requirements, pyproject] + packages: + - cython>=3.0.0 rapids_build: common: - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - - cython>=3.0.0 - ninja - output_types: [conda] packages: @@ -166,14 +204,28 @@ dependencies: matrices: - matrix: arch: x86_64 + cuda: "11.*" packages: - gcc_linux-64=11.* - - sysroot_linux-64==2.17 + - sysroot_linux-64==2.28 - matrix: arch: aarch64 + cuda: "11.*" packages: - gcc_linux-aarch64=11.* - - sysroot_linux-aarch64==2.17 + - sysroot_linux-aarch64==2.28 + - matrix: + arch: x86_64 + cuda: "12.*" + packages: + - gcc_linux-64=13.* + - sysroot_linux-64==2.28 + - matrix: + arch: aarch64 + cuda: "12.*" + packages: + - gcc_linux-aarch64=13.* + - sysroot_linux-aarch64==2.28 - output_types: conda matrices: - matrix: {cuda: "12.*"} @@ -227,12 +279,13 @@ dependencies: - output_types: [conda, requirements] packages: - pre-commit - develop: + clang: common: - output_types: conda packages: - clang==16.0.6 - - 
clang-tools=16.0.6 + - clang-tools==16.0.6 + - libclang==16.0.6 cuda_version: specific: - output_types: conda @@ -349,13 +402,14 @@ dependencies: - nvidia-curand-cu12 - nvidia-cusolver-cu12 - nvidia-cusparse-cu12 - # CUDA 11 does not provide wheels, so use the system libraries instead - matrix: cuda: "11.*" use_cuda_wheels: "true" packages: - # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels - # (e.g. for DLFW and pip devcontainers) + - nvidia-cublas-cu11 + - nvidia-curand-cu11 + - nvidia-cusolver-cu11 + - nvidia-cusparse-cu11 - matrix: use_cuda_wheels: "false" packages: @@ -411,9 +465,6 @@ dependencies: packages: - make - rust - # clang/libclang only needed for bindgen support - - clang - - libclang build_wheels: common: - output_types: [requirements, pyproject] @@ -478,7 +529,6 @@ dependencies: - h5py>=3.8.0 - benchmark>=1.8.2 - openblas - - libcuvs==25.2.*,>=0.0.0a0 bench_python: common: - output_types: [conda, pyproject, requirements] @@ -488,6 +538,54 @@ dependencies: - matplotlib - pandas - pyyaml + depends_on_libcuvs: + common: + - output_types: conda + packages: + - &libcuvs_unsuffixed libcuvs==25.2.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libcuvs-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libcuvs-cu11==25.2.*,>=0.0.0a0 + - {matrix: null, packages: [*libcuvs_unsuffixed]} + depends_on_libraft: + common: + - output_types: conda + packages: + - &libraft_unsuffixed libraft==25.2.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - 
--extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libraft-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libraft-cu11==25.2.*,>=0.0.0a0 + - {matrix: null, packages: [*libraft_unsuffixed]} depends_on_librmm: common: - output_types: conda diff --git a/docs/source/cuvs_bench/index.rst b/docs/source/cuvs_bench/index.rst index 820c44c4f..c15aa41c1 100644 --- a/docs/source/cuvs_bench/index.rst +++ b/docs/source/cuvs_bench/index.rst @@ -24,16 +24,6 @@ This tool offers several benefits, including * `Docker`_ -- `How benchmarks are run`_ - - * `Step 1: Prepare the dataset`_ - - * `Step 2: Build and search index`_ - - * `Step 3: Data export`_ - - * `Step 4: Plot the results`_ - - `Running the benchmarks`_ * `End-to-end: smaller-scale benchmarks (<1M to 10M)`_ @@ -75,7 +65,7 @@ Conda conda activate cuvs_benchmarks # to install GPU package: - conda install -c rapidsai -c conda-forge -c nvidia cuvs-ann-bench= cuda-version=11.8* + conda install -c rapidsai -c conda-forge -c nvidia cuvs-bench= cuda-version=11.8* # to install CPU package for usage in CPU-only systems: conda install -c rapidsai -c conda-forge cuvs-bench-cpu @@ -99,7 +89,7 @@ The following command pulls the nightly container for Python version 3.10, CUDA .. code-block:: bash - docker pull rapidsai/cuvs-bench:24.12a-cuda12.5-py3.10 #substitute cuvs-bench for the exact desired container. + docker pull rapidsai/cuvs-bench:24.12a-cuda12.5-py3.10 # substitute cuvs-bench for the exact desired container. 
The CUDA and python versions can be changed for the supported values: - Supported CUDA versions: 11.8 and 12.5 @@ -112,185 +102,6 @@ You can see the exact versions as well in the dockerhub site: **Note:** GPU containers use the CUDA toolkit from inside the container, the only requirement is a driver installed on the host machine that supports that version. So, for example, CUDA 11.8 containers can run in systems with a CUDA 12.x capable driver. Please also note that the Nvidia-Docker runtime from the `Nvidia Container Toolkit `_ is required to use GPUs inside docker containers. -How benchmarks are run -====================== - -The `cuvs-bench` package contains lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. - -#. Prepare Dataset - -#. Build Index and Search Index - -#. Data Export - -#. Plot Results - -Step 1: Prepare the dataset ---------------------------- - -The script `cuvs_bench.get_dataset` will download and unpack the dataset in directory that the user provides. As of now, only million-scale datasets are supported by this script. For more information on :doc:`datasets and formats `. - -The usage of this script is: - -.. code-block:: bash - - usage: get_dataset.py [-h] [--name NAME] [--dataset-path DATASET_PATH] [--normalize] - - options: - -h, --help show this help message and exit - --dataset DATASET dataset to download (default: glove-100-angular) - --dataset-path DATASET_PATH - path to download dataset (default: ${RAPIDS_DATASET_ROOT_DIR}) - --normalize normalize cosine distance to inner product (default: False) - -When option `normalize` is provided to the script, any dataset that has cosine distances -will be normalized to inner product. So, for example, the dataset `glove-100-angular` -will be written at location `datasets/glove-100-inner/`. 
- -Step 2: Build and search index ------------------------------- - -The script `cuvs_bench.run` will build and search indices for a given dataset and its -specified configuration. - -The usage of the script `cuvs_bench.run` is: - -.. code-block:: bash - - usage: __main__.py [-h] [--subset-size SUBSET_SIZE] [-k COUNT] [-bs BATCH_SIZE] [--dataset-configuration DATASET_CONFIGURATION] [--configuration CONFIGURATION] [--dataset DATASET] - [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] [-f] [-m SEARCH_MODE] - - options: - -h, --help show this help message and exit - --subset-size SUBSET_SIZE - the number of subset rows of the dataset to build the index (default: None) - -k COUNT, --count COUNT - the number of nearest neighbors to search for (default: 10) - -bs BATCH_SIZE, --batch-size BATCH_SIZE - number of query vectors to use in each query trial (default: 10000) - --dataset-configuration DATASET_CONFIGURATION - path to YAML configuration file for datasets (default: None) - --configuration CONFIGURATION - path to YAML configuration file or directory for algorithms Any run groups found in the specified file/directory will automatically override groups of the same name - present in the default configurations, including `base` (default: None) - --dataset DATASET name of dataset (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets subdirectory from the calling directory (default: - os.getcwd()/datasets/) - --build - --search - --algorithms ALGORITHMS - run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, then group `base` is run by default (default: None) - --groups GROUPS run only comma separated groups of parameters (default: base) - --algo-groups ALGO_GROUPS - add comma separated . to run. 
Example usage: "--algo-groups=cuvs_cagra.large,hnswlib.large" (default: None) - -f, --force re-run algorithms even if their results already exist (default: False) - -m SEARCH_MODE, --search-mode SEARCH_MODE - run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput) - -t SEARCH_THREADS, --search-threads SEARCH_THREADS - specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is - specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None) - -r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but - will not actually execute the command. (default: False) - -`dataset`: name of the dataset to be searched in `datasets.yaml`_ - -`dataset-configuration`: optional filepath to custom dataset YAML config which has an entry for arg `dataset` - -`configuration`: optional filepath to YAML configuration for an algorithm or to directory that contains YAML configurations for several algorithms. Refer to `Dataset.yaml config`_ for more info. - -`algorithms`: runs all algorithms that it can find in YAML configs found by `configuration`. By default, only `base` group will be run. - -`groups`: run only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group - -`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to run the benchmark for in addition to all the arguments from `algorithms` and `groups`. 
It is of the format `.`, or for example, `cuvs_cagra.large` - -For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/<{algo},{group}.json>` -and an index search statistics JSON file in `/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`. - -For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/<{algo},{group}.json>` -and an index search statistics JSON file in `/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`. - -`dataset-path` : -#. data is read from `/` -#. indices are built in `//index` -#. build/search results are stored in `//result` - -`build` and `search` : if both parameters are not supplied to the script then it is assumed both are `True`. - -`indices` and `algorithms` : these parameters ensure that the algorithm specified for an index is available in `algos.yaml` and not disabled, as well as having an associated executable. - -Step 3: Data export -------------------- - -The script `cuvs_bench.data_export` will convert the intermediate JSON outputs produced by `cuvs_bench.run` to more easily readable CSV files, which are needed to build charts made by `cuvs_bench.plot`. - -.. code-block:: bash - - usage: data_export.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] - - options: - -h, --help show this help message and exit - --dataset DATASET dataset to download (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) - -Build statistics CSV file is stored in `/result/build/<{algo},{group}.csv>` -and index search statistics CSV file in `/result/search/<{algo},{group},k{k},bs{batch_size},{suffix}.csv>`, where suffix has three values: -#. `raw`: All search results are exported -#. 
`throughput`: Pareto frontier of throughput results is exported -#. `latency`: Pareto frontier of latency results is exported - -Step 4: Plot the results ------------------------- - -The script `cuvs_bench.plot` will plot results for all algorithms found in index search statistics CSV files `/result/search/*.csv`. - -The usage of this script is: - -.. code-block:: bash - - usage: [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filepath OUTPUT_FILEPATH] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] - [-k COUNT] [-bs BATCH_SIZE] [--build] [--search] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--x-start X_START] [--mode {throughput,latency}] - [--time-unit {s,ms,us}] [--raw] - - options: - -h, --help show this help message and exit - --dataset DATASET dataset to plot (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder (default: /home/coder/cuvs/datasets/) - --output-filepath OUTPUT_FILEPATH - directory for PNG to be saved (default: /home/coder/cuvs) - --algorithms ALGORITHMS - plot only comma separated list of named algorithms. If parameters `groups` and `algo-groups are both undefined, then group `base` is plot by default - (default: None) - --groups GROUPS plot only comma separated groups of parameters (default: base) - --algo-groups ALGO_GROUPS, --algo-groups ALGO_GROUPS - add comma separated . to plot. Example usage: "--algo-groups=cuvs_cagra.large,hnswlib.large" (default: None) - -k COUNT, --count COUNT - the number of nearest neighbors to search for (default: 10) - -bs BATCH_SIZE, --batch-size BATCH_SIZE - number of query vectors to use in each query trial (default: 10000) - --build - --search - --x-scale X_SCALE Scale to use when drawing the X-axis. 
Typically linear, logit or a2 (default: linear) - --y-scale {linear,log,symlog,logit} - Scale to use when drawing the Y-axis (default: linear) - --x-start X_START Recall values to start the x-axis from (default: 0.8) - --mode {throughput,latency} - search mode whose Pareto frontier is used on the y-axis (default: throughput) - --time-unit {s,ms,us} - time unit to plot when mode is latency (default: ms) - --raw Show raw results (not just Pareto frontier) of mode arg (default: False) - -`mode`: plots pareto frontier of `throughput` or `latency` results exported in the previous step - -`algorithms`: plots all algorithms that it can find results for the specified `dataset`. By default, only `base` group will be plotted. - -`groups`: plot only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group - -`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to plot results for in addition to all the arguments from `algorithms` and `groups`. It is of the format `.`, or for example, `cuvs_cagra.large` - Running the benchmarks ====================== @@ -576,7 +387,7 @@ Creating and customizing dataset configurations A single configuration will often define a set of algorithms, with associated index and search parameters, that can be generalize across datasets. We use YAML to define dataset specific and algorithm specific configurations. -A default `datasets.yaml` is provided by CUVS in `${CUVS_HOME}/python/cuvs-ann-bench/src/cuvs_bench/run/conf` with configurations available for several datasets. Here's a simple example entry for the `sift-128-euclidean` dataset: +A default `datasets.yaml` is provided by CUVS in `${CUVS_HOME}/python/cuvs_bench/src/cuvs_bench/run/conf` with configurations available for several datasets. Here's a simple example entry for the `sift-128-euclidean` dataset: .. 
code-block:: yaml diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 9554207bb..b0d0ae9ee 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -48,13 +48,23 @@ add_executable(VAMANA_EXAMPLE src/vamana_example.cu) add_library(rmm_logger OBJECT) target_link_libraries(rmm_logger PRIVATE rmm::rmm_logger_impl) -target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) target_link_libraries( - CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger + CAGRA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger ) target_link_libraries( - DYNAMIC_BATCHING_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger + CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + rmm_logger +) +target_link_libraries( + DYNAMIC_BATCHING_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + rmm_logger +) +target_link_libraries( + IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger +) +target_link_libraries( + IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger +) +target_link_libraries( + VAMANA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger ) -target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) -target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) -target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index c0990995f..f3feae9a7 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -31,18 +31,6 @@ project( C CXX CUDA ) -# ################################################################################################## -# * User Options -------------------------------------------------------------- - -option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulting to local files" - OFF -) -option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) - -message( - "CUVS_PY: Searching for existing cuVS C/C++ installations before defaulting to 
local files: ${FIND_CUVS_CPP}" -) - # ################################################################################################## # * Process User Options ------------------------------------------------------ @@ -54,56 +42,14 @@ include(rapids-find) rapids_cpm_init() -# If the user requested it we attempt to find CUVS. -if(FIND_CUVS_CPP) - find_package(cuvs "${RAPIDS_VERSION}" REQUIRED COMPONENTS c_api) - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cuvs_FOUND OFF) -endif() +# --- cuVS ---# +find_package(cuvs "${RAPIDS_VERSION}" REQUIRED COMPONENTS c_api) -if(NOT cuvs_FOUND) - find_package(CUDAToolkit REQUIRED) +# --- dlpack ---# +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) - set(BUILD_TESTS OFF) - set(BUILD_C_LIBRARY ON) - - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) - set(CUVS_USE_RAFT_STATIC ON) - - if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) - set(CUDA_STATIC_MATH_LIBRARIES OFF) - elseif(USE_CUDA_MATH_WHEELS) - message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") - endif() - - add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) - - if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) - set(rpaths - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" - ) - set_property( - TARGET cuvs - PROPERTY INSTALL_RPATH ${rpaths} - APPEND - ) - set_property( - TARGET cuvs_c - PROPERTY INSTALL_RPATH ${rpaths} - APPEND - ) - endif() - - set(cython_lib_dir cuvs) - install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) -endif() +# ensure Cython targets can find dlpack headers (these do not come installed with cuVS) +target_include_directories(cuvs::cuvs INTERFACE "$") # ################################################################################################## # * Build Cython artifacts
----------------------------------------------------- @@ -116,7 +62,3 @@ target_link_libraries(cuvs_rmm_logger PRIVATE rmm::rmm_logger_impl) add_subdirectory(cuvs/common) add_subdirectory(cuvs/distance) add_subdirectory(cuvs/neighbors) - -if(DEFINED cython_lib_dir) - rapids_cython_add_rpath_entries(TARGET cuvs PATHS "${cython_lib_dir}") -endif() diff --git a/python/cuvs/cuvs/__init__.py b/python/cuvs/cuvs/__init__.py index 9f0481cb7..1a41f0d76 100644 --- a/python/cuvs/cuvs/__init__.py +++ b/python/cuvs/cuvs/__init__.py @@ -13,4 +13,15 @@ # limitations under the License. # +# If libcuvs was installed as a wheel, we must request it to load the library +# symbols. Otherwise, we assume that the library was installed in a system path that ld +# can find. +try: + import libcuvs +except ModuleNotFoundError: + pass +else: + libcuvs.load_library() + del libcuvs + from cuvs._version import __git_commit__, __version__ diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index 361f2fafc..b0e1cb335 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX common_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/common/c_api.pxd b/python/cuvs/cuvs/common/c_api.pxd index f99fd5348..dae93d750 100644 --- a/python/cuvs/cuvs/common/c_api.pxd +++ b/python/cuvs/cuvs/common/c_api.pxd @@ -16,7 +16,7 @@ # cython: language_level=3 -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from libc.stdint cimport uintptr_t diff --git a/python/cuvs/cuvs/common/resources.pyx b/python/cuvs/cuvs/common/resources.pyx index c0b72ae34..0edf53fc1 100644 --- 
a/python/cuvs/cuvs/common/resources.pyx +++ b/python/cuvs/cuvs/common/resources.pyx @@ -17,7 +17,7 @@ import functools -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from cuvs.common.c_api cimport ( cuvsResources_t, diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt index 514b08c43..ded07395c 100644 --- a/python/cuvs/cuvs/distance/CMakeLists.txt +++ b/python/cuvs/cuvs/distance/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX distance_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 031fd485e..b9161eefc 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -27,7 +27,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_refine_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_refine_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt index 61eda649c..3c646f498 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt @@ -20,8 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX - neighbors_brute_force_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_brute_force_ ) 
foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt index 1f40daab2..6cf0956a2 100644 --- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_cagra_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt index a678852d9..43e008363 100644 --- a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_prefilter_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_prefilter_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt index 8351916e6..c33313c3c 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_hnsw_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_hnsw_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt index 
f5663cdaa..eadb8934c 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_ivf_flat_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivf_flat_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt index a24320ded..df61793b8 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt @@ -20,7 +20,7 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_pq_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_pq_ ) foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index 483d5d201..370dd773a 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ -21,6 +21,7 @@ from cuvs.distance import pairwise_distance +@pytest.mark.parametrize("times", range(20)) @pytest.mark.parametrize("n_rows", [50, 100]) @pytest.mark.parametrize("n_cols", [10, 50]) @pytest.mark.parametrize( @@ -43,7 +44,7 @@ @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_distance(n_rows, n_cols, inplace, order, metric, dtype): +def test_distance(n_rows, n_cols, inplace, order, metric, dtype, times): input1 = np.random.random_sample((n_rows, n_cols)) input1 = np.asarray(input1, order=order).astype(dtype) @@ -79,7 +80,5 @@ 
def test_distance(n_rows, n_cols, inplace, order, metric, dtype): actual = output_device.copy_to_host() tol = 1e-3 - if np.issubdtype(dtype, np.float16): - tol = 1e-1 assert np.allclose(expected, actual, atol=tol, rtol=tol) diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 155e454a8..30658623b 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -33,10 +33,6 @@ requires-python = ">=3.10" dependencies = [ "cuda-python", "numpy>=1.23,<3.0a0", - "nvidia-cublas", - "nvidia-curand", - "nvidia-cusolver", - "nvidia-cusparse", "pylibraft==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -59,12 +55,6 @@ test = [ Homepage = "https://github.com/rapidsai/cuvs" Documentation = "https://docs.rapids.ai/api/cuvs/stable/" -[tool.setuptools] -license-files = ["LICENSE"] - -[tool.setuptools.dynamic] -version = {file = "cuvs/VERSION"} - [tool.isort] line_length = 79 multi_line_output = 3 @@ -127,18 +117,23 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cuda-python", "cython>=3.0.0", + "libcuvs==25.2.*,>=0.0.0a0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +matrix-entry = "cuda_suffixed=true" [tool.pydistcheck] select = [ - # NOTE: size threshold is managed via CLI args in CI scripts "distro-too-large-compressed", ] +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] filterwarnings = [ "error", diff --git a/python/libcuvs/CMakeLists.txt b/python/libcuvs/CMakeLists.txt new file mode 100644 index 000000000..569652b71 --- /dev/null +++ b/python/libcuvs/CMakeLists.txt @@ -0,0 +1,69 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +include(rapids-cuda) +rapids_cuda_init_architectures(libcuvs-python) + +project( + libcuvs-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX CUDA +) + +# Check if cuVS is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python cuvs package. 
+find_package(cuvs "${RAPIDS_VERSION}") + +if(cuvs_FOUND) + return() +endif() + +unset(cuvs_FOUND) + +# --- CUDA --- # +set(CUDA_STATIC_RUNTIME ON) +set(CUDA_STATIC_MATH_LIBRARIES OFF) + +# --- RAFT ---# +set(CUVS_USE_RAFT_STATIC OFF) + +# --- cuVS ---# +set(BUILD_TESTS OFF) +set(BUILD_C_LIBRARY ON) +set(CUVS_COMPILE_DYNAMIC_ONLY ON) + +add_subdirectory(../../cpp cuvs-cpp) + +# assumes libcuvs.so is installed 2 levels deep, e.g. site-packages/libcuvs/lib64/libcuvs.so +set(rpaths + "$ORIGIN/../../nvidia/cublas/lib" + "$ORIGIN/../../nvidia/curand/lib" + "$ORIGIN/../../nvidia/cusolver/lib" + "$ORIGIN/../../nvidia/cusparse/lib" + "$ORIGIN/../../nvidia/nvjitlink/lib" +) +set_property( + TARGET cuvs + PROPERTY INSTALL_RPATH ${rpaths} + APPEND +) +set_property( + TARGET cuvs_c + PROPERTY INSTALL_RPATH ${rpaths} + APPEND +) diff --git a/python/libcuvs/LICENSE b/python/libcuvs/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/python/libcuvs/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libcuvs/README.md b/python/libcuvs/README.md new file mode 120000 index 000000000..fe8400541 --- /dev/null +++ b/python/libcuvs/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/libcuvs/libcuvs/VERSION b/python/libcuvs/libcuvs/VERSION new file mode 120000 index 000000000..d62dc733e --- /dev/null +++ b/python/libcuvs/libcuvs/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libcuvs/libcuvs/__init__.py b/python/libcuvs/libcuvs/__init__.py new file mode 100644 index 000000000..2d3a86015 --- /dev/null +++ b/python/libcuvs/libcuvs/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcuvs._version import __git_commit__, __version__ +from libcuvs.load import load_library diff --git a/python/libcuvs/libcuvs/_version.py b/python/libcuvs/libcuvs/_version.py new file mode 100644 index 000000000..530bf8bea --- /dev/null +++ b/python/libcuvs/libcuvs/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libcuvs/libcuvs/load.py b/python/libcuvs/libcuvs/load.py new file mode 100644 index 000000000..a9c6a9325 --- /dev/null +++ b/python/libcuvs/libcuvs/load.py @@ -0,0 +1,100 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + +# Loading with RTLD_LOCAL adds the library itself to the loader's +# loaded library cache without loading any symbols into the global +# namespace. This allows libraries that express a dependency on +# this library to be loaded later and successfully satisfy this dependency +# without polluting the global symbol table with symbols from +# libcuvs that could conflict with symbols from other DSOs. +PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL + + +def _load_system_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Raises ``OSError`` if library cannot be loaded. + """ + return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG) + + +def _load_wheel_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Returns ``None`` if the library cannot be loaded. + """ + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), "lib64", soname) + ): + return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG) + return None + + +def load_library(): + """Dynamically load libcuvs.so and its dependencies""" + try: + # libraft must be loaded before libcuvs because libcuvs + # references its symbols + import libraft + + libraft.load_library() + except ModuleNotFoundError: + # 'libcuvs' has a runtime dependency on 'libraft'. 
However, + # that dependency might be satisfied by the 'libraft' conda package + # (which does not have any Python modules), instead of the + # 'libraft' wheel. + # + # In that situation, assume that 'libraft.so' is in a place where + # the loader can find it. + pass + + prefer_system_installation = ( + os.getenv("RAPIDS_LIBCUVS_PREFER_SYSTEM_LIBRARY", "false").lower() + != "false" + ) + + libs_to_return = [] + for soname in ["libcuvs.so", "libcuvs_c.so"]: + libcuvs_lib = None + if prefer_system_installation: + # Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, + # but if no other library is present use the one in the wheel. + try: + libcuvs_lib = _load_system_installation(soname) + except OSError: + libcuvs_lib = _load_wheel_installation(soname) + else: + # Prefer the libraries bundled in this package. If they aren't + # found (which might be the case in builds where the library was + # prebuilt before packaging the wheel), look for a system + # installation. + try: + libcuvs_lib = _load_wheel_installation(soname) + if libcuvs_lib is None: + libcuvs_lib = _load_system_installation(soname) + except OSError: + # If none of the searches above succeed, just silently return + # None and rely on other mechanisms (like RPATHs on other DSOs) + # to help the loader find the library. + pass + if libcuvs_lib: + libs_to_return.append(libcuvs_lib) + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libcuvs was loaded from. + return libs_to_return diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml new file mode 100644 index 000000000..28443b782 --- /dev/null +++ b/python/libcuvs/pyproject.toml @@ -0,0 +1,108 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +build-backend = "rapids_build_backend.build" + +[project] +name = "libcuvs" +dynamic = ["version"] +description = "cuVS: Vector Search on the GPU (C++)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +dependencies = [ + "libraft==25.2.*,>=0.0.0a0", + "nvidia-cublas", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+classifiers = [ + "Intended Audience :: Developers", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cuvs" +Documentation = "https://docs.rapids.ai/api/cuvs/stable/" + +[project.entry-points."cmake.prefix"] +libcuvs = "libcuvs" + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_first_party = [ + "libcuvs", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" +ninja.make-fallback = true +sdist.reproducible = true +wheel.install-dir = "libcuvs" +wheel.packages = ["libcuvs"] +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libcuvs/VERSION" +regex = "(?P<value>.*)" + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", + "ninja", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" + +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# detect when package size grows significantly +max_allowed_size_compressed = '1.1G'