diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index 531debcf8..f83de4c22 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.04-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.04-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json
similarity index 91%
rename from .devcontainer/cuda12.5-conda/devcontainer.json
rename to .devcontainer/cuda12.8-conda/devcontainer.json
index 145363009..4b374a218 100644
--- a/.devcontainer/cuda12.5-conda/devcontainer.json
+++ b/.devcontainer/cuda12.8-conda/devcontainer.json
@@ -3,7 +3,7 @@
     "context": "${localWorkspaceFolder}/.devcontainer",
     "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
     "args": {
-      "CUDA": "12.5",
+      "CUDA": "12.8",
       "PYTHON_PACKAGE_MANAGER": "conda",
       "BASE": "rapidsai/devcontainers:25.04-cpp-mambaforge-ubuntu22.04"
     }
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.5-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
@@ -20,7 +20,7 @@
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
-  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
+  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"],
   "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
   "workspaceFolder": "/home/coder",
   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cuvs,type=bind,consistency=consistent",
@@ -29,7 +29,7 @@
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json
similarity index 88%
rename from .devcontainer/cuda12.5-pip/devcontainer.json
rename to .devcontainer/cuda12.8-pip/devcontainer.json
index 572fb431a..1548c63ac 100644
--- a/.devcontainer/cuda12.5-pip/devcontainer.json
+++ b/.devcontainer/cuda12.8-pip/devcontainer.json
@@ -3,20 +3,20 @@
     "context": "${localWorkspaceFolder}/.devcontainer",
     "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
     "args": {
-      "CUDA": "12.5",
+      "CUDA": "12.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.04-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.04-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.5-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
     "ghcr.io/rapidsai/devcontainers/features/cuda:25.4": {
-      "version": "12.5",
+      "version": "12.8",
       "installcuBLAS": true,
       "installcuSOLVER": true,
       "installcuRAND": true,
@@ -29,7 +29,7 @@
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
-  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"],
+  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"],
   "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
   "workspaceFolder": "/home/coder",
   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cuvs,type=bind,consistency=consistent",
@@ -37,7 +37,7 @@
     "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index a4d10ef46..d7e27e65d 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -124,7 +124,7 @@ jobs:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.8.0-ubuntu24.04-py3.12"
       run_script: "ci/build_docs.sh"
   rust-build:
     needs: conda-cpp-build
@@ -165,7 +165,7 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04
     with:
       arch: '["amd64"]'
-      cuda: '["12.5"]'
+      cuda: '["12.8"]'
       build_command: |
         sccache -z;
         build-all --verbose;
diff --git a/.gitignore b/.gitignore
index da6eb07f6..a70ca197f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,3 +83,6 @@ ivf_pq_index
 # cuvs_bench
 datasets/
 /*.json
+
+# java
+.classpath
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 240f82be6..1124d3814 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,11 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 repos:
+      - repo: https://github.com/pre-commit/pre-commit-hooks
+        rev: v5.0.0
+        hooks:
+              - id: trailing-whitespace
+              - id: end-of-file-fixer
       - repo: https://github.com/PyCQA/isort
         rev: 5.12.0
         hooks:
@@ -81,7 +86,7 @@ repos:
                 exclude: .*/thirdparty/.*
               - id: include-check
                 name: include-check
-                entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/test
+                entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/tests
                 pass_filenames: false
                 language: python
                 additional_dependencies: [gitpython]
diff --git a/README.md b/README.md
index 7ee0875dd..abbfb2696 100755
--- a/README.md
+++ b/README.md
@@ -29,9 +29,9 @@ cuVS contains state-of-the-art implementations of several algorithms for running
 
 Vector search is an information retrieval method that has been growing in popularity over the past few  years, partly because of the rising importance of multimedia embeddings created from unstructured data and the need to perform semantic search on the embeddings to find items which are semantically similar to each other.
 
-Vector search is also used in _data mining and machine learning_ tasks and comprises an important step in many _clustering_ and _visualization_ algorithms like [UMAP](https://arxiv.org/abs/2008.00325), [t-SNE](https://lvdmaaten.github.io/tsne/), K-means, and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/how_hdbscan_works.html). 
+Vector search is also used in _data mining and machine learning_ tasks and comprises an important step in many _clustering_ and _visualization_ algorithms like [UMAP](https://arxiv.org/abs/2008.00325), [t-SNE](https://lvdmaaten.github.io/tsne/), K-means, and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/how_hdbscan_works.html).
 
-Finally, faster vector search enables interactions between dense vectors and graphs. Converting a pile of dense vectors into nearest neighbors graphs unlocks the entire world of graph analysis algorithms, such as those found in [GraphBLAS](https://graphblas.org/) and [cuGraph](https://github.com/rapidsai/cugraph). 
+Finally, faster vector search enables interactions between dense vectors and graphs. Converting a pile of dense vectors into nearest neighbors graphs unlocks the entire world of graph analysis algorithms, such as those found in [GraphBLAS](https://graphblas.org/) and [cuGraph](https://github.com/rapidsai/cugraph).
 
 Below are some common use-cases for vector search
 
@@ -45,7 +45,7 @@ Below are some common use-cases for vector search
   - Audio search
   - Molecular search
   - Model training
-  
+
 
 - ### Data mining
   - Clustering algorithms
@@ -71,7 +71,7 @@ In addition to the items above, cuVS takes on the burden of keeping non-trivial
 
 ## cuVS Technology Stack
 
-cuVS is built on top of the RAPIDS RAFT library of high performance machine learning primitives and provides all the necessary routines for vector search and clustering on the GPU. 
+cuVS is built on top of the RAPIDS RAFT library of high performance machine learning primitives and provides all the necessary routines for vector search and clustering on the GPU.
 
 ![cuVS is built on top of low-level CUDA libraries and provides many important routines that enable vector search and clustering on the GPU](img/tech_stack.png "cuVS Technology Stack")
 
@@ -103,7 +103,7 @@ pip install cuvs-cu11 --extra-index-url=https://pypi.nvidia.com
 And CUDA 12 packages:
 ```bash
 pip install cuvs-cu12 --extra-index-url=https://pypi.nvidia.com
-```    
+```
 
 ### Nightlies
 If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`:
@@ -169,7 +169,7 @@ cuvsCagraIndexParamsDestroy(index_params);
 cuvsResourcesDestroy(res);
 ```
 
-For more code examples of the C APIs, including drop-in Cmake project templates, please refer to the [C examples](https://github.com/rapidsai/cuvs/tree/branch-24.10/examples/c)
+For more code examples of the C APIs, including drop-in CMake project templates, please refer to the [C examples](https://github.com/rapidsai/cuvs/tree/branch-25.04/examples/c)
 
 ### Rust API
 
@@ -232,7 +232,7 @@ fn cagra_example() -> Result<()> {
 }
 ```
 
-For more code examples of the Rust APIs, including a drop-in project templates, please refer to the [Rust examples](https://github.com/rapidsai/cuvs/tree/branch-24.10/examples/rust).
+For more code examples of the Rust APIs, including drop-in project templates, please refer to the [Rust examples](https://github.com/rapidsai/cuvs/tree/branch-25.04/examples/rust).
 
 ## Contributing
 
@@ -240,7 +240,7 @@ If you are interested in contributing to the cuVS library, please read our [Cont
 
 ## References
 
-For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research. 
+For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research.
 - [CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search](https://arxiv.org/abs/2308.15136)
 - [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062)
 - [Fast K-NN Graph Construction by GPU Based NN-Descent](https://dl.acm.org/doi/abs/10.1145/3459637.3482344?casa_token=O_nan1B1F5cAAAAA:QHWDEhh0wmd6UUTLY9_Gv6c3XI-5DXM9mXVaUXOYeStlpxTPmV3nKvABRfoivZAaQ3n8FWyrkWw>)
diff --git a/build.sh b/build.sh
index 3b9a9a3a8..89e1b5a33 100755
--- a/build.sh
+++ b/build.sh
@@ -18,7 +18,7 @@ ARGS=$*
 # scripts, and that this script resides in the repo dir!
 REPODIR=$(cd $(dirname $0); pwd)
 
-VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall  -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
+VALIDARGS="clean libcuvs python rust java docs tests bench-ann examples --uninstall  -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
 HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<tool>] [--limit-tests=<targets>] [--limit-bench-ann=<targets>] [--build-metrics=<filename>]
  where <target> is:
    clean            - remove all existing build artifacts and configuration (start over)
@@ -26,6 +26,7 @@ HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<to
                       around the C++ code.
    python           - build the cuvs Python package
    rust             - build the cuvs Rust bindings
+   java             - build the cuvs Java bindings
    docs             - build the documentation
    tests            - build the tests
    bench-ann        - build end-to-end ann benchmarks
@@ -61,7 +62,8 @@ SPHINX_BUILD_DIR=${REPODIR}/docs
 DOXYGEN_BUILD_DIR=${REPODIR}/cpp/doxygen
 PYTHON_BUILD_DIR=${REPODIR}/python/cuvs/_skbuild
 RUST_BUILD_DIR=${REPODIR}/rust/target
-BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${RUST_BUILD_DIR}"
+JAVA_BUILD_DIR=${REPODIR}/java/cuvs-java/target
+BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${RUST_BUILD_DIR} ${JAVA_BUILD_DIR}"
 
 # Set defaults for vars modified by flags to this script
 CMAKE_LOG_LEVEL=""
@@ -445,6 +447,15 @@ if (( ${NUMARGS} == 0 )) || hasArg rust; then
     cargo test
 fi
 
+# Build the cuvs Java bindings; the libcuvs hint is only shown when targets were named explicitly
+if (( ${NUMARGS} == 0 )) || hasArg java; then
+    if (( ${NUMARGS} != 0 )) && ! hasArg libcuvs; then
+        echo "Please add 'libcuvs' to this script's arguments (ex. './build.sh libcuvs java') if libcuvs libraries are not already built"
+    fi
+    cd "${REPODIR}/java"
+    ./build.sh
+fi
+
 export RAPIDS_VERSION="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/' "${REPODIR}/VERSION")"
 export RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/' "${REPODIR}/VERSION")"
 
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 7562035a9..3385d4b61 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -28,6 +28,7 @@ NEXT_UCXX_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG
 # Need to distutils-normalize the original version
 NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")
 NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))")
+PATCH_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_PATCH}'))")
 
 echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
 
@@ -78,6 +79,10 @@ done
 sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md
 
 sed_runner "s|=[0-9][0-9].[0-9][0-9]|=${NEXT_SHORT_TAG}|g" README.md
+sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" README.md
+
+# references to license files
+sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" python/cuvs_bench/cuvs_bench/plot/__main__.py
 
 # rust can't handle leading 0's in the major/minor/patch version - remove
 NEXT_FULL_RUST_TAG=$(printf "%d.%d.%d" $((10#$NEXT_MAJOR)) $((10#$NEXT_MINOR)) $((10#$NEXT_PATCH)))
@@ -92,3 +97,10 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r
     sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
     sed_runner "s@rapids-\${localWorkspaceFolderBasename}-${CURRENT_SHORT_TAG}@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}"
 done
+
+# Update Java API version
+NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}"
+sed_runner "s/VERSION=\".*\"/VERSION=\"${NEXT_FULL_JAVA_TAG}\"/g" java/build.sh
+for FILE in java/*/pom.xml; do
+  sed_runner "/<!--CUVS_JAVA#VERSION_UPDATE_MARKER_START-->.*<!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->/s//<!--CUVS_JAVA#VERSION_UPDATE_MARKER_START--><version>${NEXT_FULL_JAVA_TAG}<\/version><!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->/g" "${FILE}"
+done
diff --git a/ci/run_cuvs_pytests.sh b/ci/run_cuvs_pytests.sh
index 57df9af94..4de8927b1 100755
--- a/ci/run_cuvs_pytests.sh
+++ b/ci/run_cuvs_pytests.sh
@@ -6,4 +6,4 @@ set -euo pipefail
 # Support invoking run_pytests.sh outside the script directory
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuvs/cuvs
 
-pytest --cache-clear --verbose "$@" test
+pytest --cache-clear --verbose "$@" tests
diff --git a/ci/test_python.sh b/ci/test_python.sh
index b9c394062..e8749fe79 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -52,7 +52,7 @@ pytest \
  --cov=cuvs \
  --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \
  --cov-report=term \
- test
+ tests
 
 rapids-logger "Test script exiting with value: $EXITCODE"
 exit ${EXITCODE}
diff --git a/ci/test_wheel_cuvs.sh b/ci/test_wheel_cuvs.sh
index 862c69a3a..cb74c5206 100755
--- a/ci/test_wheel_cuvs.sh
+++ b/ci/test_wheel_cuvs.sh
@@ -13,4 +13,4 @@ python -m pip install \
     ./local-libcuvs-dep/libcuvs*.whl \
     "$(echo ./dist/cuvs*.whl)[test]"
 
-python -m pytest ./python/cuvs/cuvs/test
+python -m pytest ./python/cuvs/cuvs/tests
diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml
similarity index 95%
rename from conda/environments/all_cuda-125_arch-aarch64.yaml
rename to conda/environments/all_cuda-128_arch-aarch64.yaml
index 15a170012..c657e501d 100644
--- a/conda/environments/all_cuda-125_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-128_arch-aarch64.yaml
@@ -17,7 +17,7 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-profiler-api
 - cuda-python>=12.6.2,<13.0a0
-- cuda-version=12.5
+- cuda-version=12.8
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.0
@@ -53,4 +53,4 @@ dependencies:
 - sysroot_linux-aarch64==2.28
 - pip:
   - nvidia-sphinx-theme
-name: all_cuda-125_arch-aarch64
+name: all_cuda-128_arch-aarch64
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
similarity index 95%
rename from conda/environments/all_cuda-125_arch-x86_64.yaml
rename to conda/environments/all_cuda-128_arch-x86_64.yaml
index c35920bdb..d542e834b 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -17,7 +17,7 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-profiler-api
 - cuda-python>=12.6.2,<13.0a0
-- cuda-version=12.5
+- cuda-version=12.8
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.0
@@ -53,4 +53,4 @@ dependencies:
 - sysroot_linux-64==2.28
 - pip:
   - nvidia-sphinx-theme
-name: all_cuda-125_arch-x86_64
+name: all_cuda-128_arch-x86_64
diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml
similarity index 94%
rename from conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
rename to conda/environments/bench_ann_cuda-128_arch-aarch64.yaml
index e43e5b656..352963108 100644
--- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
+++ b/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml
@@ -18,7 +18,7 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-profiler-api
 - cuda-python>=12.6.2,<13.0a0
-- cuda-version=12.5
+- cuda-version=12.8
 - cupy>=12.0.0
 - cuvs==25.4.*,>=0.0.0a0
 - cxx-compiler
@@ -46,4 +46,4 @@ dependencies:
 - setuptools
 - sysroot_linux-aarch64==2.28
 - wheel
-name: bench_ann_cuda-125_arch-aarch64
+name: bench_ann_cuda-128_arch-aarch64
diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml
similarity index 94%
rename from conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
rename to conda/environments/bench_ann_cuda-128_arch-x86_64.yaml
index cbdc36444..d55da7683 100644
--- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml
@@ -18,7 +18,7 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-profiler-api
 - cuda-python>=12.6.2,<13.0a0
-- cuda-version=12.5
+- cuda-version=12.8
 - cupy>=12.0.0
 - cuvs==25.4.*,>=0.0.0a0
 - cxx-compiler
@@ -46,4 +46,4 @@ dependencies:
 - setuptools
 - sysroot_linux-64==2.28
 - wheel
-name: bench_ann_cuda-125_arch-x86_64
+name: bench_ann_cuda-128_arch-x86_64
diff --git a/cpp/.clang-format b/cpp/.clang-format
index 18f376d66..26b9a5bf4 100644
--- a/cpp/.clang-format
+++ b/cpp/.clang-format
@@ -27,7 +27,7 @@ AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: Yes
-BinPackArguments:  false       
+BinPackArguments:  false
 BinPackParameters: false
 BraceWrapping:
   AfterClass:            false
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 11f21db44..3ed3227e0 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -173,6 +173,9 @@ include(cmake/modules/ConfigureCUDA.cmake)
 rapids_cpm_init()
 
 if(NOT BUILD_CPU_ONLY)
+  # We must find CCCL ourselves before raft so that we get the right version.
+  include(${rapids-cmake-dir}/cpm/cccl.cmake)
+  rapids_cpm_cccl(BUILD_EXPORT_SET cuvs-exports INSTALL_EXPORT_SET cuvs-exports)
   include(cmake/thirdparty/get_raft.cmake)
   include(cmake/thirdparty/get_cutlass.cmake)
 endif()
@@ -685,6 +688,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:NVTX_ENAB
       src/neighbors/cagra_c.cpp
       $<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/neighbors/hnsw_c.cpp>
       src/neighbors/refine/refine_c.cpp
+      src/preprocessing/quantize/scalar_c.cpp
       src/distance/pairwise_distance_c.cpp
     )
 
@@ -797,7 +801,7 @@ endif()
 if(BUILD_TESTS)
   enable_testing()
   add_subdirectory(internal)
-  add_subdirectory(test)
+  add_subdirectory(tests)
 endif()
 
 # ##################################################################################################
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index 200b52ab3..1ec9eeb9f 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -20,6 +20,7 @@ list(APPEND CMAKE_MODULE_PATH "${CUVS_SOURCE_DIR}")
 option(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON)
+option(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA "Include faiss' cagra algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark"
        ON
@@ -275,18 +276,28 @@ endif()
 if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT AND CUVS_FAISS_ENABLE_GPU)
   ConfigureAnnBench(
     NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS}
+    raft::raft
   )
 endif()
 
 if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ AND CUVS_FAISS_ENABLE_GPU)
   ConfigureAnnBench(
     NAME FAISS_GPU_IVF_PQ PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS}
+    raft::raft
   )
 endif()
 
 if(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT AND CUVS_FAISS_ENABLE_GPU)
   ConfigureAnnBench(
     NAME FAISS_GPU_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS}
+    raft::raft
+  )
+endif()
+
+if(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA AND CUVS_FAISS_ENABLE_GPU)
+  ConfigureAnnBench(
+    NAME FAISS_GPU_CAGRA PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS}
+    raft::raft
   )
 endif()
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
index e4169f6f8..6670ed892 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
@@ -17,7 +17,9 @@
 
 #include "cuvs_cagra_wrapper.h"
 #include <cuvs/neighbors/hnsw.hpp>
+#include <raft/core/logger.hpp>
 
+#include <chrono>
 #include <memory>
 
 namespace cuvs::bench {
@@ -90,8 +92,13 @@ void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
   auto host_dataset_view = raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, this->dim_);
   auto opt_dataset_view =
     std::optional<raft::host_matrix_view<const T, int64_t>>(std::move(host_dataset_view));
-  hnsw_index_ = cuvs::neighbors::hnsw::from_cagra(
+  const auto start_clock = std::chrono::steady_clock::now();
+  hnsw_index_            = cuvs::neighbors::hnsw::from_cagra(
     handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view);
+  // steady_clock is monotonic, so wall-clock adjustments cannot distort the measured interval
+  const auto elapsed = std::chrono::steady_clock::now() - start_clock;
+  const int time     = std::chrono::duration_cast<std::chrono::seconds>(elapsed).count();
+  RAFT_LOG_DEBUG("Graph saved to HNSW format in %d:%02d min", time / 60, time % 60);
 }
 
 template <typename T, typename IdxT>
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
index 54a0d2fac..0540edc8f 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
@@ -137,4 +137,4 @@ void cuvs_mg_ivf_flat<T, IdxT>::search(
     handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
 }
 
-}  // namespace cuvs::bench
\ No newline at end of file
+}  // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
index 84aea7d4a..65ca1bb11 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
@@ -136,4 +136,4 @@ void cuvs_mg_ivf_pq<T, IdxT>::search(
     handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
 }
 
-}  // namespace cuvs::bench
\ No newline at end of file
+}  // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
index 2d9271639..8483e52d8 100644
--- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
+++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
@@ -42,6 +42,11 @@ void parse_build_param(const nlohmann::json& conf,
                        typename cuvs::bench::faiss_gpu_ivf_flat<T>::build_param& param)
 {
   parse_base_build_param<T>(conf, param);
+  if (conf.contains("use_cuvs")) {
+    param.use_cuvs = conf.at("use_cuvs");
+  } else {
+    param.use_cuvs = false;
+  }
 }
 
 template <typename T>
@@ -60,6 +65,16 @@ void parse_build_param(const nlohmann::json& conf,
   } else {
     param.use_float16 = false;
   }
+  if (conf.contains("use_cuvs")) {
+    param.use_cuvs = conf.at("use_cuvs");
+  } else {
+    param.use_cuvs = false;
+  }
+  if (conf.contains("bitsPerCode")) {
+    param.bitsPerCode = conf.at("bitsPerCode");
+  } else {
+    param.bitsPerCode = 8;
+  }
 }
 
 template <typename T>
@@ -70,11 +85,52 @@ void parse_build_param(const nlohmann::json& conf,
   param.quantizer_type = conf.at("quantizer_type");
 }
 
+// Fill the FAISS GPU CAGRA build parameters from the benchmark's JSON configuration.
+// When a key is absent, graph_degree is 64 and intermediate_graph_degree is 128.
+template <typename T>
+void parse_build_param(const nlohmann::json& conf,
+                       typename cuvs::bench::faiss_gpu_cagra<T>::build_param& param)
+{
+  // Start from the fallbacks, then let explicit config entries override them.
+  param.graph_degree              = 64;
+  param.intermediate_graph_degree = 128;
+  if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); }
+  if (conf.contains("intermediate_graph_degree")) {
+    param.intermediate_graph_degree = conf.at("intermediate_graph_degree");
+  }
+  // cagra_build_algo has no fallback here: it is only assigned when the key is present.
+  if (conf.contains("cagra_build_algo")) { param.cagra_build_algo = conf.at("cagra_build_algo"); }
+}
+
 template <typename T>
 void parse_search_param(const nlohmann::json& conf,
                         typename cuvs::bench::faiss_gpu<T>::search_param& param)
 {
-  param.nprobe = conf.at("nprobe");
+  if (conf.contains("nprobe")) { param.nprobe = conf.at("nprobe"); }
+  if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); }
+}
+
+template <typename T>
+void parse_search_param(const nlohmann::json& conf,
+                        typename cuvs::bench::faiss_gpu_cagra<T>::search_param& param)
+{
+  if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); }
+  if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); }
+  if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); }
+  if (conf.contains("algo")) {
+    const std::string algo_name = conf.at("algo");  // converted once, reused by every branch
+    if (algo_name == "single_cta") {
+      param.p.algo = faiss::gpu::search_algo::SINGLE_CTA;
+    } else if (algo_name == "multi_cta") {
+      param.p.algo = faiss::gpu::search_algo::MULTI_CTA;
+    } else if (algo_name == "multi_kernel") {
+      param.p.algo = faiss::gpu::search_algo::MULTI_KERNEL;
+    } else if (algo_name == "auto") {
+      param.p.algo = faiss::gpu::search_algo::AUTO;
+    } else {
+      THROW("Invalid value for algo: %s", algo_name.c_str());
+    }
+  }
   if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); }
 }
 
@@ -105,6 +161,8 @@ auto create_algo(const std::string& algo_name,
       a = make_algo<T, cuvs::bench::faiss_gpu_ivfsq>(metric, dim, conf);
     } else if (algo_name == "faiss_gpu_flat") {
       a = std::make_unique<cuvs::bench::faiss_gpu_flat<T>>(metric, dim);
+    } else if (algo_name == "faiss_gpu_cagra") {
+      a = make_algo<T, cuvs::bench::faiss_gpu_cagra>(metric, dim, conf);
     }
   }
 
@@ -125,6 +183,10 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con
   } else if (algo_name == "faiss_gpu_flat") {
     auto param = std::make_unique<typename cuvs::bench::faiss_gpu<T>::search_param>();
     return param;
+  } else if (algo_name == "faiss_gpu_cagra") {
+    auto param = std::make_unique<typename cuvs::bench::faiss_gpu_cagra<T>::search_param>();
+    parse_search_param<T>(conf, *param);
+    return param;
   }
   // else
   throw std::runtime_error("invalid algo: '" + algo_name + "'");
diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
index f935e365f..6cf1fe4b0 100644
--- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -17,12 +17,16 @@
 
 #include "../common/ann_types.hpp"
 #include "../common/util.hpp"
+#include "../cuvs/cuvs_ann_bench_utils.h"
+#include <cuvs/neighbors/refine.hpp>
 
 #include <faiss/IndexFlat.h>
+#include <faiss/IndexHNSW.h>
 #include <faiss/IndexIVFFlat.h>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/IndexRefine.h>
 #include <faiss/IndexScalarQuantizer.h>
+#include <faiss/gpu/GpuIndexCagra.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -32,6 +36,10 @@
 #include <faiss/index_io.h>
 #include <omp.h>
 
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/host_mdarray.hpp>
+
 #include <cassert>
 #include <iostream>
 #include <memory>
@@ -41,7 +49,7 @@
 
 namespace {
 
-auto parse_metric_type(cuvs::bench::Metric metric) -> faiss::MetricType
+auto parse_metric_faiss(cuvs::bench::Metric metric) -> faiss::MetricType
 {
   if (metric == cuvs::bench::Metric::kInnerProduct) {
     return faiss::METRIC_INNER_PRODUCT;
@@ -80,7 +88,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
  public:
   using search_param_base = typename algo<T>::search_param;
   struct search_param : public search_param_base {
-    int nprobe;
+    int nprobe         = 1;
     float refine_ratio = 1.0;
     [[nodiscard]] auto needs_dataset() const -> bool override { return refine_ratio > 1.0f; }
   };
@@ -93,7 +101,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
   faiss_gpu(Metric metric, int dim, const build_param& param)
     : algo<T>(metric, dim),
       gpu_resource_{std::make_shared<faiss::gpu::StandardGpuResources>()},
-      metric_type_(parse_metric_type(metric)),
+      metric_type_(parse_metric_faiss(metric)),
       nlist_{param.nlist},
       training_sample_fraction_{1.0 / double(param.ratio)}
   {
@@ -152,6 +160,9 @@ class faiss_gpu : public algo<T>, public algo_gpu {
    * faiss::gpu::StandardGpuResources are thread-safe.
    *
    */
+
+  // owning a configured_raft_resources object takes care of setting the pool memory resource
+  configured_raft_resources handle_{};
   mutable std::shared_ptr<faiss::gpu::StandardGpuResources> gpu_resource_;
   std::shared_ptr<faiss::gpu::GpuIndex> index_;
   std::shared_ptr<faiss::IndexRefineFlat> index_refine_{nullptr};
@@ -160,6 +171,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
   int device_;
   double training_sample_fraction_;
   std::shared_ptr<faiss::SearchParameters> search_params_;
+  std::shared_ptr<faiss::IndexRefineSearchParameters> refine_search_params_{nullptr};
   const T* dataset_;
   float refine_ratio_ = 1.0;
 };
@@ -192,26 +204,75 @@ void faiss_gpu<T>::build(const T* dataset, size_t nrow)
   }
   index_->train(nrow, dataset);  // faiss::gpu::GpuIndexFlat::train() will do nothing
   assert(index_->is_trained);
-  index_->add(nrow, dataset);
+  auto index_cagra = dynamic_cast<faiss::gpu::GpuIndexCagra*>(index_.get());
+  if (index_cagra == nullptr) { index_->add(nrow, dataset); }
 }
 
 template <typename T>
 void faiss_gpu<T>::search(
   const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
 {
+  ASSERT(
+    cuvs::bench::benchmark_n_threads == 1,
+    "Throughput mode disabled. Underlying StandardGpuResources object might not be thread-safe.");
+  using IdxT = faiss::idx_t;
   static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
                 "sizes of size_t and faiss::idx_t are different");
 
-  if (this->refine_ratio_ > 1.0) {
-    // TODO(snanditale): FAISS changed their search APIs to accept the search parameters as a struct
-    // object but their refine API doesn't allow the struct to be passed in. Once this is fixed, we
-    // need to re-enable refinement below
-    // index_refine_->search(batch_size, queries, k, distances,
-    // reinterpret_cast<faiss::idx_t*>(neighbors), this->search_params_.get()); Related FAISS issue:
-    // https://github.com/facebookresearch/faiss/issues/3118
-    throw std::runtime_error(
-      "FAISS doesn't support refinement in their new APIs so this feature is disabled in the "
-      "benchmarks for the time being.");
+  if (refine_ratio_ > 1.0) {
+    if (raft::get_device_for_address(queries) >= 0) {
+      uint32_t k0        = static_cast<uint32_t>(refine_ratio_ * k);
+      auto distances_tmp = raft::make_device_matrix<float, IdxT>(
+        gpu_resource_->getRaftHandle(device_), batch_size, k0);
+      auto candidates =
+        raft::make_device_matrix<IdxT, IdxT>(gpu_resource_->getRaftHandle(device_), batch_size, k0);
+      index_->search(batch_size,
+                     queries,
+                     k0,
+                     distances_tmp.data_handle(),
+                     candidates.data_handle(),
+                     this->search_params_.get());
+      gpu_resource_->getRaftHandle(device_).sync_stream();
+
+      auto queries_host    = raft::make_host_matrix<T, IdxT>(batch_size, index_->d);
+      auto candidates_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k0);
+      auto neighbors_host  = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
+      auto distances_host  = raft::make_host_matrix<float, IdxT>(batch_size, k);
+      auto dataset_v       = raft::make_host_matrix_view<const T, faiss::idx_t>(
+        this->dataset_, index_->ntotal, index_->d);
+
+      raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_);
+
+      raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream());
+      raft::copy(candidates_host.data_handle(),
+                 candidates.data_handle(),
+                 candidates_host.size(),
+                 handle_.get_stream());
+
+      // wait for the queries and candidates to finish copying to host on the handle's stream
+      handle_.sync_stream();
+
+      cuvs::neighbors::refine(handle_,
+                              dataset_v,
+                              queries_host.view(),
+                              candidates_host.view(),
+                              neighbors_host.view(),
+                              distances_host.view(),
+                              parse_metric_type(this->metric_));
+      handle_.sync_stream();
+
+      raft::copy(
+        neighbors, neighbors_host.data_handle(), neighbors_host.size(), handle_.get_stream());
+      raft::copy(
+        distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream());
+    } else {
+      index_refine_->search(batch_size,
+                            queries,
+                            k,
+                            distances,
+                            reinterpret_cast<faiss::idx_t*>(neighbors),
+                            this->refine_search_params_.get());
+    }
   } else {
     index_->search(batch_size,
                    queries,
@@ -253,15 +314,18 @@ void faiss_gpu<T>::load_(const std::string& file)
 template <typename T>
 class faiss_gpu_ivf_flat : public faiss_gpu<T> {
  public:
-  using typename faiss_gpu<T>::build_param;
+  struct build_param : public faiss_gpu<T>::build_param {
+    bool use_cuvs;
+  };
   using typename faiss_gpu<T>::search_param_base;
 
   faiss_gpu_ivf_flat(Metric metric, int dim, const build_param& param)
     : faiss_gpu<T>(metric, dim, param)
   {
     faiss::gpu::GpuIndexIVFFlatConfig config;
-    config.device = this->device_;
-    this->index_  = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
+    config.device   = this->device_;
+    config.use_cuvs = param.use_cuvs;
+    this->index_    = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
       this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
   }
 
@@ -298,6 +362,8 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
     int m;
     bool use_float16;
     bool use_precomputed;
+    bool use_cuvs;
+    int bitsPerCode;
   };
   using typename faiss_gpu<T>::search_param_base;
 
@@ -307,16 +373,17 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
     faiss::gpu::GpuIndexIVFPQConfig config;
     config.useFloat16LookupTables = param.use_float16;
     config.usePrecomputedTables   = param.use_precomputed;
-    config.device                 = this->device_;
-
-    this->index_ =
-      std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
-                                                  dim,
-                                                  param.nlist,
-                                                  param.m,
-                                                  8,  // FAISS only supports bitsPerCode=8
-                                                  this->metric_type_,
-                                                  config);
+    config.use_cuvs               = param.use_cuvs;
+    if (param.use_cuvs) { config.interleavedLayout = param.use_cuvs; }
+    config.device = this->device_;
+
+    this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
+                                                               dim,
+                                                               param.nlist,
+                                                               param.m,
+                                                               param.bitsPerCode,
+                                                               this->metric_type_,
+                                                               config);
   }
 
   void set_search_param(const search_param_base& param) override
@@ -334,6 +401,11 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
       this->index_refine_ =
         std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
       this->index_refine_.get()->k_factor = sp.refine_ratio;
+      faiss::IndexRefineSearchParameters faiss_refine_search_params;
+      faiss_refine_search_params.k_factor          = this->index_refine_.get()->k_factor;
+      faiss_refine_search_params.base_index_params = this->search_params_.get();
+      this->refine_search_params_ =
+        std::make_unique<faiss::IndexRefineSearchParameters>(faiss_refine_search_params);
     }
   }
 
@@ -441,4 +513,56 @@ class faiss_gpu_flat : public faiss_gpu<T> {
   std::unique_ptr<algo<T>> copy() override { return std::make_unique<faiss_gpu_flat<T>>(*this); };
 };
 
+template <typename T>  // benchmark wrapper that owns a faiss::gpu::GpuIndexCagra as index_
+class faiss_gpu_cagra : public faiss_gpu<T> {
+ public:
+  struct build_param : public faiss_gpu<T>::build_param {
+    size_t intermediate_graph_degree;  ///< Copied into GpuIndexCagraConfig unchanged.
+    /// Degree of output graph; copied into GpuIndexCagraConfig unchanged.
+    size_t graph_degree;
+    /// Graph-build algorithm name: "IVF_PQ" selects IVF_PQ, any other value selects NN_DESCENT.
+    std::string cagra_build_algo;
+    /// Number of NN_DESCENT iterations; copied into the config whichever algorithm is chosen.
+    size_t nn_descent_niter;
+  };
+  using typename faiss_gpu<T>::search_param_base;
+  struct search_param : public faiss_gpu<T>::search_param {
+    faiss::gpu::SearchParametersCagra p;  ///< Copied into search_params_ by set_search_param().
+  };
+
+  faiss_gpu_cagra(Metric metric, int dim, const build_param& param)
+    : faiss_gpu<T>(metric, dim, param)
+  {
+    faiss::gpu::GpuIndexCagraConfig config;
+    config.graph_degree              = param.graph_degree;
+    config.intermediate_graph_degree = param.intermediate_graph_degree;
+    config.device                    = this->device_;
+    if (param.cagra_build_algo == "IVF_PQ") {
+      config.build_algo = faiss::gpu::graph_build_algo::IVF_PQ;
+    } else {  // any other string (not only "NN_DESCENT") silently lands here
+      config.build_algo = faiss::gpu::graph_build_algo::NN_DESCENT;
+    }
+    config.nn_descent_niter = param.nn_descent_niter;
+
+    this->index_ = std::make_shared<faiss::gpu::GpuIndexCagra>(
+      this->gpu_resource_.get(), dim, parse_metric_faiss(this->metric_), config);
+  }
+
+  void set_search_param(const search_param_base& param) override
+  {  // `auto` drops the reference, so `sp` is a local copy of the caller's parameters
+    auto sp              = static_cast<const typename faiss_gpu_cagra<T>::search_param&>(param);
+    this->search_params_ = std::make_shared<faiss::gpu::SearchParametersCagra>(sp.p);
+  }
+
+  void save(const std::string& file) const override
+  {  // NOTE(review): IndexHNSWCagra is presumably the CPU-side type used for file I/O; confirm
+    this->template save_<faiss::gpu::GpuIndexCagra, faiss::IndexHNSWCagra>(file);
+  }
+  void load(const std::string& file) override
+  {  // uses the same GPU/CPU type pair as save()
+    this->template load_<faiss::gpu::GpuIndexCagra, faiss::IndexHNSWCagra>(file);
+  }
+  std::unique_ptr<algo<T>> copy() override { return std::make_unique<faiss_gpu_cagra<T>>(*this); };
+};
+
 }  // namespace cuvs::bench
diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json
index f7cc50e51..3c568d976 100644
--- a/cpp/cmake/config.json
+++ b/cpp/cmake/config.json
@@ -9,7 +9,7 @@
           "VERSION": "?",
           "GIT_SHALLOW": "?",
           "OPTIONS": "*",
-          "FIND_PACKAGE_ARGUMENTS": "*" 
+          "FIND_PACKAGE_ARGUMENTS": "*"
         }
       },
       "ConfigureTest": {
diff --git a/cpp/cmake/patches/cutlass/build-export.patch b/cpp/cmake/patches/cutlass/build-export.patch
index a6423e9c0..31bbd2510 100644
--- a/cpp/cmake/patches/cutlass/build-export.patch
+++ b/cpp/cmake/patches/cutlass/build-export.patch
@@ -20,8 +20,7 @@ index 7419bdf5e..545384d82 100755
 -  $<BUILD_INTERFACE:${cute_SOURCE_DIR}/include>
 -  $<BUILD_INTERFACE:${cute_SOURCE_DIR}/examples>
    )
- 
+
  # Mark CTK headers as system to supress warnings from them
--- 
+--
 2.34.1
-
diff --git a/cpp/cmake/patches/faiss_override.json b/cpp/cmake/patches/faiss_override.json
index c39abdc2b..18542fcf5 100644
--- a/cpp/cmake/patches/faiss_override.json
+++ b/cpp/cmake/patches/faiss_override.json
@@ -6,4 +6,4 @@
         "git_tag": "main"
       }
     }
-  }
\ No newline at end of file
+  }
diff --git a/cpp/cmake/patches/ggnn.diff b/cpp/cmake/patches/ggnn.diff
index fc4529880..c2413110b 100644
--- a/cpp/cmake/patches/ggnn.diff
+++ b/cpp/cmake/patches/ggnn.diff
@@ -3,11 +3,11 @@
 @@ -62,7 +62,7 @@ struct SimpleKNNSymCache {
                                                  const ValueT dist_half)
          : dist_query(dist_query), dist_half(dist_half) {}
- 
+
 -    __device__ __forceinline__ DistQueryAndHalf() {}
 +    DistQueryAndHalf() = default;
    };
- 
+
    struct DistanceAndNorm {
 @@ -98,8 +98,7 @@ struct SimpleKNNSymCache {
      KeyT cache;
@@ -17,7 +17,7 @@
 -    __device__ __forceinline__ SyncTempStorage() {}
 +    SyncTempStorage() = default;
    };
- 
+
   public:
 diff --git a/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh b/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh
 index 8cbaf0d..6eb72ac 100644
@@ -28,23 +28,23 @@ index 8cbaf0d..6eb72ac 100644
  #include "ggnn/utils/cuda_knn_utils.cuh"
  #include "ggnn/utils/cuda_knn_constants.cuh"
 -#include "ggnn/utils/cuda_knn_dataset.cuh"
- 
+
  template <typename ValueT>
  __global__ void divide(ValueT* res, ValueT* input, ValueT N) {
 @@ -98,9 +97,7 @@ struct GGNNGPUInstance {
    typedef GGNNGraphDevice<KeyT, BaseT, ValueT> GGNNGraphDevice;
    typedef GGNNGraphHost<KeyT, BaseT, ValueT> GGNNGraphHost;
- 
+
 -  const Dataset<KeyT, BaseT, BAddrT>* dataset;
    GGNNGraphBuffer<KeyT, ValueT>* ggnn_buffer {nullptr};
 -  GGNNQuery<KeyT, ValueT, BaseT> ggnn_query;
- 
+
    // Graph Shards resident on the GPU
    std::vector<GGNNGraphDevice> ggnn_shards;
 @@ -117,13 +114,12 @@ struct GGNNGPUInstance {
    // number of shards that need to be processed by this instance
    const int num_parts;
- 
+
 -  GGNNGPUInstance(const int gpu_id, const Dataset<KeyT, BaseT, BAddrT>* dataset,
 +  GGNNGPUInstance(const int gpu_id,
              const int N_shard, const int L,
@@ -60,14 +60,14 @@ index 8cbaf0d..6eb72ac 100644
 @@ -135,7 +131,6 @@ struct GGNNGPUInstance {
        CHECK_EQ(current_gpu_id, gpu_id) << "cudaSetDevice() needs to be called in advance!";
      }
- 
+
 -    ggnn_query.loadQueriesAsync(dataset->h_query, 0);
- 
+
      computeGraphParameters();
- 
+
 @@ -186,7 +181,7 @@ struct GGNNGPUInstance {
    }
- 
+
    GGNNGPUInstance(const GGNNGPUInstance& other)
 -   : dataset{nullptr}, ggnn_query{0, D, KQuery},
 +   :
@@ -75,9 +75,9 @@ index 8cbaf0d..6eb72ac 100644
      // this exists to allow using vector::emplace_back
      // when it triggers a reallocation, this code will be called.
 @@ -305,6 +300,7 @@ struct GGNNGPUInstance {
- 
+
    // io
- 
+
 +  /*
    void waitForDiskIO(const int shard_id) {
      auto& cpu_buffer = ggnn_cpu_buffers[shard_id%ggnn_cpu_buffers.size()];
@@ -87,41 +87,41 @@ index 8cbaf0d..6eb72ac 100644
      CHECK_CUDA(cudaPeekAtLastError());
    }
 +  */
- 
+
    // graph operations
- 
+
    template <int BLOCK_DIM_X = 32, int MAX_ITERATIONS = 400, int CACHE_SIZE = 512, int SORTED_SIZE = 256, bool DIST_STATS = false>
 -  void queryLayer(const int shard_id = 0) const {
 +  void queryLayer(const BaseT* d_query, int batch_size, KeyT* d_query_result_ids, ValueT* d_query_result_dists, const int shard_id = 0) const {
      CHECK_CUDA(cudaSetDevice(gpu_id));
      const auto& shard = ggnn_shards.at(shard_id%ggnn_shards.size());
- 
+
 @@ -482,21 +479,21 @@ struct GGNNGPUInstance {
- 
+
      int* m_dist_statistics = nullptr;
      if (DIST_STATS)
 -      cudaMallocManaged(&m_dist_statistics, dataset->N_query * sizeof(int));
 +      cudaMallocManaged(&m_dist_statistics, batch_size * sizeof(int));
- 
+
      QueryKernel query_kernel;
      query_kernel.d_base = shard.d_base;
 -    query_kernel.d_query = ggnn_query.d_query;
 +    query_kernel.d_query = d_query;
- 
+
      query_kernel.d_graph = shard.d_graph;
 -    query_kernel.d_query_results = ggnn_query.d_query_result_ids;
 -    query_kernel.d_query_results_dists = ggnn_query.d_query_result_dists;
 +    query_kernel.d_query_results = d_query_result_ids;
 +    query_kernel.d_query_results_dists = d_query_result_dists;
- 
+
      query_kernel.d_translation = shard.d_translation;
- 
+
      query_kernel.d_nn1_stats = shard.d_nn1_stats;
- 
+
 -    query_kernel.N = dataset->N_query;
 +    query_kernel.N = batch_size;
      query_kernel.N_offset = 0;
- 
+
      query_kernel.d_dist_stats = m_dist_statistics;
 @@ -771,6 +768,16 @@ struct GGNNGPUInstance {
        sym(layer, shard_id);
@@ -138,7 +138,7 @@ index 8cbaf0d..6eb72ac 100644
 +    ggnn_shards.at(0).d_base = dataset;
 +  }
  };
- 
+
  #endif  // INCLUDE_GGNN_CUDA_KNN_GGNN_GPU_INSTANCE_CUH_
 diff --git a/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh b/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh
 index c94a8f1..781226d 100644
@@ -146,11 +146,11 @@ index c94a8f1..781226d 100644
 +++ b/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh
 @@ -50,7 +50,7 @@ struct GGNNGraphDevice {
    ValueT* d_nn1_stats;
- 
+
    /// base data pointer for the shard.
 -  BaseT* d_base;
 +  const BaseT* d_base;
- 
+
    /// combined memory pool
    char* d_memory;
 @@ -69,7 +69,9 @@ struct GGNNGraphDevice {
@@ -161,12 +161,12 @@ index c94a8f1..781226d 100644
 +    // base_size = align8(static_cast<size_t>(N) * D * sizeof(BaseT));
 +    (void) N;
 +    (void) D;
- 
+
      const size_t total_size = base_size+total_graph_size;
- 
+
 @@ -86,8 +88,7 @@ struct GGNNGraphDevice {
      CHECK_CUDA(cudaMalloc(&d_memory, total_size));
- 
+
      size_t pos = 0;
 -    d_base = reinterpret_cast<BaseT*>(d_memory+pos);
 -    pos += base_size;
@@ -175,17 +175,17 @@ index c94a8f1..781226d 100644
      pos += graph_size;
      d_translation = reinterpret_cast<KeyT*>(d_memory+pos);
 @@ -99,14 +100,14 @@ struct GGNNGraphDevice {
- 
+
      CHECK_EQ(pos, total_size);
- 
+
 -    CHECK_CUDA(cudaStreamCreate(&stream));
 +    // CHECK_CUDA(cudaStreamCreate(&stream));
- 
+
      CHECK_CUDA(cudaPeekAtLastError());
      CHECK_CUDA(cudaDeviceSynchronize());
      CHECK_CUDA(cudaPeekAtLastError());
    }
- 
+
 -  GGNNGraphDevice(const GGNNGraphDevice& other) {
 +  GGNNGraphDevice(const GGNNGraphDevice&) {
      // this exists to allow using vector::emplace_back
@@ -194,12 +194,12 @@ index c94a8f1..781226d 100644
 @@ -116,7 +117,7 @@ struct GGNNGraphDevice {
    ~GGNNGraphDevice() {
      cudaFree(d_memory);
- 
+
 -    CHECK_CUDA(cudaStreamDestroy(stream));
 +    // CHECK_CUDA(cudaStreamDestroy(stream));
    }
  };
- 
+
 diff --git a/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh b/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh
 index 2055f9e..ef5843a 100644
 --- a/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh
@@ -207,7 +207,7 @@ index 2055f9e..ef5843a 100644
 @@ -92,7 +92,7 @@ struct GGNNGraphHost {
      CHECK_CUDA(cudaPeekAtLastError());
    }
- 
+
 -  GGNNGraphHost(const GGNNGraphHost& other) {
 +  GGNNGraphHost(const GGNNGraphHost&) {
      // this exists to allow using vector::emplace_back
@@ -220,11 +220,10 @@ index 49d76a1..eef69e6 100644
 @@ -22,7 +22,6 @@ limitations under the License.
  #include <cuda.h>
  #include <cuda_runtime.h>
- 
+
 -#include <gflags/gflags.h>
  #include <cub/cub.cuh>
- 
+
  #include "ggnn/utils/cuda_knn_constants.cuh"
--- 
+--
 2.43.0
-
diff --git a/cpp/cmake/patches/ggnn_override.json b/cpp/cmake/patches/ggnn_override.json
index c9a1b6978..f1bd8c8a4 100644
--- a/cpp/cmake/patches/ggnn_override.json
+++ b/cpp/cmake/patches/ggnn_override.json
@@ -13,4 +13,4 @@
         ]
       }
     }
-  }
\ No newline at end of file
+  }
diff --git a/cpp/cmake/patches/hnswlib.diff b/cpp/cmake/patches/hnswlib.diff
index f20c27d91..96be2b238 100644
--- a/cpp/cmake/patches/hnswlib.diff
+++ b/cpp/cmake/patches/hnswlib.diff
@@ -11,20 +11,20 @@ index bef0017..0ee7931 100644
 +    bool base_layer_init = true;
      static const tableint MAX_LABEL_OPERATION_LOCKS = 65536;
      static const unsigned char DELETE_MARK = 0x01;
- 
+
 @@ -1098,7 +1101,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
- 
+
          std::unique_lock <std::mutex> lock_el(link_list_locks_[cur_c]);
          int curlevel = getRandomLevel(mult_);
 -        if (level > 0)
 +        if (level > -1)
              curlevel = level;
- 
+
          element_levels_[cur_c] = curlevel;
 @@ -1116,6 +1119,9 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
          memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype));
          memcpy(getDataByInternalId(cur_c), data_point, data_size_);
- 
+
 +        if (!base_layer_init && curlevel == 0)
 +            return cur_c;
 +
@@ -43,7 +43,7 @@ index bef0017..0ee7931 100644
 @@ -1188,28 +1194,41 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
          tableint currObj = enterpoint_node_;
          dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_);
- 
+
 -        for (int level = maxlevel_; level > 0; level--) {
 -            bool changed = true;
 -            while (changed) {
@@ -66,7 +66,7 @@ index bef0017..0ee7931 100644
 +                while (changed) {
 +                    changed = false;
 +                    unsigned int *data;
- 
+
 -                data = (unsigned int *) get_linklist(currObj, level);
 -                int size = getListCount(data);
 -                metric_hops++;
@@ -82,7 +82,7 @@ index bef0017..0ee7931 100644
 +                        if (static_cast<int>(cand) < 0 || cand > max_elements_)
 +                            throw std::runtime_error("cand error");
 +                        dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);
- 
+
 -                tableint *datal = (tableint *) (data + 1);
 -                for (int i = 0; i < size; i++) {
 -                    tableint cand = datal[i];
@@ -109,7 +109,7 @@ index 834d19f..0c0af26 100644
 @@ -252,12 +252,13 @@ class L2Space : public SpaceInterface<float> {
      ~L2Space() {}
  };
- 
+
 +template <typename T>
  static int
  L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
@@ -119,13 +119,13 @@ index 834d19f..0c0af26 100644
 -    unsigned char *b = (unsigned char *) pVect2;
 +    T *a = (T *) pVect1;
 +    T *b = (T *) pVect2;
- 
+
      qty = qty >> 2;
      for (size_t i = 0; i < qty; i++) {
 @@ -277,11 +278,12 @@ L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const voi
      return (res);
  }
- 
+
 +template <typename T>
  static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
      size_t qty = *((size_t*)qty_ptr);
@@ -134,13 +134,13 @@ index 834d19f..0c0af26 100644
 -    unsigned char* b = (unsigned char*)pVect2;
 +    T* a = (T*)pVect1;
 +    T* b = (T*)pVect2;
- 
+
      for (size_t i = 0; i < qty; i++) {
          res += ((*a) - (*b)) * ((*a) - (*b));
 @@ -291,6 +293,7 @@ static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2,
      return (res);
  }
- 
+
 +template <typename T>
  class L2SpaceI : public SpaceInterface<int> {
      DISTFUNC<int> fstdistfunc_;
diff --git a/cpp/cmake/patches/hnswlib_override.json b/cpp/cmake/patches/hnswlib_override.json
index c50220e24..812af74bc 100644
--- a/cpp/cmake/patches/hnswlib_override.json
+++ b/cpp/cmake/patches/hnswlib_override.json
@@ -13,4 +13,4 @@
       ]
     }
   }
-}
\ No newline at end of file
+}
diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake
index d6261d248..bc5b0ad1b 100644
--- a/cpp/cmake/thirdparty/get_faiss.cmake
+++ b/cpp/cmake/thirdparty/get_faiss.cmake
@@ -23,7 +23,7 @@ function(find_and_configure_faiss)
     HEADER_NAMES  faiss/IndexFlat.h
     LIBRARY_NAMES faiss
     )
-  
+
   set(patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches")
   rapids_cpm_package_override("${patch_dir}/faiss_override.json")
 
@@ -55,6 +55,7 @@ function(find_and_configure_faiss)
     EXCLUDE_FROM_ALL ${exclude}
     OPTIONS
     "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}"
+    "FAISS_ENABLE_CUVS ${PKG_ENABLE_GPU}"
     "FAISS_ENABLE_PYTHON OFF"
     "FAISS_OPT_LEVEL ${CUVS_FAISS_OPT_LEVEL}"
     "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}"
@@ -62,7 +63,7 @@ function(find_and_configure_faiss)
     "CMAKE_MESSAGE_LOG_LEVEL VERBOSE"
     )
 
-  
+
   include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake")
   rapids_cpm_display_patch_status(faiss)
 
diff --git a/cpp/cmake/thirdparty/get_ggnn.cmake b/cpp/cmake/thirdparty/get_ggnn.cmake
index 2ccfbc64d..8ee5bfd1d 100644
--- a/cpp/cmake/thirdparty/get_ggnn.cmake
+++ b/cpp/cmake/thirdparty/get_ggnn.cmake
@@ -38,7 +38,7 @@ function(find_and_configure_ggnn)
 
   include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake")
   rapids_cpm_display_patch_status(ggnn)
-  
+
   if(NOT TARGET ggnn::ggnn)
     add_library(ggnn INTERFACE)
     target_include_directories(ggnn INTERFACE "$<BUILD_INTERFACE:${ggnn_SOURCE_DIR}/include>")
diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index 845c7a833..3c352b693 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -44,7 +44,7 @@ function(find_and_configure_raft)
             INSTALL_EXPORT_SET  cuvs-exports
             COMPONENTS          ${RAFT_COMPONENTS}
             CPM_ARGS
-              EXCLUDE_FROM_ALL TRUE  
+              EXCLUDE_FROM_ALL TRUE
               GIT_REPOSITORY        https://github.com/${PKG_FORK}/raft.git
               GIT_TAG               ${PKG_PINNED_TAG}
               SOURCE_SUBDIR         cpp
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index e28572457..ce6741c1f 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -880,7 +880,7 @@ RECURSIVE              = YES
 # Note that relative paths are relative to the directory from which doxygen is
 # run.
 
-EXCLUDE                = ../include/cuvs/sparse/selection 
+EXCLUDE                = ../include/cuvs/sparse/selection
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
diff --git a/cpp/doxygen/main_page.md b/cpp/doxygen/main_page.md
index d002df001..81757f5db 100644
--- a/cpp/doxygen/main_page.md
+++ b/cpp/doxygen/main_page.md
@@ -1,3 +1,3 @@
 # libcuvs
 
-cuVS is a library for vector search on the GPU
\ No newline at end of file
+cuVS is a library for vector search on the GPU
diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp
index 99581469f..72a5cac12 100644
--- a/cpp/include/cuvs/neighbors/brute_force.hpp
+++ b/cpp/include/cuvs/neighbors/brute_force.hpp
@@ -332,15 +332,28 @@ auto build(raft::resources const& handle,
  * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
  * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
  * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
- * eliminate entirely allocations happening within `search`:
+ * eliminate entirely allocations happening within `search`.
+ *
+ * Usage example:
  * @code{.cpp}
- *   ...
- *   // Use the same allocator across multiple searches to reduce the number of
- *   // cuda memory allocations
- *   brute_force::search(handle, index, queries1, out_inds1, out_dists1);
- *   brute_force::search(handle, index, queries2, out_inds2, out_dists2);
- *   brute_force::search(handle, index, queries3, out_inds3, out_dists3);
- *   ...
+ *   using namespace cuvs::neighbors;
+ *
+ *   raft::resources res;
+ *   // use default index parameters
+ *   brute_force::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = brute_force::build(res, index_params, dataset);
+ *   // use default search parameters
+ *   brute_force::search_params search_params;
+ *   // create a bitset to filter the search
+ *   auto removed_indices = raft::make_device_vector<int64_t, int64_t>(res, n_removed_indices);
+ *   raft::core::bitset<std::uint32_t, int64_t> removed_indices_bitset(
+ *     res, removed_indices.view(), dataset.extent(0));
+ *   // search K nearest neighbours according to a bitset
+ *   auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
+ *   auto distances = raft::make_device_matrix<float>(res, n_queries, k);
+ *   auto filter    = filtering::bitset_filter(removed_indices_bitset.view());
+ *   brute_force::search(res, search_params, index, queries, neighbors, distances, filter);
  * @endcode
  *
  * @param[in] handle
@@ -350,9 +363,17 @@ auto build(raft::resources const& handle,
  * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
  * [n_queries, k]
  * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- * @param[in] sample_filter An optional device bitmap filter function with a `row-major` layout and
- * the shape of [n_queries, index->size()], which means the filter will use the first
- * `index->size()` bits to indicate whether queries[0] should compute the distance with dataset.
+ * @param[in] sample_filter An optional device filter that restricts which dataset elements should
+ * be considered for each query.
+ *
+ * - Supports two types of filters:
+ *   1. **Bitset Filter**: A shared filter where each bit corresponds to a dataset element.
+ *      All queries share the same filter, with a logical shape of `[1, index->size()]`.
+ *   2. **Bitmap Filter**: A per-query filter with a logical shape of `[n_queries, index->size()]`,
+ *      where each bit indicates whether a specific dataset element should be considered for a
+ *      particular query. (1 for inclusion, 0 for exclusion).
+ *
+ * - The default value is `none_sample_filter`, which applies no filtering.
  */
 void search(raft::resources const& handle,
             const cuvs::neighbors::brute_force::search_params& params,
@@ -379,15 +400,28 @@ void search(raft::resources const& handle,
  * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
  * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
  * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
- * eliminate entirely allocations happening within `search`:
+ * eliminate entirely allocations happening within `search`.
+ *
+ * Usage example:
  * @code{.cpp}
- *   ...
- *   // Use the same allocator across multiple searches to reduce the number of
- *   // cuda memory allocations
- *   brute_force::search(handle, index, queries1, out_inds1, out_dists1);
- *   brute_force::search(handle, index, queries2, out_inds2, out_dists2);
- *   brute_force::search(handle, index, queries3, out_inds3, out_dists3);
- *   ...
+ *   using namespace cuvs::neighbors;
+ *
+ *   // use default index parameters
+ *   brute_force::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset; `res` is the raft::resources
+ *   // object that is passed to every call in this example
+ *   auto index = brute_force::build(res, index_params, dataset);
+ *   // use default search parameters
+ *   brute_force::search_params search_params;
+ *   // create a bitset to filter the search
+ *   auto removed_indices = raft::make_device_vector<int64_t, int64_t>(res, n_removed_indices);
+ *   raft::core::bitset<std::uint32_t, int64_t> removed_indices_bitset(
+ *     res, removed_indices.view(), dataset.extent(0));
+ *   // search K nearest neighbours according to a bitset
+ *   auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
+ *   auto distances = raft::make_device_matrix<half>(res, n_queries, k);
+ *   auto filter    = filtering::bitset_filter(removed_indices_bitset.view());
+ *   brute_force::search(res, search_params, index, queries, neighbors, distances, filter);
  * @endcode
  *
  * @param[in] handle
@@ -397,8 +431,17 @@ void search(raft::resources const& handle,
  * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
  * [n_queries, k]
  * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- * @param[in] sample_filter a optional device bitmap filter function that greenlights samples for a
- * given
+ * @param[in] sample_filter An optional device filter that restricts which dataset elements should
+ * be considered for each query.
+ *
+ * - Supports two types of filters:
+ *   1. **Bitset Filter**: A shared filter where each bit corresponds to a dataset element.
+ *      All queries share the same filter, with a logical shape of `[1, index->size()]`.
+ *   2. **Bitmap Filter**: A per-query filter with a logical shape of `[n_queries, index->size()]`,
+ *      where each bit indicates whether a specific dataset element should be considered for a
+ *      particular query. (1 for inclusion, 0 for exclusion).
+ *
+ * - The default value is `none_sample_filter`, which applies no filtering.
  */
 void search(raft::resources const& handle,
             const cuvs::neighbors::brute_force::search_params& params,
@@ -421,6 +464,33 @@ void search(raft::resources const& handle,
  *
  * See the [brute_force::build](#brute_force::build) documentation for a usage example.
  *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
+ * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
+ * eliminate entirely allocations happening within `search`.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *
+ *   // use default index parameters
+ *   brute_force::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset; `res` is the raft::resources
+ *   // object that is passed to every call in this example
+ *   auto index = brute_force::build(res, index_params, dataset);
+ *   // use default search parameters
+ *   brute_force::search_params search_params;
+ *   // create a bitset to filter the search
+ *   auto removed_indices = raft::make_device_vector<int64_t, int64_t>(res, n_removed_indices);
+ *   raft::core::bitset<std::uint32_t, int64_t> removed_indices_bitset(
+ *     res, removed_indices.view(), dataset.extent(0));
+ *   // search K nearest neighbours according to a bitset
+ *   auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
+ *   auto distances = raft::make_device_matrix<float>(res, n_queries, k);
+ *   auto filter    = filtering::bitset_filter(removed_indices_bitset.view());
+ *   brute_force::search(res, search_params, index, queries, neighbors, distances, filter);
+ * @endcode
+ *
  * @param[in] handle
  * @param[in] params parameters configuring the search
  * @param[in] index bruteforce constructed index
@@ -428,8 +498,17 @@ void search(raft::resources const& handle,
  * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
  * [n_queries, k]
  * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- * @param[in] sample_filter an optional device bitmap filter function that greenlights samples for a
- * given query
+ * @param[in] sample_filter An optional device filter that restricts which dataset elements should
+ * be considered for each query.
+ *
+ * - Supports two types of filters:
+ *   1. **Bitset Filter**: A shared filter where each bit corresponds to a dataset element.
+ *      All queries share the same filter, with a logical shape of `[1, index->size()]`.
+ *   2. **Bitmap Filter**: A per-query filter with a logical shape of `[n_queries, index->size()]`,
+ *      where each bit indicates whether a specific dataset element should be considered for a
+ *      particular query. (1 for inclusion, 0 for exclusion).
+ *
+ * - The default value is `none_sample_filter`, which applies no filtering.
  */
 void search(raft::resources const& handle,
             const cuvs::neighbors::brute_force::search_params& params,
@@ -452,6 +531,33 @@ void search(raft::resources const& handle,
  *
  * See the [brute_force::build](#brute_force::build) documentation for a usage example.
  *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
+ * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
+ * eliminate entirely allocations happening within `search`.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *
+ *   // use default index parameters
+ *   brute_force::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset; `res` is the raft::resources
+ *   // object that is passed to every call in this example
+ *   auto index = brute_force::build(res, index_params, dataset);
+ *   // use default search parameters
+ *   brute_force::search_params search_params;
+ *   // create a bitset to filter the search
+ *   auto removed_indices = raft::make_device_vector<int64_t, int64_t>(res, n_removed_indices);
+ *   raft::core::bitset<std::uint32_t, int64_t> removed_indices_bitset(
+ *     res, removed_indices.view(), dataset.extent(0));
+ *   // search K nearest neighbours according to a bitset
+ *   auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
+ *   auto distances = raft::make_device_matrix<half>(res, n_queries, k);
+ *   auto filter    = filtering::bitset_filter(removed_indices_bitset.view());
+ *   brute_force::search(res, search_params, index, queries, neighbors, distances, filter);
+ * @endcode
+ *
  * @param[in] handle
  * @param[in] params parameters configuring the search
  * @param[in] index bruteforce constructed index
@@ -459,8 +565,17 @@ void search(raft::resources const& handle,
  * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
  * [n_queries, k]
  * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- * @param[in] sample_filter an optional device bitmap filter function that greenlights samples for a
- * given query
+ * @param[in] sample_filter An optional device filter that restricts which dataset elements should
+ * be considered for each query.
+ *
+ * - Supports two types of filters:
+ *   1. **Bitset Filter**: A shared filter where each bit corresponds to a dataset element.
+ *      All queries share the same filter, with a logical shape of `[1, index->size()]`.
+ *   2. **Bitmap Filter**: A per-query filter with a logical shape of `[n_queries, index->size()]`,
+ *      where each bit indicates whether a specific dataset element should be considered for a
+ *      particular query. (1 for inclusion, 0 for exclusion).
+ *
+ * - The default value is `none_sample_filter`, which applies no filtering.
  */
 void search(raft::resources const& handle,
             const cuvs::neighbors::brute_force::search_params& params,
diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h
index f7f58a19c..207f3f21b 100644
--- a/cpp/include/cuvs/neighbors/cagra.h
+++ b/cpp/include/cuvs/neighbors/cagra.h
@@ -18,6 +18,7 @@
 
 #include <cuvs/core/c_api.h>
 #include <cuvs/distance/distance.h>
+#include <cuvs/neighbors/common.h>
 #include <dlpack/dlpack.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -140,6 +141,45 @@ cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t* param
  */
 cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params);
 
+/**
+ * @}
+ */
+
+/**
+ * @defgroup cagra_c_extend_params C API for CUDA ANN Graph-based nearest neighbor search
+ * @{
+ */
+/**
+ * @brief Supplemental parameters to extend CAGRA Index
+ *
+ */
+struct cuvsCagraExtendParams {
+  /** The additional dataset is divided into chunks and added to the graph. This is the knob to
+   * adjust the tradeoff between the recall and operation throughput. Large chunk sizes can result
+   * in high throughput, but use more working memory (O(max_chunk_size*degree^2)). This can also
+   * degrade recall because no edges are added between the nodes in the same chunk. Auto select when
+   * 0. */
+  uint32_t max_chunk_size;
+};
+
+typedef struct cuvsCagraExtendParams* cuvsCagraExtendParams_t;
+
+/**
+ * @brief Allocate CAGRA Extend params, and populate with default values
+ *
+ * @param[in] params cuvsCagraExtendParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params);
+
+/**
+ * @brief De-allocate CAGRA Extend params
+ *
+ * @param[in] params
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params);
+
 /**
  * @}
  */
@@ -339,6 +379,36 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res,
  * @}
  */
 
+/**
+ * @defgroup cagra_c_extend_params C API for CUDA ANN Graph-based nearest neighbor search
+ * @{
+ */
+
+/**
+ * @brief Extend a CAGRA index with a `DLManagedTensor` which has underlying
+ *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
+ *        or `kDLCPU`. Also, acceptable underlying types are:
+ *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] params cuvsCagraExtendParams_t used to extend CAGRA index
+ * @param[in] additional_dataset DLManagedTensor* additional dataset
+ * @param[in,out] index cuvsCagraIndex_t CAGRA index
+ * @param[out] return_dataset DLManagedTensor* extended dataset
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraExtend(cuvsResources_t res,
+                            cuvsCagraExtendParams_t params,
+                            DLManagedTensor* additional_dataset,
+                            cuvsCagraIndex_t index,
+                            DLManagedTensor* return_dataset);
+
+/**
+ * @}
+ */
+
 /**
  * @defgroup cagra_c_index_search C API for CUDA ANN Graph-based nearest neighbor search
  * @{
@@ -388,13 +458,16 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res,
  * @param[in] queries DLManagedTensor* queries dataset to search
  * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
  * @param[out] distances DLManagedTensor* output `k` distances for queries
+ * @param[in] filter cuvsFilter input filter that can be used
+ *              to filter queries and neighbors based on the given bitset.
  */
 cuvsError_t cuvsCagraSearch(cuvsResources_t res,
                             cuvsCagraSearchParams_t params,
                             cuvsCagraIndex_t index,
                             DLManagedTensor* queries,
                             DLManagedTensor* neighbors,
-                            DLManagedTensor* distances);
+                            DLManagedTensor* distances,
+                            cuvsFilter filter);
 
 /**
  * @}
diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp
index a4684ce26..597e186d7 100644
--- a/cpp/include/cuvs/neighbors/cagra.hpp
+++ b/cpp/include/cuvs/neighbors/cagra.hpp
@@ -1599,11 +1599,16 @@ void deserialize(raft::resources const& handle,
  * @param[in] handle the raft handle
  * @param[in] os output stream
  * @param[in] index CAGRA index
+ * @param[in] dataset [optional] host array that stores the dataset, required if the index
+ *            does not contain the dataset.
  *
  */
-void serialize_to_hnswlib(raft::resources const& handle,
-                          std::ostream& os,
-                          const cuvs::neighbors::cagra::index<float, uint32_t>& index);
+void serialize_to_hnswlib(
+  raft::resources const& handle,
+  std::ostream& os,
+  const cuvs::neighbors::cagra::index<float, uint32_t>& index,
+  std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
  * Save a CAGRA build index in hnswlib base-layer-only serialized format
@@ -1628,11 +1633,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
  * @param[in] handle the raft handle
  * @param[in] filename the file name for saving the index
  * @param[in] index CAGRA index
+ * @param[in] dataset [optional] host array that stores the dataset, required if the index
+ *            does not contain the dataset.
  *
  */
-void serialize_to_hnswlib(raft::resources const& handle,
-                          const std::string& filename,
-                          const cuvs::neighbors::cagra::index<float, uint32_t>& index);
+void serialize_to_hnswlib(
+  raft::resources const& handle,
+  const std::string& filename,
+  const cuvs::neighbors::cagra::index<float, uint32_t>& index,
+  std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
  * Write the CAGRA built index as a base layer HNSW index to an output stream
@@ -1656,11 +1666,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
  * @param[in] handle the raft handle
  * @param[in] os output stream
  * @param[in] index CAGRA index
+ * @param[in] dataset [optional] host array that stores the dataset, required if the index
+ *            does not contain the dataset.
  *
  */
-void serialize_to_hnswlib(raft::resources const& handle,
-                          std::ostream& os,
-                          const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index);
+void serialize_to_hnswlib(
+  raft::resources const& handle,
+  std::ostream& os,
+  const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index,
+  std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
  * Save a CAGRA build index in hnswlib base-layer-only serialized format
@@ -1685,11 +1700,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
  * @param[in] handle the raft handle
  * @param[in] filename the file name for saving the index
  * @param[in] index CAGRA index
+ * @param[in] dataset [optional] host array that stores the dataset, required if the index
+ *            does not contain the dataset.
  *
  */
-void serialize_to_hnswlib(raft::resources const& handle,
-                          const std::string& filename,
-                          const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index);
+void serialize_to_hnswlib(
+  raft::resources const& handle,
+  const std::string& filename,
+  const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index,
+  std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
  * Write the CAGRA built index as a base layer HNSW index to an output stream
@@ -1713,11 +1733,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
  * @param[in] handle the raft handle
  * @param[in] os output stream
  * @param[in] index CAGRA index
+ * @param[in] dataset [optional] host array that stores the dataset, required if the index
+ *            does not contain the dataset.
  *
  */
-void serialize_to_hnswlib(raft::resources const& handle,
-                          std::ostream& os,
-                          const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index);
+void serialize_to_hnswlib(
+  raft::resources const& handle,
+  std::ostream& os,
+  const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index,
+  std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
  * Save a CAGRA build index in hnswlib base-layer-only serialized format
@@ -1742,11 +1767,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
  * @param[in] handle the raft handle
  * @param[in] filename the file name for saving the index
  * @param[in] index CAGRA index
+ * @param[in] dataset [optional] host array that stores the dataset, required if the index
+ *            does not contain the dataset.
  *
  */
-void serialize_to_hnswlib(raft::resources const& handle,
-                          const std::string& filename,
-                          const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index);
+void serialize_to_hnswlib(
+  raft::resources const& handle,
+  const std::string& filename,
+  const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index,
+  std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
  * @}
diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp
index bd9ea4834..038b6b1da 100644
--- a/cpp/include/cuvs/neighbors/common.hpp
+++ b/cpp/include/cuvs/neighbors/common.hpp
@@ -18,6 +18,7 @@
 
 #include <cstdint>
 #include <cuvs/distance/distance.hpp>
+#include <raft/core/device_csr_matrix.hpp>
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
@@ -456,8 +457,16 @@ inline constexpr bool is_vpq_dataset_v = is_vpq_dataset<DatasetT>::value;
 
 namespace filtering {
 
+/**
+ * @defgroup neighbors_filtering Filtering for ANN Types
+ * @{
+ */
+
+enum class FilterType { None, Bitmap, Bitset };
+
 struct base_filter {
-  virtual ~base_filter() = default;
+  virtual ~base_filter()                     = default;
+  virtual FilterType get_filter_type() const = 0;
 };
 
 /* A filter that filters nothing. This is the default behavior. */
@@ -475,6 +484,8 @@ struct none_sample_filter : public base_filter {
     const uint32_t query_ix,
     // the index of the current sample
     const uint32_t sample_ix) const;
+
+  FilterType get_filter_type() const override { return FilterType::None; }
 };
 
 /**
@@ -513,15 +524,24 @@ struct ivf_to_sample_filter {
  */
 template <typename bitmap_t, typename index_t>
 struct bitmap_filter : public base_filter {
+  using view_t = cuvs::core::bitmap_view<bitmap_t, index_t>;
+
   // View of the bitset to use as a filter
-  const cuvs::core::bitmap_view<bitmap_t, index_t> bitmap_view_;
+  const view_t bitmap_view_;
 
-  bitmap_filter(const cuvs::core::bitmap_view<bitmap_t, index_t> bitmap_for_filtering);
+  bitmap_filter(const view_t bitmap_for_filtering);
   inline _RAFT_HOST_DEVICE bool operator()(
     // query index
     const uint32_t query_ix,
     // the index of the current sample
     const uint32_t sample_ix) const;
+
+  FilterType get_filter_type() const override { return FilterType::Bitmap; }
+
+  view_t view() const { return bitmap_view_; }
+
+  template <typename csr_matrix_t>
+  void to_csr(raft::resources const& handle, csr_matrix_t& csr);
 };
 
 /**
@@ -532,17 +552,28 @@ struct bitmap_filter : public base_filter {
  */
 template <typename bitset_t, typename index_t>
 struct bitset_filter : public base_filter {
+  using view_t = cuvs::core::bitset_view<bitset_t, index_t>;
+
   // View of the bitset to use as a filter
-  const cuvs::core::bitset_view<bitset_t, index_t> bitset_view_;
+  const view_t bitset_view_;
 
-  bitset_filter(const cuvs::core::bitset_view<bitset_t, index_t> bitset_for_filtering);
+  bitset_filter(const view_t bitset_for_filtering);
   inline _RAFT_HOST_DEVICE bool operator()(
     // query index
     const uint32_t query_ix,
     // the index of the current sample
     const uint32_t sample_ix) const;
+
+  FilterType get_filter_type() const override { return FilterType::Bitset; }
+
+  view_t view() const { return bitset_view_; }
+
+  template <typename csr_matrix_t>
+  void to_csr(raft::resources const& handle, csr_matrix_t& csr);
 };
 
+/** @} */  // end group neighbors_filtering
+
 /**
  * If the filtering depends on the index of a sample, then the following
  * filter template can be used:
diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h
index b7eda54b8..d88fd3b4e 100644
--- a/cpp/include/cuvs/neighbors/hnsw.h
+++ b/cpp/include/cuvs/neighbors/hnsw.h
@@ -47,13 +47,13 @@ enum cuvsHnswHierarchy {
 
 struct cuvsHnswIndexParams {
   /* hierarchy of the hnsw index */
-  cuvsHnswHierarchy hierarchy;
+  enum cuvsHnswHierarchy hierarchy;
   /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/
   int ef_construction;
   /** Number of host threads to use to construct hierarchy when hierarchy is `CPU`
-  NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive
-  to parallelism, and increasing the number of threads can reduce the quality of the index.
-   */
+      When the value is 0, the number of threads is automatically set to the maximum
+      number of threads available.
+  */
   int num_threads;
 };
 
@@ -158,8 +158,8 @@ cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params);
  * NOTE: When hierarchy is:
  *       1. `NONE`: This method uses the filesystem to write the CAGRA index in
  * `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary
- * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as
- * the format is not compatible with the original hnswlib.
+ * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS,
+ * as the format is not compatible with the original hnswlib.
  *       2. `CPU`: The returned index is mutable and can be extended with additional vectors. The
  * serialized index is also compatible with the original hnswlib library.
  *
@@ -364,10 +364,10 @@ cuvsError_t cuvsHnswSearch(cuvsResources_t res,
 
 /**
  * @brief Serialize a CAGRA index to a file as an hnswlib index
- * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
- * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
- * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
- * library.
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by
+ * the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original
+ * hnswlib. However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the
+ * original hnswlib library.
  *
  * @param[in] res cuvsResources_t opaque C handle
  * @param[in] filename the name of the file to save the index
@@ -406,8 +406,8 @@ cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHns
 /**
  * Load hnswlib index from file which was serialized from a HNSW index.
  * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable, and only be read by the
- * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
- * Experimental, both the API and the serialization format are subject to change.
+ * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original
+ * hnswlib. Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.c}
  * #include <cuvs/core/c_api.h>
diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp
index f0b433d8e..750f1f87f 100644
--- a/cpp/include/cuvs/neighbors/hnsw.hpp
+++ b/cpp/include/cuvs/neighbors/hnsw.hpp
@@ -54,10 +54,10 @@ struct index_params : cuvs::neighbors::index_params {
   /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/
   int ef_construction = 200;
   /** Number of host threads to use to construct hierarchy when hierarchy is `CPU`
-  NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive
-  to parallelism, and increasing the number of threads can reduce the quality of the index.
+      When the value is 0, the number of threads is automatically set to the
+      maximum number of threads available.
    */
-  int num_threads = 2;
+  int num_threads = 0;
 };
 
 /**@}*/
@@ -724,4 +724,4 @@ void deserialize(raft::resources const& res,
 
 #else
 #error "This header is only available if cuVS CMake option `BUILD_CAGRA_HNSWLIB=ON"
-#endif
\ No newline at end of file
+#endif
diff --git a/cpp/include/cuvs/neighbors/refine.hpp b/cpp/include/cuvs/neighbors/refine.hpp
index 5e60ff537..e03ffb563 100644
--- a/cpp/include/cuvs/neighbors/refine.hpp
+++ b/cpp/include/cuvs/neighbors/refine.hpp
@@ -481,4 +481,4 @@ void refine(raft::resources const& handle,
             raft::host_matrix_view<float, int64_t, raft::row_major> distances,
             cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded);
 
-}  // namespace cuvs::neighbors
\ No newline at end of file
+}  // namespace cuvs::neighbors
diff --git a/cpp/include/cuvs/neighbors/vamana.hpp b/cpp/include/cuvs/neighbors/vamana.hpp
index bec17937f..bc205a6f4 100644
--- a/cpp/include/cuvs/neighbors/vamana.hpp
+++ b/cpp/include/cuvs/neighbors/vamana.hpp
@@ -31,16 +31,27 @@
 #include <optional>
 #include <variant>
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
+
 /**
  * @defgroup vamana_cpp_index_params Vamana index build parameters
  * @{
  */
 
 /**
- * @brief ANN parameters used by VAMANA to build index
+ * @brief Parameters used to build DiskANN index
  *
+ * `graph_degree`: Maximum degree of graph; corresponds to the R parameter of
+ * Vamana algorithm in the literature.
+ * `visited_size`: Maximum number of visited nodes per search during Vamana algorithm.
+ * Loosely corresponds to the L parameter in the literature.
+ * `vamana_iters`: The number of times all vectors are inserted into the graph. If > 1,
+ * all vectors are re-inserted to improve graph quality.
+ * `max_fraction`: The maximum batch size is this fraction of the total dataset size. Larger
+ * gives faster build but lower graph quality.
+ * `alpha`: Used to determine how aggressive the pruning will be.
  */
+
 struct index_params : cuvs::neighbors::index_params {
   /** Maximum degree of output graph corresponds to the R parameter in the original Vamana
    * literature. */
@@ -55,10 +66,12 @@ struct index_params : cuvs::neighbors::index_params {
   /** Maximum fraction of dataset inserted per batch.              *
    * Larger max batch decreases graph quality, but improves speed */
   float max_fraction = 0.06;
-  /** Base of growth rate of batch sies **/
+  /** Base of growth rate of batch sizes **/
   float batch_base = 2;
   /** Size of candidate queue structure - should be (2^x)-1 */
   uint32_t queue_size = 127;
+  /** Max batchsize of reverse edge processing (reduces memory footprint) */
+  uint32_t reverse_batchsize = 1000000;
 };
 
 /**
@@ -215,61 +228,295 @@ struct index : cuvs::neighbors::index {
  * @{
  */
 /**
- * @brief Build the index from the dataset for efficient search.
+ * @brief Build the index from the dataset for efficient DiskANN search.
+ *
+ * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
+ * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
+ * performing a greedy search for each vector to be inserted, and inserting it with edges to
+ * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
+ * to improve graph quality. The index_params struct controls the degree of the final graph.
+ *
+ * The following distance metrics are supported:
+ * - L2
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters;
+ *   vamana::index_params index_params;
+ *   // create and fill index from a [N, D] dataset;
+ *   auto index = vamana::build(res, index_params, dataset);
+ *   // write index to file for CPU-based DiskANN search (cuVS does not yet support search)
+ *   vamana::serialize(res, filename, index);
+ * @endcode
  *
+ * @param[in] res
+ * @param[in] params parameters for building the index
+ * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed vamana index
  */
-auto build(raft::resources const& handle,
-           const cuvs::neighbors::experimental::vamana::index_params& params,
+auto build(raft::resources const& res,
+           const cuvs::neighbors::vamana::index_params& params,
            raft::device_matrix_view<const float, int64_t, raft::row_major> dataset)
-  -> cuvs::neighbors::experimental::vamana::index<float, uint32_t>;
+  -> cuvs::neighbors::vamana::index<float, uint32_t>;
 
-auto build(raft::resources const& handle,
-           const cuvs::neighbors::experimental::vamana::index_params& params,
+/**
+ * @brief Build the index from the dataset for efficient DiskANN search.
+ *
+ * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
+ * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
+ * performing a greedy search for each vector to be inserted, and inserting it with edges to
+ * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
+ * to improve graph quality. The index_params struct controls the degree of the final graph.
+ *
+ * The following distance metrics are supported:
+ * - L2
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters;
+ *   vamana::index_params index_params;
+ *   // create and fill index from a [N, D] dataset;
+ *   auto index = vamana::build(res, index_params, dataset);
+ *   // write index to file for CPU-based DiskANN search (cuVS does not yet support search)
+ *   vamana::serialize(res, filename, index);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[in] params parameters for building the index
+ * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed vamana index
+ */
+auto build(raft::resources const& res,
+           const cuvs::neighbors::vamana::index_params& params,
            raft::host_matrix_view<const float, int64_t, raft::row_major> dataset)
-  -> cuvs::neighbors::experimental::vamana::index<float, uint32_t>;
+  -> cuvs::neighbors::vamana::index<float, uint32_t>;
 
-auto build(raft::resources const& handle,
-           const cuvs::neighbors::experimental::vamana::index_params& params,
+/**
+ * @brief Build the index from the dataset for efficient DiskANN search.
+ *
+ * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
+ * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
+ * performing a greedy search for each vector to be inserted, and inserting it with edges to
+ * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
+ * to improve graph quality. The index_params struct controls the degree of the final graph.
+ *
+ * The following distance metrics are supported:
+ * - L2
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters;
+ *   vamana::index_params index_params;
+ *   // create and fill index from a [N, D] dataset;
+ *   auto index = vamana::build(res, index_params, dataset);
+ *   // write index to file for CPU-based DiskANN search (cuVS does not yet support search)
+ *   vamana::serialize(res, filename, index);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[in] params parameters for building the index
+ * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed vamana index
+ */
+auto build(raft::resources const& res,
+           const cuvs::neighbors::vamana::index_params& params,
            raft::device_matrix_view<const int8_t, int64_t, raft::row_major> dataset)
-  -> cuvs::neighbors::experimental::vamana::index<int8_t, uint32_t>;
+  -> cuvs::neighbors::vamana::index<int8_t, uint32_t>;
 
-auto build(raft::resources const& handle,
-           const cuvs::neighbors::experimental::vamana::index_params& params,
+/**
+ * @brief Build the index from the dataset for efficient DiskANN search.
+ *
+ * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
+ * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
+ * performing a greedy search for each vector to be inserted, and inserting it with edges to
+ * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
+ * to improve graph quality. The index_params struct controls the degree of the final graph.
+ *
+ * The following distance metrics are supported:
+ * - L2
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters;
+ *   vamana::index_params index_params;
+ *   // create and fill index from a [N, D] dataset;
+ *   auto index = vamana::build(res, index_params, dataset);
+ *   // write index to file for CPU-based DiskANN search (cuVS does not yet support search)
+ *   vamana::serialize(res, filename, index);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[in] params parameters for building the index
+ * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed vamana index
+ */
+auto build(raft::resources const& res,
+           const cuvs::neighbors::vamana::index_params& params,
            raft::host_matrix_view<const int8_t, int64_t, raft::row_major> dataset)
-  -> cuvs::neighbors::experimental::vamana::index<int8_t, uint32_t>;
+  -> cuvs::neighbors::vamana::index<int8_t, uint32_t>;
 
-auto build(raft::resources const& handle,
-           const cuvs::neighbors::experimental::vamana::index_params& params,
+/**
+ * @brief Build the index from the dataset for efficient DiskANN search.
+ *
+ * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
+ * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
+ * performing a greedy search for each vector to be inserted, and inserting it with edges to
+ * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
+ * to improve graph quality. The index_params struct controls the degree of the final graph.
+ *
+ * The following distance metrics are supported:
+ * - L2
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters;
+ *   vamana::index_params index_params;
+ *   // create and fill index from a [N, D] dataset;
+ *   auto index = vamana::build(res, index_params, dataset);
+ *   // write index to file for CPU-based DiskANN search (cuVS does not yet support search)
+ *   vamana::serialize(res, filename, index);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[in] params parameters for building the index
+ * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed vamana index
+ */
+auto build(raft::resources const& res,
+           const cuvs::neighbors::vamana::index_params& params,
            raft::device_matrix_view<const uint8_t, int64_t, raft::row_major> dataset)
-  -> cuvs::neighbors::experimental::vamana::index<uint8_t, uint32_t>;
+  -> cuvs::neighbors::vamana::index<uint8_t, uint32_t>;
 
-auto build(raft::resources const& handle,
-           const cuvs::neighbors::experimental::vamana::index_params& params,
+/**
+ * @brief Build the index from the dataset for efficient DiskANN search.
+ *
+ * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
+ * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
+ * performing a greedy search for each vector to be inserted, and inserting it with edges to
+ * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
+ * to improve graph quality. The index_params struct controls the degree of the final graph.
+ *
+ * The following distance metrics are supported:
+ * - L2
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters;
+ *   vamana::index_params index_params;
+ *   // create and fill index from a [N, D] dataset;
+ *   auto index = vamana::build(res, index_params, dataset);
+ *   // write index to file for CPU-based DiskANN search (cuVS does not yet support search)
+ *   vamana::serialize(res, filename, index);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[in] params parameters for building the index
+ * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed vamana index
+ */
+auto build(raft::resources const& res,
+           const cuvs::neighbors::vamana::index_params& params,
            raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> dataset)
-  -> cuvs::neighbors::experimental::vamana::index<uint8_t, uint32_t>;
+  -> cuvs::neighbors::vamana::index<uint8_t, uint32_t>;
 
 /**
  * @defgroup vamana_cpp_serialize Vamana serialize functions
  * @{
  */
+
 /**
  * Save the index to file.
+ *
+ * Matches the file format used by the DiskANN open-source repository, allowing cross-compatibility.
+ *
+ * @code{.cpp}
+ *   #include <raft/core/resources.hpp>
+ *   #include <cuvs/neighbors/vamana.hpp>
+ *
+ *   raft::resources handle;
+ *
+ *   // create a string with a filepath
+ *   std::string file_prefix("/path/to/index/prefix");
+ *   // create an index with `auto index = cuvs::neighbors::vamana::build(...);`
+ *   cuvs::neighbors::vamana::serialize(handle, file_prefix, index);
+ * @endcode
+ *
+ * @param[in] handle the raft handle
+ * @param[in] file_prefix prefix of path and name of index files
+ * @param[in] index Vamana index
+ *
  */
 
 void serialize(raft::resources const& handle,
                const std::string& file_prefix,
-               const cuvs::neighbors::experimental::vamana::index<float, uint32_t>& index);
+               const cuvs::neighbors::vamana::index<float, uint32_t>& index);
 
+/**
+ * Save the index to file.
+ *
+ * Matches the file format used by the DiskANN open-source repository, allowing cross-compatibility.
+ *
+ * @code{.cpp}
+ *   #include <raft/core/resources.hpp>
+ *   #include <cuvs/neighbors/vamana.hpp>
+ *
+ *   raft::resources handle;
+ *
+ *   // create a string with a filepath
+ *   std::string file_prefix("/path/to/index/prefix");
+ *   // create an index with `auto index = cuvs::neighbors::vamana::build(...);`
+ *   cuvs::neighbors::vamana::serialize(handle, file_prefix, index);
+ * @endcode
+ *
+ * @param[in] handle the raft handle
+ * @param[in] file_prefix prefix of path and name of index files
+ * @param[in] index Vamana index
+ *
+ */
 void serialize(raft::resources const& handle,
                const std::string& file_prefix,
-               const cuvs::neighbors::experimental::vamana::index<int8_t, uint32_t>& index);
+               const cuvs::neighbors::vamana::index<int8_t, uint32_t>& index);
 
+/**
+ * Save the index to file.
+ *
+ * Matches the file format used by the DiskANN open-source repository, allowing cross-compatibility.
+ *
+ * @code{.cpp}
+ *   #include <raft/core/resources.hpp>
+ *   #include <cuvs/neighbors/vamana.hpp>
+ *
+ *   raft::resources handle;
+ *
+ *   // create a string with a filepath
+ *   std::string file_prefix("/path/to/index/prefix");
+ *   // create an index with `auto index = cuvs::neighbors::vamana::build(...);`
+ *   cuvs::neighbors::vamana::serialize(handle, file_prefix, index);
+ * @endcode
+ *
+ * @param[in] handle the raft handle
+ * @param[in] file_prefix prefix of path and name of index files
+ * @param[in] index Vamana index
+ *
+ */
 void serialize(raft::resources const& handle,
                const std::string& file_prefix,
-               const cuvs::neighbors::experimental::vamana::index<uint8_t, uint32_t>& index);
+               const cuvs::neighbors::vamana::index<uint8_t, uint32_t>& index);
 
 /**
  * @}
  */
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/include/cuvs/preprocessing/quantize/scalar.h b/cpp/include/cuvs/preprocessing/quantize/scalar.h
new file mode 100644
index 000000000..a8dbcb78b
--- /dev/null
+++ b/cpp/include/cuvs/preprocessing/quantize/scalar.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuvs/core/c_api.h>
+#include <dlpack/dlpack.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Scalar quantizer parameters.
+ */
+struct cuvsScalarQuantizerParams {
+  /*
+   * specifies how many outliers at top & bottom will be ignored
+   * needs to be within range of (0, 1]
+   */
+  float quantile;
+};
+
+typedef struct cuvsScalarQuantizerParams* cuvsScalarQuantizerParams_t;
+
+/**
+ * @brief Allocate Scalar Quantizer params, and populate with default values
+ *
+ * @param[in] params cuvsScalarQuantizerParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsScalarQuantizerParamsCreate(cuvsScalarQuantizerParams_t* params);
+
+/**
+ * @brief De-allocate Scalar Quantizer params
+ *
+ * @param[in] params
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsScalarQuantizerParamsDestroy(cuvsScalarQuantizerParams_t params);
+
+/**
+ * @brief Defines and stores scalar for quantisation upon training
+ *
+ * The quantization is performed by a linear mapping of an interval in the
+ * float data type to the full range of the quantized int type.
+ */
+typedef struct {
+  double min_;
+  double max_;
+} cuvsScalarQuantizer;
+
+typedef cuvsScalarQuantizer* cuvsScalarQuantizer_t;
+
+/**
+ * @brief Allocate Scalar Quantizer and populate with default values
+ *
+ * @param[in] quantizer cuvsScalarQuantizer_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsScalarQuantizerCreate(cuvsScalarQuantizer_t* quantizer);
+
+/**
+ * @brief De-allocate Scalar Quantizer
+ *
+ * @param[in] quantizer
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsScalarQuantizerDestroy(cuvsScalarQuantizer_t quantizer);
+
+/**
+ * @brief Trains a scalar quantizer to be used later for quantizing the dataset.
+ *
+ * @param[in] res raft resource
+ * @param[in] params configure scalar quantizer, e.g. quantile
+ * @param[in] dataset a row-major host or device matrix
+ * @param[out] quantizer trained scalar quantizer
+ */
+cuvsError_t cuvsScalarQuantizerTrain(cuvsResources_t res,
+                                     cuvsScalarQuantizerParams_t params,
+                                     DLManagedTensor* dataset,
+                                     cuvsScalarQuantizer_t quantizer);
+
+/**
+ * @brief Applies quantization transform to given dataset
+ *
+ * @param[in] res raft resource
+ * @param[in] quantizer a scalar quantizer
+ * @param[in] dataset a row-major host or device matrix to transform
+ * @param[out] out a row-major host or device matrix to store transformed data
+ */
+cuvsError_t cuvsScalarQuantizerTransform(cuvsResources_t res,
+                                         cuvsScalarQuantizer_t quantizer,
+                                         DLManagedTensor* dataset,
+                                         DLManagedTensor* out);
+
+/**
+ * @brief Perform inverse quantization step on previously quantized dataset
+ *
+ * Note that, depending on the chosen data types and the training dataset, the
+ * conversion is not lossless.
+ *
+ * @param[in] res raft resource
+ * @param[in] quantizer a scalar quantizer
+ * @param[in] dataset a row-major host or device matrix
+ * @param[out] out a row-major host or device matrix
+ *
+ */
+cuvsError_t cuvsScalarQuantizerInverseTransform(cuvsResources_t res,
+                                                cuvsScalarQuantizer_t quantizer,
+                                                DLManagedTensor* dataset,
+                                                DLManagedTensor* out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/cpp/scripts/run-clang-compile.py b/cpp/scripts/run-clang-compile.py
index eae52f731..71b5db568 100644
--- a/cpp/scripts/run-clang-compile.py
+++ b/cpp/scripts/run-clang-compile.py
@@ -253,12 +253,12 @@ def run_clang_command(clang_cmd, cwd):
 class LockContext(object):
     def __init__(self, lock=None) -> None:
         self._lock = lock
-    
+
     def __enter__(self):
         if self._lock:
             self._lock.acquire()
         return self
-    
+
     def __exit__(self, _, __, ___):
         if self._lock:
             self._lock.release()
diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py
index 3d8bbcec4..cad08ca55 100644
--- a/cpp/scripts/run-clang-tidy.py
+++ b/cpp/scripts/run-clang-tidy.py
@@ -296,12 +296,12 @@ def run_clang_tidy_command(tidy_cmd, cwd):
 class LockContext(object):
     def __init__(self, lock=None) -> None:
         self._lock = lock
-    
+
     def __enter__(self):
         if self._lock:
             self._lock.acquire()
         return self
-    
+
     def __exit__(self, _, __, ___):
         if self._lock:
             self._lock.release()
diff --git a/cpp/scripts/run-cmake-format.sh b/cpp/scripts/run-cmake-format.sh
index 1f7ae359f..7e30258fc 100755
--- a/cpp/scripts/run-cmake-format.sh
+++ b/cpp/scripts/run-cmake-format.sh
@@ -17,7 +17,7 @@
 # and exits gracefully if the file is not found. If a user wishes to specify a
 # config file at a nonstandard location, they may do so by setting the
 # environment variable RAPIDS_CMAKE_FORMAT_FILE.
-# 
+#
 # This script can be invoked directly anywhere within the project repository.
 # Alternatively, it may be invoked as a pre-commit hook via
 # `pre-commit run (cmake-format)|(cmake-lint)`.
diff --git a/cpp/src/cluster/detail/mst.cuh b/cpp/src/cluster/detail/mst.cuh
index 5804b8b5d..2929262d2 100644
--- a/cpp/src/cluster/detail/mst.cuh
+++ b/cpp/src/cluster/detail/mst.cuh
@@ -204,4 +204,4 @@ void build_sorted_mst(
   raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream);
 }
 
-};  // namespace  cuvs::cluster::agglomerative::detail
\ No newline at end of file
+};  // namespace  cuvs::cluster::agglomerative::detail
diff --git a/cpp/src/cluster/detail/single_linkage.cuh b/cpp/src/cluster/detail/single_linkage.cuh
index 8b90336b7..d1660f9ff 100644
--- a/cpp/src/cluster/detail/single_linkage.cuh
+++ b/cpp/src/cluster/detail/single_linkage.cuh
@@ -122,4 +122,4 @@ void single_linkage(raft::resources const& handle,
   out->n_leaves               = m;
   out->n_connected_components = 1;
 }
-};  // namespace  cuvs::cluster::agglomerative::detail
\ No newline at end of file
+};  // namespace  cuvs::cluster::agglomerative::detail
diff --git a/cpp/src/distance/detail/fused_distance_nn/gemm.h b/cpp/src/distance/detail/fused_distance_nn/gemm.h
index 6c8d718d0..789878a73 100644
--- a/cpp/src/distance/detail/fused_distance_nn/gemm.h
+++ b/cpp/src/distance/detail/fused_distance_nn/gemm.h
@@ -409,4 +409,4 @@ struct FusedDistanceNNGemm<double,
 
 }  // namespace kernel
 }  // namespace gemm
-}  // namespace cuvs
\ No newline at end of file
+}  // namespace cuvs
diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp
index 1693ac930..98c74e285 100644
--- a/cpp/src/neighbors/brute_force_c.cpp
+++ b/cpp/src/neighbors/brute_force_c.cpp
@@ -67,8 +67,8 @@ void _search(cuvsResources_t res,
   using queries_mdspan_type   = raft::device_matrix_view<T const, int64_t, QueriesLayoutT>;
   using neighbors_mdspan_type = raft::device_matrix_view<int64_t, int64_t, raft::row_major>;
   using distances_mdspan_type = raft::device_matrix_view<float, int64_t, raft::row_major>;
-  using prefilter_mds_type    = raft::device_vector_view<const uint32_t, int64_t>;
-  using prefilter_bmp_type    = cuvs::core::bitmap_view<const uint32_t, int64_t>;
+  using prefilter_mds_type    = raft::device_vector_view<uint32_t, int64_t>;
+  using prefilter_bmp_type    = cuvs::core::bitmap_view<uint32_t, int64_t>;
 
   auto queries_mds   = cuvs::core::from_dlpack<queries_mdspan_type>(queries_tensor);
   auto neighbors_mds = cuvs::core::from_dlpack<neighbors_mdspan_type>(neighbors_tensor);
@@ -85,14 +85,14 @@ void _search(cuvsResources_t res,
                                          distances_mds,
                                          cuvs::neighbors::filtering::none_sample_filter{});
   } else if (prefilter.type == BITMAP) {
-    auto prefilter_ptr  = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
-    auto prefilter_mds  = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
-    auto prefilter_view = cuvs::neighbors::filtering::bitmap_filter(
-      prefilter_bmp_type((const uint32_t*)prefilter_mds.data_handle(),
+    auto prefilter_ptr   = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
+    auto prefilter_mds   = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
+    const auto prefilter = cuvs::neighbors::filtering::bitmap_filter(
+      prefilter_bmp_type((uint32_t*)prefilter_mds.data_handle(),
                          queries_mds.extent(0),
                          index_ptr->dataset().extent(0)));
     cuvs::neighbors::brute_force::search(
-      *res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter_view);
+      *res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter);
   } else {
     RAFT_FAIL("Unsupported prefilter type: BITSET");
   }
diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp
index 02b7a566e..333fffb42 100644
--- a/cpp/src/neighbors/cagra_c.cpp
+++ b/cpp/src/neighbors/cagra_c.cpp
@@ -28,6 +28,7 @@
 #include <cuvs/core/interop.hpp>
 #include <cuvs/neighbors/cagra.h>
 #include <cuvs/neighbors/cagra.hpp>
+#include <cuvs/neighbors/common.h>
 
 #include <fstream>
 
@@ -86,13 +87,51 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor*
   return index;
 }
 
+template <typename T>
+void _extend(cuvsResources_t res,
+             cuvsCagraExtendParams params,
+             cuvsCagraIndex index,
+             DLManagedTensor* additional_dataset_tensor,
+             DLManagedTensor* return_tensor)
+{
+  auto dataset          = additional_dataset_tensor->dl_tensor;
+  auto return_dl_tensor = return_tensor->dl_tensor;
+  auto index_ptr        = reinterpret_cast<cuvs::neighbors::cagra::index<T, uint32_t>*>(index.addr);
+  auto res_ptr          = reinterpret_cast<raft::resources*>(res);
+
+  // TODO: use C struct here (see issue #487)
+  auto extend_params           = cuvs::neighbors::cagra::extend_params();
+  extend_params.max_chunk_size = params.max_chunk_size;
+
+  if (cuvs::core::is_dlpack_device_compatible(dataset) &&
+      cuvs::core::is_dlpack_device_compatible(return_dl_tensor)) {
+    using mdspan_type        = raft::device_matrix_view<T const, int64_t, raft::row_major>;
+    using mdspan_return_type = raft::device_matrix_view<T, int64_t, raft::row_major>;
+    auto mds                 = cuvs::core::from_dlpack<mdspan_type>(additional_dataset_tensor);
+    auto return_mds          = cuvs::core::from_dlpack<mdspan_return_type>(return_tensor);
+    cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr, return_mds);
+  } else if (cuvs::core::is_dlpack_host_compatible(dataset) &&
+             cuvs::core::is_dlpack_host_compatible(return_dl_tensor)) {
+    using mdspan_type        = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+    using mdspan_return_type = raft::device_matrix_view<T, int64_t, raft::row_major>;
+    auto mds                 = cuvs::core::from_dlpack<mdspan_type>(additional_dataset_tensor);
+    auto return_mds          = cuvs::core::from_dlpack<mdspan_return_type>(return_tensor);
+    cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr, return_mds);
+  } else {
+    RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+              dataset.dtype.code,
+              dataset.dtype.bits);
+  }
+}
+
 template <typename T>
 void _search(cuvsResources_t res,
              cuvsCagraSearchParams params,
              cuvsCagraIndex index,
              DLManagedTensor* queries_tensor,
              DLManagedTensor* neighbors_tensor,
-             DLManagedTensor* distances_tensor)
+             DLManagedTensor* distances_tensor,
+             cuvsFilter filter)
 {
   auto res_ptr   = reinterpret_cast<raft::resources*>(res);
   auto index_ptr = reinterpret_cast<cuvs::neighbors::cagra::index<T, uint32_t>*>(index.addr);
@@ -118,8 +157,26 @@ void _search(cuvsResources_t res,
   auto queries_mds            = cuvs::core::from_dlpack<queries_mdspan_type>(queries_tensor);
   auto neighbors_mds          = cuvs::core::from_dlpack<neighbors_mdspan_type>(neighbors_tensor);
   auto distances_mds          = cuvs::core::from_dlpack<distances_mdspan_type>(distances_tensor);
-  cuvs::neighbors::cagra::search(
-    *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds);
+  if (filter.type == NO_FILTER) {
+    cuvs::neighbors::cagra::search(
+      *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds);
+  } else if (filter.type == BITSET) {
+    using filter_mdspan_type    = raft::device_vector_view<std::uint32_t, int64_t, raft::row_major>;
+    auto removed_indices_tensor = reinterpret_cast<DLManagedTensor*>(filter.addr);
+    auto removed_indices = cuvs::core::from_dlpack<filter_mdspan_type>(removed_indices_tensor);
+    cuvs::core::bitset_view<std::uint32_t, int64_t> removed_indices_bitset(
+      removed_indices, index_ptr->dataset().extent(0));
+    auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset);
+    cuvs::neighbors::cagra::search(*res_ptr,
+                                   search_params,
+                                   *index_ptr,
+                                   queries_mds,
+                                   neighbors_mds,
+                                   distances_mds,
+                                   bitset_filter_obj);
+  } else {
+    RAFT_FAIL("Unsupported filter type: BITMAP");
+  }
 }
 
 template <typename T>
@@ -209,12 +266,37 @@ extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res,
   });
 }
 
+extern "C" cuvsError_t cuvsCagraExtend(cuvsResources_t res,
+                                       cuvsCagraExtendParams_t params,
+                                       DLManagedTensor* additional_dataset_tensor,
+                                       cuvsCagraIndex_t index_c_ptr,
+                                       DLManagedTensor* return_dataset_tensor)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto dataset = additional_dataset_tensor->dl_tensor;
+    auto index   = *index_c_ptr;
+
+    if ((dataset.dtype.code == kDLFloat) && (dataset.dtype.bits == 32)) {
+      _extend<float>(res, *params, index, additional_dataset_tensor, return_dataset_tensor);
+    } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) {
+      _extend<int8_t>(res, *params, index, additional_dataset_tensor, return_dataset_tensor);
+    } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) {
+      _extend<uint8_t>(res, *params, index, additional_dataset_tensor, return_dataset_tensor);
+    } else {
+      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+                dataset.dtype.code,
+                dataset.dtype.bits);
+    }
+  });
+}
+
 extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
                                        cuvsCagraSearchParams_t params,
                                        cuvsCagraIndex_t index_c_ptr,
                                        DLManagedTensor* queries_tensor,
                                        DLManagedTensor* neighbors_tensor,
-                                       DLManagedTensor* distances_tensor)
+                                       DLManagedTensor* distances_tensor,
+                                       cuvsFilter filter)
 {
   return cuvs::core::translate_exceptions([=] {
     auto queries   = queries_tensor->dl_tensor;
@@ -237,11 +319,14 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
     RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries");
 
     if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
-      _search<float>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
+      _search<float>(
+        res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
     } else if (queries.dtype.code == kDLInt && queries.dtype.bits == 8) {
-      _search<int8_t>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
+      _search<int8_t>(
+        res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
     } else if (queries.dtype.code == kDLUInt && queries.dtype.bits == 8) {
-      _search<uint8_t>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
+      _search<uint8_t>(
+        res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
     } else {
       RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d",
                 queries.dtype.code,
@@ -285,6 +370,17 @@ extern "C" cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionPar
   return cuvs::core::translate_exceptions([=] { delete params; });
 }
 
+extern "C" cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsCagraExtendParams{.max_chunk_size = 0}; });
+}
+
+extern "C" cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
 extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params)
 {
   return cuvs::core::translate_exceptions([=] {
diff --git a/cpp/src/neighbors/cagra_optimize.cu b/cpp/src/neighbors/cagra_optimize.cu
index cba66a5e9..436d5d321 100644
--- a/cpp/src/neighbors/cagra_optimize.cu
+++ b/cpp/src/neighbors/cagra_optimize.cu
@@ -38,4 +38,4 @@ void optimize(raft::resources const& handle,
                                raft::memory_type::host>>(handle, knn_graph, new_graph);
 }
 
-}  // namespace cuvs::neighbors::cagra
\ No newline at end of file
+}  // namespace cuvs::neighbors::cagra
diff --git a/cpp/src/neighbors/cagra_serialize.cuh b/cpp/src/neighbors/cagra_serialize.cuh
index e193c0630..1b153b2ce 100644
--- a/cpp/src/neighbors/cagra_serialize.cuh
+++ b/cpp/src/neighbors/cagra_serialize.cuh
@@ -20,51 +20,56 @@
 
 namespace cuvs::neighbors::cagra {
 
-#define CUVS_INST_CAGRA_SERIALIZE(DTYPE)                                                      \
-  void serialize(raft::resources const& handle,                                               \
-                 const std::string& filename,                                                 \
-                 const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index,                 \
-                 bool include_dataset)                                                        \
-  {                                                                                           \
-    cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>(                               \
-      handle, filename, index, include_dataset);                                              \
-  };                                                                                          \
-                                                                                              \
-  void deserialize(raft::resources const& handle,                                             \
-                   const std::string& filename,                                               \
-                   cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index)                     \
-  {                                                                                           \
-    cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, filename, index);    \
-  };                                                                                          \
-  void serialize(raft::resources const& handle,                                               \
-                 std::ostream& os,                                                            \
-                 const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index,                 \
-                 bool include_dataset)                                                        \
-  {                                                                                           \
-    cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>(                               \
-      handle, os, index, include_dataset);                                                    \
-  }                                                                                           \
-                                                                                              \
-  void deserialize(raft::resources const& handle,                                             \
-                   std::istream& is,                                                          \
-                   cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index)                     \
-  {                                                                                           \
-    cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, is, index);          \
-  }                                                                                           \
-                                                                                              \
-  void serialize_to_hnswlib(raft::resources const& handle,                                    \
-                            std::ostream& os,                                                 \
-                            const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index)      \
-  {                                                                                           \
-    cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>(handle, os, index); \
-  }                                                                                           \
-                                                                                              \
-  void serialize_to_hnswlib(raft::resources const& handle,                                    \
-                            const std::string& filename,                                      \
-                            const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index)      \
-  {                                                                                           \
-    cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>(                    \
-      handle, filename, index);                                                               \
+#define CUVS_INST_CAGRA_SERIALIZE(DTYPE)                                                   \
+  void serialize(raft::resources const& handle,                                            \
+                 const std::string& filename,                                              \
+                 const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index,              \
+                 bool include_dataset)                                                     \
+  {                                                                                        \
+    cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>(                            \
+      handle, filename, index, include_dataset);                                           \
+  };                                                                                       \
+                                                                                           \
+  void deserialize(raft::resources const& handle,                                          \
+                   const std::string& filename,                                            \
+                   cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index)                  \
+  {                                                                                        \
+    cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, filename, index); \
+  };                                                                                       \
+  void serialize(raft::resources const& handle,                                            \
+                 std::ostream& os,                                                         \
+                 const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index,              \
+                 bool include_dataset)                                                     \
+  {                                                                                        \
+    cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>(                            \
+      handle, os, index, include_dataset);                                                 \
+  }                                                                                        \
+                                                                                           \
+  void deserialize(raft::resources const& handle,                                          \
+                   std::istream& is,                                                       \
+                   cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index)                  \
+  {                                                                                        \
+    cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, is, index);       \
+  }                                                                                        \
+                                                                                           \
+  void serialize_to_hnswlib(                                                               \
+    raft::resources const& handle,                                                         \
+    std::ostream& os,                                                                      \
+    const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index,                           \
+    std::optional<raft::host_matrix_view<const DTYPE, int64_t, raft::row_major>> dataset)  \
+  {                                                                                        \
+    cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>(                 \
+      handle, os, index, dataset);                                                         \
+  }                                                                                        \
+                                                                                           \
+  void serialize_to_hnswlib(                                                               \
+    raft::resources const& handle,                                                         \
+    const std::string& filename,                                                           \
+    const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index,                           \
+    std::optional<raft::host_matrix_view<const DTYPE, int64_t, raft::row_major>> dataset)  \
+  {                                                                                        \
+    cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>(                 \
+      handle, filename, index, dataset);                                                   \
   }
 
 }  // namespace cuvs::neighbors::cagra
diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
index 453928992..913094e2a 100644
--- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh
+++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
@@ -137,6 +137,31 @@ void add_node_core(
                raft::resource::get_cuda_stream(handle));
     raft::resource::sync_stream(handle);
 
+    // Check search results
+    constexpr int max_warnings = 3;
+    int num_warnings           = 0;
+    for (std::size_t vec_i = 0; vec_i < batch.size(); vec_i++) {
+      std::uint32_t invalid_edges = 0;
+      for (std::uint32_t i = 0; i < base_degree; i++) {
+        if (host_neighbor_indices(vec_i, i) >= old_size) { invalid_edges++; }
+      }
+      if (invalid_edges > 0) {
+        if (num_warnings < max_warnings) {
+          RAFT_LOG_WARN(
+            "Invalid edges found in search results "
+            "(vec_i:%lu, invalid_edges:%lu, degree:%lu, base_degree:%lu)",
+            (uint64_t)vec_i,
+            (uint64_t)invalid_edges,
+            (uint64_t)degree,
+            (uint64_t)base_degree);
+        }
+        num_warnings += 1;
+      }
+    }
+    if (num_warnings > max_warnings) {
+      RAFT_LOG_WARN("The number of queries that contain invalid search results: %d", num_warnings);
+    }
+
     // Step 2: rank-based reordering
 #pragma omp parallel
     {
@@ -147,9 +172,16 @@ void add_node_core(
         for (std::uint32_t i = 0; i < base_degree; i++) {
           std::uint32_t detourable_node_count = 0;
           const auto a_id                     = host_neighbor_indices(vec_i, i);
+          if (a_id >= idx.size()) {
+            // If the node ID is invalid, assign it a detour count of
+            // base_degree + 1 (greater than the maximum) so that the edge to
+            // that node is avoided whenever possible.
+            detourable_node_count_list[i] = std::make_pair(a_id, base_degree + 1);
+            continue;
+          }
           for (std::uint32_t j = 0; j < i; j++) {
             const auto b0_id = host_neighbor_indices(vec_i, j);
-            assert(b0_id < idx.size());
+            if (b0_id >= idx.size()) { continue; }
             for (std::uint32_t k = 0; k < degree; k++) {
               const auto b1_id = updated_graph(b0_id, k);
               if (a_id == b1_id) {
@@ -160,6 +192,7 @@ void add_node_core(
           }
           detourable_node_count_list[i] = std::make_pair(a_id, detourable_node_count);
         }
+
         std::sort(detourable_node_count_list.begin(),
                   detourable_node_count_list.end(),
                   [&](const std::pair<IdxT, std::size_t> a, const std::pair<IdxT, std::size_t> b) {
@@ -181,13 +214,18 @@ void add_node_core(
       const auto target_new_node_id = old_size + batch.offset() + vec_i;
       for (std::size_t i = 0; i < num_rev_edges; i++) {
         const auto target_node_id = updated_graph(old_size + batch.offset() + vec_i, i);
-
+        if (target_node_id >= new_size) {
+          RAFT_FAIL("Invalid node ID found in updated_graph (%u)\n", target_node_id);
+        }
         IdxT replace_id                        = new_size;
         IdxT replace_id_j                      = 0;
         std::size_t replace_num_incoming_edges = 0;
         for (std::int32_t j = degree - 1; j >= static_cast<std::int32_t>(rev_edge_search_range);
              j--) {
-          const auto neighbor_id               = updated_graph(target_node_id, j);
+          const auto neighbor_id = updated_graph(target_node_id, j);
+          if (neighbor_id >= new_size) {
+            RAFT_FAIL("Invalid node ID found in updated_graph (%u)\n", neighbor_id);
+          }
           const std::size_t num_incoming_edges = host_num_incoming_edges(neighbor_id);
           if (num_incoming_edges > replace_num_incoming_edges) {
             // Check duplication
@@ -206,10 +244,6 @@ void add_node_core(
             replace_id_j               = j;
           }
         }
-        if (replace_id >= new_size) {
-          std::fprintf(stderr, "Invalid rev edge index (%u)\n", replace_id);
-          return;
-        }
         updated_graph(target_node_id, replace_id_j) = target_new_node_id;
         rev_edges[i]                                = replace_id;
       }
@@ -221,13 +255,15 @@ void add_node_core(
       const auto rank_based_list_ptr =
         updated_graph.data_handle() + (old_size + batch.offset() + vec_i) * degree;
       const auto rev_edges_return_list_ptr = rev_edges.data();
-      while (num_add < degree) {
+      while ((num_add < degree) &&
+             ((rank_base_i < degree) || (rev_edges_return_i < num_rev_edges))) {
         const auto node_list_ptr =
           interleave_switch == 0 ? rank_based_list_ptr : rev_edges_return_list_ptr;
         auto& node_list_index          = interleave_switch == 0 ? rank_base_i : rev_edges_return_i;
         const auto max_node_list_index = interleave_switch == 0 ? degree : num_rev_edges;
         for (; node_list_index < max_node_list_index; node_list_index++) {
           const auto candidate = node_list_ptr[node_list_index];
+          if (candidate >= new_size) { continue; }
           // Check duplication
           bool dup = false;
           for (std::uint32_t j = 0; j < num_add; j++) {
@@ -244,6 +280,12 @@ void add_node_core(
         }
         interleave_switch = 1 - interleave_switch;
       }
+      if (num_add < degree) {
+        RAFT_FAIL("Number of edges is not enough (target_new_node_id:%lu, num_add:%lu, degree:%lu)",
+                  (uint64_t)target_new_node_id,
+                  (uint64_t)num_add,
+                  (uint64_t)degree);
+      }
       for (std::uint32_t i = 0; i < degree; i++) {
         updated_graph(target_new_node_id, i) = temp[i];
       }
@@ -259,7 +301,9 @@ void add_graph_nodes(
   raft::host_matrix_view<IdxT, std::int64_t> updated_graph_view,
   const cagra::extend_params& params)
 {
-  assert(input_updated_dataset_view.extent(0) >= index.size());
+  if (input_updated_dataset_view.extent(0) < index.size()) {
+    RAFT_FAIL("Updated dataset must be not smaller than the previous index state.");
+  }
 
   const std::size_t initial_dataset_size = index.size();
   const std::size_t new_dataset_size     = input_updated_dataset_view.extent(0);
diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cpp b/cpp/src/neighbors/detail/cagra/cagra_build.cpp
index 574a02097..490dc0f30 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cpp
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cpp
@@ -32,4 +32,4 @@ ivf_pq_params::ivf_pq_params(raft::matrix_extent<int64_t> dataset_extents,
 
   refinement_rate = 2;
 }
-}  // namespace cuvs::neighbors::cagra::graph_build_params
\ No newline at end of file
+}  // namespace cuvs::neighbors::cagra::graph_build_params
diff --git a/cpp/src/neighbors/detail/cagra/cagra_search.cuh b/cpp/src/neighbors/detail/cagra/cagra_search.cuh
index 5778d85a6..b4f701819 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_search.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_search.cuh
@@ -75,7 +75,7 @@ void search_main_core(raft::resources const& res,
   using CagraSampleFilterT_s = typename CagraSampleFilterT_Selector<CagraSampleFilterT>::type;
   std::unique_ptr<search_plan_impl<DataT, IndexT, DistanceT, CagraSampleFilterT_s>> plan =
     factory<DataT, IndexT, DistanceT, CagraSampleFilterT_s>::create(
-      res, params, dataset_desc, queries.extent(1), graph.extent(1), topk);
+      res, params, dataset_desc, queries.extent(1), graph.extent(0), graph.extent(1), topk);
 
   plan->check(topk);
 
diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
index c83da7bb1..4bd761dc6 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
@@ -30,6 +30,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <fstream>
+#include <optional>
 #include <type_traits>
 
 namespace cuvs::neighbors::cagra::detail {
@@ -96,16 +97,19 @@ void serialize(raft::resources const& res,
 }
 
 template <typename T, typename IdxT>
-void serialize_to_hnswlib(raft::resources const& res,
-                          std::ostream& os,
-                          const cuvs::neighbors::cagra::index<T, IdxT>& index_)
+void serialize_to_hnswlib(
+  raft::resources const& res,
+  std::ostream& os,
+  const cuvs::neighbors::cagra::index<T, IdxT>& index_,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
 {
   // static_assert(std::is_same_v<IdxT, int> or std::is_same_v<IdxT, uint32_t>,
   //               "An hnswlib index can only be trained with int32 or uint32 IdxT");
+  int dim = (dataset) ? dataset->extent(1) : index_.dim();
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("cagra::serialize");
   RAFT_LOG_DEBUG("Saving CAGRA index to hnswlib format, size %zu, dim %u",
                  static_cast<size_t>(index_.size()),
-                 index_.dim());
+                 dim);
 
   // offset_level_0
   std::size_t offset_level_0 = 0;
@@ -119,8 +123,8 @@ void serialize_to_hnswlib(raft::resources const& res,
   // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t,
   // labeltype: size_t size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) +
   // dim * sizeof(T) + sizeof(labeltype)
-  auto size_data_per_element = static_cast<std::size_t>(index_.graph_degree() * sizeof(IdxT) + 4 +
-                                                        index_.dim() * sizeof(T) + 8);
+  auto size_data_per_element =
+    static_cast<std::size_t>(index_.graph_degree() * sizeof(IdxT) + 4 + dim * sizeof(T) + 8);
   os.write(reinterpret_cast<char*>(&size_data_per_element), sizeof(std::size_t));
   // label_offset
   std::size_t label_offset = size_data_per_element - 8;
@@ -150,19 +154,29 @@ void serialize_to_hnswlib(raft::resources const& res,
   std::size_t efConstruction = 500;
   os.write(reinterpret_cast<char*>(&efConstruction), sizeof(std::size_t));
 
-  auto dataset = index_.dataset();
   // Remove padding before saving the dataset
-  auto host_dataset = raft::make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
-  RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
-                                  sizeof(T) * host_dataset.extent(1),
-                                  dataset.data_handle(),
-                                  sizeof(T) * dataset.stride(0),
-                                  sizeof(T) * host_dataset.extent(1),
-                                  dataset.extent(0),
-                                  cudaMemcpyDefault,
-                                  raft::resource::get_cuda_stream(res)));
-  raft::resource::sync_stream(res);
-
+  raft::host_matrix<T, int64_t> host_dataset = raft::make_host_matrix<T, int64_t>(0, 0);
+  raft::host_matrix_view<const T, int64_t> host_dataset_view;
+  if (dataset) {
+    host_dataset_view = *dataset;
+  } else {
+    auto dataset = index_.dataset();
+    RAFT_EXPECTS(dataset.size() > 0,
+                 "Invalid CAGRA dataset of size 0 during serialization, shape %zux%zu",
+                 static_cast<size_t>(dataset.extent(0)),
+                 static_cast<size_t>(dataset.extent(1)));
+    host_dataset = raft::make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
+    RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
+                                    sizeof(T) * host_dataset.extent(1),
+                                    dataset.data_handle(),
+                                    sizeof(T) * dataset.stride(0),
+                                    sizeof(T) * host_dataset.extent(1),
+                                    dataset.extent(0),
+                                    cudaMemcpyDefault,
+                                    raft::resource::get_cuda_stream(res)));
+    raft::resource::sync_stream(res);
+    host_dataset_view = raft::make_const_mdspan(host_dataset.view());
+  }
   auto graph = index_.graph();
   auto host_graph =
     raft::make_host_matrix<IdxT, int64_t, raft::row_major>(graph.extent(0), graph.extent(1));
@@ -172,23 +186,48 @@ void serialize_to_hnswlib(raft::resources const& res,
              raft::resource::get_cuda_stream(res));
   raft::resource::sync_stream(res);
 
+  size_t d_report_offset    = index_.size() / 10;  // Report progress in 10% steps.
+  size_t next_report_offset = d_report_offset;
+  const auto start_clock    = std::chrono::system_clock::now();
   // Write one dataset and graph row at a time
+  RAFT_EXPECTS(host_graph.stride(1) == 1, "serialize_to_hnswlib expects row_major graph");
+  RAFT_EXPECTS(host_dataset_view.stride(1) == 1, "serialize_to_hnswlib expects row_major dataset");
+  RAFT_EXPECTS(host_dataset_view.extent(0) >= index_.size(), "dataset has fewer rows than index");
+  size_t bytes_written = 0;
+  float GiB            = 1 << 30;
   for (std::size_t i = 0; i < index_.size(); i++) {
     auto graph_degree = static_cast<int>(index_.graph_degree());
     os.write(reinterpret_cast<char*>(&graph_degree), sizeof(int));
 
-    for (std::size_t j = 0; j < index_.graph_degree(); ++j) {
-      auto graph_elem = host_graph(i, j);
-      os.write(reinterpret_cast<char*>(&graph_elem), sizeof(IdxT));
-    }
-
-    auto data_row = host_dataset.data_handle() + (index_.dim() * i);
-    for (std::size_t j = 0; j < index_.dim(); ++j) {
-      auto data_elem = static_cast<T>(host_dataset(i, j));
-      os.write(reinterpret_cast<char*>(&data_elem), sizeof(T));
-    }
+    IdxT* graph_row = &host_graph(i, 0);
+    os.write(reinterpret_cast<char*>(graph_row), sizeof(IdxT) * index_.graph_degree());
 
+    const T* data_row = &host_dataset_view(i, 0);
+    os.write(reinterpret_cast<const char*>(data_row), sizeof(T) * dim);
     os.write(reinterpret_cast<char*>(&i), sizeof(std::size_t));
+
+    bytes_written +=
+      dim * sizeof(T) + index_.graph_degree() * sizeof(IdxT) + sizeof(int) + sizeof(size_t);
+    const auto end_clock = std::chrono::system_clock::now();
+    if (!os.good()) { RAFT_FAIL("Error writing HNSW file, row %zu", i); }
+    if (i > next_report_offset) {
+      next_report_offset += d_report_offset;
+      const auto time =
+        std::chrono::duration_cast<std::chrono::microseconds>(end_clock - start_clock).count() *
+        1e-6;
+      float throughput      = bytes_written / GiB / time;
+      float rows_throughput = i / time;
+      float ETA             = (index_.size() - i) / rows_throughput;
+      RAFT_LOG_DEBUG(
+        "# Writing rows %12zu / %12zu (%3.2f %%), %3.2f GiB/sec, ETA %d:%3.1f, written %3.2f GiB\r",
+        i,
+        static_cast<size_t>(index_.size()),  // cast so the argument matches %zu
+        i / static_cast<double>(index_.size()) * 100,
+        throughput,
+        int(ETA / 60),
+        std::fmod(ETA, 60.0f),
+        bytes_written / GiB);
+    }
   }
 
   for (std::size_t i = 0; i < index_.size(); i++) {
@@ -199,14 +238,16 @@ void serialize_to_hnswlib(raft::resources const& res,
 }
 
 template <typename T, typename IdxT>
-void serialize_to_hnswlib(raft::resources const& res,
-                          const std::string& filename,
-                          const cuvs::neighbors::cagra::index<T, IdxT>& index_)
+void serialize_to_hnswlib(
+  raft::resources const& res,
+  const std::string& filename,
+  const cuvs::neighbors::cagra::index<T, IdxT>& index_,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
 {
   std::ofstream of(filename, std::ios::out | std::ios::binary);
   if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); }
 
-  detail::serialize_to_hnswlib<T, IdxT>(res, of, index_);
+  detail::serialize_to_hnswlib<T, IdxT>(res, of, index_, dataset);
 
   of.close();
   if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); }
diff --git a/cpp/src/neighbors/detail/cagra/device_common.hpp b/cpp/src/neighbors/detail/cagra/device_common.hpp
index 7ec3d4d9e..e5886582d 100644
--- a/cpp/src/neighbors/detail/cagra/device_common.hpp
+++ b/cpp/src/neighbors/detail/cagra/device_common.hpp
@@ -109,7 +109,9 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes(
   const IndexT* __restrict__ seed_ptr,  // [num_seeds]
   const uint32_t num_seeds,
   IndexT* __restrict__ visited_hash_ptr,
-  const uint32_t hash_bitlen,
+  const uint32_t visited_hash_bitlen,
+  IndexT* __restrict__ traversed_hash_ptr,
+  const uint32_t traversed_hash_bitlen,
   const uint32_t block_id   = 0,
   const uint32_t num_blocks = 1)
 {
@@ -145,19 +147,29 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes(
 
     const unsigned lane_id = threadIdx.x & ((1u << team_size_bits) - 1u);
     if (valid_i && lane_id == 0) {
-      if (best_index_team_local != raft::upper_bound<IndexT>() &&
-          hashmap::insert(visited_hash_ptr, hash_bitlen, best_index_team_local)) {
-        result_distances_ptr[i] = best_norm2_team_local;
-        result_indices_ptr[i]   = best_index_team_local;
-      } else {
-        result_distances_ptr[i] = raft::upper_bound<DistanceT>();
-        result_indices_ptr[i]   = raft::upper_bound<IndexT>();
+      if (best_index_team_local != raft::upper_bound<IndexT>()) {
+        if (hashmap::insert(visited_hash_ptr, visited_hash_bitlen, best_index_team_local) == 0) {
+          // Deactivate this entry as insertion into visited hash table has failed.
+          best_norm2_team_local = raft::upper_bound<DistanceT>();
+          best_index_team_local = raft::upper_bound<IndexT>();
+        } else if ((traversed_hash_ptr != nullptr) &&
+                   hashmap::search<IndexT, 1>(
+                     traversed_hash_ptr, traversed_hash_bitlen, best_index_team_local)) {
+          // Deactivate this entry as it has been already used by others.
+          best_norm2_team_local = raft::upper_bound<DistanceT>();
+          best_index_team_local = raft::upper_bound<IndexT>();
+        }
       }
+      result_distances_ptr[i] = best_norm2_team_local;
+      result_indices_ptr[i]   = best_index_team_local;
     }
   }
 }
 
-template <typename IndexT, typename DistanceT, typename DATASET_DESCRIPTOR_T>
+template <typename IndexT,
+          typename DistanceT,
+          typename DATASET_DESCRIPTOR_T,
+          int STATIC_RESULT_POSITION = 1>
 RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes(
   IndexT* __restrict__ result_child_indices_ptr,
   DistanceT* __restrict__ result_child_distances_ptr,
@@ -168,13 +180,17 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes(
   const uint32_t knn_k,
   // hashmap
   IndexT* __restrict__ visited_hashmap_ptr,
-  const uint32_t hash_bitlen,
+  const uint32_t visited_hash_bitlen,
+  IndexT* __restrict__ traversed_hashmap_ptr,
+  const uint32_t traversed_hash_bitlen,
   const IndexT* __restrict__ parent_indices,
   const IndexT* __restrict__ internal_topk_list,
-  const uint32_t search_width)
+  const uint32_t search_width,
+  int* __restrict__ result_position = nullptr,
+  const int max_result_position     = 0)
 {
   constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask<IndexT>::value;
-  constexpr IndexT invalid_index    = raft::upper_bound<IndexT>();
+  constexpr IndexT invalid_index    = ~static_cast<IndexT>(0);
 
   // Read child indices of parents from knn graph and check if the distance
   // computaiton is necessary.
@@ -186,11 +202,22 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes(
       child_id             = knn_graph[(i % knn_k) + (static_cast<int64_t>(knn_k) * parent_id)];
     }
     if (child_id != invalid_index) {
-      if (hashmap::insert(visited_hashmap_ptr, hash_bitlen, child_id) == 0) {
+      if (hashmap::insert(visited_hashmap_ptr, visited_hash_bitlen, child_id) == 0) {
+        // Deactivate this entry as insertion into visited hash table has failed.
+        child_id = invalid_index;
+      } else if ((traversed_hashmap_ptr != nullptr) &&
+                 hashmap::search<IndexT, 1>(
+                   traversed_hashmap_ptr, traversed_hash_bitlen, child_id)) {
+        // Deactivate this entry as this has been already used by others.
         child_id = invalid_index;
       }
     }
-    result_child_indices_ptr[i] = child_id;
+    if (STATIC_RESULT_POSITION) {
+      result_child_indices_ptr[i] = child_id;
+    } else if (child_id != invalid_index) {
+      int j                       = atomicSub(result_position, 1) - 1;
+      result_child_indices_ptr[j] = child_id;
+    }
   }
   __syncthreads();
 
@@ -201,9 +228,11 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes(
   const auto compute_distance = dataset_desc.compute_distance_impl;
   const auto args             = dataset_desc.args.load();
   const bool lead_lane        = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0;
+  const uint32_t ofst         = STATIC_RESULT_POSITION ? 0 : result_position[0];
   for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += blockDim.x >> team_size_bits) {
-    const bool valid_i  = i < num_k;
-    const auto child_id = valid_i ? result_child_indices_ptr[i] : invalid_index;
+    const auto j        = i + ofst;
+    const bool valid_i  = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position);
+    const auto child_id = valid_i ? result_child_indices_ptr[j] : invalid_index;
 
     // We should be calling `dataset_desc.compute_distance(..)` here as follows:
     // > const auto child_dist = dataset_desc.compute_distance(child_id, child_id != invalid_index);
@@ -213,9 +242,10 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes(
       (child_id != invalid_index) ? compute_distance(args, child_id)
                                   : (lead_lane ? raft::upper_bound<DistanceT>() : 0),
       team_size_bits);
+    __syncwarp();
 
     // Store the distance
-    if (valid_i && lead_lane) { result_child_distances_ptr[i] = child_dist; }
+    if (valid_i && lead_lane) { result_child_distances_ptr[j] = child_dist; }
   }
 }
 
diff --git a/cpp/src/neighbors/detail/cagra/factory.cuh b/cpp/src/neighbors/detail/cagra/factory.cuh
index e6e7ff64f..d2ae5c55b 100644
--- a/cpp/src/neighbors/detail/cagra/factory.cuh
+++ b/cpp/src/neighbors/detail/cagra/factory.cuh
@@ -40,10 +40,11 @@ class factory {
     search_params const& params,
     const dataset_descriptor_host<DataT, IndexT, DistanceT>& dataset_desc,
     int64_t dim,
+    int64_t dataset_size,
     int64_t graph_degree,
     uint32_t topk)
   {
-    search_plan_impl_base plan(params, dim, graph_degree, topk);
+    search_plan_impl_base plan(params, dim, dataset_size, graph_degree, topk);
     return dispatch_kernel(res, plan, dataset_desc);
   }
 
@@ -56,15 +57,15 @@ class factory {
     if (plan.algo == search_algo::SINGLE_CTA) {
       return std::make_unique<
         single_cta_search::search<DataT, IndexT, DistanceT, CagraSampleFilterT>>(
-        res, plan, dataset_desc, plan.dim, plan.graph_degree, plan.topk);
+        res, plan, dataset_desc, plan.dim, plan.dataset_size, plan.graph_degree, plan.topk);
     } else if (plan.algo == search_algo::MULTI_CTA) {
       return std::make_unique<
         multi_cta_search::search<DataT, IndexT, DistanceT, CagraSampleFilterT>>(
-        res, plan, dataset_desc, plan.dim, plan.graph_degree, plan.topk);
+        res, plan, dataset_desc, plan.dim, plan.dataset_size, plan.graph_degree, plan.topk);
     } else {
       return std::make_unique<
         multi_kernel_search::search<DataT, IndexT, DistanceT, CagraSampleFilterT>>(
-        res, plan, dataset_desc, plan.dim, plan.graph_degree, plan.topk);
+        res, plan, dataset_desc, plan.dim, plan.dataset_size, plan.graph_degree, plan.topk);
     }
   }
 };
diff --git a/cpp/src/neighbors/detail/cagra/hashmap.hpp b/cpp/src/neighbors/detail/cagra/hashmap.hpp
index 2c62dda90..652e1db22 100644
--- a/cpp/src/neighbors/detail/cagra/hashmap.hpp
+++ b/cpp/src/neighbors/detail/cagra/hashmap.hpp
@@ -23,6 +23,8 @@
 
 #include <cstdint>
 
+#define HASHMAP_LINEAR_PROBING
+
 // #pragma GCC diagnostic push
 // #pragma GCC diagnostic ignored
 // #pragma GCC diagnostic pop
@@ -38,11 +40,11 @@ RAFT_DEVICE_INLINE_FUNCTION void init(IdxT* const table,
 {
   if (threadIdx.x < FIRST_TID) return;
   for (unsigned i = threadIdx.x - FIRST_TID; i < get_size(bitlen); i += blockDim.x - FIRST_TID) {
-    table[i] = utils::get_max_value<IdxT>();
+    table[i] = ~static_cast<IdxT>(0);
   }
 }
 
-template <class IdxT>
+template <class IdxT, unsigned SUPPORT_REMOVE = 0>
 RAFT_DEVICE_INLINE_FUNCTION uint32_t insert(IdxT* const table,
                                             const uint32_t bitlen,
                                             const IdxT key)
@@ -50,7 +52,7 @@ RAFT_DEVICE_INLINE_FUNCTION uint32_t insert(IdxT* const table,
   // Open addressing is used for collision resolution
   const uint32_t size     = get_size(bitlen);
   const uint32_t bit_mask = size - 1;
-#if 1
+#ifdef HASHMAP_LINEAR_PROBING
   // Linear probing
   IdxT index                = (key ^ (key >> bitlen)) & bit_mask;
   constexpr uint32_t stride = 1;
@@ -59,32 +61,89 @@ RAFT_DEVICE_INLINE_FUNCTION uint32_t insert(IdxT* const table,
   uint32_t index        = key & bit_mask;
   const uint32_t stride = (key >> bitlen) * 2 + 1;
 #endif
+  constexpr IdxT hashval_empty = ~static_cast<IdxT>(0);
+  const IdxT removed_key       = key | utils::gen_index_msb_1_mask<IdxT>::value;
   for (unsigned i = 0; i < size; i++) {
-    const IdxT old = atomicCAS(&table[index], ~static_cast<IdxT>(0), key);
-    if (old == ~static_cast<IdxT>(0)) {
+    const IdxT old = atomicCAS(&table[index], hashval_empty, key);
+    if (old == hashval_empty) {
       return 1;
     } else if (old == key) {
       return 0;
+    } else if (SUPPORT_REMOVE) {
+      // Re-claim a tombstone of this key; keep the CAS result in IdxT (no 32-bit truncation).
+      const IdxT old_removed = atomicCAS(&table[index], removed_key, key);
+      if (old_removed == removed_key) {
+        return 1;
+      } else if (old_removed == key) {
+        return 0;
+      }
     }
     index = (index + stride) & bit_mask;
   }
   return 0;
 }
 
-template <unsigned TEAM_SIZE, class IdxT>
-RAFT_DEVICE_INLINE_FUNCTION uint32_t insert(IdxT* const table,
-                                            const uint32_t bitlen,
-                                            const IdxT key)
+template <class IdxT, unsigned SUPPORT_REMOVE = 0>
+RAFT_DEVICE_INLINE_FUNCTION uint32_t search(IdxT* table, const uint32_t bitlen, const IdxT key)
 {
-  IdxT ret = 0;
-  if (threadIdx.x % TEAM_SIZE == 0) { ret = insert(table, bitlen, key); }
-  for (unsigned offset = 1; offset < TEAM_SIZE; offset *= 2) {
-    ret |= __shfl_xor_sync(0xffffffff, ret, offset);
+  const uint32_t size     = get_size(bitlen);
+  const uint32_t bit_mask = size - 1;
+#ifdef HASHMAP_LINEAR_PROBING
+  // Linear probing
+  IdxT index                = (key ^ (key >> bitlen)) & bit_mask;
+  constexpr uint32_t stride = 1;
+#else
+  // Double hashing
+  IdxT index            = key & bit_mask;
+  const uint32_t stride = (key >> bitlen) * 2 + 1;
+#endif
+  constexpr IdxT hashval_empty = ~static_cast<IdxT>(0);
+  const IdxT removed_key       = key | utils::gen_index_msb_1_mask<IdxT>::value;
+  for (unsigned i = 0; i < size; i++) {
+    const IdxT val = table[index];
+    if (val == key) {
+      return 1;
+    } else if (val == hashval_empty) {
+      return 0;
+    } else if (SUPPORT_REMOVE) {
+      // Check if this key has been removed.
+      if (val == removed_key) { return 0; }
+    }
+    index = (index + stride) & bit_mask;
   }
-  return ret;
+  return 0;
 }
 
 template <class IdxT>
+RAFT_DEVICE_INLINE_FUNCTION uint32_t remove(IdxT* table, const uint32_t bitlen, const IdxT key)
+{
+  const uint32_t size     = get_size(bitlen);
+  const uint32_t bit_mask = size - 1;
+#ifdef HASHMAP_LINEAR_PROBING
+  // Linear probing
+  IdxT index                = (key ^ (key >> bitlen)) & bit_mask;
+  constexpr uint32_t stride = 1;
+#else
+  // Double hashing
+  IdxT index            = key & bit_mask;
+  const uint32_t stride = (key >> bitlen) * 2 + 1;
+#endif
+  constexpr IdxT hashval_empty = ~static_cast<IdxT>(0);
+  const IdxT removed_key       = key | utils::gen_index_msb_1_mask<IdxT>::value;
+  for (unsigned i = 0; i < size; i++) {
+    // Tombstone the key by setting its MSB; the CAS result stays IdxT so 64-bit keys compare.
+    const IdxT old = atomicCAS(&table[index], key, removed_key);
+    if (old == key) {
+      return 1;
+    } else if (old == hashval_empty) {
+      return 0;
+    }
+    index = (index + stride) & bit_mask;
+  }
+  return 0;
+}
+
+template <class IdxT, unsigned SUPPORT_REMOVE = 0>
 RAFT_DEVICE_INLINE_FUNCTION uint32_t
 insert(unsigned team_size, IdxT* const table, const uint32_t bitlen, const IdxT key)
 {
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh
index 9cb432bcb..2f1c0332a 100644
--- a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh
@@ -102,33 +102,36 @@ struct search : public search_plan_impl<DataT, IndexT, DistanceT, SAMPLE_FILTER_
          search_params params,
          const dataset_descriptor_host<DataT, IndexT, DistanceT>& dataset_desc,
          int64_t dim,
+         int64_t dataset_size,
          int64_t graph_degree,
          uint32_t topk)
-    : base_type(res, params, dataset_desc, dim, graph_degree, topk),
+    : base_type(res, params, dataset_desc, dim, dataset_size, graph_degree, topk),
       intermediate_indices(res),
       intermediate_distances(res),
       topk_workspace(res)
-
   {
     set_params(res, params);
   }
 
   void set_params(raft::resources const& res, const search_params& params)
   {
-    constexpr unsigned muti_cta_itopk_size = 32;
-    this->itopk_size                       = muti_cta_itopk_size;
-    search_width                           = 1;
+    constexpr unsigned multi_cta_itopk_size = 32;
+    this->itopk_size                        = multi_cta_itopk_size;
+    search_width                            = 1;
     num_cta_per_query =
-      max(params.search_width, raft::ceildiv(params.itopk_size, (size_t)muti_cta_itopk_size));
-    result_buffer_size = itopk_size + search_width * graph_degree;
+      max(params.search_width, raft::ceildiv(params.itopk_size, (size_t)multi_cta_itopk_size));
+    result_buffer_size = itopk_size + (search_width * graph_degree);
     typedef raft::Pow2<32> AlignBytes;
     unsigned result_buffer_size_32 = AlignBytes::roundUp(result_buffer_size);
     // constexpr unsigned max_result_buffer_size = 256;
     RAFT_EXPECTS(result_buffer_size_32 <= 256, "Result buffer size cannot exceed 256");
 
-    smem_size = dataset_desc.smem_ws_size_in_bytes +
-                (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 +
-                sizeof(uint32_t) * search_width + sizeof(uint32_t);
+    smem_size =
+      dataset_desc.smem_ws_size_in_bytes +
+      (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * (result_buffer_size_32) +
+      sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) +  // local_visited_hashmap_ptr
+      sizeof(INDEX_T) * search_width +                          // parent_indices_buffer
+      sizeof(int);                                              // result_position
     RAFT_LOG_DEBUG("# smem_size: %u", smem_size);
 
     //
@@ -222,6 +225,7 @@ struct search : public search_plan_impl<DataT, IndexT, DistanceT, SAMPLE_FILTER_
                    thread_block_size,
                    result_buffer_size,
                    smem_size,
+                   small_hash_bitlen,
                    hash_bitlen,
                    hashmap.data(),
                    num_cta_per_query,
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh
index 8d34ab0d6..60dcbab09 100644
--- a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh
@@ -36,6 +36,7 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search {
     uint32_t block_size,                                                      \
     uint32_t result_buffer_size,                                              \
     uint32_t smem_size,                                                       \
+    uint32_t small_hash_bitlen,                                               \
     int64_t hash_bitlen,                                                      \
     IndexT* hashmap_ptr,                                                      \
     uint32_t num_cta_per_query,                                               \
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh
index 7535ff217..ea738b137 100644
--- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh
@@ -54,54 +54,76 @@ namespace multi_cta_search {
 
 // #define _CLK_BREAKDOWN
 
-template <class INDEX_T>
-RAFT_DEVICE_INLINE_FUNCTION void pickup_next_parents(
-  INDEX_T* const next_parent_indices,  // [search_width]
-  const uint32_t search_width,
-  INDEX_T* const itopk_indices,  // [num_itopk]
-  const size_t num_itopk,
-  uint32_t* const terminate_flag)
+template <class INDEX_T, class DISTANCE_T>
+RAFT_DEVICE_INLINE_FUNCTION void pickup_next_parent(
+  INDEX_T* const next_parent_indices,
+  INDEX_T* const itopk_indices,       // [itopk_size * 2]
+  DISTANCE_T* const itopk_distances,  // [itopk_size * 2]
+  INDEX_T* const hash_ptr,
+  const uint32_t hash_bitlen)
 {
+  constexpr uint32_t itopk_size      = 32;
   constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
-  const unsigned warp_id             = threadIdx.x / 32;
+  constexpr INDEX_T invalid_index    = ~static_cast<INDEX_T>(0);
+
+  const unsigned warp_id = threadIdx.x / 32;
   if (warp_id > 0) { return; }
-  const unsigned lane_id = threadIdx.x % 32;
-  for (uint32_t i = lane_id; i < search_width; i += 32) {
-    next_parent_indices[i] = utils::get_max_value<INDEX_T>();
-  }
-  uint32_t max_itopk = num_itopk;
-  if (max_itopk % 32) { max_itopk += 32 - (max_itopk % 32); }
-  uint32_t num_new_parents = 0;
-  for (uint32_t j = lane_id; j < max_itopk; j += 32) {
-    INDEX_T index;
-    int new_parent = 0;
-    if (j < num_itopk) {
-      index = itopk_indices[j];
-      if ((index & index_msb_1_mask) == 0) {  // check if most significant bit is set
-        new_parent = 1;
-      }
+  if (threadIdx.x == 0) { next_parent_indices[0] = invalid_index; }
+  __syncwarp();
+
+  int j = -1;
+  for (unsigned i = threadIdx.x; i < itopk_size * 2; i += 32) {
+    INDEX_T index    = itopk_indices[i];
+    int is_invalid   = 0;
+    int is_candidate = 0;
+    if (index == invalid_index) {
+      is_invalid = 1;
+    } else if (index & index_msb_1_mask) {
+    } else {
+      is_candidate = 1;
     }
-    const uint32_t ballot_mask = __ballot_sync(0xffffffff, new_parent);
-    if (new_parent) {
-      const auto i = __popc(ballot_mask & ((1 << lane_id) - 1)) + num_new_parents;
-      if (i < search_width) {
-        next_parent_indices[i] = j;
-        itopk_indices[j] |= index_msb_1_mask;  // set most significant bit as used node
+
+    const auto ballot_mask  = __ballot_sync(0xffffffff, is_candidate);
+    const auto candidate_id = __popc(ballot_mask & ((1u << threadIdx.x) - 1u));
+    for (int k = 0; k < __popc(ballot_mask); k++) {
+      int flag_done = 0;
+      if (is_candidate && candidate_id == k) {
+        is_candidate = 0;
+        if (hashmap::insert<INDEX_T, 1>(hash_ptr, hash_bitlen, index)) {
+          // Use this candidate as next parent
+          index |= index_msb_1_mask;  // set most significant bit as used node
+          if (i < itopk_size) {
+            next_parent_indices[0] = i;
+            itopk_indices[i]       = index;
+          } else {
+            next_parent_indices[0] = j;
+            // Move the next parent node from i-th position to j-th position
+            itopk_indices[j]   = index;
+            itopk_distances[j] = itopk_distances[i];
+            itopk_indices[i]   = invalid_index;
+            itopk_distances[i] = utils::get_max_value<DISTANCE_T>();
+          }
+          flag_done = 1;
+        } else {
+          // Deactivate the node since it has been used by other CTA.
+          itopk_indices[i]   = invalid_index;
+          itopk_distances[i] = utils::get_max_value<DISTANCE_T>();
+          is_invalid         = 1;
+        }
       }
+      if (__any_sync(0xffffffff, (flag_done > 0))) { return; }
+    }
+    if (i < itopk_size) {
+      j = 31 - __clz(__ballot_sync(0xffffffff, is_invalid));
+      if (j < 0) { return; }
     }
-    num_new_parents += __popc(ballot_mask);
-    if (num_new_parents >= search_width) { break; }
   }
-  if (threadIdx.x == 0 && (num_new_parents == 0)) { *terminate_flag = 1; }
 }
 
 template <unsigned MAX_ELEMENTS, class INDEX_T>
-RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort(
-  float* distances,  // [num_elements]
-  INDEX_T* indices,  // [num_elements]
-  const uint32_t num_elements,
-  const uint32_t num_itopk  // num_itopk <= num_elements
-)
+RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort(float* distances,  // [num_elements]
+                                                      INDEX_T* indices,  // [num_elements]
+                                                      const uint32_t num_elements)
 {
   const unsigned warp_id = threadIdx.x / 32;
   if (warp_id > 0) { return; }
@@ -116,15 +138,15 @@ RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort(
       val[i] = indices[j];
     } else {
       key[i] = utils::get_max_value<float>();
-      val[i] = utils::get_max_value<INDEX_T>();
+      val[i] = ~static_cast<INDEX_T>(0);
     }
   }
   /* Warp Sort */
   bitonic::warp_sort<float, INDEX_T, N>(key, val);
-  /* Store itopk sorted results */
+  /* Store sorted results */
   for (unsigned i = 0; i < N; i++) {
     unsigned j = (N * lane_id) + i;
-    if (j < num_itopk) {
+    if (j < num_elements) {
       distances[j] = key[i];
       indices[j]   = val[i];
     }
@@ -148,11 +170,11 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel(
   const uint64_t rand_xor_mask,
   const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr,  // [num_queries, num_seeds]
   const uint32_t num_seeds,
+  const uint32_t visited_hash_bitlen,
   typename DATASET_DESCRIPTOR_T::INDEX_T* const
-    visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
-  const uint32_t hash_bitlen,
+    traversed_hashmap_ptr,  // [num_queries, 1 << traversed_hash_bitlen]
+  const uint32_t traversed_hash_bitlen,
   const uint32_t itopk_size,
-  const uint32_t search_width,
   const uint32_t min_iteration,
   const uint32_t max_iteration,
   uint32_t* const num_executed_iterations, /* stats */
@@ -185,12 +207,12 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel(
   extern __shared__ uint8_t smem[];
 
   // Layout of result_buffer
-  // +----------------+------------------------------+---------+
-  // | internal_top_k | neighbors of parent nodes    | padding |
-  // | <itopk_size>   | <search_width * graph_degree> | upto 32 |
-  // +----------------+------------------------------+---------+
-  // |<---          result_buffer_size           --->|
-  const auto result_buffer_size    = itopk_size + (search_width * graph_degree);
+  // +----------------+---------+---------------------------+
+  // | internal_top_k | padding | neighbors of parent nodes |
+  // | <itopk_size>   | upto 32 | <graph_degree>            |
+  // +----------------+---------+---------------------------+
+  // |<---        result_buffer_size_32                 --->|
+  const auto result_buffer_size    = itopk_size + graph_degree;
   const auto result_buffer_size_32 = raft::round_up_safe<uint32_t>(result_buffer_size, 32);
   assert(result_buffer_size_32 <= MAX_ELEMENTS);
 
@@ -201,22 +223,23 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel(
     reinterpret_cast<INDEX_T*>(smem + dataset_desc->smem_ws_size_in_bytes());
   auto* __restrict__ result_distances_buffer =
     reinterpret_cast<DISTANCE_T*>(result_indices_buffer + result_buffer_size_32);
-  auto* __restrict__ parent_indices_buffer =
+  auto* __restrict__ local_visited_hashmap_ptr =
     reinterpret_cast<INDEX_T*>(result_distances_buffer + result_buffer_size_32);
-  auto* __restrict__ terminate_flag =
-    reinterpret_cast<uint32_t*>(parent_indices_buffer + search_width);
+  auto* __restrict__ parent_indices_buffer =
+    reinterpret_cast<INDEX_T*>(local_visited_hashmap_ptr + hashmap::get_size(visited_hash_bitlen));
+  auto* __restrict__ result_position = reinterpret_cast<int*>(parent_indices_buffer + 1);
 
-#if 0
-    /* debug */
-    for (unsigned i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
-        result_indices_buffer[i] = utils::get_max_value<INDEX_T>();
-        result_distances_buffer[i] = utils::get_max_value<DISTANCE_T>();
-    }
-#endif
+  INDEX_T* const local_traversed_hashmap_ptr =
+    traversed_hashmap_ptr + (hashmap::get_size(traversed_hash_bitlen) * query_id);
 
-  if (threadIdx.x == 0) { terminate_flag[0] = 0; }
-  INDEX_T* const local_visited_hashmap_ptr =
-    visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * query_id);
+  constexpr INDEX_T invalid_index    = ~static_cast<INDEX_T>(0);
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+
+  for (unsigned i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
+    result_indices_buffer[i]   = invalid_index;
+    result_distances_buffer[i] = utils::get_max_value<DISTANCE_T>();
+  }
+  hashmap::init<INDEX_T>(local_visited_hashmap_ptr, visited_hash_bitlen);
   __syncthreads();
   _CLK_REC(clk_init);
 
@@ -229,13 +252,15 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel(
   device::compute_distance_to_random_nodes(result_indices_buffer,
                                            result_distances_buffer,
                                            *dataset_desc,
-                                           result_buffer_size,
+                                           graph_degree,
                                            num_distilation,
                                            rand_xor_mask,
                                            local_seed_ptr,
                                            num_seeds,
                                            local_visited_hashmap_ptr,
-                                           hash_bitlen,
+                                           visited_hash_bitlen,
+                                           local_traversed_hashmap_ptr,
+                                           traversed_hash_bitlen,
                                            block_id,
                                            num_blocks);
   __syncthreads();
@@ -243,50 +268,90 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel(
 
   uint32_t iter = 0;
   while (1) {
-    // topk with bitonic sort
     _CLK_START();
-    topk_by_bitonic_sort<MAX_ELEMENTS, INDEX_T>(result_distances_buffer,
-                                                result_indices_buffer,
-                                                itopk_size + (search_width * graph_degree),
-                                                itopk_size);
+    if (threadIdx.x < 32) {
+      // [1st warp] Topk with bitonic sort
+      topk_by_bitonic_sort<MAX_ELEMENTS, INDEX_T>(
+        result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+    }
+    __syncthreads();
     _CLK_REC(clk_topk);
 
-    if (iter + 1 == max_iteration) {
-      __syncthreads();
-      break;
-    }
+    if (iter + 1 >= max_iteration) { break; }
 
-    // pick up next parents
     _CLK_START();
-    pickup_next_parents<INDEX_T>(
-      parent_indices_buffer, search_width, result_indices_buffer, itopk_size, terminate_flag);
+    if (threadIdx.x < 32) {
+      // [1st warp] Pick up a next parent
+      pickup_next_parent<INDEX_T, DISTANCE_T>(parent_indices_buffer,
+                                              result_indices_buffer,
+                                              result_distances_buffer,
+                                              local_traversed_hashmap_ptr,
+                                              traversed_hash_bitlen);
+    } else {
+      // [Other warps] Reset visited hashmap
+      hashmap::init<INDEX_T>(local_visited_hashmap_ptr, visited_hash_bitlen, 32);
+    }
+    __syncthreads();
     _CLK_REC(clk_pickup_parents);
 
-    __syncthreads();
-    if (*terminate_flag && iter >= min_iteration) { break; }
+    if ((parent_indices_buffer[0] == invalid_index) && (iter >= min_iteration)) { break; }
 
-    // compute the norms between child nodes and query node
     _CLK_START();
-    device::compute_distance_to_child_nodes(result_indices_buffer + itopk_size,
-                                            result_distances_buffer + itopk_size,
-                                            *dataset_desc,
-                                            knn_graph,
-                                            graph_degree,
-                                            local_visited_hashmap_ptr,
-                                            hash_bitlen,
-                                            parent_indices_buffer,
-                                            result_indices_buffer,
-                                            search_width);
-    _CLK_REC(clk_compute_distance);
+    for (unsigned i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
+      INDEX_T index = result_indices_buffer[i];
+      if (index == invalid_index) { continue; }
+      if ((i >= itopk_size) && (index & index_msb_1_mask)) {
+        // Remove nodes kicked out of the itopk list from the traversed hash table.
+        hashmap::remove<INDEX_T>(
+          local_traversed_hashmap_ptr, traversed_hash_bitlen, index & ~index_msb_1_mask);
+        result_indices_buffer[i]   = invalid_index;
+        result_distances_buffer[i] = utils::get_max_value<DISTANCE_T>();
+      } else {
+        // Restore visited hashmap by putting nodes on result buffer in it.
+        index &= ~index_msb_1_mask;
+        hashmap::insert(local_visited_hashmap_ptr, visited_hash_bitlen, index);
+      }
+    }
+    // Initialize buffer for compute_distance_to_child_nodes.
+    if (threadIdx.x == blockDim.x - 1) { result_position[0] = result_buffer_size_32; }
+    __syncthreads();
+
+    // Compute the norms between child nodes and query node
+    device::compute_distance_to_child_nodes<INDEX_T, DISTANCE_T, DATASET_DESCRIPTOR_T, 0>(
+      result_indices_buffer,
+      result_distances_buffer,
+      *dataset_desc,
+      knn_graph,
+      graph_degree,
+      local_visited_hashmap_ptr,
+      visited_hash_bitlen,
+      local_traversed_hashmap_ptr,
+      traversed_hash_bitlen,
+      parent_indices_buffer,
+      result_indices_buffer,
+      1,
+      result_position,
+      result_buffer_size_32);
+    // __syncthreads();
+
+    // Check the state of the nodes in the result buffer which were not updated
+    // by the compute_distance_to_child_nodes above, and if it cannot be used as
+    // a parent node, it is deactivated.
+    for (uint32_t i = threadIdx.x; i < result_position[0]; i += blockDim.x) {
+      INDEX_T index = result_indices_buffer[i];
+      if (index == invalid_index || index & index_msb_1_mask) { continue; }
+      if (hashmap::search<INDEX_T, 1>(local_traversed_hashmap_ptr, traversed_hash_bitlen, index)) {
+        result_indices_buffer[i]   = invalid_index;
+        result_distances_buffer[i] = utils::get_max_value<DISTANCE_T>();
+      }
+    }
     __syncthreads();
+    _CLK_REC(clk_compute_distance);
 
     // Filtering
     if constexpr (!std::is_same<SAMPLE_FILTER_T,
                                 cuvs::neighbors::filtering::none_sample_filter>::value) {
-      constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
-      const INDEX_T invalid_index        = utils::get_max_value<INDEX_T>();
-
-      for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) {
+      for (unsigned p = threadIdx.x; p < 1; p += blockDim.x) {
         if (parent_indices_buffer[p] != invalid_index) {
           const auto parent_id =
             result_indices_buffer[parent_indices_buffer[p]] & ~index_msb_1_mask;
@@ -303,36 +368,64 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel(
     iter++;
   }
 
-  // Post process for filtering
+  // Filtering
   if constexpr (!std::is_same<SAMPLE_FILTER_T,
                               cuvs::neighbors::filtering::none_sample_filter>::value) {
-    constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
-    const INDEX_T invalid_index        = utils::get_max_value<INDEX_T>();
-
-    for (unsigned i = threadIdx.x; i < itopk_size + search_width * graph_degree; i += blockDim.x) {
-      const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask;
-      if (node_id != (invalid_index & ~index_msb_1_mask) && !sample_filter(query_id, node_id)) {
-        // If the parent must not be in the resulting top-k list, remove from the parent list
-        result_distances_buffer[i] = utils::get_max_value<DISTANCE_T>();
+    for (uint32_t i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
+      INDEX_T index = result_indices_buffer[i];
+      if (index == invalid_index) { continue; }
+      index &= ~index_msb_1_mask;
+      if (!sample_filter(query_id, index)) {
         result_indices_buffer[i]   = invalid_index;
+        result_distances_buffer[i] = utils::get_max_value<DISTANCE_T>();
       }
     }
-
-    __syncthreads();
-    topk_by_bitonic_sort<MAX_ELEMENTS, INDEX_T>(result_distances_buffer,
-                                                result_indices_buffer,
-                                                itopk_size + (search_width * graph_degree),
-                                                itopk_size);
     __syncthreads();
   }
 
-  for (uint32_t i = threadIdx.x; i < itopk_size; i += blockDim.x) {
-    uint32_t j = i + (itopk_size * (cta_id + (num_cta_per_query * query_id)));
-    if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[i]; }
-    constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
-
-    result_indices_ptr[j] =
-      result_indices_buffer[i] & ~index_msb_1_mask;  // clear most significant bit
+  // Output search results (1st warp only).
+  if (threadIdx.x < 32) {
+    uint32_t offset = 0;
+    for (uint32_t i = threadIdx.x; i < result_buffer_size_32; i += 32) {
+      INDEX_T index = result_indices_buffer[i];
+      bool is_valid = false;
+      if (index != invalid_index) {
+        if (index & index_msb_1_mask) {
+          is_valid = true;
+          index &= ~index_msb_1_mask;
+        } else if ((offset < itopk_size) &&
+                   hashmap::insert<INDEX_T, 1>(
+                     local_traversed_hashmap_ptr, traversed_hash_bitlen, index)) {
+          // If a node that is not used as a parent can be inserted into
+          // the traversed hash table, it is considered a valid result.
+          is_valid = true;
+        }
+      }
+      const auto mask = __ballot_sync(0xffffffff, is_valid);
+      if (is_valid) {
+        const auto j = offset + __popc(mask & ((1u << threadIdx.x) - 1u));
+        if (j < itopk_size) {
+          uint32_t k            = j + (itopk_size * (cta_id + (num_cta_per_query * query_id)));
+          result_indices_ptr[k] = index & ~index_msb_1_mask;
+          if (result_distances_ptr != nullptr) {
+            result_distances_ptr[k] = result_distances_buffer[i];
+          }
+        } else {
+          // If it is valid and registered in the traversed hash table but is
+          // not output as a result, it is removed from the hash table.
+          hashmap::remove<INDEX_T>(local_traversed_hashmap_ptr, traversed_hash_bitlen, index);
+        }
+      }
+      offset += __popc(mask);
+    }
+    // If the number of outputs is insufficient, fill in with invalid results.
+    for (uint32_t i = offset + threadIdx.x; i < itopk_size; i += 32) {
+      uint32_t k            = i + (itopk_size * (cta_id + (num_cta_per_query * query_id)));
+      result_indices_ptr[k] = invalid_index;
+      if (result_distances_ptr != nullptr) {
+        result_distances_ptr[k] = utils::get_max_value<DISTANCE_T>();
+      }
+    }
   }
 
   if (threadIdx.x == 0 && cta_id == 0 && num_executed_iterations != nullptr) {
@@ -427,8 +520,9 @@ void select_and_run(const dataset_descriptor_host<DataT, IndexT, DistanceT>& dat
                     uint32_t block_size,  //
                     uint32_t result_buffer_size,
                     uint32_t smem_size,
-                    int64_t hash_bitlen,
-                    IndexT* hashmap_ptr,
+                    uint32_t visited_hash_bitlen,
+                    int64_t traversed_hash_bitlen,
+                    IndexT* traversed_hashmap_ptr,
                     uint32_t num_cta_per_query,
                     uint32_t num_seeds,
                     SampleFilterT sample_filter,
@@ -441,9 +535,13 @@ void select_and_run(const dataset_descriptor_host<DataT, IndexT, DistanceT>& dat
   RAFT_CUDA_TRY(
     cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size));
   // Initialize hash table
-  const uint32_t hash_size = hashmap::get_size(hash_bitlen);
-  set_value_batch(
-    hashmap_ptr, hash_size, utils::get_max_value<IndexT>(), hash_size, num_queries, stream);
+  const uint32_t traversed_hash_size = hashmap::get_size(traversed_hash_bitlen);
+  set_value_batch(traversed_hashmap_ptr,
+                  traversed_hash_size,
+                  ~static_cast<IndexT>(0),
+                  traversed_hash_size,
+                  num_queries,
+                  stream);
 
   dim3 block_dims(block_size, 1, 1);
   dim3 grid_dims(num_cta_per_query, num_queries, 1);
@@ -463,10 +561,10 @@ void select_and_run(const dataset_descriptor_host<DataT, IndexT, DistanceT>& dat
                                                        ps.rand_xor_mask,
                                                        dev_seed_ptr,
                                                        num_seeds,
-                                                       hashmap_ptr,
-                                                       hash_bitlen,
+                                                       visited_hash_bitlen,
+                                                       traversed_hashmap_ptr,
+                                                       traversed_hash_bitlen,
                                                        ps.itopk_size,
-                                                       ps.search_width,
                                                        ps.min_iterations,
                                                        ps.max_iterations,
                                                        num_executed_iterations,
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel.cuh
index 1a1dcd579..e5dc29f27 100644
--- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel.cuh
@@ -36,8 +36,9 @@ void select_and_run(const dataset_descriptor_host<DataT, IndexT, DistanceT>& dat
                     uint32_t block_size,  //
                     uint32_t result_buffer_size,
                     uint32_t smem_size,
-                    int64_t hash_bitlen,
-                    IndexT* hashmap_ptr,
+                    uint32_t visited_hash_bitlen,
+                    int64_t traversed_hash_bitlen,
+                    IndexT* traversed_hashmap_ptr,
                     uint32_t num_cta_per_query,
                     uint32_t num_seeds,
                     SampleFilterT sample_filter,
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh
index 469c80a08..cb3e819c9 100644
--- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh
@@ -635,9 +635,10 @@ struct search : search_plan_impl<DataT, IndexT, DistanceT, SAMPLE_FILTER_T> {
          search_params params,
          const dataset_descriptor_host<DataT, IndexT, DistanceT>& dataset_desc,
          int64_t dim,
+         int64_t dataset_size,
          int64_t graph_degree,
          uint32_t topk)
-    : base_type(res, params, dataset_desc, dim, graph_degree, topk),
+    : base_type(res, params, dataset_desc, dim, dataset_size, graph_degree, topk),
       result_indices(res),
       result_distances(res),
       parent_node_list(res),
diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh
index 99254aa50..5fe5b0903 100644
--- a/cpp/src/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh
@@ -108,11 +108,17 @@ struct lightweight_uvector {
 };
 
 struct search_plan_impl_base : public search_params {
+  int64_t dataset_size;
   int64_t dim;
   int64_t graph_degree;
   uint32_t topk;
-  search_plan_impl_base(search_params params, int64_t dim, int64_t graph_degree, uint32_t topk)
-    : search_params(params), dim(dim), graph_degree(graph_degree), topk(topk)
+  search_plan_impl_base(
+    search_params params, int64_t dim, int64_t dataset_size, int64_t graph_degree, uint32_t topk)
+    : search_params(params),
+      dataset_size(dataset_size),  // mem-initializers listed in member declaration order
+      dim(dim),
+      graph_degree(graph_degree),
+      topk(topk)
   {
     if (algo == search_algo::AUTO) {
       const size_t num_sm = raft::getMultiProcessorCount();
@@ -141,7 +147,6 @@ struct search_plan_impl : public search_plan_impl_base {
   size_t small_hash_bitlen;
   size_t small_hash_reset_interval;
   size_t hashmap_size;
-  uint32_t dataset_size;
   uint32_t result_buffer_size;
 
   uint32_t smem_size;
@@ -157,9 +162,10 @@ struct search_plan_impl : public search_plan_impl_base {
                    search_params params,
                    const dataset_descriptor_host<DataT, IndexT, DistanceT>& dataset_desc,
                    int64_t dim,
+                   int64_t dataset_size,
                    int64_t graph_degree,
                    uint32_t topk)
-    : search_plan_impl_base(params, dim, graph_degree, topk),
+    : search_plan_impl_base(params, dim, dataset_size, graph_degree, topk),
       hashmap(res),
       num_executed_iterations(res),
       dev_seed(res),
@@ -193,10 +199,16 @@ struct search_plan_impl : public search_plan_impl_base {
     uint32_t _max_iterations = max_iterations;
     if (max_iterations == 0) {
       if (algo == search_algo::MULTI_CTA) {
-        _max_iterations = 1 + std::min(32 * 1.1, 32 + 10.0);  // TODO(anaruse)
+        constexpr uint32_t mc_itopk_size   = 32;
+        constexpr uint32_t mc_search_width = 1;
+        _max_iterations                    = mc_itopk_size / mc_search_width;
       } else {
-        _max_iterations =
-          1 + std::min((itopk_size / search_width) * 1.1, (itopk_size / search_width) + 10.0);
+        _max_iterations = itopk_size / search_width;
+      }
+      int64_t num_reachable_nodes = 1;
+      while (num_reachable_nodes < dataset_size) {
+        num_reachable_nodes *= max((int64_t)2, graph_degree / 2);
+        _max_iterations += 1;
       }
     }
     if (max_iterations < min_iterations) { _max_iterations = min_iterations; }
@@ -219,88 +231,107 @@ struct search_plan_impl : public search_plan_impl_base {
   // defines hash_bitlen, small_hash_bitlen, small_hash_reset interval, hash_size
   inline void calc_hashmap_params(raft::resources const& res)
   {
-    // for multiple CTA search
-    uint32_t mc_num_cta_per_query = 0;
-    uint32_t mc_search_width      = 0;
-    uint32_t mc_itopk_size        = 0;
-    if (algo == search_algo::MULTI_CTA) {
-      mc_itopk_size        = 32;
-      mc_search_width      = 1;
-      mc_num_cta_per_query = max(search_width, raft::ceildiv(itopk_size, (size_t)32));
-      RAFT_LOG_DEBUG("# mc_itopk_size: %u", mc_itopk_size);
-      RAFT_LOG_DEBUG("# mc_search_width: %u", mc_search_width);
-      RAFT_LOG_DEBUG("# mc_num_cta_per_query: %u", mc_num_cta_per_query);
-    }
-
     // Determine hash size (bit length)
     hashmap_size              = 0;
     hash_bitlen               = 0;
     small_hash_bitlen         = 0;
     small_hash_reset_interval = 1024 * 1024;
     float max_fill_rate       = hashmap_max_fill_rate;
-    while (hashmap_mode == hash_mode::AUTO || hashmap_mode == hash_mode::SMALL) {
-      //
-      // The small-hash reduces hash table size by initializing the hash table
-      // for each iteration and re-registering only the nodes that should not be
-      // re-visited in that iteration. Therefore, the size of small-hash should
-      // be determined based on the internal topk size and the number of nodes
-      // visited per iteration.
-      //
-      const auto max_visited_nodes = itopk_size + (search_width * graph_degree * 1);
-      unsigned min_bitlen          = 8;   // 256
-      unsigned max_bitlen          = 13;  // 8K
-      if (min_bitlen < hashmap_min_bitlen) { min_bitlen = hashmap_min_bitlen; }
-      hash_bitlen = min_bitlen;
-      while (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) {
-        hash_bitlen += 1;
-      }
-      if (hash_bitlen > max_bitlen) {
-        // Switch to normal hash if hashmap_mode is AUTO, otherwise exit.
-        if (hashmap_mode == hash_mode::AUTO) {
-          hash_bitlen = 0;
-          break;
-        } else {
-          RAFT_FAIL(
-            "small-hash cannot be used because the required hash size exceeds the limit (%u)",
-            hashmap::get_size(max_bitlen));
-        }
-      }
-      small_hash_bitlen = hash_bitlen;
+    if (algo == search_algo::MULTI_CTA) {
+      const uint32_t mc_itopk_size = 32;
+      const uint32_t mc_num_cta_per_query =
+        max(search_width, raft::ceildiv(itopk_size, (size_t)mc_itopk_size));
+      RAFT_LOG_DEBUG("# mc_itopk_size: %u", mc_itopk_size);
+      RAFT_LOG_DEBUG("# mc_num_cta_per_query: %u", mc_num_cta_per_query);
       //
-      // Sincc the hash table size is limited to a power of 2, the requirement,
-      // the maximum fill rate, may be satisfied even if the frequency of hash
-      // table reset is reduced to once every 2 or more iterations without
-      // changing the hash table size. In that case, reduce the reset frequency.
+      // [visited_hash_table]
+      // In the multi-CTA algo, the set of visited nodes is tracked in a hash
+      // table that each CTA keeps in shared memory. This hash table is not
+      // shared among CTAs, and it is reset and restored in each iteration.
       //
-      small_hash_reset_interval = 1;
-      while (1) {
-        const auto max_visited_nodes =
-          itopk_size + (search_width * graph_degree * (small_hash_reset_interval + 1));
-        if (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) { break; }
-        small_hash_reset_interval += 1;
+      const uint32_t max_visited_nodes = mc_itopk_size + (graph_degree * 2);
+      small_hash_bitlen                = 8;  // 256
+      while (max_visited_nodes > hashmap::get_size(small_hash_bitlen) * max_fill_rate) {
+        small_hash_bitlen += 1;
       }
-      break;
-    }
-    if (hash_bitlen == 0) {
+      RAFT_EXPECTS(small_hash_bitlen <= 14, "small_hash_bitlen cannot be largen than 14 (16K)");
       //
-      // The size of hash table is determined based on the maximum number of
-      // nodes that may be visited before the search is completed and the
-      // maximum fill rate of the hash table.
+      // [traversed_hash_table]
+      // Whether a node has ever been used as the starting point for a traversal
+      // in each iteration is managed in a separate hash table, which is shared
+      // among the CTAs.
       //
-      uint32_t max_visited_nodes = itopk_size + (search_width * graph_degree * max_iterations);
-      if (algo == search_algo::MULTI_CTA) {
-        max_visited_nodes = mc_itopk_size + (mc_search_width * graph_degree * max_iterations);
-        max_visited_nodes *= mc_num_cta_per_query;
-      }
+      const auto max_traversed_nodes =
+        mc_num_cta_per_query * max((size_t)mc_itopk_size, max_iterations);
       unsigned min_bitlen = 11;  // 2K
       if (min_bitlen < hashmap_min_bitlen) { min_bitlen = hashmap_min_bitlen; }
       hash_bitlen = min_bitlen;
-      while (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) {
+      while (max_traversed_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) {
         hash_bitlen += 1;
       }
-      RAFT_EXPECTS(hash_bitlen <= 20, "hash_bitlen cannot be largen than 20 (1M)");
+      RAFT_EXPECTS(hash_bitlen <= 25, "hash_bitlen cannot be largen than 25 (32M)");
+    } else {
+      while (hashmap_mode == hash_mode::AUTO || hashmap_mode == hash_mode::SMALL) {
+        //
+        // The small-hash reduces hash table size by initializing the hash table
+        // for each iteration and re-registering only the nodes that should not be
+        // re-visited in that iteration. Therefore, the size of small-hash should
+        // be determined based on the internal topk size and the number of nodes
+        // visited per iteration.
+        //
+        const auto max_visited_nodes = itopk_size + (search_width * graph_degree * 1);
+        unsigned min_bitlen          = 8;   // 256
+        unsigned max_bitlen          = 13;  // 8K
+        if (min_bitlen < hashmap_min_bitlen) { min_bitlen = hashmap_min_bitlen; }
+        hash_bitlen = min_bitlen;
+        while (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) {
+          hash_bitlen += 1;
+        }
+        if (hash_bitlen > max_bitlen) {
+          // Switch to normal hash if hashmap_mode is AUTO, otherwise exit.
+          if (hashmap_mode == hash_mode::AUTO) {
+            hash_bitlen = 0;
+            break;
+          } else {
+            RAFT_FAIL(
+              "small-hash cannot be used because the required hash size exceeds the limit (%u)",
+              hashmap::get_size(max_bitlen));
+          }
+        }
+        small_hash_bitlen = hash_bitlen;
+        //
+        // Since the hash table size is limited to a power of 2, the requirement,
+        // the maximum fill rate, may be satisfied even if the frequency of hash
+        // table reset is reduced to once every 2 or more iterations without
+        // changing the hash table size. In that case, reduce the reset frequency.
+        //
+        small_hash_reset_interval = 1;
+        while (1) {
+          const auto max_visited_nodes =
+            itopk_size + (search_width * graph_degree * (small_hash_reset_interval + 1));
+          if (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) { break; }
+          small_hash_reset_interval += 1;
+        }
+        break;
+      }
+      if (hash_bitlen == 0) {
+        //
+        // The size of hash table is determined based on the maximum number of
+        // nodes that may be visited before the search is completed and the
+        // maximum fill rate of the hash table.
+        //
+        uint32_t max_visited_nodes = itopk_size + (search_width * graph_degree * max_iterations);
+        unsigned min_bitlen        = 11;  // 2K
+        if (min_bitlen < hashmap_min_bitlen) { min_bitlen = hashmap_min_bitlen; }
+        hash_bitlen = min_bitlen;
+        while (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) {
+          hash_bitlen += 1;
+        }
+        RAFT_EXPECTS(hash_bitlen <= 20,
+                     "hash_bitlen cannot be largen than 20 (1M). You can decrease itopk_size, "
+                     "search_width or max_iterations to reduce the required hashmap size.");
+      }
     }
-
     RAFT_LOG_DEBUG("# internal topK = %lu", itopk_size);
     RAFT_LOG_DEBUG("# parent size = %lu", search_width);
     RAFT_LOG_DEBUG("# min_iterations = %lu", min_iterations);
diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh
index 161aa8c4a..20070487b 100644
--- a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh
@@ -94,9 +94,10 @@ struct search : search_plan_impl<DataT, IndexT, DistanceT, SAMPLE_FILTER_T> {
          search_params params,
          const dataset_descriptor_host<DataT, IndexT, DistanceT>& dataset_desc,
          int64_t dim,
+         int64_t dataset_size,
          int64_t graph_degree,
          uint32_t topk)
-    : base_type(res, params, dataset_desc, dim, graph_degree, topk)
+    : base_type(res, params, dataset_desc, dim, dataset_size, graph_degree, topk)
   {
     set_params(res);
   }
diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh
index 188862fbb..1e072f540 100644
--- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh
@@ -621,7 +621,9 @@ __device__ void search_core(
                                            local_seed_ptr,
                                            num_seeds,
                                            local_visited_hashmap_ptr,
-                                           hash_bitlen);
+                                           hash_bitlen,
+                                           (INDEX_T*)nullptr,
+                                           0);
   __syncthreads();
   _CLK_REC(clk_compute_1st_distance);
 
@@ -748,6 +750,8 @@ __device__ void search_core(
                                             graph_degree,
                                             local_visited_hashmap_ptr,
                                             hash_bitlen,
+                                            (INDEX_T*)nullptr,
+                                            0,
                                             parent_list_buffer,
                                             result_indices_buffer,
                                             search_width);
diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp
index e129d23e8..07e012349 100644
--- a/cpp/src/neighbors/detail/hnsw.hpp
+++ b/cpp/src/neighbors/detail/hnsw.hpp
@@ -21,64 +21,13 @@
 #include <hnswlib/hnswalg.h>
 #include <hnswlib/hnswlib.h>
 #include <memory>
+#include <omp.h>
+#include <raft/core/logger.hpp>
 #include <random>
 #include <thread>
 
 namespace cuvs::neighbors::hnsw::detail {
 
-// Multithreaded executor
-// The helper function is copied from the hnswlib repository
-// as for some reason, adding vectors to the hnswlib index does not
-// work well with omp parallel for
-template <class Function>
-inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn)
-{
-  if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); }
-
-  if (numThreads == 1) {
-    for (size_t id = start; id < end; id++) {
-      fn(id, 0);
-    }
-  } else {
-    std::vector<std::thread> threads;
-    std::atomic<size_t> current(start);
-
-    // keep track of exceptions in threads
-    // https://stackoverflow.com/a/32428427/1713196
-    std::exception_ptr lastException = nullptr;
-    std::mutex lastExceptMutex;
-
-    for (size_t threadId = 0; threadId < numThreads; ++threadId) {
-      threads.push_back(std::thread([&, threadId] {
-        while (true) {
-          size_t id = current.fetch_add(1);
-
-          if (id >= end) { break; }
-
-          try {
-            fn(id, threadId);
-          } catch (...) {
-            std::unique_lock<std::mutex> lastExcepLock(lastExceptMutex);
-            lastException = std::current_exception();
-            /*
-             * This will work even when current is the largest value that
-             * size_t can fit, because fetch_add returns the previous value
-             * before the increment (what will result in overflow
-             * and produce 0 instead of current + 1).
-             */
-            current = end;
-            break;
-          }
-        }
-      }));
-    }
-    for (auto& thread : threads) {
-      thread.join();
-    }
-    if (lastException) { std::rethrow_exception(lastException); }
-  }
-}
-
 template <typename T>
 struct hnsw_dist_t {
   using type = void;
@@ -163,14 +112,15 @@ template <typename T, HnswHierarchy hierarchy>
 std::enable_if_t<hierarchy == HnswHierarchy::NONE, std::unique_ptr<index<T>>> from_cagra(
   raft::resources const& res,
   const index_params& params,
-  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index)
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
 {
   std::random_device dev;
   std::mt19937 rng(dev());
   std::uniform_int_distribution<std::mt19937::result_type> dist(0);
   auto uuid            = std::to_string(dist(rng));
   std::string filepath = "/tmp/" + uuid + ".bin";
-  cuvs::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index);
+  cuvs::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index, dataset);
 
   index<T>* hnsw_index = nullptr;
   cuvs::neighbors::hnsw::deserialize(
@@ -195,6 +145,10 @@ std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> fro
   } else {
     // move dataset to host, remove padding
     auto cagra_dataset = cagra_index.dataset();
+    RAFT_EXPECTS(cagra_dataset.size() > 0,
+                 "Invalid CAGRA dataset of size 0, shape %zux%zu",
+                 static_cast<size_t>(cagra_dataset.extent(0)),
+                 static_cast<size_t>(cagra_dataset.extent(1)));
     host_dataset =
       raft::make_host_matrix<T, int64_t>(cagra_dataset.extent(0), cagra_dataset.extent(1));
     RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
@@ -209,18 +163,20 @@ std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> fro
     host_dataset_view = host_dataset.view();
   }
   // build upper layers of hnsw index
-  auto hnsw_index =
-    std::make_unique<index_impl<T>>(cagra_index.dim(), cagra_index.metric(), hierarchy);
-  auto appr_algo = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
+  int dim         = host_dataset_view.extent(1);
+  auto hnsw_index = std::make_unique<index_impl<T>>(dim, cagra_index.metric(), hierarchy);
+  auto appr_algo  = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
     hnsw_index->get_space(),
     host_dataset_view.extent(0),
     cagra_index.graph().extent(1) / 2,
     params.ef_construction);
   appr_algo->base_layer_init = false;  // tell hnswlib to build upper layers only
-  ParallelFor(0, host_dataset_view.extent(0), params.num_threads, [&](size_t i, size_t threadId) {
+  auto num_threads           = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
+#pragma omp parallel for num_threads(num_threads)
+  for (int64_t i = 0; i < host_dataset_view.extent(0); i++) {
     appr_algo->addPoint((void*)(host_dataset_view.data_handle() + i * host_dataset_view.extent(1)),
                         i);
-  });
+  }
   appr_algo->base_layer_init = true;  // reset to true to allow addition of new points
 
   // move cagra graph to host
@@ -236,11 +192,13 @@ std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> fro
 // copy cagra graph to hnswlib base layer
 #pragma omp parallel for
   for (size_t i = 0; i < static_cast<size_t>(host_graph.extent(0)); ++i) {
-    auto ll_i = appr_algo->get_linklist0(i);
+    auto hnsw_internal_id = appr_algo->label_lookup_.find(i)->second;
+    auto ll_i             = appr_algo->get_linklist0(hnsw_internal_id);
     appr_algo->setListCount(ll_i, host_graph.extent(1));
     auto* data = (uint32_t*)(ll_i + 1);
     for (size_t j = 0; j < static_cast<size_t>(host_graph.extent(1)); ++j) {
-      data[j] = host_graph(i, j);
+      auto neighbor_internal_id = appr_algo->label_lookup_.find(host_graph(i, j))->second;
+      data[j]                   = neighbor_internal_id;
     }
   }
 
@@ -256,7 +214,7 @@ std::unique_ptr<index<T>> from_cagra(
   std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
 {
   if (params.hierarchy == HnswHierarchy::NONE) {
-    return from_cagra<T, HnswHierarchy::NONE>(res, params, cagra_index);
+    return from_cagra<T, HnswHierarchy::NONE>(res, params, cagra_index, dataset);
   } else if (params.hierarchy == HnswHierarchy::CPU) {
     return from_cagra<T, HnswHierarchy::CPU>(res, params, cagra_index, dataset);
   }
@@ -275,19 +233,15 @@ void extend(raft::resources const& res,
     const_cast<void*>(idx.get_index()));
   auto current_element_count = hnswlib_index->getCurrentElementCount();
   auto new_element_count     = additional_dataset.extent(0);
-  auto num_threads           = params.num_threads == 0 ? std::thread::hardware_concurrency()
-                                                       : static_cast<size_t>(params.num_threads);
+  auto num_threads           = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
 
   hnswlib_index->resizeIndex(current_element_count + new_element_count);
-  ParallelFor(current_element_count,
-              current_element_count + new_element_count,
-              num_threads,
-              [&](size_t i, size_t threadId) {
-                hnswlib_index->addPoint(
-                  (void*)(additional_dataset.data_handle() +
-                          (i - current_element_count) * additional_dataset.extent(1)),
-                  i);
-              });
+#pragma omp parallel for num_threads(num_threads)
+  for (int64_t i = 0; i < additional_dataset.extent(0); i++) {
+    hnswlib_index->addPoint(
+      (void*)(additional_dataset.data_handle() + i * additional_dataset.extent(1)),
+      current_element_count + i);
+  }
 }
 
 template <typename T>
diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh
index f1976e002..5caf84cc7 100644
--- a/cpp/src/neighbors/detail/knn_brute_force.cuh
+++ b/cpp/src/neighbors/detail/knn_brute_force.cuh
@@ -56,9 +56,12 @@
 
 #include <cstdint>
 #include <iostream>
+#include <optional>
 #include <set>
+#include <variant>
 
 namespace cuvs::neighbors::detail {
+
 /**
  * Calculates brute force knn, using a fixed memory budget
  * by tiling over both the rows and columns of pairwise_distances
@@ -82,8 +85,10 @@ void tiled_brute_force_knn(const raft::resources& handle,
                            size_t max_col_tile_size                  = 0,
                            const DistanceT* precomputed_index_norms  = nullptr,
                            const DistanceT* precomputed_search_norms = nullptr,
-                           const uint32_t* filter_bitmap             = nullptr,
-                           DistanceEpilogue distance_epilogue        = raft::identity_op())
+                           const uint32_t* filter_bits               = nullptr,
+                           DistanceEpilogue distance_epilogue        = raft::identity_op(),
+                           cuvs::neighbors::filtering::FilterType filter_type =
+                             cuvs::neighbors::filtering::FilterType::Bitmap)
 {
   // Figure out the number of rows/cols to tile for
   size_t tile_rows = 0;
@@ -245,21 +250,23 @@ void tiled_brute_force_knn(const raft::resources& handle,
         }
       }
 
-      if (filter_bitmap != nullptr) {
-        auto distances_ptr        = temp_distances.data();
-        auto count                = thrust::make_counting_iterator<IndexType>(0);
-        DistanceT masked_distance = select_min ? std::numeric_limits<DistanceT>::infinity()
-                                               : std::numeric_limits<DistanceT>::lowest();
+      auto distances_ptr        = temp_distances.data();
+      auto count                = thrust::make_counting_iterator<IndexType>(0);
+      DistanceT masked_distance = select_min ? std::numeric_limits<DistanceT>::infinity()
+                                             : std::numeric_limits<DistanceT>::lowest();
+
+      if (filter_bits != nullptr) {
+        size_t n_cols = filter_type == cuvs::neighbors::filtering::FilterType::Bitmap ? n : 0;
         thrust::for_each(raft::resource::get_thrust_policy(handle),
                          count,
                          count + current_query_size * current_centroid_size,
                          [=] __device__(IndexType idx) {
                            IndexType row      = i + (idx / current_centroid_size);
                            IndexType col      = j + (idx % current_centroid_size);
-                           IndexType g_idx    = row * n + col;
+                           IndexType g_idx    = row * n_cols + col;
                            IndexType item_idx = (g_idx) >> 5;
                            uint32_t bit_idx   = (g_idx)&31;
-                           uint32_t filter    = filter_bitmap[item_idx];
+                           uint32_t filter    = filter_bits[item_idx];
                            if ((filter & (uint32_t(1) << bit_idx)) == 0) {
                              distances_ptr[idx] = masked_distance;
                            }
@@ -575,12 +582,12 @@ void brute_force_search(
     query_norms ? query_norms->data_handle() : nullptr);
 }
 
-template <typename T, typename IdxT, typename BitmapT, typename DistanceT = float>
+template <typename T, typename IdxT, typename BitsT, typename DistanceT = float>
 void brute_force_search_filtered(
   raft::resources const& res,
   const cuvs::neighbors::brute_force::index<T, DistanceT>& idx,
   raft::device_matrix_view<const T, IdxT, raft::row_major> queries,
-  cuvs::core::bitmap_view<const BitmapT, IdxT> filter,
+  const cuvs::neighbors::filtering::base_filter* filter,
   raft::device_matrix_view<IdxT, IdxT, raft::row_major> neighbors,
   raft::device_matrix_view<DistanceT, IdxT, raft::row_major> distances,
   std::optional<raft::device_vector_view<const DistanceT, IdxT>> query_norms = std::nullopt)
@@ -601,29 +608,42 @@ void brute_force_search_filtered(
                                     metric == cuvs::distance::DistanceType::CosineExpanded),
                "Index must has norms when using Euclidean, IP, and Cosine!");
 
-  IdxT n_queries = queries.extent(0);
-  IdxT n_dataset = idx.dataset().extent(0);
-  IdxT dim       = idx.dataset().extent(1);
-  IdxT k         = neighbors.extent(1);
+  IdxT n_queries                                     = queries.extent(0);
+  IdxT n_dataset                                     = idx.dataset().extent(0);
+  IdxT dim                                           = idx.dataset().extent(1);
+  IdxT k                                             = neighbors.extent(1);
+  cuvs::neighbors::filtering::FilterType filter_type = filter->get_filter_type();
 
   auto stream = raft::resource::get_cuda_stream(res);
 
-  // calc nnz
-  IdxT nnz_h = 0;
-  rmm::device_scalar<IdxT> nnz(0, stream);
-  auto nnz_view = raft::make_device_scalar_view<IdxT>(nnz.data());
-  auto filter_view =
-    raft::make_device_vector_view<const BitmapT, IdxT>(filter.data(), filter.n_elements());
-  IdxT size_h    = n_queries * n_dataset;
-  auto size_view = raft::make_host_scalar_view<const IdxT, IdxT>(&size_h);
-
-  raft::popc(res, filter_view, size_view, nnz_view);
-  raft::copy(&nnz_h, nnz.data(), 1, stream);
+  std::optional<std::variant<const cuvs::core::bitmap_view<BitsT, IdxT>,
+                             const cuvs::core::bitset_view<BitsT, IdxT>>>
+    filter_view;
+
+  IdxT nnz_h     = 0;
+  float sparsity = 0.0f;
+
+  const BitsT* filter_data = nullptr;
+
+  if (filter_type == cuvs::neighbors::filtering::FilterType::Bitmap) {
+    auto actual_filter =
+      dynamic_cast<const cuvs::neighbors::filtering::bitmap_filter<BitsT, int64_t>*>(filter);
+    filter_view.emplace(actual_filter->view());
+    nnz_h    = actual_filter->view().count(res);
+    sparsity = 1.0 - nnz_h / (1.0 * n_queries * n_dataset);
+  } else if (filter_type == cuvs::neighbors::filtering::FilterType::Bitset) {
+    auto actual_filter =
+      dynamic_cast<const cuvs::neighbors::filtering::bitset_filter<BitsT, int64_t>*>(filter);
+    filter_view.emplace(actual_filter->view());
+    nnz_h    = n_queries * actual_filter->view().count(res);
+    sparsity = 1.0 - nnz_h / (1.0 * n_queries * n_dataset);
+  } else {
+    RAFT_FAIL("Unsupported sample filter type");
+  }
 
-  raft::resource::sync_stream(res, stream);
-  float sparsity = (1.0f * nnz_h / (1.0f * n_queries * n_dataset));
+  std::visit([&](const auto& actual_view) { filter_data = actual_view.data(); }, *filter_view);
 
-  if (sparsity > 0.01f) {
+  if (sparsity < 0.9f) {
     raft::resources stream_pool_handle(res);
     raft::resource::set_cuda_stream(stream_pool_handle, stream);
     auto idx_norm = idx.has_norms() ? const_cast<DistanceT*>(idx.norms().data_handle()) : nullptr;
@@ -643,12 +663,12 @@ void brute_force_search_filtered(
                                               0,
                                               idx_norm,
                                               nullptr,
-                                              filter.data());
+                                              filter_data,
+                                              raft::identity_op(),
+                                              filter_type);
   } else {
     auto csr = raft::make_device_csr_matrix<DistanceT, IdxT>(res, n_queries, n_dataset, nnz_h);
-
-    // fill csr
-    raft::sparse::convert::bitmap_to_csr(res, filter, csr);
+    std::visit([&](const auto& actual_view) { actual_view.to_csr(res, csr); }, *filter_view);
 
     // create filter csr view
     auto compressed_csr_view = csr.structure_view();
@@ -664,7 +684,11 @@ void brute_force_search_filtered(
     auto csr_view = raft::make_device_csr_matrix_view<DistanceT, IdxT, IdxT, IdxT>(
       csr.get_elements().data(), compressed_csr_view);
 
-    raft::sparse::linalg::masked_matmul(res, queries, dataset_view, filter, csr_view);
+    std::visit(
+      [&](const auto& actual_view) {
+        raft::sparse::linalg::masked_matmul(res, queries, dataset_view, actual_view, csr_view);
+      },
+      *filter_view);
 
     // post process
     std::optional<raft::device_vector<DistanceT, IdxT>> query_norms_;
@@ -733,21 +757,27 @@ void search(raft::resources const& res,
     return brute_force_search<T, int64_t, DistT>(res, idx, queries, neighbors, distances);
   } catch (const std::bad_cast&) {
   }
+  if constexpr (std::is_same_v<LayoutT, raft::col_major>) {
+    RAFT_FAIL("filtered search isn't available with col_major queries yet");
+  } else {
+    try {
+      auto& sample_filter =
+        dynamic_cast<const cuvs::neighbors::filtering::bitmap_filter<uint32_t, int64_t>&>(
+          sample_filter_ref);
+      return brute_force_search_filtered<T, int64_t, uint32_t, DistT>(
+        res, idx, queries, &sample_filter, neighbors, distances);
+    } catch (const std::bad_cast&) {
+    }
 
-  try {
-    auto& sample_filter =
-      dynamic_cast<const cuvs::neighbors::filtering::bitmap_filter<const uint32_t, int64_t>&>(
-        sample_filter_ref);
-    if constexpr (std::is_same_v<LayoutT, raft::col_major>) {
-      RAFT_FAIL("filtered search isn't available with col_major queries yet");
-    } else {
-      cuvs::core::bitmap_view<const uint32_t, int64_t> sample_filter_view =
-        sample_filter.bitmap_view_;
+    try {
+      auto& sample_filter =
+        dynamic_cast<const cuvs::neighbors::filtering::bitset_filter<uint32_t, int64_t>&>(
+          sample_filter_ref);
       return brute_force_search_filtered<T, int64_t, uint32_t, DistT>(
-        res, idx, queries, sample_filter_view, neighbors, distances);
+        res, idx, queries, &sample_filter, neighbors, distances);
+    } catch (const std::bad_cast&) {
+      RAFT_FAIL("Unsupported sample filter type");
     }
-  } catch (const std::bad_cast&) {
-    RAFT_FAIL("Unsupported sample filter type");
   }
 }
 
diff --git a/cpp/src/neighbors/detail/vamana/greedy_search.cuh b/cpp/src/neighbors/detail/vamana/greedy_search.cuh
index f51c6c91b..4d94bbaa7 100644
--- a/cpp/src/neighbors/detail/vamana/greedy_search.cuh
+++ b/cpp/src/neighbors/detail/vamana/greedy_search.cuh
@@ -30,7 +30,7 @@
 #include <cstdio>
 #include <vector>
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 /* @defgroup greedy_search_detail greedy search
  * @{
@@ -112,13 +112,15 @@ __global__ void GreedySearchKernel(
     DistPair<IdxT, accT> candidate_queue;
   };
 
+  int align_padding = (((dim - 1) / alignof(ShmemLayout)) + 1) * alignof(ShmemLayout) - dim;
+
   // Dynamic shared memory used for blocksort, temp vector storage, and neighborhood list
   extern __shared__ __align__(alignof(ShmemLayout)) char smem[];
 
   size_t smem_offset = sort_smem_size;  // temp sorting memory takes first chunk
 
   T* s_coords = reinterpret_cast<T*>(&smem[smem_offset]);
-  smem_offset += dim * sizeof(T);
+  smem_offset += (dim + align_padding) * sizeof(T);
 
   Node<accT>* topk_pq = reinterpret_cast<Node<accT>*>(&smem[smem_offset]);
   smem_offset += topk * sizeof(Node<accT>);
@@ -283,4 +285,4 @@ __global__ void GreedySearchKernel(
  * @}
  */
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/detail/vamana/macros.cuh b/cpp/src/neighbors/detail/vamana/macros.cuh
index 5692650a0..c290413a2 100644
--- a/cpp/src/neighbors/detail/vamana/macros.cuh
+++ b/cpp/src/neighbors/detail/vamana/macros.cuh
@@ -16,7 +16,7 @@
 
 #pragma once
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 /* Macros to compute the shared memory requirements for CUB primitives used by search and prune */
 #define COMPUTE_SMEM_SIZES(degree, visited_size, DEG, CANDS)                                     \
@@ -79,4 +79,4 @@ namespace cuvs::neighbors::experimental::vamana::detail {
   SEARCH_CALL_SORT(topk, 512);   \
   SEARCH_CALL_SORT(topk, 1024);
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/detail/vamana/priority_queue.cuh b/cpp/src/neighbors/detail/vamana/priority_queue.cuh
index 4b3bd8466..6dc1dc94a 100644
--- a/cpp/src/neighbors/detail/vamana/priority_queue.cuh
+++ b/cpp/src/neighbors/detail/vamana/priority_queue.cuh
@@ -20,7 +20,7 @@
 #include <raft/util/warp_primitives.cuh>
 #include <stdio.h>
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 /***************************************************************************************
 ***************************************************************************************/
@@ -326,4 +326,4 @@ __forceinline__ __device__ void enqueue_all_neighbors(int num_neighbors,
   }
 }
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/detail/vamana/robust_prune.cuh b/cpp/src/neighbors/detail/vamana/robust_prune.cuh
index 8446ac136..182d20c88 100644
--- a/cpp/src/neighbors/detail/vamana/robust_prune.cuh
+++ b/cpp/src/neighbors/detail/vamana/robust_prune.cuh
@@ -19,10 +19,12 @@
 #include <cub/cub.cuh>
 #include <thrust/sort.h>
 
+#include <raft/util/cuda_dev_essentials.cuh>
+
 #include "macros.cuh"
 #include "vamana_structs.cuh"
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 // Load candidates (from query) and previous edges (from nbh_list) into registers (tmp) spanning
 // warp
@@ -145,9 +147,11 @@ __global__ void RobustPruneKernel(
   // Dynamic shared memory used for blocksort, temp vector storage, and neighborhood list
   extern __shared__ __align__(alignof(ShmemLayout)) char smem[];
 
-  T* s_coords = reinterpret_cast<T*>(&smem[sort_smem_size]);
-  DistPair<IdxT, accT>* new_nbh_list =
-    reinterpret_cast<DistPair<IdxT, accT>*>(&smem[dim * sizeof(T) + sort_smem_size]);
+  int align_padding = raft::alignTo<int>(dim, alignof(ShmemLayout)) - dim;
+
+  T* s_coords                        = reinterpret_cast<T*>(&smem[sort_smem_size]);
+  DistPair<IdxT, accT>* new_nbh_list = reinterpret_cast<DistPair<IdxT, accT>*>(
+    &smem[(dim + align_padding) * sizeof(T) + sort_smem_size]);
 
   static __shared__ Point<T, accT> s_query;
   s_query.coords = s_coords;
@@ -245,4 +249,4 @@ __global__ void RobustPruneKernel(
 
 }  // namespace
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
index ec75c99c1..184b024f8 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
@@ -46,7 +46,7 @@
 #include <cstdio>
 #include <vector>
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 /* @defgroup vamana_build_detail vamana build
  * @{
@@ -104,11 +104,12 @@ void batched_insert_vamana(
       "to 1.0");
     max_batchsize = (int)dataset.extent(0);
   }
-  int insert_iters = (int)(params.vamana_iters);
-  double base      = (double)(params.batch_base);
-  float alpha      = (float)(params.alpha);
-  int visited_size = params.visited_size;
-  int queue_size   = params.queue_size;
+  int insert_iters  = (int)(params.vamana_iters);
+  double base       = (double)(params.batch_base);
+  float alpha       = (float)(params.alpha);
+  int visited_size  = params.visited_size;
+  int queue_size    = params.queue_size;
+  int reverse_batch = params.reverse_batchsize;
 
   if ((visited_size & (visited_size - 1)) != 0) {
     RAFT_LOG_WARN("visited_size must be a power of 2, rounding up.");
@@ -152,36 +153,20 @@ void batched_insert_vamana(
   std::vector<IdxT> insert_order;
   create_insert_permutation<IdxT>(insert_order, (uint32_t)N);
 
-  // Memory needed to sort reverse edges - potentially large memory footprint
-  auto edge_dest =
-    raft::make_device_mdarray<IdxT>(res,
-                                    raft::resource::get_large_workspace_resource(res),
-                                    raft::make_extents<int64_t>(max_batchsize, degree));
-  auto edge_src =
-    raft::make_device_mdarray<IdxT>(res,
-                                    raft::resource::get_large_workspace_resource(res),
-                                    raft::make_extents<int64_t>(max_batchsize, degree));
-
-  size_t temp_storage_bytes = max_batchsize * degree * (2 * sizeof(IdxT));
-  RAFT_LOG_DEBUG("Temp storage needed for sorting (bytes): %lu", temp_storage_bytes);
-  auto temp_sort_storage =
-    raft::make_device_mdarray<IdxT>(res,
-                                    raft::resource::get_large_workspace_resource(res),
-                                    raft::make_extents<int64_t>(2 * max_batchsize, degree));
-
   // Calculate the shared memory sizes of each kernel
   int search_smem_sort_size = 0;
   int prune_smem_sort_size  = 0;
   SELECT_SMEM_SIZES(degree, visited_size);  // Sets above 2 variables to appropriate sizes
 
   // Total dynamic shared memory used by GreedySearch
-  int search_smem_total_size =
-    static_cast<int>(search_smem_sort_size + dim * sizeof(T) + visited_size * sizeof(Node<accT>) +
-                     degree * sizeof(int) + queue_size * sizeof(DistPair<IdxT, accT>));
+  int align_padding          = raft::alignTo(dim, 16) - dim;
+  int search_smem_total_size = static_cast<int>(
+    search_smem_sort_size + (dim + align_padding) * sizeof(T) + visited_size * sizeof(Node<accT>) +
+    degree * sizeof(int) + queue_size * sizeof(DistPair<IdxT, accT>));
 
   // Total dynamic shared memory size needed by both RobustPrune calls
-  int prune_smem_total_size =
-    prune_smem_sort_size + dim * sizeof(T) + (degree + visited_size) * sizeof(DistPair<IdxT, accT>);
+  int prune_smem_total_size = prune_smem_sort_size + (dim + align_padding) * sizeof(T) +
+                              (degree + visited_size) * sizeof(DistPair<IdxT, accT>);
 
   RAFT_LOG_DEBUG("Dynamic shared memory usage (bytes): GreedySearch: %d, RobustPrune: %d",
                  search_smem_total_size,
@@ -228,7 +213,6 @@ void batched_insert_vamana(
                                                                  metric,
                                                                  queue_size,
                                                                  search_smem_sort_size);
-
       // Run on candidates of vectors being inserted
       RobustPruneKernel<T, accT, IdxT>
         <<<num_blocks, blockD, prune_smem_total_size, stream>>>(d_graph.view(),
@@ -252,6 +236,16 @@ void batched_insert_vamana(
 
       int total_edges;
       raft::copy(&total_edges, d_total_edges.data_handle(), 1, stream);
+      RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+
+      auto edge_dest =
+        raft::make_device_mdarray<IdxT>(res,
+                                        raft::resource::get_large_workspace_resource(res),
+                                        raft::make_extents<int64_t>(total_edges));
+      auto edge_src =
+        raft::make_device_mdarray<IdxT>(res,
+                                        raft::resource::get_large_workspace_resource(res),
+                                        raft::make_extents<int64_t>(total_edges));
 
       // Create reverse edge list
       create_reverse_edge_list<accT, IdxT>
@@ -261,6 +255,24 @@ void batched_insert_vamana(
                                             edge_src.data_handle(),
                                             edge_dest.data_handle());
 
+      void* d_temp_storage      = nullptr;
+      size_t temp_storage_bytes = 0;
+
+      cub::DeviceMergeSort::SortPairs(d_temp_storage,
+                                      temp_storage_bytes,
+                                      edge_dest.data_handle(),
+                                      edge_src.data_handle(),
+                                      total_edges,
+                                      CmpEdge<IdxT>(),
+                                      stream);
+
+      RAFT_LOG_DEBUG("Temp storage needed for sorting (bytes): %lu", temp_storage_bytes);
+
+      // Round up so the IdxT buffer covers every byte CUB requested (no truncating division).
+      auto temp_sort_storage = raft::make_device_mdarray<IdxT>(
+        res, raft::resource::get_large_workspace_resource(res),
+        raft::make_extents<int64_t>((temp_storage_bytes + sizeof(IdxT) - 1) / sizeof(IdxT)));
+
       // Sort to group reverse edges by destination
       cub::DeviceMergeSort::SortPairs(temp_sort_storage.data_handle(),
                                       temp_storage_bytes,
@@ -279,64 +291,77 @@ void batched_insert_vamana(
                                                 edge_dest.data_handle() + total_edges);
       auto unique_indices = raft::make_device_vector<int>(res, total_edges);
       raft::linalg::map_offset(res, unique_indices.view(), raft::identity_op{});
+
       thrust::unique_by_key(
         edge_dest_vec.begin(), edge_dest_vec.end(), unique_indices.data_handle());
 
-      // Allocate reverse QueryCandidate list based on number of unique destinations
-      // TODO - Do this in batches to reduce memory footprint / support larger datasets
-      auto reverse_list_ptr = raft::make_device_mdarray<QueryCandidates<IdxT, accT>>(
-        res,
-        raft::resource::get_large_workspace_resource(res),
-        raft::make_extents<int64_t>(unique_dests));
-      auto rev_ids =
-        raft::make_device_mdarray<IdxT>(res,
-                                        raft::resource::get_large_workspace_resource(res),
-                                        raft::make_extents<int64_t>(unique_dests, visited_size));
-      auto rev_dists =
-        raft::make_device_mdarray<accT>(res,
-                                        raft::resource::get_large_workspace_resource(res),
-                                        raft::make_extents<int64_t>(unique_dests, visited_size));
-
-      QueryCandidates<IdxT, accT>* reverse_list =
-        static_cast<QueryCandidates<IdxT, accT>*>(reverse_list_ptr.data_handle());
-
-      init_query_candidate_list<IdxT, accT><<<256, blockD, 0, stream>>>(reverse_list,
-                                                                        rev_ids.data_handle(),
-                                                                        rev_dists.data_handle(),
-                                                                        (int)unique_dests,
-                                                                        visited_size);
-
-      // May need more blocks for reverse list
-      num_blocks = min(maxBlocks, unique_dests);
-
-      // Populate reverse list ids and candidate lists from edge_src and edge_dest
-      populate_reverse_list_struct<T, accT, IdxT>
-        <<<num_blocks, blockD, 0, stream>>>(reverse_list,
-                                            edge_src.data_handle(),
-                                            edge_dest.data_handle(),
-                                            unique_indices.data_handle(),
-                                            unique_dests,
-                                            total_edges,
-                                            dataset.extent(0));
-
-      // Recompute distances (avoided keeping it during sorting)
-      recompute_reverse_dists<T, accT, IdxT>
-        <<<num_blocks, blockD, 0, stream>>>(reverse_list, dataset, unique_dests, metric);
-
-      // Call 2nd RobustPrune on reverse query_list
-      RobustPruneKernel<T, accT, IdxT>
-        <<<num_blocks, blockD, prune_smem_total_size, stream>>>(d_graph.view(),
-                                                                raft::make_const_mdspan(dataset),
-                                                                reverse_list_ptr.data_handle(),
-                                                                unique_dests,
-                                                                visited_size,
-                                                                metric,
-                                                                alpha,
-                                                                prune_smem_sort_size);
-
-      // Write new edge lists to graph
-      write_graph_edges_kernel<accT, IdxT><<<num_blocks, blockD, 0, stream>>>(
-        d_graph.view(), reverse_list_ptr.data_handle(), degree, unique_dests);
+      edge_dest_vec.clear();
+      edge_dest_vec.shrink_to_fit();
+
+      // Batch execution of reverse edge creation/application
+      reverse_batch = params.reverse_batchsize;
+      for (int rev_start = 0; rev_start < (int)unique_dests; rev_start += reverse_batch) {
+        if (rev_start + reverse_batch > (int)unique_dests) {
+          reverse_batch = (int)unique_dests - rev_start;
+        }
+
+        // Allocate reverse QueryCandidate list for this batch of unique destinations
+        auto reverse_list_ptr = raft::make_device_mdarray<QueryCandidates<IdxT, accT>>(
+          res,
+          raft::resource::get_large_workspace_resource(res),
+          raft::make_extents<int64_t>(reverse_batch));
+        auto rev_ids =
+          raft::make_device_mdarray<IdxT>(res,
+                                          raft::resource::get_large_workspace_resource(res),
+                                          raft::make_extents<int64_t>(reverse_batch, visited_size));
+        auto rev_dists =
+          raft::make_device_mdarray<accT>(res,
+                                          raft::resource::get_large_workspace_resource(res),
+                                          raft::make_extents<int64_t>(reverse_batch, visited_size));
+
+        QueryCandidates<IdxT, accT>* reverse_list =
+          static_cast<QueryCandidates<IdxT, accT>*>(reverse_list_ptr.data_handle());
+
+        init_query_candidate_list<IdxT, accT><<<256, blockD, 0, stream>>>(reverse_list,
+                                                                          rev_ids.data_handle(),
+                                                                          rev_dists.data_handle(),
+                                                                          (int)reverse_batch,
+                                                                          visited_size);
+
+        // May need more blocks for reverse list
+        num_blocks = min(maxBlocks, reverse_batch);
+
+        // Populate reverse list ids and candidate lists from edge_src and edge_dest
+        populate_reverse_list_struct<T, accT, IdxT>
+          <<<num_blocks, blockD, 0, stream>>>(reverse_list,
+                                              edge_src.data_handle(),
+                                              edge_dest.data_handle(),
+                                              unique_indices.data_handle(),
+                                              unique_dests,
+                                              total_edges,
+                                              dataset.extent(0),
+                                              rev_start,
+                                              reverse_batch);
+
+        // Recompute distances (avoided keeping it during sorting)
+        recompute_reverse_dists<T, accT, IdxT>
+          <<<num_blocks, blockD, 0, stream>>>(reverse_list, dataset, reverse_batch, metric);
+
+        // Call 2nd RobustPrune on reverse query_list
+        RobustPruneKernel<T, accT, IdxT>
+          <<<num_blocks, blockD, prune_smem_total_size, stream>>>(d_graph.view(),
+                                                                  raft::make_const_mdspan(dataset),
+                                                                  reverse_list_ptr.data_handle(),
+                                                                  reverse_batch,
+                                                                  visited_size,
+                                                                  metric,
+                                                                  alpha,
+                                                                  prune_smem_sort_size);
+
+        // Write new edge lists to graph
+        write_graph_edges_kernel<accT, IdxT><<<num_blocks, blockD, 0, stream>>>(
+          d_graph.view(), reverse_list_ptr.data_handle(), degree, reverse_batch);
+      }
 
       start += step_size;
       step_size *= base;
@@ -371,8 +396,6 @@ index<T, IdxT> build(
   RAFT_EXPECTS(params.visited_size > graph_degree, "visited_size must be > graph_degree");
 
   int dim = dataset.extent(1);
-  // TODO - Fix issue with alignment when dataset dimension is odd
-  RAFT_EXPECTS(dim % 2 == 0, "Datasets with an odd number of dimensions not currently supported");
 
   RAFT_LOG_DEBUG("Creating empty graph structure");
   auto vamana_graph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), graph_degree);
@@ -405,4 +428,4 @@ index<T, IdxT> build(
  * @}
  */
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
index c360ae19a..27a17205e 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
@@ -34,7 +34,7 @@
 #include <fstream>
 #include <type_traits>
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 /**
  * Save the index to file.
@@ -117,4 +117,4 @@ void serialize(raft::resources const& res,
   if (!index_of) { RAFT_FAIL("Error writing output %s", file_name.c_str()); }
 }
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh
index f6f0279f7..22678c196 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh
@@ -34,7 +34,7 @@
 
 #include <cuvs/distance/distance.hpp>
 
-namespace cuvs::neighbors::experimental::vamana::detail {
+namespace cuvs::neighbors::vamana::detail {
 
 /* @defgroup vamana_structures vamana structures
  * @{
@@ -170,7 +170,7 @@ __device__ SUMTYPE l2_ILP4(Point<T, SUMTYPE>* src_vec, Point<T, SUMTYPE>* dst_ve
     temp_dst[0] = dst_vec->coords[i];
     if (i + 32 < src_vec->Dim) temp_dst[1] = dst_vec->coords[i + 32];
     if (i + 64 < src_vec->Dim) temp_dst[2] = dst_vec->coords[i + 64];
-    if (i + 92 < src_vec->Dim) temp_dst[3] = dst_vec->coords[i + 96];
+    if (i + 96 < src_vec->Dim) temp_dst[3] = dst_vec->coords[i + 96];
 
     partial_sum[0] = fmaf(
       (src_vec[0].coords[i] - temp_dst[0]), (src_vec[0].coords[i] - temp_dst[0]), partial_sum[0]);
@@ -182,7 +182,7 @@ __device__ SUMTYPE l2_ILP4(Point<T, SUMTYPE>* src_vec, Point<T, SUMTYPE>* dst_ve
       partial_sum[2] = fmaf((src_vec[0].coords[i + 64] - temp_dst[2]),
                             (src_vec[0].coords[i + 64] - temp_dst[2]),
                             partial_sum[2]);
-    if (i + 92 < src_vec->Dim)
+    if (i + 96 < src_vec->Dim)
       partial_sum[3] = fmaf((src_vec[0].coords[i + 96] - temp_dst[3]),
                             (src_vec[0].coords[i + 96] - temp_dst[3]),
                             partial_sum[3]);
@@ -192,6 +192,7 @@ __device__ SUMTYPE l2_ILP4(Point<T, SUMTYPE>* src_vec, Point<T, SUMTYPE>* dst_ve
   for (int offset = 16; offset > 0; offset /= 2) {
     partial_sum[0] += __shfl_down_sync(FULL_BITMASK, partial_sum[0], offset);
   }
+
   return partial_sum[0];
 }
 
@@ -419,22 +420,24 @@ __global__ void populate_reverse_list_struct(QueryCandidates<IdxT, accT>* revers
                                              int* unique_indices,
                                              int unique_dests,
                                              int total_edges,
-                                             int N)
+                                             int N,
+                                             int rev_start,
+                                             int reverse_batch)
 {
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < unique_dests;
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < reverse_batch;
        i += blockDim.x * gridDim.x) {
-    reverse_list[i].queryId = edge_dest[unique_indices[i]];
-    if (i == unique_dests - 1) {
-      reverse_list[i].size = total_edges - unique_indices[i];
+    reverse_list[i].queryId = edge_dest[unique_indices[i + rev_start]];
+    if (rev_start + i == unique_dests - 1) {
+      reverse_list[i].size = total_edges - unique_indices[i + rev_start];
     } else {
-      reverse_list[i].size = unique_indices[i + 1] - unique_indices[i];
+      reverse_list[i].size = unique_indices[i + rev_start + 1] - unique_indices[i + rev_start];
     }
     if (reverse_list[i].size > reverse_list[i].maxSize) {
       reverse_list[i].size = reverse_list[i].maxSize;
     }
 
     for (int j = 0; j < reverse_list[i].size; j++) {
-      reverse_list[i].ids[j] = edge_src[unique_indices[i] + j];
+      reverse_list[i].ids[j] = edge_src[unique_indices[i + rev_start] + j];
     }
     for (int j = reverse_list[i].size; j < reverse_list[i].maxSize; j++) {
       reverse_list[i].ids[j]   = raft::upper_bound<IdxT>();
@@ -475,4 +478,4 @@ __global__ void recompute_reverse_dists(
  * @}
  */
 
-}  // namespace cuvs::neighbors::experimental::vamana::detail
+}  // namespace cuvs::neighbors::vamana::detail
diff --git a/cpp/src/neighbors/hnsw_c.cpp b/cpp/src/neighbors/hnsw_c.cpp
index 0233a510a..628d87e00 100644
--- a/cpp/src/neighbors/hnsw_c.cpp
+++ b/cpp/src/neighbors/hnsw_c.cpp
@@ -123,7 +123,7 @@ extern "C" cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
 {
   return cuvs::core::translate_exceptions([=] {
     *params = new cuvsHnswIndexParams{
-      .hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 2};
+      .hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 0};
   });
 }
 
diff --git a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py
index c435cc6d9..39baeeafa 100644
--- a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py
+++ b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py
@@ -136,7 +136,7 @@
   {                                                                                  \\
     cuvs::neighbors::ivf_flat::detail::extend(                                       \\
       handle, new_vectors, new_indices, idx);                                        \\
-  }                    
+  }
 """
 
 search_macro = """
diff --git a/cpp/src/neighbors/ivf_list.cuh b/cpp/src/neighbors/ivf_list.cuh
index c6335b114..9d8aef503 100644
--- a/cpp/src/neighbors/ivf_list.cuh
+++ b/cpp/src/neighbors/ivf_list.cuh
@@ -192,4 +192,4 @@ enable_if_valid_list_t<ListT> deserialize_list(const raft::resources& handle,
   // Make sure the data is copied from host to device before the host arrays get out of the scope.
   raft::resource::sync_stream(handle);
 }
-}  // namespace cuvs::neighbors::ivf
\ No newline at end of file
+}  // namespace cuvs::neighbors::ivf
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
index 44a1b11fa..0e492da6c 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
@@ -1696,7 +1696,7 @@ auto build(raft::resources const& handle,
                 "Unsupported data type");
 
   std::cout << "using ivf_pq::index_params nrows " << (int)dataset.extent(0) << ", dim "
-            << (int)dataset.extent(1) << ", n_lits " << (int)params.n_lists << ", pq_dim "
+            << (int)dataset.extent(1) << ", n_lists " << (int)params.n_lists << ", pq_dim "
             << (int)params.pq_dim << std::endl;
   RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset");
   RAFT_EXPECTS(n_rows >= params.n_lists, "number of rows can't be less than n_lists");
diff --git a/cpp/src/neighbors/nn_descent_index.cpp b/cpp/src/neighbors/nn_descent_index.cpp
index 25d5b6af8..3fe69e775 100644
--- a/cpp/src/neighbors/nn_descent_index.cpp
+++ b/cpp/src/neighbors/nn_descent_index.cpp
@@ -26,4 +26,4 @@ index_params::index_params(size_t graph_degree, cuvs::distance::DistanceType met
   this->intermediate_graph_degree = 1.5 * graph_degree;
   this->metric                    = metric;
 }
-}  // namespace cuvs::neighbors::nn_descent
\ No newline at end of file
+}  // namespace cuvs::neighbors::nn_descent
diff --git a/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp b/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp
index ed9c41208..4c9378dfc 100644
--- a/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp
+++ b/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp
@@ -34,4 +34,4 @@
 
 instantiate_cuvs_neighbors_refine(int64_t, half, float, int64_t);
 
-#undef instantiate_cuvs_neighbors_refine
\ No newline at end of file
+#undef instantiate_cuvs_neighbors_refine
diff --git a/cpp/src/neighbors/refine/refine_device.cuh b/cpp/src/neighbors/refine/refine_device.cuh
index a5491be0d..606ba58e3 100644
--- a/cpp/src/neighbors/refine/refine_device.cuh
+++ b/cpp/src/neighbors/refine/refine_device.cuh
@@ -159,4 +159,4 @@ void refine_impl(
 {
   detail::refine_device(handle, dataset, queries, neighbor_candidates, indices, distances, metric);
 }
-}  // namespace cuvs::neighbors
\ No newline at end of file
+}  // namespace cuvs::neighbors
diff --git a/cpp/src/neighbors/refine/refine_host.hpp b/cpp/src/neighbors/refine/refine_host.hpp
index dcaccd5a8..105023d59 100644
--- a/cpp/src/neighbors/refine/refine_host.hpp
+++ b/cpp/src/neighbors/refine/refine_host.hpp
@@ -211,4 +211,4 @@ void refine_impl(
   detail::refine_host(dataset, queries, neighbor_candidates, indices, distances, metric);
 }
 
-}  // namespace cuvs::neighbors
\ No newline at end of file
+}  // namespace cuvs::neighbors
diff --git a/cpp/src/neighbors/sample_filter.cuh b/cpp/src/neighbors/sample_filter.cuh
index 258116ed3..b0c61f924 100644
--- a/cpp/src/neighbors/sample_filter.cuh
+++ b/cpp/src/neighbors/sample_filter.cuh
@@ -20,6 +20,7 @@
 #include <raft/core/bitmap.cuh>
 #include <raft/core/bitset.cuh>
 #include <raft/core/detail/macros.hpp>
+#include <raft/sparse/convert/csr.cuh>
 
 #include <cstddef>
 #include <cstdint>
@@ -108,6 +109,13 @@ inline _RAFT_HOST_DEVICE bool bitset_filter<bitset_t, index_t>::operator()(
   return bitset_view_.test(sample_ix);
 }
 
+template <typename bitset_t, typename index_t>
+template <typename csr_matrix_t>
+void bitset_filter<bitset_t, index_t>::to_csr(raft::resources const& handle, csr_matrix_t& csr)
+{
+  raft::sparse::convert::bitset_to_csr(handle, bitset_view_, csr);
+}
+
 template <typename bitmap_t, typename index_t>
 bitmap_filter<bitmap_t, index_t>::bitmap_filter(
   const cuvs::core::bitmap_view<bitmap_t, index_t> bitmap_for_filtering)
@@ -124,4 +132,12 @@ inline _RAFT_HOST_DEVICE bool bitmap_filter<bitmap_t, index_t>::operator()(
 {
   return bitmap_view_.test(query_ix, sample_ix);
 }
+
+template <typename bitmap_t, typename index_t>
+template <typename csr_matrix_t>
+void bitmap_filter<bitmap_t, index_t>::to_csr(raft::resources const& handle, csr_matrix_t& csr)
+{
+  raft::sparse::convert::bitmap_to_csr(handle, bitmap_view_, csr);
+}
+
 }  // namespace cuvs::neighbors::filtering
diff --git a/cpp/src/neighbors/vamana.cuh b/cpp/src/neighbors/vamana.cuh
index 9b9e8d271..964d7a9a0 100644
--- a/cpp/src/neighbors/vamana.cuh
+++ b/cpp/src/neighbors/vamana.cuh
@@ -31,7 +31,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 /**
  * @defgroup VAMANA ANN Graph-based nearest neighbor search
@@ -85,8 +85,7 @@ index<T, IdxT> build(
   const index_params& params,
   raft::mdspan<const T, raft::matrix_extent<int64_t>, raft::row_major, Accessor> dataset)
 {
-  return cuvs::neighbors::experimental::vamana::detail::build<T, IdxT, Accessor>(
-    res, params, dataset);
+  return cuvs::neighbors::vamana::detail::build<T, IdxT, Accessor>(res, params, dataset);
 }
 
 template <typename T, typename IdxT>
@@ -94,9 +93,9 @@ void serialize(raft::resources const& res,
                const std::string& file_prefix,
                const index<T, IdxT>& index_)
 {
-  cuvs::neighbors::experimental::vamana::detail::build<T, IdxT>(res, file_prefix, index_);
+  cuvs::neighbors::vamana::detail::build<T, IdxT>(res, file_prefix, index_);
 }
 
 /** @} */  // end group vamana
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_build_float.cu b/cpp/src/neighbors/vamana_build_float.cu
index b83af6122..0e09d6399 100644
--- a/cpp/src/neighbors/vamana_build_float.cu
+++ b/cpp/src/neighbors/vamana_build_float.cu
@@ -17,27 +17,27 @@
 #include "vamana.cuh"
 #include <cuvs/neighbors/vamana.hpp>
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
-#define RAFT_INST_VAMANA_BUILD(T, IdxT)                                                    \
-  auto build(raft::resources const& handle,                                                \
-             const cuvs::neighbors::experimental::vamana::index_params& params,            \
-             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset)          \
-    ->cuvs::neighbors::experimental::vamana::index<T, IdxT>                                \
-  {                                                                                        \
-    return cuvs::neighbors::experimental::vamana::build<T, IdxT>(handle, params, dataset); \
-  }                                                                                        \
-                                                                                           \
-  auto build(raft::resources const& handle,                                                \
-             const cuvs::neighbors::experimental::vamana::index_params& params,            \
-             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)            \
-    ->cuvs::neighbors::experimental::vamana::index<T, IdxT>                                \
-  {                                                                                        \
-    return cuvs::neighbors::experimental::vamana::build<T, IdxT>(handle, params, dataset); \
+#define RAFT_INST_VAMANA_BUILD(T, IdxT) /* device- and host-view build() */       \
+  auto build(raft::resources const& handle,                                       \
+             const cuvs::neighbors::vamana::index_params& params,                 \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset) \
+    ->cuvs::neighbors::vamana::index<T, IdxT>                                     \
+  {                                                                               \
+    return cuvs::neighbors::vamana::build<T, IdxT>(handle, params, dataset);      \
+  }                                                                               \
+                                                                                  \
+  auto build(raft::resources const& handle,                                       \
+             const cuvs::neighbors::vamana::index_params& params,                 \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)   \
+    ->cuvs::neighbors::vamana::index<T, IdxT>                                     \
+  {                                                                               \
+    return cuvs::neighbors::vamana::build<T, IdxT>(handle, params, dataset);      \
   }
 
 RAFT_INST_VAMANA_BUILD(float, uint32_t);
 
 #undef RAFT_INST_VAMANA_BUILD
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_build_int8.cu b/cpp/src/neighbors/vamana_build_int8.cu
index 91d2cf028..f70b9ea27 100644
--- a/cpp/src/neighbors/vamana_build_int8.cu
+++ b/cpp/src/neighbors/vamana_build_int8.cu
@@ -17,27 +17,27 @@
 #include "vamana.cuh"
 #include <cuvs/neighbors/vamana.hpp>
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
-#define RAFT_INST_VAMANA_BUILD(T, IdxT)                                                    \
-  auto build(raft::resources const& handle,                                                \
-             const cuvs::neighbors::experimental::vamana::index_params& params,            \
-             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset)          \
-    ->cuvs::neighbors::experimental::vamana::index<T, IdxT>                                \
-  {                                                                                        \
-    return cuvs::neighbors::experimental::vamana::build<T, IdxT>(handle, params, dataset); \
-  }                                                                                        \
-                                                                                           \
-  auto build(raft::resources const& handle,                                                \
-             const cuvs::neighbors::experimental::vamana::index_params& params,            \
-             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)            \
-    ->cuvs::neighbors::experimental::vamana::index<T, IdxT>                                \
-  {                                                                                        \
-    return cuvs::neighbors::experimental::vamana::build<T, IdxT>(handle, params, dataset); \
+#define RAFT_INST_VAMANA_BUILD(T, IdxT) /* device- and host-view build() */       \
+  auto build(raft::resources const& handle,                                       \
+             const cuvs::neighbors::vamana::index_params& params,                 \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset) \
+    ->cuvs::neighbors::vamana::index<T, IdxT>                                     \
+  {                                                                               \
+    return cuvs::neighbors::vamana::build<T, IdxT>(handle, params, dataset);      \
+  }                                                                               \
+                                                                                  \
+  auto build(raft::resources const& handle,                                       \
+             const cuvs::neighbors::vamana::index_params& params,                 \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)   \
+    ->cuvs::neighbors::vamana::index<T, IdxT>                                     \
+  {                                                                               \
+    return cuvs::neighbors::vamana::build<T, IdxT>(handle, params, dataset);      \
   }
 
 RAFT_INST_VAMANA_BUILD(int8_t, uint32_t);
 
 #undef RAFT_INST_VAMANA_BUILD
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_build_uint8.cu b/cpp/src/neighbors/vamana_build_uint8.cu
index bba93e7f4..8daf0c065 100644
--- a/cpp/src/neighbors/vamana_build_uint8.cu
+++ b/cpp/src/neighbors/vamana_build_uint8.cu
@@ -17,27 +17,27 @@
 #include "vamana.cuh"
 #include <cuvs/neighbors/vamana.hpp>
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
-#define RAFT_INST_VAMANA_BUILD(T, IdxT)                                                    \
-  auto build(raft::resources const& handle,                                                \
-             const cuvs::neighbors::experimental::vamana::index_params& params,            \
-             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset)          \
-    ->cuvs::neighbors::experimental::vamana::index<T, IdxT>                                \
-  {                                                                                        \
-    return cuvs::neighbors::experimental::vamana::build<T, IdxT>(handle, params, dataset); \
-  }                                                                                        \
-                                                                                           \
-  auto build(raft::resources const& handle,                                                \
-             const cuvs::neighbors::experimental::vamana::index_params& params,            \
-             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)            \
-    ->cuvs::neighbors::experimental::vamana::index<T, IdxT>                                \
-  {                                                                                        \
-    return cuvs::neighbors::experimental::vamana::build<T, IdxT>(handle, params, dataset); \
+#define RAFT_INST_VAMANA_BUILD(T, IdxT) /* device- and host-view build() */       \
+  auto build(raft::resources const& handle,                                       \
+             const cuvs::neighbors::vamana::index_params& params,                 \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset) \
+    ->cuvs::neighbors::vamana::index<T, IdxT>                                     \
+  {                                                                               \
+    return cuvs::neighbors::vamana::build<T, IdxT>(handle, params, dataset);      \
+  }                                                                               \
+                                                                                  \
+  auto build(raft::resources const& handle,                                       \
+             const cuvs::neighbors::vamana::index_params& params,                 \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)   \
+    ->cuvs::neighbors::vamana::index<T, IdxT>                                     \
+  {                                                                               \
+    return cuvs::neighbors::vamana::build<T, IdxT>(handle, params, dataset);      \
   }
 
 RAFT_INST_VAMANA_BUILD(uint8_t, uint32_t);
 
 #undef RAFT_INST_VAMANA_BUILD
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_serialize.cuh b/cpp/src/neighbors/vamana_serialize.cuh
index a49d267b3..b8cb580a8 100644
--- a/cpp/src/neighbors/vamana_serialize.cuh
+++ b/cpp/src/neighbors/vamana_serialize.cuh
@@ -18,7 +18,7 @@
 
 #include "detail/vamana/vamana_serialize.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 /**
  * @defgroup VAMANA graph serialize/derserialize
@@ -28,12 +28,11 @@ namespace cuvs::neighbors::experimental::vamana {
 #define CUVS_INST_VAMANA_SERIALIZE(DTYPE)                                                     \
   void serialize(raft::resources const& handle,                                               \
                  const std::string& file_prefix,                                              \
-                 const cuvs::neighbors::experimental::vamana::index<DTYPE, uint32_t>& index_) \
+                 const cuvs::neighbors::vamana::index<DTYPE, uint32_t>& index_)               \
   {                                                                                           \
-    cuvs::neighbors::experimental::vamana::detail::serialize<DTYPE, uint32_t>(                \
-      handle, file_prefix, index_);                                                           \
+    cuvs::neighbors::vamana::detail::serialize<DTYPE, uint32_t>(handle, file_prefix, index_); \
   };
 
 /** @} */  // end group vamana
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_serialize_float.cu b/cpp/src/neighbors/vamana_serialize_float.cu
index f25369368..8bf7ceb1e 100644
--- a/cpp/src/neighbors/vamana_serialize_float.cu
+++ b/cpp/src/neighbors/vamana_serialize_float.cu
@@ -16,8 +16,8 @@
 
 #include "vamana_serialize.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 CUVS_INST_VAMANA_SERIALIZE(float);
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_serialize_int8.cu b/cpp/src/neighbors/vamana_serialize_int8.cu
index 1cd54b198..0f87f67ce 100644
--- a/cpp/src/neighbors/vamana_serialize_int8.cu
+++ b/cpp/src/neighbors/vamana_serialize_int8.cu
@@ -16,8 +16,8 @@
 
 #include "vamana_serialize.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 CUVS_INST_VAMANA_SERIALIZE(int8_t);
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/neighbors/vamana_serialize_uint8.cu b/cpp/src/neighbors/vamana_serialize_uint8.cu
index 3e6d945b8..871c30506 100644
--- a/cpp/src/neighbors/vamana_serialize_uint8.cu
+++ b/cpp/src/neighbors/vamana_serialize_uint8.cu
@@ -16,8 +16,8 @@
 
 #include "vamana_serialize.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 CUVS_INST_VAMANA_SERIALIZE(uint8_t);
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/src/preprocessing/quantize/scalar.cu b/cpp/src/preprocessing/quantize/scalar.cu
index 9624ad4fe..68cba4cc8 100644
--- a/cpp/src/preprocessing/quantize/scalar.cu
+++ b/cpp/src/preprocessing/quantize/scalar.cu
@@ -71,4 +71,4 @@ CUVS_INST_QUANTIZATION(half, int8_t);
 
 #undef CUVS_INST_QUANTIZATION
 
-}  // namespace cuvs::preprocessing::quantize::scalar
\ No newline at end of file
+}  // namespace cuvs::preprocessing::quantize::scalar
diff --git a/cpp/src/preprocessing/quantize/scalar_c.cpp b/cpp/src/preprocessing/quantize/scalar_c.cpp
new file mode 100644
index 000000000..99880e426
--- /dev/null
+++ b/cpp/src/preprocessing/quantize/scalar_c.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <dlpack/dlpack.h>
+
+#include <cuvs/core/c_api.h>
+#include <cuvs/core/exceptions.hpp>
+#include <cuvs/core/interop.hpp>
+#include <cuvs/preprocessing/quantize/scalar.h>
+#include <cuvs/preprocessing/quantize/scalar.hpp>
+
+namespace {
+
+/**
+ * Trains a scalar quantizer for element type `T` and stores the learned bounds in `quantizer`.
+ *
+ * `dataset_tensor` is viewed as a row-major matrix of `T`: as a device view when the tensor is
+ * device-compatible, otherwise as a host view; a tensor that is neither fails via RAFT_FAIL.
+ * Only `params.quantile` is forwarded to the C++ training parameters.
+ */
+template <typename T>
+void _train(cuvsResources_t res,
+            cuvsScalarQuantizerParams params,
+            DLManagedTensor* dataset_tensor,
+            cuvsScalarQuantizer_t quantizer)
+{
+  auto dataset = dataset_tensor->dl_tensor;
+
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+
+  auto quantizer_params     = cuvs::preprocessing::quantize::scalar::params();
+  quantizer_params.quantile = params.quantile;
+
+  cuvs::preprocessing::quantize::scalar::quantizer<T> ret;
+
+  if (cuvs::core::is_dlpack_device_compatible(dataset)) {
+    using mdspan_type = raft::device_matrix_view<T const, int64_t, raft::row_major>;
+    auto mds          = cuvs::core::from_dlpack<mdspan_type>(dataset_tensor);
+    ret = cuvs::preprocessing::quantize::scalar::train(*res_ptr, quantizer_params, mds);
+  } else if (cuvs::core::is_dlpack_host_compatible(dataset)) {
+    using mdspan_type = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+    auto mds          = cuvs::core::from_dlpack<mdspan_type>(dataset_tensor);
+    ret = cuvs::preprocessing::quantize::scalar::train(*res_ptr, quantizer_params, mds);
+  } else {
+    RAFT_FAIL("dataset must be accessible on host or device memory");
+  }
+
+  // Hand the trained bounds back to the caller through the C handle.
+  quantizer->min_ = ret.min_;
+  quantizer->max_ = ret.max_;
+}
+
+/**
+ * Quantizes `dataset_tensor` (row-major matrix of `T`) into `out_tensor` (row-major matrix of
+ * `OutputT`) using a quantizer rebuilt from the bounds stored in `quantizer_`.
+ *
+ * Device or host views are chosen from `dataset_tensor` alone, and `out_tensor` is converted to
+ * a view of the same kind. A dataset that is neither kind fails via RAFT_FAIL.
+ */
+template <typename T, typename OutputT = int8_t>
+void _transform(cuvsResources_t res,
+                cuvsScalarQuantizer_t quantizer_,
+                DLManagedTensor* dataset_tensor,
+                DLManagedTensor* out_tensor)
+{
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+
+  cuvs::preprocessing::quantize::scalar::quantizer<T> quantizer;
+  quantizer.min_ = quantizer_->min_;
+  quantizer.max_ = quantizer_->max_;
+
+  auto dataset = dataset_tensor->dl_tensor;
+  if (cuvs::core::is_dlpack_device_compatible(dataset)) {
+    using mdspan_type     = raft::device_matrix_view<T const, int64_t, raft::row_major>;
+    using out_mdspan_type = raft::device_matrix_view<OutputT, int64_t, raft::row_major>;
+
+    cuvs::preprocessing::quantize::scalar::transform(
+      *res_ptr,
+      quantizer,
+      cuvs::core::from_dlpack<mdspan_type>(dataset_tensor),
+      cuvs::core::from_dlpack<out_mdspan_type>(out_tensor));
+
+  } else if (cuvs::core::is_dlpack_host_compatible(dataset)) {
+    using mdspan_type     = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+    using out_mdspan_type = raft::host_matrix_view<OutputT, int64_t, raft::row_major>;
+
+    cuvs::preprocessing::quantize::scalar::transform(
+      *res_ptr,
+      quantizer,
+      cuvs::core::from_dlpack<mdspan_type>(dataset_tensor),
+      cuvs::core::from_dlpack<out_mdspan_type>(out_tensor));
+  } else {
+    RAFT_FAIL("dataset must be accessible on host or device memory");
+  }
+}
+
+/**
+ * Applies the inverse transform: converts `dataset_tensor` (row-major matrix of `InputT`) into
+ * `out_tensor` (row-major matrix of `OutputT`) using a quantizer rebuilt from the bounds stored
+ * in `quantizer_`.
+ *
+ * Device or host views are chosen from `dataset_tensor` alone, and `out_tensor` is converted to
+ * a view of the same kind. A dataset that is neither kind fails via RAFT_FAIL.
+ */
+template <typename OutputT, typename InputT = int8_t>
+void _inverse_transform(cuvsResources_t res,
+                        cuvsScalarQuantizer_t quantizer_,
+                        DLManagedTensor* dataset_tensor,
+                        DLManagedTensor* out_tensor)
+{
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+
+  cuvs::preprocessing::quantize::scalar::quantizer<OutputT> quantizer;
+  quantizer.min_ = quantizer_->min_;
+  quantizer.max_ = quantizer_->max_;
+
+  auto dataset = dataset_tensor->dl_tensor;
+  if (cuvs::core::is_dlpack_device_compatible(dataset)) {
+    using mdspan_type     = raft::device_matrix_view<InputT const, int64_t, raft::row_major>;
+    using out_mdspan_type = raft::device_matrix_view<OutputT, int64_t, raft::row_major>;
+
+    cuvs::preprocessing::quantize::scalar::inverse_transform(
+      *res_ptr,
+      quantizer,
+      cuvs::core::from_dlpack<mdspan_type>(dataset_tensor),
+      cuvs::core::from_dlpack<out_mdspan_type>(out_tensor));
+
+  } else if (cuvs::core::is_dlpack_host_compatible(dataset)) {
+    using mdspan_type     = raft::host_matrix_view<InputT const, int64_t, raft::row_major>;
+    using out_mdspan_type = raft::host_matrix_view<OutputT, int64_t, raft::row_major>;
+
+    cuvs::preprocessing::quantize::scalar::inverse_transform(
+      *res_ptr,
+      quantizer,
+      cuvs::core::from_dlpack<mdspan_type>(dataset_tensor),
+      cuvs::core::from_dlpack<out_mdspan_type>(out_tensor));
+  } else {
+    RAFT_FAIL("dataset must be accessible on host or device memory");
+  }
+}
+}  // namespace
+
+// Allocates a cuvsScalarQuantizerParams with quantile = 0.99 and returns it through `params`.
+extern "C" cuvsError_t cuvsScalarQuantizerParamsCreate(cuvsScalarQuantizerParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsScalarQuantizerParams{.quantile = 0.99}; });
+}
+
+// Releases a params object with `delete`.
+extern "C" cuvsError_t cuvsScalarQuantizerParamsDestroy(cuvsScalarQuantizerParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
+// Allocates a cuvsScalarQuantizer (initialized with `{}`) and returns it through `quantizer`.
+extern "C" cuvsError_t cuvsScalarQuantizerCreate(cuvsScalarQuantizer_t* quantizer)
+{
+  return cuvs::core::translate_exceptions([=] { *quantizer = new cuvsScalarQuantizer{}; });
+}
+
+// Releases a quantizer handle with `delete`.
+extern "C" cuvsError_t cuvsScalarQuantizerDestroy(cuvsScalarQuantizer_t quantizer)
+{
+  return cuvs::core::translate_exceptions([=] { delete quantizer; });
+}
+
+// Trains `quantizer` on `dataset_tensor`; only float32 and float64 datasets are dispatched.
+extern "C" cuvsError_t cuvsScalarQuantizerTrain(cuvsResources_t res,
+                                                cuvsScalarQuantizerParams_t params,
+                                                DLManagedTensor* dataset_tensor,
+                                                cuvsScalarQuantizer_t quantizer)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto dataset = dataset_tensor->dl_tensor;
+    if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
+      _train<float>(res, *params, dataset_tensor, quantizer);
+    } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) {
+      _train<double>(res, *params, dataset_tensor, quantizer);
+    } else {
+      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+                dataset.dtype.code,
+                dataset.dtype.bits);
+    }
+  });
+}
+
+// Quantizes `dataset_tensor` into `out_tensor`; only float32 and float64 inputs are dispatched.
+extern "C" cuvsError_t cuvsScalarQuantizerTransform(cuvsResources_t res,
+                                                    cuvsScalarQuantizer_t quantizer,
+                                                    DLManagedTensor* dataset_tensor,
+                                                    DLManagedTensor* out_tensor)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto dataset = dataset_tensor->dl_tensor;
+    if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
+      _transform<float>(res, quantizer, dataset_tensor, out_tensor);
+    } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) {
+      _transform<double>(res, quantizer, dataset_tensor, out_tensor);
+    } else {
+      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+                dataset.dtype.code,
+                dataset.dtype.bits);
+    }
+  });
+}
+
+// Applies the inverse transform from `dataset` into `out`, dispatching on the dtype of `out`;
+// only float32 and float64 outputs are dispatched.
+extern "C" cuvsError_t cuvsScalarQuantizerInverseTransform(cuvsResources_t res,
+                                                           cuvsScalarQuantizer_t quantizer,
+                                                           DLManagedTensor* dataset,
+                                                           DLManagedTensor* out)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto dtype = out->dl_tensor.dtype;
+    if (dtype.code == kDLFloat && dtype.bits == 32) {
+      _inverse_transform<float>(res, quantizer, dataset, out);
+    } else if (dtype.code == kDLFloat && dtype.bits == 64) {
+      _inverse_transform<double>(res, quantizer, dataset, out);
+    } else {
+      RAFT_FAIL(
+        "Unsupported output dataset DLtensor dtype: %d and bits: %d", dtype.code, dtype.bits);
+    }
+  });
+}
diff --git a/cpp/src/sparse/cluster/cluster_solvers.cuh b/cpp/src/sparse/cluster/cluster_solvers.cuh
index 7b4cf6ab3..737dfa5f8 100644
--- a/cpp/src/sparse/cluster/cluster_solvers.cuh
+++ b/cpp/src/sparse/cluster/cluster_solvers.cuh
@@ -97,4 +97,4 @@ struct kmeans_solver_t {
 }  // namespace spectral
 }  // namespace cuvs
 
-#endif
\ No newline at end of file
+#endif
diff --git a/cpp/src/sparse/cluster/detail/spectral.cuh b/cpp/src/sparse/cluster/detail/spectral.cuh
index 571d92bf5..3d44bd4a2 100644
--- a/cpp/src/sparse/cluster/detail/spectral.cuh
+++ b/cpp/src/sparse/cluster/detail/spectral.cuh
@@ -108,4 +108,4 @@ void fit_embedding(raft::resources const& handle,
   RAFT_CUDA_TRY(cudaGetLastError());
 }
 
-};  // namespace cuvs::sparse::cluster::spectral::detail
\ No newline at end of file
+};  // namespace cuvs::sparse::cluster::spectral::detail
diff --git a/cpp/src/sparse/cluster/modularity_maximization.cuh b/cpp/src/sparse/cluster/modularity_maximization.cuh
index 71cba6927..ce60116d1 100644
--- a/cpp/src/sparse/cluster/modularity_maximization.cuh
+++ b/cpp/src/sparse/cluster/modularity_maximization.cuh
@@ -83,4 +83,4 @@ void analyzeModularity(raft::resources const& handle,
 }  // namespace spectral
 }  // namespace cuvs
 
-#endif
\ No newline at end of file
+#endif
diff --git a/cpp/src/sparse/cluster/partition.cuh b/cpp/src/sparse/cluster/partition.cuh
index df78a8a2d..111decadf 100644
--- a/cpp/src/sparse/cluster/partition.cuh
+++ b/cpp/src/sparse/cluster/partition.cuh
@@ -92,4 +92,4 @@ void analyzePartition(raft::resources const& handle,
 }  // namespace spectral
 }  // namespace cuvs
 
-#endif
\ No newline at end of file
+#endif
diff --git a/cpp/src/sparse/neighbors/cross_component_nn.cuh b/cpp/src/sparse/neighbors/cross_component_nn.cuh
index 36a0c79f5..02ce240c4 100644
--- a/cpp/src/sparse/neighbors/cross_component_nn.cuh
+++ b/cpp/src/sparse/neighbors/cross_component_nn.cuh
@@ -96,4 +96,4 @@ void cross_component_nn(
                              metric);
 }
 
-};  // end namespace cuvs::sparse::neighbors
\ No newline at end of file
+};  // end namespace cuvs::sparse::neighbors
diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu
deleted file mode 100644
index 599d2d842..000000000
--- a/cpp/test/neighbors/ann_cagra_c.cu
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "../test_utils.cuh"
-#include <cuvs/core/c_api.h>
-#include <dlpack/dlpack.h>
-
-#include <cstdint>
-#include <cuvs/neighbors/cagra.h>
-
-#include <cuda_runtime.h>
-#include <gtest/gtest.h>
-#include <sys/types.h>
-
-float dataset[4][2] = {{0.74021935, 0.9209938},
-                       {0.03902049, 0.9689629},
-                       {0.92514056, 0.4463501},
-                       {0.6673192, 0.10993068}};
-float queries[4][2] = {{0.48216683, 0.0428398},
-                       {0.5084142, 0.6545497},
-                       {0.51260436, 0.2643005},
-                       {0.05198065, 0.5789965}};
-
-uint32_t neighbors_exp[4] = {3, 0, 3, 1};
-float distances_exp[4]    = {0.03878258, 0.12472608, 0.04776672, 0.15224178};
-
-TEST(CagraC, BuildSearch)
-{
-  // create cuvsResources_t
-  cuvsResources_t res;
-  cuvsResourcesCreate(&res);
-  cudaStream_t stream;
-  cuvsStreamGet(res, &stream);
-
-  // create dataset DLTensor
-  DLManagedTensor dataset_tensor;
-  dataset_tensor.dl_tensor.data               = dataset;
-  dataset_tensor.dl_tensor.device.device_type = kDLCPU;
-  dataset_tensor.dl_tensor.ndim               = 2;
-  dataset_tensor.dl_tensor.dtype.code         = kDLFloat;
-  dataset_tensor.dl_tensor.dtype.bits         = 32;
-  dataset_tensor.dl_tensor.dtype.lanes        = 1;
-  int64_t dataset_shape[2]                    = {4, 2};
-  dataset_tensor.dl_tensor.shape              = dataset_shape;
-  dataset_tensor.dl_tensor.strides            = nullptr;
-
-  // create index
-  cuvsCagraIndex_t index;
-  cuvsCagraIndexCreate(&index);
-
-  // build index
-  cuvsCagraIndexParams_t build_params;
-  cuvsCagraIndexParamsCreate(&build_params);
-  cuvsCagraBuild(res, build_params, &dataset_tensor, index);
-
-  // create queries DLTensor
-  rmm::device_uvector<float> queries_d(4 * 2, stream);
-  raft::copy(queries_d.data(), (float*)queries, 4 * 2, stream);
-
-  DLManagedTensor queries_tensor;
-  queries_tensor.dl_tensor.data               = queries_d.data();
-  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
-  queries_tensor.dl_tensor.ndim               = 2;
-  queries_tensor.dl_tensor.dtype.code         = kDLFloat;
-  queries_tensor.dl_tensor.dtype.bits         = 32;
-  queries_tensor.dl_tensor.dtype.lanes        = 1;
-  int64_t queries_shape[2]                    = {4, 2};
-  queries_tensor.dl_tensor.shape              = queries_shape;
-  queries_tensor.dl_tensor.strides            = nullptr;
-
-  // create neighbors DLTensor
-  rmm::device_uvector<uint32_t> neighbors_d(4, stream);
-
-  DLManagedTensor neighbors_tensor;
-  neighbors_tensor.dl_tensor.data               = neighbors_d.data();
-  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
-  neighbors_tensor.dl_tensor.ndim               = 2;
-  neighbors_tensor.dl_tensor.dtype.code         = kDLUInt;
-  neighbors_tensor.dl_tensor.dtype.bits         = 32;
-  neighbors_tensor.dl_tensor.dtype.lanes        = 1;
-  int64_t neighbors_shape[2]                    = {4, 1};
-  neighbors_tensor.dl_tensor.shape              = neighbors_shape;
-  neighbors_tensor.dl_tensor.strides            = nullptr;
-
-  // create distances DLTensor
-  rmm::device_uvector<float> distances_d(4, stream);
-
-  DLManagedTensor distances_tensor;
-  distances_tensor.dl_tensor.data               = distances_d.data();
-  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
-  distances_tensor.dl_tensor.ndim               = 2;
-  distances_tensor.dl_tensor.dtype.code         = kDLFloat;
-  distances_tensor.dl_tensor.dtype.bits         = 32;
-  distances_tensor.dl_tensor.dtype.lanes        = 1;
-  int64_t distances_shape[2]                    = {4, 1};
-  distances_tensor.dl_tensor.shape              = distances_shape;
-  distances_tensor.dl_tensor.strides            = nullptr;
-
-  // search index
-  cuvsCagraSearchParams_t search_params;
-  cuvsCagraSearchParamsCreate(&search_params);
-  cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor);
-
-  // verify output
-  ASSERT_TRUE(
-    cuvs::devArrMatchHost(neighbors_exp, neighbors_d.data(), 4, cuvs::Compare<uint32_t>()));
-  ASSERT_TRUE(cuvs::devArrMatchHost(
-    distances_exp, distances_d.data(), 4, cuvs::CompareApprox<float>(0.001f)));
-
-  // de-allocate index and res
-  cuvsCagraSearchParamsDestroy(search_params);
-  cuvsCagraIndexParamsDestroy(build_params);
-  cuvsCagraIndexDestroy(index);
-  cuvsResourcesDestroy(res);
-}
diff --git a/cpp/test/CMakeLists.txt b/cpp/tests/CMakeLists.txt
similarity index 92%
rename from cpp/test/CMakeLists.txt
rename to cpp/tests/CMakeLists.txt
index 9aa596a6e..3ed37175d 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -77,7 +77,7 @@ function(ConfigureTest)
     target_compile_definitions(${TEST_NAME} PRIVATE "CUVS_DISABLE_CUDA")
   endif()
 
-  target_include_directories(${TEST_NAME} PUBLIC "$<BUILD_INTERFACE:${CUVS_SOURCE_DIR}/test>")
+  target_include_directories(${TEST_NAME} PUBLIC "$<BUILD_INTERFACE:${CUVS_SOURCE_DIR}/tests>")
 
   rapids_test_add(
     NAME ${TEST_NAME}
@@ -138,19 +138,28 @@ if(BUILD_TESTS)
   )
 
   ConfigureTest(
-    NAME
-    NEIGHBORS_ANN_CAGRA_TEST
-    PATH
-    neighbors/ann_cagra/bug_extreme_inputs_oob.cu
-    neighbors/ann_cagra/bug_multi_cta_crash.cu
-    neighbors/ann_cagra/test_float_uint32_t.cu
-    neighbors/ann_cagra/test_half_uint32_t.cu
-    neighbors/ann_cagra/test_int8_t_uint32_t.cu
-    neighbors/ann_cagra/test_uint8_t_uint32_t.cu
-    GPUS
-    1
-    PERCENT
-    100
+    NAME NEIGHBORS_ANN_CAGRA_TEST_BUGS PATH neighbors/ann_cagra/bug_extreme_inputs_oob.cu
+    neighbors/ann_cagra/bug_multi_cta_crash.cu GPUS 1 PERCENT 100
+  )
+
+  ConfigureTest(
+    NAME NEIGHBORS_ANN_CAGRA_FLOAT_UINT32_TEST PATH neighbors/ann_cagra/test_float_uint32_t.cu GPUS
+    1 PERCENT 100
+  )
+
+  ConfigureTest(
+    NAME NEIGHBORS_ANN_CAGRA_HALF_UINT32_TEST PATH neighbors/ann_cagra/test_half_uint32_t.cu GPUS 1
+    PERCENT 100
+  )
+
+  ConfigureTest(
+    NAME NEIGHBORS_ANN_CAGRA_INT8_UINT32_TEST PATH neighbors/ann_cagra/test_int8_t_uint32_t.cu GPUS
+    1 PERCENT 100
+  )
+
+  ConfigureTest(
+    NAME NEIGHBORS_ANN_CAGRA_UINT8_UINT32_TEST PATH neighbors/ann_cagra/test_uint8_t_uint32_t.cu
+    GPUS 1 PERCENT 100
   )
 
   ConfigureTest(
diff --git a/cpp/test/cluster/kmeans.cu b/cpp/tests/cluster/kmeans.cu
similarity index 100%
rename from cpp/test/cluster/kmeans.cu
rename to cpp/tests/cluster/kmeans.cu
diff --git a/cpp/test/cluster/kmeans_balanced.cu b/cpp/tests/cluster/kmeans_balanced.cu
similarity index 100%
rename from cpp/test/cluster/kmeans_balanced.cu
rename to cpp/tests/cluster/kmeans_balanced.cu
diff --git a/cpp/test/cluster/kmeans_find_k.cu b/cpp/tests/cluster/kmeans_find_k.cu
similarity index 100%
rename from cpp/test/cluster/kmeans_find_k.cu
rename to cpp/tests/cluster/kmeans_find_k.cu
diff --git a/cpp/test/cluster/kmeans_mg.cu b/cpp/tests/cluster/kmeans_mg.cu
similarity index 100%
rename from cpp/test/cluster/kmeans_mg.cu
rename to cpp/tests/cluster/kmeans_mg.cu
diff --git a/cpp/test/cluster/linkage.cu b/cpp/tests/cluster/linkage.cu
similarity index 100%
rename from cpp/test/cluster/linkage.cu
rename to cpp/tests/cluster/linkage.cu
diff --git a/cpp/test/core/c_api.c b/cpp/tests/core/c_api.c
similarity index 100%
rename from cpp/test/core/c_api.c
rename to cpp/tests/core/c_api.c
diff --git a/cpp/test/core/interop.cu b/cpp/tests/core/interop.cu
similarity index 100%
rename from cpp/test/core/interop.cu
rename to cpp/tests/core/interop.cu
diff --git a/cpp/test/distance/dist_canberra.cu b/cpp/tests/distance/dist_canberra.cu
similarity index 100%
rename from cpp/test/distance/dist_canberra.cu
rename to cpp/tests/distance/dist_canberra.cu
diff --git a/cpp/test/distance/dist_correlation.cu b/cpp/tests/distance/dist_correlation.cu
similarity index 100%
rename from cpp/test/distance/dist_correlation.cu
rename to cpp/tests/distance/dist_correlation.cu
diff --git a/cpp/test/distance/dist_cos.cu b/cpp/tests/distance/dist_cos.cu
similarity index 100%
rename from cpp/test/distance/dist_cos.cu
rename to cpp/tests/distance/dist_cos.cu
diff --git a/cpp/test/distance/dist_hamming.cu b/cpp/tests/distance/dist_hamming.cu
similarity index 100%
rename from cpp/test/distance/dist_hamming.cu
rename to cpp/tests/distance/dist_hamming.cu
diff --git a/cpp/test/distance/dist_hellinger.cu b/cpp/tests/distance/dist_hellinger.cu
similarity index 100%
rename from cpp/test/distance/dist_hellinger.cu
rename to cpp/tests/distance/dist_hellinger.cu
diff --git a/cpp/test/distance/dist_inner_product.cu b/cpp/tests/distance/dist_inner_product.cu
similarity index 100%
rename from cpp/test/distance/dist_inner_product.cu
rename to cpp/tests/distance/dist_inner_product.cu
diff --git a/cpp/test/distance/dist_jensen_shannon.cu b/cpp/tests/distance/dist_jensen_shannon.cu
similarity index 100%
rename from cpp/test/distance/dist_jensen_shannon.cu
rename to cpp/tests/distance/dist_jensen_shannon.cu
diff --git a/cpp/test/distance/dist_kl_divergence.cu b/cpp/tests/distance/dist_kl_divergence.cu
similarity index 100%
rename from cpp/test/distance/dist_kl_divergence.cu
rename to cpp/tests/distance/dist_kl_divergence.cu
diff --git a/cpp/test/distance/dist_l1.cu b/cpp/tests/distance/dist_l1.cu
similarity index 100%
rename from cpp/test/distance/dist_l1.cu
rename to cpp/tests/distance/dist_l1.cu
diff --git a/cpp/test/distance/dist_l2_exp.cu b/cpp/tests/distance/dist_l2_exp.cu
similarity index 100%
rename from cpp/test/distance/dist_l2_exp.cu
rename to cpp/tests/distance/dist_l2_exp.cu
diff --git a/cpp/test/distance/dist_l2_sqrt_exp.cu b/cpp/tests/distance/dist_l2_sqrt_exp.cu
similarity index 100%
rename from cpp/test/distance/dist_l2_sqrt_exp.cu
rename to cpp/tests/distance/dist_l2_sqrt_exp.cu
diff --git a/cpp/test/distance/dist_l2_unexp.cu b/cpp/tests/distance/dist_l2_unexp.cu
similarity index 100%
rename from cpp/test/distance/dist_l2_unexp.cu
rename to cpp/tests/distance/dist_l2_unexp.cu
diff --git a/cpp/test/distance/dist_l_inf.cu b/cpp/tests/distance/dist_l_inf.cu
similarity index 100%
rename from cpp/test/distance/dist_l_inf.cu
rename to cpp/tests/distance/dist_l_inf.cu
diff --git a/cpp/test/distance/dist_lp_unexp.cu b/cpp/tests/distance/dist_lp_unexp.cu
similarity index 100%
rename from cpp/test/distance/dist_lp_unexp.cu
rename to cpp/tests/distance/dist_lp_unexp.cu
diff --git a/cpp/test/distance/dist_russell_rao.cu b/cpp/tests/distance/dist_russell_rao.cu
similarity index 100%
rename from cpp/test/distance/dist_russell_rao.cu
rename to cpp/tests/distance/dist_russell_rao.cu
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/tests/distance/distance_base.cuh
similarity index 100%
rename from cpp/test/distance/distance_base.cuh
rename to cpp/tests/distance/distance_base.cuh
diff --git a/cpp/test/distance/gram.cu b/cpp/tests/distance/gram.cu
similarity index 99%
rename from cpp/test/distance/gram.cu
rename to cpp/tests/distance/gram.cu
index 89b1525ea..3d14da84d 100644
--- a/cpp/test/distance/gram.cu
+++ b/cpp/tests/distance/gram.cu
@@ -171,4 +171,4 @@ typedef GramMatrixTest<double> GramMatrixTestDouble;
 TEST_P(GramMatrixTestFloat, Gram) { runTest(); }
 
 INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloat, ::testing::ValuesIn(inputs));
-};  // namespace cuvs::distance::kernels
\ No newline at end of file
+};  // namespace cuvs::distance::kernels
diff --git a/cpp/test/distance/gram_base.cuh b/cpp/tests/distance/gram_base.cuh
similarity index 99%
rename from cpp/test/distance/gram_base.cuh
rename to cpp/tests/distance/gram_base.cuh
index 326cdb4f8..6cfb1807b 100644
--- a/cpp/test/distance/gram_base.cuh
+++ b/cpp/tests/distance/gram_base.cuh
@@ -88,4 +88,4 @@ void naiveGramMatrixKernel(int n1,
 
 }  // namespace kernels
 }  // namespace distance
-}  // namespace cuvs
\ No newline at end of file
+}  // namespace cuvs
diff --git a/cpp/test/distance/masked_nn.cu b/cpp/tests/distance/masked_nn.cu
similarity index 100%
rename from cpp/test/distance/masked_nn.cu
rename to cpp/tests/distance/masked_nn.cu
diff --git a/cpp/test/distance/pairwise_distance_c.cu b/cpp/tests/distance/pairwise_distance_c.cu
similarity index 100%
rename from cpp/test/distance/pairwise_distance_c.cu
rename to cpp/tests/distance/pairwise_distance_c.cu
diff --git a/cpp/test/distance/run_pairwise_distance_c.c b/cpp/tests/distance/run_pairwise_distance_c.c
similarity index 100%
rename from cpp/test/distance/run_pairwise_distance_c.c
rename to cpp/tests/distance/run_pairwise_distance_c.c
diff --git a/cpp/test/distance/sparse_distance.cu b/cpp/tests/distance/sparse_distance.cu
similarity index 100%
rename from cpp/test/distance/sparse_distance.cu
rename to cpp/tests/distance/sparse_distance.cu
diff --git a/cpp/test/neighbors/ann_brute_force.cuh b/cpp/tests/neighbors/ann_brute_force.cuh
similarity index 100%
rename from cpp/test/neighbors/ann_brute_force.cuh
rename to cpp/tests/neighbors/ann_brute_force.cuh
diff --git a/cpp/test/neighbors/ann_brute_force/test_float.cu b/cpp/tests/neighbors/ann_brute_force/test_float.cu
similarity index 100%
rename from cpp/test/neighbors/ann_brute_force/test_float.cu
rename to cpp/tests/neighbors/ann_brute_force/test_float.cu
diff --git a/cpp/test/neighbors/ann_brute_force/test_half.cu b/cpp/tests/neighbors/ann_brute_force/test_half.cu
similarity index 100%
rename from cpp/test/neighbors/ann_brute_force/test_half.cu
rename to cpp/tests/neighbors/ann_brute_force/test_half.cu
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/tests/neighbors/ann_cagra.cuh
similarity index 88%
rename from cpp/test/neighbors/ann_cagra.cuh
rename to cpp/tests/neighbors/ann_cagra.cuh
index bbafae6d1..cab91c698 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/tests/neighbors/ann_cagra.cuh
@@ -876,14 +876,15 @@ class AnnCagraFilterTest : public ::testing::TestWithParam<AnnCagraInputs> {
 inline std::vector<AnnCagraInputs> generate_inputs()
 {
   // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries
+  // Change graph dim, search algo and max_query parameter
   std::vector<AnnCagraInputs> inputs = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {1000},
     {1, 8, 17},
-    {1, 16},  // k
-    {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT},
+    {16},  // k
+    {graph_build_algo::NN_DESCENT},
     {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL},
-    {0, 1, 10, 100},  // query size
+    {0, 10},  // query size
     {0},
     {256},
     {1},
@@ -892,11 +893,30 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {true},
     {0.995});
 
+  // Fixed dim, and changing neighbors and query size (output matrix size)
   auto inputs2 = raft::util::itertools::product<AnnCagraInputs>(
+    {1, 100},
+    {1000},
+    {8},
+    {1, 16},  // k
+    {graph_build_algo::NN_DESCENT},
+    {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL},
+    {0},  // query size
+    {0},
+    {256},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded, cuvs::distance::DistanceType::InnerProduct},
+    {false},
+    {true},
+    {0.995});
+  inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
+
+  // Varying dim and build algo.
+  inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {1000},
-    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},  // dim
-    {16},                                                         // k
+    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 1024},  // dim
+    {16},                                                    // k
     {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT},
     {search_algo::AUTO},
     {10},
@@ -908,6 +928,8 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {true},
     {0.995});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
+
+  // Varying team_size, graph_build_algo
   inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {1000},
@@ -925,6 +947,7 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {0.995});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
+  // Varying graph_build_algo, itopk_size
   inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {1000},
@@ -942,9 +965,10 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {0.995});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
+  // Varying n_rows, host_dataset
   inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
-    {10000, 20000},
+    {10000},
     {32},
     {10},
     {graph_build_algo::AUTO},
@@ -959,7 +983,8 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {0.985});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
-  // a few PQ configurations
+  // A few PQ configurations.
+  // Varying dim, vq_n_centers
   inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {10000},
@@ -987,7 +1012,8 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     }
   }
 
-  // refinement options
+  // Refinement options
+  // Varying host_dataset, ivf_pq_search_refine_ratio
   inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {5000},
@@ -1006,10 +1032,11 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {1.0f, 2.0f, 3.0f});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
+  // Varying dim, adding non_owning_memory_buffer_flag
   inputs2 = raft::util::itertools::product<AnnCagraInputs>(
     {100},
     {1000},
-    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},  // dim
+    {1, 5, 8, 64, 137, 256, 619, 1024},  // dim
     {10},
     {graph_build_algo::IVF_PQ},
     {search_algo::AUTO},
@@ -1029,6 +1056,143 @@ inline std::vector<AnnCagraInputs> generate_inputs()
   return inputs;
 }
 
-const std::vector<AnnCagraInputs> inputs = generate_inputs();
+inline std::vector<AnnCagraInputs> generate_addnode_inputs()
+{
+  // Input sets for AnnCagraAddNodesTest. Group 1: varying dim and distance metric
+  std::vector<AnnCagraInputs> inputs = raft::util::itertools::product<AnnCagraInputs>(
+    {100},
+    {1000},
+    {1, 8, 17, 64, 128, 137, 512, 1024},  // dim
+    {16},                                 // k
+    {graph_build_algo::NN_DESCENT},
+    {search_algo::AUTO},
+    {10},
+    {0},
+    {64},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded, cuvs::distance::DistanceType::InnerProduct},
+    {false},
+    {true},
+    {0.995});
+
+  // Group 2: testing host and device datasets, again with both distance metrics
+  auto inputs2 = raft::util::itertools::product<AnnCagraInputs>(
+    {100},
+    {10000},
+    {32},
+    {10},
+    {graph_build_algo::AUTO},
+    {search_algo::AUTO},
+    {10},
+    {0},  // team_size
+    {64},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded, cuvs::distance::DistanceType::InnerProduct},
+    {false, true},
+    {false},
+    {0.985});
+  inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
+
+  // Group 3: a few PQ configurations; VPQ compression params are attached in the loops below
+  inputs2 = raft::util::itertools::product<AnnCagraInputs>(
+    {100},
+    {10000},
+    {192, 1024},  // dim
+    {16},         // k
+    {graph_build_algo::IVF_PQ},
+    {search_algo::AUTO},
+    {10},
+    {0},
+    {64},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded},
+    {false},
+    {true},
+    {0.6});                      // don't demand high recall without refinement
+  for (uint32_t pq_len : {2}) {  // for now, only pq_len = 2 is supported, more options coming soon
+    for (uint32_t vq_n_centers : {100}) {
+      for (auto input : inputs2) {  // by-value copy: modified below, then appended to inputs
+        vpq_params ps{};
+        ps.pq_dim       = input.dim / pq_len;
+        ps.vq_n_centers = vq_n_centers;
+        input.compression.emplace(ps);
+        inputs.push_back(input);
+      }
+    }
+  }
+
+  return inputs;
+}
+
+inline std::vector<AnnCagraInputs> generate_filtering_inputs()
+{
+  // Input sets for AnnCagraFilterTest. Group 1: change graph dim, search algo
+  std::vector<AnnCagraInputs> inputs = raft::util::itertools::product<AnnCagraInputs>(
+    {100},
+    {1000},
+    {1, 8, 17},
+    {16},  // k
+    {graph_build_algo::NN_DESCENT},
+    {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL},
+    {0},  // query size
+    {0},
+    {256},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded, cuvs::distance::DistanceType::InnerProduct},
+    {false},
+    {true},
+    {0.995});
+
+  // Group 2: fixed dim, and changing neighbors and query size (output matrix size)
+  auto inputs2 = raft::util::itertools::product<AnnCagraInputs>(
+    {1, 100},
+    {1000},
+    {8},
+    {1, 16},  // k
+    {graph_build_algo::NN_DESCENT},
+    {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL},
+    {0},  // query size
+    {0},
+    {256},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded, cuvs::distance::DistanceType::InnerProduct},
+    {false},
+    {true},
+    {0.995});
+  inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
+
+  // Group 3: a single PQ configuration; VPQ compression params are attached in the loops below
+  inputs2 = raft::util::itertools::product<AnnCagraInputs>(
+    {100},
+    {10000},
+    {256},  // dim
+    {16},   // k
+    {graph_build_algo::IVF_PQ},
+    {search_algo::AUTO},
+    {10},
+    {0},
+    {64},
+    {1},
+    {cuvs::distance::DistanceType::L2Expanded},
+    {false},
+    {true},
+    {0.6});                      // don't demand high recall without refinement
+  for (uint32_t pq_len : {2}) {  // for now, only pq_len = 2 is supported, more options coming soon
+    for (uint32_t vq_n_centers : {100}) {
+      for (auto input : inputs2) {  // by-value copy: modified below, then appended to inputs
+        vpq_params ps{};
+        ps.pq_dim       = input.dim / pq_len;
+        ps.vq_n_centers = vq_n_centers;
+        input.compression.emplace(ps);
+        inputs.push_back(input);
+      }
+    }
+  }
+
+  return inputs;
+}
+const std::vector<AnnCagraInputs> inputs           = generate_inputs();
+const std::vector<AnnCagraInputs> inputs_addnode   = generate_addnode_inputs();
+const std::vector<AnnCagraInputs> inputs_filtering = generate_filtering_inputs();
 
 }  // namespace cuvs::neighbors::cagra
diff --git a/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu b/cpp/tests/neighbors/ann_cagra/bug_extreme_inputs_oob.cu
similarity index 100%
rename from cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu
rename to cpp/tests/neighbors/ann_cagra/bug_extreme_inputs_oob.cu
diff --git a/cpp/test/neighbors/ann_cagra/bug_multi_cta_crash.cu b/cpp/tests/neighbors/ann_cagra/bug_multi_cta_crash.cu
similarity index 100%
rename from cpp/test/neighbors/ann_cagra/bug_multi_cta_crash.cu
rename to cpp/tests/neighbors/ann_cagra/bug_multi_cta_crash.cu
diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu
similarity index 86%
rename from cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
rename to cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu
index ca188d132..640657ccb 100644
--- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
+++ b/cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu
@@ -32,7 +32,9 @@ TEST_P(AnnCagraFilterTestF_U32, AnnCagra) { this->testCagra(); }
 INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs));
 INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest,
                         AnnCagraAddNodesTestF_U32,
-                        ::testing::ValuesIn(inputs));
-INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestF_U32, ::testing::ValuesIn(inputs));
+                        ::testing::ValuesIn(inputs_addnode));
+INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest,
+                        AnnCagraFilterTestF_U32,
+                        ::testing::ValuesIn(inputs_filtering));
 
 }  // namespace cuvs::neighbors::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu
rename to cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu
diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu
similarity index 86%
rename from cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
rename to cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu
index 4aa03afd5..32f6289fe 100644
--- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
+++ b/cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu
@@ -30,7 +30,9 @@ TEST_P(AnnCagraFilterTestI8_U32, AnnCagra) { this->testCagra(); }
 INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs));
 INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest,
                         AnnCagraAddNodesTestI8_U32,
-                        ::testing::ValuesIn(inputs));
-INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestI8_U32, ::testing::ValuesIn(inputs));
+                        ::testing::ValuesIn(inputs_addnode));
+INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest,
+                        AnnCagraFilterTestI8_U32,
+                        ::testing::ValuesIn(inputs_filtering));
 
 }  // namespace cuvs::neighbors::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
similarity index 86%
rename from cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
rename to cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
index b8e2a6b77..53f804be6 100644
--- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
+++ b/cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
@@ -30,7 +30,9 @@ TEST_P(AnnCagraFilterTestU8_U32, AnnCagra) { this->testCagra(); }
 INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs));
 INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest,
                         AnnCagraAddNodesTestU8_U32,
-                        ::testing::ValuesIn(inputs));
-INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestU8_U32, ::testing::ValuesIn(inputs));
+                        ::testing::ValuesIn(inputs_addnode));
+INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest,
+                        AnnCagraFilterTestU8_U32,
+                        ::testing::ValuesIn(inputs_filtering));
 
 }  // namespace cuvs::neighbors::cagra
diff --git a/cpp/tests/neighbors/ann_cagra_c.cu b/cpp/tests/neighbors/ann_cagra_c.cu
new file mode 100644
index 000000000..9e0890c34
--- /dev/null
+++ b/cpp/tests/neighbors/ann_cagra_c.cu
@@ -0,0 +1,478 @@
+/*
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.cuh"
+#include <cstddef>
+#include <cuvs/core/c_api.h>
+#include <cuvs/distance/distance.hpp>
+#include <dlpack/dlpack.h>
+
+#include <cstdint>
+#include <cuvs/neighbors/cagra.h>
+
+#include <cuda_runtime.h>
+#include <gtest/gtest.h>
+
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/math.hpp>
+#include <raft/core/mdspan.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/matrix/argmin.cuh>
+#include <raft/matrix/linewise_op.cuh>
+#include <sys/types.h>
+
+#include <raft/random/make_blobs.cuh>
+
+float dataset[4][2] = {{0.74021935, 0.9209938},
+                       {0.03902049, 0.9689629},
+                       {0.92514056, 0.4463501},
+                       {0.6673192, 0.10993068}};
+float queries[4][2] = {{0.48216683, 0.0428398},
+                       {0.5084142, 0.6545497},
+                       {0.51260436, 0.2643005},
+                       {0.05198065, 0.5789965}};
+
+uint32_t filter[1] = {0b1001};  // index 1 and 2 are removed
+
+uint32_t neighbors_exp[4] = {3, 0, 3, 1};
+float distances_exp[4]    = {0.03878258, 0.12472608, 0.04776672, 0.15224178};
+
+uint32_t neighbors_exp_filtered[4] = {3, 0, 3, 0};
+float distances_exp_filtered[4]    = {0.03878258, 0.12472608, 0.04776672, 0.59063464};
+
+TEST(CagraC, BuildSearch)
+{
+  // create cuvsResources_t and fetch the CUDA stream associated with it
+  cuvsResources_t res;
+  cuvsResourcesCreate(&res);
+  cudaStream_t stream;
+  cuvsStreamGet(res, &stream);
+
+  // create dataset DLTensor: wraps the file-scope 4x2 float32 `dataset` array, tagged kDLCPU
+  DLManagedTensor dataset_tensor;
+  dataset_tensor.dl_tensor.data               = dataset;
+  dataset_tensor.dl_tensor.device.device_type = kDLCPU;
+  dataset_tensor.dl_tensor.ndim               = 2;
+  dataset_tensor.dl_tensor.dtype.code         = kDLFloat;
+  dataset_tensor.dl_tensor.dtype.bits         = 32;
+  dataset_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t dataset_shape[2]                    = {4, 2};
+  dataset_tensor.dl_tensor.shape              = dataset_shape;
+  dataset_tensor.dl_tensor.strides            = nullptr;
+
+  // create index handle (populated by cuvsCagraBuild below)
+  cuvsCagraIndex_t index;
+  cuvsCagraIndexCreate(&index);
+
+  // build index with the parameters produced by cuvsCagraIndexParamsCreate (left unmodified)
+  cuvsCagraIndexParams_t build_params;
+  cuvsCagraIndexParamsCreate(&build_params);
+  cuvsCagraBuild(res, build_params, &dataset_tensor, index);
+
+  // create queries DLTensor: copy the 4x2 `queries` array into queries_d, tagged kDLCUDA
+  rmm::device_uvector<float> queries_d(4 * 2, stream);
+  raft::copy(queries_d.data(), (float*)queries, 4 * 2, stream);
+
+  DLManagedTensor queries_tensor;
+  queries_tensor.dl_tensor.data               = queries_d.data();
+  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
+  queries_tensor.dl_tensor.ndim               = 2;
+  queries_tensor.dl_tensor.dtype.code         = kDLFloat;
+  queries_tensor.dl_tensor.dtype.bits         = 32;
+  queries_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t queries_shape[2]                    = {4, 2};
+  queries_tensor.dl_tensor.shape              = queries_shape;
+  queries_tensor.dl_tensor.strides            = nullptr;
+
+  // create neighbors DLTensor: 4x1 uint32 output buffer (one neighbor id per query)
+  rmm::device_uvector<uint32_t> neighbors_d(4, stream);
+
+  DLManagedTensor neighbors_tensor;
+  neighbors_tensor.dl_tensor.data               = neighbors_d.data();
+  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
+  neighbors_tensor.dl_tensor.ndim               = 2;
+  neighbors_tensor.dl_tensor.dtype.code         = kDLUInt;
+  neighbors_tensor.dl_tensor.dtype.bits         = 32;
+  neighbors_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t neighbors_shape[2]                    = {4, 1};
+  neighbors_tensor.dl_tensor.shape              = neighbors_shape;
+  neighbors_tensor.dl_tensor.strides            = nullptr;
+
+  // create distances DLTensor: 4x1 float32 output buffer (one distance per query)
+  rmm::device_uvector<float> distances_d(4, stream);
+
+  DLManagedTensor distances_tensor;
+  distances_tensor.dl_tensor.data               = distances_d.data();
+  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
+  distances_tensor.dl_tensor.ndim               = 2;
+  distances_tensor.dl_tensor.dtype.code         = kDLFloat;
+  distances_tensor.dl_tensor.dtype.bits         = 32;
+  distances_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t distances_shape[2]                    = {4, 1};
+  distances_tensor.dl_tensor.shape              = distances_shape;
+  distances_tensor.dl_tensor.strides            = nullptr;
+
+  cuvsFilter filter;  // local object; shadows the file-scope `filter` array in this test
+  filter.type = NO_FILTER;
+  filter.addr = (uintptr_t)NULL;
+
+  // search index with the parameters produced by cuvsCagraSearchParamsCreate (left unmodified)
+  cuvsCagraSearchParams_t search_params;
+  cuvsCagraSearchParamsCreate(&search_params);
+  cuvsCagraSearch(
+    res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter);
+
+  // verify output against neighbors_exp / distances_exp (distances via CompareApprox, 0.001f)
+  ASSERT_TRUE(
+    cuvs::devArrMatchHost(neighbors_exp, neighbors_d.data(), 4, cuvs::Compare<uint32_t>()));
+  ASSERT_TRUE(cuvs::devArrMatchHost(
+    distances_exp, distances_d.data(), 4, cuvs::CompareApprox<float>(0.001f)));
+
+  // de-allocate search params, build params, index and res
+  cuvsCagraSearchParamsDestroy(search_params);
+  cuvsCagraIndexParamsDestroy(build_params);
+  cuvsCagraIndexDestroy(index);
+  cuvsResourcesDestroy(res);
+}
+
+TEST(CagraC, BuildExtendSearch)
+{
+  // create cuvsResources_t
+  cuvsResources_t res;
+  cuvsResourcesCreate(&res);
+  cudaStream_t stream;
+  cuvsStreamGet(res, &stream);
+
+  raft::resources handle;
+
+  const int32_t dimensions = 16;
+  // main_data_size needs to be >= 128 (see issue #486)
+  const int32_t main_data_size       = 1024;
+  const int32_t additional_data_size = 64;
+  const int32_t num_queries          = 4;
+
+  // create random data for datasets and queries
+  rmm::device_uvector<float> random_data_d(
+    (main_data_size + additional_data_size + num_queries) * dimensions, stream);
+  rmm::device_uvector<int32_t> random_labels_d(
+    (main_data_size + additional_data_size + num_queries) * dimensions, stream);
+  raft::random::make_blobs(random_data_d.data(),
+                           random_labels_d.data(),
+                           main_data_size + additional_data_size + num_queries,
+                           dimensions,
+                           10,
+                           stream);
+
+  // create dataset DLTensor
+  rmm::device_uvector<float> main_d(main_data_size * dimensions, stream);
+  rmm::device_uvector<int32_t> main_labels_d(main_data_size, stream);
+  raft::copy(main_d.data(), random_data_d.data(), main_data_size * dimensions, stream);
+  DLManagedTensor dataset_tensor;
+  dataset_tensor.dl_tensor.data               = main_d.data();
+  dataset_tensor.dl_tensor.device.device_type = kDLCUDA;
+  dataset_tensor.dl_tensor.ndim               = 2;
+  dataset_tensor.dl_tensor.dtype.code         = kDLFloat;
+  dataset_tensor.dl_tensor.dtype.bits         = 32;
+  dataset_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t dataset_shape[2]                    = {main_data_size, dimensions};
+  dataset_tensor.dl_tensor.shape              = dataset_shape;
+  dataset_tensor.dl_tensor.strides            = nullptr;
+
+  // create additional dataset DLTensor
+  rmm::device_uvector<float> additional_d(additional_data_size * dimensions, stream);
+  raft::copy(additional_d.data(),
+             random_data_d.data() + main_d.size(),
+             additional_data_size * dimensions,
+             stream);
+  DLManagedTensor additional_dataset_tensor;
+  additional_dataset_tensor.dl_tensor.data               = additional_d.data();
+  additional_dataset_tensor.dl_tensor.device.device_type = kDLCUDA;
+  additional_dataset_tensor.dl_tensor.ndim               = 2;
+  additional_dataset_tensor.dl_tensor.dtype.code         = kDLFloat;
+  additional_dataset_tensor.dl_tensor.dtype.bits         = 32;
+  additional_dataset_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t additional_dataset_shape[2]                    = {additional_data_size, dimensions};
+  additional_dataset_tensor.dl_tensor.shape              = additional_dataset_shape;
+  additional_dataset_tensor.dl_tensor.strides            = nullptr;
+
+  // create tensor that points to the extended dataset
+  rmm::device_uvector<float> extend_return_d((additional_data_size + main_data_size) * dimensions,
+                                             stream);
+  DLManagedTensor additional_dataset_return_tensor;
+  additional_dataset_return_tensor.dl_tensor.data               = extend_return_d.data();
+  additional_dataset_return_tensor.dl_tensor.device.device_type = kDLCUDA;
+  additional_dataset_return_tensor.dl_tensor.ndim               = 2;
+  additional_dataset_return_tensor.dl_tensor.dtype.code         = kDLFloat;
+  additional_dataset_return_tensor.dl_tensor.dtype.bits         = 32;
+  additional_dataset_return_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t additional_return_dataset_shape[2] = {additional_data_size + main_data_size, dimensions};
+  additional_dataset_return_tensor.dl_tensor.shape   = additional_return_dataset_shape;
+  additional_dataset_return_tensor.dl_tensor.strides = nullptr;
+
+  // create index
+  cuvsCagraIndex_t index;
+  cuvsCagraIndexCreate(&index);
+
+  // build index
+  cuvsCagraIndexParams_t build_params;
+  cuvsCagraIndexParamsCreate(&build_params);
+  cuvsCagraBuild(res, build_params, &dataset_tensor, index);
+
+  cuvsStreamSync(res);
+
+  // extend index
+  cuvsCagraExtendParams_t extend_params;
+  cuvsCagraExtendParamsCreate(&extend_params);
+  cuvsCagraExtend(
+    res, extend_params, &additional_dataset_tensor, index, &additional_dataset_return_tensor);
+
+  // create queries DLTensor
+  rmm::device_uvector<float> queries_d(num_queries * dimensions, stream);
+  raft::copy(queries_d.data(),
+             random_data_d.data() + (main_data_size + additional_data_size) * dimensions,
+             num_queries * dimensions,
+             stream);
+  DLManagedTensor queries_tensor;
+  queries_tensor.dl_tensor.data               = queries_d.data();
+  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
+  queries_tensor.dl_tensor.ndim               = 2;
+  queries_tensor.dl_tensor.dtype.code         = kDLFloat;
+  queries_tensor.dl_tensor.dtype.bits         = 32;
+  queries_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t queries_shape[2]                    = {4, dimensions};
+  queries_tensor.dl_tensor.shape              = queries_shape;
+  queries_tensor.dl_tensor.strides            = nullptr;
+
+  // create pairwise distance matrix for dataset and queries
+  auto pairwise_distance_dataset_input =
+    raft::make_device_matrix<float>(handle, main_data_size + additional_data_size, dimensions);
+
+  raft::copy(pairwise_distance_dataset_input.data_handle(), main_d.data(), main_d.size(), stream);
+  raft::copy(pairwise_distance_dataset_input.data_handle() + main_d.size(),
+             additional_d.data(),
+             additional_d.size(),
+             stream);
+
+  auto pairwise_distance_queries_input =
+    raft::make_device_matrix<float>(handle, num_queries, dimensions);
+
+  raft::copy(pairwise_distance_queries_input.data_handle(),
+             (float*)queries_d.data(),
+             num_queries * dimensions,
+             stream);
+
+  auto pairwise_distances =
+    raft::make_device_matrix<float>(handle, num_queries, (main_data_size + additional_data_size));
+  auto metric = cuvs::distance::DistanceType::L2Expanded;
+
+  cuvs::distance::pairwise_distance(handle,
+                                    pairwise_distance_queries_input.view(),
+                                    pairwise_distance_dataset_input.view(),
+
+                                    pairwise_distances.view(),
+                                    metric);
+
+  auto min_cols =
+    raft::make_device_vector<uint32_t, uint32_t>(handle, pairwise_distances.extent(0));
+
+  auto distances_const_view = raft::make_device_matrix_view<const float, uint32_t>(
+    pairwise_distances.data_handle(), pairwise_distances.extent(0), pairwise_distances.extent(1));
+
+  raft::matrix::argmin(handle, distances_const_view, min_cols.view());
+
+  float min_cols_distances[num_queries];
+
+  for (uint32_t i = 0; i < min_cols.extent(0); i++) {
+    uint32_t mc           = min_cols(i);
+    min_cols_distances[i] = pairwise_distances(i, mc);
+  }
+
+  // create neighbors DLTensor
+  rmm::device_uvector<uint32_t> neighbors_d(4, stream);
+
+  DLManagedTensor neighbors_tensor;
+  neighbors_tensor.dl_tensor.data               = neighbors_d.data();
+  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
+  neighbors_tensor.dl_tensor.ndim               = 2;
+  neighbors_tensor.dl_tensor.dtype.code         = kDLUInt;
+  neighbors_tensor.dl_tensor.dtype.bits         = 32;
+  neighbors_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t neighbors_shape[2]                    = {num_queries, 1};
+  neighbors_tensor.dl_tensor.shape              = neighbors_shape;
+  neighbors_tensor.dl_tensor.strides            = nullptr;
+
+  // create distances DLTensor
+  rmm::device_uvector<float> distances_d(4, stream);
+
+  distances_d.resize(4, stream);
+
+  DLManagedTensor distances_tensor;
+  distances_tensor.dl_tensor.data               = distances_d.data();
+  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
+  distances_tensor.dl_tensor.ndim               = 2;
+  distances_tensor.dl_tensor.dtype.code         = kDLFloat;
+  distances_tensor.dl_tensor.dtype.bits         = 32;
+  distances_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t distances_shape[2]                    = {num_queries, 1};
+  distances_tensor.dl_tensor.shape              = distances_shape;
+  distances_tensor.dl_tensor.strides            = nullptr;
+
+  cuvsFilter filter;
+  filter.type = NO_FILTER;
+  filter.addr = (uintptr_t)NULL;
+
+  // search index
+  cuvsCagraSearchParams_t search_params;
+  cuvsCagraSearchParamsCreate(&search_params);
+  cuvsCagraSearch(
+    res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter);
+
+  // make sure that extend_return_d points to the extended dataset
+  ASSERT_TRUE(cuvs::devArrMatch(
+    main_d.data(), extend_return_d.data(), main_d.size(), cuvs::Compare<float>()));
+
+  ASSERT_TRUE(cuvs::devArrMatch(additional_d.data(),
+                                extend_return_d.data() + main_d.size(),
+                                additional_d.size(),
+                                cuvs::Compare<float>()));
+
+  // check neighbors
+  ASSERT_TRUE(
+    cuvs::devArrMatch(min_cols.data_handle(), neighbors_d.data(), 4, cuvs::Compare<uint32_t>()));
+
+  // check distances
+  ASSERT_TRUE(cuvs::devArrMatchHost(
+    min_cols_distances, distances_d.data(), 4, cuvs::CompareApprox<float>(0.001f)));
+
+  // de-allocate index and res
+  cuvsCagraSearchParamsDestroy(search_params);
+  cuvsCagraExtendParamsDestroy(extend_params);
+  cuvsCagraIndexParamsDestroy(build_params);
+  cuvsCagraIndexDestroy(index);
+  cuvsResourcesDestroy(res);
+}
+
+TEST(CagraC, BuildSearchFiltered)
+{
+  // Builds a CAGRA index through the C API, runs a BITSET-filtered search, and checks the output.
+  cuvsResources_t res;
+  cuvsResourcesCreate(&res);
+  cudaStream_t stream;
+  cuvsStreamGet(res, &stream);  // this stream is passed to the device buffers and copies below
+
+  // create dataset DLTensor: wraps `dataset` (declared outside this test) as float32, shape {4, 2}
+  DLManagedTensor dataset_tensor;
+  dataset_tensor.dl_tensor.data               = dataset;
+  dataset_tensor.dl_tensor.device.device_type = kDLCPU;  // the dataset is tagged as CPU memory
+  dataset_tensor.dl_tensor.ndim               = 2;
+  dataset_tensor.dl_tensor.dtype.code         = kDLFloat;
+  dataset_tensor.dl_tensor.dtype.bits         = 32;
+  dataset_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t dataset_shape[2]                    = {4, 2};
+  dataset_tensor.dl_tensor.shape              = dataset_shape;
+  dataset_tensor.dl_tensor.strides            = nullptr;
+
+  // create index
+  cuvsCagraIndex_t index;
+  cuvsCagraIndexCreate(&index);
+
+  // build index from the dataset tensor, using the params object as cuvsCagraIndexParamsCreate left it
+  cuvsCagraIndexParams_t build_params;
+  cuvsCagraIndexParamsCreate(&build_params);
+  cuvsCagraBuild(res, build_params, &dataset_tensor, index);
+
+  // create queries DLTensor: `queries` (declared outside this test) is copied into queries_d first
+  rmm::device_uvector<float> queries_d(4 * 2, stream);
+  raft::copy(queries_d.data(), (float*)queries, 4 * 2, stream);
+
+  DLManagedTensor queries_tensor;
+  queries_tensor.dl_tensor.data               = queries_d.data();
+  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
+  queries_tensor.dl_tensor.ndim               = 2;
+  queries_tensor.dl_tensor.dtype.code         = kDLFloat;
+  queries_tensor.dl_tensor.dtype.bits         = 32;
+  queries_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t queries_shape[2]                    = {4, 2};
+  queries_tensor.dl_tensor.shape              = queries_shape;
+  queries_tensor.dl_tensor.strides            = nullptr;
+
+  // create neighbors DLTensor: uint32 buffer of 4 elements described with shape {4, 1}
+  rmm::device_uvector<uint32_t> neighbors_d(4, stream);
+
+  DLManagedTensor neighbors_tensor;
+  neighbors_tensor.dl_tensor.data               = neighbors_d.data();
+  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
+  neighbors_tensor.dl_tensor.ndim               = 2;
+  neighbors_tensor.dl_tensor.dtype.code         = kDLUInt;
+  neighbors_tensor.dl_tensor.dtype.bits         = 32;
+  neighbors_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t neighbors_shape[2]                    = {4, 1};
+  neighbors_tensor.dl_tensor.shape              = neighbors_shape;
+  neighbors_tensor.dl_tensor.strides            = nullptr;
+
+  // create distances DLTensor: float32 buffer of 4 elements described with shape {4, 1}
+  rmm::device_uvector<float> distances_d(4, stream);
+
+  DLManagedTensor distances_tensor;
+  distances_tensor.dl_tensor.data               = distances_d.data();
+  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
+  distances_tensor.dl_tensor.ndim               = 2;
+  distances_tensor.dl_tensor.dtype.code         = kDLFloat;
+  distances_tensor.dl_tensor.dtype.bits         = 32;
+  distances_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t distances_shape[2]                    = {4, 1};
+  distances_tensor.dl_tensor.shape              = distances_shape;
+  distances_tensor.dl_tensor.strides            = nullptr;
+
+  // create filter DLTensor: a single uint32 word copied from `filter`
+  rmm::device_uvector<uint32_t> filter_d(1, stream);
+  raft::copy(filter_d.data(), filter, 1, stream);  // `filter` is still the outer declaration here
+
+  cuvsFilter filter;  // from this point on `filter` names this local and hides the outer one
+
+  DLManagedTensor filter_tensor;
+  filter_tensor.dl_tensor.data               = filter_d.data();
+  filter_tensor.dl_tensor.device.device_type = kDLCUDA;
+  filter_tensor.dl_tensor.ndim               = 1;
+  filter_tensor.dl_tensor.dtype.code         = kDLUInt;
+  filter_tensor.dl_tensor.dtype.bits         = 32;
+  filter_tensor.dl_tensor.dtype.lanes        = 1;
+  int64_t filter_shape[1]                    = {1};
+  filter_tensor.dl_tensor.shape              = filter_shape;
+  filter_tensor.dl_tensor.strides            = nullptr;
+
+  filter.type = BITSET;
+  filter.addr = (uintptr_t)&filter_tensor;  // the tensor's address is handed over as an integer
+
+  // search index; the outputs are read back from neighbors_d / distances_d in the checks below
+  cuvsCagraSearchParams_t search_params;
+  cuvsCagraSearchParamsCreate(&search_params);
+  cuvsCagraSearch(
+    res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter);
+  // verify output: 4 values each, Compare for the ids and CompareApprox(0.001f) for the distances
+  ASSERT_TRUE(cuvs::devArrMatchHost(
+    neighbors_exp_filtered, neighbors_d.data(), 4, cuvs::Compare<uint32_t>()));
+  ASSERT_TRUE(cuvs::devArrMatchHost(
+    distances_exp_filtered, distances_d.data(), 4, cuvs::CompareApprox<float>(0.001f)));
+
+  // de-allocate search/build params, index and res
+  cuvsCagraSearchParamsDestroy(search_params);
+  cuvsCagraIndexParamsDestroy(build_params);
+  cuvsCagraIndexDestroy(index);
+  cuvsResourcesDestroy(res);
+}
diff --git a/cpp/test/neighbors/ann_hnsw_c.cu b/cpp/tests/neighbors/ann_hnsw_c.cu
similarity index 100%
rename from cpp/test/neighbors/ann_hnsw_c.cu
rename to cpp/tests/neighbors/ann_hnsw_c.cu
diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/tests/neighbors/ann_ivf_flat.cuh
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_flat.cuh
rename to cpp/tests/neighbors/ann_ivf_flat.cuh
diff --git a/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu b/cpp/tests/neighbors/ann_ivf_flat/test_float_int64_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu
rename to cpp/tests/neighbors/ann_ivf_flat/test_float_int64_t.cu
diff --git a/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu b/cpp/tests/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
rename to cpp/tests/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
diff --git a/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu b/cpp/tests/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
rename to cpp/tests/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
diff --git a/cpp/test/neighbors/ann_ivf_flat_c.cu b/cpp/tests/neighbors/ann_ivf_flat_c.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_flat_c.cu
rename to cpp/tests/neighbors/ann_ivf_flat_c.cu
diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/tests/neighbors/ann_ivf_pq.cuh
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_pq.cuh
rename to cpp/tests/neighbors/ann_ivf_pq.cuh
diff --git a/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu b/cpp/tests/neighbors/ann_ivf_pq/test_float_int64_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu
rename to cpp/tests/neighbors/ann_ivf_pq/test_float_int64_t.cu
diff --git a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu b/cpp/tests/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu
rename to cpp/tests/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu
diff --git a/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu b/cpp/tests/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu
rename to cpp/tests/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu
diff --git a/cpp/test/neighbors/ann_ivf_pq_c.cu b/cpp/tests/neighbors/ann_ivf_pq_c.cu
similarity index 100%
rename from cpp/test/neighbors/ann_ivf_pq_c.cu
rename to cpp/tests/neighbors/ann_ivf_pq_c.cu
diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/tests/neighbors/ann_nn_descent.cuh
similarity index 100%
rename from cpp/test/neighbors/ann_nn_descent.cuh
rename to cpp/tests/neighbors/ann_nn_descent.cuh
diff --git a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu b/cpp/tests/neighbors/ann_nn_descent/test_float_uint32_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu
rename to cpp/tests/neighbors/ann_nn_descent/test_float_uint32_t.cu
diff --git a/cpp/test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu b/cpp/tests/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu
rename to cpp/tests/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu
diff --git a/cpp/test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu b/cpp/tests/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu
similarity index 100%
rename from cpp/test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu
rename to cpp/tests/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu
diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/tests/neighbors/ann_utils.cuh
similarity index 100%
rename from cpp/test/neighbors/ann_utils.cuh
rename to cpp/tests/neighbors/ann_utils.cuh
diff --git a/cpp/test/neighbors/ann_vamana.cuh b/cpp/tests/neighbors/ann_vamana.cuh
similarity index 73%
rename from cpp/test/neighbors/ann_vamana.cuh
rename to cpp/tests/neighbors/ann_vamana.cuh
index 9d9df4470..9fe0324d7 100644
--- a/cpp/test/neighbors/ann_vamana.cuh
+++ b/cpp/tests/neighbors/ann_vamana.cuh
@@ -46,7 +46,7 @@
 #include <string>
 #include <vector>
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 struct edge_op {
   template <typename Type, typename... UnusedArgs>
@@ -64,6 +64,7 @@ struct AnnVamanaInputs {
   double max_fraction;
   cuvs::distance::DistanceType metric;
   bool host_dataset;
+  int reverse_batchsize;
 
   // cagra search params
   int n_queries;
@@ -131,10 +132,11 @@ class AnnVamanaTest : public ::testing::TestWithParam<AnnVamanaInputs> {
   void testVamana()
   {
     vamana::index_params index_params;
-    index_params.metric       = ps.metric;
-    index_params.graph_degree = ps.graph_degree;
-    index_params.visited_size = ps.visited_size;
-    index_params.max_fraction = ps.max_fraction;
+    index_params.metric            = ps.metric;
+    index_params.graph_degree      = ps.graph_degree;
+    index_params.visited_size      = ps.visited_size;
+    index_params.max_fraction      = ps.max_fraction;
+    index_params.reverse_batchsize = ps.reverse_batchsize;
 
     auto database_view = raft::make_device_matrix_view<const DataT, int64_t>(
       (const DataT*)database.data(), ps.n_rows, ps.dim);
@@ -264,14 +266,13 @@ inline std::vector<AnnVamanaInputs> generate_inputs()
 {
   std::vector<AnnVamanaInputs> inputs = raft::util::itertools::product<AnnVamanaInputs>(
     {1000},
-    //    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},  // TODO - fix alignment
-    //    issue for odd dims
-    {16, 32, 64, 128, 192, 256, 512, 1024},  // dim
-    {32},                                    // graph degree
-    {64, 128, 256},                          // visited_size
+    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},
+    {32},       // graph degree
+    {64, 256},  // visited_size
     {0.06, 0.1},
     {cuvs::distance::DistanceType::L2Expanded},
     {false},
+    {100, 1000000},
     {100},
     {10},
     {cagra::search_algo::AUTO},
@@ -280,55 +281,58 @@ inline std::vector<AnnVamanaInputs> generate_inputs()
     {1},
     {0.2});
 
-  std::vector<AnnVamanaInputs> inputs2 =
-    raft::util::itertools::product<AnnVamanaInputs>({1000},
-                                                    {16, 32, 64, 128, 192, 256, 512, 1024},  // dim
-                                                    {64},             // graph degree
-                                                    {128, 256, 512},  // visited_size
-                                                    {0.06, 0.1},
-                                                    {cuvs::distance::DistanceType::L2Expanded},
-                                                    {false},
-                                                    {100},
-                                                    {10},
-                                                    {cagra::search_algo::AUTO},
-                                                    {10},
-                                                    {32},
-                                                    {1},
-                                                    {0.2});
+  std::vector<AnnVamanaInputs> inputs2 = raft::util::itertools::product<AnnVamanaInputs>(
+    {1000},
+    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},
+    {64},        // graph degree
+    {128, 512},  // visited_size
+    {0.06},
+    {cuvs::distance::DistanceType::L2Expanded},
+    {false},
+    {1000000},
+    {100},
+    {10},
+    {cagra::search_algo::AUTO},
+    {10},
+    {32},
+    {1},
+    {0.2});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
-  inputs2 =
-    raft::util::itertools::product<AnnVamanaInputs>({1000},
-                                                    {16, 32, 64, 128, 192, 256, 512, 1024},  // dim
-                                                    {128},       // graph degree
-                                                    {256, 512},  // visited_size
-                                                    {0.06, 0.1},
-                                                    {cuvs::distance::DistanceType::L2Expanded},
-                                                    {false},
-                                                    {100},
-                                                    {10},
-                                                    {cagra::search_algo::AUTO},
-                                                    {10},
-                                                    {64},
-                                                    {1},
-                                                    {0.2});
+  inputs2 = raft::util::itertools::product<AnnVamanaInputs>(
+    {1000},
+    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},
+    {128},  // graph degree
+    {256},  // visited_size
+    {0.06},
+    {cuvs::distance::DistanceType::L2Expanded},
+    {false},
+    {1000000},
+    {100},
+    {10},
+    {cagra::search_algo::AUTO},
+    {10},
+    {64},
+    {1},
+    {0.2});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
-  inputs2 =
-    raft::util::itertools::product<AnnVamanaInputs>({1000},
-                                                    {16, 32, 64, 128, 192, 256, 512, 1024},  // dim
-                                                    {256},        // graph degree
-                                                    {512, 1024},  // visited_size
-                                                    {0.06, 0.1},
-                                                    {cuvs::distance::DistanceType::L2Expanded},
-                                                    {false},
-                                                    {100},
-                                                    {10},
-                                                    {cagra::search_algo::AUTO},
-                                                    {10},
-                                                    {64},
-                                                    {1},
-                                                    {0.2});
+  inputs2 = raft::util::itertools::product<AnnVamanaInputs>(
+    {1000},
+    {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024},
+    {256},        // graph degree
+    {512, 1024},  // visited_size
+    {0.06},
+    {cuvs::distance::DistanceType::L2Expanded},
+    {false},
+    {1000000},
+    {100},
+    {10},
+    {cagra::search_algo::AUTO},
+    {10},
+    {64},
+    {1},
+    {0.2});
   inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
 
   return inputs;
@@ -336,4 +340,4 @@ inline std::vector<AnnVamanaInputs> generate_inputs()
 
 const std::vector<AnnVamanaInputs> inputs = generate_inputs();
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/test/neighbors/ann_vamana/test_float_uint32_t.cu b/cpp/tests/neighbors/ann_vamana/test_float_uint32_t.cu
similarity index 89%
rename from cpp/test/neighbors/ann_vamana/test_float_uint32_t.cu
rename to cpp/tests/neighbors/ann_vamana/test_float_uint32_t.cu
index 9aa9da1b8..7b89b6544 100644
--- a/cpp/test/neighbors/ann_vamana/test_float_uint32_t.cu
+++ b/cpp/tests/neighbors/ann_vamana/test_float_uint32_t.cu
@@ -18,11 +18,11 @@
 
 #include "../ann_vamana.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 typedef AnnVamanaTest<float, float, std::uint32_t> AnnVamanaTestF_U32;
 TEST_P(AnnVamanaTestF_U32, AnnVamana) { this->testVamana(); }
 
 INSTANTIATE_TEST_CASE_P(AnnVamanaTest, AnnVamanaTestF_U32, ::testing::ValuesIn(inputs));
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/test/neighbors/ann_vamana/test_int8_t_uint32_t.cu b/cpp/tests/neighbors/ann_vamana/test_int8_t_uint32_t.cu
similarity index 89%
rename from cpp/test/neighbors/ann_vamana/test_int8_t_uint32_t.cu
rename to cpp/tests/neighbors/ann_vamana/test_int8_t_uint32_t.cu
index 0a6b563b2..843d2274a 100644
--- a/cpp/test/neighbors/ann_vamana/test_int8_t_uint32_t.cu
+++ b/cpp/tests/neighbors/ann_vamana/test_int8_t_uint32_t.cu
@@ -18,11 +18,11 @@
 
 #include "../ann_vamana.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 typedef AnnVamanaTest<float, int8_t, std::uint32_t> AnnVamanaTestI8_U32;
 TEST_P(AnnVamanaTestI8_U32, AnnVamana) { this->testVamana(); }
 
 INSTANTIATE_TEST_CASE_P(AnnVamanaTest, AnnVamanaTestI8_U32, ::testing::ValuesIn(inputs));
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/test/neighbors/ann_vamana/test_uint8_t_uint32_t.cu b/cpp/tests/neighbors/ann_vamana/test_uint8_t_uint32_t.cu
similarity index 89%
rename from cpp/test/neighbors/ann_vamana/test_uint8_t_uint32_t.cu
rename to cpp/tests/neighbors/ann_vamana/test_uint8_t_uint32_t.cu
index c0680dc18..f08db0c49 100644
--- a/cpp/test/neighbors/ann_vamana/test_uint8_t_uint32_t.cu
+++ b/cpp/tests/neighbors/ann_vamana/test_uint8_t_uint32_t.cu
@@ -18,11 +18,11 @@
 
 #include "../ann_vamana.cuh"
 
-namespace cuvs::neighbors::experimental::vamana {
+namespace cuvs::neighbors::vamana {
 
 typedef AnnVamanaTest<float, uint8_t, std::uint32_t> AnnVamanaTestU8_U32;
 TEST_P(AnnVamanaTestU8_U32, AnnVamana) { this->testVamana(); }
 
 INSTANTIATE_TEST_CASE_P(AnnVamanaTest, AnnVamanaTestU8_U32, ::testing::ValuesIn(inputs));
 
-}  // namespace cuvs::neighbors::experimental::vamana
+}  // namespace cuvs::neighbors::vamana
diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/tests/neighbors/brute_force.cu
similarity index 100%
rename from cpp/test/neighbors/brute_force.cu
rename to cpp/tests/neighbors/brute_force.cu
diff --git a/cpp/test/neighbors/brute_force_c.cu b/cpp/tests/neighbors/brute_force_c.cu
similarity index 100%
rename from cpp/test/neighbors/brute_force_c.cu
rename to cpp/tests/neighbors/brute_force_c.cu
diff --git a/cpp/test/neighbors/brute_force_prefiltered.cu b/cpp/tests/neighbors/brute_force_prefiltered.cu
similarity index 54%
rename from cpp/test/neighbors/brute_force_prefiltered.cu
rename to cpp/tests/neighbors/brute_force_prefiltered.cu
index 12b1c529e..bf7dce7ee 100644
--- a/cpp/test/neighbors/brute_force_prefiltered.cu
+++ b/cpp/tests/neighbors/brute_force_prefiltered.cu
@@ -28,6 +28,7 @@
 #include <raft/random/rng_state.hpp>
 #include <raft/util/popc.cuh>
 
+#include <cusparse.h>
 #include <gtest/gtest.h>
 
 #include <cuda_fp16.h>
@@ -146,11 +147,27 @@ void set_bitmap(const index_t* src,
   RAFT_CUDA_TRY(cudaGetLastError());
 }
 
+bool isCuSparseVersionGreaterThan_12_0_1()
+{
+  // True when the cuSPARSE version reported at runtime is at least 12.0.2 (newer than 12.0.1).
+  cusparseHandle_t handle;
+  // No handle means no version query: answer "not newer" rather than decode an unset value.
+  if (cusparseCreate(&handle) != CUSPARSE_STATUS_SUCCESS) { return false; }
+  int version = 0;  // stays 0, which decodes as "too old", if the query itself fails
+  cusparseGetVersion(handle, &version);
+  cusparseDestroy(handle);
+  // Decode the version integer as major * 1000 + minor * 100 + patch.
+  int major = version / 1000;
+  int minor = (version % 1000) / 100;
+  int patch = version % 100;
+  return (major > 12) || (major == 12 && minor > 0) || (major == 12 && minor == 0 && patch >= 2);
+}
+
 template <typename value_t, typename dist_t, typename index_t, typename bitmap_t = uint32_t>
-class PrefilteredBruteForceTest
+class PrefilteredBruteForceOnBitmapTest
   : public ::testing::TestWithParam<PrefilteredBruteForceInputs<index_t>> {
  public:
-  PrefilteredBruteForceTest()
+  PrefilteredBruteForceOnBitmapTest()
     : stream(raft::resource::get_cuda_stream(handle)),
       params(::testing::TestWithParam<PrefilteredBruteForceInputs<index_t>>::GetParam()),
       filter_d(0, stream),
@@ -352,6 +369,9 @@ class PrefilteredBruteForceTest
 
   void SetUp() override
   {
+    if (std::is_same_v<value_t, half> && !isCuSparseVersionGreaterThan_12_0_1()) {
+      GTEST_SKIP() << "Skipping all tests for half-float as cuSparse doesn't support it.";
+    }
     index_t element =
       raft::ceildiv(params.n_queries * params.n_dataset, index_t(sizeof(bitmap_t) * 8));
     std::vector<bitmap_t> filter_h(element);
@@ -476,8 +496,6 @@ class PrefilteredBruteForceTest
     out_val_expected_d.resize(params.n_queries * params.top_k, stream);
     out_idx_expected_d.resize(params.n_queries * params.top_k, stream);
 
-    // dump_vector(out_val_h.data(), out_val_h.size(), "out_val_h");
-
     raft::update_device(out_val_expected_d.data(), out_val_h.data(), out_val_h.size(), stream);
     raft::update_device(out_idx_expected_d.data(), out_idx_h.data(), out_idx_h.size(), stream);
 
@@ -494,8 +512,8 @@ class PrefilteredBruteForceTest
 
     auto dataset = brute_force::build(handle, dataset_raw, params.metric);
 
-    auto filter = cuvs::core::bitmap_view<const bitmap_t, index_t>(
-      (const bitmap_t*)filter_d.data(), params.n_queries, params.n_dataset);
+    auto filter = cuvs::core::bitmap_view<bitmap_t, index_t>(
+      (bitmap_t*)filter_d.data(), params.n_queries, params.n_dataset);
 
     auto out_val = raft::make_device_matrix_view<dist_t, index_t, raft::row_major>(
       out_val_d.data(), params.n_queries, params.top_k);
@@ -544,11 +562,451 @@ class PrefilteredBruteForceTest
   rmm::device_uvector<index_t> out_idx_expected_d;
 };
 
-using PrefilteredBruteForceTest_float_int64 = PrefilteredBruteForceTest<float, float, int64_t>;
-TEST_P(PrefilteredBruteForceTest_float_int64, Result) { Run(); }
+template <typename value_t, typename dist_t, typename index_t, typename bitset_t = uint32_t>
+class PrefilteredBruteForceOnBitsetTest
+  : public ::testing::TestWithParam<PrefilteredBruteForceInputs<index_t>> {
+ public:
+  PrefilteredBruteForceOnBitsetTest()
+    : stream(raft::resource::get_cuda_stream(handle)),
+      params(::testing::TestWithParam<PrefilteredBruteForceInputs<index_t>>::GetParam()),
+      filter_d(0, stream),
+      dataset_d(0, stream),
+      queries_d(0, stream),
+      out_val_d(0, stream),
+      out_val_expected_d(0, stream),
+      out_idx_d(0, stream),
+      out_idx_expected_d(0, stream)
+  {  // All device buffers start empty on the handle's stream; SetUp() resizes each of them.
+  }
+
+ protected:
+  void repeat_cpu_bitset(std::vector<bitset_t>& input,
+                         size_t input_bits,
+                         size_t repeat,
+                         std::vector<bitset_t>& output)
+  {  // Host helper: write `repeat` back-to-back copies of the first `input_bits` input bits.
+    const size_t output_bits  = input_bits * repeat;
+    const size_t output_units = (output_bits + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8);
+    // `output` must already hold at least `output_units` words; they are zeroed before use.
+    std::memset(output.data(), 0, output_units * sizeof(bitset_t));
+
+    size_t output_bit_index = 0;
+    // Copies are packed without padding, so a copy may begin in the middle of a word.
+    for (size_t r = 0; r < repeat; ++r) {
+      for (size_t i = 0; i < input_bits; ++i) {
+        size_t input_unit_index = i / (sizeof(bitset_t) * 8);
+        size_t input_bit_offset = i % (sizeof(bitset_t) * 8);
+        bool bit                = (input[input_unit_index] >> input_bit_offset) & 1;
+
+        size_t output_unit_index = output_bit_index / (sizeof(bitset_t) * 8);
+        size_t output_bit_offset = output_bit_index % (sizeof(bitset_t) * 8);
+
+        output[output_unit_index] |= (static_cast<bitset_t>(bit) << output_bit_offset);
+
+        ++output_bit_index;
+      }
+    }
+  }
+
+  index_t create_sparse_matrix_with_rmat(index_t m,
+                                         index_t n,
+                                         float sparsity,
+                                         rmm::device_uvector<bitset_t>& filter_d)
+  {  // Sample an m x n R-MAT bit pattern into filter_d; returns the counted set bits.
+    index_t r_scale   = (index_t)std::log2(m);
+    index_t c_scale   = (index_t)std::log2(n);
+    index_t n_edges   = (index_t)(m * n * 1.0f * sparsity);
+    index_t max_scale = std::max(r_scale, c_scale);
+
+    rmm::device_uvector<index_t> out_src{static_cast<size_t>(n_edges), stream};
+    rmm::device_uvector<index_t> out_dst{static_cast<size_t>(n_edges), stream};
+    rmm::device_uvector<float> theta{static_cast<size_t>(4 * max_scale), stream};
+    // Fixed RNG seed (2024): the same edge list is requested on every run.
+    raft::random::RngState state{2024ULL, raft::random::GeneratorType::GenPC};
+
+    raft::random::uniform<float>(handle, state, theta.data(), theta.size(), 0.0f, 1.0f);
+    normalize<float, float>(
+      theta.data(), theta.data(), max_scale, r_scale, c_scale, r_scale != c_scale, true, stream);
+    raft::random::rmat_rectangular_gen((index_t*)nullptr,
+                                       out_src.data(),
+                                       out_dst.data(),
+                                       theta.data(),
+                                       r_scale,
+                                       c_scale,
+                                       n_edges,
+                                       stream,
+                                       state);
+    // NOTE(review): filter_d is not zeroed here -- confirm set_bitmap or the caller does so.
+    index_t nnz_h = 0;
+    {
+      auto src    = out_src.data();
+      auto dst    = out_dst.data();
+      auto bitset = filter_d.data();
+      rmm::device_scalar<index_t> nnz(0, stream);
+      auto nnz_view = raft::make_device_scalar_view<index_t>(nnz.data());
+      auto filter_view =
+        raft::make_device_vector_view<const bitset_t, index_t>(filter_d.data(), filter_d.size());
+      index_t size_h = m * n;
+      auto size_view = raft::make_host_scalar_view<const index_t, index_t>(&size_h);
+      // The view's element type follows bitset_t so the count matches the buffer's word size.
+      set_bitmap(src, dst, bitset, n_edges, n, stream);
+
+      raft::popc(handle, filter_view, size_view, nnz_view);
+      raft::copy(&nnz_h, nnz.data(), 1, stream);
+
+      raft::resource::sync_stream(handle, stream);
+    }
+
+    return nnz_h;
+  }
+
+  void cpu_convert_to_csr(std::vector<bitset_t>& bitset,
+                          index_t rows,
+                          index_t cols,
+                          std::vector<index_t>& indices,
+                          std::vector<index_t>& indptr)
+  {  // Host reference: turn a rows x cols row-major bit matrix into CSR indices/indptr.
+    index_t offset_indptr   = 0;
+    index_t offset_values   = 0;
+    indptr[offset_indptr++] = 0;
+    // indices needs one slot per set bit and indptr needs rows + 1 slots (no bounds checks).
+    index_t index        = 0;
+    bitset_t element     = 0;
+    index_t bit_position = 0;
+    // Bit (i, j) is bit number i * cols + j, least-significant bit first within a word.
+    for (index_t i = 0; i < rows; ++i) {
+      for (index_t j = 0; j < cols; ++j) {
+        index        = i * cols + j;
+        element      = bitset[index / (8 * sizeof(bitset_t))];
+        bit_position = index % (8 * sizeof(bitset_t));
+
+        if (((element >> bit_position) & 1)) {
+          indices[offset_values] = static_cast<index_t>(j);
+          offset_values++;
+        }
+      }
+      indptr[offset_indptr++] = static_cast<index_t>(offset_values);
+    }
+  }
+
+  void cpu_sddmm(const std::vector<dist_t>& A,
+                 const std::vector<dist_t>& B,
+                 std::vector<dist_t>& vals,
+                 const std::vector<index_t>& cols,
+                 const std::vector<index_t>& row_ptrs,
+                 bool is_row_major_A,
+                 bool is_row_major_B,
+                 dist_t alpha = 1.0,
+                 dist_t beta  = 0.0)
+  {  // Host reference distances, evaluated only at the (row, column) pairs listed in the CSR.
+    if (params.n_queries * params.dim != static_cast<index_t>(A.size()) ||
+        params.dim * params.n_dataset != static_cast<index_t>(B.size())) {
+      std::cerr << "Matrix dimensions and vector size do not match!" << std::endl;
+      return;
+    }
 
-using PrefilteredBruteForceTest_half_int64 = PrefilteredBruteForceTest<half, float, int64_t>;
-TEST_P(PrefilteredBruteForceTest_half_int64, Result) { Run(); }
+    bool trans_a = is_row_major_A;
+    bool trans_b = is_row_major_B;
+    // Despite the trans_* names, a true flag selects row-major indexing in the loops below.
+    for (index_t i = 0; i < params.n_queries; ++i) {
+      for (index_t j = row_ptrs[i]; j < row_ptrs[i + 1]; ++j) {
+        dist_t sum     = 0;
+        dist_t norms_A = 0;
+        dist_t norms_B = 0;
+        // With a 2-byte value_t, inputs are rounded through half precision before use.
+        for (index_t l = 0; l < params.dim; ++l) {
+          index_t a_index = trans_a ? i * params.dim + l : l * params.n_queries + i;
+          index_t b_index = trans_b ? l * params.n_dataset + cols[j] : cols[j] * params.dim + l;
+          dist_t A_v;
+          dist_t B_v;
+          if constexpr (sizeof(value_t) == 2) {
+            A_v = __half2float(__float2half(A[a_index]));
+            B_v = __half2float(__float2half(B[b_index]));
+          } else {
+            A_v = A[a_index];
+            B_v = B[b_index];
+          }
+
+          sum += A_v * B_v;
+
+          norms_A += A_v * A_v;
+          norms_B += B_v * B_v;
+        }
+        vals[j] = alpha * sum + beta * vals[j];  // metrics not matched below keep this value
+        if (params.metric == cuvs::distance::DistanceType::L2Expanded) {
+          vals[j] = dist_t(-2.0) * vals[j] + norms_A + norms_B;
+        } else if (params.metric == cuvs::distance::DistanceType::L2SqrtExpanded) {
+          vals[j] = std::sqrt(dist_t(-2.0) * vals[j] + norms_A + norms_B);
+        } else if (params.metric == cuvs::distance::DistanceType::CosineExpanded) {
+          vals[j] = dist_t(1.0) - vals[j] / std::sqrt(norms_A * norms_B);
+        }
+      }
+    }
+  }
+
+  void cpu_select_k(const std::vector<index_t>& indptr_h,
+                    const std::vector<index_t>& indices_h,
+                    const std::vector<dist_t>& values_h,
+                    std::optional<std::vector<index_t>>& in_idx_h,  // overrides indices_h if set
+                    index_t n_queries,
+                    index_t n_dataset,  // not used by this function
+                    index_t top_k,
+                    std::vector<dist_t>& out_values_h,
+                    std::vector<index_t>& out_indices_h,
+                    bool select_min = true)
+  {  // Host reference top-k: per CSR row, keep the top_k best values and write them best-first.
+    auto comp = [select_min](const std::pair<dist_t, index_t>& a,
+                             const std::pair<dist_t, index_t>& b) {
+      return select_min ? a.first < b.first : a.first > b.first;
+    };
+    // Comparators must be strict (<, >): std::sort/priority_queue require strict weak ordering.
+    for (index_t row = 0; row < n_queries; ++row) {
+      std::priority_queue<std::pair<dist_t, index_t>,
+                          std::vector<std::pair<dist_t, index_t>>,
+                          decltype(comp)>
+        pq(comp);
+      for (index_t idx = indptr_h[row]; idx < indptr_h[row + 1]; ++idx) {
+        pq.push({values_h[idx], (in_idx_h.has_value()) ? (*in_idx_h)[idx] : indices_h[idx]});
+        if (pq.size() > size_t(top_k)) { pq.pop(); }
+      }
+      // Drain the heap and sort best-first; short rows leave trailing output slots untouched.
+      std::vector<std::pair<dist_t, index_t>> row_pairs;
+      while (!pq.empty()) {
+        row_pairs.push_back(pq.top());
+        pq.pop();
+      }
+
+      if (select_min) {
+        std::sort(row_pairs.begin(), row_pairs.end(), [](const auto& a, const auto& b) {
+          return a.first < b.first;
+        });
+      } else {
+        std::sort(row_pairs.begin(), row_pairs.end(), [](const auto& a, const auto& b) {
+          return a.first > b.first;
+        });
+      }
+      for (index_t col = 0; col < top_k; col++) {
+        if (col < index_t(row_pairs.size())) {
+          out_values_h[row * top_k + col]  = row_pairs[col].first;
+          out_indices_h[row * top_k + col] = row_pairs[col].second;
+        }
+      }
+    }
+  }
+
+  void SetUp() override
+  {  // Build the filter, the random inputs and the host-computed expected top-k results.
+    if (std::is_same_v<value_t, half> && !isCuSparseVersionGreaterThan_12_0_1()) {
+      GTEST_SKIP() << "Skipping all tests for half-float as cuSparse doesn't support it.";
+    }
+    index_t element = raft::ceildiv(1 * params.n_dataset, index_t(sizeof(bitset_t) * 8));
+    std::vector<bitset_t> filter_h(element);
+    std::vector<bitset_t> filter_repeat_h(element * params.n_queries);
+    // One filter row of n_dataset bits is generated, then replicated per query on the host.
+    filter_d.resize(element, stream);
+
+    nnz = create_sparse_matrix_with_rmat(1, params.n_dataset, params.sparsity, filter_d);
+    raft::update_host(filter_h.data(), filter_d.data(), filter_d.size(), stream);
+    raft::resource::sync_stream(handle, stream);
+
+    repeat_cpu_bitset(
+      filter_h, size_t(params.n_dataset), size_t(params.n_queries), filter_repeat_h);
+    nnz *= params.n_queries;
+
+    index_t dataset_size = params.n_dataset * params.dim;
+    index_t queries_size = params.n_queries * params.dim;
+
+    std::vector<dist_t> dataset_h(dataset_size);
+    std::vector<dist_t> queries_h(queries_size);
+
+    dataset_d.resize(dataset_size, stream);
+    queries_d.resize(queries_size, stream);
+    // A single make_blobs call produces the dataset values followed by the query values.
+    auto blobs_in_val =
+      raft::make_device_matrix<dist_t, index_t>(handle, 1, dataset_size + queries_size);
+    auto labels = raft::make_device_vector<index_t, index_t>(handle, 1);
+    // For half value_t the values are generated as dist_t and converted further down.
+    if constexpr (!std::is_same_v<value_t, half>) {
+      raft::random::make_blobs<value_t, index_t>(blobs_in_val.data_handle(),
+                                                 labels.data_handle(),
+                                                 1,
+                                                 dataset_size + queries_size,
+                                                 1,
+                                                 stream,
+                                                 false,
+                                                 nullptr,
+                                                 nullptr,
+                                                 value_t(1.0),
+                                                 false,
+                                                 value_t(-1.0f),
+                                                 value_t(1.0f),
+                                                 uint64_t(2024));
+    } else {
+      raft::random::make_blobs<dist_t, index_t>(blobs_in_val.data_handle(),
+                                                labels.data_handle(),
+                                                1,
+                                                dataset_size + queries_size,
+                                                1,
+                                                stream,
+                                                false,
+                                                nullptr,
+                                                nullptr,
+                                                dist_t(1.0),
+                                                false,
+                                                dist_t(-1.0f),
+                                                dist_t(1.0f),
+                                                uint64_t(2024));
+    }
+
+    raft::copy(dataset_h.data(), blobs_in_val.data_handle(), dataset_size, stream);
+
+    if constexpr (std::is_same_v<value_t, half>) {
+      thrust::device_ptr<dist_t> d_output_ptr =
+        thrust::device_pointer_cast(blobs_in_val.data_handle());
+      thrust::device_ptr<value_t> d_value_ptr = thrust::device_pointer_cast(dataset_d.data());
+      thrust::transform(thrust::cuda::par.on(stream),
+                        d_output_ptr,
+                        d_output_ptr + dataset_size,
+                        d_value_ptr,
+                        float_to_half());
+    } else {
+      raft::copy(dataset_d.data(), blobs_in_val.data_handle(), dataset_size, stream);
+    }
+
+    raft::copy(queries_h.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream);
+    if constexpr (std::is_same_v<value_t, half>) {
+      thrust::device_ptr<dist_t> d_output_ptr =
+        thrust::device_pointer_cast(blobs_in_val.data_handle() + dataset_size);
+      thrust::device_ptr<value_t> d_value_ptr = thrust::device_pointer_cast(queries_d.data());
+      thrust::transform(thrust::cuda::par.on(stream),
+                        d_output_ptr,
+                        d_output_ptr + queries_size,
+                        d_value_ptr,
+                        float_to_half());
+    } else {
+      raft::copy(queries_d.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream);
+    }
+
+    raft::resource::sync_stream(handle);
+    // Host reference: CSR of the replicated filter -> masked distances -> top-k per query.
+    std::vector<dist_t> values_h(nnz);
+    std::vector<index_t> indices_h(nnz);
+    std::vector<index_t> indptr_h(params.n_queries + 1);
+
+    cpu_convert_to_csr(filter_repeat_h, params.n_queries, params.n_dataset, indices_h, indptr_h);
+
+    cpu_sddmm(queries_h, dataset_h, values_h, indices_h, indptr_h, true, false);
+
+    bool select_min = cuvs::distance::is_min_close(params.metric);
+
+    std::vector<dist_t> out_val_h(
+      params.n_queries * params.top_k,
+      select_min ? std::numeric_limits<dist_t>::infinity() : std::numeric_limits<dist_t>::lowest());
+    std::vector<index_t> out_idx_h(params.n_queries * params.top_k, static_cast<index_t>(0));
+
+    out_val_d.resize(params.n_queries * params.top_k, stream);
+    out_idx_d.resize(params.n_queries * params.top_k, stream);
+    // The search outputs are pre-filled with the same sentinel values as the host buffers.
+    raft::update_device(out_val_d.data(), out_val_h.data(), out_val_h.size(), stream);
+    raft::update_device(out_idx_d.data(), out_idx_h.data(), out_idx_h.size(), stream);
+
+    raft::resource::sync_stream(handle);
+
+    std::optional<std::vector<index_t>> optional_indices_h = std::nullopt;
+    cpu_select_k(indptr_h,
+                 indices_h,
+                 values_h,
+                 optional_indices_h,
+                 params.n_queries,
+                 params.n_dataset,
+                 params.top_k,
+                 out_val_h,
+                 out_idx_h,
+                 select_min);
+    out_val_expected_d.resize(params.n_queries * params.top_k, stream);
+    out_idx_expected_d.resize(params.n_queries * params.top_k, stream);
+
+    raft::update_device(out_val_expected_d.data(), out_val_h.data(), out_val_h.size(), stream);
+    raft::update_device(out_idx_expected_d.data(), out_idx_h.data(), out_idx_h.size(), stream);
+
+    raft::resource::sync_stream(handle);
+  }
+
+  void Run()
+  {  // Run the bitset-filtered brute-force search and compare with SetUp()'s expected output.
+    auto dataset_raw = raft::make_device_matrix_view<const value_t, index_t, raft::row_major>(
+      (const value_t*)dataset_d.data(), params.n_dataset, params.dim);
+
+    auto queries = raft::make_device_matrix_view<const value_t, index_t, raft::row_major>(
+      (const value_t*)queries_d.data(), params.n_queries, params.dim);
+
+    auto dataset = brute_force::build(handle, dataset_raw, params.metric);
+    // Bitset of n_dataset bits; the reference built in SetUp() applies it to every query row.
+    auto filter =
+      cuvs::core::bitset_view<bitset_t, index_t>((bitset_t*)filter_d.data(), params.n_dataset);
+
+    auto out_val = raft::make_device_matrix_view<dist_t, index_t, raft::row_major>(
+      out_val_d.data(), params.n_queries, params.top_k);
+    auto out_idx = raft::make_device_matrix_view<index_t, index_t, raft::row_major>(
+      out_idx_d.data(), params.n_queries, params.top_k);
+
+    brute_force::search(handle,
+                        dataset,
+                        queries,
+                        out_idx,
+                        out_val,
+                        cuvs::neighbors::filtering::bitset_filter(filter));
+    std::vector<dist_t> out_val_h(params.n_queries * params.top_k,
+                                  std::numeric_limits<dist_t>::infinity());
+    // NOTE(review): out_val_h is copied back but not read afterwards in this function.
+    raft::update_host(out_val_h.data(), out_val_d.data(), out_val_h.size(), stream);
+    raft::resource::sync_stream(handle);
+
+    ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(out_idx_expected_d.data(),
+                                                    out_idx.data_handle(),
+                                                    out_val_expected_d.data(),
+                                                    out_val.data_handle(),
+                                                    params.n_queries,
+                                                    params.top_k,
+                                                    0.001f,
+                                                    stream,
+                                                    true));
+  }
+
+ protected:
+  raft::resources handle;
+  cudaStream_t stream;
+
+  PrefilteredBruteForceInputs<index_t> params;
+
+  index_t nnz;
+
+  rmm::device_uvector<value_t> dataset_d;
+  rmm::device_uvector<value_t> queries_d;
+  rmm::device_uvector<bitset_t> filter_d;
+
+  rmm::device_uvector<dist_t> out_val_d;
+  rmm::device_uvector<dist_t> out_val_expected_d;
+
+  rmm::device_uvector<index_t> out_idx_d;
+  rmm::device_uvector<index_t> out_idx_expected_d;
+};
+
+using PrefilteredBruteForceTestOnBitmap_float_int64 =
+  PrefilteredBruteForceOnBitmapTest<float, float, int64_t>;  // <value_t, dist_t, index_t>
+TEST_P(PrefilteredBruteForceTestOnBitmap_float_int64, Result) { Run(); }
+
+using PrefilteredBruteForceTestOnBitmap_half_int64 =
+  PrefilteredBruteForceOnBitmapTest<half, float, int64_t>;  // half inputs, float distances
+TEST_P(PrefilteredBruteForceTestOnBitmap_half_int64, Result) { Run(); }
+
+using PrefilteredBruteForceTestOnBitset_float_int64 =
+  PrefilteredBruteForceOnBitsetTest<float, float, int64_t>;  // <value_t, dist_t, index_t>
+TEST_P(PrefilteredBruteForceTestOnBitset_float_int64, Result) { Run(); }
+
+using PrefilteredBruteForceTestOnBitset_half_int64 =
+  PrefilteredBruteForceOnBitsetTest<half, float, int64_t>;  // half inputs, float distances
+TEST_P(PrefilteredBruteForceTestOnBitset_half_int64, Result) { Run(); }
 
 template <typename index_t>
 const std::vector<PrefilteredBruteForceInputs<index_t>> selectk_inputs = {
@@ -570,7 +1028,7 @@ const std::vector<PrefilteredBruteForceInputs<index_t>> selectk_inputs = {
   {1024, 8192, 5, 0, 0.1, cuvs::distance::DistanceType::L2SqrtExpanded},
   {1024, 8192, 8, 0, 0.1, cuvs::distance::DistanceType::CosineExpanded},
 
-  {1024, 8192, 1, 1, 0.1, cuvs::distance::DistanceType::L2Expanded},  //--
+  {1024, 8192, 1, 1, 0.1, cuvs::distance::DistanceType::L2Expanded},
   {1024, 8192, 3, 1, 0.1, cuvs::distance::DistanceType::InnerProduct},
   {1024, 8192, 5, 1, 0.1, cuvs::distance::DistanceType::L2SqrtExpanded},
   {1024, 8192, 8, 1, 0.1, cuvs::distance::DistanceType::CosineExpanded},
@@ -599,12 +1057,20 @@ const std::vector<PrefilteredBruteForceInputs<index_t>> selectk_inputs = {
   {1024, 8192, 5, 16, 0.5, cuvs::distance::DistanceType::CosineExpanded},
   {1024, 8192, 8, 16, 0.2, cuvs::distance::DistanceType::CosineExpanded}};
 
-INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceTest,
-                        PrefilteredBruteForceTest_float_int64,
+INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceOnBitmapTest,
+                        PrefilteredBruteForceTestOnBitmap_float_int64,
+                        ::testing::ValuesIn(selectk_inputs<int64_t>));
+
+INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceOnBitmapTest,
+                        PrefilteredBruteForceTestOnBitmap_half_int64,
+                        ::testing::ValuesIn(selectk_inputs<int64_t>));
+
+INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceOnBitsetTest,
+                        PrefilteredBruteForceTestOnBitset_float_int64,
                         ::testing::ValuesIn(selectk_inputs<int64_t>));
 
-INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceTest,
-                        PrefilteredBruteForceTest_half_int64,
+INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceOnBitsetTest,
+                        PrefilteredBruteForceTestOnBitset_half_int64,
                         ::testing::ValuesIn(selectk_inputs<int64_t>));
 
 }  // namespace cuvs::neighbors::brute_force
diff --git a/cpp/test/neighbors/c_api.c b/cpp/tests/neighbors/c_api.c
similarity index 100%
rename from cpp/test/neighbors/c_api.c
rename to cpp/tests/neighbors/c_api.c
diff --git a/cpp/test/neighbors/dynamic_batching.cuh b/cpp/tests/neighbors/dynamic_batching.cuh
similarity index 100%
rename from cpp/test/neighbors/dynamic_batching.cuh
rename to cpp/tests/neighbors/dynamic_batching.cuh
diff --git a/cpp/test/neighbors/dynamic_batching/test_brute_force.cu b/cpp/tests/neighbors/dynamic_batching/test_brute_force.cu
similarity index 100%
rename from cpp/test/neighbors/dynamic_batching/test_brute_force.cu
rename to cpp/tests/neighbors/dynamic_batching/test_brute_force.cu
diff --git a/cpp/test/neighbors/dynamic_batching/test_cagra.cu b/cpp/tests/neighbors/dynamic_batching/test_cagra.cu
similarity index 100%
rename from cpp/test/neighbors/dynamic_batching/test_cagra.cu
rename to cpp/tests/neighbors/dynamic_batching/test_cagra.cu
diff --git a/cpp/test/neighbors/dynamic_batching/test_ivf_flat.cu b/cpp/tests/neighbors/dynamic_batching/test_ivf_flat.cu
similarity index 100%
rename from cpp/test/neighbors/dynamic_batching/test_ivf_flat.cu
rename to cpp/tests/neighbors/dynamic_batching/test_ivf_flat.cu
diff --git a/cpp/test/neighbors/dynamic_batching/test_ivf_pq.cu b/cpp/tests/neighbors/dynamic_batching/test_ivf_pq.cu
similarity index 100%
rename from cpp/test/neighbors/dynamic_batching/test_ivf_pq.cu
rename to cpp/tests/neighbors/dynamic_batching/test_ivf_pq.cu
diff --git a/cpp/test/neighbors/hnsw.cu b/cpp/tests/neighbors/hnsw.cu
similarity index 100%
rename from cpp/test/neighbors/hnsw.cu
rename to cpp/tests/neighbors/hnsw.cu
diff --git a/cpp/test/neighbors/knn_utils.cuh b/cpp/tests/neighbors/knn_utils.cuh
similarity index 100%
rename from cpp/test/neighbors/knn_utils.cuh
rename to cpp/tests/neighbors/knn_utils.cuh
diff --git a/cpp/test/neighbors/mg.cuh b/cpp/tests/neighbors/mg.cuh
similarity index 100%
rename from cpp/test/neighbors/mg.cuh
rename to cpp/tests/neighbors/mg.cuh
diff --git a/cpp/test/neighbors/mg/test_float.cu b/cpp/tests/neighbors/mg/test_float.cu
similarity index 100%
rename from cpp/test/neighbors/mg/test_float.cu
rename to cpp/tests/neighbors/mg/test_float.cu
diff --git a/cpp/test/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh
similarity index 100%
rename from cpp/test/neighbors/naive_knn.cuh
rename to cpp/tests/neighbors/naive_knn.cuh
diff --git a/cpp/test/neighbors/refine.cu b/cpp/tests/neighbors/refine.cu
similarity index 100%
rename from cpp/test/neighbors/refine.cu
rename to cpp/tests/neighbors/refine.cu
diff --git a/cpp/test/neighbors/refine_helper.cuh b/cpp/tests/neighbors/refine_helper.cuh
similarity index 100%
rename from cpp/test/neighbors/refine_helper.cuh
rename to cpp/tests/neighbors/refine_helper.cuh
diff --git a/cpp/test/neighbors/run_brute_force_c.c b/cpp/tests/neighbors/run_brute_force_c.c
similarity index 100%
rename from cpp/test/neighbors/run_brute_force_c.c
rename to cpp/tests/neighbors/run_brute_force_c.c
diff --git a/cpp/test/neighbors/run_ivf_flat_c.c b/cpp/tests/neighbors/run_ivf_flat_c.c
similarity index 100%
rename from cpp/test/neighbors/run_ivf_flat_c.c
rename to cpp/tests/neighbors/run_ivf_flat_c.c
diff --git a/cpp/test/neighbors/run_ivf_pq_c.c b/cpp/tests/neighbors/run_ivf_pq_c.c
similarity index 100%
rename from cpp/test/neighbors/run_ivf_pq_c.c
rename to cpp/tests/neighbors/run_ivf_pq_c.c
diff --git a/cpp/test/neighbors/sparse_brute_force.cu b/cpp/tests/neighbors/sparse_brute_force.cu
similarity index 100%
rename from cpp/test/neighbors/sparse_brute_force.cu
rename to cpp/tests/neighbors/sparse_brute_force.cu
diff --git a/cpp/test/preprocessing/scalar_quantization.cu b/cpp/tests/preprocessing/scalar_quantization.cu
similarity index 100%
rename from cpp/test/preprocessing/scalar_quantization.cu
rename to cpp/tests/preprocessing/scalar_quantization.cu
diff --git a/cpp/test/sparse/cluster/cluster_solvers.cu b/cpp/tests/sparse/cluster/cluster_solvers.cu
similarity index 100%
rename from cpp/test/sparse/cluster/cluster_solvers.cu
rename to cpp/tests/sparse/cluster/cluster_solvers.cu
diff --git a/cpp/test/sparse/cluster/eigen_solvers.cu b/cpp/tests/sparse/cluster/eigen_solvers.cu
similarity index 100%
rename from cpp/test/sparse/cluster/eigen_solvers.cu
rename to cpp/tests/sparse/cluster/eigen_solvers.cu
diff --git a/cpp/test/sparse/cluster/spectral.cu b/cpp/tests/sparse/cluster/spectral.cu
similarity index 99%
rename from cpp/test/sparse/cluster/spectral.cu
rename to cpp/tests/sparse/cluster/spectral.cu
index 7d0cdef9d..98b963451 100644
--- a/cpp/test/sparse/cluster/spectral.cu
+++ b/cpp/tests/sparse/cluster/spectral.cu
@@ -106,4 +106,4 @@ TEST(Raft, Spectral)
 }
 
 }  // namespace cluster
-}  // namespace cuvs
\ No newline at end of file
+}  // namespace cuvs
diff --git a/cpp/test/sparse/cluster/spectral_matrix.cu b/cpp/tests/sparse/cluster/spectral_matrix.cu
similarity index 100%
rename from cpp/test/sparse/cluster/spectral_matrix.cu
rename to cpp/tests/sparse/cluster/spectral_matrix.cu
diff --git a/cpp/test/sparse/gram.cu b/cpp/tests/sparse/gram.cu
similarity index 99%
rename from cpp/test/sparse/gram.cu
rename to cpp/tests/sparse/gram.cu
index d7af30a1c..c3ce99817 100644
--- a/cpp/test/sparse/gram.cu
+++ b/cpp/tests/sparse/gram.cu
@@ -327,4 +327,4 @@ INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloatLd, ::testing::Valu
 INSTANTIATE_TEST_SUITE_P(GramMatrixTests,
                          GramMatrixTestFloatLdCsr,
                          ::testing::ValuesIn(inputs_ld_csr));
-};  // namespace cuvs::distance::kernels::sparse
\ No newline at end of file
+};  // namespace cuvs::distance::kernels::sparse
diff --git a/cpp/test/sparse/neighbors/cross_component_nn.cu b/cpp/tests/sparse/neighbors/cross_component_nn.cu
similarity index 100%
rename from cpp/test/sparse/neighbors/cross_component_nn.cu
rename to cpp/tests/sparse/neighbors/cross_component_nn.cu
diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/tests/stats/silhouette_score.cu
similarity index 100%
rename from cpp/test/stats/silhouette_score.cu
rename to cpp/tests/stats/silhouette_score.cu
diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/tests/stats/trustworthiness.cu
similarity index 100%
rename from cpp/test/stats/trustworthiness.cu
rename to cpp/tests/stats/trustworthiness.cu
diff --git a/cpp/test/test.cpp b/cpp/tests/test.cpp
similarity index 100%
rename from cpp/test/test.cpp
rename to cpp/tests/test.cpp
diff --git a/cpp/test/test_utils.cuh b/cpp/tests/test_utils.cuh
similarity index 99%
rename from cpp/test/test_utils.cuh
rename to cpp/tests/test_utils.cuh
index d0b2d6bda..022202ffb 100644
--- a/cpp/test/test_utils.cuh
+++ b/cpp/tests/test_utils.cuh
@@ -327,4 +327,4 @@ inline std::vector<float> read_csv(std::string filename, bool skip_first_n_colum
   return result;
 }
 
-};  // end namespace cuvs
\ No newline at end of file
+};  // end namespace cuvs
diff --git a/cpp/test/test_utils.h b/cpp/tests/test_utils.h
similarity index 100%
rename from cpp/test/test_utils.h
rename to cpp/tests/test_utils.h
diff --git a/dependencies.yaml b/dependencies.yaml
index 8ba4b743f..186e14841 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -3,7 +3,7 @@ files:
   all:
     output: conda
     matrix:
-      cuda: ["11.8", "12.5"]
+      cuda: ["11.8", "12.8"]
       arch: [x86_64, aarch64]
     includes:
       - build
@@ -27,7 +27,7 @@ files:
   bench_ann:
     output: conda
     matrix:
-      cuda: ["11.8", "12.5"]
+      cuda: ["11.8", "12.8"]
       arch: [x86_64, aarch64]
     includes:
       - bench
@@ -132,6 +132,7 @@ files:
     extras:
       table: project
     includes:
+      - depends_on_libcuvs
       - depends_on_pylibraft
       - run_py_cuvs
   py_test_cuvs:
@@ -318,6 +319,10 @@ dependencies:
               cuda: "12.5"
             packages:
               - cuda-version=12.5
+          - matrix:
+              cuda: "12.8"
+            packages:
+              - cuda-version=12.8
   cuda:
     specific:
       - output_types: conda
diff --git a/docs/README.md b/docs/README.md
index cd0fe0a14..639961ea3 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -11,4 +11,4 @@ bash build.sh docs
 #### Once the process finishes, documentation can be found in build/html
 ```shell script
 xdg-open build/html/index.html`
-```
\ No newline at end of file
+```
diff --git a/docs/source/_static/references.css b/docs/source/_static/references.css
index 225cf13ba..d1f647233 100644
--- a/docs/source/_static/references.css
+++ b/docs/source/_static/references.css
@@ -20,4 +20,4 @@ dl.citation > dt.label > span::before {
 /* Add closing bracket */
 dl.citation > dt.label > span::after {
   content: "]";
-}
\ No newline at end of file
+}
diff --git a/docs/source/api_basics.rst b/docs/source/api_basics.rst
index 4842e2788..d3e2c8a6f 100644
--- a/docs/source/api_basics.rst
+++ b/docs/source/api_basics.rst
@@ -87,4 +87,4 @@ Rust
 
 .. code-block:: rust
 
-    let res = cuvs::Resources::new()?;
\ No newline at end of file
+    let res = cuvs::Resources::new()?;
diff --git a/docs/source/api_docs.rst b/docs/source/api_docs.rst
index f4deef506..68d184c72 100644
--- a/docs/source/api_docs.rst
+++ b/docs/source/api_docs.rst
@@ -10,4 +10,4 @@ API Reference
    rust_api/index.rst
 
 * :ref:`genindex`
-* :ref:`search`
\ No newline at end of file
+* :ref:`search`
diff --git a/docs/source/api_interoperability.rst b/docs/source/api_interoperability.rst
index 4dfe1e09a..519660036 100644
--- a/docs/source/api_interoperability.rst
+++ b/docs/source/api_interoperability.rst
@@ -48,10 +48,10 @@ The `mdarray` is an owning object that forms a convenience layer over RMM and ca
 .. code-block:: c++
 
     #include <raft/core/device_mdarray.hpp>
-    
+
     int n_rows = 10;
     int n_cols = 10;
-    
+
     auto scalar = raft::make_device_scalar<float>(handle, 1.0);
     auto vector = raft::make_device_vector<float>(handle, n_cols);
     auto matrix = raft::make_device_matrix<float>(handle, n_rows, n_cols);
diff --git a/docs/source/build.rst b/docs/source/build.rst
index a6b8ccd13..5d1b8e354 100644
--- a/docs/source/build.rst
+++ b/docs/source/build.rst
@@ -43,14 +43,14 @@ C/C++ Package
 
 .. code-block:: bash
 
-   conda install -c rapidsai -c conda-forge -c nvidia libcuvs cuda-version=12.5
+   conda install -c rapidsai -c conda-forge -c nvidia libcuvs cuda-version=12.8
 
 Python Package
 ~~~~~~~~~~~~~~
 
 .. code-block:: bash
 
-   conda install -c rapidsai -c conda-forge -c nvidia cuvs cuda-version=12.5
+   conda install -c rapidsai -c conda-forge -c nvidia cuvs cuda-version=12.8
 
 Python through Pip
 ^^^^^^^^^^^^^^^^^^
@@ -92,7 +92,7 @@ Conda environment scripts are provided for installing the necessary dependencies
 
 .. code-block:: bash
 
-    conda env create --name cuvs -f conda/environments/all_cuda-125_arch-x86_64.yaml
+    conda env create --name cuvs -f conda/environments/all_cuda-128_arch-x86_64.yaml
     conda activate cuvs
 
 The process for building from source with CUDA 11 differs slightly in that your host system will also need to have CUDA toolkit installed which is greater than, or equal to, the version you install into you conda environment. Installing CUDA toolkit into your host system is necessary because `nvcc` is not provided with Conda's cudatoolkit dependencies for CUDA 11. The following example will install create and install dependencies for a CUDA 11.8 conda environment
diff --git a/docs/source/comparing_indexes.rst b/docs/source/comparing_indexes.rst
index 221aab6d7..167aa2e07 100644
--- a/docs/source/comparing_indexes.rst
+++ b/docs/source/comparing_indexes.rst
@@ -57,4 +57,4 @@ It turns out that most vector databases, like Milvus for example, make many smal
 
 Please note, however, that there are often caps on the size of each of these smaller indexes, and that needs to be taken into consideration when choosing the size of the sub sample to tune.
 
-Please see :doc:`this guide <tuning_guide>` for more information on the steps one would take to do this subsampling and tuning process.
\ No newline at end of file
+Please see :doc:`this guide <tuning_guide>` for more information on the steps one would take to do this subsampling and tuning process.
diff --git a/docs/source/contributing.md b/docs/source/contributing.md
index c426ce534..1a58da4d7 100755
--- a/docs/source/contributing.md
+++ b/docs/source/contributing.md
@@ -89,5 +89,3 @@ implementation of the issue, ask them in the issue instead of the PR.
 
 ## Attribution
 Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md
-
-
diff --git a/docs/source/cpp_api/cluster.rst b/docs/source/cpp_api/cluster.rst
index 3e3e627f7..3de663a21 100644
--- a/docs/source/cpp_api/cluster.rst
+++ b/docs/source/cpp_api/cluster.rst
@@ -11,4 +11,3 @@ Cluster
 
    cluster_kmeans.rst
    cluster_agglomerative.rst
-
diff --git a/docs/source/cpp_api/cluster_kmeans.rst b/docs/source/cpp_api/cluster_kmeans.rst
index 3ff8a03e5..6c342baab 100644
--- a/docs/source/cpp_api/cluster_kmeans.rst
+++ b/docs/source/cpp_api/cluster_kmeans.rst
@@ -41,4 +41,4 @@ namespace *cuvs::cluster::kmeans::helpers*
 .. doxygengroup:: kmeans_helpers
     :project: cuvs
     :members:
-    :content-only:
\ No newline at end of file
+    :content-only:
diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst
index ab810ab53..ff1566e6c 100644
--- a/docs/source/cpp_api/neighbors.rst
+++ b/docs/source/cpp_api/neighbors.rst
@@ -12,9 +12,11 @@ Nearest Neighbors
    neighbors_bruteforce.rst
    neighbors_cagra.rst
    neighbors_dynamic_batching.rst
+   neighbors_filter.rst
    neighbors_hnsw.rst
    neighbors_ivf_flat.rst
    neighbors_ivf_pq.rst
    neighbors_nn_descent.rst
    neighbors_refine.rst
    neighbors_mg.rst
+   neighbors_vamana.rst
diff --git a/docs/source/cpp_api/neighbors_bruteforce.rst b/docs/source/cpp_api/neighbors_bruteforce.rst
index f75e26b3c..1a3f2f715 100644
--- a/docs/source/cpp_api/neighbors_bruteforce.rst
+++ b/docs/source/cpp_api/neighbors_bruteforce.rst
@@ -7,7 +7,7 @@ The bruteforce method is running the KNN algorithm. It performs an extensive sea
    :language: c++
    :class: highlight
 
-``#include <cuvs/neighbors/bruteforce.hpp>``
+``#include <cuvs/neighbors/brute_force.hpp>``
 
 namespace *cuvs::neighbors::bruteforce*
 
diff --git a/docs/source/cpp_api/neighbors_filter.rst b/docs/source/cpp_api/neighbors_filter.rst
new file mode 100644
index 000000000..aba1d348f
--- /dev/null
+++ b/docs/source/cpp_api/neighbors_filter.rst
@@ -0,0 +1,18 @@
+Filtering
+==========
+
+All nearest neighbors search methods support filtering. Filtering is a method to reduce the number
+of candidates that are considered for the nearest neighbors search.
+
+.. role:: py(code)
+   :language: c++
+   :class: highlight
+
+``#include <cuvs/neighbors/common.hpp>``
+
+namespace *cuvs::neighbors*
+
+.. doxygengroup:: neighbors_filtering
+    :project: cuvs
+    :members:
+    :content-only:
diff --git a/docs/source/cpp_api/neighbors_nn_descent.rst b/docs/source/cpp_api/neighbors_nn_descent.rst
index edecdfa8d..c21a1003d 100644
--- a/docs/source/cpp_api/neighbors_nn_descent.rst
+++ b/docs/source/cpp_api/neighbors_nn_descent.rst
@@ -34,4 +34,4 @@ Index build
 .. doxygengroup:: nn_descent_cpp_index_build
     :project: cuvs
     :members:
-    :content-only:
\ No newline at end of file
+    :content-only:
diff --git a/docs/source/cpp_api/neighbors_refine.rst b/docs/source/cpp_api/neighbors_refine.rst
index d53c8087c..4a90ee995 100644
--- a/docs/source/cpp_api/neighbors_refine.rst
+++ b/docs/source/cpp_api/neighbors_refine.rst
@@ -17,4 +17,4 @@ Index
 .. doxygengroup:: ann_refine
     :project: cuvs
     :members:
-    :content-only:
\ No newline at end of file
+    :content-only:
diff --git a/docs/source/cpp_api/neighbors_vamana.rst b/docs/source/cpp_api/neighbors_vamana.rst
new file mode 100644
index 000000000..25447efce
--- /dev/null
+++ b/docs/source/cpp_api/neighbors_vamana.rst
@@ -0,0 +1,44 @@
+Vamana
+======
+
+Vamana is the graph construction algorithm behind the well-known DiskANN vector search solution. The cuVS implementation of Vamana/DiskANN is a custom GPU-accelerated version of the algorithm that aims to reduce index construction time using NVIDIA GPUs.
+
+.. role:: py(code)
+   :language: c++
+   :class: highlight
+
+``#include <cuvs/neighbors/vamana.hpp>``
+
+namespace *cuvs::neighbors::vamana*
+
+Index build parameters
+----------------------
+
+.. doxygengroup:: vamana_cpp_index_params
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index
+-----
+
+.. doxygengroup:: vamana_cpp_index
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index build
+-----------
+
+.. doxygengroup:: vamana_cpp_index_build
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index serialize
+---------------
+
+.. doxygengroup:: vamana_cpp_serialize
+    :project: cuvs
+    :members:
+    :content-only:
diff --git a/docs/source/cpp_api/preprocessing_quantize.rst b/docs/source/cpp_api/preprocessing_quantize.rst
index b660c61c5..8a08fb98b 100644
--- a/docs/source/cpp_api/preprocessing_quantize.rst
+++ b/docs/source/cpp_api/preprocessing_quantize.rst
@@ -2,7 +2,7 @@ Quantize
 ========
 
 This page provides C++ class references for the publicly-exposed elements of the
-`cuvs/preprocessing/quantize` package. 
+`cuvs/preprocessing/quantize` package.
 
 .. role:: py(code)
    :language: c++
@@ -17,4 +17,3 @@ namespace *cuvs::preprocessing::quantize::scalar*
 
 .. doxygengroup:: scalar
    :project: cuvs
-
diff --git a/docs/source/cpp_api/stats.rst b/docs/source/cpp_api/stats.rst
index 80d6c65fc..988ba05df 100644
--- a/docs/source/cpp_api/stats.rst
+++ b/docs/source/cpp_api/stats.rst
@@ -32,4 +32,3 @@ namespace *cuvs::stats*
     :project: cuvs
     :members:
     :content-only:
-
diff --git a/docs/source/cuvs_bench/build.rst b/docs/source/cuvs_bench/build.rst
index de8432691..0c2dc98b8 100644
--- a/docs/source/cuvs_bench/build.rst
+++ b/docs/source/cuvs_bench/build.rst
@@ -56,4 +56,4 @@ Available targets to use with `--limit-bench-ann` are:
 - CUVS_IVF_PQ_ANN_BENCH
 - CUVS_IVF_FLAT_ANN_BENCH
 
-By default, the `*_ANN_BENCH` executables program infer the dataset's datatype from the filename's extension. For example, an extension of `fbin` uses a `float` datatype, `f16bin` uses a `float16` datatype, extension of `i8bin` uses `int8_t` datatype, and `u8bin` uses `uint8_t` type. Currently, only `float`, `float16`, int8_t`, and `unit8_t` are supported.
\ No newline at end of file
+By default, the `*_ANN_BENCH` executable programs infer the dataset's datatype from the filename's extension. For example, an extension of `fbin` uses a `float` datatype, `f16bin` uses a `float16` datatype, extension of `i8bin` uses `int8_t` datatype, and `u8bin` uses `uint8_t` type. Currently, only `float`, `float16`, `int8_t`, and `uint8_t` are supported.
diff --git a/docs/source/cuvs_bench/datasets.rst b/docs/source/cuvs_bench/datasets.rst
index 0f6539e56..e6a53ca82 100644
--- a/docs/source/cuvs_bench/datasets.rst
+++ b/docs/source/cuvs_bench/datasets.rst
@@ -59,4 +59,4 @@ If you have a dataset, but no corresponding ground truth file, then you can gene
 
     # Using only a subset of the dataset. Define queries by randomly
     # selecting vectors from the (subset of the) dataset.
-    python -m cuvs_bench.generate_groundtruth --dataset /dataset/base.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
\ No newline at end of file
+    python -m cuvs_bench.generate_groundtruth --dataset /dataset/base.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
diff --git a/docs/source/cuvs_bench/param_tuning.rst b/docs/source/cuvs_bench/param_tuning.rst
index faffa9daf..c85504e96 100644
--- a/docs/source/cuvs_bench/param_tuning.rst
+++ b/docs/source/cuvs_bench/param_tuning.rst
@@ -671,4 +671,4 @@ hnswlib
    - 1
    - Number of threads to use for queries.
 
-Please refer to `HNSW algorithm parameters guide <https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md>`_ from `hnswlib` to learn more about these arguments.
\ No newline at end of file
+Please refer to `HNSW algorithm parameters guide <https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md>`_ from `hnswlib` to learn more about these arguments.
diff --git a/docs/source/cuvs_bench/wiki_all_dataset.rst b/docs/source/cuvs_bench/wiki_all_dataset.rst
index 04ac7d9a4..cedeeb93f 100644
--- a/docs/source/cuvs_bench/wiki_all_dataset.rst
+++ b/docs/source/cuvs_bench/wiki_all_dataset.rst
@@ -52,4 +52,4 @@ After the dataset is downloaded and extracted to the `wiki_all_88M` directory (o
 License info
 ============
 
-The English wiki texts available on Kaggle come with the `CC BY-NCSA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ license and the Cohere wikipedia data set comes with the `Apache 2.0 <https://choosealicense.com/licenses/apache-2.0/>`_ license.
\ No newline at end of file
+The English wiki texts available on Kaggle come with the `CC BY-NCSA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ license and the Cohere wikipedia data set comes with the `Apache 2.0 <https://choosealicense.com/licenses/apache-2.0/>`_ license.
diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md
index 51ac1aa60..5ba0d77de 100644
--- a/docs/source/developer_guide.md
+++ b/docs/source/developer_guide.md
@@ -211,7 +211,7 @@ This will bring up an interactive prompt to select which spelling fixes to apply
 
 Manually, run the following to bulk-fix include style issues:
 ```bash
-python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list of folders which you want to fix]
+python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/tests ... list of folders which you want to fix]
 ```
 
 ### Copyright header
diff --git a/docs/source/filtering.rst b/docs/source/filtering.rst
new file mode 100644
index 000000000..35805c5de
--- /dev/null
+++ b/docs/source/filtering.rst
@@ -0,0 +1,116 @@
+.. _filtering:
+
+~~~~~~~~~~~~~~~~~~~~~~~~
+Filtering vector indexes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+cuVS supports different types of filtering depending on the vector index being used. The main method used in all of the vector indexes
+is pre-filtering, which is a technique that takes the filtering of the vectors into account before computing the closest neighbors, saving
+some computation from calculating distances.
+
+Bitset
+======
+
+A bitset is an array of bits where each bit can have two possible values: `0` and `1`, which signify in the context of filtering whether
+a sample should be filtered or not. `0` means that the corresponding vector will be filtered, and will therefore not be present in the results of the search.
+This mechanism is optimized to take as little memory space as possible, and is available through the RAFT library
+(check out RAFT's `bitset API documentation <https://docs.rapids.ai/api/raft/stable/cpp_api/core_bitset/>`_). When calling a search function of an ANN index, the
+bitset length should match the number of vectors present in the database.
+
+Bitmap
+======
+
+A bitmap is based on the same principle as a bitset, but in two dimensions. This allows users to provide a different bitset for each query
+being searched. Check out RAFT's `bitmap API documentation <https://docs.rapids.ai/api/raft/stable/cpp_api/core_bitmap/>`_.
+
+Examples
+========
+
+Using a Bitset filter on a CAGRA index
+--------------------------------------
+
+.. code-block:: c++
+
+    #include <cuvs/neighbors/cagra.hpp>
+    #include <cuvs/core/bitset.hpp>
+
+    using namespace cuvs::neighbors;
+    cagra::index index;
+
+    // ... build index ...
+
+    cagra::search_params search_params;
+    raft::device_resources res;
+    raft::device_matrix_view<float> queries = load_queries();
+    raft::device_matrix_view<uint32_t> neighbors = make_device_matrix_view<uint32_t>(n_queries, k);
+    raft::device_matrix_view<float> distances = make_device_matrix_view<float>(n_queries, k);
+
+    // Load a list of all the samples that will get filtered
+    std::vector<uint32_t> removed_indices_host = get_invalid_indices();
+    auto removed_indices_device =
+          raft::make_device_vector<uint32_t, uint32_t>(res, removed_indices_host.size());
+    // Copy this list to device
+    raft::copy(removed_indices_device.data_handle(), removed_indices_host.data(),
+               removed_indices_host.size(), raft::resource::get_cuda_stream(res));
+
+    // Create a bitset with the list of samples to filter.
+    cuvs::core::bitset<uint32_t, uint32_t> removed_indices_bitset(
+        res, removed_indices_device.view(), index.size());
+    // Use a `bitset_filter` in the `cagra::search` function call.
+    auto bitset_filter =
+          cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset.view());
+    cagra::search(res,
+                  search_params,
+                  index,
+                  queries,
+                  neighbors,
+                  distances,
+                  bitset_filter);
+
+
+Using a Bitmap filter on a Brute-force index
+--------------------------------------------
+
+.. code-block:: c++
+
+    #include <cuvs/neighbors/brute_force.hpp>
+    #include <cuvs/core/bitmap.hpp>
+
+    using namespace cuvs::neighbors;
+    using indexing_dtype = int64_t;
+
+    // ... build index ...
+    brute_force::index_params index_params;
+    brute_force::search_params search_params;
+    raft::device_resources res;
+    raft::device_matrix_view<float, indexing_dtype> dataset = load_dataset(n_vectors, dim);
+    raft::device_matrix_view<float, indexing_dtype> queries = load_queries(n_queries, dim);
+    auto index = brute_force::build(res, index_params, raft::make_const_mdspan(dataset.view()));
+
+    // Load a list of all the samples that will get filtered
+    std::vector<uint32_t> removed_indices_host = get_invalid_indices();
+    auto removed_indices_device =
+          raft::make_device_vector<uint32_t, uint32_t>(res, removed_indices_host.size());
+    // Copy this list to device
+    raft::copy(removed_indices_device.data_handle(), removed_indices_host.data(),
+               removed_indices_host.size(), raft::resource::get_cuda_stream(res));
+
+    // Create a bitmap with the list of samples to filter.
+    cuvs::core::bitset<uint32_t, indexing_dtype> removed_indices_bitset(
+      res, removed_indices_device.view(), n_queries * n_vectors);
+    cuvs::core::bitmap_view<const uint32_t, indexing_dtype> removed_indices_bitmap(
+        removed_indices_bitset.data(), n_queries, n_vectors);
+
+    // Use a `bitmap_filter` in the `brute_force::search` function call.
+    auto bitmap_filter =
+          cuvs::neighbors::filtering::bitmap_filter(removed_indices_bitmap);
+
+    auto neighbors = raft::make_device_matrix_view<uint32_t, indexing_dtype>(n_queries, k);
+    auto distances = raft::make_device_matrix_view<float, indexing_dtype>(n_queries, k);
+    brute_force::search(res,
+                        search_params,
+                        index,
+                        raft::make_const_mdspan(queries.view()),
+                        neighbors.view(),
+                        distances.view(),
+                        bitmap_filter);
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index fa38c45ee..c4706e510 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -117,4 +117,6 @@ We always welcome patches for new features and bug fixes. Please read our `contr
    comparing_indexes.rst
    indexes/indexes.rst
    api_basics.rst
-   api_interoperability.rst
\ No newline at end of file
+   api_interoperability.rst
+   working_with_ann_indexes.rst
+   filtering.rst
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 286836c18..4dcf59112 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -88,4 +88,3 @@ Contents
    cuvs_bench/index.rst
    api_docs.rst
    contributing.md
-
diff --git a/docs/source/indexes/bruteforce.rst b/docs/source/indexes/bruteforce.rst
index 0bd17dbf1..3dc115507 100644
--- a/docs/source/indexes/bruteforce.rst
+++ b/docs/source/indexes/bruteforce.rst
@@ -12,7 +12,7 @@ Brute-force can also be a good choice for heavily filtered queries where other a
 when filtering out 90%-95% of the vectors from a search, the IVF methods could struggle to return anything at all with smaller number of probes and
 graph-based algorithms with limited hash table memory could end up skipping over important unfiltered entries.
 
-[ :doc:`C API <../c_api/neighbors_bruteforce_c>` | :doc:`C++ API <../cpp_api/neighbors_bruteforce>` | :doc:`Python API <../python_api/neighbors_bruteforce>` | :doc:`Rust API <../rust_api/index>` ]
+[ :doc:`C API <../c_api/neighbors_bruteforce_c>` | :doc:`C++ API <../cpp_api/neighbors_bruteforce>` | :doc:`Python API <../python_api/neighbors_brute_force>` | :doc:`Rust API <../rust_api/index>` ]
 
 Filtering considerations
 ------------------------
@@ -57,6 +57,6 @@ Memory footprint
 Index footprint
 ~~~~~~~~~~~~~~~
 
-Raw vectors: :math:`n_vectors * n_dimensions * precision`
+Raw vectors: :math:`n\_vectors * n\_dimensions * precision`
 
-Vector norms (for distances which require them): :math:`n_vectors * precision`
+Vector norms (for distances which require them): :math:`n\_vectors * precision`
diff --git a/docs/source/indexes/cagra.rst b/docs/source/indexes/cagra.rst
index de8821e74..14d6c6502 100644
--- a/docs/source/indexes/cagra.rst
+++ b/docs/source/indexes/cagra.rst
@@ -108,14 +108,14 @@ IVFPQ or NN-DESCENT can be used to build the graph (additions to the peak memory
 Dataset on device (graph on host):
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Index memory footprint (device): :math:`n_index_vectors * n_dims * sizeof(T)`
+Index memory footprint (device): :math:`n\_index\_vectors * n\_dims * sizeof(T)`
 
-Index memory footprint (host): :math:`graph_degree * n_index_vectors * sizeof(T)``
+Index memory footprint (host): :math:`graph\_degree * n\_index\_vectors * sizeof(T)`
 
 Dataset on host (graph on host):
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Index memory footprint (host): :math:`n_index_vectors * n_dims * sizeof(T) + graph_degree * n_index_vectors * sizeof(T)`
+Index memory footprint (host): :math:`n\_index\_vectors * n\_dims * sizeof(T) + graph\_degree * n\_index\_vectors * sizeof(T)`
 
 Build peak memory usage:
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -123,7 +123,7 @@ Build peak memory usage:
 When built using NN-descent / IVF-PQ, the build process consists of two phases: (1) building an initial/(intermediate) graph and then (2) optimizing the graph. Key input parameters are n_vectors, intermediate_graph_degree, graph_degree.
 The memory usage in the first phase (building) depends on the chosen method. The biggest allocation is the graph (n_vectors*intermediate_graph_degree), but it’s stored in the host memory.
 Usually, the second phase (optimize) uses the most device memory. The peak memory usage is achieved during the pruning step (graph_core.cuh/optimize)
-Optimize: formula for peak memory usage (device): :math:`n_vectors * (4 + (sizeof(IdxT) + 1) * intermediate_degree)``
+Optimize: formula for peak memory usage (device): :math:`n\_vectors * (4 + (sizeof(IdxT) + 1) * intermediate\_degree)`
 
 Build with out-of-core IVF-PQ peak memory usage:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -134,16 +134,18 @@ IVF-PQ Build:
 
 .. math::
 
-   n_vectors / train_set_ratio * dim * sizeof(float)   // trainset, may be in managed mem
-   + n_vectors / train_set_ratio * sizeof(uint32_t)    // labels, may be in managed mem
-   + n_clusters * n_dim * sizeof(float)                // cluster centers
+   n\_vectors / train\_set\_ratio * dim * sizeof_{float}   // trainset, may be in managed mem
+
+   + n\_vectors / train\_set\_ratio * sizeof_{uint32\_t}    // labels, may be in managed mem
+
+   + n\_clusters * n\_dim * sizeof_{float}                // cluster centers
 
 IVF-PQ Search (max batch size 1024 vectors on device at a time):
 
 .. math::
 
-   [n_vectors * (pq_dim * pq_bits / 8 + sizeof(int64_t)) + O(n_clusters)]
-   + [batch_size * n_dim * sizeof(float)] + [batch_size * intermediate_degree * sizeof(uint32_t)] +
-   [batch_size * intermediate_degree * sizeof(float)]
+   [n\_vectors * (pq\_dim * pq\_bits / 8 + sizeof_{int64\_t}) + O(n\_clusters)]
 
+   + [batch\_size * n\_dim * sizeof_{float}] + [batch\_size * intermediate\_degree * sizeof_{uint32\_t}]
 
+   + [batch\_size * intermediate\_degree * sizeof_{float}]
diff --git a/docs/source/indexes/indexes.rst b/docs/source/indexes/indexes.rst
index a2fb1434a..8746b84ac 100644
--- a/docs/source/indexes/indexes.rst
+++ b/docs/source/indexes/indexes.rst
@@ -9,6 +9,7 @@ Nearest Neighbor Indexes
    cagra.rst
    ivfflat.rst
    ivfpq.rst
+   vamana.rst
 
 
 Indices and tables
@@ -16,4 +17,4 @@ Indices and tables
 
 * :ref:`genindex`
 * :ref:`modindex`
-* :ref:`search`
\ No newline at end of file
+* :ref:`search`
diff --git a/docs/source/indexes/ivfflat.rst b/docs/source/indexes/ivfflat.rst
index 14dd1798c..7154db037 100644
--- a/docs/source/indexes/ivfflat.rst
+++ b/docs/source/indexes/ivfflat.rst
@@ -86,7 +86,7 @@ Memory footprint
 ----------------
 
 Each cluster is padded to at least 32 vectors (but potentially up to 1024). Assuming uniform random distribution of vectors/list, we would have
-:math:`cluster\_overhead = (conservative\_memory\_allocation ? 16 : 512 ) * dim * sizeof_{float})`
+:math:`cluster\_overhead = (conservative\_memory\_allocation ? 16 : 512 ) * dim * sizeof_{float}`
 
 Note that each cluster is allocated as a separate allocation. If we use a `cuda_memory_resource`, that would grab memory in 1 MiB chunks, so on average we might have 0.5 MiB overhead per cluster. If we us 10s of thousands of clusters, it becomes essential to use pool allocator to avoid this overhead.
 
@@ -110,7 +110,6 @@ Index (device memory):
 Peak device memory usage for index build:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-:math:`workspace = min(1GB, n\_queries * [(n\_lists + 1 + n\_probes * (k + 1)) * sizeof_{float}) + n\_probes * k * sizeof_{idx}])`
+:math:`workspace = min(1GB, n\_queries * [(n\_lists + 1 + n\_probes * (k + 1)) * sizeof_{float} + n\_probes * k * sizeof_{idx}])`
 
 :math:`index\_size + workspace`
-
diff --git a/docs/source/indexes/ivfpq.rst b/docs/source/indexes/ivfpq.rst
index e4bd81395..ad973adf2 100644
--- a/docs/source/indexes/ivfpq.rst
+++ b/docs/source/indexes/ivfpq.rst
@@ -97,22 +97,22 @@ Simple approximate formula: :math:`n\_vectors * (pq\_dim * \frac{pq\_bits}{8} +
 
 The IVF lists end up being represented by a sparse data structure that stores the pointers to each list, an indices array that contains the indexes of each vector in each list, and an array with the encoded (and interleaved) data for each list.
 
-IVF list pointers: :math:`n\_clusters * sizeof_{uint32_t}`
+IVF list pointers: :math:`n\_clusters * sizeof_{uint32\_t}`
 
-Indices: :math:`n\_vectors * sizeof_{idx}``
+Indices: :math:`n\_vectors * sizeof_{idx}`
 
 Encoded data (interleaved): :math:`n\_vectors * pq\_dim * \frac{pq\_bits}{8}`
 
-Per subspace method: :math:`4 * pq\_dim * pq\_len * 2^pq\_bits`
+Per subspace method: :math:`4 * pq\_dim * pq\_len * 2^{pq\_bits}`
 
-Per cluster method: :math:`4 * n\_clusters * pq\_len * 2^pq\_bits`
+Per cluster method: :math:`4 * n\_clusters * pq\_len * 2^{pq\_bits}`
 
 Extras: :math:`n\_clusters * (20 + 8 * dim)`
 
 Index (host memory):
 ~~~~~~~~~~~~~~~~~~~~
 
-When refinement is used with the dataset on host, the original raw vectors are needed: :math:`n\_vectors * dims * sizeof_{Tloat}`
+When refinement is used with the dataset on host, the original raw vectors are needed: :math:`n\_vectors * dims * sizeof_{float}`
 
 Search peak memory usage (device);
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -128,13 +128,8 @@ Build peak memory usage (device):
 
    \frac{n\_vectors}{trainset\_ratio * dims * sizeof_{float}}
 
-   + \frac{n\_vectors}{trainset\_ratio * sizeof_{uint32_t}}
+   + \frac{n\_vectors}{trainset\_ratio * sizeof_{uint32\_t}}
 
    + n\_clusters * dim * sizeof_{float}
 
 Note, if there’s not enough space left in the workspace memory resource, IVF-PQ build automatically switches to the managed memory for the training set and labels.
-
-
-
-
-
diff --git a/docs/source/indexes/vamana.rst b/docs/source/indexes/vamana.rst
new file mode 100644
index 000000000..7e0a79fb8
--- /dev/null
+++ b/docs/source/indexes/vamana.rst
@@ -0,0 +1,75 @@
+Vamana
+======
+
+Vamana is the underlying graph construction algorithm used to construct indexes for the DiskANN vector search solution. DiskANN and the Vamana algorithm are described in detail in the `published paper <https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf>`_, and a highly optimized `open-source repository <https://github.com/microsoft/DiskANN>`_ includes many features for index construction and search. In cuVS, we provide a version of the Vamana algorithm optimized for GPU architectures to accelerate graph construction to build DiskANN indexes. At a high level, the Vamana algorithm operates as follows:
+
+* 1. Starting with an empty graph, select a medoid vector from the D-dimension vector dataset and insert it into the graph.
+* 2. Iteratively insert batches of dataset vectors into the graph, connecting each inserted vector to neighbors based on a graph traversal.
+* 3. For each batch, create reverse edges and prune unnecessary edges.
+
+There are many algorithmic details that are outlined in the `paper <https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf>`_, and many GPU-specific optimizations are included in this implementation.
+
+The current implementation of DiskANN in cuVS only includes the 'in-memory' graph construction and a serialization step that writes the index to a file. This index file can then be used by the `open-source DiskANN <https://github.com/microsoft/DiskANN>`_ library to perform efficient search. Additional DiskANN functionality, including GPU-accelerated search and 'ssd' index build, is planned for future cuVS releases.
+
+[ :doc:`C++ API <../cpp_api/neighbors_vamana>` | :doc:`Python API <../python_api/neighbors_vamana>` ]
+
+Interoperability with CPU DiskANN
+---------------------------------
+
+The 'vamana::serialize' API call writes the index to a file in a format that is compatible with the `open-source DiskANN repository <https://github.com/microsoft/DiskANN>`_. This allows cuVS to be used to accelerate index construction while leveraging the efficient CPU-based search currently available.
+
+Configuration parameters
+------------------------
+
+Build parameters
+~~~~~~~~~~~~~~~~
+
+.. list-table::
+   :widths: 25 25 50
+   :header-rows: 1
+
+   * - Name
+     - Default
+     - Description
+   * - graph_degree
+     - 32
+     - The maximum degree of the final Vamana graph. The internal representation of the graph includes this many edges for every node, but serialize will compress the graph into a 'CSR' format with, potentially, fewer edges.
+   * - visited_size
+     - 64
+     - Maximum number of visited nodes saved during each traversal to insert a new node. This corresponds to the 'L' parameter in the paper.
+   * - vamana_iters
+     - 1
+     - Number of iterations run to improve the graph. Each iteration involves inserting every vector in the dataset.
+   * - alpha
+     - 1.2
+     - Alpha parameter that defines how aggressively to prune edges.
+   * - max_fraction
+     - 0.06
+     - Maximum fraction of the dataset that will be inserted as a single batch. Larger max batch size decreases graph quality but improves speed.
+   * - batch_base
+     - 2
+     - Base of growth rate of batch sizes. Insertion batch sizes increase exponentially based on this parameter until max_fraction is reached.
+   * - queue_size
+     - 127
+     - Size of the candidate queue structure used during graph traversal. Must be (2^x)-1 for some x, and must be > visited_size.
+
+Tuning Considerations
+---------------------
+
+The 2 hyper-parameters that are most often tuned are `graph_degree` and `visited_size`. The time needed to create a graph increases dramatically when increasing `graph_degree`, in particular. However, larger graphs may be needed to achieve very high recall search, especially for large datasets.
+
+Memory footprint
+----------------
+
+Vamana builds a graph that is stored in device memory. However, in order to serialize the index and write it to a file for later use, it must be moved into host memory. If the `include_dataset` parameter is also set, then the dataset must be resident in host memory when calling serialize as well.
+
+Device memory usage
+~~~~~~~~~~~~~~~~~~~
+
+The built index represents the graph as fixed degree, storing a total of :math:`graph\_degree * n\_index\_vectors` edges. Graph construction also requires the dataset be in device memory (or it copies it to device during build). In addition, device memory is used during construction to sort and create the reverse edges. Thus, the amount of device memory needed depends on the dataset itself, but it is bounded by a maximum sum of:
+
+- vector dataset: :math:`n\_index\_vectors * n\_dims * sizeof(T)`
+- output graph: :math:`graph\_degree * n\_index\_vectors * sizeof(IdxT)`
+- scratch memory: :math:`n\_index\_vectors * max\_fraction * (2 + graph\_degree) * sizeof(IdxT)`
+
+Reductions in scratch device memory requirements are planned for upcoming releases of cuVS.
diff --git a/docs/source/python_api.rst b/docs/source/python_api.rst
index 69bcd300b..aa75160c1 100644
--- a/docs/source/python_api.rst
+++ b/docs/source/python_api.rst
@@ -8,4 +8,5 @@ Python API Documentation
    :maxdepth: 4
 
    python_api/distance.rst
-   python_api/neighbors.rst
\ No newline at end of file
+   python_api/neighbors.rst
+   python_api/preprocessing.rst
diff --git a/docs/source/python_api/distance.rst b/docs/source/python_api/distance.rst
index fd9efa853..debd82953 100644
--- a/docs/source/python_api/distance.rst
+++ b/docs/source/python_api/distance.rst
@@ -7,6 +7,6 @@ Distance
 
 
 Pairwise Distance
-##################
+#################
 
 .. autofunction:: cuvs.distance.pairwise_distance
diff --git a/docs/source/python_api/preprocessing.rst b/docs/source/python_api/preprocessing.rst
new file mode 100644
index 000000000..679eabc16
--- /dev/null
+++ b/docs/source/python_api/preprocessing.rst
@@ -0,0 +1,11 @@
+Preprocessing
+=============
+
+.. role:: py(code)
+   :language: python
+   :class: highlight
+
+Scalar Quantizer
+################
+
+.. autofunction:: cuvs.preprocessing.quantize.scalar
diff --git a/docs/source/vector_databases_vs_vector_search.rst b/docs/source/vector_databases_vs_vector_search.rst
index 446737c11..5c43ee550 100644
--- a/docs/source/vector_databases_vs_vector_search.rst
+++ b/docs/source/vector_databases_vs_vector_search.rst
@@ -54,4 +54,4 @@ Unfortunately, for large datasets, doing a hyper-parameter optimization on the w
 
 Full hyper-parameter optimization may also not always be necessary- for example, once you have built a ground truth dataset on a subset, many times you can start by building an index with the default build parameters and then playing around with different search parameters until you get the desired quality and search performance.  For massive indexes that might be multiple terabytes, you could also take this subsampling of, say, 10M vectors, train an index and then tune the search parameters from there. While there might be a small margin of error, the chosen build/search parameters should generalize fairly well for the databases that build locally partitioned indexes.
 
-Refer to our :doc:`tuning guide <tuning_guide>` for more information and examples on how to efficiently and automatically tune your vector search indexes based on your needs.
\ No newline at end of file
+Refer to our :doc:`tuning guide <tuning_guide>` for more information and examples on how to efficiently and automatically tune your vector search indexes based on your needs.
diff --git a/examples/README.md b/examples/README.md
index 701fd0dee..f5a606ee3 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,17 +1,17 @@
 # cuVS C and C++ Examples
 
-This template project provides a drop-in sample to either start building a new application with, or using CUVS in an existing CMake project. 
+This template project provides a drop-in sample to either start building a new application with, or using CUVS in an existing CMake project.
 
 First, please refer to our [installation docs](https://docs.rapids.ai/api/cuvs/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use cuVS.
 
-Once the minimum requirements are satisfied, this example template application can be built with the provided `build.sh` script. This is a bash script that calls the appropriate CMake commands, so you can look into it to see the typical CMake based build workflow.  
+Once the minimum requirements are satisfied, this example template application can be built with the provided `build.sh` script. This is a bash script that calls the appropriate CMake commands, so you can look into it to see the typical CMake based build workflow.
 
 The directories (`CUVS_SOURCE/examples/c`) or (`CUVS_SOURCE/examples/cpp`) can be copied directly in order to build a new application with cuVS.
 
-cuVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`. 
+cuVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`.
 
 Make sure to link against the appropriate CMake targets. Use `cuvs::c_api` and `cuvs::cuvs` to use the C and C++ shared libraries respectively.
 
 ```cmake
 target_link_libraries(your_app_target PRIVATE cuvs::cuvs)
-```
\ No newline at end of file
+```
diff --git a/examples/c/src/L2_c_example.c b/examples/c/src/L2_c_example.c
index 73ddf6103..0b621798e 100644
--- a/examples/c/src/L2_c_example.c
+++ b/examples/c/src/L2_c_example.c
@@ -84,7 +84,7 @@ void l2_distance_calc(int64_t n_cols,float x[], float y[], float *ret) {
   DLManagedTensor y_tensor;
   int64_t y_shape[2] = {N_ROWS, n_cols};
   tensor_initialize(y_d, y_shape, &y_tensor);
-  
+
   DLManagedTensor dist_tensor;
   int64_t distances_shape[2] = {N_ROWS, N_ROWS};
   tensor_initialize(distance_d, distances_shape, &dist_tensor);
@@ -93,7 +93,7 @@ void l2_distance_calc(int64_t n_cols,float x[], float y[], float *ret) {
   cuvsPairwiseDistance(res, &x_tensor, &y_tensor, &dist_tensor, L2SqrtUnexpanded, 2.0);
 
   cudaMemcpy(ret, distance_d, sizeof(float) * N_ROWS * N_ROWS, cudaMemcpyDefault);
-  
+
   cuvsRMMFree(res, distance_d, sizeof(float) * N_ROWS * N_ROWS);
   cuvsRMMFree(res, x_d, sizeof(float) * N_ROWS * n_cols);
   cuvsRMMFree(res, y_d, sizeof(float) * N_ROWS * n_cols);
@@ -106,12 +106,12 @@ int euclidean_distance_calculation_example() {
 
   outputVector((float *)PointA);
   outputVector((float *)PointB);
-  
+
   float ret;
-  
+
   l2_distance_calc(DIM, (float *)PointA, (float *)PointB, &ret);
   printf("L2 distance is %f.\n", ret);
-  
+
   cuvsResourcesDestroy(res);
 
   return 0;
diff --git a/examples/c/src/cagra_c_example.c b/examples/c/src/cagra_c_example.c
index fdcbbf571..83957aabc 100644
--- a/examples/c/src/cagra_c_example.c
+++ b/examples/c/src/cagra_c_example.c
@@ -67,9 +67,9 @@ void cagra_build_search_simple() {
   // Allocate memory for `queries`, `neighbors` and `distances` output
   uint32_t *neighbors;
   float *distances, *queries_d;
-  cuvsRMMAlloc(res, (void**) &queries_d, sizeof(float) * n_queries * n_cols);
-  cuvsRMMAlloc(res, (void**) &neighbors, sizeof(uint32_t) * n_queries * topk);
-  cuvsRMMAlloc(res, (void**) &distances, sizeof(float) * n_queries * topk);
+  cuvsRMMAlloc(res, (void **)&queries_d, sizeof(float) * n_queries * n_cols);
+  cuvsRMMAlloc(res, (void **)&neighbors, sizeof(uint32_t) * n_queries * topk);
+  cuvsRMMAlloc(res, (void **)&distances, sizeof(float) * n_queries * topk);
 
   // Use DLPack to represent `queries`, `neighbors` and `distances` as tensors
   cudaMemcpy(queries_d, queries, sizeof(float) * 4 * 2, cudaMemcpyDefault);
@@ -111,8 +111,12 @@ void cagra_build_search_simple() {
   cuvsCagraSearchParams_t search_params;
   cuvsCagraSearchParamsCreate(&search_params);
 
+  cuvsFilter filter;
+  filter.type = NO_FILTER;
+  filter.addr = (uintptr_t)NULL;
+
   cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor,
-                  &distances_tensor);
+                  &distances_tensor, filter);
 
   // print results
   uint32_t *neighbors_h =
diff --git a/examples/c/src/common.h b/examples/c/src/common.h
index 60b9b73cf..e5b8d0176 100644
--- a/examples/c/src/common.h
+++ b/examples/c/src/common.h
@@ -106,4 +106,3 @@ void print_results(int64_t * neighbor, float* distances,int n_rows, int n_cols)
         printf("]\n");
     }
 }
-
diff --git a/examples/c/src/ivf_flat_c_example.c b/examples/c/src/ivf_flat_c_example.c
index c068d04f8..2121ca35e 100644
--- a/examples/c/src/ivf_flat_c_example.c
+++ b/examples/c/src/ivf_flat_c_example.c
@@ -56,7 +56,7 @@ void ivf_flat_build_search_simple(cuvsResources_t *res, DLManagedTensor * datase
     DLManagedTensor neighbors_tensor;
     int64_t neighbors_shape[2] = {n_queries, topk};
     int_tensor_initialize(neighbors_d, neighbors_shape, &neighbors_tensor);
-    
+
     DLManagedTensor distances_tensor;
     int64_t distances_shape[2] = {n_queries, topk};
     float_tensor_initialize(distances_d, distances_shape, &distances_tensor);
@@ -91,7 +91,7 @@ void ivf_flat_build_search_simple(cuvsResources_t *res, DLManagedTensor * datase
 
     cuvsIvfFlatSearchParamsDestroy(search_params);
     cuvsIvfFlatIndexDestroy(index);
-    cuvsIvfFlatIndexParamsDestroy(index_params);  
+    cuvsIvfFlatIndexParamsDestroy(index_params);
 }
 
 void ivf_flat_build_extend_search(cuvsResources_t *res, DLManagedTensor * trainset_tensor, DLManagedTensor * dataset_tensor, DLManagedTensor * queries_tensor) {
@@ -102,7 +102,7 @@ void ivf_flat_build_extend_search(cuvsResources_t *res, DLManagedTensor * trains
     int64_t data_indices_shape[1] = {n_dataset};
     int_tensor_initialize(data_indices_d, data_indices_shape, &data_indices_tensor);
     data_indices_tensor.dl_tensor.ndim = 1;
-    
+
     printf("\nRun k-means clustering using the training set\n");
 
     int64_t *data_indices = (int64_t *)malloc(n_dataset * sizeof(int64_t));
@@ -154,11 +154,11 @@ void ivf_flat_build_extend_search(cuvsResources_t *res, DLManagedTensor * trains
     DLManagedTensor neighbors_tensor;
     int64_t neighbors_shape[2] = {n_queries, topk};
     int_tensor_initialize(neighbors_d, neighbors_shape, &neighbors_tensor);
-    
+
     DLManagedTensor distances_tensor;
     int64_t distances_shape[2] = {n_queries, topk};
     float_tensor_initialize(distances_d, distances_shape, &distances_tensor);
-    
+
     // Create default search params
     cuvsIvfFlatSearchParams_t search_params;
     cuvsIvfFlatSearchParamsCreate(&search_params);
@@ -203,7 +203,7 @@ int main() {
     float *queries = (float *)malloc(n_queries * n_dim * sizeof(float));
     generate_dataset(dataset, n_samples, n_dim, -10.0, 10.0);
     generate_dataset(queries, n_queries, n_dim, -1.0, 1.0);
-    
+
     // Create a cuvsResources_t object
     cuvsResources_t res;
     cuvsResourcesCreate(&res);
@@ -245,7 +245,7 @@ int main() {
     DLManagedTensor trainset_tensor;
     int64_t trainset_shape[2] = {n_trainset, n_dim};
     float_tensor_initialize(trainset_d, trainset_shape, &trainset_tensor);
-    
+
     // Build and extend example.
     ivf_flat_build_extend_search(&res, &trainset_tensor, &dataset_tensor, &queries_tensor);
 
diff --git a/examples/c/src/ivf_pq_c_example.c b/examples/c/src/ivf_pq_c_example.c
index b6d6b485b..c3235f2d4 100644
--- a/examples/c/src/ivf_pq_c_example.c
+++ b/examples/c/src/ivf_pq_c_example.c
@@ -45,7 +45,7 @@ void ivf_pq_build_search(cuvsResources_t *res, DLManagedTensor * dataset_tensor,
         cuvsIvfPqIndexParamsDestroy(index_params);
         return;
     }
-    
+
     // Create output arrays.
     int64_t topk      = 10;
     int64_t n_queries = queries_tensor->dl_tensor.shape[0];
@@ -89,7 +89,7 @@ void ivf_pq_build_search(cuvsResources_t *res, DLManagedTensor * dataset_tensor,
 
     printf("\nOriginal results:\n");
     print_results(neighbors, distances, 2, topk);
-    
+
     // Re-ranking operation: refine the initial search results by computing exact distances
     int64_t topk_refined = 7;
     int64_t *neighbors_refined_d;
@@ -100,11 +100,11 @@ void ivf_pq_build_search(cuvsResources_t *res, DLManagedTensor * dataset_tensor,
     DLManagedTensor neighbors_refined_tensor;
     int64_t neighbors_refined_shape[2] = {n_queries, topk_refined};
     int_tensor_initialize(neighbors_refined_d, neighbors_refined_shape, &neighbors_refined_tensor);
-    
+
     DLManagedTensor distances_refined_tensor;
     int64_t distances_refined_shape[2] = {n_queries, topk_refined};
     float_tensor_initialize(distances_refined_d, distances_refined_shape, &distances_refined_tensor);
-    
+
     // Note, refinement requires the original dataset and the queries.
     // Don't forget to specify the same distance metric as used by the index.
     cuvsError_t refine_status = cuvsRefine(*res, dataset_tensor, queries_tensor,
@@ -140,7 +140,7 @@ void ivf_pq_build_search(cuvsResources_t *res, DLManagedTensor * dataset_tensor,
 
     cuvsIvfPqSearchParamsDestroy(search_params);
     cuvsIvfPqIndexDestroy(index);
-    cuvsIvfPqIndexParamsDestroy(index_params);  
+    cuvsIvfPqIndexParamsDestroy(index_params);
 }
 
 int main() {
@@ -152,7 +152,7 @@ int main() {
     float *queries = (float *)malloc(n_queries * n_dim * sizeof(float));
     generate_dataset(dataset, n_samples, n_dim, -10.0, 10.0);
     generate_dataset(queries, n_queries, n_dim, -1.0, 1.0);
-    
+
     // Create a cuvsResources_t object
     cuvsResources_t res;
     cuvsResourcesCreate(&res);
@@ -166,7 +166,7 @@ int main() {
     DLManagedTensor dataset_tensor;
     int64_t dataset_shape[2] = {n_samples,n_dim};
     float_tensor_initialize(dataset_d, dataset_shape, &dataset_tensor);
-    
+
     // Allocate memory for `queries`
     float *queries_d;
     cuvsRMMAlloc(res, (void**) &queries_d, sizeof(float) * n_queries * n_dim);
@@ -177,7 +177,7 @@ int main() {
     DLManagedTensor queries_tensor;
     int64_t queries_shape[2] = {n_queries, n_dim};
     float_tensor_initialize(queries_d, queries_shape, &queries_tensor);
-    
+
     // Simple build and search example.
     ivf_pq_build_search(&res, &dataset_tensor, &queries_tensor);
 
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index b0d0ae9ee..6bf8f3408 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -36,6 +36,7 @@ set(BUILD_CUVS_C_LIBRARY OFF)
 include(../cmake/thirdparty/get_cuvs.cmake)
 
 # -------------- compile tasks ----------------- #
+add_executable(BRUTE_FORCE_EXAMPLE src/brute_force_bitmap.cu)
 add_executable(CAGRA_EXAMPLE src/cagra_example.cu)
 add_executable(CAGRA_PERSISTENT_EXAMPLE src/cagra_persistent_example.cu)
 add_executable(DYNAMIC_BATCHING_EXAMPLE src/dynamic_batching_example.cu)
@@ -48,6 +49,9 @@ add_executable(VAMANA_EXAMPLE src/vamana_example.cu)
 add_library(rmm_logger OBJECT)
 target_link_libraries(rmm_logger PRIVATE rmm::rmm_logger_impl)
 
+target_link_libraries(
+  BRUTE_FORCE_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> rmm_logger
+)
 target_link_libraries(
   CAGRA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> rmm_logger
 )
diff --git a/examples/cpp/src/brute_force_bitmap.cu b/examples/cpp/src/brute_force_bitmap.cu
new file mode 100644
index 000000000..69e4df60b
--- /dev/null
+++ b/examples/cpp/src/brute_force_bitmap.cu
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuvs/core/bitmap.hpp>
+#include <cuvs/neighbors/brute_force.hpp>
+
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/random/make_blobs.cuh>
+
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <iostream>
+
+void load_dataset(const raft::device_resources& res, float* data_ptr, int n_vectors, int dim)
+{
+  raft::random::RngState rng(1234ULL);  // fixed seed: every call starts from the same RNG state
+  raft::random::uniform(
+    res, rng, data_ptr, n_vectors * dim, 0.1f, 2.0f);
+}
+
+int main()
+{
+  using namespace cuvs::neighbors;
+  using dataset_dtype = float;
+  using indexing_dtype = int64_t;
+  auto dim = 128;
+  auto n_vectors = 90;
+  auto n_queries = 100;
+  auto k = 5;
+
+  // Allocate the resources, parameters and device matrices, fill them, then build the index.
+  raft::device_resources res;
+  brute_force::index_params index_params;
+  brute_force::search_params search_params;
+  auto dataset = raft::make_device_matrix<dataset_dtype, indexing_dtype>(res, n_vectors, dim);
+  auto queries = raft::make_device_matrix<dataset_dtype, indexing_dtype>(res, n_queries, dim);
+
+  load_dataset(res, dataset.data_handle(), n_vectors, dim);
+  load_dataset(res, queries.data_handle(), n_queries, dim);
+  auto index = brute_force::build(res, index_params, raft::make_const_mdspan(dataset.view()));
+
+  // Entries to filter out. NOTE(review): presumably flat bitmap positions - confirm.
+  std::vector<indexing_dtype> removed_indices_host = {2, 13, 21, 8};
+  auto removed_indices_device =
+        raft::make_device_vector<indexing_dtype, indexing_dtype>(res, removed_indices_host.size());
+  // Copy the host list into the device vector on the resource's CUDA stream.
+  raft::copy(removed_indices_device.data_handle(), removed_indices_host.data(),
+             removed_indices_host.size(), raft::resource::get_cuda_stream(res));
+
+  // Bitset over n_queries * n_vectors entries, then viewed as an n_queries x n_vectors bitmap.
+  cuvs::core::bitset<uint32_t, indexing_dtype> removed_indices_bitset(
+      res, removed_indices_device.view(), n_queries * n_vectors);
+  cuvs::core::bitmap_view<const uint32_t, indexing_dtype> removed_indices_bitmap(
+      removed_indices_bitset.data(), n_queries, n_vectors);
+
+  // Use a `bitmap_filter` in the `brute_force::search` function call.
+  auto bitmap_filter = cuvs::neighbors::filtering::bitmap_filter(removed_indices_bitmap);
+
+  auto neighbors = raft::make_device_matrix<indexing_dtype, indexing_dtype>(res, n_queries, k);
+  auto distances = raft::make_device_matrix<dataset_dtype, indexing_dtype>(res, n_queries, k);
+  std::cout << "Searching..." << std::endl;
+  brute_force::search(res,
+                      search_params,
+                      index,
+                      raft::make_const_mdspan(queries.view()),
+                      neighbors.view(),
+                      distances.view(),
+                      bitmap_filter);
+  std::cout << "Success!" << std::endl;
+  return 0;
+}
diff --git a/examples/cpp/src/common.cuh b/examples/cpp/src/common.cuh
index 8e109a764..a743ff802 100644
--- a/examples/cpp/src/common.cuh
+++ b/examples/cpp/src/common.cuh
@@ -124,7 +124,6 @@ raft::device_matrix<T,idxT> read_bin_dataset(raft::device_resources const &dev_r
 
   auto dataset = raft::make_device_matrix<T, idxT>(dev_resources, N, dim);
   raft::copy(dataset.data_handle(), data.data(), data.size(), raft::resource::get_cuda_stream(dev_resources));
- 
+
   return dataset;
 }
-
diff --git a/examples/cpp/src/vamana_example.cu b/examples/cpp/src/vamana_example.cu
index 60bf14d56..9e5201d31 100644
--- a/examples/cpp/src/vamana_example.cu
+++ b/examples/cpp/src/vamana_example.cu
@@ -29,11 +29,15 @@
 #include "common.cuh"
 
 template <typename T>
-void vamana_build_and_write(raft::device_resources const &dev_resources,
+void vamana_build_and_write(raft::device_resources const& dev_resources,
                             raft::device_matrix_view<const T, int64_t> dataset,
-                            std::string out_fname, int degree, int visited_size,
-                            float max_fraction, int iters) {
-  using namespace cuvs::neighbors::experimental;
+                            std::string out_fname,
+                            int degree,
+                            int visited_size,
+                            float max_fraction,
+                            int iters)
+{
+  using namespace cuvs::neighbors;
 
   // use default index parameters
   vamana::index_params index_params;
@@ -46,13 +50,12 @@ void vamana_build_and_write(raft::device_resources const &dev_resources,
 
   auto start = std::chrono::system_clock::now();
   auto index = vamana::build(dev_resources, index_params, dataset);
-  auto end = std::chrono::system_clock::now();
+  auto end   = std::chrono::system_clock::now();
   std::chrono::duration<double> elapsed_seconds = end - start;
 
   std::cout << "Vamana index has " << index.size() << " vectors" << std::endl;
-  std::cout << "Vamana graph has degree " << index.graph_degree()
-            << ", graph size [" << index.graph().extent(0) << ", "
-            << index.graph().extent(1) << "]" << std::endl;
+  std::cout << "Vamana graph has degree " << index.graph_degree() << ", graph size ["
+            << index.graph().extent(0) << ", " << index.graph().extent(1) << "]" << std::endl;
 
   std::cout << "Time to build index: " << elapsed_seconds.count() << "s\n";
 
@@ -60,9 +63,11 @@ void vamana_build_and_write(raft::device_resources const &dev_resources,
   serialize(dev_resources, out_fname, index);
 }
 
-void usage() {
-  printf("Usage: ./vamana_example <data filename> <output filename> <graph "
-         "degree> <visited_size> <max_fraction> <iterations> \n");
+void usage()
+{
+  printf(
+    "Usage: ./vamana_example <data filename> <output filename> <graph "
+    "degree> <visited_size> <max_fraction> <iterations> \n");
   printf("Input file expected to be binary file of fp32 vectors.\n");
   printf("Graph degree sizes supported: 32, 64, 128, 256\n");
   printf("Visited_size must be > degree and a power of 2.\n");
@@ -71,13 +76,14 @@ void usage() {
   exit(1);
 }
 
-int main(int argc, char *argv[]) {
+int main(int argc, char* argv[])
+{
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use
   // the same pool.
   rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool_mr(
-      rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull);
+    rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(&pool_mr);
 
   // Alternatively, one could define a pool allocator for temporary arrays (used
@@ -87,22 +93,24 @@ int main(int argc, char *argv[]) {
   // limit. raft::resource::set_workspace_to_pool_resource(dev_resources, 2 *
   // 1024 * 1024 * 1024ull);
 
-  if (argc != 7)
-    usage();
+  if (argc != 7) usage();
 
-  std::string data_fname = (std::string)(argv[1]); // Input filename
-  std::string out_fname = (std::string)argv[2];    // Output index filename
-  int degree = atoi(argv[3]);
-  int max_visited = atoi(argv[4]);
-  float max_fraction = atof(argv[5]);
-  int iters = atoi(argv[6]);
+  std::string data_fname = (std::string)(argv[1]);  // Input filename
+  std::string out_fname  = (std::string)argv[2];    // Output index filename
+  int degree             = atoi(argv[3]);
+  int max_visited        = atoi(argv[4]);
+  float max_fraction     = atof(argv[5]);
+  int iters              = atoi(argv[6]);
 
   // Read in binary dataset file
-  auto dataset =
-      read_bin_dataset<uint8_t, int64_t>(dev_resources, data_fname, INT_MAX);
+  auto dataset = read_bin_dataset<uint8_t, int64_t>(dev_resources, data_fname, INT_MAX);
 
   // Simple build example to create graph and write to a file
-  vamana_build_and_write<uint8_t>(
-      dev_resources, raft::make_const_mdspan(dataset.view()), out_fname, degree,
-      max_visited, max_fraction, iters);
+  vamana_build_and_write<uint8_t>(dev_resources,
+                                  raft::make_const_mdspan(dataset.view()),
+                                  out_fname,
+                                  degree,
+                                  max_visited,
+                                  max_fraction,
+                                  iters);
 }
diff --git a/java/README.md b/java/README.md
new file mode 100644
index 000000000..e5676146a
--- /dev/null
+++ b/java/README.md
@@ -0,0 +1,14 @@
+Prerequisites
+-------------
+
+* JDK 22
+* Maven 3.9.6 or later
+
+To build this API, run `./build.sh java` from the top-level directory. Because this API depends on `libcuvs`, that command builds `libcuvs` automatically before building the Java API.
+
+Alternatively, please build libcuvs (`./build.sh libcuvs` from top level directory) before building the Java API with `./build.sh` from this directory.
+
+Building
+--------
+
+`./build.sh` will generate the `libcuvs_java.so` file in the `internal/` directory, and then build the final jar file for the cuVS Java API in the `cuvs-java/` directory.
diff --git a/java/build.sh b/java/build.sh
new file mode 100755
index 000000000..daebfdf67
--- /dev/null
+++ b/java/build.sh
@@ -0,0 +1,14 @@
+VERSION="25.04.0" # Note: The version is updated automatically when ci/release/update-version.sh is invoked
+GROUP_ID="com.nvidia.cuvs"
+SO_FILE_PATH="./internal"
+
+if [ -z "$CMAKE_PREFIX_PATH" ]; then
+    export CMAKE_PREFIX_PATH="$(pwd)/../cpp/build" # quoted: a checkout path with spaces must not split
+fi
+
+cd internal && cmake . && cmake --build . \
+  && cd .. \
+  && mvn install:install-file -DgroupId=$GROUP_ID -DartifactId=cuvs-java-internal -Dversion=$VERSION -Dpackaging=so -Dfile=$SO_FILE_PATH/libcuvs_java.so \
+  && cd cuvs-java \
+  && mvn package \
+  && mvn install:install-file -Dfile=./target/cuvs-java-$VERSION-jar-with-dependencies.jar -DgroupId=$GROUP_ID -DartifactId=cuvs-java -Dversion=$VERSION -Dpackaging=jar
diff --git a/java/cuvs-java/.gitignore b/java/cuvs-java/.gitignore
new file mode 100644
index 000000000..b83d22266
--- /dev/null
+++ b/java/cuvs-java/.gitignore
@@ -0,0 +1 @@
+/target/
diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml
new file mode 100644
index 000000000..e6cd30f82
--- /dev/null
+++ b/java/cuvs-java/pom.xml
@@ -0,0 +1,159 @@
+<!--
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.nvidia.cuvs</groupId>
+    <artifactId>cuvs-java</artifactId>
+    <!-- NOTE: The version automatically gets updated when ci/release/update-version.sh is invoked. -->
+    <!--CUVS_JAVA#VERSION_UPDATE_MARKER_START--><version>25.04.0</version><!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->
+    <name>cuvs-java</name>
+    <packaging>jar</packaging>
+
+    <properties>
+        <maven.compiler.target>22</maven.compiler.target>
+        <maven.compiler.source>22</maven.compiler.source>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+    </properties>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.36</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>1.7.36</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>com.carrotsearch.randomizedtesting</groupId>
+            <artifactId>randomizedtesting-runner</artifactId>
+            <version>2.8.2</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.1</version>
+            <scope>test</scope>
+        </dependency>
+
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>3.5.2</version>
+                <configuration>
+                    <parallel>suites</parallel>
+                    <threadCountSuites>1</threadCountSuites>
+                    <perCoreThreadCount>false</perCoreThreadCount>
+                    <systemPropertyVariables>
+                        <java.library.path>${project.build.directory}/classes</java.library.path>
+                    </systemPropertyVariables>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-dependency-plugin</artifactId>
+                <version>2.10</version>
+                <executions>
+                    <execution>
+                        <id>copy</id>
+                        <phase>compile</phase>
+                        <goals>
+                            <goal>copy</goal>
+                        </goals>
+                        <configuration>
+                            <artifactItems>
+                                <artifactItem>
+                                    <groupId>com.nvidia.cuvs</groupId>
+                                    <artifactId>cuvs-java-internal</artifactId>
+                                    <!-- NOTE: The version automatically gets updated when ci/release/update-version.sh is invoked. -->
+                                    <!--CUVS_JAVA#VERSION_UPDATE_MARKER_START--><version>25.04.0</version><!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->
+                                    <type>so</type>
+                                    <overWrite>false</overWrite>
+                                    <outputDirectory>
+                                        ${project.build.directory}/classes</outputDirectory>
+                                    <destFileName>libcuvs_java.so</destFileName>
+                                </artifactItem>
+                            </artifactItems>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.4.2</version>
+                <configuration>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                    <archiverConfig>
+                        <duplicateBehavior>add</duplicateBehavior>
+                    </archiverConfig>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>assemble-all</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <version>2.2</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <addClasspath>true</addClasspath>
+                            <mainClass>
+                                com.nvidia.cuvs.examples.CagraExample</mainClass>
+                        </manifest>
+                    </archive>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <version>3.6.2</version>
+                <configuration>
+                    <excludePackageNames>com.nvidia.cuvs.examples,com.nvidia.cuvs.panama</excludePackageNames>
+                    <reportOutputDirectory>${project.build.directory}</reportOutputDirectory>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java
new file mode 100644
index 000000000..30ffca1cd
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.UUID;
+
+import com.nvidia.cuvs.common.Util;
+import com.nvidia.cuvs.panama.CuVSBruteForceIndex;
+
+/**
+ *
+ * {@link BruteForceIndex} encapsulates a BRUTEFORCE index, along with methods
+ * to interact with it.
+ *
+ * @since 25.02
+ */
+public class BruteForceIndex {
+
+  /** Size, in bytes, of the chunks used when copying index bytes between streams. */
+  private static final int COPY_BUFFER_SIZE = 1024; // TODO: Make this configurable
+
+  // Dataset the index was built from; null when the index was loaded from a stream.
+  private final float[][] dataset;
+  private final CuVSResources resources;
+  private MethodHandle indexMethodHandle;
+  private MethodHandle searchMethodHandle;
+  private MethodHandle destroyIndexMethodHandle;
+  private MethodHandle serializeMethodHandle;
+  private MethodHandle deserializeMethodHandle;
+  private IndexReference bruteForceIndexReference;
+  // Index parameters; null when the index was loaded from a stream.
+  private BruteForceIndexParams bruteForceIndexParams;
+  private MemoryLayout longMemoryLayout;
+  private MemoryLayout intMemoryLayout;
+  private MemoryLayout floatMemoryLayout;
+
+  /**
+   * Constructor for building the index using specified dataset
+   *
+   * @param dataset               the dataset used for creating the BRUTEFORCE
+   *                              index
+   * @param resources             an instance of {@link CuVSResources}
+   * @param bruteForceIndexParams an instance of {@link BruteForceIndexParams}
+   *                              holding the index parameters
+   */
+  private BruteForceIndex(float[][] dataset, CuVSResources resources, BruteForceIndexParams bruteForceIndexParams)
+      throws Throwable {
+    this.dataset = dataset;
+    this.resources = resources;
+    this.bruteForceIndexParams = bruteForceIndexParams;
+
+    longMemoryLayout = resources.linker.canonicalLayouts().get("long");
+    intMemoryLayout = resources.linker.canonicalLayouts().get("int");
+    floatMemoryLayout = resources.linker.canonicalLayouts().get("float");
+
+    initializeMethodHandles();
+    this.bruteForceIndexReference = build();
+  }
+
+  /**
+   * Constructor for loading the index from an {@link InputStream}
+   *
+   * @param inputStream an instance of stream to read the index bytes from; it
+   *                    is closed once the index bytes have been read
+   * @param resources   an instance of {@link CuVSResources}
+   */
+  private BruteForceIndex(InputStream inputStream, CuVSResources resources) throws Throwable {
+    this.bruteForceIndexParams = null;
+    this.dataset = null;
+    this.resources = resources;
+
+    longMemoryLayout = resources.linker.canonicalLayouts().get("long");
+    intMemoryLayout = resources.linker.canonicalLayouts().get("int");
+    floatMemoryLayout = resources.linker.canonicalLayouts().get("float");
+
+    initializeMethodHandles();
+    this.bruteForceIndexReference = deserialize(inputStream);
+  }
+
+  /**
+   * Initializes the {@link MethodHandles} for invoking native methods.
+   * <p>
+   * Must run after the canonical memory layouts have been resolved, because the
+   * function descriptors below are built from them.
+   *
+   * @throws IOException if the native library cannot be loaded
+   */
+  private void initializeMethodHandles() throws IOException {
+    indexMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("build_brute_force_index").get(),
+        FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.ADDRESS, longMemoryLayout, longMemoryLayout,
+            ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout));
+
+    searchMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("search_brute_force_index").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout, longMemoryLayout,
+            intMemoryLayout, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS,
+            ValueLayout.ADDRESS, longMemoryLayout, longMemoryLayout));
+
+    destroyIndexMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("destroy_brute_force_index").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+
+    serializeMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("serialize_brute_force_index").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+
+    deserializeMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("deserialize_brute_force_index").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+  }
+
+  /**
+   * Invokes the native destroy_brute_force_index function to de-allocate
+   * BRUTEFORCE index
+   */
+  public void destroyIndex() throws Throwable {
+    // NOTE(review): the native side receives this int-sized segment as its
+    // "return value" slot, but the value is never read back here.
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+    destroyIndexMethodHandle.invokeExact(bruteForceIndexReference.getMemorySegment(), returnValueMemorySegment);
+  }
+
+  /**
+   * Invokes the native build_brute_force_index function via the Panama API to
+   * build the {@link BruteForceIndex}
+   *
+   * @return an instance of {@link IndexReference} that holds the pointer to the
+   *         index
+   */
+  private IndexReference build() throws Throwable {
+    long rows = dataset.length;
+    // The column count is taken from the first row; an empty dataset yields 0.
+    long cols = rows > 0 ? dataset[0].length : 0;
+
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+
+    IndexReference indexReference = new IndexReference((MemorySegment) indexMethodHandle.invokeExact(
+        Util.buildMemorySegment(resources.linker, resources.arena, dataset), rows, cols, resources.getMemorySegment(),
+        returnValueMemorySegment, bruteForceIndexParams.getNumWriterThreads()));
+
+    return indexReference;
+  }
+
+  /**
+   * Invokes the native search_brute_force_index via the Panama API for searching
+   * a BRUTEFORCE index.
+   *
+   * @param cuvsQuery an instance of {@link BruteForceQuery} holding the query
+   *                  vectors and other parameters
+   * @return an instance of {@link BruteForceSearchResults} containing the results
+   */
+  public BruteForceSearchResults search(BruteForceQuery cuvsQuery) throws Throwable {
+    long numQueries = cuvsQuery.getQueryVectors().length;
+    // One neighbor id and one distance are returned per (query, k) pair.
+    long numBlocks = cuvsQuery.getTopK() * numQueries;
+    int vectorDimension = numQueries > 0 ? cuvsQuery.getQueryVectors()[0].length : 0;
+    long prefilterDataLength = cuvsQuery.getPrefilter() != null ? cuvsQuery.getPrefilter().length : 0;
+    // The dataset is not retained for an index loaded from a stream, so 0 is passed in that case.
+    long numRows = dataset != null ? dataset.length : 0;
+
+    SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, longMemoryLayout);
+    SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, floatMemoryLayout);
+    MemorySegment neighborsMemorySegment = resources.arena.allocate(neighborsSequenceLayout);
+    MemorySegment distancesMemorySegment = resources.arena.allocate(distancesSequenceLayout);
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+    // A NULL segment tells the native side that no prefilter was supplied.
+    MemorySegment prefilterDataMemorySegment = cuvsQuery.getPrefilter() != null
+        ? Util.buildMemorySegment(resources.linker, resources.arena, cuvsQuery.getPrefilter())
+        : MemorySegment.NULL;
+
+    searchMethodHandle.invokeExact(bruteForceIndexReference.getMemorySegment(),
+        Util.buildMemorySegment(resources.linker, resources.arena, cuvsQuery.getQueryVectors()), cuvsQuery.getTopK(),
+        numQueries, vectorDimension, resources.getMemorySegment(), neighborsMemorySegment, distancesMemorySegment,
+        returnValueMemorySegment, prefilterDataMemorySegment, prefilterDataLength, numRows);
+
+    return new BruteForceSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment,
+        distancesMemorySegment, cuvsQuery.getTopK(), cuvsQuery.getMapping(), numQueries);
+  }
+
+  /**
+   * A method to persist a BRUTEFORCE index using an instance of
+   * {@link OutputStream} for writing index bytes.
+   * <p>
+   * The intermediate file is created in the default temporary-file directory and
+   * removed afterwards.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes into
+   */
+  public void serialize(OutputStream outputStream) throws Throwable {
+    serialize(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".bf"));
+  }
+
+  /**
+   * A method to persist a BRUTEFORCE index using an instance of
+   * {@link OutputStream} and path to the intermediate temporary file.
+   * <p>
+   * The temporary file is deleted before this method returns, whether or not
+   * serialization succeeded. The supplied {@code outputStream} is left open.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes to
+   * @param tempFile     an intermediate {@link File} where BRUTEFORCE index is
+   *                     written temporarily
+   */
+  public void serialize(OutputStream outputStream, File tempFile) throws Throwable {
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+    try {
+      serializeMethodHandle.invokeExact(resources.getMemorySegment(), bruteForceIndexReference.getMemorySegment(),
+          returnValueMemorySegment,
+          Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath()));
+      // try-with-resources guarantees the file handle is released even if the copy fails.
+      try (InputStream fileInputStream = new FileInputStream(tempFile)) {
+        copy(fileInputStream, outputStream);
+      }
+    } finally {
+      // Never leave the intermediate file behind, even when the native call or the copy throws.
+      tempFile.delete();
+    }
+  }
+
+  /**
+   * Gets an instance of {@link IndexReference} by deserializing a BRUTEFORCE
+   * index using an {@link InputStream}.
+   * <p>
+   * The stream content is first spooled to an intermediate file in the default
+   * temporary-file directory, which is then handed to the native
+   * deserialize_brute_force_index function. The intermediate file is always
+   * deleted afterwards.
+   *
+   * @param inputStream an instance of {@link InputStream}; it is closed once its
+   *                    content has been read
+   * @return an instance of {@link IndexReference}.
+   */
+  private IndexReference deserialize(InputStream inputStream) throws Throwable {
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+    IndexReference indexReference = new IndexReference(resources);
+
+    // Same temp-file strategy as serialize(OutputStream): honours java.io.tmpdir instead of a fixed "/tmp/".
+    File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".bf");
+    try {
+      // Both streams are closed before the native call so the file is complete and no handle is held on it.
+      try (InputStream source = inputStream; OutputStream fileOutputStream = new FileOutputStream(tempFile)) {
+        copy(source, fileOutputStream);
+      }
+      deserializeMethodHandle.invokeExact(resources.getMemorySegment(), indexReference.getMemorySegment(),
+          returnValueMemorySegment,
+          Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath()));
+    } finally {
+      tempFile.delete();
+    }
+
+    return indexReference;
+  }
+
+  /**
+   * Copies every remaining byte of {@code source} into {@code sink} in chunks of
+   * {@link #COPY_BUFFER_SIZE} bytes. Neither stream is closed.
+   *
+   * @param source the stream to read from
+   * @param sink   the stream to write to
+   * @throws IOException if reading or writing fails
+   */
+  private static void copy(InputStream source, OutputStream sink) throws IOException {
+    byte[] chunk = new byte[COPY_BUFFER_SIZE];
+    int chunkLength;
+    while ((chunkLength = source.read(chunk)) != -1) {
+      sink.write(chunk, 0, chunkLength);
+    }
+  }
+
+  /**
+   * Builder helps configure and create an instance of {@link BruteForceIndex}.
+   */
+  public static class Builder {
+
+    private float[][] dataset;
+    private CuVSResources cuvsResources;
+    private BruteForceIndexParams bruteForceIndexParams;
+    private InputStream inputStream;
+
+    /**
+     * Constructs this Builder with an instance of {@link CuVSResources}.
+     *
+     * @param cuvsResources an instance of {@link CuVSResources}
+     */
+    public Builder(CuVSResources cuvsResources) {
+      this.cuvsResources = cuvsResources;
+    }
+
+    /**
+     * Registers an instance of configured {@link BruteForceIndexParams} with this
+     * Builder.
+     *
+     * @param bruteForceIndexParams An instance of BruteForceIndexParams
+     * @return An instance of this Builder
+     */
+    public Builder withIndexParams(BruteForceIndexParams bruteForceIndexParams) {
+      this.bruteForceIndexParams = bruteForceIndexParams;
+      return this;
+    }
+
+    /**
+     * Sets an instance of InputStream typically used when index deserialization is
+     * needed.
+     *
+     * @param inputStream an instance of {@link InputStream}
+     * @return an instance of this Builder
+     */
+    public Builder from(InputStream inputStream) {
+      this.inputStream = inputStream;
+      return this;
+    }
+
+    /**
+     * Sets the dataset for building the {@link BruteForceIndex}.
+     *
+     * @param dataset a two-dimensional float array
+     * @return an instance of this Builder
+     */
+    public Builder withDataset(float[][] dataset) {
+      this.dataset = dataset;
+      return this;
+    }
+
+    /**
+     * Builds and returns an instance of {@link BruteForceIndex}.
+     * <p>
+     * When an {@link InputStream} was registered via {@link #from(InputStream)}
+     * the index is deserialized from it and any dataset or index parameters are
+     * ignored; otherwise the index is built from the dataset and index
+     * parameters.
+     *
+     * @return an instance of {@link BruteForceIndex}
+     * @throws NullPointerException if the resources are missing, or if no
+     *                              {@link InputStream} was given and the dataset
+     *                              or the index parameters are missing
+     */
+    public BruteForceIndex build() throws Throwable {
+      // Fail early with a descriptive message instead of an anonymous NPE deep inside the constructor.
+      if (cuvsResources == null) {
+        throw new NullPointerException("CuVSResources must not be null");
+      }
+      if (inputStream != null) {
+        return new BruteForceIndex(inputStream, cuvsResources);
+      }
+      if (dataset == null) {
+        throw new NullPointerException("Either a dataset or an InputStream is required to build a BruteForceIndex");
+      }
+      if (bruteForceIndexParams == null) {
+        throw new NullPointerException("BruteForceIndexParams are required to build a BruteForceIndex from a dataset");
+      }
+      return new BruteForceIndex(dataset, cuvsResources, bruteForceIndexParams);
+    }
+  }
+
+  /**
+   * Holds the memory reference to a BRUTEFORCE index.
+   */
+  protected static class IndexReference {
+
+    private final MemorySegment memorySegment;
+
+    /**
+     * Constructs BruteForceIndexReference and allocates the MemorySegment from
+     * the arena of the given resources.
+     *
+     * @param resources the {@link CuVSResources} whose arena backs the allocation
+     */
+    protected IndexReference(CuVSResources resources) {
+      memorySegment = CuVSBruteForceIndex.allocate(resources.arena);
+    }
+
+    /**
+     * Constructs BruteForceIndexReference with an instance of MemorySegment passed
+     * as a parameter.
+     *
+     * @param indexMemorySegment the MemorySegment instance to use for containing
+     *                           index reference
+     */
+    protected IndexReference(MemorySegment indexMemorySegment) {
+      this.memorySegment = indexMemorySegment;
+    }
+
+    /**
+     * Gets the instance of index MemorySegment.
+     *
+     * @return index MemorySegment
+     */
+    protected MemorySegment getMemorySegment() {
+      return memorySegment;
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndexParams.java
new file mode 100644
index 000000000..832edf51a
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndexParams.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+/**
+ * Supplemental parameters to build BRUTEFORCE index.
+ *
+ * @since 25.02
+ */
+public class BruteForceIndexParams {
+
+  private final int numWriterThreads;
+
+  private BruteForceIndexParams(int writerThreads) {
+    this.numWriterThreads = writerThreads;
+  }
+
+  @Override
+  public String toString() {
+    return "BruteForceIndexParams [numWriterThreads=" + numWriterThreads + "]";
+  }
+
+  /**
+   * Gets the number of threads used to build the index.
+   */
+  public int getNumWriterThreads() {
+    return numWriterThreads;
+  }
+
+  /**
+   * Builder configures and creates an instance of {@link BruteForceIndexParams}.
+   */
+  public static class Builder {
+
+    private int numWriterThreads = 2;
+
+    /**
+     * Sets the number of writer threads to use for indexing.
+     *
+     * @param numWriterThreads number of writer threads to use
+     * @return an instance of Builder
+     */
+    public Builder withNumWriterThreads(int numWriterThreads) {
+      this.numWriterThreads = numWriterThreads;
+      return this;
+    }
+
+    /**
+     * Builds an instance of {@link BruteForceIndexParams}.
+     *
+     * @return an instance of {@link BruteForceIndexParams}
+     */
+    public BruteForceIndexParams build() {
+      return new BruteForceIndexParams(numWriterThreads);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java
new file mode 100644
index 000000000..7febc3ba3
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * BruteForceQuery holds the query vectors to be used while invoking search.
+ *
+ * @since 25.02
+ */
+public class BruteForceQuery {
+
+  private final List<Integer> mapping;
+  private final float[][] queryVectors;
+  private final long[] prefilter;
+  private final int topK;
+
+  /**
+   * Constructs an instance of {@link BruteForceQuery} using queryVectors,
+   * mapping, topK, and prefilter.
+   *
+   * @param queryVectors 2D float query vector array
+   * @param mapping      an instance of ID mapping
+   * @param topK         the top k results to return
+   * @param prefilter    the prefilter data to use while searching the BRUTEFORCE
+   *                     index
+   */
+  public BruteForceQuery(float[][] queryVectors, List<Integer> mapping, int topK, long[] prefilter) {
+    this.queryVectors = queryVectors;
+    this.mapping = mapping;
+    this.topK = topK;
+    this.prefilter = prefilter;
+  }
+
+  /**
+   * Gets the query vector 2D float array.
+   *
+   * @return 2D float array
+   */
+  public float[][] getQueryVectors() {
+    return queryVectors;
+  }
+
+  /**
+   * Gets the passed ID mapping list.
+   *
+   * @return the list of ID mappings
+   */
+  public List<Integer> getMapping() {
+    return mapping;
+  }
+
+  /**
+   * Gets the topK value.
+   *
+   * @return an integer
+   */
+  public int getTopK() {
+    return topK;
+  }
+
+  /**
+   * Gets the prefilter long array
+   *
+   * @return a long array
+   */
+  public long[] getPrefilter() {
+    return prefilter;
+  }
+
+  @Override
+  public String toString() {
+    // queryVectors is a nested array: Arrays.toString would print the identity of each row
+    // (e.g. "[F@1b2c3d") rather than its elements, so deepToString is required here.
+    return "BruteForceQuery [mapping=" + mapping + ", queryVectors=" + Arrays.deepToString(queryVectors)
+        + ", prefilter=" + Arrays.toString(prefilter) + ", topK=" + topK + "]";
+  }
+
+  /**
+   * Builder helps configure and create an instance of BruteForceQuery.
+   */
+  public static class Builder {
+
+    private float[][] queryVectors;
+    private long[] prefilter;
+    private List<Integer> mapping;
+    // Used when withTopK(int) is never called.
+    private int topK = 2;
+
+    /**
+     * Registers the query vectors to be passed in the search call.
+     *
+     * @param queryVectors 2D float query vector array
+     * @return an instance of this Builder
+     */
+    public Builder withQueryVectors(float[][] queryVectors) {
+      this.queryVectors = queryVectors;
+      return this;
+    }
+
+    /**
+     * Sets the instance of mapping to be used for ID mapping.
+     *
+     * @param mapping the ID mapping instance
+     * @return an instance of this Builder
+     */
+    public Builder withMapping(List<Integer> mapping) {
+      this.mapping = mapping;
+      return this;
+    }
+
+    /**
+     * Registers the topK value.
+     *
+     * @param topK the topK value used to retrieve the topK results
+     * @return an instance of this Builder
+     */
+    public Builder withTopK(int topK) {
+      this.topK = topK;
+      return this;
+    }
+
+    /**
+     * Sets the prefilter data for building the {@link BruteForceQuery}.
+     *
+     * @param prefilter a one-dimensional long array
+     * @return an instance of this Builder
+     */
+    public Builder withPrefilter(long[] prefilter) {
+      this.prefilter = prefilter;
+      return this;
+    }
+
+    /**
+     * Builds an instance of {@link BruteForceQuery}
+     *
+     * @return an instance of {@link BruteForceQuery}
+     */
+    public BruteForceQuery build() {
+      return new BruteForceQuery(queryVectors, mapping, topK, prefilter);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceSearchResults.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceSearchResults.java
new file mode 100644
index 000000000..b87e1f601
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceSearchResults.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.nvidia.cuvs.common.SearchResults;
+
+/**
+ * {@link BruteForceSearchResults} encapsulates the logic for reading and holding BRUTEFORCE search results.
+ *
+ * @since 25.02
+ */
+public class BruteForceSearchResults extends SearchResults {
+
+  /**
+   * Hands the layouts, segments and query metadata to {@link SearchResults} and
+   * then immediately reads the native results.
+   *
+   * @param neighboursSequenceLayout layout of the neighbours segment
+   * @param distancesSequenceLayout  layout of the distances segment
+   * @param neighboursMemorySegment  segment holding the neighbour ids
+   * @param distancesMemorySegment   segment holding the distances
+   * @param topK                     number of results per query
+   * @param mapping                  optional ID mapping, may be null
+   * @param numberOfQueries          number of queries that were searched
+   */
+  protected BruteForceSearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout,
+      MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List<Integer> mapping,
+      long numberOfQueries) {
+    super(neighboursSequenceLayout, distancesSequenceLayout, neighboursMemorySegment, distancesMemorySegment, topK,
+        mapping, numberOfQueries);
+    readResultMemorySegments();
+  }
+
+  /**
+   * Walks the neighbours and distances {@link MemorySegment}s in lock-step and
+   * groups every run of topK consecutive entries into one insertion-ordered map
+   * of id to distance, which is appended to the results.
+   */
+  protected void readResultMemorySegments() {
+    long totalEntries = topK * numberOfQueries;
+    Map<Integer, Float> currentQueryResults = new LinkedHashMap<>();
+    for (long position = 0; position < totalEntries; position++) {
+      long neighbourId = (long) neighboursVarHandle.get(neighboursMemorySegment, 0L, position);
+      float distance = (float) distancesVarHandle.get(distancesMemorySegment, 0L, position);
+
+      // When a mapping is present the native id is an index into it; otherwise the id is used as-is.
+      int key;
+      if (mapping == null) {
+        key = (int) neighbourId;
+      } else {
+        key = mapping.get((int) neighbourId);
+      }
+      currentQueryResults.put(key, distance);
+
+      // Every topK-th entry closes the result set of one query.
+      if ((position + 1) % topK == 0) {
+        results.add(currentQueryResults);
+        currentQueryResults = new LinkedHashMap<>();
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java
new file mode 100644
index 000000000..09f6bae8b
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+
+import com.nvidia.cuvs.panama.CuVSCagraCompressionParams;
+
+/**
+ * Supplemental compression parameters to build CAGRA Index.
+ *
+ * @since 25.02
+ */
+public class CagraCompressionParams {
+
+  private final MemorySegment memorySegment;
+  private final CuVSResources resources;
+  private final int pqBits;
+  private final int pqDim;
+  private final int vqNCenters;
+  private final int kmeansNIters;
+  private final double vqKmeansTrainsetFraction;
+  private final double pqKmeansTrainsetFraction;
+
+  /**
+   * Stores the given compression parameters and mirrors them into a native
+   * {@link CuVSCagraCompressionParams} struct allocated from the arena of
+   * {@code resources}.
+   *
+   * @param resources                the resources instance to use
+   * @param pqBits                   the bit length of the vector element after
+   *                                 compression by PQ
+   * @param pqDim                    the dimensionality of the vector after
+   *                                 compression by PQ
+   * @param vqNCenters               the vector quantization (VQ) codebook size -
+   *                                 number of “coarse cluster centers”
+   * @param kmeansNIters             the number of iterations searching for kmeans
+   *                                 centers (both VQ and PQ phases)
+   * @param vqKmeansTrainsetFraction the fraction of data to use during iterative
+   *                                 kmeans building (VQ phase)
+   * @param pqKmeansTrainsetFraction the fraction of data to use during iterative
+   *                                 kmeans building (PQ phase)
+   */
+  private CagraCompressionParams(CuVSResources resources, int pqBits, int pqDim, int vqNCenters, int kmeansNIters,
+      double vqKmeansTrainsetFraction, double pqKmeansTrainsetFraction) {
+    this.resources = resources;
+    this.pqBits = pqBits;
+    this.pqDim = pqDim;
+    this.vqNCenters = vqNCenters;
+    this.kmeansNIters = kmeansNIters;
+    this.vqKmeansTrainsetFraction = vqKmeansTrainsetFraction;
+    this.pqKmeansTrainsetFraction = pqKmeansTrainsetFraction;
+
+    // Allocate the native struct and copy every configured value into it.
+    MemorySegment segment = CuVSCagraCompressionParams.allocate(resources.arena);
+    CuVSCagraCompressionParams.pq_bits(segment, pqBits);
+    CuVSCagraCompressionParams.pq_dim(segment, pqDim);
+    CuVSCagraCompressionParams.vq_n_centers(segment, vqNCenters);
+    CuVSCagraCompressionParams.kmeans_n_iters(segment, kmeansNIters);
+    CuVSCagraCompressionParams.vq_kmeans_trainset_fraction(segment, vqKmeansTrainsetFraction);
+    CuVSCagraCompressionParams.pq_kmeans_trainset_fraction(segment, pqKmeansTrainsetFraction);
+    this.memorySegment = segment;
+  }
+
+  /**
+   * Returns the native struct populated with these compression parameters.
+   *
+   * @return the memory segment instance containing the compression parameters.
+   */
+  protected MemorySegment getMemorySegment() {
+    return this.memorySegment;
+  }
+
+  /**
+   * Returns the PQ bit length.
+   *
+   * @return the bit length of the vector element after compression by PQ.
+   */
+  public int getPqBits() {
+    return this.pqBits;
+  }
+
+  /**
+   * Returns the PQ dimensionality.
+   *
+   * @return the dimensionality of the vector after compression by PQ.
+   */
+  public int getPqDim() {
+    return this.pqDim;
+  }
+
+  /**
+   * Returns the VQ codebook size.
+   *
+   * @return the vector quantization (VQ) codebook size - number of “coarse
+   *         cluster centers”.
+   */
+  public int getVqNCenters() {
+    return this.vqNCenters;
+  }
+
+  /**
+   * Returns the kmeans iteration count.
+   *
+   * @return the number of iterations searching for kmeans centers (both VQ and PQ
+   *         phases).
+   */
+  public int getKmeansNIters() {
+    return this.kmeansNIters;
+  }
+
+  /**
+   * Returns the VQ-phase training fraction.
+   *
+   * @return the fraction of data to use during iterative kmeans building (VQ
+   *         phase).
+   */
+  public double getVqKmeansTrainsetFraction() {
+    return this.vqKmeansTrainsetFraction;
+  }
+
+  /**
+   * Returns the PQ-phase training fraction.
+   *
+   * @return the fraction of data to use during iterative kmeans building (PQ
+   *         phase).
+   */
+  public double getPqKmeansTrainsetFraction() {
+    return this.pqKmeansTrainsetFraction;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder("CagraCompressionParams [pqBits=");
+    text.append(pqBits);
+    text.append(", pqDim=").append(pqDim);
+    text.append(", vqNCenters=").append(vqNCenters);
+    text.append(", kmeansNIters=").append(kmeansNIters);
+    text.append(", vqKmeansTrainsetFraction=").append(vqKmeansTrainsetFraction);
+    text.append(", pqKmeansTrainsetFraction=").append(pqKmeansTrainsetFraction);
+    text.append("]");
+    return text.toString();
+  }
+
+  /**
+   * Fluent builder for {@link CagraCompressionParams}.
+   */
+  public static class Builder {
+
+    private CuVSResources resources;
+    private int pqBits = 8;
+    private int pqDim = 0;
+    private int vqNCenters = 0;
+    private int kmeansNIters = 25;
+    private double vqKmeansTrainsetFraction = 0;
+    private double pqKmeansTrainsetFraction = 0;
+
+    /**
+     * Creates a Builder bound to the given resources.
+     *
+     * @param resources the {@link CuVSResources} instance handed to the built
+     *                  {@link CagraCompressionParams}
+     */
+    public Builder(CuVSResources resources) {
+      this.resources = resources;
+    }
+
+    /**
+     * Sets the bit length of the vector element after compression by PQ.
+     *
+     * Possible values: [4, 5, 6, 7, 8]. Hint: the smaller the ‘pq_bits’, the
+     * smaller the index size and the better the search performance, but the lower
+     * the recall.
+     *
+     * @param pqBits the bit length of the vector element after compression by PQ
+     * @return this Builder, to allow call chaining
+     */
+    public Builder withPqBits(int pqBits) {
+      this.pqBits = pqBits;
+      return this;
+    }
+
+    /**
+     * Sets the dimensionality of the vector after compression by PQ.
+     *
+     * When zero, an optimal value is selected using a heuristic.
+     *
+     * @param pqDim the dimensionality of the vector after compression by PQ
+     * @return this Builder, to allow call chaining
+     */
+    public Builder withPqDim(int pqDim) {
+      this.pqDim = pqDim;
+      return this;
+    }
+
+    /**
+     * Sets the vector quantization (VQ) codebook size - number of “coarse cluster
+     * centers”.
+     *
+     * When zero, an optimal value is selected using a heuristic.
+     *
+     * @param vqNCenters the VQ codebook size
+     * @return this Builder, to allow call chaining
+     */
+    public Builder withVqNCenters(int vqNCenters) {
+      this.vqNCenters = vqNCenters;
+      return this;
+    }
+
+    /**
+     * Sets the number of iterations searching for kmeans centers (both VQ and PQ
+     * phases).
+     *
+     * @param kmeansNIters the number of kmeans iterations
+     * @return this Builder, to allow call chaining
+     */
+    public Builder withKmeansNIters(int kmeansNIters) {
+      this.kmeansNIters = kmeansNIters;
+      return this;
+    }
+
+    /**
+     * Sets the fraction of data to use during iterative kmeans building (VQ phase).
+     *
+     * When zero, an optimal value is selected using a heuristic.
+     *
+     * @param vqKmeansTrainsetFraction the fraction of data used in the VQ phase
+     * @return this Builder, to allow call chaining
+     */
+    public Builder withVqKmeansTrainsetFraction(double vqKmeansTrainsetFraction) {
+      this.vqKmeansTrainsetFraction = vqKmeansTrainsetFraction;
+      return this;
+    }
+
+    /**
+     * Sets the fraction of data to use during iterative kmeans building (PQ phase).
+     *
+     * When zero, an optimal value is selected using a heuristic.
+     *
+     * @param pqKmeansTrainsetFraction the fraction of data used in the PQ phase
+     * @return this Builder, to allow call chaining
+     */
+    public Builder withPqKmeansTrainsetFraction(double pqKmeansTrainsetFraction) {
+      this.pqKmeansTrainsetFraction = pqKmeansTrainsetFraction;
+      return this;
+    }
+
+    /**
+     * Creates a {@link CagraCompressionParams} from the values collected so far.
+     *
+     * @return a new instance of {@link CagraCompressionParams}
+     */
+    public CagraCompressionParams build() {
+      CagraCompressionParams params = new CagraCompressionParams(resources, pqBits, pqDim, vqNCenters, kmeansNIters,
+          vqKmeansTrainsetFraction, pqKmeansTrainsetFraction);
+      return params;
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java
new file mode 100644
index 000000000..21eea9783
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.UUID;
+
+import com.nvidia.cuvs.common.Util;
+import com.nvidia.cuvs.panama.CuVSCagraIndex;
+
+/**
+ * {@link CagraIndex} encapsulates a CAGRA index, along with methods to interact
+ * with it.
+ * <p>
+ * CAGRA is a graph-based nearest neighbors algorithm that was built from the
+ * ground up for GPU acceleration. CAGRA demonstrates state-of-the-art index
+ * build and query performance for both small- and large-batch searches. Learn
+ * more about this algorithm
+ * <a href="https://arxiv.org/abs/2308.15136" target="_blank">here</a>.
+ *
+ * @since 25.02
+ */
+public class CagraIndex {
+
+  private final float[][] dataset;
+  private final CuVSResources resources;
+  private MethodHandle indexMethodHandle;
+  private MethodHandle searchMethodHandle;
+  private MethodHandle serializeMethodHandle;
+  private MethodHandle deserializeMethodHandle;
+  private MethodHandle destroyIndexMethodHandle;
+  private MethodHandle serializeCAGRAIndexToHNSWMethodHandle;
+  private CagraIndexParams cagraIndexParameters;
+  private CagraCompressionParams cagraCompressionParams;
+  private IndexReference cagraIndexReference;
+  private MemoryLayout longMemoryLayout;
+  private MemoryLayout intMemoryLayout;
+  private MemoryLayout floatMemoryLayout;
+
+  /**
+   * Creates an index by building it from the given dataset.
+   *
+   * @param indexParameters        the {@link CagraIndexParams} to build with
+   *                               (may be null)
+   * @param cagraCompressionParams the {@link CagraCompressionParams} to build
+   *                               with (may be null)
+   * @param dataset                the vectors to index
+   * @param resources              the {@link CuVSResources} to use
+   * @throws Throwable if setting up the method handles or the build fails
+   */
+  private CagraIndex(CagraIndexParams indexParameters, CagraCompressionParams cagraCompressionParams, float[][] dataset,
+      CuVSResources resources) throws Throwable {
+    this.resources = resources;
+    this.dataset = dataset;
+    this.cagraIndexParameters = indexParameters;
+    this.cagraCompressionParams = cagraCompressionParams;
+
+    this.longMemoryLayout = resources.linker.canonicalLayouts().get("long");
+    this.intMemoryLayout = resources.linker.canonicalLayouts().get("int");
+    this.floatMemoryLayout = resources.linker.canonicalLayouts().get("float");
+    initializeMethodHandles();
+    this.cagraIndexReference = build();
+  }
+
+  /**
+   * Creates an index by deserializing it from the given stream; no dataset or
+   * build parameters are kept for an index loaded this way.
+   *
+   * @param inputStream the stream to read the serialized index bytes from
+   * @param resources   the {@link CuVSResources} to use
+   * @throws Throwable if setting up the method handles or deserializing fails
+   */
+  private CagraIndex(InputStream inputStream, CuVSResources resources) throws Throwable {
+    this.resources = resources;
+    this.dataset = null;
+
+    this.longMemoryLayout = resources.linker.canonicalLayouts().get("long");
+    this.intMemoryLayout = resources.linker.canonicalLayouts().get("int");
+    this.floatMemoryLayout = resources.linker.canonicalLayouts().get("float");
+
+    initializeMethodHandles();
+    this.cagraIndexReference = deserialize(inputStream);
+  }
+
+  /**
+   * Creates the downcall {@link MethodHandle}s for the native CAGRA functions,
+   * looking each symbol up through the symbol lookup of {@link CuVSResources}.
+   *
+   * @throws IOException as declared by this method; presumably raised when the
+   *                     native library cannot be loaded
+   */
+  private void initializeMethodHandles() throws IOException {
+    MemoryLayout ptr = ValueLayout.ADDRESS;
+
+    indexMethodHandle = resources.linker.downcallHandle(resources.getSymbolLookup().find("build_cagra_index").get(),
+        FunctionDescriptor.of(ptr, ptr, longMemoryLayout, longMemoryLayout, ptr, ptr, ptr, ptr, intMemoryLayout));
+
+    searchMethodHandle = resources.linker.downcallHandle(resources.getSymbolLookup().find("search_cagra_index").get(),
+        FunctionDescriptor.ofVoid(ptr, ptr, intMemoryLayout, longMemoryLayout, intMemoryLayout, ptr, ptr, ptr, ptr,
+            ptr));
+
+    serializeMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("serialize_cagra_index").get(),
+        FunctionDescriptor.ofVoid(ptr, ptr, ptr, ptr));
+
+    deserializeMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("deserialize_cagra_index").get(),
+        FunctionDescriptor.ofVoid(ptr, ptr, ptr, ptr));
+
+    destroyIndexMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("destroy_cagra_index").get(), FunctionDescriptor.ofVoid(ptr, ptr));
+
+    serializeCAGRAIndexToHNSWMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("serialize_cagra_index_to_hnsw").get(),
+        FunctionDescriptor.ofVoid(ptr, ptr, ptr, ptr));
+  }
+
+  /**
+   * De-allocates the CAGRA index by invoking the native destroy_cagra_index
+   * function; the return-value segment passed to it is not read afterwards.
+   */
+  public void destroyIndex() throws Throwable {
+    MemorySegment status = resources.arena.allocate(intMemoryLayout);
+    destroyIndexMethodHandle.invokeExact(cagraIndexReference.getMemorySegment(), status);
+  }
+
+  /**
+   * Builds the index by invoking the native build_cagra_index function via the
+   * Panama API. Index or compression parameters that were not supplied are
+   * passed as NULL segments, and one writer thread is used when no index
+   * parameters are given.
+   *
+   * @return an {@link IndexReference} holding the segment returned by the call
+   * @throws Throwable if the native invocation fails
+   */
+  private IndexReference build() throws Throwable {
+    long numRows = dataset.length;
+    long numCols = numRows > 0 ? dataset[0].length : 0;
+    MemorySegment status = resources.arena.allocate(intMemoryLayout);
+    MemorySegment indexParams = MemorySegment.NULL;
+    int writerThreads = 1;
+    if (cagraIndexParameters != null) {
+      indexParams = cagraIndexParameters.getMemorySegment();
+      writerThreads = cagraIndexParameters.getNumWriterThreads();
+    }
+    MemorySegment compressionParams = MemorySegment.NULL;
+    if (cagraCompressionParams != null) {
+      compressionParams = cagraCompressionParams.getMemorySegment();
+    }
+
+    MemorySegment datasetSegment = Util.buildMemorySegment(resources.linker, resources.arena, dataset);
+    MemorySegment nativeIndex = (MemorySegment) indexMethodHandle.invokeExact(datasetSegment, numRows, numCols,
+        resources.getMemorySegment(), status, indexParams, compressionParams, writerThreads);
+    return new IndexReference(nativeIndex);
+  }
+
+  /**
+   * Searches the CAGRA index by invoking the native search_cagra_index function
+   * via the Panama API. When the query carries a mapping, topK is capped at the
+   * size of that mapping.
+   *
+   * @param query a {@link CagraQuery} holding the query vectors and other
+   *              parameters
+   * @return a {@link CagraSearchResults} wrapping the neighbor and distance data
+   * @throws Throwable if the native invocation fails
+   */
+  public CagraSearchResults search(CagraQuery query) throws Throwable {
+    float[][] queryVectors = query.getQueryVectors();
+    long numQueries = queryVectors.length;
+    int vectorDimension = numQueries > 0 ? queryVectors[0].length : 0;
+    int topK = query.getMapping() == null ? query.getTopK() : Math.min(query.getMapping().size(), query.getTopK());
+    long numBlocks = topK * numQueries;
+
+    SequenceLayout neighborsLayout = MemoryLayout.sequenceLayout(numBlocks, intMemoryLayout);
+    MemorySegment neighbors = resources.arena.allocate(neighborsLayout);
+    SequenceLayout distancesLayout = MemoryLayout.sequenceLayout(numBlocks, floatMemoryLayout);
+    MemorySegment distances = resources.arena.allocate(distancesLayout);
+    MemorySegment status = resources.arena.allocate(intMemoryLayout);
+
+    MemorySegment queries = Util.buildMemorySegment(resources.linker, resources.arena, queryVectors);
+    searchMethodHandle.invokeExact(cagraIndexReference.getMemorySegment(), queries, topK, numQueries, vectorDimension,
+        resources.getMemorySegment(), neighbors, distances, status, query.getCagraSearchParameters().getMemorySegment());
+    return new CagraSearchResults(neighborsLayout, distancesLayout, neighbors, distances, topK, query.getMapping(),
+        numQueries);
+  }
+
+  /**
+   * A method to persist a CAGRA index using an instance of {@link OutputStream}
+   * for writing index bytes.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes into
+   */
+  public void serialize(OutputStream outputStream) throws Throwable {
+    serialize(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".cag"), 1024);
+  }
+
+  /**
+   * A method to persist a CAGRA index using an instance of {@link OutputStream}
+   * for writing index bytes.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes into
+   * @param bufferLength the length of buffer to use for writing bytes. Default
+   *                     value is 1024
+   */
+  public void serialize(OutputStream outputStream, int bufferLength) throws Throwable {
+    serialize(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".cag"), bufferLength);
+  }
+
+  /**
+   * A method to persist a CAGRA index using an instance of {@link OutputStream}
+   * for writing index bytes.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes into
+   * @param tempFile     an intermediate {@link File} where CAGRA index is written
+   *                     temporarily
+   */
+  public void serialize(OutputStream outputStream, File tempFile) throws Throwable {
+    serialize(outputStream, tempFile, 1024);
+  }
+
+  /**
+   * Persists the CAGRA index: the native layer writes it to {@code tempFile},
+   * whose bytes are then copied to {@code outputStream}. The temporary file is
+   * deleted afterwards, even if serialization or copying fails.
+   *
+   * @param outputStream the {@link OutputStream} to write the index bytes to
+   * @param tempFile     the intermediate {@link File} the index is written to
+   * @param bufferLength the length of the buffer used for copying the bytes
+   * @throws Throwable if the native call or the copy fails
+   */
+  public void serialize(OutputStream outputStream, File tempFile, int bufferLength) throws Throwable {
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(intMemoryLayout);
+    try {
+      serializeMethodHandle.invokeExact(resources.getMemorySegment(), cagraIndexReference.getMemorySegment(),
+          returnValueMemorySegment, Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath()));
+      try (FileInputStream fileInputStream = new FileInputStream(tempFile)) {
+        byte[] chunk = new byte[bufferLength];
+        int chunkLength;
+        while ((chunkLength = fileInputStream.read(chunk)) != -1) {
+          outputStream.write(chunk, 0, chunkLength);
+        }
+      }
+    } finally {
+      tempFile.delete();
+    }
+  }
+
+  /**
+   * A method to create and persist HNSW index from CAGRA index using an instance
+   * of {@link OutputStream} and path to the intermediate temporary file.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes to
+   */
+  public void serializeToHNSW(OutputStream outputStream) throws Throwable {
+    serializeToHNSW(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".hnsw"), 1024);
+  }
+
+  /**
+   * A method to create and persist HNSW index from CAGRA index using an instance
+   * of {@link OutputStream} and path to the intermediate temporary file.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes to
+   * @param bufferLength the length of buffer to use for writing bytes. Default
+   *                     value is 1024
+   */
+  public void serializeToHNSW(OutputStream outputStream, int bufferLength) throws Throwable {
+    serializeToHNSW(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".hnsw"), bufferLength);
+  }
+
+  /**
+   * A method to create and persist HNSW index from CAGRA index using an instance
+   * of {@link OutputStream} and path to the intermediate temporary file.
+   *
+   * @param outputStream an instance of {@link OutputStream} to write the index
+   *                     bytes to
+   * @param tempFile     an intermediate {@link File} where CAGRA index is written
+   *                     temporarily
+   */
+  public void serializeToHNSW(OutputStream outputStream, File tempFile) throws Throwable {
+    serializeToHNSW(outputStream, tempFile, 1024);
+  }
+
+  /**
+   * Creates an HNSW index from this CAGRA index via {@code tempFile} and copies it
+   * to {@code outputStream}; the file is deleted afterwards, even on failure.
+   *
+   * @param outputStream the {@link OutputStream} to write the index bytes to
+   * @param tempFile     the intermediate {@link File} the index is written to
+   * @param bufferLength the length of the buffer used for copying the bytes
+   * @throws Throwable if the native call or the copy fails
+   */
+  public void serializeToHNSW(OutputStream outputStream, File tempFile, int bufferLength) throws Throwable {
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(intMemoryLayout);
+    try {
+      serializeCAGRAIndexToHNSWMethodHandle.invokeExact(resources.getMemorySegment(),
+          Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath()),
+          cagraIndexReference.getMemorySegment(), returnValueMemorySegment);
+      try (FileInputStream fileInputStream = new FileInputStream(tempFile)) {
+        byte[] chunk = new byte[bufferLength];
+        int chunkLength;
+        while ((chunkLength = fileInputStream.read(chunk)) != -1) {
+          outputStream.write(chunk, 0, chunkLength);
+        }
+      }
+    } finally {
+      tempFile.delete();
+    }
+  }
+
+  /**
+   * Deserializes a CAGRA index from the given stream with a 1024-byte buffer.
+   *
+   * @param inputStream an instance of {@link InputStream}
+   * @return an instance of {@link IndexReference}
+   */
+  private IndexReference deserialize(InputStream inputStream) throws Throwable {
+    int defaultBufferLength = 1024;
+    return deserialize(inputStream, defaultBufferLength);
+  }
+
+  /**
+   * Deserializes a CAGRA index: the stream is copied to a temporary file, which
+   * the native layer then loads. The temporary file is always deleted.
+   *
+   * @param inputStream  the stream to read from; closed after a successful load
+   * @param bufferLength the length of the buffer used for copying the bytes
+   * @return an {@link IndexReference} to the deserialized index
+   * @throws Throwable if the copy or the native call fails
+   */
+  private IndexReference deserialize(InputStream inputStream, int bufferLength) throws Throwable {
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(intMemoryLayout);
+    IndexReference indexReference = new IndexReference(resources);
+    // Platform temp directory (as serialize() does) instead of a hard-coded "/tmp/".
+    File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".cag");
+    try {
+      // The output file is closed before the native side reads it back.
+      try (FileOutputStream fileOutputStream = new FileOutputStream(tempFile)) {
+        byte[] chunk = new byte[bufferLength];
+        int chunkLength;
+        while ((chunkLength = inputStream.read(chunk)) != -1) {
+          fileOutputStream.write(chunk, 0, chunkLength);
+        }
+      }
+      deserializeMethodHandle.invokeExact(resources.getMemorySegment(), indexReference.getMemorySegment(),
+          returnValueMemorySegment, Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath()));
+      inputStream.close();
+    } finally {
+      tempFile.delete();
+    }
+    return indexReference;
+  }
+
+  /**
+   * Returns the parameters this index was built with.
+   *
+   * @return the {@link CagraIndexParams}; null for an index loaded from a stream
+   */
+  public CagraIndexParams getCagraIndexParameters() {
+    return this.cagraIndexParameters;
+  }
+
+  /**
+   * Returns the resources this index was created with.
+   *
+   * @return the {@link CuVSResources} passed to the constructor
+   */
+  public CuVSResources getCuVSResources() {
+    return this.resources;
+  }
+
+  /**
+   * Builder helps configure and create an instance of {@link CagraIndex}.
+   */
+  public static class Builder {
+
+    private float[][] dataset;
+    private CagraIndexParams cagraIndexParams;
+    private CagraCompressionParams cagraCompressionParams;
+    private CuVSResources cuvsResources;
+    private InputStream inputStream;
+
+    /**
+     * Constructs this Builder with an instance of {@link CuVSResources}.
+     *
+     * @param cuvsResources an instance of {@link CuVSResources}
+     */
+    public Builder(CuVSResources cuvsResources) {
+      this.cuvsResources = cuvsResources;
+    }
+
+    /**
+     * Sets the {@link InputStream} to deserialize the index from.
+     *
+     * @param inputStream an instance of {@link InputStream}
+     * @return an instance of this Builder
+     */
+    public Builder from(InputStream inputStream) {
+      this.inputStream = inputStream;
+      return this;
+    }
+
+    /**
+     * Sets the dataset for building the {@link CagraIndex}.
+     *
+     * @param dataset a two-dimensional float array
+     * @return an instance of this Builder
+     */
+    public Builder withDataset(float[][] dataset) {
+      this.dataset = dataset;
+      return this;
+    }
+
+    /**
+     * Registers a configured {@link CagraIndexParams} with this Builder.
+     *
+     * @param cagraIndexParameters An instance of CagraIndexParams.
+     * @return An instance of this Builder.
+     */
+    public Builder withIndexParams(CagraIndexParams cagraIndexParameters) {
+      this.cagraIndexParams = cagraIndexParameters;
+      return this;
+    }
+
+    /**
+     * Registers an instance of configured {@link CagraCompressionParams} with this
+     * Builder.
+     *
+     * @param cagraCompressionParams An instance of CagraCompressionParams.
+     * @return An instance of this Builder.
+     */
+    public Builder withCompressionParams(CagraCompressionParams cagraCompressionParams) {
+      this.cagraCompressionParams = cagraCompressionParams;
+      return this;
+    }
+
+    /**
+     * Builds and returns an instance of CagraIndex: deserialized from the input
+     * stream when one was supplied, otherwise built from the dataset.
+     *
+     * @return an instance of CagraIndex
+     * @throws NullPointerException if neither an input stream nor a dataset was set
+     */
+    public CagraIndex build() throws Throwable {
+      if (inputStream != null) {
+        return new CagraIndex(inputStream, cuvsResources);
+      }
+      java.util.Objects.requireNonNull(dataset, "either a dataset or an input stream must be provided");
+      return new CagraIndex(cagraIndexParams, cagraCompressionParams, dataset, cuvsResources);
+    }
+  }
+
+  /**
+   * Holds the memory reference to a CAGRA index.
+   */
+  protected static class IndexReference {
+
+    private final MemorySegment memorySegment;
+
+    /**
+     * Constructs an IndexReference backed by a newly allocated MemorySegment.
+     *
+     * @param resources the {@link CuVSResources} whose arena is allocated from
+     */
+    protected IndexReference(CuVSResources resources) {
+      this.memorySegment = CuVSCagraIndex.allocate(resources.arena);
+    }
+
+    /**
+     * Constructs an IndexReference around an existing MemorySegment.
+     *
+     * @param indexMemorySegment the MemorySegment holding the index reference
+     */
+    protected IndexReference(MemorySegment indexMemorySegment) {
+      this.memorySegment = indexMemorySegment;
+    }
+
+    /**
+     * Gets the MemorySegment of the index.
+     *
+     * @return the index MemorySegment
+     */
+    protected MemorySegment getMemorySegment() {
+      return this.memorySegment;
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java
new file mode 100644
index 000000000..767ee281b
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+
+import com.nvidia.cuvs.panama.CuVSCagraIndexParams;
+
+/**
+ * Supplemental parameters to build CAGRA Index.
+ *
+ * @since 25.02
+ */
+public class CagraIndexParams {
+
+  private final CagraGraphBuildAlgo cuvsCagraGraphBuildAlgo;
+  private final CuvsDistanceType cuvsDistanceType;
+  private final MemorySegment memorySegment;
+  private CuVSResources resources;
+  private final int intermediateGraphDegree;
+  private final int graphDegree;
+  private final int nnDescentNiter;
+  private final int numWriterThreads;
+
+  /**
+   * Enum that denotes which ANN algorithm is used to build the CAGRA graph.
+   */
+  public enum CagraGraphBuildAlgo {
+    /**
+     * Select the build algorithm automatically.
+     */
+    AUTO_SELECT(0),
+    /**
+     * Use IVF-PQ to build the all-neighbors knn graph.
+     */
+    IVF_PQ(1),
+    /**
+     * Experimental: use NN-Descent to build the all-neighbors knn graph.
+     */
+    NN_DESCENT(2);
+
+    /**
+     * The integer value written to the native index parameters for this choice.
+     */
+    public final int value;
+
+    private CagraGraphBuildAlgo(int value) {
+      this.value = value;
+    }
+  }
+
+  /**
+   * Enum that denotes how to compute distance.
+   */
+  public enum CuvsDistanceType {
+
+    /**
+     * evaluate as dist_ij = sum(x_ik^2) + sum(y_jk^2) - 2*sum(x_ik * y_jk)
+     */
+    L2Expanded(0),
+    /**
+     * same as L2Expanded, but inside the epilogue, perform square root operation
+     */
+    L2SqrtExpanded(1),
+    /**
+     * cosine distance
+     */
+    CosineExpanded(2),
+    /**
+     * L1 distance
+     */
+    L1(3),
+    /**
+     * evaluate as dist_ij += (x_ik - y_jk)^2
+     */
+    L2Unexpanded(4),
+    /**
+     * same as L2Unexpanded, but inside the epilogue, perform square root operation
+     */
+    L2SqrtUnexpanded(5),
+    /**
+     * basic inner product
+     */
+    InnerProduct(6),
+    /**
+     * Chebyshev (Linf) distance
+     */
+    Linf(7),
+    /**
+     * Canberra distance
+     */
+    Canberra(8),
+    /**
+     * Generalized Minkowski distance
+     */
+    LpUnexpanded(9),
+    /**
+     * Correlation distance
+     */
+    CorrelationExpanded(10),
+    /**
+     * Jaccard distance
+     */
+    JaccardExpanded(11),
+    /**
+     * Hellinger distance
+     */
+    HellingerExpanded(12),
+    /**
+     * Haversine distance
+     */
+    Haversine(13),
+    /**
+     * Bray-Curtis distance
+     */
+    BrayCurtis(14),
+    /**
+     * Jensen-Shannon distance
+     */
+    JensenShannon(15),
+    /**
+     * Hamming distance
+     */
+    HammingUnexpanded(16),
+    /**
+     * Kullback-Leibler divergence
+     */
+    KLDivergence(17),
+    /**
+     * Russell-Rao distance
+     */
+    RusselRaoExpanded(18),
+    /**
+     * Dice-Sorensen distance
+     */
+    DiceExpanded(19),
+    /**
+     * Precomputed (special value)
+     */
+    Precomputed(100);
+
+    /**
+     * The integer value written to the native index parameters for this choice.
+     */
+    public final int value;
+
+    private CuvsDistanceType(int value) {
+      this.value = value;
+    }
+
+  }
+
+  private CagraIndexParams(CuVSResources resources, int intermediateGraphDegree, int graphDegree,
+      CagraGraphBuildAlgo cagraGraphBuildAlgo, int nnDescentNiter, int numWriterThreads,
+      CuvsDistanceType cuvsDistanceType) {
+    this.resources = resources;
+    this.cuvsCagraGraphBuildAlgo = cagraGraphBuildAlgo;
+    this.cuvsDistanceType = cuvsDistanceType;
+    this.intermediateGraphDegree = intermediateGraphDegree;
+    this.graphDegree = graphDegree;
+    this.nnDescentNiter = nnDescentNiter;
+    this.numWriterThreads = numWriterThreads;
+    // initMemorySegment() reads the fields assigned above, so it has to run last.
+    this.memorySegment = initMemorySegment();
+  }
+
+  private MemorySegment initMemorySegment() {
+    MemorySegment segment = CuVSCagraIndexParams.allocate(resources.arena); // segment for CuVSCagraIndexParams
+    CuVSCagraIndexParams.intermediate_graph_degree(segment, intermediateGraphDegree);
+    CuVSCagraIndexParams.graph_degree(segment, graphDegree);
+    CuVSCagraIndexParams.build_algo(segment, cuvsCagraGraphBuildAlgo.value);
+    CuVSCagraIndexParams.nn_descent_niter(segment, nnDescentNiter);
+    CuVSCagraIndexParams.metric(segment, cuvsDistanceType.value);
+    return segment; // numWriterThreads is not written to the segment
+  }
+
+  /**
+   * Returns the degree of the input graph used for pruning.
+   *
+   * @return the intermediate graph degree
+   */
+  public int getIntermediateGraphDegree() {
+    return this.intermediateGraphDegree;
+  }
+
+  /**
+   * Returns the degree of the output graph.
+   *
+   * @return the output graph degree
+   */
+  public int getGraphDegree() {
+    return this.graphDegree;
+  }
+
+  /**
+   * Returns the {@link CagraGraphBuildAlgo} configured for building the index.
+   */
+  public CagraGraphBuildAlgo getCagraGraphBuildAlgo() {
+    return this.cuvsCagraGraphBuildAlgo;
+  }
+
+  /**
+   * Returns the number of iterations to run when building with
+   * {@link CagraGraphBuildAlgo#NN_DESCENT}.
+   */
+  public int getNNDescentNumIterations() {
+    return this.nnDescentNiter;
+  }
+
+  protected MemorySegment getMemorySegment() {
+    return this.memorySegment; // the segment filled in by initMemorySegment()
+  }
+
+  /**
+   * Returns the {@link CuvsDistanceType} configured for building the index.
+   */
+  public CuvsDistanceType getCuvsDistanceType() {
+    return this.cuvsDistanceType;
+  }
+
+  /**
+   * Returns the number of writer threads configured for building the index.
+   */
+  public int getNumWriterThreads() {
+    return this.numWriterThreads;
+  }
+
+  @Override
+  public String toString() {
+    return "CagraIndexParams [" + String.join(", ", "cuvsCagraGraphBuildAlgo=" + cuvsCagraGraphBuildAlgo,
+        "cuvsDistanceType=" + cuvsDistanceType, "intermediateGraphDegree=" + intermediateGraphDegree,
+        "graphDegree=" + graphDegree, "nnDescentNiter=" + nnDescentNiter, "numWriterThreads=" + numWriterThreads) + "]";
+  }
+
+  /**
+   * Builder configures and creates an instance of {@link CagraIndexParams}.
+   */
+  public static class Builder {
+
+    private CuVSResources resources;
+    private CagraGraphBuildAlgo cuvsCagraGraphBuildAlgo = CagraGraphBuildAlgo.NN_DESCENT;
+    private CuvsDistanceType cuvsDistanceType = CuvsDistanceType.L2Expanded;
+    private int intermediateGraphDegree = 128;
+    private int graphDegree = 64;
+    private int nnDescentNumIterations = 20;
+    private int numWriterThreads = 2;
+
+    /** Constructs this Builder with an instance of {@link CuVSResources}. */
+    public Builder(CuVSResources resources) {
+      this.resources = resources;
+    }
+
+    /**
+     * Sets the degree of input graph for pruning.
+     *
+     * @param intermediateGraphDegree degree of input graph for pruning
+     * @return an instance of Builder
+     */
+    public Builder withIntermediateGraphDegree(int intermediateGraphDegree) {
+      this.intermediateGraphDegree = intermediateGraphDegree;
+      return this;
+    }
+
+    /**
+     * Sets the degree of output graph.
+     *
+     * @param graphDegree degree of output graph
+     * @return an instance of Builder
+     */
+    public Builder withGraphDegree(int graphDegree) {
+      this.graphDegree = graphDegree;
+      return this;
+    }
+
+    /**
+     * Sets the {@link CagraGraphBuildAlgo} to use.
+     *
+     * @param cuvsCagraGraphBuildAlgo the {@link CagraGraphBuildAlgo} to use
+     * @return an instance of Builder
+     */
+    public Builder withCagraGraphBuildAlgo(CagraGraphBuildAlgo cuvsCagraGraphBuildAlgo) {
+      this.cuvsCagraGraphBuildAlgo = cuvsCagraGraphBuildAlgo;
+      return this;
+    }
+
+    /**
+     * Sets the metric to use.
+     *
+     * @param cuvsDistanceType the {@link CuvsDistanceType} to use
+     * @return an instance of Builder
+     */
+    public Builder withMetric(CuvsDistanceType cuvsDistanceType) {
+      this.cuvsDistanceType = cuvsDistanceType;
+      return this;
+    }
+
+    /**
+     * Sets the number of iterations to run if building with
+     * {@link CagraGraphBuildAlgo#NN_DESCENT}.
+     *
+     * @param nnDescentNiter the number of NN-Descent iterations to run
+     * @return an instance of Builder
+     */
+    public Builder withNNDescentNumIterations(int nnDescentNiter) {
+      this.nnDescentNumIterations = nnDescentNiter;
+      return this;
+    }
+
+    /**
+     * Sets the number of writer threads to use for indexing.
+     *
+     * @param numWriterThreads number of writer threads to use
+     * @return an instance of Builder
+     */
+    public Builder withNumWriterThreads(int numWriterThreads) {
+      this.numWriterThreads = numWriterThreads;
+      return this;
+    }
+
+    /**
+     * Builds an instance of {@link CagraIndexParams}.
+     *
+     * @return an instance of {@link CagraIndexParams}
+     */
+    public CagraIndexParams build() {
+      return new CagraIndexParams(resources, intermediateGraphDegree, graphDegree, cuvsCagraGraphBuildAlgo,
+          nnDescentNumIterations, numWriterThreads, cuvsDistanceType);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraQuery.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraQuery.java
new file mode 100644
index 000000000..de2fc3f41
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraQuery.java
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * CagraQuery holds the CagraSearchParams and the query vectors to be used while
+ * invoking search.
+ *
+ * @since 25.02
+ */
+public class CagraQuery {
+
+  private final CagraSearchParams cagraSearchParameters;
+  private final List<Integer> mapping;
+  private final float[][] queryVectors;
+  private final int topK;
+
+  /**
+   * Constructs an instance of {@link CagraQuery} using cagraSearchParameters,
+   * queryVectors, mapping, and topK. The arguments are stored as given (no copy).
+   *
+   * @param cagraSearchParameters an instance of {@link CagraSearchParams} holding
+   *                              the search parameters
+   * @param queryVectors          2D float query vector array
+   * @param mapping               the ID mapping list (may be null)
+   * @param topK                  the top k results to return
+   */
+  public CagraQuery(CagraSearchParams cagraSearchParameters, float[][] queryVectors, List<Integer> mapping, int topK) {
+    super();
+    this.cagraSearchParameters = cagraSearchParameters;
+    this.queryVectors = queryVectors;
+    this.mapping = mapping;
+    this.topK = topK;
+  }
+
+  /**
+   * Gets the instance of CagraSearchParams initially set.
+   *
+   * @return an instance of CagraSearchParams
+   */
+  public CagraSearchParams getCagraSearchParameters() {
+    return cagraSearchParameters;
+  }
+
+  /**
+   * Gets the query vector 2D float array.
+   *
+   * @return 2D float array
+   */
+  public float[][] getQueryVectors() {
+    return queryVectors;
+  }
+
+  /**
+   * Gets the ID mapping list that was passed in.
+   *
+   * @return the list of ID mappings, or null when none was set
+   */
+  public List<Integer> getMapping() {
+    return mapping;
+  }
+
+  /**
+   * Gets the topK value.
+   *
+   * @return the topK value
+   */
+  public int getTopK() {
+    return topK;
+  }
+
+  @Override
+  public String toString() { // deepToString: queryVectors is 2D, Arrays.toString would print row identity hashes
+    return "CagraQuery [cagraSearchParameters=" + cagraSearchParameters + ", queryVectors="
+        + Arrays.deepToString(queryVectors) + ", mapping=" + mapping + ", topK=" + topK + "]";
+  }
+
+  /**
+   * Builder helps configure and create an instance of CagraQuery.
+   */
+  public static class Builder {
+
+    private CagraSearchParams cagraSearchParams;
+    private float[][] queryVectors;
+    private List<Integer> mapping;
+    private int topK = 2;
+
+    /**
+     * Default constructor.
+     */
+    public Builder() {
+    }
+
+    /**
+     * Sets the instance of configured CagraSearchParams to be passed for search.
+     *
+     * @param cagraSearchParams an instance of the configured CagraSearchParams to
+     *                          be used for this query
+     * @return an instance of this Builder
+     */
+    public Builder withSearchParams(CagraSearchParams cagraSearchParams) {
+      this.cagraSearchParams = cagraSearchParams;
+      return this;
+    }
+
+    /**
+     * Registers the query vectors to be passed in the search call.
+     *
+     * @param queryVectors 2D float query vector array
+     * @return an instance of this Builder
+     */
+    public Builder withQueryVectors(float[][] queryVectors) {
+      this.queryVectors = queryVectors;
+      return this;
+    }
+
+    /**
+     * Sets the instance of mapping to be used for ID mapping.
+     *
+     * @param mapping the ID mapping instance
+     * @return an instance of this Builder
+     */
+    public Builder withMapping(List<Integer> mapping) {
+      this.mapping = mapping;
+      return this;
+    }
+
+    /**
+     * Registers the topK value. When this is never called, the Builder uses 2.
+     *
+     * @param topK the topK value used to retrieve the topK results
+     * @return an instance of this Builder
+     */
+    public Builder withTopK(int topK) {
+      this.topK = topK;
+      return this;
+    }
+
+    /**
+     * Builds an instance of CagraQuery.
+     *
+     * @return an instance of CagraQuery
+     */
+    public CagraQuery build() {
+      return new CagraQuery(cagraSearchParams, queryVectors, mapping, topK);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java
new file mode 100644
index 000000000..54dbb548e
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+
+import com.nvidia.cuvs.panama.CuVSCagraSearchParams;
+
+/**
+ * CagraSearchParams encapsulates the logic for configuring and holding search
+ * parameters.
+ *
+ * @since 25.02
+ */
+public class CagraSearchParams {
+
+  private CuVSResources resources;
+  private int maxQueries;
+  private int iTopKSize;
+  private int maxIterations;
+  private int teamSize;
+  private int searchWidth;
+  private int minIterations;
+  private int threadBlockSize;
+  private int hashmapMinBitlen;
+  private int numRandomSamplings;
+  private float hashMapMaxFillRate;
+  private long randXORMask;
+  private MemorySegment memorySegment;
+  private SearchAlgo searchAlgo;
+  private HashMapMode hashMapMode;
+
+  /**
+   * Enum to denote algorithm used to search CAGRA Index.
+   */
+  public enum SearchAlgo {
+    /**
+     * for large batch sizes
+     */
+    SINGLE_CTA(0),
+    /**
+     * for small batch sizes
+     */
+    MULTI_CTA(1),
+    /**
+     * MULTI_KERNEL
+     */
+    MULTI_KERNEL(2),
+    /**
+     * AUTO
+     */
+    AUTO(3);
+
+    /**
+     * The value for the enum choice.
+     */
+    public final int value;
+
+    private SearchAlgo(int value) {
+      this.value = value;
+    }
+  }
+
+  /**
+   * Enum to denote Hash Mode used while searching CAGRA index.
+   */
+  public enum HashMapMode {
+    /**
+     * HASH
+     */
+    HASH(0),
+    /**
+     * SMALL
+     */
+    SMALL(1),
+    /**
+     * AUTO_HASH
+     */
+    AUTO_HASH(2);
+
+    /**
+     * The value for the enum choice.
+     */
+    public final int value;
+
+    private HashMapMode(int value) {
+      this.value = value;
+    }
+  }
+
+  /**
+   * Constructs an instance of CagraSearchParams with passed search parameters.
+   *
+   * @param resources          the resources instance to use
+   * @param maxQueries         the maximum number of queries to search at the same
+   *                           time (batch size)
+   * @param iTopKSize          the number of intermediate search results retained
+   *                           during the search
+   * @param maxIterations      the upper limit of search iterations
+   * @param searchAlgo         the search implementation to use (may be null)
+   * @param teamSize           the number of threads used to calculate a single
+   *                           distance
+   * @param searchWidth        the number of graph nodes to select as the starting
+   *                           point for the search in each iteration
+   * @param minIterations      the lower limit of search iterations
+   * @param threadBlockSize    the thread block size
+   * @param hashmapMode        the hash map type to use (may be null)
+   * @param hashmapMinBitlen   the lower limit of hash map bit length
+   * @param hashmapMaxFillRate the upper limit of hash map fill rate
+   * @param numRandomSamplings the number of iterations of initial random seed
+   *                           node selection
+   * @param randXORMask        the bit mask used for initial random seed node
+   *                           selection
+   */
+  private CagraSearchParams(CuVSResources resources, int maxQueries, int iTopKSize, int maxIterations,
+      SearchAlgo searchAlgo, int teamSize, int searchWidth, int minIterations, int threadBlockSize,
+      HashMapMode hashmapMode, int hashmapMinBitlen, float hashmapMaxFillRate, int numRandomSamplings,
+      long randXORMask) {
+    this.maxQueries = maxQueries;
+    this.iTopKSize = iTopKSize;
+    this.maxIterations = maxIterations;
+    this.searchAlgo = searchAlgo;
+    this.teamSize = teamSize;
+    this.searchWidth = searchWidth;
+    this.minIterations = minIterations;
+    this.threadBlockSize = threadBlockSize;
+    this.hashMapMode = hashmapMode;
+    this.hashmapMinBitlen = hashmapMinBitlen;
+    this.hashMapMaxFillRate = hashmapMaxFillRate;
+    this.numRandomSamplings = numRandomSamplings;
+    this.randXORMask = randXORMask;
+    this.resources = resources;
+
+    this.memorySegment = allocateMemorySegment();
+  }
+
+  /**
+   * Allocates a CuVSCagraSearchParams segment from the resources' arena and copies the configured values into it.
+   */
+  private MemorySegment allocateMemorySegment() {
+    MemorySegment memorySegment = CuVSCagraSearchParams.allocate(resources.arena);
+    CuVSCagraSearchParams.max_queries(memorySegment, maxQueries);
+    CuVSCagraSearchParams.itopk_size(memorySegment, iTopKSize);
+    CuVSCagraSearchParams.max_iterations(memorySegment, maxIterations);
+    if (searchAlgo != null) { // algo is only written when an algorithm was chosen
+      CuVSCagraSearchParams.algo(memorySegment, searchAlgo.value);
+    }
+    CuVSCagraSearchParams.team_size(memorySegment, teamSize);
+    CuVSCagraSearchParams.search_width(memorySegment, searchWidth);
+    CuVSCagraSearchParams.min_iterations(memorySegment, minIterations);
+    CuVSCagraSearchParams.thread_block_size(memorySegment, threadBlockSize);
+    if (hashMapMode != null) { // hashmap_mode is only written when a mode was chosen
+      CuVSCagraSearchParams.hashmap_mode(memorySegment, hashMapMode.value);
+    }
+    CuVSCagraSearchParams.hashmap_min_bitlen(memorySegment, hashmapMinBitlen);
+    CuVSCagraSearchParams.hashmap_max_fill_rate(memorySegment, hashMapMaxFillRate);
+    CuVSCagraSearchParams.num_random_samplings(memorySegment, numRandomSamplings);
+    CuVSCagraSearchParams.rand_xor_mask(memorySegment, randXORMask);
+    return memorySegment;
+  }
+
+  /**
+   * Gets the maximum number of queries to search at the same time (batch size).
+   *
+   * @return the maximum number of queries
+   */
+  public int getMaxQueries() {
+    return maxQueries;
+  }
+
+  /**
+   * Gets the number of intermediate search results retained during the search.
+   *
+   * @return the number of intermediate search results
+   */
+  public int getITopKSize() {
+    return iTopKSize;
+  }
+
+  /**
+   * Gets the upper limit of search iterations.
+   *
+   * @return the upper limit value
+   */
+  public int getMaxIterations() {
+    return maxIterations;
+  }
+
+  /**
+   * Gets the number of threads used to calculate a single distance.
+   *
+   * @return the number of threads configured
+   */
+  public int getTeamSize() {
+    return teamSize;
+  }
+
+  /**
+   * Gets the number of graph nodes to select as the starting point for the search
+   * in each iteration.
+   *
+   * @return the number of graph nodes
+   */
+  public int getSearchWidth() {
+    return searchWidth;
+  }
+
+  /**
+   * Gets the lower limit of search iterations.
+   *
+   * @return the lower limit value
+   */
+  public int getMinIterations() {
+    return minIterations;
+  }
+
+  /**
+   * Gets the thread block size.
+   *
+   * @return the thread block size
+   */
+  public int getThreadBlockSize() {
+    return threadBlockSize;
+  }
+
+  /**
+   * Gets the lower limit of hash map bit length.
+   *
+   * @return the lower limit value
+   */
+  public int getHashmapMinBitlen() {
+    return hashmapMinBitlen;
+  }
+
+  /**
+   * Gets the number of iterations of initial random seed node selection.
+   *
+   * @return the number of iterations
+   */
+  public int getNumRandomSamplings() {
+    return numRandomSamplings;
+  }
+
+  /**
+   * Gets the upper limit of hash map fill rate.
+   *
+   * @return the upper limit of hash map fill rate
+   */
+  public float getHashMapMaxFillRate() {
+    return hashMapMaxFillRate;
+  }
+
+  /**
+   * Gets the bit mask used for initial random seed node selection.
+   *
+   * @return the bit mask value
+   */
+  public long getRandXORMask() {
+    return randXORMask;
+  }
+
+  /**
+   * Gets the MemorySegment holding CagraSearchParams (allocated once, in the constructor).
+   *
+   * @return the MemorySegment holding CagraSearchParams
+   */
+  protected MemorySegment getMemorySegment() {
+    return memorySegment;
+  }
+
+  /**
+   * Gets which search implementation is configured.
+   *
+   * @return the configured {@link SearchAlgo}, or null when none was set
+   */
+  public SearchAlgo getCagraSearchAlgo() {
+    return searchAlgo;
+  }
+
+  /**
+   * Gets the hash map mode configured.
+   *
+   * @return the configured {@link HashMapMode}, or null when none was set
+   */
+  public HashMapMode getHashMapMode() {
+    return hashMapMode;
+  }
+
+  @Override
+  public String toString() {
+    return "CagraSearchParams [resources=" + resources + ", maxQueries=" + maxQueries + ", itopkSize=" + iTopKSize
+        + ", maxIterations=" + maxIterations + ", cuvsCagraSearchAlgo=" + searchAlgo + ", teamSize=" + teamSize
+        + ", searchWidth=" + searchWidth + ", minIterations=" + minIterations + ", threadBlockSize=" + threadBlockSize
+        + ", hashMapMode=" + hashMapMode + ", hashMapMinBitlen=" + hashmapMinBitlen + ", hashMapMaxFillRate="
+        + hashMapMaxFillRate + ", numRandomSamplings=" + numRandomSamplings + ", randXORMask=" + randXORMask
+        + ", memorySegment=" + memorySegment + "]";
+  }
+
+  /**
+   * Builder configures and creates an instance of CagraSearchParams.
+   */
+  public static class Builder {
+
+    private CuVSResources resources;
+    private int maxQueries;
+    private int iTopKSize = 64;
+    private int maxIterations;
+    private int teamSize;
+    private int searchWidth = 1;
+    private int minIterations;
+    private int threadBlockSize;
+    private int hashMapMinBitlen;
+    private int numRandomSamplings = 1;
+    private float hashMapMaxFillRate = 0.5f;
+    private long randXORMask = 0x128394;
+    private SearchAlgo searchAlgo;
+    private HashMapMode hashMapMode;
+
+    /**
+     * Constructs this Builder with the {@link CuVSResources} whose arena is used to allocate the parameters.
+     *
+     * @param resources the {@link CuVSResources} instance to use
+     */
+    public Builder(CuVSResources resources) {
+      this.resources = resources;
+    }
+
+    /**
+     * Sets the maximum number of queries to search at the same time (batch size).
+     * Auto select when 0.
+     *
+     * @param maxQueries the maximum number of queries
+     * @return an instance of this Builder
+     */
+    public Builder withMaxQueries(int maxQueries) {
+      this.maxQueries = maxQueries;
+      return this;
+    }
+
+    /**
+     * Sets the number of intermediate search results retained during the search.
+     * This is the main knob to adjust trade off between accuracy and search speed.
+     * Higher values improve the search accuracy.
+     *
+     * @param iTopKSize the number of intermediate search results
+     * @return an instance of this Builder
+     */
+    public Builder withItopkSize(int iTopKSize) {
+      this.iTopKSize = iTopKSize;
+      return this;
+    }
+
+    /**
+     * Sets the upper limit of search iterations. Auto select when 0.
+     *
+     * @param maxIterations the upper limit of search iterations
+     * @return an instance of this Builder
+     */
+    public Builder withMaxIterations(int maxIterations) {
+      this.maxIterations = maxIterations;
+      return this;
+    }
+
+    /**
+     * Sets which search implementation to use.
+     *
+     * @param cuvsCagraSearchAlgo the {@link SearchAlgo} to use
+     * @return an instance of this Builder
+     */
+    public Builder withAlgo(SearchAlgo cuvsCagraSearchAlgo) {
+      this.searchAlgo = cuvsCagraSearchAlgo;
+      return this;
+    }
+
+    /**
+     * Sets the number of threads used to calculate a single distance. 4, 8, 16, or
+     * 32.
+     *
+     * @param teamSize the number of threads used to calculate a single distance
+     * @return an instance of this Builder
+     */
+    public Builder withTeamSize(int teamSize) {
+      this.teamSize = teamSize;
+      return this;
+    }
+
+    /**
+     * Sets the number of graph nodes to select as the starting point for the search
+     * in each iteration.
+     *
+     * @param searchWidth the number of graph nodes to select
+     * @return an instance of this Builder
+     */
+    public Builder withSearchWidth(int searchWidth) {
+      this.searchWidth = searchWidth;
+      return this;
+    }
+
+    /**
+     * Sets the lower limit of search iterations.
+     *
+     * @param minIterations the lower limit of search iterations
+     * @return an instance of this Builder
+     */
+    public Builder withMinIterations(int minIterations) {
+      this.minIterations = minIterations;
+      return this;
+    }
+
+    /**
+     * Sets the thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when
+     * 0.
+     *
+     * @param threadBlockSize the thread block size
+     * @return an instance of this Builder
+     */
+    public Builder withThreadBlockSize(int threadBlockSize) {
+      this.threadBlockSize = threadBlockSize;
+      return this;
+    }
+
+    /**
+     * Sets the hash map type. Auto selection presumably means {@link HashMapMode#AUTO_HASH} (there is no AUTO).
+     *
+     * @param hashMapMode the {@link HashMapMode}
+     * @return an instance of this Builder
+     */
+    public Builder withHashMapMode(HashMapMode hashMapMode) {
+      this.hashMapMode = hashMapMode;
+      return this;
+    }
+
+    /**
+     * Sets the lower limit of hash map bit length. More than 8.
+     *
+     * @param hashMapMinBitlen the lower limit of hash map bit length
+     * @return an instance of this Builder
+     */
+    public Builder withHashMapMinBitlen(int hashMapMinBitlen) {
+      this.hashMapMinBitlen = hashMapMinBitlen;
+      return this;
+    }
+
+    /**
+     * Sets the upper limit of hash map fill rate. More than 0.1, less than 0.9.
+     *
+     * @param hashMapMaxFillRate the upper limit of hash map fill rate
+     * @return an instance of this Builder
+     */
+    public Builder withHashMapMaxFillRate(float hashMapMaxFillRate) {
+      this.hashMapMaxFillRate = hashMapMaxFillRate;
+      return this;
+    }
+
+    /**
+     * Sets the number of iterations of initial random seed node selection. 1 or
+     * more.
+     *
+     * @param numRandomSamplings the number of iterations of initial random seed
+     *                           node selection
+     * @return an instance of this Builder
+     */
+    public Builder withNumRandomSamplings(int numRandomSamplings) {
+      this.numRandomSamplings = numRandomSamplings;
+      return this;
+    }
+
+    /**
+     * Sets the bit mask used for initial random seed node selection.
+     *
+     * @param randXORMask the bit mask used for initial random seed node selection
+     * @return an instance of this Builder
+     */
+    public Builder withRandXorMask(long randXORMask) {
+      this.randXORMask = randXORMask;
+      return this;
+    }
+
+    /**
+     * Builds an instance of {@link CagraSearchParams} with passed search
+     * parameters.
+     *
+     * @return an instance of CagraSearchParams
+     */
+    public CagraSearchParams build() {
+      return new CagraSearchParams(resources, maxQueries, iTopKSize, maxIterations, searchAlgo, teamSize, searchWidth,
+          minIterations, threadBlockSize, hashMapMode, hashMapMinBitlen, hashMapMaxFillRate, numRandomSamplings,
+          randXORMask);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchResults.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchResults.java
new file mode 100644
index 000000000..3473facce
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchResults.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.nvidia.cuvs.common.SearchResults;
+
+/**
+ * CagraSearchResults encapsulates the logic for reading and holding CAGRA search results.
+ *
+ * @since 25.02
+ */
+public class CagraSearchResults extends SearchResults {
+
+  protected CagraSearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout,
+      MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List<Integer> mapping,
+      long numberOfQueries) {
+    super(neighboursSequenceLayout, distancesSequenceLayout, neighboursMemorySegment, distancesMemorySegment, topK,
+        mapping, numberOfQueries);
+    readResultMemorySegments(); // results are read eagerly, at construction time
+  }
+
+  /**
+   * Reads neighbors and distances {@link MemorySegment} and loads the values
+   * internally, adding one insertion-ordered id-to-distance map to results per topK entries read.
+   */
+  protected void readResultMemorySegments() {
+    Map<Integer, Float> intermediateResultMap = new LinkedHashMap<Integer, Float>();
+    int count = 0; // entries collected so far for the current group
+    for (long i = 0; i < topK * numberOfQueries; i++) {
+      int id = (int) neighboursVarHandle.get(neighboursMemorySegment, 0L, i);
+      float dst = (float) distancesVarHandle.get(distancesMemorySegment, 0L, i);
+      intermediateResultMap.put(mapping != null ? mapping.get(id) : id, dst); // translate id via mapping when given
+      count += 1;
+      if (count == topK) { // group complete: publish it and start a fresh map
+        results.add(intermediateResultMap);
+        intermediateResultMap = new LinkedHashMap<Integer, Float>();
+        count = 0;
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java
new file mode 100644
index 000000000..dbaba3ebb
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.io.File;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+
+import com.nvidia.cuvs.common.Util;
+
+/**
+ * Used for allocating resources for cuVS
+ *
+ * @since 25.02
+ */
+public class CuVSResources implements AutoCloseable {
+
+  public final Arena arena;
+  public final Linker linker;
+  public final SymbolLookup symbolLookup;
+  protected File nativeLibrary;
+  private final MethodHandle createResourcesMethodHandle;
+  private final MethodHandle destroyResourcesMethodHandle;
+  private MemorySegment resourcesMemorySegment;
+  private MemoryLayout intMemoryLayout;
+
+  /**
+   * Constructor that allocates the resources needed for cuVS
+   *
+   * @throws Throwable exception thrown when native function is invoked
+   */
+  public CuVSResources() throws Throwable {
+    linker = Linker.nativeLinker();
+    arena = Arena.ofShared();
+
+    nativeLibrary = Util.loadNativeLibrary();
+    symbolLookup = SymbolLookup.libraryLookup(nativeLibrary.getAbsolutePath(), arena);
+    intMemoryLayout = linker.canonicalLayouts().get("int");
+
+    createResourcesMethodHandle = linker.downcallHandle(symbolLookup.find("create_resources").get(),
+        FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+
+    destroyResourcesMethodHandle = linker.downcallHandle(symbolLookup.find("destroy_resources").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+
+    createResources();
+  }
+
+  /**
+   * Creates the resources used internally and stores the returned reference in this instance.
+   *
+   * @throws Throwable exception thrown when native function is invoked
+   */
+  public void createResources() throws Throwable {
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = arena.allocate(returnValueMemoryLayout);
+    resourcesMemorySegment = (MemorySegment) createResourcesMethodHandle.invokeExact(returnValueMemorySegment);
+  }
+
+  @Override
+  public void close() {
+    // The original check was inverted (closed the arena only when it was already closed), so it never ran.
+    if (arena.scope().isAlive()) { // skip native teardown when already closed, making close() repeatable
+      try {
+        destroyResourcesMethodHandle.invokeExact(resourcesMemorySegment, arena.allocate(intMemoryLayout));
+      } catch (Throwable e) {
+        e.printStackTrace(); // best effort: a failed native destroy must not prevent releasing the arena
+      } finally {
+        arena.close(); // frees arena allocations; the library lookup was created with this arena
+      }
+    }
+    nativeLibrary.delete();
+  }
+
+  /**
+   * Gets the reference to the cuvsResources MemorySegment.
+   *
+   * @return cuvsResources MemorySegment
+   */
+  protected MemorySegment getMemorySegment() {
+    return resourcesMemorySegment;
+  }
+
+  /**
+   * Returns the loaded libcuvs_java_cagra.so as a {@link SymbolLookup}
+   */
+  protected SymbolLookup getSymbolLookup() {
+    return symbolLookup;
+  }
+
+  /**
+   * Container for GPU information (nested variant without a device name; kept for compatibility)
+   */
+  public class GPUInfo {
+
+    private final int gpuId;
+    private final long freeMemory;
+    private final long totalMemory;
+    private final float computeCapability;
+
+    public GPUInfo(int gpuId, long freeMemory, long totalMemory, float computeCapability) {
+      super();
+      this.gpuId = gpuId;
+      this.freeMemory = freeMemory;
+      this.totalMemory = totalMemory;
+      this.computeCapability = computeCapability;
+    }
+
+    public int getGpuId() {
+      return gpuId;
+    }
+
+    public long getFreeMemory() {
+      return freeMemory;
+    }
+
+    public long getTotalMemory() {
+      return totalMemory;
+    }
+
+    public float getComputeCapability() {
+      return computeCapability;
+    }
+
+    @Override
+    public String toString() {
+      return "GPUInfo [gpuId=" + gpuId + ", freeMemory=" + freeMemory + ", totalMemory=" + totalMemory
+          + ", computeCapability=" + computeCapability + "]";
+    }
+
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java
new file mode 100644
index 000000000..48cade179
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java
@@ -0,0 +1,56 @@
+package com.nvidia.cuvs;
+
+/**
+ * Contains GPU information
+ */
+public class GPUInfo {
+  private final int gpuId;
+  private final String name;
+  private final long freeMemory;
+  private final long totalMemory;
+  private final float computeCapability;
+
+  /**
+   * Constructor for GPUInfo
+   *
+   * @param gpuId             id of the GPU starting from 0
+   * @param name              ASCII string identifying device
+   * @param freeMemory        returned free memory in bytes
+   * @param totalMemory       returned total memory in bytes
+   * @param computeCapability the compute capability of the device
+   */
+  public GPUInfo(int gpuId, String name, long freeMemory, long totalMemory, float computeCapability) {
+    super();
+    this.gpuId = gpuId;
+    this.name = name;
+    this.freeMemory = freeMemory;
+    this.totalMemory = totalMemory;
+    this.computeCapability = computeCapability;
+  }
+
+  public int getGpuId() {
+    return gpuId;
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public long getFreeMemory() {
+    return freeMemory;
+  }
+
+  public long getTotalMemory() {
+    return totalMemory;
+  }
+
+  public float getComputeCapability() {
+    return computeCapability;
+  }
+
+  @Override
+  public String toString() {
+    return "GPUInfo [gpuId=" + gpuId + ", name=" + name + ", freeMemory=" + freeMemory + ", totalMemory=" + totalMemory
+        + ", computeCapability=" + computeCapability + "]";
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java
new file mode 100644
index 000000000..d55308a1b
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.UUID;
+
+import com.nvidia.cuvs.common.Util;
+import com.nvidia.cuvs.panama.CuVSHnswIndex;
+
+/**
+ * {@link HnswIndex} encapsulates a HNSW index, along with methods to interact
+ * with it.
+ *
+ * @since 25.02
+ */
+public class HnswIndex {
+
+  private final CuVSResources resources;
+  private MethodHandle deserializeHnswIndexMethodHandle;
+  private MethodHandle searchHnswIndexMethodHandle;
+  private MethodHandle destroyHnswIndexMethodHandle;
+  private HnswIndexParams hnswIndexParams;
+  private IndexReference hnswIndexReference;
+  private MemoryLayout longMemoryLayout;
+  private MemoryLayout intMemoryLayout;
+  private MemoryLayout floatMemoryLayout;
+
+  /**
+   * Constructor for loading the index from an {@link InputStream}
+   *
+   * @param inputStream an instance of stream to read the index bytes from
+   * @param resources   an instance of {@link CuVSResources}
+   */
+  private HnswIndex(InputStream inputStream, CuVSResources resources, HnswIndexParams hnswIndexParams)
+      throws Throwable {
+    this.hnswIndexParams = hnswIndexParams;
+    this.resources = resources;
+
+    longMemoryLayout = resources.linker.canonicalLayouts().get("long");
+    intMemoryLayout = resources.linker.canonicalLayouts().get("int");
+    floatMemoryLayout = resources.linker.canonicalLayouts().get("float");
+
+    initializeMethodHandles();
+    this.hnswIndexReference = deserialize(inputStream);
+  }
+
+  /**
+   * Initializes the {@link MethodHandles} for invoking native methods.
+   *
+   * @throws IOException @{@link IOException} is unable to load the native library
+   */
+  private void initializeMethodHandles() throws IOException {
+    deserializeHnswIndexMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("deserialize_hnsw_index").get(), FunctionDescriptor.of(ValueLayout.ADDRESS,
+            ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout));
+
+    searchHnswIndexMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("search_hnsw_index").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS,
+            ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout, intMemoryLayout,
+            longMemoryLayout));
+
+    destroyHnswIndexMethodHandle = resources.linker.downcallHandle(
+        resources.getSymbolLookup().find("destroy_hnsw_index").get(),
+        FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+  }
+
+  /**
+   * Invokes the native destroy_hnsw_index to de-allocate the HNSW index
+   */
+  public void destroyIndex() throws Throwable {
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+    destroyHnswIndexMethodHandle.invokeExact(hnswIndexReference.getMemorySegment(), returnValueMemorySegment);
+  }
+
+  /**
+   * Invokes the native search_hnsw_index via the Panama API for searching a HNSW
+   * index.
+   *
+   * @param query an instance of {@link HnswQuery} holding the query vectors and
+   *              other parameters
+   * @return an instance of {@link HnswSearchResults} containing the results
+   */
+  public HnswSearchResults search(HnswQuery query) throws Throwable {
+    long numQueries = query.getQueryVectors().length;
+    long numBlocks = query.getTopK() * numQueries;
+    int vectorDimension = numQueries > 0 ? query.getQueryVectors()[0].length : 0;
+
+    SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, longMemoryLayout);
+    SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, floatMemoryLayout);
+    MemorySegment neighborsMemorySegment = resources.arena.allocate(neighborsSequenceLayout);
+    MemorySegment distancesMemorySegment = resources.arena.allocate(distancesSequenceLayout);
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+
+    searchHnswIndexMethodHandle.invokeExact(resources.getMemorySegment(), hnswIndexReference.getMemorySegment(),
+        query.getHnswSearchParams().getHnswSearchParamsMemorySegment(), returnValueMemorySegment,
+        neighborsMemorySegment, distancesMemorySegment,
+        Util.buildMemorySegment(resources.linker, resources.arena, query.getQueryVectors()), query.getTopK(),
+        vectorDimension, numQueries);
+
+    return new HnswSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment,
+        distancesMemorySegment, query.getTopK(), query.getMapping(), numQueries);
+  }
+
+  /**
+   * Gets an instance of {@link IndexReference} by deserializing a HNSW index
+   * using an {@link InputStream}.
+   *
+   * @param inputStream an instance of {@link InputStream}
+   * @return an instance of {@link IndexReference}.
+   */
+  private IndexReference deserialize(InputStream inputStream) throws Throwable {
+    return deserialize(inputStream, 1024);
+  }
+
+  /**
+   * Gets an instance of {@link IndexReference} by deserializing a HNSW index
+   * using an {@link InputStream}.
+   *
+   * @param inputStream  an instance of {@link InputStream}
+   * @param bufferLength the length of the buffer to use while reading the bytes
+   *                     from the stream. Default value is 1024.
+   * @return an instance of {@link IndexReference}.
+   */
+  private IndexReference deserialize(InputStream inputStream, int bufferLength) throws Throwable {
+    MemoryLayout returnValueMemoryLayout = intMemoryLayout;
+    MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout);
+    String tmpIndexFile = "/tmp/" + UUID.randomUUID().toString() + ".hnsw";
+
+    File tempFile = new File(tmpIndexFile);
+    FileOutputStream fileOutputStream = new FileOutputStream(tempFile);
+    byte[] chunk = new byte[bufferLength];
+    int chunkLength = 0;
+    while ((chunkLength = inputStream.read(chunk)) != -1) {
+      fileOutputStream.write(chunk, 0, chunkLength);
+    }
+
+    IndexReference indexReference = new IndexReference((MemorySegment) deserializeHnswIndexMethodHandle.invokeExact(
+        resources.getMemorySegment(), Util.buildMemorySegment(resources.linker, resources.arena, tmpIndexFile),
+        hnswIndexParams.getHnswIndexParamsMemorySegment(), returnValueMemorySegment,
+        hnswIndexParams.getVectorDimension()));
+
+    inputStream.close();
+    fileOutputStream.close();
+    tempFile.delete();
+
+    return indexReference;
+  }
+
+  /**
+   * Builder helps configure and create an instance of {@link HnswIndex}.
+   */
+  public static class Builder {
+
+    private CuVSResources cuvsResources;
+    private InputStream inputStream;
+    private HnswIndexParams hnswIndexParams;
+
+    /**
+     * Constructs this Builder with an instance of {@link CuVSResources}.
+     *
+     * @param cuvsResources an instance of {@link CuVSResources}
+     */
+    public Builder(CuVSResources cuvsResources) {
+      this.cuvsResources = cuvsResources;
+    }
+
+    /**
+     * Sets an instance of InputStream typically used when index deserialization is
+     * needed.
+     *
+     * @param inputStream an instance of {@link InputStream}
+     * @return an instance of this Builder
+     */
+    public Builder from(InputStream inputStream) {
+      this.inputStream = inputStream;
+      return this;
+    }
+
+    /**
+     * Registers an instance of configured {@link HnswIndexParams} with this
+     * Builder.
+     *
+     * @param hnswIndexParameters An instance of HnswIndexParams.
+     * @return An instance of this Builder.
+     */
+    public Builder withIndexParams(HnswIndexParams hnswIndexParameters) {
+      this.hnswIndexParams = hnswIndexParameters;
+      return this;
+    }
+
+    /**
+     * Builds and returns an instance of CagraIndex.
+     *
+     * @return an instance of CagraIndex
+     */
+    public HnswIndex build() throws Throwable {
+      return new HnswIndex(inputStream, cuvsResources, hnswIndexParams);
+    }
+  }
+
+  /**
+   * Holds the memory reference to a HNSW index.
+   */
+  protected static class IndexReference {
+
+    private final MemorySegment memorySegment;
+
+    /**
+     * Constructs CagraIndexReference and allocate the MemorySegment.
+     */
+    protected IndexReference(CuVSResources resources) {
+      memorySegment = CuVSHnswIndex.allocate(resources.arena);
+    }
+
+    /**
+     * Constructs CagraIndexReference with an instance of MemorySegment passed as a
+     * parameter.
+     *
+     * @param indexMemorySegment the MemorySegment instance to use for containing
+     *                           index reference
+     */
+    protected IndexReference(MemorySegment indexMemorySegment) {
+      this.memorySegment = indexMemorySegment;
+    }
+
+    /**
+     * Gets the instance of index MemorySegment.
+     *
+     * @return index MemorySegment
+     */
+    protected MemorySegment getMemorySegment() {
+      return memorySegment;
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java
new file mode 100644
index 000000000..ef06adf61
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+
+import com.nvidia.cuvs.panama.CuVSHnswIndexParams;
+
+/**
+ * Supplemental parameters used when building an HNSW index.
+ *
+ * @since 25.02
+ */
+public class HnswIndexParams {
+
+  /**
+   * Hierarchy options for an HNSW index converted from a CAGRA index.
+   *
+   * NOTE: With {@code NONE} the HNSW index is built as a base-layer-only
+   * index.
+   */
+  public enum CuvsHnswHierarchy {
+
+    /**
+     * Flat hierarchy, search is base-layer only
+     */
+    NONE(0),
+
+    /**
+     * Full hierarchy is built using the CPU
+     */
+    CPU(1);
+
+    /**
+     * The integer value associated with this enum constant.
+     */
+    public final int value;
+
+    CuvsHnswHierarchy(int value) {
+      this.value = value;
+    }
+  }
+
+  private final CuVSResources resources;
+  private final MemorySegment memorySegment;
+  private final CuvsHnswHierarchy hierarchy;
+  private final int efConstruction;
+  private final int numThreads;
+  private final int vectorDimension;
+
+  private HnswIndexParams(CuVSResources resources, CuvsHnswHierarchy hierarchy, int efConstruction, int numThreads,
+      int vectorDimension) {
+    this.resources = resources;
+    this.hierarchy = hierarchy;
+    this.efConstruction = efConstruction;
+    this.numThreads = numThreads;
+    this.vectorDimension = vectorDimension;
+    this.memorySegment = allocateMemorySegment(); // last on purpose: reads the fields assigned above
+  }
+
+  /** Allocates the parameter memory segment and copies ef_construction and
+   * num_threads into it. NOTE(review): hierarchy is not written here; confirm.
+   */
+  private MemorySegment allocateMemorySegment() {
+    MemorySegment segment = CuVSHnswIndexParams.allocate(resources.arena);
+    CuVSHnswIndexParams.ef_construction(segment, efConstruction);
+    CuVSHnswIndexParams.num_threads(segment, numThreads);
+    return segment;
+  }
+
+  public MemorySegment getHnswIndexParamsMemorySegment() {
+    return this.memorySegment;
+  }
+
+  /**
+   * Gets the hierarchy option these parameters were built with.
+   * @return the configured {@link CuvsHnswHierarchy}
+   */
+  public CuvsHnswHierarchy getHierarchy() {
+    return this.hierarchy;
+  }
+
+  /**
+   * Gets the efConstruction value these parameters were built with.
+   * @return the efConstruction value
+   */
+  public int getEfConstruction() {
+    return this.efConstruction;
+  }
+
+  /**
+   * Gets the number of threads these parameters were built with.
+   * @return the number of threads
+   */
+  public int getNumThreads() {
+    return this.numThreads;
+  }
+
+  /**
+   * Gets the vector dimension these parameters were built with.
+   * @return the vector dimension
+   */
+  public int getVectorDimension() {
+    return this.vectorDimension;
+  }
+
+  public CuVSResources getResources() {
+    return this.resources;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("HnswIndexParams [hierarchy=%s, efConstruction=%s, numThreads=%s, vectorDimension=%s]",
+        hierarchy, efConstruction, numThreads, vectorDimension);
+  }
+
+  /**
+   * Builder that collects settings and creates an {@link HnswIndexParams}.
+   */
+  public static class Builder {
+
+    private CuVSResources resources;
+    private CuvsHnswHierarchy hierarchy = CuvsHnswHierarchy.NONE;
+    private int efConstruction = 200;
+    private int numThreads = 2;
+    private int vectorDimension;
+
+    /**
+     * Constructs this Builder with an instance of {@link CuVSResources}.
+     *
+     * @param resources the {@link CuVSResources} instance to use
+     */
+    public Builder(CuVSResources resources) {
+      this.resources = resources;
+    }
+
+    /**
+     * Chooses the hierarchy for an HNSW index converted from a CAGRA index.
+     *
+     * NOTE: With {@code NONE} the HNSW index is built as a base-layer-only
+     * index. {@code NONE} is also the default when this method is not called.
+     *
+     * @param hierarchy the hierarchy for HNSW index when converting from CAGRA
+     *                  index
+     * @return an instance of Builder
+     */
+    public Builder withHierarchy(CuvsHnswHierarchy hierarchy) {
+      this.hierarchy = hierarchy;
+      return this;
+    }
+
+    /**
+     * Sets the size of the candidate list during hierarchy construction when
+     * hierarchy is `CPU`. Defaults to 200 when this method is not called.
+     *
+     * @param efConstruction the size of the candidate list during hierarchy
+     *                       construction when hierarchy is `CPU`
+     * @return an instance of Builder
+     */
+    public Builder withEfConstruction(int efConstruction) {
+      this.efConstruction = efConstruction;
+      return this;
+    }
+
+    /**
+     * Sets the number of host threads to use to construct hierarchy when hierarchy
+     * is `CPU`. Defaults to 2 when this method is not called.
+     *
+     * @param numThreads the number of threads
+     * @return an instance of Builder
+     */
+    public Builder withNumThreads(int numThreads) {
+      this.numThreads = numThreads;
+      return this;
+    }
+
+    /**
+     * Sets the vector dimension.
+     *
+     * @param vectorDimension the vector dimension
+     * @return an instance of Builder
+     */
+    public Builder withVectorDimension(int vectorDimension) {
+      this.vectorDimension = vectorDimension;
+      return this;
+    }
+
+    /**
+     * Creates an {@link HnswIndexParams} from the values collected so far.
+     *
+     * @return an instance of {@link HnswIndexParams}
+     */
+    public HnswIndexParams build() {
+      return new HnswIndexParams(resources, hierarchy, efConstruction, numThreads, vectorDimension);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswQuery.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswQuery.java
new file mode 100644
index 000000000..2575bbde4
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswQuery.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * HnswQuery holds the query vectors to be used while invoking search on the
+ * HNSW index.
+ *
+ * @since 25.02
+ */
+public class HnswQuery {
+
+  private final HnswSearchParams hnswSearchParams;
+  private final List<Integer> mapping;
+  private final float[][] queryVectors;
+  private final int topK;
+
+  /**
+   * Constructs an instance of {@link HnswQuery} using search parameters,
+   * queryVectors, mapping, and topK. The arguments are stored as-is, not copied.
+   *
+   * @param hnswSearchParams the search parameters to use
+   * @param queryVectors     2D float query vector array
+   * @param mapping          an instance of ID mapping
+   * @param topK             the top k results to return
+   */
+  private HnswQuery(HnswSearchParams hnswSearchParams, float[][] queryVectors, List<Integer> mapping, int topK) {
+    this.hnswSearchParams = hnswSearchParams;
+    this.queryVectors = queryVectors;
+    this.mapping = mapping;
+    this.topK = topK;
+  }
+
+  /**
+   * Gets the instance of HnswSearchParams.
+   *
+   * @return the instance of {@link HnswSearchParams}
+   */
+  public HnswSearchParams getHnswSearchParams() {
+    return hnswSearchParams;
+  }
+
+  /**
+   * Gets the query vector 2D float array.
+   *
+   * @return 2D float array
+   */
+  public float[][] getQueryVectors() {
+    return queryVectors;
+  }
+
+  /**
+   * Gets the ID mapping list that was passed to the builder.
+   *
+   * @return the list of ID mappings, or null when none was set
+   */
+  public List<Integer> getMapping() {
+    return mapping;
+  }
+
+  /**
+   * Gets the topK value.
+   *
+   * @return an integer
+   */
+  public int getTopK() {
+    return topK;
+  }
+
+  @Override
+  public String toString() {
+    // deepToString: queryVectors is 2D, and Arrays.toString would print row identity hashes instead of values.
+    return "HnswQuery [mapping=" + mapping + ", queryVectors=" + Arrays.deepToString(queryVectors) + ", topK=" + topK + "]";
+  }
+  /**
+   * Builder helps configure and create an instance of {@link HnswQuery}.
+   */
+  public static class Builder {
+
+    private HnswSearchParams hnswSearchParams;
+    private float[][] queryVectors;
+    private List<Integer> mapping;
+    private int topK = 2;
+
+    /**
+     * Sets the instance of configured HnswSearchParams to be passed for search.
+     *
+     * @param hnswSearchParams an instance of the configured HnswSearchParams to be
+     *                         used for this query
+     * @return an instance of this Builder
+     */
+    public Builder withSearchParams(HnswSearchParams hnswSearchParams) {
+      this.hnswSearchParams = hnswSearchParams;
+      return this;
+    }
+
+    /**
+     * Registers the query vectors to be passed in the search call.
+     *
+     * @param queryVectors 2D float query vector array
+     * @return an instance of this Builder
+     */
+    public Builder withQueryVectors(float[][] queryVectors) {
+      this.queryVectors = queryVectors;
+      return this;
+    }
+
+    /**
+     * Sets the instance of mapping to be used for ID mapping.
+     *
+     * @param mapping the ID mapping instance
+     * @return an instance of this Builder
+     */
+    public Builder withMapping(List<Integer> mapping) {
+      this.mapping = mapping;
+      return this;
+    }
+
+    /**
+     * Registers the topK value; it defaults to 2 when this method is not called.
+     *
+     * @param topK the topK value used to retrieve the topK results
+     * @return an instance of this Builder
+     */
+    public Builder withTopK(int topK) {
+      this.topK = topK;
+      return this;
+    }
+
+    /**
+     * Builds an instance of {@link HnswQuery}
+     *
+     * @return an instance of {@link HnswQuery}
+     */
+    public HnswQuery build() {
+      return new HnswQuery(hnswSearchParams, queryVectors, mapping, topK);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java
new file mode 100644
index 000000000..a2725e6de
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+
+import com.nvidia.cuvs.panama.CuVSHnswSearchParams;
+
+/**
+ * Holds the search-time settings for an HNSW index together with the memory
+ * segment they are written into.
+ *
+ * @since 25.02
+ */
+public class HnswSearchParams {
+
+  private final CuVSResources resources;
+  private final MemorySegment memorySegment;
+  private final int ef;
+  private final int numThreads;
+
+  /**
+   * Stores the given settings and writes them into a freshly allocated segment.
+   *
+   * @param resources  the resources instance to use
+   * @param ef         the ef value
+   * @param numThreads the number of threads
+   *
+   */
+  private HnswSearchParams(CuVSResources resources, int ef, int numThreads) {
+    this.resources = resources;
+    this.ef = ef;
+    this.numThreads = numThreads;
+    this.memorySegment = allocateMemorySegment(); // last on purpose: reads the fields assigned above
+  }
+
+  /**
+   * Allocates the search-parameter segment and copies ef and num_threads into it.
+   */
+  private MemorySegment allocateMemorySegment() {
+    MemorySegment segment = CuVSHnswSearchParams.allocate(resources.arena);
+    CuVSHnswSearchParams.ef(segment, ef);
+    CuVSHnswSearchParams.num_threads(segment, numThreads);
+    return segment;
+  }
+
+  public MemorySegment getHnswSearchParamsMemorySegment() {
+    return this.memorySegment;
+  }
+
+  /**
+   * Gets the ef value
+   *
+   * @return the integer ef value
+   */
+  public int getEf() {
+    return this.ef;
+  }
+
+  /**
+   * Gets the number of threads
+   *
+   * @return the number of threads
+   */
+  public int getNumThreads() {
+    return this.numThreads;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("HnswSearchParams [ef=%s, numThreads=%s]", ef, numThreads);
+  }
+
+  /**
+   * Builder that collects settings and creates an {@link HnswSearchParams}.
+   */
+  public static class Builder {
+
+    private CuVSResources resources;
+    private int ef = 200;
+    private int numThreads = 0;
+
+    /**
+     * Constructs this Builder with an instance of {@link CuVSResources}.
+     *
+     * @param resources the {@link CuVSResources} instance to use
+     */
+    public Builder(CuVSResources resources) {
+      this.resources = resources;
+    }
+
+    /**
+     * Sets the ef value; it defaults to 200 when this method is not called.
+     *
+     * @param ef the ef value
+     * @return an instance of this Builder
+     */
+    public Builder withEF(int ef) {
+      this.ef = ef;
+      return this;
+    }
+
+    /**
+     * Sets the number of threads; it defaults to 0 when this method is not called.
+     *
+     * @param numThreads the number of threads
+     * @return an instance of this Builder
+     */
+    public Builder withNumThreads(int numThreads) {
+      this.numThreads = numThreads;
+      return this;
+    }
+
+    /**
+     * Creates an {@link HnswSearchParams} from the values collected so far.
+     *
+     * @return an instance of HnswSearchParams
+     */
+    public HnswSearchParams build() {
+      return new HnswSearchParams(resources, ef, numThreads);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchResults.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchResults.java
new file mode 100644
index 000000000..8cb4d89e0
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchResults.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.nvidia.cuvs.common.SearchResults;
+
+/**
+ * Reads the neighbor and distance segments of an HNSW search at construction
+ * time and keeps the decoded results, one map per query.
+ * @since 25.02
+ */
+public class HnswSearchResults extends SearchResults {
+
+  protected HnswSearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout,
+      MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List<Integer> mapping,
+      long numberOfQueries) {
+    super(neighboursSequenceLayout, distancesSequenceLayout, neighboursMemorySegment, distancesMemorySegment, topK,
+        mapping, numberOfQueries);
+    readResultMemorySegments();
+  }
+
+  /**
+   * Walks the neighbor and distance {@link MemorySegment}s in lockstep and adds
+   * one id-to-distance map to the results for every topK consecutive entries.
+   */
+  protected void readResultMemorySegments() {
+    Map<Integer, Float> perQuery = new LinkedHashMap<>();
+    int remainingInBlock = topK;
+    for (long slot = 0, end = topK * numberOfQueries; slot < end; slot++) {
+      final long neighbourId = (long) neighboursVarHandle.get(neighboursMemorySegment, 0L, slot);
+      final float distance = (float) distancesVarHandle.get(distancesMemorySegment, 0L, slot);
+      final int key = (mapping == null) ? (int) neighbourId : mapping.get((int) neighbourId);
+      perQuery.put(key, distance);
+      if (--remainingInBlock == 0) {
+        results.add(perQuery);
+        perQuery = new LinkedHashMap<>();
+        remainingInBlock = topK;
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java
new file mode 100644
index 000000000..40018be92
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java
@@ -0,0 +1,18 @@
+package com.nvidia.cuvs;
+
+public class LibraryException extends RuntimeException {
+  private static final long serialVersionUID = -2311171907713571455L;
+
+  /** Creates an exception that carries only a detail message. */
+  public LibraryException(String message) {
+    super(message);
+  }
+  /** Creates an exception with a detail message and the underlying cause. */
+  public LibraryException(String message, Exception e) {
+    super(message, e);
+  }
+  /** Creates an exception that wraps {@code ex} as its cause. */
+  public LibraryException(Exception ex) {
+    super(ex);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/SearchResults.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/SearchResults.java
new file mode 100644
index 000000000..83e98cb8e
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/SearchResults.java
@@ -0,0 +1,53 @@
+package com.nvidia.cuvs.common;
+
+import java.lang.foreign.MemoryLayout.PathElement;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.lang.invoke.VarHandle;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+public abstract class SearchResults {
+
+  protected final List<Map<Integer, Float>> results;
+  protected final List<Integer> mapping; // TODO: Is this performant in a user application?
+  protected final SequenceLayout neighboursSequenceLayout;
+  protected final SequenceLayout distancesSequenceLayout;
+  protected final MemorySegment neighboursMemorySegment;
+  protected final MemorySegment distancesMemorySegment;
+  protected final int topK;
+  protected final long numberOfQueries;
+  protected final VarHandle neighboursVarHandle;
+  protected final VarHandle distancesVarHandle;
+
+  protected SearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout,
+      MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List<Integer> mapping,
+      long numberOfQueries) {
+    this.results = new LinkedList<>();
+    this.mapping = mapping;
+    this.topK = topK;
+    this.numberOfQueries = numberOfQueries;
+    this.neighboursMemorySegment = neighboursMemorySegment;
+    this.distancesMemorySegment = distancesMemorySegment;
+    this.neighboursSequenceLayout = neighboursSequenceLayout;
+    this.distancesSequenceLayout = distancesSequenceLayout;
+    this.neighboursVarHandle = neighboursSequenceLayout.varHandle(PathElement.sequenceElement());
+    this.distancesVarHandle = distancesSequenceLayout.varHandle(PathElement.sequenceElement());
+  }
+
+  /**
+   * Decodes the neighbor and distance {@link MemorySegment}s into
+   * {@code results}; each subclass supplies the decoding.
+   */
+  protected abstract void readResultMemorySegments();
+
+  /**
+   * Gets the decoded results, one map of neighbor ID to distance per entry.
+   *
+   * @return a list of results for each query as a map of neighbor IDs to distance
+   */
+  public List<Map<Integer, Float>> getResults() {
+    return this.results;
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java
new file mode 100644
index 000000000..163ef3a84
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.common;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemoryLayout.PathElement;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.VarHandle;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.nvidia.cuvs.GPUInfo;
+import com.nvidia.cuvs.LibraryException;
+import com.nvidia.cuvs.panama.GpuInfo;
+
+public class Util {
+
+  private static Arena arena = null; // shared arena that owns the library lookup; not closed in this class
+  private static Linker linker = null;
+  private static SymbolLookup symbolLookup = null;
+  private static MemoryLayout intMemoryLayout; // the linker's canonical layout for C "int"
+  private static MethodHandle getGpuInfoMethodHandle = null; // downcall handle for native get_gpu_info
+  protected static File nativeLibrary; // the file the native library was loaded from
+
+  static {
+    try {
+      linker = Linker.nativeLinker();
+      arena = Arena.ofShared();
+      nativeLibrary = loadNativeLibrary(); // honours the CUVS_JAVA_SO_PATH override, else uses the bundled copy
+      symbolLookup = SymbolLookup.libraryLookup(nativeLibrary.getAbsolutePath(), arena);
+      intMemoryLayout = linker.canonicalLayouts().get("int");
+      getGpuInfoMethodHandle = linker.downcallHandle(symbolLookup.find("get_gpu_info").get(),
+          FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+    } catch (Exception e) {
+      throw new LibraryException("LibCuVS Java Library Not Loaded", e);
+    }
+  }
+
+  /**
+   * Lists the GPUs that satisfy the default requirements: a compute capability
+   * of at least 7.0 and at least 8192 MB (8GB) of total memory.
+   *
+   * @return the GPUs meeting the default thresholds. See {@link GPUInfo}
+   */
+  public static List<GPUInfo> compatibleGPUs() throws Throwable {
+    return Util.compatibleGPUs(7.0d, 8 * 1024);
+  }
+
+  /**
+   * Filters the visible GPUs down to those that meet both thresholds: a device
+   * is kept only if its compute capability and its total memory are each at
+   * least the requested minimum.
+   *
+   * @param minComputeCapability the minimum compute capability
+   * @param minDeviceMemoryMB    the minimum total memory, in MB (1 MB = 2^20 bytes)
+   * @return the matching GPUs, in the order reported by {@link #availableGPUs()}
+   * @throws Throwable if querying the native library fails
+   */
+  public static List<GPUInfo> compatibleGPUs(double minComputeCapability, int minDeviceMemoryMB) throws Throwable {
+    final double requiredBytes = minDeviceMemoryMB * 1048576.0; // 2^20 bytes per MB
+    // Start from every visible device and drop the ones below either threshold.
+    List<GPUInfo> survivors = new ArrayList<GPUInfo>(availableGPUs());
+    survivors.removeIf(gpu -> !(gpu.getComputeCapability() >= minComputeCapability
+        && gpu.getTotalMemory() >= requiredBytes));
+    return survivors;
+  }
+
+  /**
+   * Queries the native library for every GPU it can see. All scratch memory
+   * lives in a call-scoped arena, so repeated calls do not accumulate native memory.
+   *
+   * @return a list of {@link GPUInfo} objects with GPU details
+   * @throws Throwable if the native {@code get_gpu_info} downcall fails
+   */
+  public static List<GPUInfo> availableGPUs() throws Throwable {
+    List<GPUInfo> results = new ArrayList<GPUInfo>();
+    // The class-level arena is shared and never closed; allocating the 1024-entry
+    // buffer from it on every call leaked native memory. Use a confined arena
+    // that is released as soon as the values have been copied into Java objects.
+    try (Arena callArena = Arena.ofConfined()) {
+      MemorySegment returnValueMemorySegment = callArena.allocate(intMemoryLayout);
+      MemorySegment numGpuMemorySegment = callArena.allocate(intMemoryLayout);
+
+      /*
+       * Room for 1024 entries: the device count is only known after the call
+       * because cudaGetDeviceCount runs inside get_gpu_info.
+       */
+      MemorySegment gpuInfoArray = GpuInfo.allocateArray(1024, callArena);
+      getGpuInfoMethodHandle.invokeExact(returnValueMemorySegment, numGpuMemorySegment, gpuInfoArray);
+      int numGPUs = numGpuMemorySegment.get(ValueLayout.JAVA_INT, 0);
+      MemoryLayout ml = MemoryLayout.sequenceLayout(numGPUs, GpuInfo.layout());
+      for (int i = 0; i < numGPUs; i++) {
+        PathElement slot = PathElement.sequenceElement(i);
+        VarHandle gpuId = ml.varHandle(slot, PathElement.groupElement("gpu_id"));
+        VarHandle freeMemory = ml.varHandle(slot, PathElement.groupElement("free_memory"));
+        VarHandle totalMemory = ml.varHandle(slot, PathElement.groupElement("total_memory"));
+        VarHandle computeCapability = ml.varHandle(slot, PathElement.groupElement("compute_capability"));
+        // Copy the NUL-terminated device name one byte at a time, excluding the terminator.
+        StringBuilder gpuName = new StringBuilder();
+        for (int p = 0;; p++) {
+          VarHandle nameByte = ml.varHandle(slot, PathElement.groupElement("name"), PathElement.sequenceElement(p));
+          byte raw = (byte) nameByte.get(gpuInfoArray, 0L);
+          if (raw == 0) break;
+          gpuName.append((char) (raw & 0xFF)); // unsigned widening; avoids sign-extending bytes >= 0x80
+        }
+        results.add(new GPUInfo((int) gpuId.get(gpuInfoArray, 0L), gpuName.toString().trim(),
+            (long) freeMemory.get(gpuInfoArray, 0L), (long) totalMemory.get(gpuInfoArray, 0L),
+            (float) computeCapability.get(gpuInfoArray, 0L)));
+      }
+    }
+    return results;
+  }
+
+  /**
+   * Copies a Java string into native memory as a NUL-terminated C string.
+   * <p>
+   * The text is encoded as UTF-8. The earlier implementation narrowed each UTF-16
+   * {@code char} to a {@code byte}, which replaced every non-ASCII character with
+   * its low byte; ASCII input yields exactly the same bytes as before.
+   *
+   * @param linker unused; retained so existing callers keep compiling
+   * @param arena  the arena that owns the returned segment
+   * @param str    the string to copy
+   * @return a segment holding the encoded bytes followed by a single {@code '\0'}
+   */
+  public static MemorySegment buildMemorySegment(Linker linker, Arena arena, String str) {
+    // SegmentAllocator#allocateFrom(String) performs the UTF-8 encoding, sizes the
+    // segment to the encoded length plus one, and writes the terminator, replacing
+    // the per-character VarHandle loop.
+    MemorySegment stringMemorySegment = arena.allocateFrom(str);
+    return stringMemorySegment;
+  }
+
+  /**
+   * Copies a 1D {@code long} array into native memory laid out with the linker's C {@code long} layout.
+   *
+   * @param linker the linker whose canonical layouts supply the element layout
+   * @param arena  the arena to allocate the segment from
+   * @param data   the values to copy
+   * @return a segment containing the elements of {@code data}, in order
+   */
+  public static MemorySegment buildMemorySegment(Linker linker, Arena arena, long[] data) {
+    ValueLayout elementLayout = (ValueLayout) linker.canonicalLayouts().get("long");
+    MemorySegment target = arena.allocate(MemoryLayout.sequenceLayout(data.length, elementLayout));
+    MemorySegment.copy(data, 0, target, elementLayout, 0, data.length);
+    return target;
+  }
+
+  /**
+   * Flattens a two-dimensional {@code float} array into one contiguous native
+   * segment. The column count is taken from the first row.
+   *
+   * @param linker the linker whose canonical layouts supply the element layout
+   * @param arena  the arena to allocate the segment from
+   * @param data   the rows to copy; {@code data[0].length} elements are read from each row
+   * @return a segment of {@code rows * cols} floats with the rows stored back to back
+   */
+  public static MemorySegment buildMemorySegment(Linker linker, Arena arena, float[][] data) {
+    final ValueLayout cell = (ValueLayout) linker.canonicalLayouts().get("float");
+    final long rowCount = data.length;
+    final long rowWidth = (rowCount == 0) ? 0 : data[0].length;
+    final long rowBytes = rowWidth * cell.byteSize();
+    MemorySegment matrix = arena.allocate(MemoryLayout.sequenceLayout(rowCount * rowWidth, cell));
+    int rowIndex = 0;
+    for (float[] row : data) { // bulk-copy each row to its offset in the flat buffer
+      MemorySegment.copy(row, 0, matrix, cell, rowIndex++ * rowBytes, (int) rowWidth);
+    }
+    return matrix;
+  }
+
+  /**
+   * Locates the CuVS native library: the file named by the CUVS_JAVA_SO_PATH
+   * environment variable if set, otherwise a temporary copy extracted from the classpath.
+   *
+   * @throws RuntimeException if CUVS_JAVA_SO_PATH is set but names a missing file
+   */
+  public static File loadNativeLibrary() throws IOException {
+    final String override = System.getenv("CUVS_JAVA_SO_PATH");
+    if (override == null) {
+      return loadLibraryFromJar("/libcuvs_java.so"); // no override: use the bundled copy
+    }
+    File candidate = new File(override);
+    if (candidate.exists()) return candidate;
+    throw new RuntimeException("Environment variable CUVS_JAVA_SO_PATH points to non-existent file: " + override);
+  }
+
+  /**
+   * Extracts a classpath resource to a temporary file that can be loaded as a native library.
+   *
+   * @param path absolute resource path (must start with '/'), e.g. "/libcuvs_java.so"
+   * @return the temporary file; it is deleted automatically when the JVM exits
+   */
+  private static File loadLibraryFromJar(String path) throws IOException {
+    if (!path.startsWith("/")) {
+      throw new IllegalArgumentException("The path has to be absolute (start with '/').");
+    }
+    String filename = path.substring(path.lastIndexOf('/') + 1); // e.g. "libcuvs_java.so"
+    int firstDot = filename.indexOf('.');
+    String prefix = (firstDot < 0) ? filename : filename.substring(0, firstDot);
+    String suffix = (firstDot < 0) ? null : filename.substring(firstDot); // keeps the leading dot
+    File temp = File.createTempFile(prefix, suffix);
+    temp.deleteOnExit(); // otherwise every JVM run leaves another copy of the library behind
+    InputStream libraryStream = Util.class.getModule().getResourceAsStream(path);
+    if (libraryStream == null) { // fail before opening an output stream that nothing would close
+      throw new LibraryException("CuVS Library Not Found in ClassPath");
+    }
+    streamCopy(libraryStream, new FileOutputStream(temp));
+    return temp;
+  }
+
+  /**
+   * Copies every byte of {@code is} to {@code os}, then closes both streams,
+   * whether or not the copy succeeded.
+   *
+   * @param is the source stream; {@code null} is reported as a missing library
+   * @param os the destination stream
+   * @throws LibraryException if {@code is} is {@code null}, or if reading, writing
+   *                          or closing a stream fails
+   */
+  private static void streamCopy(InputStream is, OutputStream os) throws LibraryException {
+    // Both streams are managed as resources so they are closed on every path,
+    // including the missing-source case, which previously threw before the
+    // try/finally and therefore never closed the destination stream.
+    try (OutputStream out = os; InputStream in = is) {
+      if (in == null) {
+        throw new LibraryException("CuVS Library Not Found in ClassPath");
+      }
+      byte[] buffer = new byte[1024];
+      int readBytes;
+      while ((readBytes = in.read(buffer)) != -1) {
+        out.write(buffer, 0, readBytes);
+      }
+    } catch (IOException e) {
+      // Covers read/write failures and failures while closing either stream; a
+      // failed close of the destination can mean the copy is incomplete, so it
+      // is reported instead of only being printed.
+      throw new LibraryException(e);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/BruteForceH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/BruteForceH.java
new file mode 100644
index 000000000..16603606b
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/BruteForceH.java
@@ -0,0 +1,1912 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class BruteForceH {
+
+  BruteForceH() {
+    // Should not be called directly; the constructor body is intentionally empty.
+  }
+
+  static final Arena LIBRARY_ARENA = Arena.ofAuto();
+  static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls");
+
+  static void traceDowncall(String name, Object... args) { // prints "name(arg1, arg2, ...)" to standard output
+    String traceArgs = Arrays.stream(args).map(Object::toString).collect(Collectors.joining(", "));
+    System.out.printf("%s(%s)\n", name, traceArgs);
+  }
+
+  static MemorySegment findOrThrow(String symbol) { // resolves via SYMBOL_LOOKUP; UnsatisfiedLinkError if absent
+    return SYMBOL_LOOKUP.find(symbol).orElseThrow(() -> new UnsatisfiedLinkError("unresolved symbol: " + symbol));
+  }
+
+  static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+    try { // look up virtual method `name` on `fi` using the method type derived from `fdesc`
+      return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+    } catch (ReflectiveOperationException ex) {
+      throw new AssertionError(ex); // a failed lookup is rethrown as an AssertionError
+    }
+  }
+
+  static MemoryLayout align(MemoryLayout layout, long align) { // rebuilds `layout` with byte alignment `align`
+    return switch (layout) {
+    case PaddingLayout p -> p; // padding is returned unchanged
+    case ValueLayout v -> v.withByteAlignment(align);
+    case GroupLayout g -> { // re-align every member recursively, then rebuild the same kind of group
+      MemoryLayout[] alignedMembers = g.memberLayouts().stream().map(m -> align(m, align)).toArray(MemoryLayout[]::new);
+      yield g instanceof StructLayout ? MemoryLayout.structLayout(alignedMembers)
+          : MemoryLayout.unionLayout(alignedMembers);
+    }
+    case SequenceLayout s -> MemoryLayout.sequenceLayout(s.elementCount(), align(s.elementLayout(), align));
+    }; // sequences keep their element count; only the element layout is re-aligned
+  }
+
+  static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup().or(Linker.nativeLinker().defaultLookup());
+
+  public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+  public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+  public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+  public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+  public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+  public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+  public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+  public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+      .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE));
+  public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG;
+  private static final int DLPACK_VERSION = (int) 80L;
+
+  /**
+   * {@snippet lang = c : * #define DLPACK_VERSION 80
+   * }
+   */
+  public static int DLPACK_VERSION() {
+    return DLPACK_VERSION;
+  }
+
+  private static final int DLPACK_ABI_VERSION = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define DLPACK_ABI_VERSION 1
+   * }
+   */
+  public static int DLPACK_ABI_VERSION() {
+    return DLPACK_ABI_VERSION;
+  }
+
+  private static final int _STDINT_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _STDINT_H 1
+   * }
+   */
+  public static int _STDINT_H() {
+    return _STDINT_H;
+  }
+
+  private static final int _FEATURES_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _FEATURES_H 1
+   * }
+   */
+  public static int _FEATURES_H() {
+    return _FEATURES_H;
+  }
+
+  private static final int _DEFAULT_SOURCE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _DEFAULT_SOURCE 1
+   * }
+   */
+  public static int _DEFAULT_SOURCE() {
+    return _DEFAULT_SOURCE;
+  }
+
+  private static final int __GLIBC_USE_ISOC2X = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_ISOC2X 0
+   * }
+   */
+  public static int __GLIBC_USE_ISOC2X() {
+    return __GLIBC_USE_ISOC2X;
+  }
+
+  private static final int __USE_ISOC11 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ISOC11 1
+   * }
+   */
+  public static int __USE_ISOC11() {
+    return __USE_ISOC11;
+  }
+
+  private static final int __USE_ISOC99 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ISOC99 1
+   * }
+   */
+  public static int __USE_ISOC99() {
+    return __USE_ISOC99;
+  }
+
+  private static final int __USE_ISOC95 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ISOC95 1
+   * }
+   */
+  public static int __USE_ISOC95() {
+    return __USE_ISOC95;
+  }
+
+  private static final int __USE_POSIX_IMPLICITLY = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX_IMPLICITLY 1
+   * }
+   */
+  public static int __USE_POSIX_IMPLICITLY() {
+    return __USE_POSIX_IMPLICITLY;
+  }
+
+  private static final int _POSIX_SOURCE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _POSIX_SOURCE 1
+   * }
+   */
+  public static int _POSIX_SOURCE() {
+    return _POSIX_SOURCE;
+  }
+
+  private static final int __USE_POSIX = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX 1
+   * }
+   */
+  public static int __USE_POSIX() {
+    return __USE_POSIX;
+  }
+
+  private static final int __USE_POSIX2 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX2 1
+   * }
+   */
+  public static int __USE_POSIX2() {
+    return __USE_POSIX2;
+  }
+
+  private static final int __USE_POSIX199309 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX199309 1
+   * }
+   */
+  public static int __USE_POSIX199309() {
+    return __USE_POSIX199309;
+  }
+
+  private static final int __USE_POSIX199506 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX199506 1
+   * }
+   */
+  public static int __USE_POSIX199506() {
+    return __USE_POSIX199506;
+  }
+
+  private static final int __USE_XOPEN2K = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_XOPEN2K 1
+   * }
+   */
+  public static int __USE_XOPEN2K() {
+    return __USE_XOPEN2K;
+  }
+
+  private static final int __USE_XOPEN2K8 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_XOPEN2K8 1
+   * }
+   */
+  public static int __USE_XOPEN2K8() {
+    return __USE_XOPEN2K8;
+  }
+
+  private static final int _ATFILE_SOURCE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _ATFILE_SOURCE 1
+   * }
+   */
+  public static int _ATFILE_SOURCE() {
+    return _ATFILE_SOURCE;
+  }
+
+  private static final int __WORDSIZE = (int) 64L;
+
+  /**
+   * {@snippet lang = c : * #define __WORDSIZE 64
+   * }
+   */
+  public static int __WORDSIZE() {
+    return __WORDSIZE;
+  }
+
+  private static final int __WORDSIZE_TIME64_COMPAT32 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __WORDSIZE_TIME64_COMPAT32 1
+   * }
+   */
+  public static int __WORDSIZE_TIME64_COMPAT32() {
+    return __WORDSIZE_TIME64_COMPAT32;
+  }
+
+  private static final int __SYSCALL_WORDSIZE = (int) 64L;
+
+  /**
+   * {@snippet lang = c : * #define __SYSCALL_WORDSIZE 64
+   * }
+   */
+  public static int __SYSCALL_WORDSIZE() {
+    return __SYSCALL_WORDSIZE;
+  }
+
+  private static final int __USE_MISC = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_MISC 1
+   * }
+   */
+  public static int __USE_MISC() {
+    return __USE_MISC;
+  }
+
+  private static final int __USE_ATFILE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ATFILE 1
+   * }
+   */
+  public static int __USE_ATFILE() {
+    return __USE_ATFILE;
+  }
+
+  private static final int __USE_FORTIFY_LEVEL = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_FORTIFY_LEVEL 0
+   * }
+   */
+  public static int __USE_FORTIFY_LEVEL() {
+    return __USE_FORTIFY_LEVEL;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_GETS = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_DEPRECATED_GETS 0
+   * }
+   */
+  public static int __GLIBC_USE_DEPRECATED_GETS() {
+    return __GLIBC_USE_DEPRECATED_GETS;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_SCANF = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_DEPRECATED_SCANF 0
+   * }
+   */
+  public static int __GLIBC_USE_DEPRECATED_SCANF() {
+    return __GLIBC_USE_DEPRECATED_SCANF;
+  }
+
+  private static final int _STDC_PREDEF_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _STDC_PREDEF_H 1
+   * }
+   */
+  public static int _STDC_PREDEF_H() {
+    return _STDC_PREDEF_H;
+  }
+
+  private static final int __STDC_IEC_559__ = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_IEC_559__ 1
+   * }
+   */
+  public static int __STDC_IEC_559__() {
+    return __STDC_IEC_559__;
+  }
+
+  private static final int __STDC_IEC_559_COMPLEX__ = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_IEC_559_COMPLEX__ 1
+   * }
+   */
+  public static int __STDC_IEC_559_COMPLEX__() {
+    return __STDC_IEC_559_COMPLEX__;
+  }
+
+  private static final int __GNU_LIBRARY__ = (int) 6L;
+
+  /**
+   * {@snippet lang = c : * #define __GNU_LIBRARY__ 6
+   * }
+   */
+  public static int __GNU_LIBRARY__() {
+    return __GNU_LIBRARY__;
+  }
+
+  private static final int __GLIBC__ = (int) 2L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC__ 2
+   * }
+   */
+  public static int __GLIBC__() {
+    return __GLIBC__;
+  }
+
+  private static final int __GLIBC_MINOR__ = (int) 35L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_MINOR__ 35
+   * }
+   */
+  public static int __GLIBC_MINOR__() {
+    return __GLIBC_MINOR__;
+  }
+
+  private static final int _SYS_CDEFS_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _SYS_CDEFS_H 1
+   * }
+   */
+  public static int _SYS_CDEFS_H() {
+    return _SYS_CDEFS_H;
+  }
+
+  private static final int __glibc_c99_flexarr_available = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __glibc_c99_flexarr_available 1
+   * }
+   */
+  public static int __glibc_c99_flexarr_available() {
+    return __glibc_c99_flexarr_available;
+  }
+
+  private static final int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI 0
+   * }
+   */
+  public static int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI() {
+    return __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI;
+  }
+
+  private static final int __HAVE_GENERIC_SELECTION = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __HAVE_GENERIC_SELECTION 1
+   * }
+   */
+  public static int __HAVE_GENERIC_SELECTION() {
+    return __HAVE_GENERIC_SELECTION;
+  }
+
+  private static final int __GLIBC_USE_LIB_EXT2 = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_LIB_EXT2 0
+   * }
+   */
+  public static int __GLIBC_USE_LIB_EXT2() {
+    return __GLIBC_USE_LIB_EXT2;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_BFP_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT_C2X = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_BFP_EXT_C2X 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_EXT() {
+    return __GLIBC_USE_IEC_60559_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_FUNCS_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_TYPES_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_TYPES_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_TYPES_EXT() {
+    return __GLIBC_USE_IEC_60559_TYPES_EXT;
+  }
+
+  private static final int _BITS_TYPES_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_TYPES_H 1
+   * }
+   */
+  public static int _BITS_TYPES_H() {
+    return _BITS_TYPES_H;
+  }
+
+  private static final int _BITS_TYPESIZES_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_TYPESIZES_H 1
+   * }
+   */
+  public static int _BITS_TYPESIZES_H() {
+    return _BITS_TYPESIZES_H;
+  }
+
+  private static final int __OFF_T_MATCHES_OFF64_T = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __OFF_T_MATCHES_OFF64_T 1
+   * }
+   */
+  public static int __OFF_T_MATCHES_OFF64_T() {
+    return __OFF_T_MATCHES_OFF64_T;
+  }
+
+  private static final int __INO_T_MATCHES_INO64_T = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __INO_T_MATCHES_INO64_T 1
+   * }
+   */
+  public static int __INO_T_MATCHES_INO64_T() {
+    return __INO_T_MATCHES_INO64_T;
+  }
+
+  private static final int __RLIM_T_MATCHES_RLIM64_T = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __RLIM_T_MATCHES_RLIM64_T 1
+   * }
+   */
+  public static int __RLIM_T_MATCHES_RLIM64_T() {
+    return __RLIM_T_MATCHES_RLIM64_T;
+  }
+
+  private static final int __STATFS_MATCHES_STATFS64 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __STATFS_MATCHES_STATFS64 1
+   * }
+   */
+  public static int __STATFS_MATCHES_STATFS64() {
+    return __STATFS_MATCHES_STATFS64;
+  }
+
+  private static final int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 1
+   * }
+   */
+  public static int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64() {
+    return __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64;
+  }
+
+  private static final int __FD_SETSIZE = (int) 1024L;
+
+  /**
+   * {@snippet lang = c : * #define __FD_SETSIZE 1024
+   * }
+   */
+  public static int __FD_SETSIZE() {
+    return __FD_SETSIZE;
+  }
+
+  private static final int _BITS_TIME64_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_TIME64_H 1
+   * }
+   */
+  public static int _BITS_TIME64_H() {
+    return _BITS_TIME64_H;
+  }
+
+  private static final int _BITS_WCHAR_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_WCHAR_H 1
+   * }
+   */
+  public static int _BITS_WCHAR_H() {
+    return _BITS_WCHAR_H;
+  }
+
+  private static final int _BITS_STDINT_INTN_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_STDINT_INTN_H 1
+   * }
+   */
+  public static int _BITS_STDINT_INTN_H() {
+    return _BITS_STDINT_INTN_H;
+  }
+
+  private static final int _BITS_STDINT_UINTN_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_STDINT_UINTN_H 1
+   * }
+   */
+  public static int _BITS_STDINT_UINTN_H() {
+    return _BITS_STDINT_UINTN_H;
+  }
+
+  /**
+   * {@snippet lang = c : * typedef unsigned char __u_char
+   * }
+   */
+  public static final OfByte __u_char = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef unsigned short __u_short
+   * }
+   */
+  public static final OfShort __u_short = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __u_int
+   * }
+   */
+  public static final OfInt __u_int = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __u_long
+   * }
+   */
+  public static final OfLong __u_long = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef signed char __int8_t
+   * }
+   */
+  public static final OfByte __int8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef unsigned char __uint8_t
+   * }
+   */
+  public static final OfByte __uint8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef short __int16_t
+   * }
+   */
+  public static final OfShort __int16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef unsigned short __uint16_t
+   * }
+   */
+  public static final OfShort __uint16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef int __int32_t
+   * }
+   */
+  public static final OfInt __int32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __uint32_t
+   * }
+   */
+  public static final OfInt __uint32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __int64_t
+   * }
+   */
+  public static final OfLong __int64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __uint64_t
+   * }
+   */
+  public static final OfLong __uint64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __int8_t __int_least8_t
+   * }
+   */
+  public static final OfByte __int_least8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __uint8_t __uint_least8_t
+   * }
+   */
+  public static final OfByte __uint_least8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __int16_t __int_least16_t
+   * }
+   */
+  public static final OfShort __int_least16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __uint16_t __uint_least16_t
+   * }
+   */
+  public static final OfShort __uint_least16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __int32_t __int_least32_t
+   * }
+   */
+  public static final OfInt __int_least32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __uint32_t __uint_least32_t
+   * }
+   */
+  public static final OfInt __uint_least32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __int64_t __int_least64_t
+   * }
+   */
+  public static final OfLong __int_least64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __uint64_t __uint_least64_t
+   * }
+   */
+  public static final OfLong __uint_least64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __quad_t
+   * }
+   */
+  public static final OfLong __quad_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __u_quad_t
+   * }
+   */
+  public static final OfLong __u_quad_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __intmax_t
+   * }
+   */
+  public static final OfLong __intmax_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __uintmax_t
+   * }
+   */
+  public static final OfLong __uintmax_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __dev_t
+   * }
+   */
+  public static final OfLong __dev_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __uid_t
+   * }
+   */
+  public static final OfInt __uid_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __gid_t
+   * }
+   */
+  public static final OfInt __gid_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __ino_t
+   * }
+   */
+  public static final OfLong __ino_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __ino64_t
+   * }
+   */
+  public static final OfLong __ino64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __mode_t
+   * }
+   */
+  public static final OfInt __mode_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __nlink_t
+   * }
+   */
+  public static final OfLong __nlink_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __off_t
+   * }
+   */
+  public static final OfLong __off_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __off64_t
+   * }
+   */
+  public static final OfLong __off64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef int __pid_t
+   * }
+   */
+  public static final OfInt __pid_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __clock_t
+   * }
+   */
+  public static final OfLong __clock_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __rlim_t
+   * }
+   */
+  public static final OfLong __rlim_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __rlim64_t
+   * }
+   */
+  public static final OfLong __rlim64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __id_t
+   * }
+   */
+  public static final OfInt __id_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __time_t
+   * }
+   */
+  public static final OfLong __time_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __useconds_t
+   * }
+   */
+  public static final OfInt __useconds_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __suseconds_t
+   * }
+   */
+  public static final OfLong __suseconds_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __suseconds64_t
+   * }
+   */
+  public static final OfLong __suseconds64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef int __daddr_t
+   * }
+   */
+  public static final OfInt __daddr_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef int __key_t
+   * }
+   */
+  public static final OfInt __key_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef int __clockid_t
+   * }
+   */
+  public static final OfInt __clockid_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef void *__timer_t
+   * }
+   */
+  public static final AddressLayout __timer_t = BruteForceH.C_POINTER;
+  /**
+   * {@snippet lang = c : * typedef long __blksize_t
+   * }
+   */
+  public static final OfLong __blksize_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __blkcnt_t
+   * }
+   */
+  public static final OfLong __blkcnt_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __blkcnt64_t
+   * }
+   */
+  public static final OfLong __blkcnt64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __fsblkcnt_t
+   * }
+   */
+  public static final OfLong __fsblkcnt_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __fsblkcnt64_t
+   * }
+   */
+  public static final OfLong __fsblkcnt64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __fsfilcnt_t
+   * }
+   */
+  public static final OfLong __fsfilcnt_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __fsfilcnt64_t
+   * }
+   */
+  public static final OfLong __fsfilcnt64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __fsword_t
+   * }
+   */
+  public static final OfLong __fsword_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __ssize_t
+   * }
+   */
+  public static final OfLong __ssize_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __syscall_slong_t
+   * }
+   */
+  public static final OfLong __syscall_slong_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __syscall_ulong_t
+   * }
+   */
+  public static final OfLong __syscall_ulong_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __off64_t __loff_t
+   * }
+   */
+  public static final OfLong __loff_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef char *__caddr_t
+   * }
+   */
+  public static final AddressLayout __caddr_t = BruteForceH.C_POINTER;
+  /**
+   * {@snippet lang = c : * typedef long __intptr_t
+   * }
+   */
+  public static final OfLong __intptr_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __socklen_t
+   * }
+   */
+  public static final OfInt __socklen_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef int __sig_atomic_t
+   * }
+   */
+  public static final OfInt __sig_atomic_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __int8_t int8_t
+   * }
+   */
+  public static final OfByte int8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __int16_t int16_t
+   * }
+   */
+  public static final OfShort int16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __int32_t int32_t
+   * }
+   */
+  public static final OfInt int32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __int64_t int64_t
+   * }
+   */
+  public static final OfLong int64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __uint8_t uint8_t
+   * }
+   */
+  public static final OfByte uint8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __uint16_t uint16_t
+   * }
+   */
+  public static final OfShort uint16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __uint32_t uint32_t
+   * }
+   */
+  public static final OfInt uint32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __uint64_t uint64_t
+   * }
+   */
+  public static final OfLong uint64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __int_least8_t int_least8_t
+   * }
+   */
+  public static final OfByte int_least8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __int_least16_t int_least16_t
+   * }
+   */
+  public static final OfShort int_least16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __int_least32_t int_least32_t
+   * }
+   */
+  public static final OfInt int_least32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __int_least64_t int_least64_t
+   * }
+   */
+  public static final OfLong int_least64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __uint_least8_t uint_least8_t
+   * }
+   */
+  public static final OfByte uint_least8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __uint_least16_t uint_least16_t
+   * }
+   */
+  public static final OfShort uint_least16_t = BruteForceH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __uint_least32_t uint_least32_t
+   * }
+   */
+  public static final OfInt uint_least32_t = BruteForceH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __uint_least64_t uint_least64_t
+   * }
+   */
+  public static final OfLong uint_least64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef signed char int_fast8_t
+   * }
+   */
+  public static final OfByte int_fast8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef long int_fast16_t
+   * }
+   */
+  public static final OfLong int_fast16_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long int_fast32_t
+   * }
+   */
+  public static final OfLong int_fast32_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long int_fast64_t
+   * }
+   */
+  public static final OfLong int_fast64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned char uint_fast8_t
+   * }
+   */
+  public static final OfByte uint_fast8_t = BruteForceH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef unsigned long uint_fast16_t
+   * }
+   */
+  public static final OfLong uint_fast16_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long uint_fast32_t
+   * }
+   */
+  public static final OfLong uint_fast32_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long uint_fast64_t
+   * }
+   */
+  public static final OfLong uint_fast64_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long intptr_t
+   * }
+   */
+  public static final OfLong intptr_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long uintptr_t
+   * }
+   */
+  public static final OfLong uintptr_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __intmax_t intmax_t
+   * }
+   */
+  public static final OfLong intmax_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __uintmax_t uintmax_t
+   * }
+   */
+  public static final OfLong uintmax_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long ptrdiff_t
+   * }
+   */
+  public static final OfLong ptrdiff_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long size_t
+   * }
+   */
+  public static final OfLong size_t = BruteForceH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef int wchar_t
+   * }
+   */
+  public static final OfInt wchar_t = BruteForceH.C_INT;
+  private static final int kDLCPU = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLCPU = 1
+   * }
+   */
+  public static int kDLCPU() {
+    return kDLCPU;
+  }
+
+  private static final int kDLCUDA = (int) 2L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLCUDA = 2
+   * }
+   */
+  public static int kDLCUDA() {
+    return kDLCUDA;
+  }
+
+  private static final int kDLCUDAHost = (int) 3L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLCUDAHost = 3
+   * }
+   */
+  public static int kDLCUDAHost() {
+    return kDLCUDAHost;
+  }
+
+  private static final int kDLOpenCL = (int) 4L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLOpenCL = 4
+   * }
+   */
+  public static int kDLOpenCL() {
+    return kDLOpenCL;
+  }
+
+  private static final int kDLVulkan = (int) 7L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLVulkan = 7
+   * }
+   */
+  public static int kDLVulkan() {
+    return kDLVulkan;
+  }
+
+  private static final int kDLMetal = (int) 8L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLMetal = 8
+   * }
+   */
+  public static int kDLMetal() {
+    return kDLMetal;
+  }
+
+  private static final int kDLVPI = (int) 9L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLVPI = 9
+   * }
+   */
+  public static int kDLVPI() {
+    return kDLVPI;
+  }
+
+  private static final int kDLROCM = (int) 10L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLROCM = 10
+   * }
+   */
+  public static int kDLROCM() {
+    return kDLROCM;
+  }
+
+  private static final int kDLROCMHost = (int) 11L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLROCMHost = 11
+   * }
+   */
+  public static int kDLROCMHost() {
+    return kDLROCMHost;
+  }
+
+  private static final int kDLExtDev = (int) 12L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLExtDev = 12
+   * }
+   */
+  public static int kDLExtDev() {
+    return kDLExtDev;
+  }
+
+  private static final int kDLCUDAManaged = (int) 13L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLCUDAManaged = 13
+   * }
+   */
+  public static int kDLCUDAManaged() {
+    return kDLCUDAManaged;
+  }
+
+  private static final int kDLOneAPI = (int) 14L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLOneAPI = 14
+   * }
+   */
+  public static int kDLOneAPI() {
+    return kDLOneAPI;
+  }
+
+  private static final int kDLWebGPU = (int) 15L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLWebGPU = 15
+   * }
+   */
+  public static int kDLWebGPU() {
+    return kDLWebGPU;
+  }
+
+  private static final int kDLHexagon = (int) 16L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLHexagon = 16
+   * }
+   */
+  public static int kDLHexagon() {
+    return kDLHexagon;
+  }
+
+  private static final int kDLInt = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLInt = 0
+   * }
+   */
+  public static int kDLInt() {
+    return kDLInt;
+  }
+
+  private static final int kDLUInt = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLUInt = 1
+   * }
+   */
+  public static int kDLUInt() {
+    return kDLUInt;
+  }
+
+  private static final int kDLFloat = (int) 2L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLFloat = 2
+   * }
+   */
+  public static int kDLFloat() {
+    return kDLFloat;
+  }
+
+  private static final int kDLOpaqueHandle = (int) 3L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLOpaqueHandle = 3
+   * }
+   */
+  public static int kDLOpaqueHandle() {
+    return kDLOpaqueHandle;
+  }
+
+  private static final int kDLBfloat = (int) 4L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLBfloat = 4
+   * }
+   */
+  public static int kDLBfloat() {
+    return kDLBfloat;
+  }
+
+  private static final int kDLComplex = (int) 5L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLComplex = 5
+   * }
+   */
+  public static int kDLComplex() {
+    return kDLComplex;
+  }
+
+  private static final int kDLBool = (int) 6L;
+
+  /**
+   * {@snippet lang = c : * enum <anonymous>.kDLBool = 6
+   * }
+   */
+  public static int kDLBool() {
+    return kDLBool;
+  }
+
+  /**
+   * {@snippet lang = c : * typedef cuvsBruteForceIndex *cuvsBruteForceIndex_t
+   * }
+   */
+  public static final AddressLayout cuvsBruteForceIndex_t = BruteForceH.C_POINTER;
+  private static final long _POSIX_C_SOURCE = 200809L;
+
+  /**
+   * {@snippet lang = c : * #define _POSIX_C_SOURCE 200809
+   * }
+   */
+  public static long _POSIX_C_SOURCE() {
+    return _POSIX_C_SOURCE;
+  }
+
+  private static final int __TIMESIZE = (int) 64L;
+
+  /**
+   * {@snippet lang = c : * #define __TIMESIZE 64
+   * }
+   */
+  public static int __TIMESIZE() {
+    return __TIMESIZE;
+  }
+
+  private static final long __STDC_IEC_60559_BFP__ = 201404L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_IEC_60559_BFP__ 201404
+   * }
+   */
+  public static long __STDC_IEC_60559_BFP__() {
+    return __STDC_IEC_60559_BFP__;
+  }
+
+  private static final long __STDC_IEC_60559_COMPLEX__ = 201404L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_IEC_60559_COMPLEX__ 201404
+   * }
+   */
+  public static long __STDC_IEC_60559_COMPLEX__() {
+    return __STDC_IEC_60559_COMPLEX__;
+  }
+
+  private static final long __STDC_ISO_10646__ = 201706L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_ISO_10646__ 201706
+   * }
+   */
+  public static long __STDC_ISO_10646__() {
+    return __STDC_ISO_10646__;
+  }
+
+  private static final int __WCHAR_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define __WCHAR_MAX 2147483647
+   * }
+   */
+  public static int __WCHAR_MAX() {
+    return __WCHAR_MAX;
+  }
+
+  private static final int __WCHAR_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define __WCHAR_MIN -2147483648
+   * }
+   */
+  public static int __WCHAR_MIN() {
+    return __WCHAR_MIN;
+  }
+
+  private static final int INT8_MIN = (int) -128L;
+
+  /**
+   * {@snippet lang = c : * #define INT8_MIN -128
+   * }
+   */
+  public static int INT8_MIN() {
+    return INT8_MIN;
+  }
+
+  private static final int INT16_MIN = (int) -32768L;
+
+  /**
+   * {@snippet lang = c : * #define INT16_MIN -32768
+   * }
+   */
+  public static int INT16_MIN() {
+    return INT16_MIN;
+  }
+
+  private static final int INT32_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define INT32_MIN -2147483648
+   * }
+   */
+  public static int INT32_MIN() {
+    return INT32_MIN;
+  }
+
+  private static final long INT64_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT64_MIN() {
+    return INT64_MIN;
+  }
+
+  private static final int INT8_MAX = (int) 127L;
+
+  /**
+   * {@snippet lang = c : * #define INT8_MAX 127
+   * }
+   */
+  public static int INT8_MAX() {
+    return INT8_MAX;
+  }
+
+  private static final int INT16_MAX = (int) 32767L;
+
+  /**
+   * {@snippet lang = c : * #define INT16_MAX 32767
+   * }
+   */
+  public static int INT16_MAX() {
+    return INT16_MAX;
+  }
+
+  private static final int INT32_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define INT32_MAX 2147483647
+   * }
+   */
+  public static int INT32_MAX() {
+    return INT32_MAX;
+  }
+
+  private static final long INT64_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT64_MAX() {
+    return INT64_MAX;
+  }
+
+  private static final int UINT8_MAX = (int) 255L;
+
+  /**
+   * {@snippet lang = c : * #define UINT8_MAX 255
+   * }
+   */
+  public static int UINT8_MAX() {
+    return UINT8_MAX;
+  }
+
+  private static final int UINT16_MAX = (int) 65535L;
+
+  /**
+   * {@snippet lang = c : * #define UINT16_MAX 65535
+   * }
+   */
+  public static int UINT16_MAX() {
+    return UINT16_MAX;
+  }
+
+  private static final int UINT32_MAX = (int) 4294967295L;
+
+  /** Unsigned 32-bit maximum held in a signed {@code int}: reads as -1; widen with {@code Integer.toUnsignedLong}.
+   * {@snippet lang = c : * #define UINT32_MAX 4294967295
+   * }
+   */
+  public static int UINT32_MAX() {
+    return UINT32_MAX;
+  }
+
+  private static final long UINT64_MAX = -1L;
+
+  /** Unsigned 64-bit maximum (2^64 - 1) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINT64_MAX -1
+   * }
+   */
+  public static long UINT64_MAX() {
+    return UINT64_MAX;
+  }
+
+  private static final int INT_LEAST8_MIN = (int) -128L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST8_MIN -128
+   * }
+   */
+  public static int INT_LEAST8_MIN() {
+    return INT_LEAST8_MIN;
+  }
+
+  private static final int INT_LEAST16_MIN = (int) -32768L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST16_MIN -32768
+   * }
+   */
+  public static int INT_LEAST16_MIN() {
+    return INT_LEAST16_MIN;
+  }
+
+  private static final int INT_LEAST32_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST32_MIN -2147483648
+   * }
+   */
+  public static int INT_LEAST32_MIN() {
+    return INT_LEAST32_MIN;
+  }
+
+  private static final long INT_LEAST64_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_LEAST64_MIN() {
+    return INT_LEAST64_MIN;
+  }
+
+  private static final int INT_LEAST8_MAX = (int) 127L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST8_MAX 127
+   * }
+   */
+  public static int INT_LEAST8_MAX() {
+    return INT_LEAST8_MAX;
+  }
+
+  private static final int INT_LEAST16_MAX = (int) 32767L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST16_MAX 32767
+   * }
+   */
+  public static int INT_LEAST16_MAX() {
+    return INT_LEAST16_MAX;
+  }
+
+  private static final int INT_LEAST32_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST32_MAX 2147483647
+   * }
+   */
+  public static int INT_LEAST32_MAX() {
+    return INT_LEAST32_MAX;
+  }
+
+  private static final long INT_LEAST64_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_LEAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_LEAST64_MAX() {
+    return INT_LEAST64_MAX;
+  }
+
+  private static final int UINT_LEAST8_MAX = (int) 255L;
+
+  /**
+   * {@snippet lang = c : * #define UINT_LEAST8_MAX 255
+   * }
+   */
+  public static int UINT_LEAST8_MAX() {
+    return UINT_LEAST8_MAX;
+  }
+
+  private static final int UINT_LEAST16_MAX = (int) 65535L;
+
+  /**
+   * {@snippet lang = c : * #define UINT_LEAST16_MAX 65535
+   * }
+   */
+  public static int UINT_LEAST16_MAX() {
+    return UINT_LEAST16_MAX;
+  }
+
+  private static final int UINT_LEAST32_MAX = (int) 4294967295L;
+
+  /** Unsigned 32-bit maximum held in a signed {@code int}: reads as -1; widen with {@code Integer.toUnsignedLong}.
+   * {@snippet lang = c : * #define UINT_LEAST32_MAX 4294967295
+   * }
+   */
+  public static int UINT_LEAST32_MAX() {
+    return UINT_LEAST32_MAX;
+  }
+
+  private static final long UINT_LEAST64_MAX = -1L;
+
+  /** Unsigned 64-bit maximum (2^64 - 1) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINT_LEAST64_MAX -1
+   * }
+   */
+  public static long UINT_LEAST64_MAX() {
+    return UINT_LEAST64_MAX;
+  }
+
+  private static final int INT_FAST8_MIN = (int) -128L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST8_MIN -128
+   * }
+   */
+  public static int INT_FAST8_MIN() {
+    return INT_FAST8_MIN;
+  }
+
+  private static final long INT_FAST16_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST16_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST16_MIN() {
+    return INT_FAST16_MIN;
+  }
+
+  private static final long INT_FAST32_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST32_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST32_MIN() {
+    return INT_FAST32_MIN;
+  }
+
+  private static final long INT_FAST64_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST64_MIN() {
+    return INT_FAST64_MIN;
+  }
+
+  private static final int INT_FAST8_MAX = (int) 127L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST8_MAX 127
+   * }
+   */
+  public static int INT_FAST8_MAX() {
+    return INT_FAST8_MAX;
+  }
+
+  private static final long INT_FAST16_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST16_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST16_MAX() {
+    return INT_FAST16_MAX;
+  }
+
+  private static final long INT_FAST32_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST32_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST32_MAX() {
+    return INT_FAST32_MAX;
+  }
+
+  private static final long INT_FAST64_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST64_MAX() {
+    return INT_FAST64_MAX;
+  }
+
+  private static final int UINT_FAST8_MAX = (int) 255L;
+
+  /**
+   * {@snippet lang = c : * #define UINT_FAST8_MAX 255
+   * }
+   */
+  public static int UINT_FAST8_MAX() {
+    return UINT_FAST8_MAX;
+  }
+
+  private static final long UINT_FAST16_MAX = -1L;
+
+  /** All-ones 64-bit pattern (2^64 - 1 unsigned) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINT_FAST16_MAX -1
+   * }
+   */
+  public static long UINT_FAST16_MAX() {
+    return UINT_FAST16_MAX;
+  }
+
+  private static final long UINT_FAST32_MAX = -1L;
+
+  /** All-ones 64-bit pattern (2^64 - 1 unsigned) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINT_FAST32_MAX -1
+   * }
+   */
+  public static long UINT_FAST32_MAX() {
+    return UINT_FAST32_MAX;
+  }
+
+  private static final long UINT_FAST64_MAX = -1L;
+
+  /** All-ones 64-bit pattern (2^64 - 1 unsigned) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINT_FAST64_MAX -1
+   * }
+   */
+  public static long UINT_FAST64_MAX() {
+    return UINT_FAST64_MAX;
+  }
+
+  private static final long INTPTR_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INTPTR_MIN -9223372036854775808
+   * }
+   */
+  public static long INTPTR_MIN() {
+    return INTPTR_MIN;
+  }
+
+  private static final long INTPTR_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INTPTR_MAX 9223372036854775807
+   * }
+   */
+  public static long INTPTR_MAX() {
+    return INTPTR_MAX;
+  }
+
+  private static final long UINTPTR_MAX = -1L;
+
+  /** All-ones 64-bit pattern (2^64 - 1 unsigned) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINTPTR_MAX -1
+   * }
+   */
+  public static long UINTPTR_MAX() {
+    return UINTPTR_MAX;
+  }
+
+  private static final long INTMAX_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INTMAX_MIN -9223372036854775808
+   * }
+   */
+  public static long INTMAX_MIN() {
+    return INTMAX_MIN;
+  }
+
+  private static final long INTMAX_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INTMAX_MAX 9223372036854775807
+   * }
+   */
+  public static long INTMAX_MAX() {
+    return INTMAX_MAX;
+  }
+
+  private static final long UINTMAX_MAX = -1L;
+
+  /** All-ones 64-bit pattern (2^64 - 1 unsigned) held in a signed {@code long}: reads as -1; use {@code Long.compareUnsigned}.
+   * {@snippet lang = c : * #define UINTMAX_MAX -1
+   * }
+   */
+  public static long UINTMAX_MAX() {
+    return UINTMAX_MAX;
+  }
+
+  private static final long PTRDIFF_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define PTRDIFF_MIN -9223372036854775808
+   * }
+   */
+  public static long PTRDIFF_MIN() {
+    return PTRDIFF_MIN;
+  }
+
+  private static final long PTRDIFF_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define PTRDIFF_MAX 9223372036854775807
+   * }
+   */
+  public static long PTRDIFF_MAX() {
+    return PTRDIFF_MAX;
+  }
+
+  private static final int SIG_ATOMIC_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define SIG_ATOMIC_MIN -2147483648
+   * }
+   */
+  public static int SIG_ATOMIC_MIN() {
+    return SIG_ATOMIC_MIN;
+  }
+
+  private static final int SIG_ATOMIC_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define SIG_ATOMIC_MAX 2147483647
+   * }
+   */
+  public static int SIG_ATOMIC_MAX() {
+    return SIG_ATOMIC_MAX;
+  }
+
+  private static final long SIZE_MAX = -1L;
+
+  /** All-ones 64-bit pattern (2^64 - 1 unsigned) held in a signed {@code long}: reads as -1, so never use it as a Java length.
+   * {@snippet lang = c : * #define SIZE_MAX -1
+   * }
+   */
+  public static long SIZE_MAX() {
+    return SIZE_MAX;
+  }
+
+  private static final int WCHAR_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define WCHAR_MIN -2147483648
+   * }
+   */
+  public static int WCHAR_MIN() {
+    return WCHAR_MIN;
+  }
+
+  private static final int WCHAR_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define WCHAR_MAX 2147483647
+   * }
+   */
+  public static int WCHAR_MAX() {
+    return WCHAR_MAX;
+  }
+
+  private static final int WINT_MIN = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define WINT_MIN 0
+   * }
+   */
+  public static int WINT_MIN() {
+    return WINT_MIN;
+  }
+
+  private static final int WINT_MAX = (int) 4294967295L;
+
+  /** Unsigned 32-bit maximum held in a signed {@code int}: reads as -1; widen with {@code Integer.toUnsignedLong}.
+   * {@snippet lang = c : * #define WINT_MAX 4294967295
+   * }
+   */
+  public static int WINT_MAX() {
+    return WINT_MAX;
+  }
+
+  private static final MemorySegment NULL = MemorySegment.ofAddress(0L);
+
+  /** The C null pointer: one shared native segment created from address 0 and returned to every caller.
+   * {@snippet lang = c : * #define NULL (void*) 0
+   * }
+   */
+  public static MemorySegment NULL() {
+    return NULL;
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CagraH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CagraH.java
new file mode 100644
index 000000000..88ddeb6fe
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CagraH.java
@@ -0,0 +1,2298 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class CagraH {
+
+  CagraH() {
+    // Intentionally empty and package-private; this constructor should not be called directly.
+  }
+
+  static final Arena LIBRARY_ARENA = Arena.ofAuto(); // automatic arena: reclaimed by the GC, never closed explicitly
+  static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls"); // read once from the system property
+
+  static void traceDowncall(String name, Object... args) {
+    // Prints "name(arg, ...)" to stdout; String.valueOf renders a null argument as "null" instead of throwing.
+    System.out.printf("%s(%s)\n", name, Arrays.stream(args).map(String::valueOf).collect(Collectors.joining(", ")));
+  }
+
+  static MemorySegment findOrThrow(String symbol) { // looks symbol up in SYMBOL_LOOKUP; UnsatisfiedLinkError if absent
+    return SYMBOL_LOOKUP.find(symbol).orElseThrow(() -> new UnsatisfiedLinkError("unresolved symbol: " + symbol));
+  }
+
+  static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+    try { // find fi's virtual method `name` whose signature is fdesc.toMethodType()
+      return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+    } catch (ReflectiveOperationException ex) {
+      throw new AssertionError(ex); // a failed lookup is rethrown unchecked, with the cause attached
+    }
+  }
+
+  static MemoryLayout align(MemoryLayout layout, long align) { // rebuilds layout with byte alignment `align`, recursively
+    return switch (layout) {
+    case PaddingLayout p -> p; // padding is returned unchanged
+    case ValueLayout v -> v.withByteAlignment(align);
+    case GroupLayout g -> { // re-align every member, then rebuild the same kind of group (struct or union)
+      MemoryLayout[] alignedMembers = g.memberLayouts().stream().map(m -> align(m, align)).toArray(MemoryLayout[]::new);
+      yield g instanceof StructLayout ? MemoryLayout.structLayout(alignedMembers)
+          : MemoryLayout.unionLayout(alignedMembers);
+    }
+    case SequenceLayout s -> MemoryLayout.sequenceLayout(s.elementCount(), align(s.elementLayout(), align)); // same count
+    };
+  }
+
+  static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup().or(Linker.nativeLinker().defaultLookup()); // loader lookup first
+
+  public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+  public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+  public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+  public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+  public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+  public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+  public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+  public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+      .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE)); // target: Long.MAX_VALUE bytes
+  public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG; // C long uses the same 64-bit layout as C_LONG_LONG
+  private static final int DLPACK_VERSION = 80;
+
+  /**
+   * Returns the value of the C macro {@code DLPACK_VERSION}.
+   * C source: {@code #define DLPACK_VERSION 80}
+   */
+  public static int DLPACK_VERSION() {
+    return DLPACK_VERSION;
+  }
+
+  private static final int DLPACK_ABI_VERSION = 1;
+
+  /**
+   * Returns the value of the C macro {@code DLPACK_ABI_VERSION}.
+   * C source: {@code #define DLPACK_ABI_VERSION 1}
+   */
+  public static int DLPACK_ABI_VERSION() {
+    return DLPACK_ABI_VERSION;
+  }
+
+  private static final int _STDINT_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _STDINT_H}.
+   * C source: {@code #define _STDINT_H 1}
+   */
+  public static int _STDINT_H() {
+    return _STDINT_H;
+  }
+
+  private static final int _FEATURES_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _FEATURES_H}.
+   * C source: {@code #define _FEATURES_H 1}
+   */
+  public static int _FEATURES_H() {
+    return _FEATURES_H;
+  }
+
+  private static final int _DEFAULT_SOURCE = 1;
+
+  /**
+   * Returns the value of the C macro {@code _DEFAULT_SOURCE}.
+   * C source: {@code #define _DEFAULT_SOURCE 1}
+   */
+  public static int _DEFAULT_SOURCE() {
+    return _DEFAULT_SOURCE;
+  }
+
+  private static final int __GLIBC_USE_ISOC2X = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_ISOC2X}.
+   * C source: {@code #define __GLIBC_USE_ISOC2X 0}
+   */
+  public static int __GLIBC_USE_ISOC2X() {
+    return __GLIBC_USE_ISOC2X;
+  }
+
+  private static final int __USE_ISOC11 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_ISOC11}.
+   * C source: {@code #define __USE_ISOC11 1}
+   */
+  public static int __USE_ISOC11() {
+    return __USE_ISOC11;
+  }
+
+  private static final int __USE_ISOC99 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_ISOC99}.
+   * C source: {@code #define __USE_ISOC99 1}
+   */
+  public static int __USE_ISOC99() {
+    return __USE_ISOC99;
+  }
+
+  private static final int __USE_ISOC95 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_ISOC95}.
+   * C source: {@code #define __USE_ISOC95 1}
+   */
+  public static int __USE_ISOC95() {
+    return __USE_ISOC95;
+  }
+
+  private static final int __USE_POSIX_IMPLICITLY = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_POSIX_IMPLICITLY}.
+   * C source: {@code #define __USE_POSIX_IMPLICITLY 1}
+   */
+  public static int __USE_POSIX_IMPLICITLY() {
+    return __USE_POSIX_IMPLICITLY;
+  }
+
+  private static final int _POSIX_SOURCE = 1;
+
+  /**
+   * Returns the value of the C macro {@code _POSIX_SOURCE}.
+   * C source: {@code #define _POSIX_SOURCE 1}
+   */
+  public static int _POSIX_SOURCE() {
+    return _POSIX_SOURCE;
+  }
+
+  private static final int __USE_POSIX = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_POSIX}.
+   * C source: {@code #define __USE_POSIX 1}
+   */
+  public static int __USE_POSIX() {
+    return __USE_POSIX;
+  }
+
+  private static final int __USE_POSIX2 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_POSIX2}.
+   * C source: {@code #define __USE_POSIX2 1}
+   */
+  public static int __USE_POSIX2() {
+    return __USE_POSIX2;
+  }
+
+  private static final int __USE_POSIX199309 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_POSIX199309}.
+   * C source: {@code #define __USE_POSIX199309 1}
+   */
+  public static int __USE_POSIX199309() {
+    return __USE_POSIX199309;
+  }
+
+  private static final int __USE_POSIX199506 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_POSIX199506}.
+   * C source: {@code #define __USE_POSIX199506 1}
+   */
+  public static int __USE_POSIX199506() {
+    return __USE_POSIX199506;
+  }
+
+  private static final int __USE_XOPEN2K = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_XOPEN2K}.
+   * C source: {@code #define __USE_XOPEN2K 1}
+   */
+  public static int __USE_XOPEN2K() {
+    return __USE_XOPEN2K;
+  }
+
+  private static final int __USE_XOPEN2K8 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_XOPEN2K8}.
+   * C source: {@code #define __USE_XOPEN2K8 1}
+   */
+  public static int __USE_XOPEN2K8() {
+    return __USE_XOPEN2K8;
+  }
+
+  private static final int _ATFILE_SOURCE = 1;
+
+  /**
+   * Returns the value of the C macro {@code _ATFILE_SOURCE}.
+   * C source: {@code #define _ATFILE_SOURCE 1}
+   */
+  public static int _ATFILE_SOURCE() {
+    return _ATFILE_SOURCE;
+  }
+
+  private static final int __WORDSIZE = 64;
+
+  /**
+   * Returns the value of the C macro {@code __WORDSIZE}.
+   * C source: {@code #define __WORDSIZE 64}
+   */
+  public static int __WORDSIZE() {
+    return __WORDSIZE;
+  }
+
+  private static final int __WORDSIZE_TIME64_COMPAT32 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __WORDSIZE_TIME64_COMPAT32}.
+   * C source: {@code #define __WORDSIZE_TIME64_COMPAT32 1}
+   */
+  public static int __WORDSIZE_TIME64_COMPAT32() {
+    return __WORDSIZE_TIME64_COMPAT32;
+  }
+
+  private static final int __SYSCALL_WORDSIZE = 64;
+
+  /**
+   * Returns the value of the C macro {@code __SYSCALL_WORDSIZE}.
+   * C source: {@code #define __SYSCALL_WORDSIZE 64}
+   */
+  public static int __SYSCALL_WORDSIZE() {
+    return __SYSCALL_WORDSIZE;
+  }
+
+  private static final int __USE_MISC = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_MISC}.
+   * C source: {@code #define __USE_MISC 1}
+   */
+  public static int __USE_MISC() {
+    return __USE_MISC;
+  }
+
+  private static final int __USE_ATFILE = 1;
+
+  /**
+   * Returns the value of the C macro {@code __USE_ATFILE}.
+   * C source: {@code #define __USE_ATFILE 1}
+   */
+  public static int __USE_ATFILE() {
+    return __USE_ATFILE;
+  }
+
+  private static final int __USE_FORTIFY_LEVEL = 0;
+
+  /**
+   * Returns the value of the C macro {@code __USE_FORTIFY_LEVEL}.
+   * C source: {@code #define __USE_FORTIFY_LEVEL 0}
+   */
+  public static int __USE_FORTIFY_LEVEL() {
+    return __USE_FORTIFY_LEVEL;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_GETS = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_DEPRECATED_GETS}.
+   * C source: {@code #define __GLIBC_USE_DEPRECATED_GETS 0}
+   */
+  public static int __GLIBC_USE_DEPRECATED_GETS() {
+    return __GLIBC_USE_DEPRECATED_GETS;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_SCANF = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_DEPRECATED_SCANF}.
+   * C source: {@code #define __GLIBC_USE_DEPRECATED_SCANF 0}
+   */
+  public static int __GLIBC_USE_DEPRECATED_SCANF() {
+    return __GLIBC_USE_DEPRECATED_SCANF;
+  }
+
+  private static final int _STDC_PREDEF_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _STDC_PREDEF_H}.
+   * C source: {@code #define _STDC_PREDEF_H 1}
+   */
+  public static int _STDC_PREDEF_H() {
+    return _STDC_PREDEF_H;
+  }
+
+  private static final int __STDC_IEC_559__ = 1;
+
+  /**
+   * Returns the value of the C macro {@code __STDC_IEC_559__}.
+   * C source: {@code #define __STDC_IEC_559__ 1}
+   */
+  public static int __STDC_IEC_559__() {
+    return __STDC_IEC_559__;
+  }
+
+  private static final int __STDC_IEC_559_COMPLEX__ = 1;
+
+  /**
+   * Returns the value of the C macro {@code __STDC_IEC_559_COMPLEX__}.
+   * C source: {@code #define __STDC_IEC_559_COMPLEX__ 1}
+   */
+  public static int __STDC_IEC_559_COMPLEX__() {
+    return __STDC_IEC_559_COMPLEX__;
+  }
+
+  private static final int __GNU_LIBRARY__ = 6;
+
+  /**
+   * Returns the value of the C macro {@code __GNU_LIBRARY__}.
+   * C source: {@code #define __GNU_LIBRARY__ 6}
+   */
+  public static int __GNU_LIBRARY__() {
+    return __GNU_LIBRARY__;
+  }
+
+  private static final int __GLIBC__ = 2;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC__}.
+   * C source: {@code #define __GLIBC__ 2}
+   */
+  public static int __GLIBC__() {
+    return __GLIBC__;
+  }
+
+  private static final int __GLIBC_MINOR__ = 35;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_MINOR__}.
+   * C source: {@code #define __GLIBC_MINOR__ 35}
+   */
+  public static int __GLIBC_MINOR__() {
+    return __GLIBC_MINOR__;
+  }
+
+  private static final int _SYS_CDEFS_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _SYS_CDEFS_H}.
+   * C source: {@code #define _SYS_CDEFS_H 1}
+   */
+  public static int _SYS_CDEFS_H() {
+    return _SYS_CDEFS_H;
+  }
+
+  private static final int __glibc_c99_flexarr_available = 1;
+
+  /**
+   * Returns the value of the C macro {@code __glibc_c99_flexarr_available}.
+   * C source: {@code #define __glibc_c99_flexarr_available 1}
+   */
+  public static int __glibc_c99_flexarr_available() {
+    return __glibc_c99_flexarr_available;
+  }
+
+  private static final int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI = 0;
+
+  /**
+   * Returns the value of the C macro {@code __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI}.
+   * C source: {@code #define __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI 0}
+   */
+  public static int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI() {
+    return __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI;
+  }
+
+  private static final int __HAVE_GENERIC_SELECTION = 1;
+
+  /**
+   * Returns the value of the C macro {@code __HAVE_GENERIC_SELECTION}.
+   * C source: {@code #define __HAVE_GENERIC_SELECTION 1}
+   */
+  public static int __HAVE_GENERIC_SELECTION() {
+    return __HAVE_GENERIC_SELECTION;
+  }
+
+  private static final int __GLIBC_USE_LIB_EXT2 = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_LIB_EXT2}.
+   * C source: {@code #define __GLIBC_USE_LIB_EXT2 0}
+   */
+  public static int __GLIBC_USE_LIB_EXT2() {
+    return __GLIBC_USE_LIB_EXT2;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_IEC_60559_BFP_EXT}.
+   * C source: {@code #define __GLIBC_USE_IEC_60559_BFP_EXT 0}
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT_C2X = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_IEC_60559_BFP_EXT_C2X}.
+   * C source: {@code #define __GLIBC_USE_IEC_60559_BFP_EXT_C2X 0}
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_EXT = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_IEC_60559_EXT}.
+   * C source: {@code #define __GLIBC_USE_IEC_60559_EXT 0}
+   */
+  public static int __GLIBC_USE_IEC_60559_EXT() {
+    return __GLIBC_USE_IEC_60559_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_IEC_60559_FUNCS_EXT}.
+   * C source: {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT 0}
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X}.
+   * C source: {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X 0}
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_TYPES_EXT = 0;
+
+  /**
+   * Returns the value of the C macro {@code __GLIBC_USE_IEC_60559_TYPES_EXT}.
+   * C source: {@code #define __GLIBC_USE_IEC_60559_TYPES_EXT 0}
+   */
+  public static int __GLIBC_USE_IEC_60559_TYPES_EXT() {
+    return __GLIBC_USE_IEC_60559_TYPES_EXT;
+  }
+
+  private static final int _BITS_TYPES_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _BITS_TYPES_H}.
+   * C source: {@code #define _BITS_TYPES_H 1}
+   */
+  public static int _BITS_TYPES_H() {
+    return _BITS_TYPES_H;
+  }
+
+  private static final int _BITS_TYPESIZES_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _BITS_TYPESIZES_H}.
+   * C source: {@code #define _BITS_TYPESIZES_H 1}
+   */
+  public static int _BITS_TYPESIZES_H() {
+    return _BITS_TYPESIZES_H;
+  }
+
+  private static final int __OFF_T_MATCHES_OFF64_T = 1;
+
+  /**
+   * Returns the value of the C macro {@code __OFF_T_MATCHES_OFF64_T}.
+   * C source: {@code #define __OFF_T_MATCHES_OFF64_T 1}
+   */
+  public static int __OFF_T_MATCHES_OFF64_T() {
+    return __OFF_T_MATCHES_OFF64_T;
+  }
+
+  private static final int __INO_T_MATCHES_INO64_T = 1;
+
+  /**
+   * Returns the value of the C macro {@code __INO_T_MATCHES_INO64_T}.
+   * C source: {@code #define __INO_T_MATCHES_INO64_T 1}
+   */
+  public static int __INO_T_MATCHES_INO64_T() {
+    return __INO_T_MATCHES_INO64_T;
+  }
+
+  private static final int __RLIM_T_MATCHES_RLIM64_T = 1;
+
+  /**
+   * Returns the value of the C macro {@code __RLIM_T_MATCHES_RLIM64_T}.
+   * C source: {@code #define __RLIM_T_MATCHES_RLIM64_T 1}
+   */
+  public static int __RLIM_T_MATCHES_RLIM64_T() {
+    return __RLIM_T_MATCHES_RLIM64_T;
+  }
+
+  private static final int __STATFS_MATCHES_STATFS64 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __STATFS_MATCHES_STATFS64}.
+   * C source: {@code #define __STATFS_MATCHES_STATFS64 1}
+   */
+  public static int __STATFS_MATCHES_STATFS64() {
+    return __STATFS_MATCHES_STATFS64;
+  }
+
+  private static final int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 = 1;
+
+  /**
+   * Returns the value of the C macro {@code __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64}.
+   * C source: {@code #define __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 1}
+   */
+  public static int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64() {
+    return __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64;
+  }
+
+  private static final int __FD_SETSIZE = 1024;
+
+  /**
+   * Returns the value of the C macro {@code __FD_SETSIZE}.
+   * C source: {@code #define __FD_SETSIZE 1024}
+   */
+  public static int __FD_SETSIZE() {
+    return __FD_SETSIZE;
+  }
+
+  private static final int _BITS_TIME64_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _BITS_TIME64_H}.
+   * C source: {@code #define _BITS_TIME64_H 1}
+   */
+  public static int _BITS_TIME64_H() {
+    return _BITS_TIME64_H;
+  }
+
+  private static final int _BITS_WCHAR_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _BITS_WCHAR_H}.
+   * C source: {@code #define _BITS_WCHAR_H 1}
+   */
+  public static int _BITS_WCHAR_H() {
+    return _BITS_WCHAR_H;
+  }
+
+  private static final int _BITS_STDINT_INTN_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _BITS_STDINT_INTN_H}.
+   * C source: {@code #define _BITS_STDINT_INTN_H 1}
+   */
+  public static int _BITS_STDINT_INTN_H() {
+    return _BITS_STDINT_INTN_H;
+  }
+
+  private static final int _BITS_STDINT_UINTN_H = 1;
+
+  /**
+   * Returns the value of the C macro {@code _BITS_STDINT_UINTN_H}.
+   * C source: {@code #define _BITS_STDINT_UINTN_H 1}
+   */
+  public static int _BITS_STDINT_UINTN_H() {
+    return _BITS_STDINT_UINTN_H;
+  }
+
+  private static final int true_ = 1;
+
+  /**
+   * Returns the value of the C macro {@code true}.
+   * C source: {@code #define true 1}
+   */
+  public static int true_() {
+    return true_;
+  }
+
+  private static final int false_ = 0;
+
+  /**
+   * Returns the value of the C macro {@code false}.
+   * C source: {@code #define false 0}
+   */
+  public static int false_() {
+    return false_;
+  }
+
+  private static final int __bool_true_false_are_defined = 1;
+
+  /**
+   * Returns the value of the C macro {@code __bool_true_false_are_defined}.
+   * C source: {@code #define __bool_true_false_are_defined 1}
+   */
+  public static int __bool_true_false_are_defined() {
+    return __bool_true_false_are_defined;
+  }
+
+  private static final int L2Expanded = 0;
+
+  /**
+   * Returns the value of the enumerator {@code L2Expanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.L2Expanded = 0}
+   */
+  public static int L2Expanded() {
+    return L2Expanded;
+  }
+
+  private static final int L2SqrtExpanded = 1;
+
+  /**
+   * Returns the value of the enumerator {@code L2SqrtExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.L2SqrtExpanded = 1}
+   */
+  public static int L2SqrtExpanded() {
+    return L2SqrtExpanded;
+  }
+
+  private static final int CosineExpanded = 2;
+
+  /**
+   * Returns the value of the enumerator {@code CosineExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.CosineExpanded = 2}
+   */
+  public static int CosineExpanded() {
+    return CosineExpanded;
+  }
+
+  private static final int L1 = 3;
+
+  /**
+   * Returns the value of the enumerator {@code L1} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.L1 = 3}
+   */
+  public static int L1() {
+    return L1;
+  }
+
+  private static final int L2Unexpanded = 4;
+
+  /**
+   * Returns the value of the enumerator {@code L2Unexpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.L2Unexpanded = 4}
+   */
+  public static int L2Unexpanded() {
+    return L2Unexpanded;
+  }
+
+  private static final int L2SqrtUnexpanded = 5;
+
+  /**
+   * Returns the value of the enumerator {@code L2SqrtUnexpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.L2SqrtUnexpanded = 5}
+   */
+  public static int L2SqrtUnexpanded() {
+    return L2SqrtUnexpanded;
+  }
+
+  private static final int InnerProduct = 6;
+
+  /**
+   * Returns the value of the enumerator {@code InnerProduct} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.InnerProduct = 6}
+   */
+  public static int InnerProduct() {
+    return InnerProduct;
+  }
+
+  private static final int Linf = 7;
+
+  /**
+   * Returns the value of the enumerator {@code Linf} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.Linf = 7}
+   */
+  public static int Linf() {
+    return Linf;
+  }
+
+  private static final int Canberra = 8;
+
+  /**
+   * Returns the value of the enumerator {@code Canberra} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.Canberra = 8}
+   */
+  public static int Canberra() {
+    return Canberra;
+  }
+
+  private static final int LpUnexpanded = 9;
+
+  /**
+   * Returns the value of the enumerator {@code LpUnexpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.LpUnexpanded = 9}
+   */
+  public static int LpUnexpanded() {
+    return LpUnexpanded;
+  }
+
+  private static final int CorrelationExpanded = 10;
+
+  /**
+   * Returns the value of the enumerator {@code CorrelationExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.CorrelationExpanded = 10}
+   */
+  public static int CorrelationExpanded() {
+    return CorrelationExpanded;
+  }
+
+  private static final int JaccardExpanded = 11;
+
+  /**
+   * Returns the value of the enumerator {@code JaccardExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.JaccardExpanded = 11}
+   */
+  public static int JaccardExpanded() {
+    return JaccardExpanded;
+  }
+
+  private static final int HellingerExpanded = 12;
+
+  /**
+   * Returns the value of the enumerator {@code HellingerExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.HellingerExpanded = 12}
+   */
+  public static int HellingerExpanded() {
+    return HellingerExpanded;
+  }
+
+  private static final int Haversine = 13;
+
+  /**
+   * Returns the value of the enumerator {@code Haversine} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.Haversine = 13}
+   */
+  public static int Haversine() {
+    return Haversine;
+  }
+
+  private static final int BrayCurtis = 14;
+
+  /**
+   * Returns the value of the enumerator {@code BrayCurtis} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.BrayCurtis = 14}
+   */
+  public static int BrayCurtis() {
+    return BrayCurtis;
+  }
+
+  private static final int JensenShannon = 15;
+
+  /**
+   * Returns the value of the enumerator {@code JensenShannon} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.JensenShannon = 15}
+   */
+  public static int JensenShannon() {
+    return JensenShannon;
+  }
+
+  private static final int HammingUnexpanded = 16;
+
+  /**
+   * Returns the value of the enumerator {@code HammingUnexpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.HammingUnexpanded = 16}
+   */
+  public static int HammingUnexpanded() {
+    return HammingUnexpanded;
+  }
+
+  private static final int KLDivergence = 17;
+
+  /**
+   * Returns the value of the enumerator {@code KLDivergence} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.KLDivergence = 17}
+   */
+  public static int KLDivergence() {
+    return KLDivergence;
+  }
+
+  private static final int RusselRaoExpanded = 18;
+
+  /**
+   * Returns the value of the enumerator {@code RusselRaoExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.RusselRaoExpanded = 18}
+   */
+  public static int RusselRaoExpanded() {
+    return RusselRaoExpanded;
+  }
+
+  private static final int DiceExpanded = 19;
+
+  /**
+   * Returns the value of the enumerator {@code DiceExpanded} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.DiceExpanded = 19}
+   */
+  public static int DiceExpanded() {
+    return DiceExpanded;
+  }
+
+  private static final int Precomputed = 100;
+
+  /**
+   * Returns the value of the enumerator {@code Precomputed} of an anonymous C enum.
+   * C source: {@code enum <anonymous>.Precomputed = 100}
+   */
+  public static int Precomputed() {
+    return Precomputed;
+  }
+
+  /**
+   * Layout of the C typedef {@code __u_char}.
+   * C source: {@code typedef unsigned char __u_char}
+   */
+  public static final OfByte __u_char = C_CHAR;
+  /**
+   * Layout of the C typedef {@code __u_short}.
+   * C source: {@code typedef unsigned short __u_short}
+   */
+  public static final OfShort __u_short = C_SHORT;
+  /**
+   * Layout of the C typedef {@code __u_int}.
+   * C source: {@code typedef unsigned int __u_int}
+   */
+  public static final OfInt __u_int = C_INT;
+  /**
+   * Layout of the C typedef {@code __u_long}.
+   * C source: {@code typedef unsigned long __u_long}
+   */
+  public static final OfLong __u_long = C_LONG;
+  /**
+   * Layout of the C typedef {@code __int8_t}.
+   * C source: {@code typedef signed char __int8_t}
+   */
+  public static final OfByte __int8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code __uint8_t}.
+   * C source: {@code typedef unsigned char __uint8_t}
+   */
+  public static final OfByte __uint8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code __int16_t}.
+   * C source: {@code typedef short __int16_t}
+   */
+  public static final OfShort __int16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code __uint16_t}.
+   * C source: {@code typedef unsigned short __uint16_t}
+   */
+  public static final OfShort __uint16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code __int32_t}.
+   * C source: {@code typedef int __int32_t}
+   */
+  public static final OfInt __int32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __uint32_t}.
+   * C source: {@code typedef unsigned int __uint32_t}
+   */
+  public static final OfInt __uint32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __int64_t}.
+   * C source: {@code typedef long __int64_t}
+   */
+  public static final OfLong __int64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __uint64_t}.
+   * C source: {@code typedef unsigned long __uint64_t}
+   */
+  public static final OfLong __uint64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __int_least8_t}.
+   * C source: {@code typedef __int8_t __int_least8_t}
+   */
+  public static final OfByte __int_least8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code __uint_least8_t}.
+   * C source: {@code typedef __uint8_t __uint_least8_t}
+   */
+  public static final OfByte __uint_least8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code __int_least16_t}.
+   * C source: {@code typedef __int16_t __int_least16_t}
+   */
+  public static final OfShort __int_least16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code __uint_least16_t}.
+   * C source: {@code typedef __uint16_t __uint_least16_t}
+   */
+  public static final OfShort __uint_least16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code __int_least32_t}.
+   * C source: {@code typedef __int32_t __int_least32_t}
+   */
+  public static final OfInt __int_least32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __uint_least32_t}.
+   * C source: {@code typedef __uint32_t __uint_least32_t}
+   */
+  public static final OfInt __uint_least32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __int_least64_t}.
+   * C source: {@code typedef __int64_t __int_least64_t}
+   */
+  public static final OfLong __int_least64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __uint_least64_t}.
+   * C source: {@code typedef __uint64_t __uint_least64_t}
+   */
+  public static final OfLong __uint_least64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __quad_t}.
+   * C source: {@code typedef long __quad_t}
+   */
+  public static final OfLong __quad_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __u_quad_t}.
+   * C source: {@code typedef unsigned long __u_quad_t}
+   */
+  public static final OfLong __u_quad_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __intmax_t}.
+   * C source: {@code typedef long __intmax_t}
+   */
+  public static final OfLong __intmax_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __uintmax_t}.
+   * C source: {@code typedef unsigned long __uintmax_t}
+   */
+  public static final OfLong __uintmax_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __dev_t}.
+   * C source: {@code typedef unsigned long __dev_t}
+   */
+  public static final OfLong __dev_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __uid_t}.
+   * C source: {@code typedef unsigned int __uid_t}
+   */
+  public static final OfInt __uid_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __gid_t}.
+   * C source: {@code typedef unsigned int __gid_t}
+   */
+  public static final OfInt __gid_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __ino_t}.
+   * C source: {@code typedef unsigned long __ino_t}
+   */
+  public static final OfLong __ino_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __ino64_t}.
+   * C source: {@code typedef unsigned long __ino64_t}
+   */
+  public static final OfLong __ino64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __mode_t}.
+   * C source: {@code typedef unsigned int __mode_t}
+   */
+  public static final OfInt __mode_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __nlink_t}.
+   * C source: {@code typedef unsigned long __nlink_t}
+   */
+  public static final OfLong __nlink_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __off_t}.
+   * C source: {@code typedef long __off_t}
+   */
+  public static final OfLong __off_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __off64_t}.
+   * C source: {@code typedef long __off64_t}
+   */
+  public static final OfLong __off64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __pid_t}.
+   * C source: {@code typedef int __pid_t}
+   */
+  public static final OfInt __pid_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __clock_t}.
+   * C source: {@code typedef long __clock_t}
+   */
+  public static final OfLong __clock_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __rlim_t}.
+   * C source: {@code typedef unsigned long __rlim_t}
+   */
+  public static final OfLong __rlim_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __rlim64_t}.
+   * C source: {@code typedef unsigned long __rlim64_t}
+   */
+  public static final OfLong __rlim64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __id_t}.
+   * C source: {@code typedef unsigned int __id_t}
+   */
+  public static final OfInt __id_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __time_t}.
+   * C source: {@code typedef long __time_t}
+   */
+  public static final OfLong __time_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __useconds_t}.
+   * C source: {@code typedef unsigned int __useconds_t}
+   */
+  public static final OfInt __useconds_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __suseconds_t}.
+   * C source: {@code typedef long __suseconds_t}
+   */
+  public static final OfLong __suseconds_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __suseconds64_t}.
+   * C source: {@code typedef long __suseconds64_t}
+   */
+  public static final OfLong __suseconds64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __daddr_t}.
+   * C source: {@code typedef int __daddr_t}
+   */
+  public static final OfInt __daddr_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __key_t}.
+   * C source: {@code typedef int __key_t}
+   */
+  public static final OfInt __key_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __clockid_t}.
+   * C source: {@code typedef int __clockid_t}
+   */
+  public static final OfInt __clockid_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __timer_t}.
+   * C source: {@code typedef void *__timer_t}
+   */
+  public static final AddressLayout __timer_t = C_POINTER;
+  /**
+   * Layout of the C typedef {@code __blksize_t}.
+   * C source: {@code typedef long __blksize_t}
+   */
+  public static final OfLong __blksize_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __blkcnt_t}.
+   * C source: {@code typedef long __blkcnt_t}
+   */
+  public static final OfLong __blkcnt_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __blkcnt64_t}.
+   * C source: {@code typedef long __blkcnt64_t}
+   */
+  public static final OfLong __blkcnt64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __fsblkcnt_t}.
+   * C source: {@code typedef unsigned long __fsblkcnt_t}
+   */
+  public static final OfLong __fsblkcnt_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __fsblkcnt64_t}.
+   * C source: {@code typedef unsigned long __fsblkcnt64_t}
+   */
+  public static final OfLong __fsblkcnt64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __fsfilcnt_t}.
+   * C source: {@code typedef unsigned long __fsfilcnt_t}
+   */
+  public static final OfLong __fsfilcnt_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __fsfilcnt64_t}.
+   * C source: {@code typedef unsigned long __fsfilcnt64_t}
+   */
+  public static final OfLong __fsfilcnt64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __fsword_t}.
+   * C source: {@code typedef long __fsword_t}
+   */
+  public static final OfLong __fsword_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __ssize_t}.
+   * C source: {@code typedef long __ssize_t}
+   */
+  public static final OfLong __ssize_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __syscall_slong_t}.
+   * C source: {@code typedef long __syscall_slong_t}
+   */
+  public static final OfLong __syscall_slong_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __syscall_ulong_t}.
+   * C source: {@code typedef unsigned long __syscall_ulong_t}
+   */
+  public static final OfLong __syscall_ulong_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __loff_t}.
+   * C source: {@code typedef __off64_t __loff_t}
+   */
+  public static final OfLong __loff_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __caddr_t}.
+   * C source: {@code typedef char *__caddr_t}
+   */
+  public static final AddressLayout __caddr_t = C_POINTER;
+  /**
+   * Layout of the C typedef {@code __intptr_t}.
+   * C source: {@code typedef long __intptr_t}
+   */
+  public static final OfLong __intptr_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code __socklen_t}.
+   * C source: {@code typedef unsigned int __socklen_t}
+   */
+  public static final OfInt __socklen_t = C_INT;
+  /**
+   * Layout of the C typedef {@code __sig_atomic_t}.
+   * C source: {@code typedef int __sig_atomic_t}
+   */
+  public static final OfInt __sig_atomic_t = C_INT;
+  /**
+   * Layout of the C typedef {@code int8_t}.
+   * C source: {@code typedef __int8_t int8_t}
+   */
+  public static final OfByte int8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code int16_t}.
+   * C source: {@code typedef __int16_t int16_t}
+   */
+  public static final OfShort int16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code int32_t}.
+   * C source: {@code typedef __int32_t int32_t}
+   */
+  public static final OfInt int32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code int64_t}.
+   * C source: {@code typedef __int64_t int64_t}
+   */
+  public static final OfLong int64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code uint8_t}.
+   * C source: {@code typedef __uint8_t uint8_t}
+   */
+  public static final OfByte uint8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code uint16_t}.
+   * C source: {@code typedef __uint16_t uint16_t}
+   */
+  public static final OfShort uint16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code uint32_t}.
+   * C source: {@code typedef __uint32_t uint32_t}
+   */
+  public static final OfInt uint32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code uint64_t}.
+   * C source: {@code typedef __uint64_t uint64_t}
+   */
+  public static final OfLong uint64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code int_least8_t}.
+   * C source: {@code typedef __int_least8_t int_least8_t}
+   */
+  public static final OfByte int_least8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code int_least16_t}.
+   * C source: {@code typedef __int_least16_t int_least16_t}
+   */
+  public static final OfShort int_least16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code int_least32_t}.
+   * C source: {@code typedef __int_least32_t int_least32_t}
+   */
+  public static final OfInt int_least32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code int_least64_t}.
+   * C source: {@code typedef __int_least64_t int_least64_t}
+   */
+  public static final OfLong int_least64_t = C_LONG;
+  /**
+   * Layout of the C typedef {@code uint_least8_t}.
+   * C source: {@code typedef __uint_least8_t uint_least8_t}
+   */
+  public static final OfByte uint_least8_t = C_CHAR;
+  /**
+   * Layout of the C typedef {@code uint_least16_t}.
+   * C source: {@code typedef __uint_least16_t uint_least16_t}
+   */
+  public static final OfShort uint_least16_t = C_SHORT;
+  /**
+   * Layout of the C typedef {@code uint_least32_t}.
+   * C source: {@code typedef __uint_least32_t uint_least32_t}
+   */
+  public static final OfInt uint_least32_t = C_INT;
+  /**
+   * Layout of the C typedef {@code uint_least64_t}.
+   * C source: {@code typedef __uint_least64_t uint_least64_t}
+   */
+  public static final OfLong uint_least64_t = C_LONG;
+  /** {@snippet lang = c :
+   * typedef signed char int_fast8_t
+   * }
+   */
+  public static final OfByte int_fast8_t = CagraH.C_CHAR;
+  /** {@snippet lang = c :
+   * typedef long int_fast16_t
+   * }
+   */
+  public static final OfLong int_fast16_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef long int_fast32_t
+   * }
+   */
+  public static final OfLong int_fast32_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef long int_fast64_t
+   * }
+   */
+  public static final OfLong int_fast64_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef unsigned char uint_fast8_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfByte uint_fast8_t = CagraH.C_CHAR;
+  /** {@snippet lang = c :
+   * typedef unsigned long uint_fast16_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfLong uint_fast16_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef unsigned long uint_fast32_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfLong uint_fast32_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef unsigned long uint_fast64_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfLong uint_fast64_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef long intptr_t
+   * }
+   */
+  public static final OfLong intptr_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef unsigned long uintptr_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfLong uintptr_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef __intmax_t intmax_t
+   * }
+   */
+  public static final OfLong intmax_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef __uintmax_t uintmax_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfLong uintmax_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef long ptrdiff_t
+   * }
+   */
+  public static final OfLong ptrdiff_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef unsigned long size_t
+   * }
+   * Unsigned in C; the Java carrier type is signed. */
+  public static final OfLong size_t = CagraH.C_LONG;
+  /** {@snippet lang = c :
+   * typedef int wchar_t
+   * }
+   */
+  public static final OfInt wchar_t = CagraH.C_INT;
+  private static final int kDLCPU = (int) 1L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLCPU = 1
+   * }
+   */
+  public static int kDLCPU() {
+    return kDLCPU;
+  }
+
+  private static final int kDLCUDA = (int) 2L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLCUDA = 2
+   * }
+   */
+  public static int kDLCUDA() {
+    return kDLCUDA;
+  }
+
+  private static final int kDLCUDAHost = (int) 3L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLCUDAHost = 3
+   * }
+   */
+  public static int kDLCUDAHost() {
+    return kDLCUDAHost;
+  }
+
+  private static final int kDLOpenCL = (int) 4L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLOpenCL = 4
+   * }
+   */
+  public static int kDLOpenCL() {
+    return kDLOpenCL;
+  }
+
+  private static final int kDLVulkan = (int) 7L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLVulkan = 7
+   * }
+   */
+  public static int kDLVulkan() {
+    return kDLVulkan;
+  }
+
+  private static final int kDLMetal = (int) 8L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLMetal = 8
+   * }
+   */
+  public static int kDLMetal() {
+    return kDLMetal;
+  }
+
+  private static final int kDLVPI = (int) 9L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLVPI = 9
+   * }
+   */
+  public static int kDLVPI() {
+    return kDLVPI;
+  }
+
+  private static final int kDLROCM = (int) 10L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLROCM = 10
+   * }
+   */
+  public static int kDLROCM() {
+    return kDLROCM;
+  }
+
+  private static final int kDLROCMHost = (int) 11L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLROCMHost = 11
+   * }
+   */
+  public static int kDLROCMHost() {
+    return kDLROCMHost;
+  }
+
+  private static final int kDLExtDev = (int) 12L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLExtDev = 12
+   * }
+   */
+  public static int kDLExtDev() {
+    return kDLExtDev;
+  }
+
+  private static final int kDLCUDAManaged = (int) 13L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLCUDAManaged = 13
+   * }
+   */
+  public static int kDLCUDAManaged() {
+    return kDLCUDAManaged;
+  }
+
+  private static final int kDLOneAPI = (int) 14L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLOneAPI = 14
+   * }
+   */
+  public static int kDLOneAPI() {
+    return kDLOneAPI;
+  }
+
+  private static final int kDLWebGPU = (int) 15L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLWebGPU = 15
+   * }
+   */
+  public static int kDLWebGPU() {
+    return kDLWebGPU;
+  }
+
+  private static final int kDLHexagon = (int) 16L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLHexagon = 16
+   * }
+   */
+  public static int kDLHexagon() {
+    return kDLHexagon;
+  }
+
+  private static final int kDLInt = (int) 0L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLInt = 0
+   * }
+   */
+  public static int kDLInt() {
+    return kDLInt;
+  }
+
+  private static final int kDLUInt = (int) 1L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLUInt = 1
+   * }
+   */
+  public static int kDLUInt() {
+    return kDLUInt;
+  }
+
+  private static final int kDLFloat = (int) 2L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLFloat = 2
+   * }
+   */
+  public static int kDLFloat() {
+    return kDLFloat;
+  }
+
+  private static final int kDLOpaqueHandle = (int) 3L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLOpaqueHandle = 3
+   * }
+   */
+  public static int kDLOpaqueHandle() {
+    return kDLOpaqueHandle;
+  }
+
+  private static final int kDLBfloat = (int) 4L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLBfloat = 4
+   * }
+   */
+  public static int kDLBfloat() {
+    return kDLBfloat;
+  }
+
+  private static final int kDLComplex = (int) 5L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLComplex = 5
+   * }
+   */
+  public static int kDLComplex() {
+    return kDLComplex;
+  }
+
+  private static final int kDLBool = (int) 6L;
+
+  /** {@snippet lang = c :
+   * enum <anonymous>.kDLBool = 6
+   * }
+   */
+  public static int kDLBool() {
+    return kDLBool;
+  }
+
+  private static final int AUTO_SELECT = (int) 0L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo.AUTO_SELECT = 0
+   * }
+   */
+  public static int AUTO_SELECT() {
+    return AUTO_SELECT;
+  }
+
+  private static final int IVF_PQ = (int) 1L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo.IVF_PQ = 1
+   * }
+   */
+  public static int IVF_PQ() {
+    return IVF_PQ;
+  }
+
+  private static final int NN_DESCENT = (int) 2L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo.NN_DESCENT = 2
+   * }
+   */
+  public static int NN_DESCENT() {
+    return NN_DESCENT;
+  }
+
+  /** Layout of the pointer typedef below: an address, not the struct it points to.
+   * {@snippet lang = c :
+   * typedef struct cuvsCagraCompressionParams {
+   *     uint32_t pq_bits;
+   *     uint32_t pq_dim;
+   *     uint32_t vq_n_centers;
+   *     uint32_t kmeans_n_iters;
+   *     double vq_kmeans_trainset_fraction;
+   *     double pq_kmeans_trainset_fraction;
+   * } *cuvsCagraCompressionParams_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraCompressionParams_t = CagraH.C_POINTER;
+  /** Layout of the pointer typedef below: an address, not the struct it points to.
+   * {@snippet lang = c :
+   * typedef struct cuvsCagraIndexParams {
+   *     cuvsDistanceType metric;
+   *     long intermediate_graph_degree;
+   *     long graph_degree;
+   *     enum cuvsCagraGraphBuildAlgo build_algo;
+   *     long nn_descent_niter;
+   *     cuvsCagraCompressionParams_t compression;
+   * } *cuvsCagraIndexParams_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraIndexParams_t = CagraH.C_POINTER;
+  private static final int SINGLE_CTA = (int) 0L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraSearchAlgo.SINGLE_CTA = 0
+   * }
+   */
+  public static int SINGLE_CTA() {
+    return SINGLE_CTA;
+  }
+
+  private static final int MULTI_CTA = (int) 1L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraSearchAlgo.MULTI_CTA = 1
+   * }
+   */
+  public static int MULTI_CTA() {
+    return MULTI_CTA;
+  }
+
+  private static final int MULTI_KERNEL = (int) 2L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraSearchAlgo.MULTI_KERNEL = 2
+   * }
+   */
+  public static int MULTI_KERNEL() {
+    return MULTI_KERNEL;
+  }
+
+  private static final int AUTO = (int) 3L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraSearchAlgo.AUTO = 3
+   * }
+   */
+  public static int AUTO() {
+    return AUTO;
+  }
+
+  private static final int HASH = (int) 0L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraHashMode.HASH = 0
+   * }
+   */
+  public static int HASH() {
+    return HASH;
+  }
+
+  private static final int SMALL = (int) 1L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraHashMode.SMALL = 1
+   * }
+   */
+  public static int SMALL() {
+    return SMALL;
+  }
+
+  private static final int AUTO_HASH = (int) 2L;
+
+  /** {@snippet lang = c :
+   * enum cuvsCagraHashMode.AUTO_HASH = 2
+   * }
+   */
+  public static int AUTO_HASH() {
+    return AUTO_HASH;
+  }
+
+  /** Layout of the pointer typedef below: an address, not the struct it points to.
+   * {@snippet lang = c :
+   * typedef struct cuvsCagraSearchParams {
+   *     long max_queries;
+   *     long itopk_size;
+   *     long max_iterations;
+   *     enum cuvsCagraSearchAlgo algo;
+   *     long team_size;
+   *     long search_width;
+   *     long min_iterations;
+   *     long thread_block_size;
+   *     enum cuvsCagraHashMode hashmap_mode;
+   *     long hashmap_min_bitlen;
+   *     float hashmap_max_fill_rate;
+   *     uint32_t num_random_samplings;
+   *     uint64_t rand_xor_mask;
+   * } *cuvsCagraSearchParams_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraSearchParams_t = CagraH.C_POINTER;
+  /** {@snippet lang = c :
+   * typedef cuvsCagraIndex *cuvsCagraIndex_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraIndex_t = CagraH.C_POINTER;
+  private static final long _POSIX_C_SOURCE = 200809L;
+
+  /** {@snippet lang = c :
+   * #define _POSIX_C_SOURCE 200809
+   * }
+   */
+  public static long _POSIX_C_SOURCE() {
+    return _POSIX_C_SOURCE;
+  }
+
+  private static final int __TIMESIZE = (int) 64L;
+
+  /** {@snippet lang = c :
+   * #define __TIMESIZE 64
+   * }
+   */
+  public static int __TIMESIZE() {
+    return __TIMESIZE;
+  }
+
+  private static final long __STDC_IEC_60559_BFP__ = 201404L;
+
+  /** {@snippet lang = c :
+   * #define __STDC_IEC_60559_BFP__ 201404
+   * }
+   */
+  public static long __STDC_IEC_60559_BFP__() {
+    return __STDC_IEC_60559_BFP__;
+  }
+
+  private static final long __STDC_IEC_60559_COMPLEX__ = 201404L;
+
+  /** {@snippet lang = c :
+   * #define __STDC_IEC_60559_COMPLEX__ 201404
+   * }
+   */
+  public static long __STDC_IEC_60559_COMPLEX__() {
+    return __STDC_IEC_60559_COMPLEX__;
+  }
+
+  private static final long __STDC_ISO_10646__ = 201706L;
+
+  /** {@snippet lang = c :
+   * #define __STDC_ISO_10646__ 201706
+   * }
+   */
+  public static long __STDC_ISO_10646__() {
+    return __STDC_ISO_10646__;
+  }
+
+  private static final int __WCHAR_MAX = (int) 2147483647L;
+
+  /** {@snippet lang = c :
+   * #define __WCHAR_MAX 2147483647
+   * }
+   */
+  public static int __WCHAR_MAX() {
+    return __WCHAR_MAX;
+  }
+
+  private static final int __WCHAR_MIN = (int) -2147483648L;
+
+  /** {@snippet lang = c :
+   * #define __WCHAR_MIN -2147483648
+   * }
+   */
+  public static int __WCHAR_MIN() {
+    return __WCHAR_MIN;
+  }
+
+  private static final int INT8_MIN = (int) -128L;
+
+  /** {@snippet lang = c :
+   * #define INT8_MIN -128
+   * }
+   */
+  public static int INT8_MIN() {
+    return INT8_MIN;
+  }
+
+  private static final int INT16_MIN = (int) -32768L;
+
+  /** {@snippet lang = c :
+   * #define INT16_MIN -32768
+   * }
+   */
+  public static int INT16_MIN() {
+    return INT16_MIN;
+  }
+
+  private static final int INT32_MIN = (int) -2147483648L;
+
+  /** {@snippet lang = c :
+   * #define INT32_MIN -2147483648
+   * }
+   */
+  public static int INT32_MIN() {
+    return INT32_MIN;
+  }
+
+  private static final long INT64_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INT64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT64_MIN() {
+    return INT64_MIN;
+  }
+
+  private static final int INT8_MAX = (int) 127L;
+
+  /** {@snippet lang = c :
+   * #define INT8_MAX 127
+   * }
+   */
+  public static int INT8_MAX() {
+    return INT8_MAX;
+  }
+
+  private static final int INT16_MAX = (int) 32767L;
+
+  /** {@snippet lang = c :
+   * #define INT16_MAX 32767
+   * }
+   */
+  public static int INT16_MAX() {
+    return INT16_MAX;
+  }
+
+  private static final int INT32_MAX = (int) 2147483647L;
+
+  /** {@snippet lang = c :
+   * #define INT32_MAX 2147483647
+   * }
+   */
+  public static int INT32_MAX() {
+    return INT32_MAX;
+  }
+
+  private static final long INT64_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INT64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT64_MAX() {
+    return INT64_MAX;
+  }
+
+  private static final int UINT8_MAX = (int) 255L;
+
+  /** {@snippet lang = c :
+   * #define UINT8_MAX 255
+   * }
+   */
+  public static int UINT8_MAX() {
+    return UINT8_MAX;
+  }
+
+  private static final int UINT16_MAX = (int) 65535L;
+
+  /** {@snippet lang = c :
+   * #define UINT16_MAX 65535
+   * }
+   */
+  public static int UINT16_MAX() {
+    return UINT16_MAX;
+  }
+
+  private static final int UINT32_MAX = (int) 4294967295L;
+
+  /** {@snippet lang = c :
+   * #define UINT32_MAX 4294967295
+   * }
+   * The {@code (int)} cast wraps this unsigned value, so the getter returns -1. */
+  public static int UINT32_MAX() {
+    return UINT32_MAX;
+  }
+
+  private static final long UINT64_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINT64_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINT64_MAX() {
+    return UINT64_MAX;
+  }
+
+  private static final int INT_LEAST8_MIN = (int) -128L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST8_MIN -128
+   * }
+   */
+  public static int INT_LEAST8_MIN() {
+    return INT_LEAST8_MIN;
+  }
+
+  private static final int INT_LEAST16_MIN = (int) -32768L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST16_MIN -32768
+   * }
+   */
+  public static int INT_LEAST16_MIN() {
+    return INT_LEAST16_MIN;
+  }
+
+  private static final int INT_LEAST32_MIN = (int) -2147483648L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST32_MIN -2147483648
+   * }
+   */
+  public static int INT_LEAST32_MIN() {
+    return INT_LEAST32_MIN;
+  }
+
+  private static final long INT_LEAST64_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_LEAST64_MIN() {
+    return INT_LEAST64_MIN;
+  }
+
+  private static final int INT_LEAST8_MAX = (int) 127L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST8_MAX 127
+   * }
+   */
+  public static int INT_LEAST8_MAX() {
+    return INT_LEAST8_MAX;
+  }
+
+  private static final int INT_LEAST16_MAX = (int) 32767L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST16_MAX 32767
+   * }
+   */
+  public static int INT_LEAST16_MAX() {
+    return INT_LEAST16_MAX;
+  }
+
+  private static final int INT_LEAST32_MAX = (int) 2147483647L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST32_MAX 2147483647
+   * }
+   */
+  public static int INT_LEAST32_MAX() {
+    return INT_LEAST32_MAX;
+  }
+
+  private static final long INT_LEAST64_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INT_LEAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_LEAST64_MAX() {
+    return INT_LEAST64_MAX;
+  }
+
+  private static final int UINT_LEAST8_MAX = (int) 255L;
+
+  /** {@snippet lang = c :
+   * #define UINT_LEAST8_MAX 255
+   * }
+   */
+  public static int UINT_LEAST8_MAX() {
+    return UINT_LEAST8_MAX;
+  }
+
+  private static final int UINT_LEAST16_MAX = (int) 65535L;
+
+  /** {@snippet lang = c :
+   * #define UINT_LEAST16_MAX 65535
+   * }
+   */
+  public static int UINT_LEAST16_MAX() {
+    return UINT_LEAST16_MAX;
+  }
+
+  private static final int UINT_LEAST32_MAX = (int) 4294967295L;
+
+  /** {@snippet lang = c :
+   * #define UINT_LEAST32_MAX 4294967295
+   * }
+   * The {@code (int)} cast wraps this unsigned value, so the getter returns -1. */
+  public static int UINT_LEAST32_MAX() {
+    return UINT_LEAST32_MAX;
+  }
+
+  private static final long UINT_LEAST64_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINT_LEAST64_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINT_LEAST64_MAX() {
+    return UINT_LEAST64_MAX;
+  }
+
+  private static final int INT_FAST8_MIN = (int) -128L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST8_MIN -128
+   * }
+   */
+  public static int INT_FAST8_MIN() {
+    return INT_FAST8_MIN;
+  }
+
+  private static final long INT_FAST16_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST16_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST16_MIN() {
+    return INT_FAST16_MIN;
+  }
+
+  private static final long INT_FAST32_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST32_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST32_MIN() {
+    return INT_FAST32_MIN;
+  }
+
+  private static final long INT_FAST64_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST64_MIN() {
+    return INT_FAST64_MIN;
+  }
+
+  private static final int INT_FAST8_MAX = (int) 127L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST8_MAX 127
+   * }
+   */
+  public static int INT_FAST8_MAX() {
+    return INT_FAST8_MAX;
+  }
+
+  private static final long INT_FAST16_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST16_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST16_MAX() {
+    return INT_FAST16_MAX;
+  }
+
+  private static final long INT_FAST32_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST32_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST32_MAX() {
+    return INT_FAST32_MAX;
+  }
+
+  private static final long INT_FAST64_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INT_FAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST64_MAX() {
+    return INT_FAST64_MAX;
+  }
+
+  private static final int UINT_FAST8_MAX = (int) 255L;
+
+  /** {@snippet lang = c :
+   * #define UINT_FAST8_MAX 255
+   * }
+   */
+  public static int UINT_FAST8_MAX() {
+    return UINT_FAST8_MAX;
+  }
+
+  private static final long UINT_FAST16_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINT_FAST16_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINT_FAST16_MAX() {
+    return UINT_FAST16_MAX;
+  }
+
+  private static final long UINT_FAST32_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINT_FAST32_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINT_FAST32_MAX() {
+    return UINT_FAST32_MAX;
+  }
+
+  private static final long UINT_FAST64_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINT_FAST64_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINT_FAST64_MAX() {
+    return UINT_FAST64_MAX;
+  }
+
+  private static final long INTPTR_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INTPTR_MIN -9223372036854775808
+   * }
+   */
+  public static long INTPTR_MIN() {
+    return INTPTR_MIN;
+  }
+
+  private static final long INTPTR_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INTPTR_MAX 9223372036854775807
+   * }
+   */
+  public static long INTPTR_MAX() {
+    return INTPTR_MAX;
+  }
+
+  private static final long UINTPTR_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINTPTR_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINTPTR_MAX() {
+    return UINTPTR_MAX;
+  }
+
+  private static final long INTMAX_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define INTMAX_MIN -9223372036854775808
+   * }
+   */
+  public static long INTMAX_MIN() {
+    return INTMAX_MIN;
+  }
+
+  private static final long INTMAX_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define INTMAX_MAX 9223372036854775807
+   * }
+   */
+  public static long INTMAX_MAX() {
+    return INTMAX_MAX;
+  }
+
+  private static final long UINTMAX_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define UINTMAX_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long UINTMAX_MAX() {
+    return UINTMAX_MAX;
+  }
+
+  private static final long PTRDIFF_MIN = -9223372036854775808L;
+
+  /** {@snippet lang = c :
+   * #define PTRDIFF_MIN -9223372036854775808
+   * }
+   */
+  public static long PTRDIFF_MIN() {
+    return PTRDIFF_MIN;
+  }
+
+  private static final long PTRDIFF_MAX = 9223372036854775807L;
+
+  /** {@snippet lang = c :
+   * #define PTRDIFF_MAX 9223372036854775807
+   * }
+   */
+  public static long PTRDIFF_MAX() {
+    return PTRDIFF_MAX;
+  }
+
+  private static final int SIG_ATOMIC_MIN = (int) -2147483648L;
+
+  /** {@snippet lang = c :
+   * #define SIG_ATOMIC_MIN -2147483648
+   * }
+   */
+  public static int SIG_ATOMIC_MIN() {
+    return SIG_ATOMIC_MIN;
+  }
+
+  private static final int SIG_ATOMIC_MAX = (int) 2147483647L;
+
+  /** {@snippet lang = c :
+   * #define SIG_ATOMIC_MAX 2147483647
+   * }
+   */
+  public static int SIG_ATOMIC_MAX() {
+    return SIG_ATOMIC_MAX;
+  }
+
+  private static final long SIZE_MAX = -1L;
+
+  /** {@snippet lang = c :
+   * #define SIZE_MAX -1
+   * }
+   * All 64 bits set: the unsigned C maximum, which Java reads as the signed value -1. */
+  public static long SIZE_MAX() {
+    return SIZE_MAX;
+  }
+
+  private static final int WCHAR_MIN = (int) -2147483648L;
+
+  /** {@snippet lang = c :
+   * #define WCHAR_MIN -2147483648
+   * }
+   */
+  public static int WCHAR_MIN() {
+    return WCHAR_MIN;
+  }
+
+  private static final int WCHAR_MAX = (int) 2147483647L;
+
+  /** {@snippet lang = c :
+   * #define WCHAR_MAX 2147483647
+   * }
+   */
+  public static int WCHAR_MAX() {
+    return WCHAR_MAX;
+  }
+
+  private static final int WINT_MIN = (int) 0L;
+
+  /** {@snippet lang = c :
+   * #define WINT_MIN 0
+   * }
+   */
+  public static int WINT_MIN() {
+    return WINT_MIN;
+  }
+
+  private static final int WINT_MAX = (int) 4294967295L;
+
+  /** {@snippet lang = c :
+   * #define WINT_MAX 4294967295
+   * }
+   * The {@code (int)} cast wraps this unsigned value, so the getter returns -1. */
+  public static int WINT_MAX() {
+    return WINT_MAX;
+  }
+
+  private static final MemorySegment NULL = MemorySegment.ofAddress(0L);
+
+  /** {@snippet lang = c :
+   * #define NULL (void*) 0
+   * }
+   */
+  public static MemorySegment NULL() {
+    return NULL;
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSBruteForceIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSBruteForceIndex.java
new file mode 100644
index 000000000..8bf29027e
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSBruteForceIndex.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/** Struct layout and static field accessors for the anonymous C struct shown below.
+ * {@snippet lang = c :
+ * struct {
+ *     uintptr_t addr;
+ *     DLDataType dtype;
+ * }
+ * }
+ */
+public class CuVSBruteForceIndex {
+
+  CuVSBruteForceIndex() {
+    // Should not be called directly
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(BruteForceH.C_LONG.withName("addr"),
+      DLDataType.layout().withName("dtype"), MemoryLayout.paddingLayout(4)).withName("$anon$22:9");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfLong addr$LAYOUT = (OfLong) $LAYOUT.select(groupElement("addr"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static final OfLong addr$layout() {
+    return addr$LAYOUT;
+  }
+
+  private static final long addr$OFFSET = 0;
+
+  /**
+   * Offset for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static final long addr$offset() {
+    return addr$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static long addr(MemorySegment struct) {
+    return struct.get(addr$LAYOUT, addr$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static void addr(MemorySegment struct, long fieldValue) {
+    struct.set(addr$LAYOUT, addr$OFFSET, fieldValue);
+  }
+
+  private static final GroupLayout dtype$LAYOUT = (GroupLayout) $LAYOUT.select(groupElement("dtype"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static final GroupLayout dtype$layout() {
+    return dtype$LAYOUT;
+  }
+
+  private static final long dtype$OFFSET = 8;
+
+  /**
+   * Offset for field: {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static final long dtype$offset() {
+    return dtype$OFFSET;
+  }
+
+  /**
+   * Getter for field (returns a slice of {@code struct}, not a copy): {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static MemorySegment dtype(MemorySegment struct) {
+    return struct.asSlice(dtype$OFFSET, dtype$LAYOUT.byteSize());
+  }
+
+  /**
+   * Setter for field (copies the bytes of {@code fieldValue} into {@code struct}): {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static void dtype(MemorySegment struct, MemorySegment fieldValue) {
+    MemorySegment.copy(fieldValue, 0L, struct, dtype$OFFSET, dtype$LAYOUT.byteSize());
+  }
+
+  /**
+   * Obtains a slice of {@code array} that starts at element {@code index}; the
+   * slice runs to the end of {@code array}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraCompressionParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraCompressionParams.java
new file mode 100644
index 000000000..1fe8eca76
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraCompressionParams.java
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfDouble;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.util.function.Consumer;
+
+/**
+ * Memory layout and static field accessors for: {@snippet lang = c :
+ * struct cuvsCagraCompressionParams {
+ *     uint32_t pq_bits;
+ *     uint32_t pq_dim;
+ *     uint32_t vq_n_centers;
+ *     uint32_t kmeans_n_iters;
+ *     double vq_kmeans_trainset_fraction;
+ *     double pq_kmeans_trainset_fraction;
+ * }
+ * }
+ */
+public class CuVSCagraCompressionParams {
+
+  CuVSCagraCompressionParams() {
+    // Not meant to be instantiated: every member of this class is static
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(CagraH.C_INT.withName("pq_bits"),
+      CagraH.C_INT.withName("pq_dim"), CagraH.C_INT.withName("vq_n_centers"),
+      CagraH.C_INT.withName("kmeans_n_iters"), CagraH.C_DOUBLE.withName("vq_kmeans_trainset_fraction"),
+      CagraH.C_DOUBLE.withName("pq_kmeans_trainset_fraction")).withName("cuvsCagraCompressionParams");
+
+  /**
+   * Returns the memory layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfInt pq_bits$LAYOUT = (OfInt) $LAYOUT.select(groupElement("pq_bits"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * uint32_t pq_bits
+   * }
+   */
+  public static final OfInt pq_bits$layout() {
+    return pq_bits$LAYOUT;
+  }
+
+  private static final long pq_bits$OFFSET = 0;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * uint32_t pq_bits
+   * }
+   */
+  public static final long pq_bits$offset() {
+    return pq_bits$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * uint32_t pq_bits
+   * }
+   */
+  public static int pq_bits(MemorySegment struct) {
+    return struct.get(pq_bits$LAYOUT, pq_bits$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * uint32_t pq_bits
+   * }
+   */
+  public static void pq_bits(MemorySegment struct, int fieldValue) {
+    struct.set(pq_bits$LAYOUT, pq_bits$OFFSET, fieldValue);
+  }
+
+  private static final OfInt pq_dim$LAYOUT = (OfInt) $LAYOUT.select(groupElement("pq_dim"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * uint32_t pq_dim
+   * }
+   */
+  public static final OfInt pq_dim$layout() {
+    return pq_dim$LAYOUT;
+  }
+
+  private static final long pq_dim$OFFSET = 4;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * uint32_t pq_dim
+   * }
+   */
+  public static final long pq_dim$offset() {
+    return pq_dim$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * uint32_t pq_dim
+   * }
+   */
+  public static int pq_dim(MemorySegment struct) {
+    return struct.get(pq_dim$LAYOUT, pq_dim$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * uint32_t pq_dim
+   * }
+   */
+  public static void pq_dim(MemorySegment struct, int fieldValue) {
+    struct.set(pq_dim$LAYOUT, pq_dim$OFFSET, fieldValue);
+  }
+
+  private static final OfInt vq_n_centers$LAYOUT = (OfInt) $LAYOUT.select(groupElement("vq_n_centers"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * uint32_t vq_n_centers
+   * }
+   */
+  public static final OfInt vq_n_centers$layout() {
+    return vq_n_centers$LAYOUT;
+  }
+
+  private static final long vq_n_centers$OFFSET = 8;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * uint32_t vq_n_centers
+   * }
+   */
+  public static final long vq_n_centers$offset() {
+    return vq_n_centers$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * uint32_t vq_n_centers
+   * }
+   */
+  public static int vq_n_centers(MemorySegment struct) {
+    return struct.get(vq_n_centers$LAYOUT, vq_n_centers$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * uint32_t vq_n_centers
+   * }
+   */
+  public static void vq_n_centers(MemorySegment struct, int fieldValue) {
+    struct.set(vq_n_centers$LAYOUT, vq_n_centers$OFFSET, fieldValue);
+  }
+
+  private static final OfInt kmeans_n_iters$LAYOUT = (OfInt) $LAYOUT.select(groupElement("kmeans_n_iters"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * uint32_t kmeans_n_iters
+   * }
+   */
+  public static final OfInt kmeans_n_iters$layout() {
+    return kmeans_n_iters$LAYOUT;
+  }
+
+  private static final long kmeans_n_iters$OFFSET = 12;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * uint32_t kmeans_n_iters
+   * }
+   */
+  public static final long kmeans_n_iters$offset() {
+    return kmeans_n_iters$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * uint32_t kmeans_n_iters
+   * }
+   */
+  public static int kmeans_n_iters(MemorySegment struct) {
+    return struct.get(kmeans_n_iters$LAYOUT, kmeans_n_iters$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * uint32_t kmeans_n_iters
+   * }
+   */
+  public static void kmeans_n_iters(MemorySegment struct, int fieldValue) {
+    struct.set(kmeans_n_iters$LAYOUT, kmeans_n_iters$OFFSET, fieldValue);
+  }
+
+  private static final OfDouble vq_kmeans_trainset_fraction$LAYOUT = (OfDouble) $LAYOUT
+      .select(groupElement("vq_kmeans_trainset_fraction"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * double vq_kmeans_trainset_fraction
+   * }
+   */
+  public static final OfDouble vq_kmeans_trainset_fraction$layout() {
+    return vq_kmeans_trainset_fraction$LAYOUT;
+  }
+
+  private static final long vq_kmeans_trainset_fraction$OFFSET = 16;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * double vq_kmeans_trainset_fraction
+   * }
+   */
+  public static final long vq_kmeans_trainset_fraction$offset() {
+    return vq_kmeans_trainset_fraction$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * double vq_kmeans_trainset_fraction
+   * }
+   */
+  public static double vq_kmeans_trainset_fraction(MemorySegment struct) {
+    return struct.get(vq_kmeans_trainset_fraction$LAYOUT, vq_kmeans_trainset_fraction$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * double vq_kmeans_trainset_fraction
+   * }
+   */
+  public static void vq_kmeans_trainset_fraction(MemorySegment struct, double fieldValue) {
+    struct.set(vq_kmeans_trainset_fraction$LAYOUT, vq_kmeans_trainset_fraction$OFFSET, fieldValue);
+  }
+
+  private static final OfDouble pq_kmeans_trainset_fraction$LAYOUT = (OfDouble) $LAYOUT
+      .select(groupElement("pq_kmeans_trainset_fraction"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * double pq_kmeans_trainset_fraction
+   * }
+   */
+  public static final OfDouble pq_kmeans_trainset_fraction$layout() {
+    return pq_kmeans_trainset_fraction$LAYOUT;
+  }
+
+  private static final long pq_kmeans_trainset_fraction$OFFSET = 24;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * double pq_kmeans_trainset_fraction
+   * }
+   */
+  public static final long pq_kmeans_trainset_fraction$offset() {
+    return pq_kmeans_trainset_fraction$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * double pq_kmeans_trainset_fraction
+   * }
+   */
+  public static double pq_kmeans_trainset_fraction(MemorySegment struct) {
+    return struct.get(pq_kmeans_trainset_fraction$LAYOUT, pq_kmeans_trainset_fraction$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * double pq_kmeans_trainset_fraction
+   * }
+   */
+  public static void pq_kmeans_trainset_fraction(MemorySegment struct, double fieldValue) {
+    struct.set(pq_kmeans_trainset_fraction$LAYOUT, pq_kmeans_trainset_fraction$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} that starts at the element at {@code index}
+   * and runs to the end of {@code array}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * Returns the size of this struct in bytes, i.e. {@code layout().byteSize()}
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocates one struct from {@code allocator}: a segment described by
+   * {@code layout()}, hence of size {@code layout().byteSize()}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocates an array of {@code elementCount} structs using {@code allocator}.
+   * The returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} as a single struct, using target {@code arena}
+   * and the {@code cleanup} action (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the
+   * {@code cleanup} action (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndex.java
new file mode 100644
index 000000000..ec15f6729
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndex.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * Memory layout and static field accessors for the anonymous struct: {@snippet lang = c :
+ * struct {
+ *     uintptr_t addr;
+ *     DLDataType dtype;
+ * }
+ * }
+ */
+public class CuVSCagraIndex {
+
+  CuVSCagraIndex() {
+    // Not meant to be instantiated: every member of this class is static
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(CagraH.C_LONG.withName("addr"),
+      DLDataType.layout().withName("dtype"), MemoryLayout.paddingLayout(4)).withName("$anon$175:9");
+
+  /**
+   * Returns the memory layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfLong addr$LAYOUT = (OfLong) $LAYOUT.select(groupElement("addr"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static final OfLong addr$layout() {
+    return addr$LAYOUT;
+  }
+
+  private static final long addr$OFFSET = 0;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static final long addr$offset() {
+    return addr$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static long addr(MemorySegment struct) {
+    return struct.get(addr$LAYOUT, addr$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * uintptr_t addr
+   * }
+   */
+  public static void addr(MemorySegment struct, long fieldValue) {
+    struct.set(addr$LAYOUT, addr$OFFSET, fieldValue);
+  }
+
+  private static final GroupLayout dtype$LAYOUT = (GroupLayout) $LAYOUT.select(groupElement("dtype"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static final GroupLayout dtype$layout() {
+    return dtype$LAYOUT;
+  }
+
+  private static final long dtype$OFFSET = 8;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static final long dtype$offset() {
+    return dtype$OFFSET;
+  }
+
+  /**
+   * Getter for field, returned as a slice of {@code struct} (not a copy): {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static MemorySegment dtype(MemorySegment struct) {
+    return struct.asSlice(dtype$OFFSET, dtype$LAYOUT.byteSize());
+  }
+
+  /**
+   * Setter for field, copying the field's bytes out of {@code fieldValue}: {@snippet lang = c :
+   * DLDataType dtype
+   * }
+   */
+  public static void dtype(MemorySegment struct, MemorySegment fieldValue) {
+    MemorySegment.copy(fieldValue, 0L, struct, dtype$OFFSET, dtype$LAYOUT.byteSize());
+  }
+
+  /**
+   * Obtains a slice of {@code array} that starts at the element at {@code index}
+   * and runs to the end of {@code array}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * Returns the size of this struct in bytes, i.e. {@code layout().byteSize()}
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocates one struct from {@code allocator}: a segment described by
+   * {@code layout()}, hence of size {@code layout().byteSize()}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocates an array of {@code elementCount} structs using {@code allocator}.
+   * The returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} as a single struct, using target {@code arena}
+   * and the {@code cleanup} action (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the
+   * {@code cleanup} action (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndexParams.java
new file mode 100644
index 000000000..1823eacfe
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndexParams.java
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * Memory layout and static field accessors for: {@snippet lang = c :
+ * struct cuvsCagraIndexParams {
+ *     cuvsDistanceType metric;
+ *     long intermediate_graph_degree;
+ *     long graph_degree;
+ *     enum cuvsCagraGraphBuildAlgo build_algo;
+ *     long nn_descent_niter;
+ *     cuvsCagraCompressionParams_t compression;
+ * }
+ * }
+ */
+public class CuVSCagraIndexParams {
+
+  CuVSCagraIndexParams() {
+    // Not meant to be instantiated: every member of this class is static
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout
+      .structLayout(CagraH.C_INT.withName("metric"), MemoryLayout.paddingLayout(4),
+          CagraH.C_LONG.withName("intermediate_graph_degree"), CagraH.C_LONG.withName("graph_degree"),
+          CagraH.C_INT.withName("build_algo"), MemoryLayout.paddingLayout(4),
+          CagraH.C_LONG.withName("nn_descent_niter"), CagraH.C_POINTER.withName("compression"))
+      .withName("cuvsCagraIndexParams");
+
+  /**
+   * Returns the memory layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfInt metric$LAYOUT = (OfInt) $LAYOUT.select(groupElement("metric"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * cuvsDistanceType metric
+   * }
+   */
+  public static final OfInt metric$layout() {
+    return metric$LAYOUT;
+  }
+
+  private static final long metric$OFFSET = 0;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * cuvsDistanceType metric
+   * }
+   */
+  public static final long metric$offset() {
+    return metric$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * cuvsDistanceType metric
+   * }
+   */
+  public static int metric(MemorySegment struct) {
+    return struct.get(metric$LAYOUT, metric$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * cuvsDistanceType metric
+   * }
+   */
+  public static void metric(MemorySegment struct, int fieldValue) {
+    struct.set(metric$LAYOUT, metric$OFFSET, fieldValue);
+  }
+
+  private static final OfLong intermediate_graph_degree$LAYOUT = (OfLong) $LAYOUT
+      .select(groupElement("intermediate_graph_degree"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * long intermediate_graph_degree
+   * }
+   */
+  public static final OfLong intermediate_graph_degree$layout() {
+    return intermediate_graph_degree$LAYOUT;
+  }
+
+  private static final long intermediate_graph_degree$OFFSET = 8;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * long intermediate_graph_degree
+   * }
+   */
+  public static final long intermediate_graph_degree$offset() {
+    return intermediate_graph_degree$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * long intermediate_graph_degree
+   * }
+   */
+  public static long intermediate_graph_degree(MemorySegment struct) {
+    return struct.get(intermediate_graph_degree$LAYOUT, intermediate_graph_degree$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * long intermediate_graph_degree
+   * }
+   */
+  public static void intermediate_graph_degree(MemorySegment struct, long fieldValue) {
+    struct.set(intermediate_graph_degree$LAYOUT, intermediate_graph_degree$OFFSET, fieldValue);
+  }
+
+  private static final OfLong graph_degree$LAYOUT = (OfLong) $LAYOUT.select(groupElement("graph_degree"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * long graph_degree
+   * }
+   */
+  public static final OfLong graph_degree$layout() {
+    return graph_degree$LAYOUT;
+  }
+
+  private static final long graph_degree$OFFSET = 16;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * long graph_degree
+   * }
+   */
+  public static final long graph_degree$offset() {
+    return graph_degree$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * long graph_degree
+   * }
+   */
+  public static long graph_degree(MemorySegment struct) {
+    return struct.get(graph_degree$LAYOUT, graph_degree$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * long graph_degree
+   * }
+   */
+  public static void graph_degree(MemorySegment struct, long fieldValue) {
+    struct.set(graph_degree$LAYOUT, graph_degree$OFFSET, fieldValue);
+  }
+
+  private static final OfInt build_algo$LAYOUT = (OfInt) $LAYOUT.select(groupElement("build_algo"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo build_algo
+   * }
+   */
+  public static final OfInt build_algo$layout() {
+    return build_algo$LAYOUT;
+  }
+
+  private static final long build_algo$OFFSET = 24;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo build_algo
+   * }
+   */
+  public static final long build_algo$offset() {
+    return build_algo$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo build_algo
+   * }
+   */
+  public static int build_algo(MemorySegment struct) {
+    return struct.get(build_algo$LAYOUT, build_algo$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * enum cuvsCagraGraphBuildAlgo build_algo
+   * }
+   */
+  public static void build_algo(MemorySegment struct, int fieldValue) {
+    struct.set(build_algo$LAYOUT, build_algo$OFFSET, fieldValue);
+  }
+
+  private static final OfLong nn_descent_niter$LAYOUT = (OfLong) $LAYOUT.select(groupElement("nn_descent_niter"));
+
+  /**
+   * Layout for field: {@snippet lang = c :
+   * long nn_descent_niter
+   * }
+   */
+  public static final OfLong nn_descent_niter$layout() {
+    return nn_descent_niter$LAYOUT;
+  }
+
+  private static final long nn_descent_niter$OFFSET = 32;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * long nn_descent_niter
+   * }
+   */
+  public static final long nn_descent_niter$offset() {
+    return nn_descent_niter$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang = c :
+   * long nn_descent_niter
+   * }
+   */
+  public static long nn_descent_niter(MemorySegment struct) {
+    return struct.get(nn_descent_niter$LAYOUT, nn_descent_niter$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang = c :
+   * long nn_descent_niter
+   * }
+   */
+  public static void nn_descent_niter(MemorySegment struct, long fieldValue) {
+    struct.set(nn_descent_niter$LAYOUT, nn_descent_niter$OFFSET, fieldValue);
+  }
+
+  private static final AddressLayout compression$LAYOUT = (AddressLayout) $LAYOUT.select(groupElement("compression"));
+
+  /**
+   * Layout (an address layout) for field: {@snippet lang = c :
+   * cuvsCagraCompressionParams_t compression
+   * }
+   */
+  public static final AddressLayout compression$layout() {
+    return compression$LAYOUT;
+  }
+
+  private static final long compression$OFFSET = 40;
+
+  /**
+   * Offset (in bytes) for field: {@snippet lang = c :
+   * cuvsCagraCompressionParams_t compression
+   * }
+   */
+  public static final long compression$offset() {
+    return compression$OFFSET;
+  }
+
+  /**
+   * Getter for field, which is read as an address: {@snippet lang = c :
+   * cuvsCagraCompressionParams_t compression
+   * }
+   */
+  public static MemorySegment compression(MemorySegment struct) {
+    return struct.get(compression$LAYOUT, compression$OFFSET);
+  }
+
+  /**
+   * Setter for field, which is written as an address: {@snippet lang = c :
+   * cuvsCagraCompressionParams_t compression
+   * }
+   */
+  public static void compression(MemorySegment struct, MemorySegment fieldValue) {
+    struct.set(compression$LAYOUT, compression$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} that starts at the element at {@code index}
+   * and runs to the end of {@code array}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * Returns the size of this struct in bytes, i.e. {@code layout().byteSize()}
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocates one struct from {@code allocator}: a segment described by
+   * {@code layout()}, hence of size {@code layout().byteSize()}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocates an array of {@code elementCount} structs using {@code allocator}.
+   * The returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} as a single struct, using target {@code arena}
+   * and the {@code cleanup} action (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the
+   * {@code cleanup} action (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraSearchParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraSearchParams.java
new file mode 100644
index 000000000..ec59284d9
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraSearchParams.java
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfFloat;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang = c :
+ * struct cuvsCagraSearchParams {
+ *     long max_queries;
+ *     long itopk_size;
+ *     long max_iterations;
+ *     enum cuvsCagraSearchAlgo algo;
+ *     long team_size;
+ *     long search_width;
+ *     long min_iterations;
+ *     long thread_block_size;
+ *     enum cuvsCagraHashMode hashmap_mode;
+ *     long hashmap_min_bitlen;
+ *     float hashmap_max_fill_rate;
+ *     uint32_t num_random_samplings;
+ *     uint64_t rand_xor_mask;
+ * }
+ * }
+ */
+public class CuVSCagraSearchParams {
+
+  CuVSCagraSearchParams() {
+    // Should not be called directly
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout
+      .structLayout(CagraH.C_LONG.withName("max_queries"), CagraH.C_LONG.withName("itopk_size"),
+          CagraH.C_LONG.withName("max_iterations"), CagraH.C_INT.withName("algo"), MemoryLayout.paddingLayout(4),
+          CagraH.C_LONG.withName("team_size"), CagraH.C_LONG.withName("search_width"),
+          CagraH.C_LONG.withName("min_iterations"), CagraH.C_LONG.withName("thread_block_size"),
+          CagraH.C_INT.withName("hashmap_mode"), MemoryLayout.paddingLayout(4),
+          CagraH.C_LONG.withName("hashmap_min_bitlen"), CagraH.C_FLOAT.withName("hashmap_max_fill_rate"),
+          CagraH.C_INT.withName("num_random_samplings"), CagraH.C_LONG.withName("rand_xor_mask"))
+      .withName("cuvsCagraSearchParams");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfLong max_queries$LAYOUT = (OfLong) $LAYOUT.select(groupElement("max_queries"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long max_queries
+   * }
+   */
+  public static final OfLong max_queries$layout() {
+    return max_queries$LAYOUT;
+  }
+
+  private static final long max_queries$OFFSET = 0;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long max_queries
+   * }
+   */
+  public static final long max_queries$offset() {
+    return max_queries$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long max_queries
+   * }
+   */
+  public static long max_queries(MemorySegment struct) {
+    return struct.get(max_queries$LAYOUT, max_queries$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long max_queries
+   * }
+   */
+  public static void max_queries(MemorySegment struct, long fieldValue) {
+    struct.set(max_queries$LAYOUT, max_queries$OFFSET, fieldValue);
+  }
+
+  private static final OfLong itopk_size$LAYOUT = (OfLong) $LAYOUT.select(groupElement("itopk_size"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long itopk_size
+   * }
+   */
+  public static final OfLong itopk_size$layout() {
+    return itopk_size$LAYOUT;
+  }
+
+  private static final long itopk_size$OFFSET = 8;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long itopk_size
+   * }
+   */
+  public static final long itopk_size$offset() {
+    return itopk_size$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long itopk_size
+   * }
+   */
+  public static long itopk_size(MemorySegment struct) {
+    return struct.get(itopk_size$LAYOUT, itopk_size$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long itopk_size
+   * }
+   */
+  public static void itopk_size(MemorySegment struct, long fieldValue) {
+    struct.set(itopk_size$LAYOUT, itopk_size$OFFSET, fieldValue);
+  }
+
+  private static final OfLong max_iterations$LAYOUT = (OfLong) $LAYOUT.select(groupElement("max_iterations"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long max_iterations
+   * }
+   */
+  public static final OfLong max_iterations$layout() {
+    return max_iterations$LAYOUT;
+  }
+
+  private static final long max_iterations$OFFSET = 16;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long max_iterations
+   * }
+   */
+  public static final long max_iterations$offset() {
+    return max_iterations$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long max_iterations
+   * }
+   */
+  public static long max_iterations(MemorySegment struct) {
+    return struct.get(max_iterations$LAYOUT, max_iterations$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long max_iterations
+   * }
+   */
+  public static void max_iterations(MemorySegment struct, long fieldValue) {
+    struct.set(max_iterations$LAYOUT, max_iterations$OFFSET, fieldValue);
+  }
+
+  private static final OfInt algo$LAYOUT = (OfInt) $LAYOUT.select(groupElement("algo"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * enum cuvsCagraSearchAlgo algo
+   * }
+   */
+  public static final OfInt algo$layout() {
+    return algo$LAYOUT;
+  }
+
+  private static final long algo$OFFSET = 24;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * enum cuvsCagraSearchAlgo algo
+   * }
+   */
+  public static final long algo$offset() {
+    return algo$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * enum cuvsCagraSearchAlgo algo
+   * }
+   */
+  public static int algo(MemorySegment struct) {
+    return struct.get(algo$LAYOUT, algo$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * enum cuvsCagraSearchAlgo algo
+   * }
+   */
+  public static void algo(MemorySegment struct, int fieldValue) {
+    struct.set(algo$LAYOUT, algo$OFFSET, fieldValue);
+  }
+
+  private static final OfLong team_size$LAYOUT = (OfLong) $LAYOUT.select(groupElement("team_size"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long team_size
+   * }
+   */
+  public static final OfLong team_size$layout() {
+    return team_size$LAYOUT;
+  }
+
+  private static final long team_size$OFFSET = 32;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long team_size
+   * }
+   */
+  public static final long team_size$offset() {
+    return team_size$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long team_size
+   * }
+   */
+  public static long team_size(MemorySegment struct) {
+    return struct.get(team_size$LAYOUT, team_size$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long team_size
+   * }
+   */
+  public static void team_size(MemorySegment struct, long fieldValue) {
+    struct.set(team_size$LAYOUT, team_size$OFFSET, fieldValue);
+  }
+
+  private static final OfLong search_width$LAYOUT = (OfLong) $LAYOUT.select(groupElement("search_width"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long search_width
+   * }
+   */
+  public static final OfLong search_width$layout() {
+    return search_width$LAYOUT;
+  }
+
+  private static final long search_width$OFFSET = 40;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long search_width
+   * }
+   */
+  public static final long search_width$offset() {
+    return search_width$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long search_width
+   * }
+   */
+  public static long search_width(MemorySegment struct) {
+    return struct.get(search_width$LAYOUT, search_width$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long search_width
+   * }
+   */
+  public static void search_width(MemorySegment struct, long fieldValue) {
+    struct.set(search_width$LAYOUT, search_width$OFFSET, fieldValue);
+  }
+
+  private static final OfLong min_iterations$LAYOUT = (OfLong) $LAYOUT.select(groupElement("min_iterations"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long min_iterations
+   * }
+   */
+  public static final OfLong min_iterations$layout() {
+    return min_iterations$LAYOUT;
+  }
+
+  private static final long min_iterations$OFFSET = 48;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long min_iterations
+   * }
+   */
+  public static final long min_iterations$offset() {
+    return min_iterations$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long min_iterations
+   * }
+   */
+  public static long min_iterations(MemorySegment struct) {
+    return struct.get(min_iterations$LAYOUT, min_iterations$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long min_iterations
+   * }
+   */
+  public static void min_iterations(MemorySegment struct, long fieldValue) {
+    struct.set(min_iterations$LAYOUT, min_iterations$OFFSET, fieldValue);
+  }
+
+  private static final OfLong thread_block_size$LAYOUT = (OfLong) $LAYOUT.select(groupElement("thread_block_size"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long thread_block_size
+   * }
+   */
+  public static final OfLong thread_block_size$layout() {
+    return thread_block_size$LAYOUT;
+  }
+
+  private static final long thread_block_size$OFFSET = 56;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long thread_block_size
+   * }
+   */
+  public static final long thread_block_size$offset() {
+    return thread_block_size$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long thread_block_size
+   * }
+   */
+  public static long thread_block_size(MemorySegment struct) {
+    return struct.get(thread_block_size$LAYOUT, thread_block_size$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long thread_block_size
+   * }
+   */
+  public static void thread_block_size(MemorySegment struct, long fieldValue) {
+    struct.set(thread_block_size$LAYOUT, thread_block_size$OFFSET, fieldValue);
+  }
+
+  private static final OfInt hashmap_mode$LAYOUT = (OfInt) $LAYOUT.select(groupElement("hashmap_mode"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * enum cuvsCagraHashMode hashmap_mode
+   * }
+   */
+  public static final OfInt hashmap_mode$layout() {
+    return hashmap_mode$LAYOUT;
+  }
+
+  private static final long hashmap_mode$OFFSET = 64;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * enum cuvsCagraHashMode hashmap_mode
+   * }
+   */
+  public static final long hashmap_mode$offset() {
+    return hashmap_mode$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * enum cuvsCagraHashMode hashmap_mode
+   * }
+   */
+  public static int hashmap_mode(MemorySegment struct) {
+    return struct.get(hashmap_mode$LAYOUT, hashmap_mode$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * enum cuvsCagraHashMode hashmap_mode
+   * }
+   */
+  public static void hashmap_mode(MemorySegment struct, int fieldValue) {
+    struct.set(hashmap_mode$LAYOUT, hashmap_mode$OFFSET, fieldValue);
+  }
+
+  private static final OfLong hashmap_min_bitlen$LAYOUT = (OfLong) $LAYOUT.select(groupElement("hashmap_min_bitlen"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * long hashmap_min_bitlen
+   * }
+   */
+  public static final OfLong hashmap_min_bitlen$layout() {
+    return hashmap_min_bitlen$LAYOUT;
+  }
+
+  private static final long hashmap_min_bitlen$OFFSET = 72;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * long hashmap_min_bitlen
+   * }
+   */
+  public static final long hashmap_min_bitlen$offset() {
+    return hashmap_min_bitlen$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * long hashmap_min_bitlen
+   * }
+   */
+  public static long hashmap_min_bitlen(MemorySegment struct) {
+    return struct.get(hashmap_min_bitlen$LAYOUT, hashmap_min_bitlen$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * long hashmap_min_bitlen
+   * }
+   */
+  public static void hashmap_min_bitlen(MemorySegment struct, long fieldValue) {
+    struct.set(hashmap_min_bitlen$LAYOUT, hashmap_min_bitlen$OFFSET, fieldValue);
+  }
+
+  private static final OfFloat hashmap_max_fill_rate$LAYOUT = (OfFloat) $LAYOUT
+      .select(groupElement("hashmap_max_fill_rate"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * float hashmap_max_fill_rate
+   * }
+   */
+  public static final OfFloat hashmap_max_fill_rate$layout() {
+    return hashmap_max_fill_rate$LAYOUT;
+  }
+
+  private static final long hashmap_max_fill_rate$OFFSET = 80;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * float hashmap_max_fill_rate
+   * }
+   */
+  public static final long hashmap_max_fill_rate$offset() {
+    return hashmap_max_fill_rate$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * float hashmap_max_fill_rate
+   * }
+   */
+  public static float hashmap_max_fill_rate(MemorySegment struct) {
+    return struct.get(hashmap_max_fill_rate$LAYOUT, hashmap_max_fill_rate$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * float hashmap_max_fill_rate
+   * }
+   */
+  public static void hashmap_max_fill_rate(MemorySegment struct, float fieldValue) {
+    struct.set(hashmap_max_fill_rate$LAYOUT, hashmap_max_fill_rate$OFFSET, fieldValue);
+  }
+
+  private static final OfInt num_random_samplings$LAYOUT = (OfInt) $LAYOUT.select(groupElement("num_random_samplings"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * uint32_t num_random_samplings
+   * }
+   */
+  public static final OfInt num_random_samplings$layout() {
+    return num_random_samplings$LAYOUT;
+  }
+
+  private static final long num_random_samplings$OFFSET = 84;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * uint32_t num_random_samplings
+   * }
+   */
+  public static final long num_random_samplings$offset() {
+    return num_random_samplings$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * uint32_t num_random_samplings
+   * }
+   */
+  public static int num_random_samplings(MemorySegment struct) {
+    return struct.get(num_random_samplings$LAYOUT, num_random_samplings$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * uint32_t num_random_samplings
+   * }
+   */
+  public static void num_random_samplings(MemorySegment struct, int fieldValue) {
+    struct.set(num_random_samplings$LAYOUT, num_random_samplings$OFFSET, fieldValue);
+  }
+
+  private static final OfLong rand_xor_mask$LAYOUT = (OfLong) $LAYOUT.select(groupElement("rand_xor_mask"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * uint64_t rand_xor_mask
+   * }
+   */
+  public static final OfLong rand_xor_mask$layout() {
+    return rand_xor_mask$LAYOUT;
+  }
+
+  private static final long rand_xor_mask$OFFSET = 88;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * uint64_t rand_xor_mask
+   * }
+   */
+  public static final long rand_xor_mask$offset() {
+    return rand_xor_mask$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * uint64_t rand_xor_mask
+   * }
+   */
+  public static long rand_xor_mask(MemorySegment struct) {
+    return struct.get(rand_xor_mask$LAYOUT, rand_xor_mask$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * uint64_t rand_xor_mask
+   * }
+   */
+  public static void rand_xor_mask(MemorySegment struct, long fieldValue) {
+    struct.set(rand_xor_mask$LAYOUT, rand_xor_mask$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} which selects the array element at
+   * {@code index}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSFilter.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSFilter.java
new file mode 100644
index 000000000..9385660dc
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSFilter.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang=c :
+ * struct {
+ *     uintptr_t addr;
+ *     enum cuvsFilterType type;
+ * }
+ * }
+ */
+public class CuVSFilter {
+
+    CuVSFilter() {
+        // Should not be called directly
+    }
+
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        BruteForceH.C_LONG.withName("addr"),
+        BruteForceH.C_INT.withName("type"),
+        MemoryLayout.paddingLayout(4)
+    ).withName("$anon$50:9");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final OfLong addr$LAYOUT = (OfLong)$LAYOUT.select(groupElement("addr"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uintptr_t addr
+     * }
+     */
+    public static final OfLong addr$layout() {
+        return addr$LAYOUT;
+    }
+
+    private static final long addr$OFFSET = 0;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * uintptr_t addr
+     * }
+     */
+    public static final long addr$offset() {
+        return addr$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uintptr_t addr
+     * }
+     */
+    public static long addr(MemorySegment struct) {
+        return struct.get(addr$LAYOUT, addr$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uintptr_t addr
+     * }
+     */
+    public static void addr(MemorySegment struct, long fieldValue) {
+        struct.set(addr$LAYOUT, addr$OFFSET, fieldValue);
+    }
+
+    private static final OfInt type$LAYOUT = (OfInt)$LAYOUT.select(groupElement("type"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * enum cuvsFilterType type
+     * }
+     */
+    public static final OfInt type$layout() {
+        return type$LAYOUT;
+    }
+
+    private static final long type$OFFSET = 8;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * enum cuvsFilterType type
+     * }
+     */
+    public static final long type$offset() {
+        return type$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * enum cuvsFilterType type
+     * }
+     */
+    public static int type(MemorySegment struct) {
+        return struct.get(type$LAYOUT, type$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * enum cuvsFilterType type
+     * }
+     */
+    public static void type(MemorySegment struct, int fieldValue) {
+        struct.set(type$LAYOUT, type$OFFSET, fieldValue);
+    }
+
+    /**
+     * Obtains a slice of {@code array} which selects the array element at {@code index}.
+     * The returned segment has address {@code array.address() + index * layout().byteSize()}
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        return array.asSlice(layout().byteSize() * index);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return layout().byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate(layout());
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanup} (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanup} (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswExtendParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswExtendParams.java
new file mode 100644
index 000000000..8d750d02e
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswExtendParams.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang = c :
+ * struct cuvsHnswExtendParams {
+ *     int num_threads;
+ * }
+ * }
+ */
+public class CuVSHnswExtendParams {
+
+  CuVSHnswExtendParams() {
+    // Should not be called directly
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(HnswH.C_INT.withName("num_threads"))
+      .withName("cuvsHnswExtendParams");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfInt num_threads$LAYOUT = (OfInt) $LAYOUT.select(groupElement("num_threads"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * int num_threads
+   * }
+   */
+  public static final OfInt num_threads$layout() {
+    return num_threads$LAYOUT;
+  }
+
+  private static final long num_threads$OFFSET = 0;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * int num_threads
+   * }
+   */
+  public static final long num_threads$offset() {
+    return num_threads$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * int num_threads
+   * }
+   */
+  public static int num_threads(MemorySegment struct) {
+    return struct.get(num_threads$LAYOUT, num_threads$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * int num_threads
+   * }
+   */
+  public static void num_threads(MemorySegment struct, int fieldValue) {
+    struct.set(num_threads$LAYOUT, num_threads$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} which selects the array element at
+   * {@code index}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndex.java
new file mode 100644
index 000000000..1f879462b
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndex.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang = c :
+ * struct {
+ *     uintptr_t addr;
+ *     DLDataType dtype;
+ * }
+ * }
+ */
+public class CuVSHnswIndex {
+
+  CuVSHnswIndex() {
+    // Should not be called directly
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(HnswH.C_LONG.withName("addr"),
+      DLDataType.layout().withName("dtype"), MemoryLayout.paddingLayout(4)).withName("$anon$66:9");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfLong addr$LAYOUT = (OfLong) $LAYOUT.select(groupElement("addr"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * uintptr_t addr
+   * }
+   */
+  public static final OfLong addr$layout() {
+    return addr$LAYOUT;
+  }
+
+  private static final long addr$OFFSET = 0;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * uintptr_t addr
+   * }
+   */
+  public static final long addr$offset() {
+    return addr$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * uintptr_t addr
+   * }
+   */
+  public static long addr(MemorySegment struct) {
+    return struct.get(addr$LAYOUT, addr$OFFSET);
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * uintptr_t addr
+   * }
+   */
+  public static void addr(MemorySegment struct, long fieldValue) {
+    struct.set(addr$LAYOUT, addr$OFFSET, fieldValue);
+  }
+
+  private static final GroupLayout dtype$LAYOUT = (GroupLayout) $LAYOUT.select(groupElement("dtype"));
+
+  /**
+   * Layout for field: {@snippet lang=c :
+   * DLDataType dtype
+   * }
+   */
+  public static final GroupLayout dtype$layout() {
+    return dtype$LAYOUT;
+  }
+
+  private static final long dtype$OFFSET = 8;
+
+  /**
+   * Offset for field: {@snippet lang=c :
+   * DLDataType dtype
+   * }
+   */
+  public static final long dtype$offset() {
+    return dtype$OFFSET;
+  }
+
+  /**
+   * Getter for field: {@snippet lang=c :
+   * DLDataType dtype
+   * }
+   */
+  public static MemorySegment dtype(MemorySegment struct) {
+    return struct.asSlice(dtype$OFFSET, dtype$LAYOUT.byteSize());
+  }
+
+  /**
+   * Setter for field: {@snippet lang=c :
+   * DLDataType dtype
+   * }
+   */
+  public static void dtype(MemorySegment struct, MemorySegment fieldValue) {
+    MemorySegment.copy(fieldValue, 0L, struct, dtype$OFFSET, dtype$LAYOUT.byteSize());
+  }
+
+  /**
+   * Obtains a slice of {@code array} which selects the array element at
+   * {@code index}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndexParams.java
new file mode 100644
index 000000000..3e6e45a09
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndexParams.java
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.util.function.Consumer;
+
+/**
+ * Static accessors for the native {@code cuvsHnswIndexParams} struct. Every
+ * method operates on a caller-supplied {@link MemorySegment}; the class itself
+ * carries no instance state.
+ *
+ * {@snippet lang = c :
+ * struct cuvsHnswIndexParams {
+ *     cuvsHnswHierarchy hierarchy;
+ *     int ef_construction;
+ *     int num_threads;
+ * }
+ * }
+ */
+public class CuVSHnswIndexParams {
+
+  CuVSHnswIndexParams() {
+    // Should not be called directly
+  }
+
+  // Member names mirror the C field names so that each field can be resolved
+  // with groupElement(...) below.
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+      HnswH.C_INT.withName("hierarchy"),
+      HnswH.C_INT.withName("ef_construction"),
+      HnswH.C_INT.withName("num_threads")).withName("cuvsHnswIndexParams");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfInt hierarchy$LAYOUT = (OfInt) $LAYOUT.select(groupElement("hierarchy"));
+
+  /**
+   * Layout for field:
+   * {@snippet lang = c :
+   * cuvsHnswHierarchy hierarchy
+   * }
+   */
+  public static final OfInt hierarchy$layout() {
+    return hierarchy$LAYOUT;
+  }
+
+  // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+  private static final long hierarchy$OFFSET = $LAYOUT.byteOffset(groupElement("hierarchy"));
+
+  /**
+   * Offset (in bytes) for field:
+   * {@snippet lang = c :
+   * cuvsHnswHierarchy hierarchy
+   * }
+   */
+  public static final long hierarchy$offset() {
+    return hierarchy$OFFSET;
+  }
+
+  /**
+   * Getter for field (stored as a C {@code int}):
+   * {@snippet lang = c :
+   * cuvsHnswHierarchy hierarchy
+   * }
+   */
+  public static int hierarchy(MemorySegment struct) {
+    return struct.get(hierarchy$LAYOUT, hierarchy$OFFSET);
+  }
+
+  /**
+   * Setter for field (stored as a C {@code int}):
+   * {@snippet lang = c :
+   * cuvsHnswHierarchy hierarchy
+   * }
+   */
+  public static void hierarchy(MemorySegment struct, int fieldValue) {
+    struct.set(hierarchy$LAYOUT, hierarchy$OFFSET, fieldValue);
+  }
+
+  private static final OfInt ef_construction$LAYOUT = (OfInt) $LAYOUT.select(groupElement("ef_construction"));
+
+  /**
+   * Layout for field:
+   * {@snippet lang = c :
+   * int ef_construction
+   * }
+   */
+  public static final OfInt ef_construction$layout() {
+    return ef_construction$LAYOUT;
+  }
+
+  // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+  private static final long ef_construction$OFFSET = $LAYOUT.byteOffset(groupElement("ef_construction"));
+
+  /**
+   * Offset (in bytes) for field:
+   * {@snippet lang = c :
+   * int ef_construction
+   * }
+   */
+  public static final long ef_construction$offset() {
+    return ef_construction$OFFSET;
+  }
+
+  /**
+   * Getter for field:
+   * {@snippet lang = c :
+   * int ef_construction
+   * }
+   */
+  public static int ef_construction(MemorySegment struct) {
+    return struct.get(ef_construction$LAYOUT, ef_construction$OFFSET);
+  }
+
+  /**
+   * Setter for field:
+   * {@snippet lang = c :
+   * int ef_construction
+   * }
+   */
+  public static void ef_construction(MemorySegment struct, int fieldValue) {
+    struct.set(ef_construction$LAYOUT, ef_construction$OFFSET, fieldValue);
+  }
+
+  private static final OfInt num_threads$LAYOUT = (OfInt) $LAYOUT.select(groupElement("num_threads"));
+
+  /**
+   * Layout for field:
+   * {@snippet lang = c :
+   * int num_threads
+   * }
+   */
+  public static final OfInt num_threads$layout() {
+    return num_threads$LAYOUT;
+  }
+
+  // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+  private static final long num_threads$OFFSET = $LAYOUT.byteOffset(groupElement("num_threads"));
+
+  /**
+   * Offset (in bytes) for field:
+   * {@snippet lang = c :
+   * int num_threads
+   * }
+   */
+  public static final long num_threads$offset() {
+    return num_threads$OFFSET;
+  }
+
+  /**
+   * Getter for field:
+   * {@snippet lang = c :
+   * int num_threads
+   * }
+   */
+  public static int num_threads(MemorySegment struct) {
+    return struct.get(num_threads$LAYOUT, num_threads$OFFSET);
+  }
+
+  /**
+   * Setter for field:
+   * {@snippet lang = c :
+   * int num_threads
+   * }
+   */
+  public static void num_threads(MemorySegment struct, int fieldValue) {
+    struct.set(num_threads$LAYOUT, num_threads$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} positioned at the array element at
+   * {@code index}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()} and extends to the end
+   * of {@code array}.
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    final long elementStart = $LAYOUT.byteSize() * index;
+    return array.asSlice(elementStart);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return $LAYOUT.byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate($LAYOUT);
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    final MemoryLayout arrayLayout = MemoryLayout.sequenceLayout(elementCount, $LAYOUT);
+    return allocator.allocate(arrayLayout);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup}
+   * action (if any). The returned segment has size {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup}
+   * action (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    final long totalBytes = $LAYOUT.byteSize() * elementCount;
+    return addr.reinterpret(totalBytes, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswSearchParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswSearchParams.java
new file mode 100644
index 000000000..4a5941a3c
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswSearchParams.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.util.function.Consumer;
+
+/**
+ * Static accessors for the native {@code cuvsHnswSearchParams} struct. Every
+ * method operates on a caller-supplied {@link MemorySegment}; the class itself
+ * carries no instance state.
+ *
+ * {@snippet lang = c :
+ * struct cuvsHnswSearchParams {
+ *     int32_t ef;
+ *     int32_t num_threads;
+ * }
+ * }
+ */
+public class CuVSHnswSearchParams {
+
+  CuVSHnswSearchParams() {
+    // Should not be called directly
+  }
+
+  // Member names mirror the C field names so that each field can be resolved
+  // with groupElement(...) below.
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+      HnswH.C_INT.withName("ef"),
+      HnswH.C_INT.withName("num_threads")).withName("cuvsHnswSearchParams");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfInt ef$LAYOUT = (OfInt) $LAYOUT.select(groupElement("ef"));
+
+  /**
+   * Layout for field:
+   * {@snippet lang = c :
+   * int32_t ef
+   * }
+   */
+  public static final OfInt ef$layout() {
+    return ef$LAYOUT;
+  }
+
+  // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+  private static final long ef$OFFSET = $LAYOUT.byteOffset(groupElement("ef"));
+
+  /**
+   * Offset (in bytes) for field:
+   * {@snippet lang = c :
+   * int32_t ef
+   * }
+   */
+  public static final long ef$offset() {
+    return ef$OFFSET;
+  }
+
+  /**
+   * Getter for field:
+   * {@snippet lang = c :
+   * int32_t ef
+   * }
+   */
+  public static int ef(MemorySegment struct) {
+    return struct.get(ef$LAYOUT, ef$OFFSET);
+  }
+
+  /**
+   * Setter for field:
+   * {@snippet lang = c :
+   * int32_t ef
+   * }
+   */
+  public static void ef(MemorySegment struct, int fieldValue) {
+    struct.set(ef$LAYOUT, ef$OFFSET, fieldValue);
+  }
+
+  private static final OfInt num_threads$LAYOUT = (OfInt) $LAYOUT.select(groupElement("num_threads"));
+
+  /**
+   * Layout for field:
+   * {@snippet lang = c :
+   * int32_t num_threads
+   * }
+   */
+  public static final OfInt num_threads$layout() {
+    return num_threads$LAYOUT;
+  }
+
+  // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+  private static final long num_threads$OFFSET = $LAYOUT.byteOffset(groupElement("num_threads"));
+
+  /**
+   * Offset (in bytes) for field:
+   * {@snippet lang = c :
+   * int32_t num_threads
+   * }
+   */
+  public static final long num_threads$offset() {
+    return num_threads$OFFSET;
+  }
+
+  /**
+   * Getter for field:
+   * {@snippet lang = c :
+   * int32_t num_threads
+   * }
+   */
+  public static int num_threads(MemorySegment struct) {
+    return struct.get(num_threads$LAYOUT, num_threads$OFFSET);
+  }
+
+  /**
+   * Setter for field:
+   * {@snippet lang = c :
+   * int32_t num_threads
+   * }
+   */
+  public static void num_threads(MemorySegment struct, int fieldValue) {
+    struct.set(num_threads$LAYOUT, num_threads$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} positioned at the array element at
+   * {@code index}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()} and extends to the end
+   * of {@code array}.
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    final long elementStart = $LAYOUT.byteSize() * index;
+    return array.asSlice(elementStart);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return $LAYOUT.byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate($LAYOUT);
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    final MemoryLayout arrayLayout = MemoryLayout.sequenceLayout(elementCount, $LAYOUT);
+    return allocator.allocate(arrayLayout);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup}
+   * action (if any). The returned segment has size {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup}
+   * action (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    final long totalBytes = $LAYOUT.byteSize() * elementCount;
+    return addr.reinterpret(totalBytes, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDataType.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDataType.java
new file mode 100644
index 000000000..c7c588676
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDataType.java
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.util.function.Consumer;
+
+/**
+ * Static accessors for the anonymous native struct described below. Every
+ * method operates on a caller-supplied {@link MemorySegment}; the class itself
+ * carries no instance state.
+ *
+ * {@snippet lang=c :
+ * struct {
+ *     uint8_t code;
+ *     uint8_t bits;
+ *     uint16_t lanes;
+ * }
+ * }
+ */
+public class DLDataType {
+
+    DLDataType() {
+        // Should not be called directly
+    }
+
+    // Member names mirror the C field names so that each field can be resolved
+    // with groupElement(...) below.
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DlpackH.C_CHAR.withName("code"),
+        DlpackH.C_CHAR.withName("bits"),
+        DlpackH.C_SHORT.withName("lanes")
+    ).withName("$anon$174:9");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final OfByte code$LAYOUT = (OfByte)$LAYOUT.select(groupElement("code"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint8_t code
+     * }
+     */
+    public static final OfByte code$layout() {
+        return code$LAYOUT;
+    }
+
+    // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+    private static final long code$OFFSET = $LAYOUT.byteOffset(groupElement("code"));
+
+    /**
+     * Offset (in bytes) for field:
+     * {@snippet lang=c :
+     * uint8_t code
+     * }
+     */
+    public static final long code$offset() {
+        return code$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint8_t code
+     * }
+     */
+    public static byte code(MemorySegment struct) {
+        return struct.get(code$LAYOUT, code$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint8_t code
+     * }
+     */
+    public static void code(MemorySegment struct, byte fieldValue) {
+        struct.set(code$LAYOUT, code$OFFSET, fieldValue);
+    }
+
+    private static final OfByte bits$LAYOUT = (OfByte)$LAYOUT.select(groupElement("bits"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint8_t bits
+     * }
+     */
+    public static final OfByte bits$layout() {
+        return bits$LAYOUT;
+    }
+
+    // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+    private static final long bits$OFFSET = $LAYOUT.byteOffset(groupElement("bits"));
+
+    /**
+     * Offset (in bytes) for field:
+     * {@snippet lang=c :
+     * uint8_t bits
+     * }
+     */
+    public static final long bits$offset() {
+        return bits$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint8_t bits
+     * }
+     */
+    public static byte bits(MemorySegment struct) {
+        return struct.get(bits$LAYOUT, bits$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint8_t bits
+     * }
+     */
+    public static void bits(MemorySegment struct, byte fieldValue) {
+        struct.set(bits$LAYOUT, bits$OFFSET, fieldValue);
+    }
+
+    private static final OfShort lanes$LAYOUT = (OfShort)$LAYOUT.select(groupElement("lanes"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint16_t lanes
+     * }
+     */
+    public static final OfShort lanes$layout() {
+        return lanes$LAYOUT;
+    }
+
+    // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+    private static final long lanes$OFFSET = $LAYOUT.byteOffset(groupElement("lanes"));
+
+    /**
+     * Offset (in bytes) for field:
+     * {@snippet lang=c :
+     * uint16_t lanes
+     * }
+     */
+    public static final long lanes$offset() {
+        return lanes$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint16_t lanes
+     * }
+     */
+    public static short lanes(MemorySegment struct) {
+        return struct.get(lanes$LAYOUT, lanes$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint16_t lanes
+     * }
+     */
+    public static void lanes(MemorySegment struct, short fieldValue) {
+        struct.set(lanes$LAYOUT, lanes$OFFSET, fieldValue);
+    }
+
+    /**
+     * Obtains a slice of {@code array} positioned at the array element at {@code index}.
+     * The returned segment has address {@code array.address() + index * layout().byteSize()}
+     * and extends to the end of {@code array}.
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        final long elementStart = $LAYOUT.byteSize() * index;
+        return array.asSlice(elementStart);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return $LAYOUT.byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate($LAYOUT);
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        final MemoryLayout arrayLayout = MemoryLayout.sequenceLayout(elementCount, $LAYOUT);
+        return allocator.allocate(arrayLayout);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup} action (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup} action (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        final long totalBytes = $LAYOUT.byteSize() * elementCount;
+        return addr.reinterpret(totalBytes, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDevice.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDevice.java
new file mode 100644
index 000000000..a0d5d89ea
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDevice.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.util.function.Consumer;
+
+/**
+ * Static accessors for the anonymous native struct described below. Every
+ * method operates on a caller-supplied {@link MemorySegment}; the class itself
+ * carries no instance state.
+ *
+ * {@snippet lang=c :
+ * struct {
+ *     DLDeviceType device_type;
+ *     int32_t device_id;
+ * }
+ * }
+ */
+public class DLDevice {
+
+    DLDevice() {
+        // Should not be called directly
+    }
+
+    // Member names mirror the C field names so that each field can be resolved
+    // with groupElement(...) below.
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DlpackH.C_INT.withName("device_type"),
+        DlpackH.C_INT.withName("device_id")
+    ).withName("$anon$126:9");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final OfInt device_type$LAYOUT = (OfInt)$LAYOUT.select(groupElement("device_type"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * DLDeviceType device_type
+     * }
+     */
+    public static final OfInt device_type$layout() {
+        return device_type$LAYOUT;
+    }
+
+    // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+    private static final long device_type$OFFSET = $LAYOUT.byteOffset(groupElement("device_type"));
+
+    /**
+     * Offset (in bytes) for field:
+     * {@snippet lang=c :
+     * DLDeviceType device_type
+     * }
+     */
+    public static final long device_type$offset() {
+        return device_type$OFFSET;
+    }
+
+    /**
+     * Getter for field (stored as a C {@code int}):
+     * {@snippet lang=c :
+     * DLDeviceType device_type
+     * }
+     */
+    public static int device_type(MemorySegment struct) {
+        return struct.get(device_type$LAYOUT, device_type$OFFSET);
+    }
+
+    /**
+     * Setter for field (stored as a C {@code int}):
+     * {@snippet lang=c :
+     * DLDeviceType device_type
+     * }
+     */
+    public static void device_type(MemorySegment struct, int fieldValue) {
+        struct.set(device_type$LAYOUT, device_type$OFFSET, fieldValue);
+    }
+
+    private static final OfInt device_id$LAYOUT = (OfInt)$LAYOUT.select(groupElement("device_id"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * int32_t device_id
+     * }
+     */
+    public static final OfInt device_id$layout() {
+        return device_id$LAYOUT;
+    }
+
+    // Derived from $LAYOUT so the offset cannot drift from the struct declaration.
+    private static final long device_id$OFFSET = $LAYOUT.byteOffset(groupElement("device_id"));
+
+    /**
+     * Offset (in bytes) for field:
+     * {@snippet lang=c :
+     * int32_t device_id
+     * }
+     */
+    public static final long device_id$offset() {
+        return device_id$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * int32_t device_id
+     * }
+     */
+    public static int device_id(MemorySegment struct) {
+        return struct.get(device_id$LAYOUT, device_id$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * int32_t device_id
+     * }
+     */
+    public static void device_id(MemorySegment struct, int fieldValue) {
+        struct.set(device_id$LAYOUT, device_id$OFFSET, fieldValue);
+    }
+
+    /**
+     * Obtains a slice of {@code array} positioned at the array element at {@code index}.
+     * The returned segment has address {@code array.address() + index * layout().byteSize()}
+     * and extends to the end of {@code array}.
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        final long elementStart = $LAYOUT.byteSize() * index;
+        return array.asSlice(elementStart);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return $LAYOUT.byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate($LAYOUT);
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        final MemoryLayout arrayLayout = MemoryLayout.sequenceLayout(elementCount, $LAYOUT);
+        return allocator.allocate(arrayLayout);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup} action (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup} action (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        final long totalBytes = $LAYOUT.byteSize() * elementCount;
+        return addr.reinterpret(totalBytes, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensor.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensor.java
new file mode 100644
index 000000000..71bba9fe5
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensor.java
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.invoke.MethodHandle;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang=c :
+ * struct DLManagedTensor {
+ *     DLTensor dl_tensor;
+ *     void *manager_ctx;
+ *     void (*deleter)(struct DLManagedTensor *);
+ * }
+ * }
+ */
+public class DLManagedTensor {
+
+    DLManagedTensor() {
+        // Should not be called directly
+    }
+
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DLTensor.layout().withName("dl_tensor"),
+        DlpackH.C_POINTER.withName("manager_ctx"),
+        DlpackH.C_POINTER.withName("deleter")
+    ).withName("DLManagedTensor");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final GroupLayout dl_tensor$LAYOUT = (GroupLayout)$LAYOUT.select(groupElement("dl_tensor"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static final GroupLayout dl_tensor$layout() {
+        return dl_tensor$LAYOUT;
+    }
+
+    private static final long dl_tensor$OFFSET = 0;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static final long dl_tensor$offset() {
+        return dl_tensor$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static MemorySegment dl_tensor(MemorySegment struct) {
+        return struct.asSlice(dl_tensor$OFFSET, dl_tensor$LAYOUT.byteSize());
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static void dl_tensor(MemorySegment struct, MemorySegment fieldValue) {
+        MemorySegment.copy(fieldValue, 0L, struct, dl_tensor$OFFSET, dl_tensor$LAYOUT.byteSize());
+    }
+
+    private static final AddressLayout manager_ctx$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("manager_ctx"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static final AddressLayout manager_ctx$layout() {
+        return manager_ctx$LAYOUT;
+    }
+
+    private static final long manager_ctx$OFFSET = 48;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static final long manager_ctx$offset() {
+        return manager_ctx$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static MemorySegment manager_ctx(MemorySegment struct) {
+        return struct.get(manager_ctx$LAYOUT, manager_ctx$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static void manager_ctx(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(manager_ctx$LAYOUT, manager_ctx$OFFSET, fieldValue);
+    }
+
+    /**
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensor *)
+     * }
+     */
+    public static class deleter {
+
+        deleter() {
+            // Should not be called directly
+        }
+
+        /**
+         * The function pointer signature, expressed as a functional interface
+         */
+        public interface Function {
+            void apply(MemorySegment _x0);
+        }
+
+        private static final FunctionDescriptor $DESC = FunctionDescriptor.ofVoid(
+            DlpackH.C_POINTER
+        );
+
+        /**
+         * The descriptor of this function pointer
+         */
+        public static FunctionDescriptor descriptor() {
+            return $DESC;
+        }
+
+        private static final MethodHandle UP$MH = DlpackH.upcallHandle(deleter.Function.class, "apply", $DESC);
+
+        /**
+         * Allocates a new upcall stub, whose implementation is defined by {@code fi}.
+         * The lifetime of the returned segment is managed by {@code arena}
+         */
+        public static MemorySegment allocate(deleter.Function fi, Arena arena) {
+            return Linker.nativeLinker().upcallStub(UP$MH.bindTo(fi), $DESC, arena);
+        }
+
+        private static final MethodHandle DOWN$MH = Linker.nativeLinker().downcallHandle($DESC);
+
+        /**
+         * Invoke the upcall stub {@code funcPtr}, with given parameters
+         */
+        public static void invoke(MemorySegment funcPtr,MemorySegment _x0) {
+            try {
+                 DOWN$MH.invokeExact(funcPtr, _x0);
+            } catch (Throwable ex$) {
+                throw new AssertionError("should not reach here", ex$);
+            }
+        }
+    }
+
+    private static final AddressLayout deleter$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("deleter"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensor *)
+     * }
+     */
+    public static final AddressLayout deleter$layout() {
+        return deleter$LAYOUT;
+    }
+
+    private static final long deleter$OFFSET = 56;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensor *)
+     * }
+     */
+    public static final long deleter$offset() {
+        return deleter$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensor *)
+     * }
+     */
+    public static MemorySegment deleter(MemorySegment struct) {
+        return struct.get(deleter$LAYOUT, deleter$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensor *)
+     * }
+     */
+    public static void deleter(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(deleter$LAYOUT, deleter$OFFSET, fieldValue);
+    }
+
+    /**
+     * Obtains a slice of {@code arrayParam} which selects the array element at {@code index}.
+     * The returned segment has address {@code arrayParam.address() + index * layout().byteSize()}
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        return array.asSlice(layout().byteSize() * index);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return layout().byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate(layout());
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensorVersioned.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensorVersioned.java
new file mode 100644
index 000000000..efdcf9043
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensorVersioned.java
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.invoke.MethodHandle;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang=c :
+ * struct DLManagedTensorVersioned {
+ *     DLPackVersion version;
+ *     void *manager_ctx;
+ *     void (*deleter)(struct DLManagedTensorVersioned *);
+ *     uint64_t flags;
+ *     DLTensor dl_tensor;
+ * }
+ * }
+ */
+public class DLManagedTensorVersioned {
+
+    DLManagedTensorVersioned() {
+        // Should not be called directly
+    }
+
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DLPackVersion.layout().withName("version"),
+        DlpackH.C_POINTER.withName("manager_ctx"),
+        DlpackH.C_POINTER.withName("deleter"),
+        DlpackH.C_LONG.withName("flags"),
+        DLTensor.layout().withName("dl_tensor")
+    ).withName("DLManagedTensorVersioned");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final GroupLayout version$LAYOUT = (GroupLayout)$LAYOUT.select(groupElement("version"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * DLPackVersion version
+     * }
+     */
+    public static final GroupLayout version$layout() {
+        return version$LAYOUT;
+    }
+
+    private static final long version$OFFSET = 0;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * DLPackVersion version
+     * }
+     */
+    public static final long version$offset() {
+        return version$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * DLPackVersion version
+     * }
+     */
+    public static MemorySegment version(MemorySegment struct) {
+        return struct.asSlice(version$OFFSET, version$LAYOUT.byteSize());
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * DLPackVersion version
+     * }
+     */
+    public static void version(MemorySegment struct, MemorySegment fieldValue) {
+        MemorySegment.copy(fieldValue, 0L, struct, version$OFFSET, version$LAYOUT.byteSize());
+    }
+
+    private static final AddressLayout manager_ctx$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("manager_ctx"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static final AddressLayout manager_ctx$layout() {
+        return manager_ctx$LAYOUT;
+    }
+
+    private static final long manager_ctx$OFFSET = 8;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static final long manager_ctx$offset() {
+        return manager_ctx$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static MemorySegment manager_ctx(MemorySegment struct) {
+        return struct.get(manager_ctx$LAYOUT, manager_ctx$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * void *manager_ctx
+     * }
+     */
+    public static void manager_ctx(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(manager_ctx$LAYOUT, manager_ctx$OFFSET, fieldValue);
+    }
+
+    /**
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensorVersioned *)
+     * }
+     */
+    public static class deleter {
+
+        deleter() {
+            // Should not be called directly
+        }
+
+        /**
+         * The function pointer signature, expressed as a functional interface
+         */
+        public interface Function {
+            void apply(MemorySegment _x0);
+        }
+
+        private static final FunctionDescriptor $DESC = FunctionDescriptor.ofVoid(
+            DlpackH.C_POINTER
+        );
+
+        /**
+         * The descriptor of this function pointer
+         */
+        public static FunctionDescriptor descriptor() {
+            return $DESC;
+        }
+
+        private static final MethodHandle UP$MH = DlpackH.upcallHandle(deleter.Function.class, "apply", $DESC);
+
+        /**
+         * Allocates a new upcall stub, whose implementation is defined by {@code fi}.
+         * The lifetime of the returned segment is managed by {@code arena}
+         */
+        public static MemorySegment allocate(deleter.Function fi, Arena arena) {
+            return Linker.nativeLinker().upcallStub(UP$MH.bindTo(fi), $DESC, arena);
+        }
+
+        private static final MethodHandle DOWN$MH = Linker.nativeLinker().downcallHandle($DESC);
+
+        /**
+         * Invoke the upcall stub {@code funcPtr}, with given parameters
+         */
+        public static void invoke(MemorySegment funcPtr,MemorySegment _x0) {
+            try {
+                 DOWN$MH.invokeExact(funcPtr, _x0);
+            } catch (Throwable ex$) {
+                throw new AssertionError("should not reach here", ex$);
+            }
+        }
+    }
+
+    private static final AddressLayout deleter$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("deleter"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensorVersioned *)
+     * }
+     */
+    public static final AddressLayout deleter$layout() {
+        return deleter$LAYOUT;
+    }
+
+    private static final long deleter$OFFSET = 16;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensorVersioned *)
+     * }
+     */
+    public static final long deleter$offset() {
+        return deleter$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensorVersioned *)
+     * }
+     */
+    public static MemorySegment deleter(MemorySegment struct) {
+        return struct.get(deleter$LAYOUT, deleter$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * void (*deleter)(struct DLManagedTensorVersioned *)
+     * }
+     */
+    public static void deleter(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(deleter$LAYOUT, deleter$OFFSET, fieldValue);
+    }
+
+    private static final OfLong flags$LAYOUT = (OfLong)$LAYOUT.select(groupElement("flags"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint64_t flags
+     * }
+     */
+    public static final OfLong flags$layout() {
+        return flags$LAYOUT;
+    }
+
+    private static final long flags$OFFSET = 24;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * uint64_t flags
+     * }
+     */
+    public static final long flags$offset() {
+        return flags$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint64_t flags
+     * }
+     */
+    public static long flags(MemorySegment struct) {
+        return struct.get(flags$LAYOUT, flags$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint64_t flags
+     * }
+     */
+    public static void flags(MemorySegment struct, long fieldValue) {
+        struct.set(flags$LAYOUT, flags$OFFSET, fieldValue);
+    }
+
+    private static final GroupLayout dl_tensor$LAYOUT = (GroupLayout)$LAYOUT.select(groupElement("dl_tensor"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static final GroupLayout dl_tensor$layout() {
+        return dl_tensor$LAYOUT;
+    }
+
+    private static final long dl_tensor$OFFSET = 32;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static final long dl_tensor$offset() {
+        return dl_tensor$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static MemorySegment dl_tensor(MemorySegment struct) {
+        return struct.asSlice(dl_tensor$OFFSET, dl_tensor$LAYOUT.byteSize());
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * DLTensor dl_tensor
+     * }
+     */
+    public static void dl_tensor(MemorySegment struct, MemorySegment fieldValue) {
+        MemorySegment.copy(fieldValue, 0L, struct, dl_tensor$OFFSET, dl_tensor$LAYOUT.byteSize());
+    }
+
+    /**
+     * Obtains a slice of {@code arrayParam} which selects the array element at {@code index}.
+     * The returned segment has address {@code arrayParam.address() + index * layout().byteSize()}
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        return array.asSlice(layout().byteSize() * index);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return layout().byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate(layout());
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLPackVersion.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLPackVersion.java
new file mode 100644
index 000000000..bc8050766
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLPackVersion.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang=c :
+ * struct {
+ *     uint32_t major;
+ *     uint32_t minor;
+ * }
+ * }
+ */
+public class DLPackVersion {
+
+    DLPackVersion() {
+        // Should not be called directly
+    }
+
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DlpackH.C_INT.withName("major"),
+        DlpackH.C_INT.withName("minor")
+    ).withName("$anon$61:9");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final OfInt major$LAYOUT = (OfInt)$LAYOUT.select(groupElement("major"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint32_t major
+     * }
+     */
+    public static final OfInt major$layout() {
+        return major$LAYOUT;
+    }
+
+    private static final long major$OFFSET = 0;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * uint32_t major
+     * }
+     */
+    public static final long major$offset() {
+        return major$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint32_t major
+     * }
+     */
+    public static int major(MemorySegment struct) {
+        return struct.get(major$LAYOUT, major$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint32_t major
+     * }
+     */
+    public static void major(MemorySegment struct, int fieldValue) {
+        struct.set(major$LAYOUT, major$OFFSET, fieldValue);
+    }
+
+    private static final OfInt minor$LAYOUT = (OfInt)$LAYOUT.select(groupElement("minor"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint32_t minor
+     * }
+     */
+    public static final OfInt minor$layout() {
+        return minor$LAYOUT;
+    }
+
+    private static final long minor$OFFSET = 4;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * uint32_t minor
+     * }
+     */
+    public static final long minor$offset() {
+        return minor$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint32_t minor
+     * }
+     */
+    public static int minor(MemorySegment struct) {
+        return struct.get(minor$LAYOUT, minor$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint32_t minor
+     * }
+     */
+    public static void minor(MemorySegment struct, int fieldValue) {
+        struct.set(minor$LAYOUT, minor$OFFSET, fieldValue);
+    }
+
+    /**
+     * Obtains a slice of {@code arrayParam} which selects the array element at {@code index}.
+     * The returned segment has address {@code arrayParam.address() + index * layout().byteSize()}
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        return array.asSlice(layout().byteSize() * index);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return layout().byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate(layout());
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLTensor.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLTensor.java
new file mode 100644
index 000000000..63082b35a
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLTensor.java
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang=c :
+ * struct {
+ *     void *data;
+ *     DLDevice device;
+ *     int32_t ndim;
+ *     DLDataType dtype;
+ *     int64_t *shape;
+ *     int64_t *strides;
+ *     uint64_t byte_offset;
+ * }
+ * }
+ */
+public class DLTensor {
+
+    DLTensor() {
+        // Should not be called directly
+    }
+
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DlpackH.C_POINTER.withName("data"),
+        DLDevice.layout().withName("device"),
+        DlpackH.C_INT.withName("ndim"),
+        DLDataType.layout().withName("dtype"),
+        DlpackH.C_POINTER.withName("shape"),
+        DlpackH.C_POINTER.withName("strides"),
+        DlpackH.C_LONG.withName("byte_offset")
+    ).withName("$anon$192:9");
+
+    /**
+     * The layout of this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final AddressLayout data$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("data"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * void *data
+     * }
+     */
+    public static final AddressLayout data$layout() {
+        return data$LAYOUT;
+    }
+
+    private static final long data$OFFSET = 0;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * void *data
+     * }
+     */
+    public static final long data$offset() {
+        return data$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * void *data
+     * }
+     */
+    public static MemorySegment data(MemorySegment struct) {
+        return struct.get(data$LAYOUT, data$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * void *data
+     * }
+     */
+    public static void data(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(data$LAYOUT, data$OFFSET, fieldValue);
+    }
+
+    private static final GroupLayout device$LAYOUT = (GroupLayout)$LAYOUT.select(groupElement("device"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * DLDevice device
+     * }
+     */
+    public static final GroupLayout device$layout() {
+        return device$LAYOUT;
+    }
+
+    private static final long device$OFFSET = 8;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * DLDevice device
+     * }
+     */
+    public static final long device$offset() {
+        return device$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * DLDevice device
+     * }
+     */
+    public static MemorySegment device(MemorySegment struct) {
+        return struct.asSlice(device$OFFSET, device$LAYOUT.byteSize());
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * DLDevice device
+     * }
+     */
+    public static void device(MemorySegment struct, MemorySegment fieldValue) {
+        MemorySegment.copy(fieldValue, 0L, struct, device$OFFSET, device$LAYOUT.byteSize());
+    }
+
+    private static final OfInt ndim$LAYOUT = (OfInt)$LAYOUT.select(groupElement("ndim"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * int32_t ndim
+     * }
+     */
+    public static final OfInt ndim$layout() {
+        return ndim$LAYOUT;
+    }
+
+    private static final long ndim$OFFSET = 16;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * int32_t ndim
+     * }
+     */
+    public static final long ndim$offset() {
+        return ndim$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * int32_t ndim
+     * }
+     */
+    public static int ndim(MemorySegment struct) {
+        return struct.get(ndim$LAYOUT, ndim$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * int32_t ndim
+     * }
+     */
+    public static void ndim(MemorySegment struct, int fieldValue) {
+        struct.set(ndim$LAYOUT, ndim$OFFSET, fieldValue);
+    }
+
+    private static final GroupLayout dtype$LAYOUT = (GroupLayout)$LAYOUT.select(groupElement("dtype"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * DLDataType dtype
+     * }
+     */
+    public static final GroupLayout dtype$layout() {
+        return dtype$LAYOUT;
+    }
+
+    private static final long dtype$OFFSET = 20;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * DLDataType dtype
+     * }
+     */
+    public static final long dtype$offset() {
+        return dtype$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * DLDataType dtype
+     * }
+     */
+    public static MemorySegment dtype(MemorySegment struct) {
+        return struct.asSlice(dtype$OFFSET, dtype$LAYOUT.byteSize());
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * DLDataType dtype
+     * }
+     */
+    public static void dtype(MemorySegment struct, MemorySegment fieldValue) {
+        MemorySegment.copy(fieldValue, 0L, struct, dtype$OFFSET, dtype$LAYOUT.byteSize());
+    }
+
+    private static final AddressLayout shape$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("shape"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * int64_t *shape
+     * }
+     */
+    public static final AddressLayout shape$layout() {
+        return shape$LAYOUT;
+    }
+
+    private static final long shape$OFFSET = 24;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * int64_t *shape
+     * }
+     */
+    public static final long shape$offset() {
+        return shape$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * int64_t *shape
+     * }
+     */
+    public static MemorySegment shape(MemorySegment struct) {
+        return struct.get(shape$LAYOUT, shape$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * int64_t *shape
+     * }
+     */
+    public static void shape(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(shape$LAYOUT, shape$OFFSET, fieldValue);
+    }
+
+    private static final AddressLayout strides$LAYOUT = (AddressLayout)$LAYOUT.select(groupElement("strides"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * int64_t *strides
+     * }
+     */
+    public static final AddressLayout strides$layout() {
+        return strides$LAYOUT;
+    }
+
+    private static final long strides$OFFSET = 32;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * int64_t *strides
+     * }
+     */
+    public static final long strides$offset() {
+        return strides$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * int64_t *strides
+     * }
+     */
+    public static MemorySegment strides(MemorySegment struct) {
+        return struct.get(strides$LAYOUT, strides$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * int64_t *strides
+     * }
+     */
+    public static void strides(MemorySegment struct, MemorySegment fieldValue) {
+        struct.set(strides$LAYOUT, strides$OFFSET, fieldValue);
+    }
+
+    private static final OfLong byte_offset$LAYOUT = (OfLong)$LAYOUT.select(groupElement("byte_offset"));
+
+    /**
+     * Layout for field:
+     * {@snippet lang=c :
+     * uint64_t byte_offset
+     * }
+     */
+    public static final OfLong byte_offset$layout() {
+        return byte_offset$LAYOUT;
+    }
+
+    private static final long byte_offset$OFFSET = 40;
+
+    /**
+     * Offset for field:
+     * {@snippet lang=c :
+     * uint64_t byte_offset
+     * }
+     */
+    public static final long byte_offset$offset() {
+        return byte_offset$OFFSET;
+    }
+
+    /**
+     * Getter for field:
+     * {@snippet lang=c :
+     * uint64_t byte_offset
+     * }
+     */
+    public static long byte_offset(MemorySegment struct) {
+        return struct.get(byte_offset$LAYOUT, byte_offset$OFFSET);
+    }
+
+    /**
+     * Setter for field:
+     * {@snippet lang=c :
+     * uint64_t byte_offset
+     * }
+     */
+    public static void byte_offset(MemorySegment struct, long fieldValue) {
+        struct.set(byte_offset$LAYOUT, byte_offset$OFFSET, fieldValue);
+    }
+
+    /**
+     * Obtains a slice of {@code arrayParam} which selects the array element at {@code index}.
+     * The returned segment has address {@code arrayParam.address() + index * layout().byteSize()}
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        return array.asSlice(layout().byteSize() * index);
+    }
+
+    /**
+     * The size (in bytes) of this struct
+     */
+    public static long sizeof() { return layout().byteSize(); }
+
+    /**
+     * Allocate a segment of size {@code layout().byteSize()} using {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate(layout());
+    }
+
+    /**
+     * Allocate an array of size {@code elementCount} using {@code allocator}.
+     * The returned segment has size {@code elementCount * layout().byteSize()}.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return reinterpret(addr, 1, arena, cleanup);
+    }
+
+    /**
+     * Reinterprets {@code addr} using target {@code arena} and {@code cleanupAction} (if any).
+     * The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DistanceH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DistanceH.java
new file mode 100644
index 000000000..3949e1089
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DistanceH.java
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import java.lang.invoke.*;
+import java.lang.foreign.*;
+import java.nio.ByteOrder;
+import java.util.*;
+import java.util.function.*;
+import java.util.stream.*;
+
+import static java.lang.foreign.ValueLayout.*;
+import static java.lang.foreign.MemoryLayout.PathElement.*;
+
+/**
+ * Static holder for the values of an anonymous native C enum of distance types
+ * (exposed as no-argument accessor methods), together with a few helper utilities
+ * built on {@code java.lang.foreign}.
+ */
+public class DistanceH {
+
+    DistanceH() {
+        // Should not be called directly
+    }
+
+    static final Arena LIBRARY_ARENA = Arena.ofAuto();
+    // Read once from the boolean system property "jextract.trace.downcalls".
+    static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls");
+
+    /**
+     * Prints a trace line of the form {@code name(arg1, arg2, ...)} to standard output.
+     * Arguments are rendered with {@link String#valueOf(Object)}, so a {@code null}
+     * argument is printed as {@code null} instead of raising a {@link NullPointerException}.
+     *
+     * @param name label printed before the parenthesised argument list
+     * @param args values to print, joined with {@code ", "}
+     */
+    static void traceDowncall(String name, Object... args) {
+         String traceArgs = Arrays.stream(args)
+                       .map(String::valueOf)
+                       .collect(Collectors.joining(", "));
+         System.out.printf("%s(%s)\n", name, traceArgs);
+    }
+
+    /**
+     * Resolves {@code symbol} through {@link #SYMBOL_LOOKUP}.
+     *
+     * @throws UnsatisfiedLinkError if the lookup does not find the symbol
+     */
+    static MemorySegment findOrThrow(String symbol) {
+        return SYMBOL_LOOKUP.find(symbol)
+            .orElseThrow(() -> new UnsatisfiedLinkError("unresolved symbol: " + symbol));
+    }
+
+    /**
+     * Finds the virtual method {@code name} on {@code fi} whose signature is the method type
+     * derived from {@code fdesc}.
+     *
+     * @throws AssertionError wrapping the reflective failure if the method cannot be found or accessed
+     */
+    static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+        try {
+            return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+        } catch (ReflectiveOperationException ex) {
+            throw new AssertionError(ex);
+        }
+    }
+
+    /**
+     * Rebuilds {@code layout} with every value layout inside it set to byte alignment {@code align}.
+     * Padding layouts are returned unchanged; struct, union and sequence layouts are re-created
+     * from their recursively re-aligned members.
+     */
+    static MemoryLayout align(MemoryLayout layout, long align) {
+        return switch (layout) {
+            case PaddingLayout p -> p;
+            case ValueLayout v -> v.withByteAlignment(align);
+            case GroupLayout g -> {
+                MemoryLayout[] alignedMembers = g.memberLayouts().stream()
+                        .map(m -> align(m, align)).toArray(MemoryLayout[]::new);
+                yield g instanceof StructLayout ?
+                        MemoryLayout.structLayout(alignedMembers) : MemoryLayout.unionLayout(alignedMembers);
+            }
+            case SequenceLayout s -> MemoryLayout.sequenceLayout(s.elementCount(), align(s.elementLayout(), align));
+        };
+    }
+
+    // Loader lookup first, then the native linker's default lookup as fallback.
+    static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup()
+            .or(Linker.nativeLinker().defaultLookup());
+
+    public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+    public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+    public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+    public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+    public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+    public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+    public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+    public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+            .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE));
+    public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG;
+    private static final int L2Expanded = (int)0L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.L2Expanded = 0
+     * }
+     */
+    public static int L2Expanded() {
+        return L2Expanded;
+    }
+    private static final int L2SqrtExpanded = (int)1L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.L2SqrtExpanded = 1
+     * }
+     */
+    public static int L2SqrtExpanded() {
+        return L2SqrtExpanded;
+    }
+    private static final int CosineExpanded = (int)2L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.CosineExpanded = 2
+     * }
+     */
+    public static int CosineExpanded() {
+        return CosineExpanded;
+    }
+    private static final int L1 = (int)3L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.L1 = 3
+     * }
+     */
+    public static int L1() {
+        return L1;
+    }
+    private static final int L2Unexpanded = (int)4L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.L2Unexpanded = 4
+     * }
+     */
+    public static int L2Unexpanded() {
+        return L2Unexpanded;
+    }
+    private static final int L2SqrtUnexpanded = (int)5L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.L2SqrtUnexpanded = 5
+     * }
+     */
+    public static int L2SqrtUnexpanded() {
+        return L2SqrtUnexpanded;
+    }
+    private static final int InnerProduct = (int)6L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.InnerProduct = 6
+     * }
+     */
+    public static int InnerProduct() {
+        return InnerProduct;
+    }
+    private static final int Linf = (int)7L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.Linf = 7
+     * }
+     */
+    public static int Linf() {
+        return Linf;
+    }
+    private static final int Canberra = (int)8L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.Canberra = 8
+     * }
+     */
+    public static int Canberra() {
+        return Canberra;
+    }
+    private static final int LpUnexpanded = (int)9L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.LpUnexpanded = 9
+     * }
+     */
+    public static int LpUnexpanded() {
+        return LpUnexpanded;
+    }
+    private static final int CorrelationExpanded = (int)10L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.CorrelationExpanded = 10
+     * }
+     */
+    public static int CorrelationExpanded() {
+        return CorrelationExpanded;
+    }
+    private static final int JaccardExpanded = (int)11L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.JaccardExpanded = 11
+     * }
+     */
+    public static int JaccardExpanded() {
+        return JaccardExpanded;
+    }
+    private static final int HellingerExpanded = (int)12L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.HellingerExpanded = 12
+     * }
+     */
+    public static int HellingerExpanded() {
+        return HellingerExpanded;
+    }
+    private static final int Haversine = (int)13L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.Haversine = 13
+     * }
+     */
+    public static int Haversine() {
+        return Haversine;
+    }
+    private static final int BrayCurtis = (int)14L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.BrayCurtis = 14
+     * }
+     */
+    public static int BrayCurtis() {
+        return BrayCurtis;
+    }
+    private static final int JensenShannon = (int)15L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.JensenShannon = 15
+     * }
+     */
+    public static int JensenShannon() {
+        return JensenShannon;
+    }
+    private static final int HammingUnexpanded = (int)16L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.HammingUnexpanded = 16
+     * }
+     */
+    public static int HammingUnexpanded() {
+        return HammingUnexpanded;
+    }
+    private static final int KLDivergence = (int)17L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.KLDivergence = 17
+     * }
+     */
+    public static int KLDivergence() {
+        return KLDivergence;
+    }
+    private static final int RusselRaoExpanded = (int)18L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.RusselRaoExpanded = 18
+     * }
+     */
+    public static int RusselRaoExpanded() {
+        return RusselRaoExpanded;
+    }
+    private static final int DiceExpanded = (int)19L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.DiceExpanded = 19
+     * }
+     */
+    public static int DiceExpanded() {
+        return DiceExpanded;
+    }
+    private static final int Precomputed = (int)100L;
+    /**
+     * {@snippet lang=c :
+     * enum <anonymous>.Precomputed = 100
+     * }
+     */
+    public static int Precomputed() {
+        return Precomputed;
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DlpackH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DlpackH.java
new file mode 100644
index 000000000..d459dd3bf
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DlpackH.java
@@ -0,0 +1,1898 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class DlpackH {
+
+    /** Package-private constructor with an empty body. */
+    DlpackH() {
+        // Should not be called directly
+    }
+
+    // Arena obtained from Arena.ofAuto(); its users are not visible in this part of the file.
+    static final Arena LIBRARY_ARENA = Arena.ofAuto();
+    // Value of the boolean system property "jextract.trace.downcalls", read once when this class initializes.
+    static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls");
+
+    /**
+     * Prints a trace line of the form {@code name(arg1, arg2, ...)} to standard output.
+     * Arguments are rendered with {@link String#valueOf(Object)}, so a {@code null}
+     * argument is printed as {@code null} instead of raising a {@link NullPointerException}.
+     *
+     * @param name label printed before the parenthesised argument list
+     * @param args values to print, joined with {@code ", "}
+     */
+    static void traceDowncall(String name, Object... args) {
+         String traceArgs = Arrays.stream(args)
+                       .map(String::valueOf)
+                       .collect(Collectors.joining(", "));
+         System.out.printf("%s(%s)\n", name, traceArgs);
+    }
+
+    /**
+     * Resolves {@code symbol} through {@code SYMBOL_LOOKUP}.
+     *
+     * @throws UnsatisfiedLinkError if the lookup does not find the symbol
+     */
+    static MemorySegment findOrThrow(String symbol) {
+        var resolved = SYMBOL_LOOKUP.find(symbol);
+        if (resolved.isEmpty()) {
+            throw new UnsatisfiedLinkError("unresolved symbol: " + symbol);
+        }
+        return resolved.get();
+    }
+
+    /**
+     * Finds the virtual method {@code name} on {@code fi} whose signature is the method type
+     * derived from {@code fdesc}.
+     *
+     * @throws AssertionError wrapping the reflective failure if the method cannot be found or accessed
+     */
+    static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+        MethodHandles.Lookup lookup = MethodHandles.lookup();
+        var signature = fdesc.toMethodType();
+        MethodHandle handle;
+        try {
+            handle = lookup.findVirtual(fi, name, signature);
+        } catch (ReflectiveOperationException reflectiveFailure) {
+            throw new AssertionError(reflectiveFailure);
+        }
+        return handle;
+    }
+
+    /**
+     * Rebuilds {@code layout} with every value layout inside it set to byte alignment {@code align}.
+     * Padding layouts are returned unchanged; struct, union and sequence layouts are re-created
+     * from their recursively re-aligned members.
+     */
+    static MemoryLayout align(MemoryLayout layout, long align) {
+        // A pattern switch rejects a null selector with NullPointerException; keep that behaviour.
+        java.util.Objects.requireNonNull(layout);
+        if (layout instanceof PaddingLayout padding) {
+            return padding;
+        }
+        if (layout instanceof ValueLayout value) {
+            return value.withByteAlignment(align);
+        }
+        if (layout instanceof SequenceLayout sequence) {
+            return MemoryLayout.sequenceLayout(sequence.elementCount(), align(sequence.elementLayout(), align));
+        }
+        if (layout instanceof GroupLayout group) {
+            var members = group.memberLayouts();
+            MemoryLayout[] realigned = new MemoryLayout[members.size()];
+            for (int i = 0; i < realigned.length; i++) {
+                realigned[i] = align(members.get(i), align);
+            }
+            if (group instanceof StructLayout) {
+                return MemoryLayout.structLayout(realigned);
+            }
+            return MemoryLayout.unionLayout(realigned);
+        }
+        // Not reachable for the sealed MemoryLayout hierarchy; mirrors the switch's failure mode.
+        throw new MatchException(null, null);
+    }
+
+    // Symbol lookup used by findOrThrow: the loader lookup is consulted first,
+    // with the native linker's default lookup as the fallback.
+    static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup()
+            .or(Linker.nativeLinker().defaultLookup());
+
+    // Java value layouts used to represent the C primitive types named by each constant.
+    public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+    public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+    public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+    public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+    public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+    public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+    public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+    // Address layout whose target layout is a byte sequence of Long.MAX_VALUE elements.
+    public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+            .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE));
+    // Same layout as C_LONG_LONG: both are JAVA_LONG here.
+    public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG;
+    private static final int DLPACK_MAJOR_VERSION = 1;
+
+    /** Returns the value of the C macro {@code #define DLPACK_MAJOR_VERSION 1}. */
+    public static int DLPACK_MAJOR_VERSION() { return DLPACK_MAJOR_VERSION; }
+
+    private static final int DLPACK_MINOR_VERSION = 0;
+
+    /** Returns the value of the C macro {@code #define DLPACK_MINOR_VERSION 0}. */
+    public static int DLPACK_MINOR_VERSION() { return DLPACK_MINOR_VERSION; }
+    private static final int _STDINT_H = 1;
+
+    /** Returns the value of the C macro {@code #define _STDINT_H 1}. */
+    public static int _STDINT_H() { return _STDINT_H; }
+
+    private static final int _FEATURES_H = 1;
+
+    /** Returns the value of the C macro {@code #define _FEATURES_H 1}. */
+    public static int _FEATURES_H() { return _FEATURES_H; }
+
+    private static final int _DEFAULT_SOURCE = 1;
+
+    /** Returns the value of the C macro {@code #define _DEFAULT_SOURCE 1}. */
+    public static int _DEFAULT_SOURCE() { return _DEFAULT_SOURCE; }
+
+    private static final int __GLIBC_USE_ISOC2X = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_ISOC2X 0}. */
+    public static int __GLIBC_USE_ISOC2X() { return __GLIBC_USE_ISOC2X; }
+
+    private static final int __USE_ISOC11 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_ISOC11 1}. */
+    public static int __USE_ISOC11() { return __USE_ISOC11; }
+
+    private static final int __USE_ISOC99 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_ISOC99 1}. */
+    public static int __USE_ISOC99() { return __USE_ISOC99; }
+
+    private static final int __USE_ISOC95 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_ISOC95 1}. */
+    public static int __USE_ISOC95() { return __USE_ISOC95; }
+    private static final int __USE_POSIX_IMPLICITLY = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_POSIX_IMPLICITLY 1}. */
+    public static int __USE_POSIX_IMPLICITLY() { return __USE_POSIX_IMPLICITLY; }
+
+    private static final int _POSIX_SOURCE = 1;
+
+    /** Returns the value of the C macro {@code #define _POSIX_SOURCE 1}. */
+    public static int _POSIX_SOURCE() { return _POSIX_SOURCE; }
+
+    private static final int __USE_POSIX = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_POSIX 1}. */
+    public static int __USE_POSIX() { return __USE_POSIX; }
+
+    private static final int __USE_POSIX2 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_POSIX2 1}. */
+    public static int __USE_POSIX2() { return __USE_POSIX2; }
+
+    private static final int __USE_POSIX199309 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_POSIX199309 1}. */
+    public static int __USE_POSIX199309() { return __USE_POSIX199309; }
+
+    private static final int __USE_POSIX199506 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_POSIX199506 1}. */
+    public static int __USE_POSIX199506() { return __USE_POSIX199506; }
+
+    private static final int __USE_XOPEN2K = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_XOPEN2K 1}. */
+    public static int __USE_XOPEN2K() { return __USE_XOPEN2K; }
+
+    private static final int __USE_XOPEN2K8 = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_XOPEN2K8 1}. */
+    public static int __USE_XOPEN2K8() { return __USE_XOPEN2K8; }
+
+    private static final int _ATFILE_SOURCE = 1;
+
+    /** Returns the value of the C macro {@code #define _ATFILE_SOURCE 1}. */
+    public static int _ATFILE_SOURCE() { return _ATFILE_SOURCE; }
+    private static final int __WORDSIZE = 64;
+
+    /** Returns the value of the C macro {@code #define __WORDSIZE 64}. */
+    public static int __WORDSIZE() { return __WORDSIZE; }
+
+    private static final int __WORDSIZE_TIME64_COMPAT32 = 1;
+
+    /** Returns the value of the C macro {@code #define __WORDSIZE_TIME64_COMPAT32 1}. */
+    public static int __WORDSIZE_TIME64_COMPAT32() { return __WORDSIZE_TIME64_COMPAT32; }
+
+    private static final int __SYSCALL_WORDSIZE = 64;
+
+    /** Returns the value of the C macro {@code #define __SYSCALL_WORDSIZE 64}. */
+    public static int __SYSCALL_WORDSIZE() { return __SYSCALL_WORDSIZE; }
+
+    private static final int __USE_MISC = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_MISC 1}. */
+    public static int __USE_MISC() { return __USE_MISC; }
+
+    private static final int __USE_ATFILE = 1;
+
+    /** Returns the value of the C macro {@code #define __USE_ATFILE 1}. */
+    public static int __USE_ATFILE() { return __USE_ATFILE; }
+
+    private static final int __USE_FORTIFY_LEVEL = 0;
+
+    /** Returns the value of the C macro {@code #define __USE_FORTIFY_LEVEL 0}. */
+    public static int __USE_FORTIFY_LEVEL() { return __USE_FORTIFY_LEVEL; }
+
+    private static final int __GLIBC_USE_DEPRECATED_GETS = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_DEPRECATED_GETS 0}. */
+    public static int __GLIBC_USE_DEPRECATED_GETS() { return __GLIBC_USE_DEPRECATED_GETS; }
+
+    private static final int __GLIBC_USE_DEPRECATED_SCANF = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_DEPRECATED_SCANF 0}. */
+    public static int __GLIBC_USE_DEPRECATED_SCANF() { return __GLIBC_USE_DEPRECATED_SCANF; }
+    private static final int _STDC_PREDEF_H = 1;
+
+    /** Returns the value of the C macro {@code #define _STDC_PREDEF_H 1}. */
+    public static int _STDC_PREDEF_H() { return _STDC_PREDEF_H; }
+
+    private static final int __STDC_IEC_559__ = 1;
+
+    /** Returns the value of the C macro {@code #define __STDC_IEC_559__ 1}. */
+    public static int __STDC_IEC_559__() { return __STDC_IEC_559__; }
+
+    private static final int __STDC_IEC_559_COMPLEX__ = 1;
+
+    /** Returns the value of the C macro {@code #define __STDC_IEC_559_COMPLEX__ 1}. */
+    public static int __STDC_IEC_559_COMPLEX__() { return __STDC_IEC_559_COMPLEX__; }
+
+    private static final int __GNU_LIBRARY__ = 6;
+
+    /** Returns the value of the C macro {@code #define __GNU_LIBRARY__ 6}. */
+    public static int __GNU_LIBRARY__() { return __GNU_LIBRARY__; }
+
+    private static final int __GLIBC__ = 2;
+
+    /** Returns the value of the C macro {@code #define __GLIBC__ 2}. */
+    public static int __GLIBC__() { return __GLIBC__; }
+
+    private static final int __GLIBC_MINOR__ = 35;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_MINOR__ 35}. */
+    public static int __GLIBC_MINOR__() { return __GLIBC_MINOR__; }
+
+    private static final int _SYS_CDEFS_H = 1;
+
+    /** Returns the value of the C macro {@code #define _SYS_CDEFS_H 1}. */
+    public static int _SYS_CDEFS_H() { return _SYS_CDEFS_H; }
+
+    private static final int __glibc_c99_flexarr_available = 1;
+
+    /** Returns the value of the C macro {@code #define __glibc_c99_flexarr_available 1}. */
+    public static int __glibc_c99_flexarr_available() { return __glibc_c99_flexarr_available; }
+
+    private static final int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI = 0;
+
+    /** Returns the value of the C macro {@code #define __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI 0}. */
+    public static int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI() { return __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI; }
+
+    private static final int __HAVE_GENERIC_SELECTION = 1;
+
+    /** Returns the value of the C macro {@code #define __HAVE_GENERIC_SELECTION 1}. */
+    public static int __HAVE_GENERIC_SELECTION() { return __HAVE_GENERIC_SELECTION; }
+    private static final int __GLIBC_USE_LIB_EXT2 = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_LIB_EXT2 0}. */
+    public static int __GLIBC_USE_LIB_EXT2() { return __GLIBC_USE_LIB_EXT2; }
+
+    private static final int __GLIBC_USE_IEC_60559_BFP_EXT = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_IEC_60559_BFP_EXT 0}. */
+    public static int __GLIBC_USE_IEC_60559_BFP_EXT() { return __GLIBC_USE_IEC_60559_BFP_EXT; }
+
+    private static final int __GLIBC_USE_IEC_60559_BFP_EXT_C2X = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_IEC_60559_BFP_EXT_C2X 0}. */
+    public static int __GLIBC_USE_IEC_60559_BFP_EXT_C2X() { return __GLIBC_USE_IEC_60559_BFP_EXT_C2X; }
+
+    private static final int __GLIBC_USE_IEC_60559_EXT = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_IEC_60559_EXT 0}. */
+    public static int __GLIBC_USE_IEC_60559_EXT() { return __GLIBC_USE_IEC_60559_EXT; }
+
+    private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT 0}. */
+    public static int __GLIBC_USE_IEC_60559_FUNCS_EXT() { return __GLIBC_USE_IEC_60559_FUNCS_EXT; }
+
+    private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X 0}. */
+    public static int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X() { return __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X; }
+
+    private static final int __GLIBC_USE_IEC_60559_TYPES_EXT = 0;
+
+    /** Returns the value of the C macro {@code #define __GLIBC_USE_IEC_60559_TYPES_EXT 0}. */
+    public static int __GLIBC_USE_IEC_60559_TYPES_EXT() { return __GLIBC_USE_IEC_60559_TYPES_EXT; }
+    private static final int _BITS_TYPES_H = 1;
+
+    /** Returns the value of the C macro {@code #define _BITS_TYPES_H 1}. */
+    public static int _BITS_TYPES_H() { return _BITS_TYPES_H; }
+
+    private static final int _BITS_TYPESIZES_H = 1;
+
+    /** Returns the value of the C macro {@code #define _BITS_TYPESIZES_H 1}. */
+    public static int _BITS_TYPESIZES_H() { return _BITS_TYPESIZES_H; }
+
+    private static final int __OFF_T_MATCHES_OFF64_T = 1;
+
+    /** Returns the value of the C macro {@code #define __OFF_T_MATCHES_OFF64_T 1}. */
+    public static int __OFF_T_MATCHES_OFF64_T() { return __OFF_T_MATCHES_OFF64_T; }
+
+    private static final int __INO_T_MATCHES_INO64_T = 1;
+
+    /** Returns the value of the C macro {@code #define __INO_T_MATCHES_INO64_T 1}. */
+    public static int __INO_T_MATCHES_INO64_T() { return __INO_T_MATCHES_INO64_T; }
+
+    private static final int __RLIM_T_MATCHES_RLIM64_T = 1;
+
+    /** Returns the value of the C macro {@code #define __RLIM_T_MATCHES_RLIM64_T 1}. */
+    public static int __RLIM_T_MATCHES_RLIM64_T() { return __RLIM_T_MATCHES_RLIM64_T; }
+
+    private static final int __STATFS_MATCHES_STATFS64 = 1;
+
+    /** Returns the value of the C macro {@code #define __STATFS_MATCHES_STATFS64 1}. */
+    public static int __STATFS_MATCHES_STATFS64() { return __STATFS_MATCHES_STATFS64; }
+
+    private static final int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 = 1;
+
+    /** Returns the value of the C macro {@code #define __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 1}. */
+    public static int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64() { return __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64; }
+
+    private static final int __FD_SETSIZE = 1024;
+
+    /** Returns the value of the C macro {@code #define __FD_SETSIZE 1024}. */
+    public static int __FD_SETSIZE() { return __FD_SETSIZE; }
+
+    private static final int _BITS_TIME64_H = 1;
+
+    /** Returns the value of the C macro {@code #define _BITS_TIME64_H 1}. */
+    public static int _BITS_TIME64_H() { return _BITS_TIME64_H; }
+
+    private static final int _BITS_WCHAR_H = 1;
+
+    /** Returns the value of the C macro {@code #define _BITS_WCHAR_H 1}. */
+    public static int _BITS_WCHAR_H() { return _BITS_WCHAR_H; }
+
+    private static final int _BITS_STDINT_INTN_H = 1;
+
+    /** Returns the value of the C macro {@code #define _BITS_STDINT_INTN_H 1}. */
+    public static int _BITS_STDINT_INTN_H() { return _BITS_STDINT_INTN_H; }
+
+    private static final int _BITS_STDINT_UINTN_H = 1;
+
+    /** Returns the value of the C macro {@code #define _BITS_STDINT_UINTN_H 1}. */
+    public static int _BITS_STDINT_UINTN_H() { return _BITS_STDINT_UINTN_H; }
+    /** Layout for the C declaration {@code typedef unsigned char __u_char}. */
+    public static final OfByte __u_char = C_CHAR;
+
+    /** Layout for the C declaration {@code typedef unsigned short __u_short}. */
+    public static final OfShort __u_short = C_SHORT;
+
+    /** Layout for the C declaration {@code typedef unsigned int __u_int}. */
+    public static final OfInt __u_int = C_INT;
+
+    /** Layout for the C declaration {@code typedef unsigned long __u_long}. */
+    public static final OfLong __u_long = C_LONG;
+    /** Layout for the C declaration {@code typedef signed char __int8_t}. */
+    public static final OfByte __int8_t = C_CHAR;
+
+    /** Layout for the C declaration {@code typedef unsigned char __uint8_t}. */
+    public static final OfByte __uint8_t = C_CHAR;
+
+    /** Layout for the C declaration {@code typedef short __int16_t}. */
+    public static final OfShort __int16_t = C_SHORT;
+
+    /** Layout for the C declaration {@code typedef unsigned short __uint16_t}. */
+    public static final OfShort __uint16_t = C_SHORT;
+
+    /** Layout for the C declaration {@code typedef int __int32_t}. */
+    public static final OfInt __int32_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef unsigned int __uint32_t}. */
+    public static final OfInt __uint32_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef long __int64_t}. */
+    public static final OfLong __int64_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned long __uint64_t}. */
+    public static final OfLong __uint64_t = C_LONG;
+    /** Layout for the C declaration {@code typedef __int8_t __int_least8_t}. */
+    public static final OfByte __int_least8_t = C_CHAR;
+
+    /** Layout for the C declaration {@code typedef __uint8_t __uint_least8_t}. */
+    public static final OfByte __uint_least8_t = C_CHAR;
+
+    /** Layout for the C declaration {@code typedef __int16_t __int_least16_t}. */
+    public static final OfShort __int_least16_t = C_SHORT;
+
+    /** Layout for the C declaration {@code typedef __uint16_t __uint_least16_t}. */
+    public static final OfShort __uint_least16_t = C_SHORT;
+
+    /** Layout for the C declaration {@code typedef __int32_t __int_least32_t}. */
+    public static final OfInt __int_least32_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef __uint32_t __uint_least32_t}. */
+    public static final OfInt __uint_least32_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef __int64_t __int_least64_t}. */
+    public static final OfLong __int_least64_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef __uint64_t __uint_least64_t}. */
+    public static final OfLong __uint_least64_t = C_LONG;
+    /** Layout for the C declaration {@code typedef long __quad_t}. */
+    public static final OfLong __quad_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned long __u_quad_t}. */
+    public static final OfLong __u_quad_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef long __intmax_t}. */
+    public static final OfLong __intmax_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned long __uintmax_t}. */
+    public static final OfLong __uintmax_t = C_LONG;
+    /** Layout for the C declaration {@code typedef unsigned long __dev_t}. */
+    public static final OfLong __dev_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned int __uid_t}. */
+    public static final OfInt __uid_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef unsigned int __gid_t}. */
+    public static final OfInt __gid_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef unsigned long __ino_t}. */
+    public static final OfLong __ino_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned long __ino64_t}. */
+    public static final OfLong __ino64_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned int __mode_t}. */
+    public static final OfInt __mode_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef unsigned long __nlink_t}. */
+    public static final OfLong __nlink_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef long __off_t}. */
+    public static final OfLong __off_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef long __off64_t}. */
+    public static final OfLong __off64_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef int __pid_t}. */
+    public static final OfInt __pid_t = C_INT;
+
+    /** Layout for the C declaration {@code typedef long __clock_t}. */
+    public static final OfLong __clock_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned long __rlim_t}. */
+    public static final OfLong __rlim_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned long __rlim64_t}. */
+    public static final OfLong __rlim64_t = C_LONG;
+
+    /** Layout for the C declaration {@code typedef unsigned int __id_t}. */
+    public static final OfInt __id_t = C_INT;
+    /**
+     * {@snippet lang=c :
+     * typedef long __time_t
+     * }
+     */
+    public static final OfLong __time_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned int __useconds_t
+     * }
+     */
+    public static final OfInt __useconds_t = DlpackH.C_INT;
+    /**
+     * {@snippet lang=c :
+     * typedef long __suseconds_t
+     * }
+     */
+    public static final OfLong __suseconds_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef long __suseconds64_t
+     * }
+     */
+    public static final OfLong __suseconds64_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef int __daddr_t
+     * }
+     */
+    public static final OfInt __daddr_t = DlpackH.C_INT;
+    /**
+     * {@snippet lang=c :
+     * typedef int __key_t
+     * }
+     */
+    public static final OfInt __key_t = DlpackH.C_INT;
+    /**
+     * {@snippet lang=c :
+     * typedef int __clockid_t
+     * }
+     */
+    public static final OfInt __clockid_t = DlpackH.C_INT;
+    /**
+     * {@snippet lang=c :
+     * typedef void *__timer_t
+     * }
+     */
+    public static final AddressLayout __timer_t = DlpackH.C_POINTER;
+    /**
+     * {@snippet lang=c :
+     * typedef long __blksize_t
+     * }
+     */
+    public static final OfLong __blksize_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef long __blkcnt_t
+     * }
+     */
+    public static final OfLong __blkcnt_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef long __blkcnt64_t
+     * }
+     */
+    public static final OfLong __blkcnt64_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned long __fsblkcnt_t
+     * }
+     */
+    public static final OfLong __fsblkcnt_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned long __fsblkcnt64_t
+     * }
+     */
+    public static final OfLong __fsblkcnt64_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned long __fsfilcnt_t
+     * }
+     */
+    public static final OfLong __fsfilcnt_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned long __fsfilcnt64_t
+     * }
+     */
+    public static final OfLong __fsfilcnt64_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef long __fsword_t
+     * }
+     */
+    public static final OfLong __fsword_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef long __ssize_t
+     * }
+     */
+    public static final OfLong __ssize_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef long __syscall_slong_t
+     * }
+     */
+    public static final OfLong __syscall_slong_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned long __syscall_ulong_t
+     * }
+     */
+    public static final OfLong __syscall_ulong_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef __off64_t __loff_t
+     * }
+     */
+    public static final OfLong __loff_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef char *__caddr_t
+     * }
+     */
+    public static final AddressLayout __caddr_t = DlpackH.C_POINTER;
+    /**
+     * {@snippet lang=c :
+     * typedef long __intptr_t
+     * }
+     */
+    public static final OfLong __intptr_t = DlpackH.C_LONG;
+    /**
+     * {@snippet lang=c :
+     * typedef unsigned int __socklen_t
+     * }
+     */
+    public static final OfInt __socklen_t = DlpackH.C_INT;
+    /**
+     * {@snippet lang=c :
+     * typedef int __sig_atomic_t
+     * }
+     */
+    public static final OfInt __sig_atomic_t = DlpackH.C_INT;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int8_t int8_t
+     * }
+     */
+    public static final OfByte int8_t = DlpackH.C_CHAR;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int16_t int16_t
+     * }
+     */
+    public static final OfShort int16_t = DlpackH.C_SHORT;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int32_t int32_t
+     * }
+     */
+    public static final OfInt int32_t = DlpackH.C_INT;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int64_t int64_t
+     * }
+     */
+    public static final OfLong int64_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint8_t uint8_t
+     * }
+     */
+    public static final OfByte uint8_t = DlpackH.C_CHAR;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint16_t uint16_t
+     * }
+     */
+    public static final OfShort uint16_t = DlpackH.C_SHORT;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint32_t uint32_t
+     * }
+     */
+    public static final OfInt uint32_t = DlpackH.C_INT;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint64_t uint64_t
+     * }
+     */
+    public static final OfLong uint64_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int_least8_t int_least8_t
+     * }
+     */
+    public static final OfByte int_least8_t = DlpackH.C_CHAR;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int_least16_t int_least16_t
+     * }
+     */
+    public static final OfShort int_least16_t = DlpackH.C_SHORT;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int_least32_t int_least32_t
+     * }
+     */
+    public static final OfInt int_least32_t = DlpackH.C_INT;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __int_least64_t int_least64_t
+     * }
+     */
+    public static final OfLong int_least64_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint_least8_t uint_least8_t
+     * }
+     */
+    public static final OfByte uint_least8_t = DlpackH.C_CHAR;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint_least16_t uint_least16_t
+     * }
+     */
+    public static final OfShort uint_least16_t = DlpackH.C_SHORT;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint_least32_t uint_least32_t
+     * }
+     */
+    public static final OfInt uint_least32_t = DlpackH.C_INT;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uint_least64_t uint_least64_t
+     * }
+     */
+    public static final OfLong uint_least64_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef signed char int_fast8_t
+     * }
+     */
+    public static final OfByte int_fast8_t = DlpackH.C_CHAR;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef long int_fast16_t
+     * }
+     */
+    public static final OfLong int_fast16_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef long int_fast32_t
+     * }
+     */
+    public static final OfLong int_fast32_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef long int_fast64_t
+     * }
+     */
+    public static final OfLong int_fast64_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef unsigned char uint_fast8_t
+     * }
+     */
+    public static final OfByte uint_fast8_t = DlpackH.C_CHAR;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef unsigned long uint_fast16_t
+     * }
+     */
+    public static final OfLong uint_fast16_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef unsigned long uint_fast32_t
+     * }
+     */
+    public static final OfLong uint_fast32_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef unsigned long uint_fast64_t
+     * }
+     */
+    public static final OfLong uint_fast64_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef long intptr_t
+     * }
+     */
+    public static final OfLong intptr_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef unsigned long uintptr_t
+     * }
+     */
+    public static final OfLong uintptr_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef __intmax_t intmax_t
+     * }
+     */
+    public static final OfLong intmax_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef __uintmax_t uintmax_t
+     * }
+     */
+    public static final OfLong uintmax_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef long ptrdiff_t
+     * }
+     */
+    public static final OfLong ptrdiff_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef (unsigned in C; the Java value is signed):
+     * {@snippet lang=c :
+     * typedef unsigned long size_t
+     * }
+     */
+    public static final OfLong size_t = DlpackH.C_LONG;
+    /** Layout alias for this C typedef:
+     * {@snippet lang=c :
+     * typedef int wchar_t
+     * }
+     */
+    public static final OfInt wchar_t = DlpackH.C_INT;
+    private static final int kDLCPU = 1;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLCPU = 1
+     * }
+     */
+    public static int kDLCPU() {
+        return kDLCPU;
+    }
+    private static final int kDLCUDA = 2;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLCUDA = 2
+     * }
+     */
+    public static int kDLCUDA() {
+        return kDLCUDA;
+    }
+    private static final int kDLCUDAHost = 3;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLCUDAHost = 3
+     * }
+     */
+    public static int kDLCUDAHost() {
+        return kDLCUDAHost;
+    }
+    private static final int kDLOpenCL = 4;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLOpenCL = 4
+     * }
+     */
+    public static int kDLOpenCL() {
+        return kDLOpenCL;
+    }
+    private static final int kDLVulkan = 7;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLVulkan = 7
+     * }
+     */
+    public static int kDLVulkan() {
+        return kDLVulkan;
+    }
+    private static final int kDLMetal = 8;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLMetal = 8
+     * }
+     */
+    public static int kDLMetal() {
+        return kDLMetal;
+    }
+    private static final int kDLVPI = 9;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLVPI = 9
+     * }
+     */
+    public static int kDLVPI() {
+        return kDLVPI;
+    }
+    private static final int kDLROCM = 10;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLROCM = 10
+     * }
+     */
+    public static int kDLROCM() {
+        return kDLROCM;
+    }
+    private static final int kDLROCMHost = 11;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLROCMHost = 11
+     * }
+     */
+    public static int kDLROCMHost() {
+        return kDLROCMHost;
+    }
+    private static final int kDLExtDev = 12;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLExtDev = 12
+     * }
+     */
+    public static int kDLExtDev() {
+        return kDLExtDev;
+    }
+    private static final int kDLCUDAManaged = 13;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLCUDAManaged = 13
+     * }
+     */
+    public static int kDLCUDAManaged() {
+        return kDLCUDAManaged;
+    }
+    private static final int kDLOneAPI = 14;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLOneAPI = 14
+     * }
+     */
+    public static int kDLOneAPI() {
+        return kDLOneAPI;
+    }
+    private static final int kDLWebGPU = 15;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLWebGPU = 15
+     * }
+     */
+    public static int kDLWebGPU() {
+        return kDLWebGPU;
+    }
+    private static final int kDLHexagon = 16;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLHexagon = 16
+     * }
+     */
+    public static int kDLHexagon() {
+        return kDLHexagon;
+    }
+    private static final int kDLMAIA = 17;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLMAIA = 17
+     * }
+     */
+    public static int kDLMAIA() {
+        return kDLMAIA;
+    }
+    private static final int kDLInt = 0;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLInt = 0
+     * }
+     */
+    public static int kDLInt() {
+        return kDLInt;
+    }
+    private static final int kDLUInt = 1;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLUInt = 1
+     * }
+     */
+    public static int kDLUInt() {
+        return kDLUInt;
+    }
+    private static final int kDLFloat = 2;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLFloat = 2
+     * }
+     */
+    public static int kDLFloat() {
+        return kDLFloat;
+    }
+    private static final int kDLOpaqueHandle = 3;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLOpaqueHandle = 3
+     * }
+     */
+    public static int kDLOpaqueHandle() {
+        return kDLOpaqueHandle;
+    }
+    private static final int kDLBfloat = 4;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLBfloat = 4
+     * }
+     */
+    public static int kDLBfloat() {
+        return kDLBfloat;
+    }
+    private static final int kDLComplex = 5;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLComplex = 5
+     * }
+     */
+    public static int kDLComplex() {
+        return kDLComplex;
+    }
+    private static final int kDLBool = 6;
+    /** Returns the value of this C enumerator:
+     * {@snippet lang=c :
+     * enum <anonymous>.kDLBool = 6
+     * }
+     */
+    public static int kDLBool() {
+        return kDLBool;
+    }
+    private static final long _POSIX_C_SOURCE = 200809L;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define _POSIX_C_SOURCE 200809
+     * }
+     */
+    public static long _POSIX_C_SOURCE() {
+        return _POSIX_C_SOURCE;
+    }
+    private static final int __TIMESIZE = 64;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define __TIMESIZE 64
+     * }
+     */
+    public static int __TIMESIZE() {
+        return __TIMESIZE;
+    }
+    private static final long __STDC_IEC_60559_BFP__ = 201404L;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define __STDC_IEC_60559_BFP__ 201404
+     * }
+     */
+    public static long __STDC_IEC_60559_BFP__() {
+        return __STDC_IEC_60559_BFP__;
+    }
+    private static final long __STDC_IEC_60559_COMPLEX__ = 201404L;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define __STDC_IEC_60559_COMPLEX__ 201404
+     * }
+     */
+    public static long __STDC_IEC_60559_COMPLEX__() {
+        return __STDC_IEC_60559_COMPLEX__;
+    }
+    private static final long __STDC_ISO_10646__ = 201706L;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define __STDC_ISO_10646__ 201706
+     * }
+     */
+    public static long __STDC_ISO_10646__() {
+        return __STDC_ISO_10646__;
+    }
+    private static final int __WCHAR_MAX = Integer.MAX_VALUE;
+    /** Returns the value of this C macro (equal to {@code Integer.MAX_VALUE}):
+     * {@snippet lang=c :
+     * #define __WCHAR_MAX 2147483647
+     * }
+     */
+    public static int __WCHAR_MAX() {
+        return __WCHAR_MAX;
+    }
+    private static final int __WCHAR_MIN = Integer.MIN_VALUE;
+    /** Returns the value of this C macro (equal to {@code Integer.MIN_VALUE}):
+     * {@snippet lang=c :
+     * #define __WCHAR_MIN -2147483648
+     * }
+     */
+    public static int __WCHAR_MIN() {
+        return __WCHAR_MIN;
+    }
+    private static final int INT8_MIN = Byte.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT8_MIN -128
+     * }
+     */
+    public static int INT8_MIN() {
+        return INT8_MIN;
+    }
+    private static final int INT16_MIN = Short.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT16_MIN -32768
+     * }
+     */
+    public static int INT16_MIN() {
+        return INT16_MIN;
+    }
+    private static final int INT32_MIN = Integer.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT32_MIN -2147483648
+     * }
+     */
+    public static int INT32_MIN() {
+        return INT32_MIN;
+    }
+    private static final long INT64_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT64_MIN -9223372036854775808
+     * }
+     */
+    public static long INT64_MIN() {
+        return INT64_MIN;
+    }
+    private static final int INT8_MAX = Byte.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT8_MAX 127
+     * }
+     */
+    public static int INT8_MAX() {
+        return INT8_MAX;
+    }
+    private static final int INT16_MAX = Short.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT16_MAX 32767
+     * }
+     */
+    public static int INT16_MAX() {
+        return INT16_MAX;
+    }
+    private static final int INT32_MAX = Integer.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT32_MAX 2147483647
+     * }
+     */
+    public static int INT32_MAX() {
+        return INT32_MAX;
+    }
+    private static final long INT64_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT64_MAX 9223372036854775807
+     * }
+     */
+    public static long INT64_MAX() {
+        return INT64_MAX;
+    }
+    private static final int UINT8_MAX = 0xFF;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define UINT8_MAX 255
+     * }
+     */
+    public static int UINT8_MAX() {
+        return UINT8_MAX;
+    }
+    private static final int UINT16_MAX = 0xFFFF;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define UINT16_MAX 65535
+     * }
+     */
+    public static int UINT16_MAX() {
+        return UINT16_MAX;
+    }
+    private static final int UINT32_MAX = 0xFFFFFFFF;
+    /** Returns this C macro as a Java {@code int}: 4294967295 does not fit and reads as -1 (same 32 bits).
+     * {@snippet lang=c :
+     * #define UINT32_MAX 4294967295
+     * }
+     */
+    public static int UINT32_MAX() {
+        return UINT32_MAX;
+    }
+    private static final long UINT64_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINT64_MAX -1
+     * }
+     */
+    public static long UINT64_MAX() {
+        return UINT64_MAX;
+    }
+    private static final int INT_LEAST8_MIN = Byte.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST8_MIN -128
+     * }
+     */
+    public static int INT_LEAST8_MIN() {
+        return INT_LEAST8_MIN;
+    }
+    private static final int INT_LEAST16_MIN = Short.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST16_MIN -32768
+     * }
+     */
+    public static int INT_LEAST16_MIN() {
+        return INT_LEAST16_MIN;
+    }
+    private static final int INT_LEAST32_MIN = Integer.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST32_MIN -2147483648
+     * }
+     */
+    public static int INT_LEAST32_MIN() {
+        return INT_LEAST32_MIN;
+    }
+    private static final long INT_LEAST64_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST64_MIN -9223372036854775808
+     * }
+     */
+    public static long INT_LEAST64_MIN() {
+        return INT_LEAST64_MIN;
+    }
+    private static final int INT_LEAST8_MAX = Byte.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST8_MAX 127
+     * }
+     */
+    public static int INT_LEAST8_MAX() {
+        return INT_LEAST8_MAX;
+    }
+    private static final int INT_LEAST16_MAX = Short.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST16_MAX 32767
+     * }
+     */
+    public static int INT_LEAST16_MAX() {
+        return INT_LEAST16_MAX;
+    }
+    private static final int INT_LEAST32_MAX = Integer.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST32_MAX 2147483647
+     * }
+     */
+    public static int INT_LEAST32_MAX() {
+        return INT_LEAST32_MAX;
+    }
+    private static final long INT_LEAST64_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_LEAST64_MAX 9223372036854775807
+     * }
+     */
+    public static long INT_LEAST64_MAX() {
+        return INT_LEAST64_MAX;
+    }
+    private static final int UINT_LEAST8_MAX = 0xFF;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define UINT_LEAST8_MAX 255
+     * }
+     */
+    public static int UINT_LEAST8_MAX() {
+        return UINT_LEAST8_MAX;
+    }
+    private static final int UINT_LEAST16_MAX = 0xFFFF;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define UINT_LEAST16_MAX 65535
+     * }
+     */
+    public static int UINT_LEAST16_MAX() {
+        return UINT_LEAST16_MAX;
+    }
+    private static final int UINT_LEAST32_MAX = 0xFFFFFFFF;
+    /** Returns this C macro as a Java {@code int}: 4294967295 does not fit and reads as -1 (same 32 bits).
+     * {@snippet lang=c :
+     * #define UINT_LEAST32_MAX 4294967295
+     * }
+     */
+    public static int UINT_LEAST32_MAX() {
+        return UINT_LEAST32_MAX;
+    }
+    private static final long UINT_LEAST64_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINT_LEAST64_MAX -1
+     * }
+     */
+    public static long UINT_LEAST64_MAX() {
+        return UINT_LEAST64_MAX;
+    }
+    private static final int INT_FAST8_MIN = Byte.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST8_MIN -128
+     * }
+     */
+    public static int INT_FAST8_MIN() {
+        return INT_FAST8_MIN;
+    }
+    private static final long INT_FAST16_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST16_MIN -9223372036854775808
+     * }
+     */
+    public static long INT_FAST16_MIN() {
+        return INT_FAST16_MIN;
+    }
+    private static final long INT_FAST32_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST32_MIN -9223372036854775808
+     * }
+     */
+    public static long INT_FAST32_MIN() {
+        return INT_FAST32_MIN;
+    }
+    private static final long INT_FAST64_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST64_MIN -9223372036854775808
+     * }
+     */
+    public static long INT_FAST64_MIN() {
+        return INT_FAST64_MIN;
+    }
+    private static final int INT_FAST8_MAX = Byte.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST8_MAX 127
+     * }
+     */
+    public static int INT_FAST8_MAX() {
+        return INT_FAST8_MAX;
+    }
+    private static final long INT_FAST16_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST16_MAX 9223372036854775807
+     * }
+     */
+    public static long INT_FAST16_MAX() {
+        return INT_FAST16_MAX;
+    }
+    private static final long INT_FAST32_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST32_MAX 9223372036854775807
+     * }
+     */
+    public static long INT_FAST32_MAX() {
+        return INT_FAST32_MAX;
+    }
+    private static final long INT_FAST64_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INT_FAST64_MAX 9223372036854775807
+     * }
+     */
+    public static long INT_FAST64_MAX() {
+        return INT_FAST64_MAX;
+    }
+    private static final int UINT_FAST8_MAX = 0xFF;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define UINT_FAST8_MAX 255
+     * }
+     */
+    public static int UINT_FAST8_MAX() {
+        return UINT_FAST8_MAX;
+    }
+    private static final long UINT_FAST16_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINT_FAST16_MAX -1
+     * }
+     */
+    public static long UINT_FAST16_MAX() {
+        return UINT_FAST16_MAX;
+    }
+    private static final long UINT_FAST32_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINT_FAST32_MAX -1
+     * }
+     */
+    public static long UINT_FAST32_MAX() {
+        return UINT_FAST32_MAX;
+    }
+    private static final long UINT_FAST64_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINT_FAST64_MAX -1
+     * }
+     */
+    public static long UINT_FAST64_MAX() {
+        return UINT_FAST64_MAX;
+    }
+    private static final long INTPTR_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INTPTR_MIN -9223372036854775808
+     * }
+     */
+    public static long INTPTR_MIN() {
+        return INTPTR_MIN;
+    }
+    private static final long INTPTR_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INTPTR_MAX 9223372036854775807
+     * }
+     */
+    public static long INTPTR_MAX() {
+        return INTPTR_MAX;
+    }
+    private static final long UINTPTR_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINTPTR_MAX -1
+     * }
+     */
+    public static long UINTPTR_MAX() {
+        return UINTPTR_MAX;
+    }
+    private static final long INTMAX_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INTMAX_MIN -9223372036854775808
+     * }
+     */
+    public static long INTMAX_MIN() {
+        return INTMAX_MIN;
+    }
+    private static final long INTMAX_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define INTMAX_MAX 9223372036854775807
+     * }
+     */
+    public static long INTMAX_MAX() {
+        return INTMAX_MAX;
+    }
+    private static final long UINTMAX_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define UINTMAX_MAX -1
+     * }
+     */
+    public static long UINTMAX_MAX() {
+        return UINTMAX_MAX;
+    }
+    private static final long PTRDIFF_MIN = Long.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define PTRDIFF_MIN -9223372036854775808
+     * }
+     */
+    public static long PTRDIFF_MIN() {
+        return PTRDIFF_MIN;
+    }
+    private static final long PTRDIFF_MAX = Long.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define PTRDIFF_MAX 9223372036854775807
+     * }
+     */
+    public static long PTRDIFF_MAX() {
+        return PTRDIFF_MAX;
+    }
+    private static final int SIG_ATOMIC_MIN = Integer.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define SIG_ATOMIC_MIN -2147483648
+     * }
+     */
+    public static int SIG_ATOMIC_MIN() {
+        return SIG_ATOMIC_MIN;
+    }
+    private static final int SIG_ATOMIC_MAX = Integer.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define SIG_ATOMIC_MAX 2147483647
+     * }
+     */
+    public static int SIG_ATOMIC_MAX() {
+        return SIG_ATOMIC_MAX;
+    }
+    private static final long SIZE_MAX = 0xFFFFFFFFFFFFFFFFL;
+    /** Returns this C macro as a Java {@code long}: the all-ones 64-bit pattern, which reads as -1.
+     * {@snippet lang=c :
+     * #define SIZE_MAX -1
+     * }
+     */
+    public static long SIZE_MAX() {
+        return SIZE_MAX;
+    }
+    private static final int WCHAR_MIN = Integer.MIN_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define WCHAR_MIN -2147483648
+     * }
+     */
+    public static int WCHAR_MIN() {
+        return WCHAR_MIN;
+    }
+    private static final int WCHAR_MAX = Integer.MAX_VALUE;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define WCHAR_MAX 2147483647
+     * }
+     */
+    public static int WCHAR_MAX() {
+        return WCHAR_MAX;
+    }
+    private static final int WINT_MIN = 0;
+    /** Returns the value of this C macro:
+     * {@snippet lang=c :
+     * #define WINT_MIN 0
+     * }
+     */
+    public static int WINT_MIN() {
+        return WINT_MIN;
+    }
+    private static final int WINT_MAX = 0xFFFFFFFF;
+    /** Returns this C macro as a Java {@code int}: 4294967295 does not fit and reads as -1 (same 32 bits).
+     * {@snippet lang=c :
+     * #define WINT_MAX 4294967295
+     * }
+     */
+    public static int WINT_MAX() {
+        return WINT_MAX;
+    }
+    private static final MemorySegment NULL = MemorySegment.ofAddress(0);
+    /** Returns the segment created from address 0 that stands in for this C macro:
+     * {@snippet lang=c :
+     * #define NULL (void*) 0
+     * }
+     */
+    public static MemorySegment NULL() {
+        return NULL;
+    }
+    private static final long DLPACK_FLAG_BITMASK_READ_ONLY = 1L << 0;
+    /** Returns the value of this C macro (bit 0 set):
+     * {@snippet lang=c :
+     * #define DLPACK_FLAG_BITMASK_READ_ONLY 1
+     * }
+     */
+    public static long DLPACK_FLAG_BITMASK_READ_ONLY() {
+        return DLPACK_FLAG_BITMASK_READ_ONLY;
+    }
+    private static final long DLPACK_FLAG_BITMASK_IS_COPIED = 1L << 1;
+    /** Returns the value of this C macro (bit 1 set):
+     * {@snippet lang=c :
+     * #define DLPACK_FLAG_BITMASK_IS_COPIED 2
+     * }
+     */
+    public static long DLPACK_FLAG_BITMASK_IS_COPIED() {
+        return DLPACK_FLAG_BITMASK_IS_COPIED;
+    }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/Fsidt.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/Fsidt.java
new file mode 100644
index 000000000..07f2849a5
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/Fsidt.java
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+import static java.lang.foreign.MemoryLayout.PathElement.sequenceElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.SequenceLayout;
+import java.lang.invoke.VarHandle;
+import java.util.function.Consumer;
+
+/**
+ * Static helpers for accessing the anonymous C struct below through {@link MemorySegment}s.
+ * {@snippet lang = c :
+ * struct {
+ *     int __val[2];
+ * }
+ * }
+ */
+public class Fsidt {
+  Fsidt() {
+    // Should not be called directly
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout
+      .structLayout(MemoryLayout.sequenceLayout(2, CagraH.C_INT).withName("__val")).withName("$anon$155:12");
+
+  /**
+   * The layout of this struct
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final SequenceLayout __val$LAYOUT = (SequenceLayout) $LAYOUT.select(groupElement("__val"));
+
+  /**
+   * Layout for field {@code int __val[2]}.
+   *
+   * @return the sequence layout selected from the struct layout by the name {@code __val}
+   */
+  public static final SequenceLayout __val$layout() {
+    return __val$LAYOUT;
+  }
+
+  private static final long __val$OFFSET = 0;
+
+  /**
+   * Offset for field {@code int __val[2]}.
+   *
+   * @return the byte offset of {@code __val} within the struct, which is 0
+   */
+  public static final long __val$offset() {
+    return __val$OFFSET;
+  }
+
+  /**
+   * Getter for field {@code int __val[2]}.
+   *
+   * @return a slice of {@code struct} covering the whole {@code __val} array
+   */
+  public static MemorySegment __val(MemorySegment struct) {
+    return struct.asSlice(__val$OFFSET, __val$LAYOUT.byteSize());
+  }
+
+  /**
+   * Setter for field {@code int __val[2]}.
+   *
+   * Copies {@code __val$layout().byteSize()} bytes from the start of {@code fieldValue} into {@code struct}.
+   */
+  public static void __val(MemorySegment struct, MemorySegment fieldValue) {
+    MemorySegment.copy(fieldValue, 0L, struct, __val$OFFSET, __val$LAYOUT.byteSize());
+  }
+
+  private static final long[] __val$DIMS = { 2 };
+
+  /**
+   * Dimensions for array field {@code int __val[2]}.
+   *
+   * @return the shared one-element dimensions array (value 2); callers must not modify it
+   */
+  public static long[] __val$dimensions() {
+    return __val$DIMS;
+  }
+
+  private static final VarHandle __val$ELEM_HANDLE = __val$LAYOUT.varHandle(sequenceElement());
+
+  /**
+   * Indexed getter for field {@code int __val[2]}.
+   *
+   * @return the element of {@code __val} at position {@code index0} in {@code struct}
+   */
+  public static int __val(MemorySegment struct, long index0) {
+    return (int) __val$ELEM_HANDLE.get(struct, 0L, index0);
+  }
+
+  /**
+   * Indexed setter for field {@code int __val[2]}.
+   *
+   * Stores {@code fieldValue} into the element of {@code __val} at position {@code index0}.
+   */
+  public static void __val(MemorySegment struct, long index0, int fieldValue) {
+    __val$ELEM_HANDLE.set(struct, 0L, index0, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} starting at the array element at {@code index}.
+   * The returned segment has address {@code array.address() + index * layout().byteSize()}
+   * and extends to the end of {@code array}.
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * The size (in bytes) of this struct
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup}
+   * action (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and the {@code cleanup}
+   * action (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfo.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfo.java
new file mode 100644
index 000000000..c1d13575c
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfo.java
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+import static java.lang.foreign.MemoryLayout.PathElement.sequenceElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.ValueLayout.OfFloat;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.invoke.VarHandle;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang = c :
+ * struct gpuInfo {
+ *     int gpu_id;
+ *     char name[256];
+ *     long free_memory;
+ *     long total_memory;
+ *     float compute_capability;
+ * }
+ * }
+ */
+public class GpuInfo {
+  GpuInfo() {
+    // Should not be called directly; this class only exposes static members
+  }
+
+  private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(GpuInfoH.C_INT.withName("gpu_id"),
+      MemoryLayout.sequenceLayout(256, GpuInfoH.C_CHAR).withName("name"), MemoryLayout.paddingLayout(4),
+      GpuInfoH.C_LONG.withName("free_memory"), GpuInfoH.C_LONG.withName("total_memory"),
+      GpuInfoH.C_FLOAT.withName("compute_capability"), MemoryLayout.paddingLayout(4)).withName("gpuInfo");
+
+  /**
+   * The layout of this struct, including its two explicit 4-byte padding members
+   */
+  public static final GroupLayout layout() {
+    return $LAYOUT;
+  }
+
+  private static final OfInt gpu_id$LAYOUT = (OfInt) $LAYOUT.select(groupElement("gpu_id"));
+
+  /**
+   * Layout for field {@code int gpu_id}.
+   *
+   * @return the layout of {@code gpu_id} selected from the struct layout
+   */
+  public static final OfInt gpu_id$layout() {
+    return gpu_id$LAYOUT;
+  }
+
+  private static final long gpu_id$OFFSET = 0;
+
+  /**
+   * Offset for field {@code int gpu_id}.
+   *
+   * @return the byte offset of {@code gpu_id} within the struct (0)
+   */
+  public static final long gpu_id$offset() {
+    return gpu_id$OFFSET;
+  }
+
+  /**
+   * Getter for field {@code int gpu_id}.
+   * @param struct segment that holds the struct
+   * @return the value read from {@code struct} at the field's offset
+   */
+  public static int gpu_id(MemorySegment struct) {
+    return struct.get(gpu_id$LAYOUT, gpu_id$OFFSET);
+  }
+
+  /**
+   * Setter for field {@code int gpu_id}.
+   * @param struct     segment that holds the struct
+   * @param fieldValue value written to {@code struct} at the field's offset
+   */
+  public static void gpu_id(MemorySegment struct, int fieldValue) {
+    struct.set(gpu_id$LAYOUT, gpu_id$OFFSET, fieldValue);
+  }
+
+  private static final SequenceLayout name$LAYOUT = (SequenceLayout) $LAYOUT.select(groupElement("name"));
+
+  /**
+   * Layout for field {@code char name[256]}.
+   *
+   * @return the sequence layout of {@code name} selected from the struct layout
+   */
+  public static final SequenceLayout name$layout() {
+    return name$LAYOUT;
+  }
+
+  private static final long name$OFFSET = 4;
+
+  /**
+   * Offset for field {@code char name[256]}.
+   *
+   * @return the byte offset of {@code name} within the struct (4)
+   */
+  public static final long name$offset() {
+    return name$OFFSET;
+  }
+
+  /**
+   * Getter for field {@code char name[256]}.
+   * @param struct segment that holds the struct
+   * @return a slice of {@code struct} covering the 256 bytes of {@code name}
+   */
+  public static MemorySegment name(MemorySegment struct) {
+    return struct.asSlice(name$OFFSET, name$LAYOUT.byteSize());
+  }
+
+  /**
+   * Setter for field {@code char name[256]}.
+   * @param struct     segment that holds the struct
+   * @param fieldValue source segment; its first 256 bytes are copied into {@code name}
+   */
+  public static void name(MemorySegment struct, MemorySegment fieldValue) {
+    MemorySegment.copy(fieldValue, 0L, struct, name$OFFSET, name$LAYOUT.byteSize());
+  }
+
+  private static final long[] name$DIMS = { 256 };
+
+  /**
+   * Dimensions for array field {@code char name[256]}.
+   *
+   * @return a fresh array holding the single dimension, 256
+   */
+  public static long[] name$dimensions() {
+    return name$DIMS.clone();
+  }
+
+  private static final VarHandle name$ELEM_HANDLE = name$LAYOUT.varHandle(sequenceElement());
+
+  /**
+   * Indexed getter for field {@code char name[256]}.
+   * Reads {@code name[index0]}; the field offset is passed as the base offset
+   * because the element handle is rooted at the array layout, not the struct.
+   */
+  public static byte name(MemorySegment struct, long index0) {
+    return (byte) name$ELEM_HANDLE.get(struct, name$OFFSET, index0);
+  }
+
+  /**
+   * Indexed setter for field {@code char name[256]}.
+   * Writes {@code fieldValue} to {@code name[index0]}; the field offset is passed
+   * as the base offset because the element handle is rooted at the array layout.
+   */
+  public static void name(MemorySegment struct, long index0, byte fieldValue) {
+    name$ELEM_HANDLE.set(struct, name$OFFSET, index0, fieldValue);
+  }
+
+  private static final OfLong free_memory$LAYOUT = (OfLong) $LAYOUT.select(groupElement("free_memory"));
+
+  /**
+   * Layout for field {@code long free_memory}.
+   *
+   * @return the layout of {@code free_memory} selected from the struct layout
+   */
+  public static final OfLong free_memory$layout() {
+    return free_memory$LAYOUT;
+  }
+
+  private static final long free_memory$OFFSET = 264;
+
+  /**
+   * Offset for field {@code long free_memory}.
+   *
+   * @return the byte offset of {@code free_memory} within the struct (264)
+   */
+  public static final long free_memory$offset() {
+    return free_memory$OFFSET;
+  }
+
+  /**
+   * Getter for field {@code long free_memory}.
+   * @param struct segment that holds the struct
+   * @return the value read from {@code struct} at the field's offset
+   */
+  public static long free_memory(MemorySegment struct) {
+    return struct.get(free_memory$LAYOUT, free_memory$OFFSET);
+  }
+
+  /**
+   * Setter for field {@code long free_memory}.
+   * @param struct     segment that holds the struct
+   * @param fieldValue value written to {@code struct} at the field's offset
+   */
+  public static void free_memory(MemorySegment struct, long fieldValue) {
+    struct.set(free_memory$LAYOUT, free_memory$OFFSET, fieldValue);
+  }
+
+  private static final OfLong total_memory$LAYOUT = (OfLong) $LAYOUT.select(groupElement("total_memory"));
+
+  /**
+   * Layout for field {@code long total_memory}.
+   *
+   * @return the layout of {@code total_memory} selected from the struct layout
+   */
+  public static final OfLong total_memory$layout() {
+    return total_memory$LAYOUT;
+  }
+
+  private static final long total_memory$OFFSET = 272;
+
+  /**
+   * Offset for field {@code long total_memory}.
+   *
+   * @return the byte offset of {@code total_memory} within the struct (272)
+   */
+  public static final long total_memory$offset() {
+    return total_memory$OFFSET;
+  }
+
+  /**
+   * Getter for field {@code long total_memory}.
+   * @param struct segment that holds the struct
+   * @return the value read from {@code struct} at the field's offset
+   */
+  public static long total_memory(MemorySegment struct) {
+    return struct.get(total_memory$LAYOUT, total_memory$OFFSET);
+  }
+
+  /**
+   * Setter for field {@code long total_memory}.
+   * @param struct     segment that holds the struct
+   * @param fieldValue value written to {@code struct} at the field's offset
+   */
+  public static void total_memory(MemorySegment struct, long fieldValue) {
+    struct.set(total_memory$LAYOUT, total_memory$OFFSET, fieldValue);
+  }
+
+  private static final OfFloat compute_capability$LAYOUT = (OfFloat) $LAYOUT.select(groupElement("compute_capability"));
+
+  /**
+   * Layout for field {@code float compute_capability}.
+   *
+   * @return the layout of {@code compute_capability} selected from the struct layout
+   */
+  public static final OfFloat compute_capability$layout() {
+    return compute_capability$LAYOUT;
+  }
+
+  private static final long compute_capability$OFFSET = 280;
+
+  /**
+   * Offset for field {@code float compute_capability}.
+   *
+   * @return the byte offset of {@code compute_capability} within the struct (280)
+   */
+  public static final long compute_capability$offset() {
+    return compute_capability$OFFSET;
+  }
+
+  /**
+   * Getter for field {@code float compute_capability}.
+   * @param struct segment that holds the struct
+   * @return the value read from {@code struct} at the field's offset
+   */
+  public static float compute_capability(MemorySegment struct) {
+    return struct.get(compute_capability$LAYOUT, compute_capability$OFFSET);
+  }
+
+  /**
+   * Setter for field {@code float compute_capability}.
+   * @param struct     segment that holds the struct
+   * @param fieldValue value written to {@code struct} at the field's offset
+   */
+  public static void compute_capability(MemorySegment struct, float fieldValue) {
+    struct.set(compute_capability$LAYOUT, compute_capability$OFFSET, fieldValue);
+  }
+
+  /**
+   * Obtains a slice of {@code array} which starts at the array element at
+   * {@code index}. The returned segment has address
+   * {@code array.address() + index * layout().byteSize()}
+   */
+  public static MemorySegment asSlice(MemorySegment array, long index) {
+    return array.asSlice(layout().byteSize() * index);
+  }
+
+  /**
+   * The size (in bytes) of this struct, i.e. {@code layout().byteSize()}
+   */
+  public static long sizeof() {
+    return layout().byteSize();
+  }
+
+  /**
+   * Allocate a segment of size {@code layout().byteSize()} using
+   * {@code allocator}
+   */
+  public static MemorySegment allocate(SegmentAllocator allocator) {
+    return allocator.allocate(layout());
+  }
+
+  /**
+   * Allocate an array of size {@code elementCount} using {@code allocator}. The
+   * returned segment has size {@code elementCount * layout().byteSize()}.
+   */
+  public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+    return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, layout()));
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+    return reinterpret(addr, 1, arena, cleanup);
+  }
+
+  /**
+   * Reinterprets {@code addr} using target {@code arena} and
+   * {@code cleanup} (if any). The returned segment has size
+   * {@code elementCount * layout().byteSize()}
+   */
+  public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena,
+      Consumer<MemorySegment> cleanup) {
+    return addr.reinterpret(layout().byteSize() * elementCount, arena, cleanup);
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfoH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfoH.java
new file mode 100644
index 000000000..ad37e37ec
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfoH.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class GpuInfoH {
+  GpuInfoH() {
+    // Should not be called directly; every member of this class is static
+  }
+
+  static final Arena LIBRARY_ARENA = Arena.ofAuto(); // arena managed automatically by the garbage collector
+  static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls"); // false unless property is "true"
+
+  /** Prints {@code name(arg, ...)} to standard output; {@code null} arguments are printed as "null". */
+  static void traceDowncall(String name, Object... args) {
+    System.out.printf("%s(%s)\n", name, Arrays.stream(args).map(String::valueOf).collect(Collectors.joining(", ")));
+  }
+
+  static MemorySegment findOrThrow(String symbol) { // resolves a symbol via SYMBOL_LOOKUP or throws UnsatisfiedLinkError
+    return SYMBOL_LOOKUP.find(symbol).orElseThrow(() -> new UnsatisfiedLinkError("unresolved symbol: " + symbol));
+  }
+
+  static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+    try { // look up the virtual method `name` on `fi` with the Java signature derived from fdesc
+      return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+    } catch (ReflectiveOperationException ex) { // failed lookup is rethrown unchecked
+      throw new AssertionError(ex);
+    }
+  }
+
+  static MemoryLayout align(MemoryLayout layout, long align) { // rebuilds `layout` with every value layout aligned to `align`
+    return switch (layout) {
+    case PaddingLayout p -> p; // padding is returned unchanged
+    case ValueLayout v -> v.withByteAlignment(align);
+    case GroupLayout g -> { // realign members, then rebuild the same kind of group (names are not carried over)
+      MemoryLayout[] alignedMembers = g.memberLayouts().stream().map(m -> align(m, align)).toArray(MemoryLayout[]::new);
+      yield g instanceof StructLayout ? MemoryLayout.structLayout(alignedMembers)
+          : MemoryLayout.unionLayout(alignedMembers);
+    }
+    case SequenceLayout s -> MemoryLayout.sequenceLayout(s.elementCount(), align(s.elementLayout(), align));
+    };
+  }
+
+  static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup().or(Linker.nativeLinker().defaultLookup());
+  public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN; // Java layouts used for C primitive types
+  public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+  public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+  public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+  public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+  public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+  public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+  public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+      .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE));
+  public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG; // C long is mapped to the 64-bit JAVA_LONG here
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/HnswH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/HnswH.java
new file mode 100644
index 000000000..32945c432
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/HnswH.java
@@ -0,0 +1,2350 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class HnswH {
+
+  HnswH() {
+    // Intentionally empty: this constructor should not be called directly
+  }
+
+  static final Arena LIBRARY_ARENA = Arena.ofAuto(); // arena managed automatically by the garbage collector
+  static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls"); // false unless property is "true"
+
+  /** Prints {@code name(arg, ...)} to standard output; {@code null} arguments are printed as "null". */
+  static void traceDowncall(String name, Object... args) {
+    System.out.printf("%s(%s)\n", name, Arrays.stream(args).map(String::valueOf).collect(Collectors.joining(", ")));
+  }
+
+  static MemorySegment findOrThrow(String symbol) { // resolves a symbol via SYMBOL_LOOKUP or throws UnsatisfiedLinkError
+    return SYMBOL_LOOKUP.find(symbol).orElseThrow(() -> new UnsatisfiedLinkError("unresolved symbol: " + symbol));
+  }
+
+  static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+    try { // look up the virtual method `name` on `fi` with the Java signature derived from fdesc
+      return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+    } catch (ReflectiveOperationException lookupFailure) { // failed lookup is rethrown unchecked
+      throw new AssertionError(lookupFailure);
+    }
+  }
+
+  static MemoryLayout align(MemoryLayout layout, long align) { // rebuilds `layout` with value layouts aligned to `align`
+    return switch (layout) {
+    case SequenceLayout seq -> MemoryLayout.sequenceLayout(seq.elementCount(), align(seq.elementLayout(), align));
+    case GroupLayout group -> {
+      MemoryLayout[] realigned = group.memberLayouts().stream().map(member -> align(member, align)).toArray(MemoryLayout[]::new);
+      yield group instanceof StructLayout ? MemoryLayout.structLayout(realigned)
+          : MemoryLayout.unionLayout(realigned);
+    }
+    case ValueLayout value -> value.withByteAlignment(align);
+    case PaddingLayout padding -> padding; // padding is returned unchanged
+    };
+  }
+
+  static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup().or(Linker.nativeLinker().defaultLookup());
+  // Java value layouts used for the corresponding C primitive types; the loader lookup above falls back to the default lookup.
+  public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+  public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+  public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+  public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+  public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+  public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+  public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+  public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+      .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE));
+  public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG; // C long is mapped to the 64-bit JAVA_LONG here
+  private static final int DLPACK_VERSION = 80;
+
+  /**
+   * Value of the C macro {@code DLPACK_VERSION}, generated from
+   * {@code #define DLPACK_VERSION 80}.
+   */
+  public static int DLPACK_VERSION() {
+    return DLPACK_VERSION;
+  }
+
+  private static final int DLPACK_ABI_VERSION = 1;
+
+  /**
+   * Value of the C macro {@code DLPACK_ABI_VERSION}, generated from
+   * {@code #define DLPACK_ABI_VERSION 1}.
+   */
+  public static int DLPACK_ABI_VERSION() {
+    return DLPACK_ABI_VERSION;
+  }
+
+  private static final int _STDINT_H = 1;
+
+  /**
+   * Value of the C macro {@code _STDINT_H}, generated from
+   * {@code #define _STDINT_H 1}.
+   */
+  public static int _STDINT_H() {
+    return _STDINT_H;
+  }
+
+  private static final int _FEATURES_H = 1;
+
+  /**
+   * Value of the C macro {@code _FEATURES_H}, generated from
+   * {@code #define _FEATURES_H 1}.
+   */
+  public static int _FEATURES_H() {
+    return _FEATURES_H;
+  }
+
+  private static final int _DEFAULT_SOURCE = 1;
+
+  /**
+   * Value of the C macro {@code _DEFAULT_SOURCE}, generated from
+   * {@code #define _DEFAULT_SOURCE 1}.
+   */
+  public static int _DEFAULT_SOURCE() {
+    return _DEFAULT_SOURCE;
+  }
+
+  private static final int __GLIBC_USE_ISOC2X = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_ISOC2X}, generated from
+   * {@code #define __GLIBC_USE_ISOC2X 0}.
+   */
+  public static int __GLIBC_USE_ISOC2X() {
+    return __GLIBC_USE_ISOC2X;
+  }
+
+  private static final int __USE_ISOC11 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_ISOC11}, generated from
+   * {@code #define __USE_ISOC11 1}.
+   */
+  public static int __USE_ISOC11() {
+    return __USE_ISOC11;
+  }
+
+  private static final int __USE_ISOC99 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_ISOC99}, generated from
+   * {@code #define __USE_ISOC99 1}.
+   */
+  public static int __USE_ISOC99() {
+    return __USE_ISOC99;
+  }
+
+  private static final int __USE_ISOC95 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_ISOC95}, generated from
+   * {@code #define __USE_ISOC95 1}.
+   */
+  public static int __USE_ISOC95() {
+    return __USE_ISOC95;
+  }
+
+  private static final int __USE_POSIX_IMPLICITLY = 1;
+
+  /**
+   * Value of the C macro {@code __USE_POSIX_IMPLICITLY}, generated from
+   * {@code #define __USE_POSIX_IMPLICITLY 1}.
+   */
+  public static int __USE_POSIX_IMPLICITLY() {
+    return __USE_POSIX_IMPLICITLY;
+  }
+
+  private static final int _POSIX_SOURCE = 1;
+
+  /**
+   * Value of the C macro {@code _POSIX_SOURCE}, generated from
+   * {@code #define _POSIX_SOURCE 1}.
+   */
+  public static int _POSIX_SOURCE() {
+    return _POSIX_SOURCE;
+  }
+
+  private static final int __USE_POSIX = 1;
+
+  /**
+   * Value of the C macro {@code __USE_POSIX}, generated from
+   * {@code #define __USE_POSIX 1}.
+   */
+  public static int __USE_POSIX() {
+    return __USE_POSIX;
+  }
+
+  private static final int __USE_POSIX2 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_POSIX2}, generated from
+   * {@code #define __USE_POSIX2 1}.
+   */
+  public static int __USE_POSIX2() {
+    return __USE_POSIX2;
+  }
+
+  private static final int __USE_POSIX199309 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_POSIX199309}, generated from
+   * {@code #define __USE_POSIX199309 1}.
+   */
+  public static int __USE_POSIX199309() {
+    return __USE_POSIX199309;
+  }
+
+  private static final int __USE_POSIX199506 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_POSIX199506}, generated from
+   * {@code #define __USE_POSIX199506 1}.
+   */
+  public static int __USE_POSIX199506() {
+    return __USE_POSIX199506;
+  }
+
+  private static final int __USE_XOPEN2K = 1;
+
+  /**
+   * Value of the C macro {@code __USE_XOPEN2K}, generated from
+   * {@code #define __USE_XOPEN2K 1}.
+   */
+  public static int __USE_XOPEN2K() {
+    return __USE_XOPEN2K;
+  }
+
+  private static final int __USE_XOPEN2K8 = 1;
+
+  /**
+   * Value of the C macro {@code __USE_XOPEN2K8}, generated from
+   * {@code #define __USE_XOPEN2K8 1}.
+   */
+  public static int __USE_XOPEN2K8() {
+    return __USE_XOPEN2K8;
+  }
+
+  private static final int _ATFILE_SOURCE = 1;
+
+  /**
+   * Value of the C macro {@code _ATFILE_SOURCE}, generated from
+   * {@code #define _ATFILE_SOURCE 1}.
+   */
+  public static int _ATFILE_SOURCE() {
+    return _ATFILE_SOURCE;
+  }
+
+  private static final int __WORDSIZE = 64;
+
+  /**
+   * Value of the C macro {@code __WORDSIZE}, generated from
+   * {@code #define __WORDSIZE 64}.
+   */
+  public static int __WORDSIZE() {
+    return __WORDSIZE;
+  }
+
+  private static final int __WORDSIZE_TIME64_COMPAT32 = 1;
+
+  /**
+   * Value of the C macro {@code __WORDSIZE_TIME64_COMPAT32}, generated from
+   * {@code #define __WORDSIZE_TIME64_COMPAT32 1}.
+   */
+  public static int __WORDSIZE_TIME64_COMPAT32() {
+    return __WORDSIZE_TIME64_COMPAT32;
+  }
+
+  private static final int __SYSCALL_WORDSIZE = 64;
+
+  /**
+   * Value of the C macro {@code __SYSCALL_WORDSIZE}, generated from
+   * {@code #define __SYSCALL_WORDSIZE 64}.
+   */
+  public static int __SYSCALL_WORDSIZE() {
+    return __SYSCALL_WORDSIZE;
+  }
+
+  private static final int __USE_MISC = 1;
+
+  /**
+   * Value of the C macro {@code __USE_MISC}, generated from
+   * {@code #define __USE_MISC 1}.
+   */
+  public static int __USE_MISC() {
+    return __USE_MISC;
+  }
+
+  private static final int __USE_ATFILE = 1;
+
+  /**
+   * Value of the C macro {@code __USE_ATFILE}, generated from
+   * {@code #define __USE_ATFILE 1}.
+   */
+  public static int __USE_ATFILE() {
+    return __USE_ATFILE;
+  }
+
+  private static final int __USE_FORTIFY_LEVEL = 0;
+
+  /**
+   * Value of the C macro {@code __USE_FORTIFY_LEVEL}, generated from
+   * {@code #define __USE_FORTIFY_LEVEL 0}.
+   */
+  public static int __USE_FORTIFY_LEVEL() {
+    return __USE_FORTIFY_LEVEL;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_GETS = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_DEPRECATED_GETS}, generated from
+   * {@code #define __GLIBC_USE_DEPRECATED_GETS 0}.
+   */
+  public static int __GLIBC_USE_DEPRECATED_GETS() {
+    return __GLIBC_USE_DEPRECATED_GETS;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_SCANF = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_DEPRECATED_SCANF}, generated from
+   * {@code #define __GLIBC_USE_DEPRECATED_SCANF 0}.
+   */
+  public static int __GLIBC_USE_DEPRECATED_SCANF() {
+    return __GLIBC_USE_DEPRECATED_SCANF;
+  }
+
+  private static final int _STDC_PREDEF_H = 1;
+
+  /**
+   * Value of the C macro {@code _STDC_PREDEF_H}, generated from
+   * {@code #define _STDC_PREDEF_H 1}.
+   */
+  public static int _STDC_PREDEF_H() {
+    return _STDC_PREDEF_H;
+  }
+
+  private static final int __STDC_IEC_559__ = 1;
+
+  /**
+   * Value of the C macro {@code __STDC_IEC_559__}, generated from
+   * {@code #define __STDC_IEC_559__ 1}.
+   */
+  public static int __STDC_IEC_559__() {
+    return __STDC_IEC_559__;
+  }
+
+  private static final int __STDC_IEC_559_COMPLEX__ = 1;
+
+  /**
+   * Value of the C macro {@code __STDC_IEC_559_COMPLEX__}, generated from
+   * {@code #define __STDC_IEC_559_COMPLEX__ 1}.
+   */
+  public static int __STDC_IEC_559_COMPLEX__() {
+    return __STDC_IEC_559_COMPLEX__;
+  }
+
+  private static final int __GNU_LIBRARY__ = 6;
+
+  /**
+   * Value of the C macro {@code __GNU_LIBRARY__}, generated from
+   * {@code #define __GNU_LIBRARY__ 6}.
+   */
+  public static int __GNU_LIBRARY__() {
+    return __GNU_LIBRARY__;
+  }
+
+  private static final int __GLIBC__ = 2;
+
+  /**
+   * Value of the C macro {@code __GLIBC__}, generated from
+   * {@code #define __GLIBC__ 2}.
+   */
+  public static int __GLIBC__() {
+    return __GLIBC__;
+  }
+
+  private static final int __GLIBC_MINOR__ = 35;
+
+  /**
+   * Value of the C macro {@code __GLIBC_MINOR__}, generated from
+   * {@code #define __GLIBC_MINOR__ 35}.
+   */
+  public static int __GLIBC_MINOR__() {
+    return __GLIBC_MINOR__;
+  }
+
+  private static final int _SYS_CDEFS_H = 1;
+
+  /**
+   * Value of the C macro {@code _SYS_CDEFS_H}, generated from
+   * {@code #define _SYS_CDEFS_H 1}.
+   */
+  public static int _SYS_CDEFS_H() {
+    return _SYS_CDEFS_H;
+  }
+
+  private static final int __glibc_c99_flexarr_available = 1;
+
+  /**
+   * Value of the C macro {@code __glibc_c99_flexarr_available}, generated from
+   * {@code #define __glibc_c99_flexarr_available 1}.
+   */
+  public static int __glibc_c99_flexarr_available() {
+    return __glibc_c99_flexarr_available;
+  }
+
+  private static final int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI = 0;
+
+  /**
+   * Value of the C macro {@code __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI}, generated from
+   * {@code #define __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI 0}.
+   */
+  public static int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI() {
+    return __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI;
+  }
+
+  private static final int __HAVE_GENERIC_SELECTION = 1;
+
+  /**
+   * Value of the C macro {@code __HAVE_GENERIC_SELECTION}, generated from
+   * {@code #define __HAVE_GENERIC_SELECTION 1}.
+   */
+  public static int __HAVE_GENERIC_SELECTION() {
+    return __HAVE_GENERIC_SELECTION;
+  }
+
+  private static final int __GLIBC_USE_LIB_EXT2 = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_LIB_EXT2}, generated from
+   * {@code #define __GLIBC_USE_LIB_EXT2 0}.
+   */
+  public static int __GLIBC_USE_LIB_EXT2() {
+    return __GLIBC_USE_LIB_EXT2;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_IEC_60559_BFP_EXT}, generated from
+   * {@code #define __GLIBC_USE_IEC_60559_BFP_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT_C2X = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_IEC_60559_BFP_EXT_C2X}, generated from
+   * {@code #define __GLIBC_USE_IEC_60559_BFP_EXT_C2X 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_EXT = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_IEC_60559_EXT}, generated from
+   * {@code #define __GLIBC_USE_IEC_60559_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_EXT() {
+    return __GLIBC_USE_IEC_60559_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_IEC_60559_FUNCS_EXT}, generated from
+   * {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X}, generated from
+   * {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_TYPES_EXT = 0;
+
+  /**
+   * Value of the C macro {@code __GLIBC_USE_IEC_60559_TYPES_EXT}, generated from
+   * {@code #define __GLIBC_USE_IEC_60559_TYPES_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_TYPES_EXT() {
+    return __GLIBC_USE_IEC_60559_TYPES_EXT;
+  }
+
+  private static final int _BITS_TYPES_H = 1;
+
+  /**
+   * Value of the C macro {@code _BITS_TYPES_H}, generated from
+   * {@code #define _BITS_TYPES_H 1}.
+   */
+  public static int _BITS_TYPES_H() {
+    return _BITS_TYPES_H;
+  }
+
+  private static final int _BITS_TYPESIZES_H = 1;
+
+  /**
+   * Value of the C macro {@code _BITS_TYPESIZES_H}, generated from
+   * {@code #define _BITS_TYPESIZES_H 1}.
+   */
+  public static int _BITS_TYPESIZES_H() {
+    return _BITS_TYPESIZES_H;
+  }
+
+  private static final int __OFF_T_MATCHES_OFF64_T = 1;
+
+  /**
+   * Value of the C macro {@code __OFF_T_MATCHES_OFF64_T}, generated from
+   * {@code #define __OFF_T_MATCHES_OFF64_T 1}.
+   */
+  public static int __OFF_T_MATCHES_OFF64_T() {
+    return __OFF_T_MATCHES_OFF64_T;
+  }
+
+  private static final int __INO_T_MATCHES_INO64_T = 1;
+
+  /**
+   * Value of the C macro {@code __INO_T_MATCHES_INO64_T}, generated from
+   * {@code #define __INO_T_MATCHES_INO64_T 1}.
+   */
+  public static int __INO_T_MATCHES_INO64_T() {
+    return __INO_T_MATCHES_INO64_T;
+  }
+
+  private static final int __RLIM_T_MATCHES_RLIM64_T = 1;
+
+  /**
+   * Value of the C macro {@code __RLIM_T_MATCHES_RLIM64_T}, generated from
+   * {@code #define __RLIM_T_MATCHES_RLIM64_T 1}.
+   */
+  public static int __RLIM_T_MATCHES_RLIM64_T() {
+    return __RLIM_T_MATCHES_RLIM64_T;
+  }
+
+  private static final int __STATFS_MATCHES_STATFS64 = 1;
+
+  /**
+   * Value of the C macro {@code __STATFS_MATCHES_STATFS64}, generated from
+   * {@code #define __STATFS_MATCHES_STATFS64 1}.
+   */
+  public static int __STATFS_MATCHES_STATFS64() {
+    return __STATFS_MATCHES_STATFS64;
+  }
+
+  private static final int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 = 1;
+
+  /**
+   * Value of the C macro {@code __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64}, generated from
+   * {@code #define __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 1}.
+   */
+  public static int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64() {
+    return __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64;
+  }
+
+  private static final int __FD_SETSIZE = 1024;
+
+  /**
+   * Value of the C macro {@code __FD_SETSIZE}, generated from
+   * {@code #define __FD_SETSIZE 1024}.
+   */
+  public static int __FD_SETSIZE() {
+    return __FD_SETSIZE;
+  }
+
+  private static final int _BITS_TIME64_H = 1;
+
+  /**
+   * Value of the C macro {@code _BITS_TIME64_H}, generated from
+   * {@code #define _BITS_TIME64_H 1}.
+   */
+  public static int _BITS_TIME64_H() {
+    return _BITS_TIME64_H;
+  }
+
+  private static final int _BITS_WCHAR_H = 1;
+
+  /**
+   * Value of the C macro {@code _BITS_WCHAR_H}, generated from
+   * {@code #define _BITS_WCHAR_H 1}.
+   */
+  public static int _BITS_WCHAR_H() {
+    return _BITS_WCHAR_H;
+  }
+
+  private static final int _BITS_STDINT_INTN_H = 1;
+
+  /**
+   * Value of the C macro {@code _BITS_STDINT_INTN_H}, generated from
+   * {@code #define _BITS_STDINT_INTN_H 1}.
+   */
+  public static int _BITS_STDINT_INTN_H() {
+    return _BITS_STDINT_INTN_H;
+  }
+
+  private static final int _BITS_STDINT_UINTN_H = 1;
+
+  /**
+   * Value of the C macro {@code _BITS_STDINT_UINTN_H}, generated from
+   * {@code #define _BITS_STDINT_UINTN_H 1}.
+   */
+  public static int _BITS_STDINT_UINTN_H() {
+    return _BITS_STDINT_UINTN_H;
+  }
+
+  private static final int true_ = 1;
+
+  /**
+   * Value of the C macro {@code true}, generated from
+   * {@code #define true 1}.
+   */
+  public static int true_() {
+    return true_;
+  }
+
+  private static final int false_ = 0;
+
+  /**
+   * Value of the C macro {@code false}, generated from
+   * {@code #define false 0}.
+   */
+  public static int false_() {
+    return false_;
+  }
+
+  private static final int __bool_true_false_are_defined = 1;
+
+  /**
+   * Value of the C macro {@code __bool_true_false_are_defined}, generated from
+   * {@code #define __bool_true_false_are_defined 1}.
+   */
+  public static int __bool_true_false_are_defined() {
+    return __bool_true_false_are_defined;
+  }
+
+  private static final int L2Expanded = 0;
+
+  /**
+   * Value of the enumerator {@code L2Expanded} of an anonymous C enum,
+   * generated from {@code L2Expanded = 0}.
+   */
+  public static int L2Expanded() {
+    return L2Expanded;
+  }
+
+  private static final int L2SqrtExpanded = 1;
+
+  /**
+   * Value of the enumerator {@code L2SqrtExpanded} of an anonymous C enum,
+   * generated from {@code L2SqrtExpanded = 1}.
+   */
+  public static int L2SqrtExpanded() {
+    return L2SqrtExpanded;
+  }
+
+  private static final int CosineExpanded = 2;
+
+  /**
+   * Value of the enumerator {@code CosineExpanded} of an anonymous C enum,
+   * generated from {@code CosineExpanded = 2}.
+   */
+  public static int CosineExpanded() {
+    return CosineExpanded;
+  }
+
+  private static final int L1 = 3;
+
+  /**
+   * Value of the enumerator {@code L1} of an anonymous C enum,
+   * generated from {@code L1 = 3}.
+   */
+  public static int L1() {
+    return L1;
+  }
+
+  private static final int L2Unexpanded = 4;
+
+  /**
+   * Value of the enumerator {@code L2Unexpanded} of an anonymous C enum,
+   * generated from {@code L2Unexpanded = 4}.
+   */
+  public static int L2Unexpanded() {
+    return L2Unexpanded;
+  }
+
+  private static final int L2SqrtUnexpanded = (int) 5L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.L2SqrtUnexpanded = 5}</pre>
+   */
+  public static int L2SqrtUnexpanded() {
+    return L2SqrtUnexpanded;
+  }
+
+  private static final int InnerProduct = (int) 6L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.InnerProduct = 6}</pre>
+   */
+  public static int InnerProduct() {
+    return InnerProduct;
+  }
+
+  private static final int Linf = (int) 7L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.Linf = 7}</pre>
+   */
+  public static int Linf() {
+    return Linf;
+  }
+
+  private static final int Canberra = (int) 8L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.Canberra = 8}</pre>
+   */
+  public static int Canberra() {
+    return Canberra;
+  }
+
+  private static final int LpUnexpanded = (int) 9L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.LpUnexpanded = 9}</pre>
+   */
+  public static int LpUnexpanded() {
+    return LpUnexpanded;
+  }
+
+  private static final int CorrelationExpanded = (int) 10L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.CorrelationExpanded = 10}</pre>
+   */
+  public static int CorrelationExpanded() {
+    return CorrelationExpanded;
+  }
+
+  private static final int JaccardExpanded = (int) 11L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.JaccardExpanded = 11}</pre>
+   */
+  public static int JaccardExpanded() {
+    return JaccardExpanded;
+  }
+
+  private static final int HellingerExpanded = (int) 12L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.HellingerExpanded = 12}</pre>
+   */
+  public static int HellingerExpanded() {
+    return HellingerExpanded;
+  }
+
+  private static final int Haversine = (int) 13L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.Haversine = 13}</pre>
+   */
+  public static int Haversine() {
+    return Haversine;
+  }
+
+  private static final int BrayCurtis = (int) 14L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.BrayCurtis = 14}</pre>
+   */
+  public static int BrayCurtis() {
+    return BrayCurtis;
+  }
+
+  private static final int JensenShannon = (int) 15L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.JensenShannon = 15}</pre>
+   */
+  public static int JensenShannon() {
+    return JensenShannon;
+  }
+
+  private static final int HammingUnexpanded = (int) 16L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.HammingUnexpanded = 16}</pre>
+   */
+  public static int HammingUnexpanded() {
+    return HammingUnexpanded;
+  }
+
+  private static final int KLDivergence = (int) 17L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.KLDivergence = 17}</pre>
+   */
+  public static int KLDivergence() {
+    return KLDivergence;
+  }
+
+  private static final int RusselRaoExpanded = (int) 18L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.RusselRaoExpanded = 18}</pre>
+   */
+  public static int RusselRaoExpanded() {
+    return RusselRaoExpanded;
+  }
+
+  private static final int DiceExpanded = (int) 19L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.DiceExpanded = 19}</pre>
+   */
+  public static int DiceExpanded() {
+    return DiceExpanded;
+  }
+
+  private static final int Precomputed = (int) 100L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.Precomputed = 100}</pre>
+   */
+  public static int Precomputed() {
+    return Precomputed;
+  }
+
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef unsigned char __u_char}</pre>
+   */
+  public static final OfByte __u_char = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef unsigned short __u_short}</pre>
+   */
+  public static final OfShort __u_short = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __u_int}</pre>
+   */
+  public static final OfInt __u_int = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __u_long}</pre>
+   */
+  public static final OfLong __u_long = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef signed char __int8_t}</pre>
+   */
+  public static final OfByte __int8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef unsigned char __uint8_t}</pre>
+   */
+  public static final OfByte __uint8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef short __int16_t}</pre>
+   */
+  public static final OfShort __int16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef unsigned short __uint16_t}</pre>
+   */
+  public static final OfShort __uint16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int __int32_t}</pre>
+   */
+  public static final OfInt __int32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __uint32_t}</pre>
+   */
+  public static final OfInt __uint32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __int64_t}</pre>
+   */
+  public static final OfLong __int64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __uint64_t}</pre>
+   */
+  public static final OfLong __uint64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef __int8_t __int_least8_t}</pre>
+   */
+  public static final OfByte __int_least8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef __uint8_t __uint_least8_t}</pre>
+   */
+  public static final OfByte __uint_least8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef __int16_t __int_least16_t}</pre>
+   */
+  public static final OfShort __int_least16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef __uint16_t __uint_least16_t}</pre>
+   */
+  public static final OfShort __uint_least16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef __int32_t __int_least32_t}</pre>
+   */
+  public static final OfInt __int_least32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef __uint32_t __uint_least32_t}</pre>
+   */
+  public static final OfInt __uint_least32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __int64_t __int_least64_t}</pre>
+   */
+  public static final OfLong __int_least64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __uint64_t __uint_least64_t}</pre>
+   */
+  public static final OfLong __uint_least64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __quad_t}</pre>
+   */
+  public static final OfLong __quad_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __u_quad_t}</pre>
+   */
+  public static final OfLong __u_quad_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __intmax_t}</pre>
+   */
+  public static final OfLong __intmax_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __uintmax_t}</pre>
+   */
+  public static final OfLong __uintmax_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __dev_t}</pre>
+   */
+  public static final OfLong __dev_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __uid_t}</pre>
+   */
+  public static final OfInt __uid_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __gid_t}</pre>
+   */
+  public static final OfInt __gid_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __ino_t}</pre>
+   */
+  public static final OfLong __ino_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __ino64_t}</pre>
+   */
+  public static final OfLong __ino64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __mode_t}</pre>
+   */
+  public static final OfInt __mode_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __nlink_t}</pre>
+   */
+  public static final OfLong __nlink_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __off_t}</pre>
+   */
+  public static final OfLong __off_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __off64_t}</pre>
+   */
+  public static final OfLong __off64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int __pid_t}</pre>
+   */
+  public static final OfInt __pid_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __clock_t}</pre>
+   */
+  public static final OfLong __clock_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __rlim_t}</pre>
+   */
+  public static final OfLong __rlim_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __rlim64_t}</pre>
+   */
+  public static final OfLong __rlim64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __id_t}</pre>
+   */
+  public static final OfInt __id_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __time_t}</pre>
+   */
+  public static final OfLong __time_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __useconds_t}</pre>
+   */
+  public static final OfInt __useconds_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __suseconds_t}</pre>
+   */
+  public static final OfLong __suseconds_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __suseconds64_t}</pre>
+   */
+  public static final OfLong __suseconds64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int __daddr_t}</pre>
+   */
+  public static final OfInt __daddr_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int __key_t}</pre>
+   */
+  public static final OfInt __key_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int __clockid_t}</pre>
+   */
+  public static final OfInt __clockid_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_POINTER} for the C typedef:
+   * <pre>{@code typedef void *__timer_t}</pre>
+   */
+  public static final AddressLayout __timer_t = HnswH.C_POINTER;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __blksize_t}</pre>
+   */
+  public static final OfLong __blksize_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __blkcnt_t}</pre>
+   */
+  public static final OfLong __blkcnt_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __blkcnt64_t}</pre>
+   */
+  public static final OfLong __blkcnt64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __fsblkcnt_t}</pre>
+   */
+  public static final OfLong __fsblkcnt_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __fsblkcnt64_t}</pre>
+   */
+  public static final OfLong __fsblkcnt64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __fsfilcnt_t}</pre>
+   */
+  public static final OfLong __fsfilcnt_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __fsfilcnt64_t}</pre>
+   */
+  public static final OfLong __fsfilcnt64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __fsword_t}</pre>
+   */
+  public static final OfLong __fsword_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __ssize_t}</pre>
+   */
+  public static final OfLong __ssize_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __syscall_slong_t}</pre>
+   */
+  public static final OfLong __syscall_slong_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long __syscall_ulong_t}</pre>
+   */
+  public static final OfLong __syscall_ulong_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __off64_t __loff_t}</pre>
+   */
+  public static final OfLong __loff_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_POINTER} for the C typedef:
+   * <pre>{@code typedef char *__caddr_t}</pre>
+   */
+  public static final AddressLayout __caddr_t = HnswH.C_POINTER;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long __intptr_t}</pre>
+   */
+  public static final OfLong __intptr_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef unsigned int __socklen_t}</pre>
+   */
+  public static final OfInt __socklen_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int __sig_atomic_t}</pre>
+   */
+  public static final OfInt __sig_atomic_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef __int8_t int8_t}</pre>
+   */
+  public static final OfByte int8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef __int16_t int16_t}</pre>
+   */
+  public static final OfShort int16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef __int32_t int32_t}</pre>
+   */
+  public static final OfInt int32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __int64_t int64_t}</pre>
+   */
+  public static final OfLong int64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef __uint8_t uint8_t}</pre>
+   */
+  public static final OfByte uint8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef __uint16_t uint16_t}</pre>
+   */
+  public static final OfShort uint16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef __uint32_t uint32_t}</pre>
+   */
+  public static final OfInt uint32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __uint64_t uint64_t}</pre>
+   */
+  public static final OfLong uint64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef __int_least8_t int_least8_t}</pre>
+   */
+  public static final OfByte int_least8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef __int_least16_t int_least16_t}</pre>
+   */
+  public static final OfShort int_least16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef __int_least32_t int_least32_t}</pre>
+   */
+  public static final OfInt int_least32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __int_least64_t int_least64_t}</pre>
+   */
+  public static final OfLong int_least64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef __uint_least8_t uint_least8_t}</pre>
+   */
+  public static final OfByte uint_least8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_SHORT} for the C typedef:
+   * <pre>{@code typedef __uint_least16_t uint_least16_t}</pre>
+   */
+  public static final OfShort uint_least16_t = HnswH.C_SHORT;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef __uint_least32_t uint_least32_t}</pre>
+   */
+  public static final OfInt uint_least32_t = HnswH.C_INT;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __uint_least64_t uint_least64_t}</pre>
+   */
+  public static final OfLong uint_least64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef signed char int_fast8_t}</pre>
+   */
+  public static final OfByte int_fast8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long int_fast16_t}</pre>
+   */
+  public static final OfLong int_fast16_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long int_fast32_t}</pre>
+   */
+  public static final OfLong int_fast32_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long int_fast64_t}</pre>
+   */
+  public static final OfLong int_fast64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_CHAR} for the C typedef:
+   * <pre>{@code typedef unsigned char uint_fast8_t}</pre>
+   */
+  public static final OfByte uint_fast8_t = HnswH.C_CHAR;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long uint_fast16_t}</pre>
+   */
+  public static final OfLong uint_fast16_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long uint_fast32_t}</pre>
+   */
+  public static final OfLong uint_fast32_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long uint_fast64_t}</pre>
+   */
+  public static final OfLong uint_fast64_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long intptr_t}</pre>
+   */
+  public static final OfLong intptr_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long uintptr_t}</pre>
+   */
+  public static final OfLong uintptr_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __intmax_t intmax_t}</pre>
+   */
+  public static final OfLong intmax_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef __uintmax_t uintmax_t}</pre>
+   */
+  public static final OfLong uintmax_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef long ptrdiff_t}</pre>
+   */
+  public static final OfLong ptrdiff_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_LONG} for the C typedef:
+   * <pre>{@code typedef unsigned long size_t}</pre>
+   */
+  public static final OfLong size_t = HnswH.C_LONG;
+  /**
+   * Alias of {@code HnswH.C_INT} for the C typedef:
+   * <pre>{@code typedef int wchar_t}</pre>
+   */
+  public static final OfInt wchar_t = HnswH.C_INT;
+  private static final int kDLCPU = (int) 1L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLCPU = 1}</pre>
+   */
+  public static int kDLCPU() {
+    return kDLCPU;
+  }
+
+  private static final int kDLCUDA = (int) 2L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLCUDA = 2}</pre>
+   */
+  public static int kDLCUDA() {
+    return kDLCUDA;
+  }
+
+  private static final int kDLCUDAHost = (int) 3L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLCUDAHost = 3}</pre>
+   */
+  public static int kDLCUDAHost() {
+    return kDLCUDAHost;
+  }
+
+  private static final int kDLOpenCL = (int) 4L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLOpenCL = 4}</pre>
+   */
+  public static int kDLOpenCL() {
+    return kDLOpenCL;
+  }
+
+  private static final int kDLVulkan = (int) 7L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLVulkan = 7}</pre>
+   */
+  public static int kDLVulkan() {
+    return kDLVulkan;
+  }
+
+  private static final int kDLMetal = (int) 8L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLMetal = 8}</pre>
+   */
+  public static int kDLMetal() {
+    return kDLMetal;
+  }
+
+  private static final int kDLVPI = (int) 9L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLVPI = 9}</pre>
+   */
+  public static int kDLVPI() {
+    return kDLVPI;
+  }
+
+  private static final int kDLROCM = (int) 10L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLROCM = 10}</pre>
+   */
+  public static int kDLROCM() {
+    return kDLROCM;
+  }
+
+  private static final int kDLROCMHost = (int) 11L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLROCMHost = 11}</pre>
+   */
+  public static int kDLROCMHost() {
+    return kDLROCMHost;
+  }
+
+  private static final int kDLExtDev = (int) 12L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLExtDev = 12}</pre>
+   */
+  public static int kDLExtDev() {
+    return kDLExtDev;
+  }
+
+  private static final int kDLCUDAManaged = (int) 13L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLCUDAManaged = 13}</pre>
+   */
+  public static int kDLCUDAManaged() {
+    return kDLCUDAManaged;
+  }
+
+  private static final int kDLOneAPI = (int) 14L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLOneAPI = 14}</pre>
+   */
+  public static int kDLOneAPI() {
+    return kDLOneAPI;
+  }
+
+  private static final int kDLWebGPU = (int) 15L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLWebGPU = 15}</pre>
+   */
+  public static int kDLWebGPU() {
+    return kDLWebGPU;
+  }
+
+  private static final int kDLHexagon = (int) 16L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLHexagon = 16}</pre>
+   */
+  public static int kDLHexagon() {
+    return kDLHexagon;
+  }
+
+  private static final int kDLInt = (int) 0L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLInt = 0}</pre>
+   */
+  public static int kDLInt() {
+    return kDLInt;
+  }
+
+  private static final int kDLUInt = (int) 1L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLUInt = 1}</pre>
+   */
+  public static int kDLUInt() {
+    return kDLUInt;
+  }
+
+  private static final int kDLFloat = (int) 2L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLFloat = 2}</pre>
+   */
+  public static int kDLFloat() {
+    return kDLFloat;
+  }
+
+  private static final int kDLOpaqueHandle = (int) 3L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLOpaqueHandle = 3}</pre>
+   */
+  public static int kDLOpaqueHandle() {
+    return kDLOpaqueHandle;
+  }
+
+  private static final int kDLBfloat = (int) 4L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLBfloat = 4}</pre>
+   */
+  public static int kDLBfloat() {
+    return kDLBfloat;
+  }
+
+  private static final int kDLComplex = (int) 5L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLComplex = 5}</pre>
+   */
+  public static int kDLComplex() {
+    return kDLComplex;
+  }
+
+  private static final int kDLBool = (int) 6L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum <anonymous>.kDLBool = 6}</pre>
+   */
+  public static int kDLBool() {
+    return kDLBool;
+  }
+
+  private static final int AUTO_SELECT = (int) 0L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraGraphBuildAlgo.AUTO_SELECT = 0}</pre>
+   */
+  public static int AUTO_SELECT() {
+    return AUTO_SELECT;
+  }
+
+  private static final int IVF_PQ = (int) 1L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraGraphBuildAlgo.IVF_PQ = 1}</pre>
+   */
+  public static int IVF_PQ() {
+    return IVF_PQ;
+  }
+
+  private static final int NN_DESCENT = (int) 2L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraGraphBuildAlgo.NN_DESCENT = 2}</pre>
+   */
+  public static int NN_DESCENT() {
+    return NN_DESCENT;
+  }
+
+  /** Alias of {@code HnswH.C_POINTER} for the C pointer-to-struct typedef shown below.
+   * {@snippet lang=c :
+   * typedef struct cuvsCagraCompressionParams {
+   *     uint32_t pq_bits;
+   *     uint32_t pq_dim;
+   *     uint32_t vq_n_centers;
+   *     uint32_t kmeans_n_iters;
+   *     double vq_kmeans_trainset_fraction;
+   *     double pq_kmeans_trainset_fraction;
+   * } *cuvsCagraCompressionParams_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraCompressionParams_t = HnswH.C_POINTER;
+  /** Alias of {@code HnswH.C_POINTER} for the C pointer-to-struct typedef shown below.
+   * {@snippet lang=c :
+   * typedef struct cuvsCagraIndexParams {
+   *     cuvsDistanceType metric;
+   *     long intermediate_graph_degree;
+   *     long graph_degree;
+   *     enum cuvsCagraGraphBuildAlgo build_algo;
+   *     long nn_descent_niter;
+   *     cuvsCagraCompressionParams_t compression;
+   * } *cuvsCagraIndexParams_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraIndexParams_t = HnswH.C_POINTER;
+  private static final int SINGLE_CTA = (int) 0L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraSearchAlgo.SINGLE_CTA = 0}</pre>
+   */
+  public static int SINGLE_CTA() {
+    return SINGLE_CTA;
+  }
+
+  private static final int MULTI_CTA = (int) 1L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraSearchAlgo.MULTI_CTA = 1}</pre>
+   */
+  public static int MULTI_CTA() {
+    return MULTI_CTA;
+  }
+
+  private static final int MULTI_KERNEL = (int) 2L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraSearchAlgo.MULTI_KERNEL = 2}</pre>
+   */
+  public static int MULTI_KERNEL() {
+    return MULTI_KERNEL;
+  }
+
+  private static final int AUTO = (int) 3L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraSearchAlgo.AUTO = 3}</pre>
+   */
+  public static int AUTO() {
+    return AUTO;
+  }
+
+  private static final int HASH = (int) 0L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraHashMode.HASH = 0}</pre>
+   */
+  public static int HASH() {
+    return HASH;
+  }
+
+  private static final int SMALL = (int) 1L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraHashMode.SMALL = 1}</pre>
+   */
+  public static int SMALL() {
+    return SMALL;
+  }
+
+  private static final int AUTO_HASH = (int) 2L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsCagraHashMode.AUTO_HASH = 2}</pre>
+   */
+  public static int AUTO_HASH() {
+    return AUTO_HASH;
+  }
+
+  /** Alias of {@code HnswH.C_POINTER} for the C pointer-to-struct typedef shown below.
+   * {@snippet lang=c :
+   * typedef struct cuvsCagraSearchParams {
+   *     long max_queries;
+   *     long itopk_size;
+   *     long max_iterations;
+   *     enum cuvsCagraSearchAlgo algo;
+   *     long team_size;
+   *     long search_width;
+   *     long min_iterations;
+   *     long thread_block_size;
+   *     enum cuvsCagraHashMode hashmap_mode;
+   *     long hashmap_min_bitlen;
+   *     float hashmap_max_fill_rate;
+   *     uint32_t num_random_samplings;
+   *     uint64_t rand_xor_mask;
+   * } *cuvsCagraSearchParams_t
+   * }
+   */
+  public static final AddressLayout cuvsCagraSearchParams_t = HnswH.C_POINTER;
+  /**
+   * Alias of {@code HnswH.C_POINTER} for the C typedef:
+   * <pre>{@code typedef cuvsCagraIndex *cuvsCagraIndex_t}</pre>
+   */
+  public static final AddressLayout cuvsCagraIndex_t = HnswH.C_POINTER;
+  private static final int NONE = (int) 0L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsHnswHierarchy.NONE = 0}</pre>
+   */
+  public static int NONE() {
+    return NONE;
+  }
+
+  private static final int CPU = (int) 1L;
+
+  /**
+   * Returns the value of the C enum constant:
+   * <pre>{@code enum cuvsHnswHierarchy.CPU = 1}</pre>
+   */
+  public static int CPU() {
+    return CPU;
+  }
+
+  /** Alias of {@code HnswH.C_POINTER} for the C pointer-to-struct typedef shown below.
+   * {@snippet lang=c :
+   * typedef struct cuvsHnswIndexParams {
+   *     cuvsHnswHierarchy hierarchy;
+   *     int ef_construction;
+   *     int num_threads;
+   * } *cuvsHnswIndexParams_t
+   * }
+   */
+  public static final AddressLayout cuvsHnswIndexParams_t = HnswH.C_POINTER;
+  /**
+   * Alias of {@code HnswH.C_POINTER} for the C typedef:
+   * <pre>{@code typedef cuvsHnswIndex *cuvsHnswIndex_t}</pre>
+   */
+  public static final AddressLayout cuvsHnswIndex_t = HnswH.C_POINTER;
+  /** Alias of {@code HnswH.C_POINTER} for the C pointer-to-struct typedef shown below.
+   * {@snippet lang=c :
+   * typedef struct cuvsHnswExtendParams {
+   *     int num_threads;
+   * } *cuvsHnswExtendParams_t
+   * }
+   */
+  public static final AddressLayout cuvsHnswExtendParams_t = HnswH.C_POINTER;
+  /** Alias of {@code HnswH.C_POINTER} for the C pointer-to-struct typedef shown below.
+   * {@snippet lang=c :
+   * typedef struct cuvsHnswSearchParams {
+   *     int32_t ef;
+   *     int32_t num_threads;
+   * } *cuvsHnswSearchParams_t
+   * }
+   */
+  public static final AddressLayout cuvsHnswSearchParams_t = HnswH.C_POINTER;
+  private static final long _POSIX_C_SOURCE = 200809L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define _POSIX_C_SOURCE 200809}</pre>
+   */
+  public static long _POSIX_C_SOURCE() {
+    return _POSIX_C_SOURCE;
+  }
+
+  private static final int __TIMESIZE = (int) 64L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define __TIMESIZE 64}</pre>
+   */
+  public static int __TIMESIZE() {
+    return __TIMESIZE;
+  }
+
+  private static final long __STDC_IEC_60559_BFP__ = 201404L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define __STDC_IEC_60559_BFP__ 201404}</pre>
+   */
+  public static long __STDC_IEC_60559_BFP__() {
+    return __STDC_IEC_60559_BFP__;
+  }
+
+  private static final long __STDC_IEC_60559_COMPLEX__ = 201404L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define __STDC_IEC_60559_COMPLEX__ 201404}</pre>
+   */
+  public static long __STDC_IEC_60559_COMPLEX__() {
+    return __STDC_IEC_60559_COMPLEX__;
+  }
+
+  private static final long __STDC_ISO_10646__ = 201706L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define __STDC_ISO_10646__ 201706}</pre>
+   */
+  public static long __STDC_ISO_10646__() {
+    return __STDC_ISO_10646__;
+  }
+
+  private static final int __WCHAR_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define __WCHAR_MAX 2147483647}</pre>
+   */
+  public static int __WCHAR_MAX() {
+    return __WCHAR_MAX;
+  }
+
+  private static final int __WCHAR_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define __WCHAR_MIN -2147483648}</pre>
+   */
+  public static int __WCHAR_MIN() {
+    return __WCHAR_MIN;
+  }
+
+  private static final int INT8_MIN = (int) -128L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT8_MIN -128}</pre>
+   */
+  public static int INT8_MIN() {
+    return INT8_MIN;
+  }
+
+  private static final int INT16_MIN = (int) -32768L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT16_MIN -32768}</pre>
+   */
+  public static int INT16_MIN() {
+    return INT16_MIN;
+  }
+
+  private static final int INT32_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT32_MIN -2147483648}</pre>
+   */
+  public static int INT32_MIN() {
+    return INT32_MIN;
+  }
+
+  private static final long INT64_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT64_MIN -9223372036854775808}</pre>
+   */
+  public static long INT64_MIN() {
+    return INT64_MIN;
+  }
+
+  private static final int INT8_MAX = (int) 127L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT8_MAX 127}</pre>
+   */
+  public static int INT8_MAX() {
+    return INT8_MAX;
+  }
+
+  private static final int INT16_MAX = (int) 32767L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT16_MAX 32767}</pre>
+   */
+  public static int INT16_MAX() {
+    return INT16_MAX;
+  }
+
+  private static final int INT32_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT32_MAX 2147483647}</pre>
+   */
+  public static int INT32_MAX() {
+    return INT32_MAX;
+  }
+
+  private static final long INT64_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT64_MAX 9223372036854775807}</pre>
+   */
+  public static long INT64_MAX() {
+    return INT64_MAX;
+  }
+
+  private static final int UINT8_MAX = (int) 255L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define UINT8_MAX 255}</pre>
+   */
+  public static int UINT8_MAX() {
+    return UINT8_MAX;
+  }
+
+  private static final int UINT16_MAX = (int) 65535L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define UINT16_MAX 65535}</pre>
+   */
+  public static int UINT16_MAX() {
+    return UINT16_MAX;
+  }
+
+  private static final int UINT32_MAX = (int) 4294967295L;
+
+  /**
+   * Returns the C macro value narrowed to {@code int}: reads as -1; widen with {@code Integer.toUnsignedLong}.
+   * <pre>{@code #define UINT32_MAX 4294967295}</pre>
+   */
+  public static int UINT32_MAX() {
+    return UINT32_MAX;
+  }
+
+  private static final long UINT64_MAX = -1L;
+
+  /**
+   * Returns the C macro value as a signed {@code long}: all 64 bits set, i.e. -1; compare as unsigned.
+   * <pre>{@code #define UINT64_MAX -1}</pre>
+   */
+  public static long UINT64_MAX() {
+    return UINT64_MAX;
+  }
+
+  private static final int INT_LEAST8_MIN = (int) -128L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST8_MIN -128}</pre>
+   */
+  public static int INT_LEAST8_MIN() {
+    return INT_LEAST8_MIN;
+  }
+
+  private static final int INT_LEAST16_MIN = (int) -32768L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST16_MIN -32768}</pre>
+   */
+  public static int INT_LEAST16_MIN() {
+    return INT_LEAST16_MIN;
+  }
+
+  private static final int INT_LEAST32_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST32_MIN -2147483648}</pre>
+   */
+  public static int INT_LEAST32_MIN() {
+    return INT_LEAST32_MIN;
+  }
+
+  private static final long INT_LEAST64_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST64_MIN -9223372036854775808}</pre>
+   */
+  public static long INT_LEAST64_MIN() {
+    return INT_LEAST64_MIN;
+  }
+
+  private static final int INT_LEAST8_MAX = (int) 127L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST8_MAX 127}</pre>
+   */
+  public static int INT_LEAST8_MAX() {
+    return INT_LEAST8_MAX;
+  }
+
+  private static final int INT_LEAST16_MAX = (int) 32767L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST16_MAX 32767}</pre>
+   */
+  public static int INT_LEAST16_MAX() {
+    return INT_LEAST16_MAX;
+  }
+
+  private static final int INT_LEAST32_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST32_MAX 2147483647}</pre>
+   */
+  public static int INT_LEAST32_MAX() {
+    return INT_LEAST32_MAX;
+  }
+
+  private static final long INT_LEAST64_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define INT_LEAST64_MAX 9223372036854775807}</pre>
+   */
+  public static long INT_LEAST64_MAX() {
+    return INT_LEAST64_MAX;
+  }
+
+  private static final int UINT_LEAST8_MAX = (int) 255L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define UINT_LEAST8_MAX 255}</pre>
+   */
+  public static int UINT_LEAST8_MAX() {
+    return UINT_LEAST8_MAX;
+  }
+
+  private static final int UINT_LEAST16_MAX = (int) 65535L;
+
+  /**
+   * Returns the value of the C macro:
+   * <pre>{@code #define UINT_LEAST16_MAX 65535}</pre>
+   */
+  public static int UINT_LEAST16_MAX() {
+    return UINT_LEAST16_MAX;
+  }
+
+  private static final int UINT_LEAST32_MAX = (int) 4294967295L;
+
+  /**
+   * Returns the C macro value narrowed to {@code int}: reads as -1; widen with {@code Integer.toUnsignedLong}.
+   * <pre>{@code #define UINT_LEAST32_MAX 4294967295}</pre>
+   */
+  public static int UINT_LEAST32_MAX() {
+    return UINT_LEAST32_MAX;
+  }
+
+  private static final long UINT_LEAST64_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code UINT_LEAST64_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Compare or print it with
+   * {@link Long#compareUnsigned(long, long)} or {@link Long#toUnsignedString(long)}.
+   *
+   * {@snippet lang=c :
+   * #define UINT_LEAST64_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long UINT_LEAST64_MAX() {
+    return UINT_LEAST64_MAX;
+  }
+
+  private static final int INT_FAST8_MIN = (int) -128L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST8_MIN -128
+   * }
+   */
+  public static int INT_FAST8_MIN() {
+    return INT_FAST8_MIN;
+  }
+
+  private static final long INT_FAST16_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST16_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST16_MIN() {
+    return INT_FAST16_MIN;
+  }
+
+  private static final long INT_FAST32_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST32_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST32_MIN() {
+    return INT_FAST32_MIN;
+  }
+
+  private static final long INT_FAST64_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST64_MIN() {
+    return INT_FAST64_MIN;
+  }
+
+  private static final int INT_FAST8_MAX = (int) 127L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST8_MAX 127
+   * }
+   */
+  public static int INT_FAST8_MAX() {
+    return INT_FAST8_MAX;
+  }
+
+  private static final long INT_FAST16_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST16_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST16_MAX() {
+    return INT_FAST16_MAX;
+  }
+
+  private static final long INT_FAST32_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST32_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST32_MAX() {
+    return INT_FAST32_MAX;
+  }
+
+  private static final long INT_FAST64_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INT_FAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST64_MAX() {
+    return INT_FAST64_MAX;
+  }
+
+  private static final int UINT_FAST8_MAX = (int) 255L;
+
+  /**
+   * {@snippet lang = c : * #define UINT_FAST8_MAX 255
+   * }
+   */
+  public static int UINT_FAST8_MAX() {
+    return UINT_FAST8_MAX;
+  }
+
+  private static final long UINT_FAST16_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code UINT_FAST16_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Compare or print it with
+   * {@link Long#compareUnsigned(long, long)} or {@link Long#toUnsignedString(long)}.
+   *
+   * {@snippet lang=c :
+   * #define UINT_FAST16_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long UINT_FAST16_MAX() {
+    return UINT_FAST16_MAX;
+  }
+
+  private static final long UINT_FAST32_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code UINT_FAST32_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Compare or print it with
+   * {@link Long#compareUnsigned(long, long)} or {@link Long#toUnsignedString(long)}.
+   *
+   * {@snippet lang=c :
+   * #define UINT_FAST32_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long UINT_FAST32_MAX() {
+    return UINT_FAST32_MAX;
+  }
+
+  private static final long UINT_FAST64_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code UINT_FAST64_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Compare or print it with
+   * {@link Long#compareUnsigned(long, long)} or {@link Long#toUnsignedString(long)}.
+   *
+   * {@snippet lang=c :
+   * #define UINT_FAST64_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long UINT_FAST64_MAX() {
+    return UINT_FAST64_MAX;
+  }
+
+  private static final long INTPTR_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INTPTR_MIN -9223372036854775808
+   * }
+   */
+  public static long INTPTR_MIN() {
+    return INTPTR_MIN;
+  }
+
+  private static final long INTPTR_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INTPTR_MAX 9223372036854775807
+   * }
+   */
+  public static long INTPTR_MAX() {
+    return INTPTR_MAX;
+  }
+
+  private static final long UINTPTR_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code UINTPTR_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Compare or print it with
+   * {@link Long#compareUnsigned(long, long)} or {@link Long#toUnsignedString(long)}.
+   *
+   * {@snippet lang=c :
+   * #define UINTPTR_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long UINTPTR_MAX() {
+    return UINTPTR_MAX;
+  }
+
+  private static final long INTMAX_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define INTMAX_MIN -9223372036854775808
+   * }
+   */
+  public static long INTMAX_MIN() {
+    return INTMAX_MIN;
+  }
+
+  private static final long INTMAX_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define INTMAX_MAX 9223372036854775807
+   * }
+   */
+  public static long INTMAX_MAX() {
+    return INTMAX_MAX;
+  }
+
+  private static final long UINTMAX_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code UINTMAX_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Compare or print it with
+   * {@link Long#compareUnsigned(long, long)} or {@link Long#toUnsignedString(long)}.
+   *
+   * {@snippet lang=c :
+   * #define UINTMAX_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long UINTMAX_MAX() {
+    return UINTMAX_MAX;
+  }
+
+  private static final long PTRDIFF_MIN = -9223372036854775808L;
+
+  /**
+   * {@snippet lang = c : * #define PTRDIFF_MIN -9223372036854775808
+   * }
+   */
+  public static long PTRDIFF_MIN() {
+    return PTRDIFF_MIN;
+  }
+
+  private static final long PTRDIFF_MAX = 9223372036854775807L;
+
+  /**
+   * {@snippet lang = c : * #define PTRDIFF_MAX 9223372036854775807
+   * }
+   */
+  public static long PTRDIFF_MAX() {
+    return PTRDIFF_MAX;
+  }
+
+  private static final int SIG_ATOMIC_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define SIG_ATOMIC_MIN -2147483648
+   * }
+   */
+  public static int SIG_ATOMIC_MIN() {
+    return SIG_ATOMIC_MIN;
+  }
+
+  private static final int SIG_ATOMIC_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define SIG_ATOMIC_MAX 2147483647
+   * }
+   */
+  public static int SIG_ATOMIC_MAX() {
+    return SIG_ATOMIC_MAX;
+  }
+
+  private static final long SIZE_MAX = -1L;
+
+  /**
+   * Returns the C macro {@code SIZE_MAX} as a Java {@code long}.
+   *
+   * <p>The constant is stored as {@code -1L}, i.e. all 64 bits set. Java has no unsigned 64-bit
+   * primitive, so the snippet below shows the signed reading; read as unsigned, the same bits
+   * are 18446744073709551615. Do not use the returned value as an upper bound in a signed
+   * comparison; use {@link Long#compareUnsigned(long, long)} instead.
+   *
+   * {@snippet lang=c :
+   * #define SIZE_MAX -1
+   * }
+   *
+   * @return {@code -1L}
+   */
+  public static long SIZE_MAX() {
+    return SIZE_MAX;
+  }
+
+  private static final int WCHAR_MIN = (int) -2147483648L;
+
+  /**
+   * {@snippet lang = c : * #define WCHAR_MIN -2147483648
+   * }
+   */
+  public static int WCHAR_MIN() {
+    return WCHAR_MIN;
+  }
+
+  private static final int WCHAR_MAX = (int) 2147483647L;
+
+  /**
+   * {@snippet lang = c : * #define WCHAR_MAX 2147483647
+   * }
+   */
+  public static int WCHAR_MAX() {
+    return WCHAR_MAX;
+  }
+
+  private static final int WINT_MIN = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define WINT_MIN 0
+   * }
+   */
+  public static int WINT_MIN() {
+    return WINT_MIN;
+  }
+
+  private static final int WINT_MAX = (int) 4294967295L;
+
+  /**
+   * Returns the C macro {@code WINT_MAX} narrowed to a Java {@code int}.
+   *
+   * <p>4294967295 does not fit in a signed 32-bit {@code int}; the cast keeps the low 32 bits
+   * ({@code 0xFFFFFFFF}), so the value returned here is {@code -1}. Use
+   * {@link Integer#toUnsignedLong(int)} to recover the unsigned magnitude.
+   *
+   * {@snippet lang=c :
+   * #define WINT_MAX 4294967295
+   * }
+   *
+   * @return {@code -1}, the signed reading of the all-ones 32-bit pattern
+   */
+  public static int WINT_MAX() {
+    return WINT_MAX;
+  }
+
+  // Built once from raw address 0; every call to NULL() hands back this same instance.
+  private static final MemorySegment NULL = MemorySegment.ofAddress(0L);
+
+  /**
+   * Returns the C {@code NULL} pointer as a {@link MemorySegment} created from address
+   * {@code 0}.
+   *
+   * {@snippet lang=c :
+   * #define NULL (void*) 0
+   * }
+   *
+   * @return the shared segment obtained from {@code MemorySegment.ofAddress(0L)}
+   */
+  public static MemorySegment NULL() {
+    return NULL;
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfFlatH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfFlatH.java
new file mode 100644
index 000000000..47353cc9e
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfFlatH.java
@@ -0,0 +1,2845 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class IvfFlatH {
+
+  /**
+   * Package-private no-argument constructor with an empty body. The members visible in this
+   * class are static, and per the original note the constructor is not meant to be invoked
+   * directly.
+   */
+  IvfFlatH() {
+    // Should not be called directly
+  }
+
+  // Automatic arena: its lifetime is managed by the garbage collector rather than by an
+  // explicit close() call.
+  static final Arena LIBRARY_ARENA = Arena.ofAuto();
+  // Read once, when this class is initialized, from the JVM system property
+  // "jextract.trace.downcalls" (e.g. -Djextract.trace.downcalls=true). Defaults to false.
+  static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls");
+
+  /**
+   * Prints one trace line of the form {@code name(arg1, arg2, ...)} to standard output.
+   *
+   * <p>Arguments are rendered with {@link String#valueOf(Object)}, so a {@code null} argument is
+   * printed as {@code null} instead of raising a {@link NullPointerException}.
+   *
+   * @param name name to print in front of the argument list
+   * @param args argument values to print, joined with {@code ", "}
+   */
+  static void traceDowncall(String name, Object... args) {
+    // String::valueOf (not Object::toString) so that tracing a call which passes a null
+    // argument can never be the reason the call fails.
+    String traceArgs = Arrays.stream(args).map(String::valueOf).collect(Collectors.joining(", "));
+    System.out.printf("%s(%s)\n", name, traceArgs);
+  }
+
+  /**
+   * Resolves a native symbol through {@code SYMBOL_LOOKUP}.
+   *
+   * @param symbol name of the native symbol to look up
+   * @return the memory segment the lookup returned for {@code symbol}
+   * @throws UnsatisfiedLinkError if the lookup finds nothing for {@code symbol}
+   */
+  static MemorySegment findOrThrow(String symbol) {
+    var match = SYMBOL_LOOKUP.find(symbol);
+    if (match.isEmpty()) {
+      throw new UnsatisfiedLinkError("unresolved symbol: " + symbol);
+    }
+    return match.get();
+  }
+
+  static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+    try {
+      return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+    } catch (ReflectiveOperationException ex) {
+      throw new AssertionError(ex);
+    }
+  }
+
+  /**
+   * Rebuilds {@code layout} so that every value layout inside it has byte alignment
+   * {@code align}.
+   *
+   * <p>Padding layouts are returned unchanged. Struct/union groups and sequences are recreated
+   * from their recursively re-aligned members via the {@link MemoryLayout} factory methods;
+   * this method does not copy a name set on the group or sequence itself onto the rebuilt
+   * layout.
+   *
+   * @param layout layout to re-align
+   * @param align  byte alignment applied to each value layout
+   * @return the re-aligned layout
+   */
+  static MemoryLayout align(MemoryLayout layout, long align) {
+    return switch (layout) {
+    case SequenceLayout seq -> MemoryLayout.sequenceLayout(seq.elementCount(), align(seq.elementLayout(), align));
+    case GroupLayout group -> {
+      var members = group.memberLayouts();
+      MemoryLayout[] realigned = new MemoryLayout[members.size()];
+      for (int i = 0; i < realigned.length; i++) {
+        realigned[i] = align(members.get(i), align);
+      }
+      if (group instanceof StructLayout) {
+        yield MemoryLayout.structLayout(realigned);
+      }
+      yield MemoryLayout.unionLayout(realigned);
+    }
+    case ValueLayout value -> value.withByteAlignment(align);
+    case PaddingLayout padding -> padding;
+    };
+  }
+
+  // Symbol resolution order: first the loader lookup (SymbolLookup.loaderLookup()), then,
+  // only if that finds nothing, the native linker's default lookup.
+  static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup().or(Linker.nativeLinker().defaultLookup());
+
+  // Java value layouts used for the C primitive types referenced by these bindings.
+  // Java's byte/short/int/long are signed, so unsigned C types that alias these layouts
+  // share the bit width only; callers must reinterpret the value themselves.
+  public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+  public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+  public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+  public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+  public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+  public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+  public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+  // Pointer layout whose target layout is a byte sequence of Long.MAX_VALUE elements.
+  public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+      .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE));
+  // C `long` is bound to the 64-bit JAVA_LONG here.
+  // NOTE(review): that presumably reflects an LP64 target (64-bit Linux); confirm before
+  // reusing these bindings on a platform where C `long` is 32 bits.
+  public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG;
+  private static final int _STDINT_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _STDINT_H 1
+   * }
+   */
+  public static int _STDINT_H() {
+    return _STDINT_H;
+  }
+
+  private static final int _FEATURES_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _FEATURES_H 1
+   * }
+   */
+  public static int _FEATURES_H() {
+    return _FEATURES_H;
+  }
+
+  private static final int _DEFAULT_SOURCE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _DEFAULT_SOURCE 1
+   * }
+   */
+  public static int _DEFAULT_SOURCE() {
+    return _DEFAULT_SOURCE;
+  }
+
+  private static final int __GLIBC_USE_ISOC2X = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_ISOC2X 0
+   * }
+   */
+  public static int __GLIBC_USE_ISOC2X() {
+    return __GLIBC_USE_ISOC2X;
+  }
+
+  private static final int __USE_ISOC11 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ISOC11 1
+   * }
+   */
+  public static int __USE_ISOC11() {
+    return __USE_ISOC11;
+  }
+
+  private static final int __USE_ISOC99 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ISOC99 1
+   * }
+   */
+  public static int __USE_ISOC99() {
+    return __USE_ISOC99;
+  }
+
+  private static final int __USE_ISOC95 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ISOC95 1
+   * }
+   */
+  public static int __USE_ISOC95() {
+    return __USE_ISOC95;
+  }
+
+  private static final int __USE_POSIX_IMPLICITLY = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX_IMPLICITLY 1
+   * }
+   */
+  public static int __USE_POSIX_IMPLICITLY() {
+    return __USE_POSIX_IMPLICITLY;
+  }
+
+  private static final int _POSIX_SOURCE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _POSIX_SOURCE 1
+   * }
+   */
+  public static int _POSIX_SOURCE() {
+    return _POSIX_SOURCE;
+  }
+
+  private static final int __USE_POSIX = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX 1
+   * }
+   */
+  public static int __USE_POSIX() {
+    return __USE_POSIX;
+  }
+
+  private static final int __USE_POSIX2 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX2 1
+   * }
+   */
+  public static int __USE_POSIX2() {
+    return __USE_POSIX2;
+  }
+
+  private static final int __USE_POSIX199309 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX199309 1
+   * }
+   */
+  public static int __USE_POSIX199309() {
+    return __USE_POSIX199309;
+  }
+
+  private static final int __USE_POSIX199506 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_POSIX199506 1
+   * }
+   */
+  public static int __USE_POSIX199506() {
+    return __USE_POSIX199506;
+  }
+
+  private static final int __USE_XOPEN2K = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_XOPEN2K 1
+   * }
+   */
+  public static int __USE_XOPEN2K() {
+    return __USE_XOPEN2K;
+  }
+
+  private static final int __USE_XOPEN2K8 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_XOPEN2K8 1
+   * }
+   */
+  public static int __USE_XOPEN2K8() {
+    return __USE_XOPEN2K8;
+  }
+
+  private static final int _ATFILE_SOURCE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _ATFILE_SOURCE 1
+   * }
+   */
+  public static int _ATFILE_SOURCE() {
+    return _ATFILE_SOURCE;
+  }
+
+  private static final int __WORDSIZE = (int) 64L;
+
+  /**
+   * {@snippet lang = c : * #define __WORDSIZE 64
+   * }
+   */
+  public static int __WORDSIZE() {
+    return __WORDSIZE;
+  }
+
+  private static final int __WORDSIZE_TIME64_COMPAT32 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __WORDSIZE_TIME64_COMPAT32 1
+   * }
+   */
+  public static int __WORDSIZE_TIME64_COMPAT32() {
+    return __WORDSIZE_TIME64_COMPAT32;
+  }
+
+  private static final int __SYSCALL_WORDSIZE = (int) 64L;
+
+  /**
+   * {@snippet lang = c : * #define __SYSCALL_WORDSIZE 64
+   * }
+   */
+  public static int __SYSCALL_WORDSIZE() {
+    return __SYSCALL_WORDSIZE;
+  }
+
+  private static final int __USE_MISC = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_MISC 1
+   * }
+   */
+  public static int __USE_MISC() {
+    return __USE_MISC;
+  }
+
+  private static final int __USE_ATFILE = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_ATFILE 1
+   * }
+   */
+  public static int __USE_ATFILE() {
+    return __USE_ATFILE;
+  }
+
+  private static final int __USE_FORTIFY_LEVEL = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __USE_FORTIFY_LEVEL 0
+   * }
+   */
+  public static int __USE_FORTIFY_LEVEL() {
+    return __USE_FORTIFY_LEVEL;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_GETS = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_DEPRECATED_GETS 0
+   * }
+   */
+  public static int __GLIBC_USE_DEPRECATED_GETS() {
+    return __GLIBC_USE_DEPRECATED_GETS;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_SCANF = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_DEPRECATED_SCANF 0
+   * }
+   */
+  public static int __GLIBC_USE_DEPRECATED_SCANF() {
+    return __GLIBC_USE_DEPRECATED_SCANF;
+  }
+
+  private static final int _STDC_PREDEF_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _STDC_PREDEF_H 1
+   * }
+   */
+  public static int _STDC_PREDEF_H() {
+    return _STDC_PREDEF_H;
+  }
+
+  private static final int __STDC_IEC_559__ = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_IEC_559__ 1
+   * }
+   */
+  public static int __STDC_IEC_559__() {
+    return __STDC_IEC_559__;
+  }
+
+  private static final int __STDC_IEC_559_COMPLEX__ = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __STDC_IEC_559_COMPLEX__ 1
+   * }
+   */
+  public static int __STDC_IEC_559_COMPLEX__() {
+    return __STDC_IEC_559_COMPLEX__;
+  }
+
+  private static final int __GNU_LIBRARY__ = (int) 6L;
+
+  /**
+   * {@snippet lang = c : * #define __GNU_LIBRARY__ 6
+   * }
+   */
+  public static int __GNU_LIBRARY__() {
+    return __GNU_LIBRARY__;
+  }
+
+  private static final int __GLIBC__ = (int) 2L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC__ 2
+   * }
+   */
+  public static int __GLIBC__() {
+    return __GLIBC__;
+  }
+
+  private static final int __GLIBC_MINOR__ = (int) 35L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_MINOR__ 35
+   * }
+   */
+  public static int __GLIBC_MINOR__() {
+    return __GLIBC_MINOR__;
+  }
+
+  private static final int _SYS_CDEFS_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _SYS_CDEFS_H 1
+   * }
+   */
+  public static int _SYS_CDEFS_H() {
+    return _SYS_CDEFS_H;
+  }
+
+  private static final int __glibc_c99_flexarr_available = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __glibc_c99_flexarr_available 1
+   * }
+   */
+  public static int __glibc_c99_flexarr_available() {
+    return __glibc_c99_flexarr_available;
+  }
+
+  private static final int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI 0
+   * }
+   */
+  public static int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI() {
+    return __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI;
+  }
+
+  private static final int __HAVE_GENERIC_SELECTION = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __HAVE_GENERIC_SELECTION 1
+   * }
+   */
+  public static int __HAVE_GENERIC_SELECTION() {
+    return __HAVE_GENERIC_SELECTION;
+  }
+
+  private static final int __GLIBC_USE_LIB_EXT2 = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_LIB_EXT2 0
+   * }
+   */
+  public static int __GLIBC_USE_LIB_EXT2() {
+    return __GLIBC_USE_LIB_EXT2;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_BFP_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT_C2X = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_BFP_EXT_C2X 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_EXT() {
+    return __GLIBC_USE_IEC_60559_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_FUNCS_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_TYPES_EXT = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define __GLIBC_USE_IEC_60559_TYPES_EXT 0
+   * }
+   */
+  public static int __GLIBC_USE_IEC_60559_TYPES_EXT() {
+    return __GLIBC_USE_IEC_60559_TYPES_EXT;
+  }
+
+  private static final int _BITS_TYPES_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_TYPES_H 1
+   * }
+   */
+  public static int _BITS_TYPES_H() {
+    return _BITS_TYPES_H;
+  }
+
+  private static final int _BITS_TYPESIZES_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_TYPESIZES_H 1
+   * }
+   */
+  public static int _BITS_TYPESIZES_H() {
+    return _BITS_TYPESIZES_H;
+  }
+
+  private static final int __OFF_T_MATCHES_OFF64_T = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __OFF_T_MATCHES_OFF64_T 1
+   * }
+   */
+  public static int __OFF_T_MATCHES_OFF64_T() {
+    return __OFF_T_MATCHES_OFF64_T;
+  }
+
+  private static final int __INO_T_MATCHES_INO64_T = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __INO_T_MATCHES_INO64_T 1
+   * }
+   */
+  public static int __INO_T_MATCHES_INO64_T() {
+    return __INO_T_MATCHES_INO64_T;
+  }
+
+  private static final int __RLIM_T_MATCHES_RLIM64_T = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __RLIM_T_MATCHES_RLIM64_T 1
+   * }
+   */
+  public static int __RLIM_T_MATCHES_RLIM64_T() {
+    return __RLIM_T_MATCHES_RLIM64_T;
+  }
+
+  private static final int __STATFS_MATCHES_STATFS64 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __STATFS_MATCHES_STATFS64 1
+   * }
+   */
+  public static int __STATFS_MATCHES_STATFS64() {
+    return __STATFS_MATCHES_STATFS64;
+  }
+
+  private static final int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 1
+   * }
+   */
+  public static int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64() {
+    return __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64;
+  }
+
+  private static final int __FD_SETSIZE = (int) 1024L;
+
+  /**
+   * {@snippet lang = c : * #define __FD_SETSIZE 1024
+   * }
+   */
+  public static int __FD_SETSIZE() {
+    return __FD_SETSIZE;
+  }
+
+  private static final int _BITS_TIME64_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_TIME64_H 1
+   * }
+   */
+  public static int _BITS_TIME64_H() {
+    return _BITS_TIME64_H;
+  }
+
+  private static final int _BITS_WCHAR_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_WCHAR_H 1
+   * }
+   */
+  public static int _BITS_WCHAR_H() {
+    return _BITS_WCHAR_H;
+  }
+
+  private static final int _BITS_STDINT_INTN_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_STDINT_INTN_H 1
+   * }
+   */
+  public static int _BITS_STDINT_INTN_H() {
+    return _BITS_STDINT_INTN_H;
+  }
+
+  private static final int _BITS_STDINT_UINTN_H = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define _BITS_STDINT_UINTN_H 1
+   * }
+   */
+  public static int _BITS_STDINT_UINTN_H() {
+    return _BITS_STDINT_UINTN_H;
+  }
+
+  private static final int DLPACK_MAJOR_VERSION = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define DLPACK_MAJOR_VERSION 1
+   * }
+   */
+  public static int DLPACK_MAJOR_VERSION() {
+    return DLPACK_MAJOR_VERSION;
+  }
+
+  private static final int DLPACK_MINOR_VERSION = (int) 0L;
+
+  /**
+   * {@snippet lang = c : * #define DLPACK_MINOR_VERSION 0
+   * }
+   */
+  public static int DLPACK_MINOR_VERSION() {
+    return DLPACK_MINOR_VERSION;
+  }
+
+  private static final int true_ = (int) 1L;
+
+  /**
+   * Returns the value of the C macro {@code true}, which is {@code 1}.
+   *
+   * <p>The Java member is spelled {@code true_}; the bare word {@code true} is a reserved
+   * literal in Java and cannot be used as an identifier.
+   *
+   * {@snippet lang=c :
+   * #define true 1
+   * }
+   *
+   * @return {@code 1}
+   */
+  public static int true_() {
+    return true_;
+  }
+
+  private static final int false_ = (int) 0L;
+
+  /**
+   * Returns the value of the C macro {@code false}, which is {@code 0}.
+   *
+   * <p>The Java member is spelled {@code false_}; the bare word {@code false} is a reserved
+   * literal in Java and cannot be used as an identifier.
+   *
+   * {@snippet lang=c :
+   * #define false 0
+   * }
+   *
+   * @return {@code 0}
+   */
+  public static int false_() {
+    return false_;
+  }
+
+  private static final int __bool_true_false_are_defined = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * #define __bool_true_false_are_defined 1
+   * }
+   */
+  public static int __bool_true_false_are_defined() {
+    return __bool_true_false_are_defined;
+  }
+
+  /**
+   * {@snippet lang = c : * typedef unsigned char __u_char
+   * }
+   */
+  public static final OfByte __u_char = IvfFlatH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef unsigned short __u_short
+   * }
+   */
+  public static final OfShort __u_short = IvfFlatH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __u_int
+   * }
+   */
+  public static final OfInt __u_int = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __u_long
+   * }
+   */
+  public static final OfLong __u_long = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef signed char __int8_t
+   * }
+   */
+  public static final OfByte __int8_t = IvfFlatH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef unsigned char __uint8_t
+   * }
+   */
+  public static final OfByte __uint8_t = IvfFlatH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef short __int16_t
+   * }
+   */
+  public static final OfShort __int16_t = IvfFlatH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef unsigned short __uint16_t
+   * }
+   */
+  public static final OfShort __uint16_t = IvfFlatH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef int __int32_t
+   * }
+   */
+  public static final OfInt __int32_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __uint32_t
+   * }
+   */
+  public static final OfInt __uint32_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __int64_t
+   * }
+   */
+  public static final OfLong __int64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __uint64_t
+   * }
+   */
+  public static final OfLong __uint64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __int8_t __int_least8_t
+   * }
+   */
+  public static final OfByte __int_least8_t = IvfFlatH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __uint8_t __uint_least8_t
+   * }
+   */
+  public static final OfByte __uint_least8_t = IvfFlatH.C_CHAR;
+  /**
+   * {@snippet lang = c : * typedef __int16_t __int_least16_t
+   * }
+   */
+  public static final OfShort __int_least16_t = IvfFlatH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __uint16_t __uint_least16_t
+   * }
+   */
+  public static final OfShort __uint_least16_t = IvfFlatH.C_SHORT;
+  /**
+   * {@snippet lang = c : * typedef __int32_t __int_least32_t
+   * }
+   */
+  public static final OfInt __int_least32_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __uint32_t __uint_least32_t
+   * }
+   */
+  public static final OfInt __uint_least32_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef __int64_t __int_least64_t
+   * }
+   */
+  public static final OfLong __int_least64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef __uint64_t __uint_least64_t
+   * }
+   */
+  public static final OfLong __uint_least64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __quad_t
+   * }
+   */
+  public static final OfLong __quad_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __u_quad_t
+   * }
+   */
+  public static final OfLong __u_quad_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __intmax_t
+   * }
+   */
+  public static final OfLong __intmax_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __uintmax_t
+   * }
+   */
+  public static final OfLong __uintmax_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __dev_t
+   * }
+   */
+  public static final OfLong __dev_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __uid_t
+   * }
+   */
+  public static final OfInt __uid_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __gid_t
+   * }
+   */
+  public static final OfInt __gid_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __ino_t
+   * }
+   */
+  public static final OfLong __ino_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __ino64_t
+   * }
+   */
+  public static final OfLong __ino64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __mode_t
+   * }
+   */
+  public static final OfInt __mode_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __nlink_t
+   * }
+   */
+  public static final OfLong __nlink_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __off_t
+   * }
+   */
+  public static final OfLong __off_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __off64_t
+   * }
+   */
+  public static final OfLong __off64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef int __pid_t
+   * }
+   */
+  public static final OfInt __pid_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __clock_t
+   * }
+   */
+  public static final OfLong __clock_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __rlim_t
+   * }
+   */
+  public static final OfLong __rlim_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned long __rlim64_t
+   * }
+   */
+  public static final OfLong __rlim64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __id_t
+   * }
+   */
+  public static final OfInt __id_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __time_t
+   * }
+   */
+  public static final OfLong __time_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef unsigned int __useconds_t
+   * }
+   */
+  public static final OfInt __useconds_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef long __suseconds_t
+   * }
+   */
+  public static final OfLong __suseconds_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef long __suseconds64_t
+   * }
+   */
+  public static final OfLong __suseconds64_t = IvfFlatH.C_LONG;
+  /**
+   * {@snippet lang = c : * typedef int __daddr_t
+   * }
+   */
+  public static final OfInt __daddr_t = IvfFlatH.C_INT;
+  /**
+   * {@snippet lang = c : * typedef int __key_t
+   * }
+   */
+  public static final OfInt __key_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef int __clockid_t
+   * }
+   */
+  public static final OfInt __clockid_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef void *__timer_t
+   * }
+   */
+  public static final AddressLayout __timer_t = IvfFlatH.C_POINTER;
+  /** {@snippet lang=c :
+   * typedef long __blksize_t
+   * }
+   */
+  public static final OfLong __blksize_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long __blkcnt_t
+   * }
+   */
+  public static final OfLong __blkcnt_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long __blkcnt64_t
+   * }
+   */
+  public static final OfLong __blkcnt64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long __fsblkcnt_t
+   * }
+   */
+  public static final OfLong __fsblkcnt_t = IvfFlatH.C_LONG; // unsigned in C; carried as a signed Java long
+  /** {@snippet lang=c :
+   * typedef unsigned long __fsblkcnt64_t
+   * }
+   */
+  public static final OfLong __fsblkcnt64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long __fsfilcnt_t
+   * }
+   */
+  public static final OfLong __fsfilcnt_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long __fsfilcnt64_t
+   * }
+   */
+  public static final OfLong __fsfilcnt64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long __fsword_t
+   * }
+   */
+  public static final OfLong __fsword_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long __ssize_t
+   * }
+   */
+  public static final OfLong __ssize_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long __syscall_slong_t
+   * }
+   */
+  public static final OfLong __syscall_slong_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long __syscall_ulong_t
+   * }
+   */
+  public static final OfLong __syscall_ulong_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef __off64_t __loff_t
+   * }
+   */
+  public static final OfLong __loff_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef char *__caddr_t
+   * }
+   */
+  public static final AddressLayout __caddr_t = IvfFlatH.C_POINTER;
+  /** {@snippet lang=c :
+   * typedef long __intptr_t
+   * }
+   */
+  public static final OfLong __intptr_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned int __socklen_t
+   * }
+   */
+  public static final OfInt __socklen_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef int __sig_atomic_t
+   * }
+   */
+  public static final OfInt __sig_atomic_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef __int8_t int8_t
+   * }
+   */
+  public static final OfByte int8_t = IvfFlatH.C_CHAR;
+  /** {@snippet lang=c :
+   * typedef __int16_t int16_t
+   * }
+   */
+  public static final OfShort int16_t = IvfFlatH.C_SHORT;
+  /** {@snippet lang=c :
+   * typedef __int32_t int32_t
+   * }
+   */
+  public static final OfInt int32_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef __int64_t int64_t
+   * }
+   */
+  public static final OfLong int64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef __uint8_t uint8_t
+   * }
+   */
+  public static final OfByte uint8_t = IvfFlatH.C_CHAR; // unsigned in C; carried as a signed Java byte
+  /** {@snippet lang=c :
+   * typedef __uint16_t uint16_t
+   * }
+   */
+  public static final OfShort uint16_t = IvfFlatH.C_SHORT;
+  /** {@snippet lang=c :
+   * typedef __uint32_t uint32_t
+   * }
+   */
+  public static final OfInt uint32_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef __uint64_t uint64_t
+   * }
+   */
+  public static final OfLong uint64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef __int_least8_t int_least8_t
+   * }
+   */
+  public static final OfByte int_least8_t = IvfFlatH.C_CHAR;
+  /** {@snippet lang=c :
+   * typedef __int_least16_t int_least16_t
+   * }
+   */
+  public static final OfShort int_least16_t = IvfFlatH.C_SHORT;
+  /** {@snippet lang=c :
+   * typedef __int_least32_t int_least32_t
+   * }
+   */
+  public static final OfInt int_least32_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef __int_least64_t int_least64_t
+   * }
+   */
+  public static final OfLong int_least64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef __uint_least8_t uint_least8_t
+   * }
+   */
+  public static final OfByte uint_least8_t = IvfFlatH.C_CHAR;
+  /** {@snippet lang=c :
+   * typedef __uint_least16_t uint_least16_t
+   * }
+   */
+  public static final OfShort uint_least16_t = IvfFlatH.C_SHORT;
+  /** {@snippet lang=c :
+   * typedef __uint_least32_t uint_least32_t
+   * }
+   */
+  public static final OfInt uint_least32_t = IvfFlatH.C_INT;
+  /** {@snippet lang=c :
+   * typedef __uint_least64_t uint_least64_t
+   * }
+   */
+  public static final OfLong uint_least64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef signed char int_fast8_t
+   * }
+   */
+  public static final OfByte int_fast8_t = IvfFlatH.C_CHAR;
+  /** {@snippet lang=c :
+   * typedef long int_fast16_t
+   * }
+   */
+  public static final OfLong int_fast16_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long int_fast32_t
+   * }
+   */
+  public static final OfLong int_fast32_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long int_fast64_t
+   * }
+   */
+  public static final OfLong int_fast64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned char uint_fast8_t
+   * }
+   */
+  public static final OfByte uint_fast8_t = IvfFlatH.C_CHAR;
+  /** {@snippet lang=c :
+   * typedef unsigned long uint_fast16_t
+   * }
+   */
+  public static final OfLong uint_fast16_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long uint_fast32_t
+   * }
+   */
+  public static final OfLong uint_fast32_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long uint_fast64_t
+   * }
+   */
+  public static final OfLong uint_fast64_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef long intptr_t
+   * }
+   */
+  public static final OfLong intptr_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long uintptr_t
+   * }
+   */
+  public static final OfLong uintptr_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef __intmax_t intmax_t
+   * }
+   */
+  public static final OfLong intmax_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef __uintmax_t uintmax_t
+   * }
+   */
+  public static final OfLong uintmax_t = IvfFlatH.C_LONG;
+  private static final int CUVS_ERROR = (int) 0L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.CUVS_ERROR = 0
+   * }
+   */
+  public static int CUVS_ERROR() {
+    return CUVS_ERROR;
+  }
+
+  private static final int CUVS_SUCCESS = (int) 1L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.CUVS_SUCCESS = 1
+   * }
+   */
+  public static int CUVS_SUCCESS() {
+    return CUVS_SUCCESS;
+  }
+
+  /** {@snippet lang=c :
+   * typedef uintptr_t cuvsResources_t
+   * }
+   */
+  public static final OfLong cuvsResources_t = IvfFlatH.C_LONG; // uintptr_t in C; passed as a Java long
+  private static final int L2Expanded = (int) 0L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.L2Expanded = 0
+   * }
+   */
+  public static int L2Expanded() {
+    return L2Expanded;
+  }
+
+  private static final int L2SqrtExpanded = (int) 1L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.L2SqrtExpanded = 1
+   * }
+   */
+  public static int L2SqrtExpanded() {
+    return L2SqrtExpanded;
+  }
+
+  private static final int CosineExpanded = (int) 2L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.CosineExpanded = 2
+   * }
+   */
+  public static int CosineExpanded() {
+    return CosineExpanded;
+  }
+
+  private static final int L1 = (int) 3L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.L1 = 3
+   * }
+   */
+  public static int L1() {
+    return L1;
+  }
+
+  private static final int L2Unexpanded = (int) 4L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.L2Unexpanded = 4
+   * }
+   */
+  public static int L2Unexpanded() {
+    return L2Unexpanded;
+  }
+
+  private static final int L2SqrtUnexpanded = (int) 5L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.L2SqrtUnexpanded = 5
+   * }
+   */
+  public static int L2SqrtUnexpanded() {
+    return L2SqrtUnexpanded;
+  }
+
+  private static final int InnerProduct = (int) 6L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.InnerProduct = 6
+   * }
+   */
+  public static int InnerProduct() {
+    return InnerProduct;
+  }
+
+  private static final int Linf = (int) 7L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.Linf = 7
+   * }
+   */
+  public static int Linf() {
+    return Linf;
+  }
+
+  private static final int Canberra = (int) 8L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.Canberra = 8
+   * }
+   */
+  public static int Canberra() {
+    return Canberra;
+  }
+
+  private static final int LpUnexpanded = (int) 9L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.LpUnexpanded = 9
+   * }
+   */
+  public static int LpUnexpanded() {
+    return LpUnexpanded;
+  }
+
+  private static final int CorrelationExpanded = (int) 10L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.CorrelationExpanded = 10
+   * }
+   */
+  public static int CorrelationExpanded() {
+    return CorrelationExpanded;
+  }
+
+  private static final int JaccardExpanded = (int) 11L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.JaccardExpanded = 11
+   * }
+   */
+  public static int JaccardExpanded() {
+    return JaccardExpanded;
+  }
+
+  private static final int HellingerExpanded = (int) 12L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.HellingerExpanded = 12
+   * }
+   */
+  public static int HellingerExpanded() {
+    return HellingerExpanded;
+  }
+
+  private static final int Haversine = (int) 13L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.Haversine = 13
+   * }
+   */
+  public static int Haversine() {
+    return Haversine;
+  }
+
+  private static final int BrayCurtis = (int) 14L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.BrayCurtis = 14
+   * }
+   */
+  public static int BrayCurtis() {
+    return BrayCurtis;
+  }
+
+  private static final int JensenShannon = (int) 15L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.JensenShannon = 15
+   * }
+   */
+  public static int JensenShannon() {
+    return JensenShannon;
+  }
+
+  private static final int HammingUnexpanded = (int) 16L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.HammingUnexpanded = 16
+   * }
+   */
+  public static int HammingUnexpanded() {
+    return HammingUnexpanded;
+  }
+
+  private static final int KLDivergence = (int) 17L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.KLDivergence = 17
+   * }
+   */
+  public static int KLDivergence() {
+    return KLDivergence;
+  }
+
+  private static final int RusselRaoExpanded = (int) 18L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.RusselRaoExpanded = 18
+   * }
+   */
+  public static int RusselRaoExpanded() {
+    return RusselRaoExpanded;
+  }
+
+  private static final int DiceExpanded = (int) 19L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.DiceExpanded = 19
+   * }
+   */
+  public static int DiceExpanded() {
+    return DiceExpanded;
+  }
+
+  private static final int Precomputed = (int) 100L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.Precomputed = 100
+   * }
+   */
+  public static int Precomputed() {
+    return Precomputed;
+  }
+
+  /** {@snippet lang=c :
+   * typedef long ptrdiff_t
+   * }
+   */
+  public static final OfLong ptrdiff_t = IvfFlatH.C_LONG;
+  /** {@snippet lang=c :
+   * typedef unsigned long size_t
+   * }
+   */
+  public static final OfLong size_t = IvfFlatH.C_LONG; // unsigned in C; carried as a signed Java long
+  /** {@snippet lang=c :
+   * typedef int wchar_t
+   * }
+   */
+  public static final OfInt wchar_t = IvfFlatH.C_INT;
+  private static final int kDLCPU = (int) 1L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLCPU = 1
+   * }
+   */
+  public static int kDLCPU() {
+    return kDLCPU;
+  }
+
+  private static final int kDLCUDA = (int) 2L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLCUDA = 2
+   * }
+   */
+  public static int kDLCUDA() {
+    return kDLCUDA;
+  }
+
+  private static final int kDLCUDAHost = (int) 3L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLCUDAHost = 3
+   * }
+   */
+  public static int kDLCUDAHost() {
+    return kDLCUDAHost;
+  }
+
+  private static final int kDLOpenCL = (int) 4L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLOpenCL = 4
+   * }
+   */
+  public static int kDLOpenCL() {
+    return kDLOpenCL;
+  }
+
+  private static final int kDLVulkan = (int) 7L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLVulkan = 7
+   * }
+   */
+  public static int kDLVulkan() {
+    return kDLVulkan;
+  }
+
+  private static final int kDLMetal = (int) 8L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLMetal = 8
+   * }
+   */
+  public static int kDLMetal() {
+    return kDLMetal;
+  }
+
+  private static final int kDLVPI = (int) 9L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLVPI = 9
+   * }
+   */
+  public static int kDLVPI() {
+    return kDLVPI;
+  }
+
+  private static final int kDLROCM = (int) 10L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLROCM = 10
+   * }
+   */
+  public static int kDLROCM() {
+    return kDLROCM;
+  }
+
+  private static final int kDLROCMHost = (int) 11L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLROCMHost = 11
+   * }
+   */
+  public static int kDLROCMHost() {
+    return kDLROCMHost;
+  }
+
+  private static final int kDLExtDev = (int) 12L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLExtDev = 12
+   * }
+   */
+  public static int kDLExtDev() {
+    return kDLExtDev;
+  }
+
+  private static final int kDLCUDAManaged = (int) 13L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLCUDAManaged = 13
+   * }
+   */
+  public static int kDLCUDAManaged() {
+    return kDLCUDAManaged;
+  }
+
+  private static final int kDLOneAPI = (int) 14L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLOneAPI = 14
+   * }
+   */
+  public static int kDLOneAPI() {
+    return kDLOneAPI;
+  }
+
+  private static final int kDLWebGPU = (int) 15L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLWebGPU = 15
+   * }
+   */
+  public static int kDLWebGPU() {
+    return kDLWebGPU;
+  }
+
+  private static final int kDLHexagon = (int) 16L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLHexagon = 16
+   * }
+   */
+  public static int kDLHexagon() {
+    return kDLHexagon;
+  }
+
+  private static final int kDLMAIA = (int) 17L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLMAIA = 17
+   * }
+   */
+  public static int kDLMAIA() {
+    return kDLMAIA;
+  }
+
+  private static final int kDLInt = (int) 0L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLInt = 0
+   * }
+   */
+  public static int kDLInt() {
+    return kDLInt;
+  }
+
+  private static final int kDLUInt = (int) 1L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLUInt = 1
+   * }
+   */
+  public static int kDLUInt() {
+    return kDLUInt;
+  }
+
+  private static final int kDLFloat = (int) 2L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLFloat = 2
+   * }
+   */
+  public static int kDLFloat() {
+    return kDLFloat;
+  }
+
+  private static final int kDLOpaqueHandle = (int) 3L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLOpaqueHandle = 3
+   * }
+   */
+  public static int kDLOpaqueHandle() {
+    return kDLOpaqueHandle;
+  }
+
+  private static final int kDLBfloat = (int) 4L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLBfloat = 4
+   * }
+   */
+  public static int kDLBfloat() {
+    return kDLBfloat;
+  }
+
+  private static final int kDLComplex = (int) 5L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLComplex = 5
+   * }
+   */
+  public static int kDLComplex() {
+    return kDLComplex;
+  }
+
+  private static final int kDLBool = (int) 6L;
+
+  /** {@snippet lang=c :
+   * enum <anonymous>.kDLBool = 6
+   * }
+   */
+  public static int kDLBool() {
+    return kDLBool;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * typedef struct cuvsIvfFlatIndexParams {
+   *     cuvsDistanceType metric;
+   *     float metric_arg;
+   *     _Bool add_data_on_build;
+   *     uint32_t n_lists;
+   *     uint32_t kmeans_n_iters;
+   *     double kmeans_trainset_fraction;
+   *     _Bool adaptive_centers;
+   *     _Bool conservative_memory_allocation;
+   * } *cuvsIvfFlatIndexParams_t
+   * }
+   */
+  public static final AddressLayout cuvsIvfFlatIndexParams_t = IvfFlatH.C_POINTER; // pointer typedef: address only
+
+  private static class cuvsIvfFlatIndexParamsCreate {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatIndexParamsCreate");
+    // Downcall handle for ADDR, typed by DESC: C_INT result, one C_POINTER argument.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t *index_params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatIndexParamsCreate$descriptor() {
+    return cuvsIvfFlatIndexParamsCreate.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t *index_params)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatIndexParamsCreate$handle() {
+    return cuvsIvfFlatIndexParamsCreate.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t *index_params)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatIndexParamsCreate$address() {
+    return cuvsIvfFlatIndexParamsCreate.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t *index_params)
+   * }
+   */
+  public static int cuvsIvfFlatIndexParamsCreate(MemorySegment index_params) {
+    var mh$ = cuvsIvfFlatIndexParamsCreate.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatIndexParamsCreate", index_params);
+      }
+      return (int) mh$.invokeExact(index_params); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatIndexParamsDestroy {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatIndexParamsDestroy");
+    // Downcall handle for ADDR, typed by DESC: C_INT result, one C_POINTER argument.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t index_params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatIndexParamsDestroy$descriptor() {
+    return cuvsIvfFlatIndexParamsDestroy.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t index_params)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatIndexParamsDestroy$handle() {
+    return cuvsIvfFlatIndexParamsDestroy.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t index_params)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatIndexParamsDestroy$address() {
+    return cuvsIvfFlatIndexParamsDestroy.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t index_params)
+   * }
+   */
+  public static int cuvsIvfFlatIndexParamsDestroy(MemorySegment index_params) {
+    var mh$ = cuvsIvfFlatIndexParamsDestroy.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatIndexParamsDestroy", index_params);
+      }
+      return (int) mh$.invokeExact(index_params); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  /**
+   * {@snippet lang=c :
+   * typedef struct cuvsIvfFlatSearchParams {
+   *     uint32_t n_probes;
+   * } *cuvsIvfFlatSearchParams_t
+   * }
+   */
+  public static final AddressLayout cuvsIvfFlatSearchParams_t = IvfFlatH.C_POINTER; // pointer typedef: address only
+
+  private static class cuvsIvfFlatSearchParamsCreate {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatSearchParamsCreate");
+    // Downcall handle for ADDR, typed by DESC: C_INT result, one C_POINTER argument.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t *params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatSearchParamsCreate$descriptor() {
+    return cuvsIvfFlatSearchParamsCreate.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t *params)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatSearchParamsCreate$handle() {
+    return cuvsIvfFlatSearchParamsCreate.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t *params)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatSearchParamsCreate$address() {
+    return cuvsIvfFlatSearchParamsCreate.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t *params)
+   * }
+   */
+  public static int cuvsIvfFlatSearchParamsCreate(MemorySegment params) {
+    var mh$ = cuvsIvfFlatSearchParamsCreate.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatSearchParamsCreate", params);
+      }
+      return (int) mh$.invokeExact(params); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatSearchParamsDestroy {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatSearchParamsDestroy");
+    // Downcall handle for ADDR, typed by DESC: C_INT result, one C_POINTER argument.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatSearchParamsDestroy$descriptor() {
+    return cuvsIvfFlatSearchParamsDestroy.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t params)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatSearchParamsDestroy$handle() {
+    return cuvsIvfFlatSearchParamsDestroy.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t params)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatSearchParamsDestroy$address() {
+    return cuvsIvfFlatSearchParamsDestroy.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t params)
+   * }
+   */
+  public static int cuvsIvfFlatSearchParamsDestroy(MemorySegment params) {
+    var mh$ = cuvsIvfFlatSearchParamsDestroy.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatSearchParamsDestroy", params);
+      }
+      return (int) mh$.invokeExact(params); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  /** {@snippet lang=c :
+   * typedef cuvsIvfFlatIndex *cuvsIvfFlatIndex_t
+   * }
+   */
+  public static final AddressLayout cuvsIvfFlatIndex_t = IvfFlatH.C_POINTER;
+
+  private static class cuvsIvfFlatIndexCreate {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatIndexCreate");
+    // Downcall handle for ADDR, typed by DESC: C_INT result, one C_POINTER argument.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t *index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatIndexCreate$descriptor() {
+    return cuvsIvfFlatIndexCreate.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t *index)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatIndexCreate$handle() {
+    return cuvsIvfFlatIndexCreate.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t *index)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatIndexCreate$address() {
+    return cuvsIvfFlatIndexCreate.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t *index)
+   * }
+   */
+  public static int cuvsIvfFlatIndexCreate(MemorySegment index) {
+    var mh$ = cuvsIvfFlatIndexCreate.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatIndexCreate", index);
+      }
+      return (int) mh$.invokeExact(index); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatIndexDestroy {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatIndexDestroy");
+    // Downcall handle for ADDR, typed by DESC: C_INT result, one C_POINTER argument.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatIndexDestroy$descriptor() {
+    return cuvsIvfFlatIndexDestroy.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatIndexDestroy$handle() {
+    return cuvsIvfFlatIndexDestroy.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatIndexDestroy$address() {
+    return cuvsIvfFlatIndexDestroy.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static int cuvsIvfFlatIndexDestroy(MemorySegment index) {
+    var mh$ = cuvsIvfFlatIndexDestroy.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatIndexDestroy", index);
+      }
+      return (int) mh$.invokeExact(index); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatBuild {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_LONG,
+        IvfFlatH.C_POINTER, IvfFlatH.C_POINTER, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatBuild");
+    // Downcall handle for ADDR, typed by DESC: C_INT result; C_LONG then three C_POINTER arguments.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res, cuvsIvfFlatIndexParams_t index_params, DLManagedTensor *dataset, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatBuild$descriptor() {
+    return cuvsIvfFlatBuild.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res, cuvsIvfFlatIndexParams_t index_params, DLManagedTensor *dataset, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatBuild$handle() {
+    return cuvsIvfFlatBuild.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res, cuvsIvfFlatIndexParams_t index_params, DLManagedTensor *dataset, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatBuild$address() {
+    return cuvsIvfFlatBuild.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res, cuvsIvfFlatIndexParams_t index_params, DLManagedTensor *dataset, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static int cuvsIvfFlatBuild(long res, MemorySegment index_params, MemorySegment dataset, MemorySegment index) {
+    var mh$ = cuvsIvfFlatBuild.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatBuild", res, index_params, dataset, index);
+      }
+      return (int) mh$.invokeExact(res, index_params, dataset, index); // call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatSearch {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_LONG,
+        IvfFlatH.C_POINTER, IvfFlatH.C_POINTER, IvfFlatH.C_POINTER, IvfFlatH.C_POINTER, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatSearch");
+    // Downcall handle for ADDR, typed by DESC: C_INT result; C_LONG then five C_POINTER arguments.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, cuvsIvfFlatSearchParams_t search_params, cuvsIvfFlatIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatSearch$descriptor() {
+    return cuvsIvfFlatSearch.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, cuvsIvfFlatSearchParams_t search_params, cuvsIvfFlatIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatSearch$handle() {
+    return cuvsIvfFlatSearch.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, cuvsIvfFlatSearchParams_t search_params, cuvsIvfFlatIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatSearch$address() {
+    return cuvsIvfFlatSearch.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, cuvsIvfFlatSearchParams_t search_params, cuvsIvfFlatIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static int cuvsIvfFlatSearch(long res, MemorySegment search_params, MemorySegment index, MemorySegment queries,
+      MemorySegment neighbors, MemorySegment distances) {
+    var mh$ = cuvsIvfFlatSearch.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatSearch", res, search_params, index, queries, neighbors, distances);
+      }
+      // invokeExact: the call-site argument types and the (int) cast must match DESC exactly.
+      return (int) mh$.invokeExact(res, search_params, index, queries, neighbors, distances);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatSerialize {
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_LONG,
+        IvfFlatH.C_POINTER, IvfFlatH.C_POINTER);
+    // All three fields are initialized together, the first time any of them is read.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatSerialize");
+    // Downcall handle for ADDR, typed by DESC: C_INT result; C_LONG then two C_POINTER arguments.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatSerialize$descriptor() {
+    return cuvsIvfFlatSerialize.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatSerialize$handle() {
+    return cuvsIvfFlatSerialize.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatSerialize$address() {
+    return cuvsIvfFlatSerialize.ADDR;
+  }
+
+  /**
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static int cuvsIvfFlatSerialize(long res, MemorySegment filename, MemorySegment index) {
+    var mh$ = cuvsIvfFlatSerialize.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatSerialize", res, filename, index);
+      }
+      return (int) mh$.invokeExact(res, filename, index); // invokeExact: call-site types must match DESC exactly
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatDeserialize { // holder: the statics below are set up when this class initializes
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_LONG,
+        IvfFlatH.C_POINTER, IvfFlatH.C_POINTER); // C_INT result; C_LONG, C_POINTER, C_POINTER arguments
+    // Address of the native symbol "cuvsIvfFlatDeserialize", resolved through IvfFlatH.findOrThrow.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatDeserialize");
+    // Downcall handle that calls ADDR with the DESC signature; created once and kept here.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatDeserialize$descriptor() {
+    return cuvsIvfFlatDeserialize.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatDeserialize$handle() {
+    return cuvsIvfFlatDeserialize.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatDeserialize$address() {
+    return cuvsIvfFlatDeserialize.ADDR;
+  }
+
+  /**
+   * Calls the native function and returns its {@code cuvsError_t} result as an int; any Throwable
+   * from the call is rethrown wrapped in an AssertionError. C signature:
+   * {@code cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res, const char *filename, cuvsIvfFlatIndex_t index)}
+   */
+  public static int cuvsIvfFlatDeserialize(long res, MemorySegment filename, MemorySegment index) {
+    var mh$ = cuvsIvfFlatDeserialize.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatDeserialize", res, filename, index);
+      }
+      return (int) mh$.invokeExact(res, filename, index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfFlatExtend { // holder: the statics below are set up when this class initializes
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfFlatH.C_INT, IvfFlatH.C_LONG,
+        IvfFlatH.C_POINTER, IvfFlatH.C_POINTER, IvfFlatH.C_POINTER); // C_INT result; C_LONG + three C_POINTER args
+    // Address of the native symbol "cuvsIvfFlatExtend", resolved through IvfFlatH.findOrThrow.
+    public static final MemorySegment ADDR = IvfFlatH.findOrThrow("cuvsIvfFlatExtend");
+    // Downcall handle that calls ADDR with the DESC signature; created once and kept here.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfFlatExtend$descriptor() {
+    return cuvsIvfFlatExtend.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfFlatExtend$handle() {
+    return cuvsIvfFlatExtend.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfFlatExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfFlatIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfFlatExtend$address() {
+    return cuvsIvfFlatExtend.ADDR;
+  }
+
+  /**
+   * Calls the native function and returns its {@code cuvsError_t} result as an int; any Throwable
+   * from the call is rethrown wrapped in an AssertionError. C signature:
+   * {@code cuvsError_t cuvsIvfFlatExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfFlatIndex_t index)}
+   */
+  public static int cuvsIvfFlatExtend(long res, MemorySegment new_vectors, MemorySegment new_indices,
+      MemorySegment index) {
+    var mh$ = cuvsIvfFlatExtend.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfFlatExtend", res, new_vectors, new_indices, index);
+      }
+      return (int) mh$.invokeExact(res, new_vectors, new_indices, index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static final long _POSIX_C_SOURCE = 200809L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _POSIX_C_SOURCE 200809}.
+   */
+  public static long _POSIX_C_SOURCE() {
+    return _POSIX_C_SOURCE;
+  }
+
+  private static final int __TIMESIZE = (int) 64L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __TIMESIZE 64}.
+   */
+  public static int __TIMESIZE() {
+    return __TIMESIZE;
+  }
+
+  private static final long __STDC_IEC_60559_BFP__ = 201404L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __STDC_IEC_60559_BFP__ 201404}.
+   */
+  public static long __STDC_IEC_60559_BFP__() {
+    return __STDC_IEC_60559_BFP__;
+  }
+
+  private static final long __STDC_IEC_60559_COMPLEX__ = 201404L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __STDC_IEC_60559_COMPLEX__ 201404}.
+   */
+  public static long __STDC_IEC_60559_COMPLEX__() {
+    return __STDC_IEC_60559_COMPLEX__;
+  }
+
+  private static final long __STDC_ISO_10646__ = 201706L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __STDC_ISO_10646__ 201706}.
+   */
+  public static long __STDC_ISO_10646__() {
+    return __STDC_ISO_10646__;
+  }
+
+  private static final int __WCHAR_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __WCHAR_MAX 2147483647}.
+   */
+  public static int __WCHAR_MAX() {
+    return __WCHAR_MAX;
+  }
+
+  private static final int __WCHAR_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __WCHAR_MIN -2147483648}.
+   */
+  public static int __WCHAR_MIN() {
+    return __WCHAR_MIN;
+  }
+
+  private static final int INT8_MIN = (int) -128L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT8_MIN -128}.
+   */
+  public static int INT8_MIN() {
+    return INT8_MIN;
+  }
+
+  private static final int INT16_MIN = (int) -32768L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT16_MIN -32768}.
+   */
+  public static int INT16_MIN() {
+    return INT16_MIN;
+  }
+
+  private static final int INT32_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT32_MIN -2147483648}.
+   */
+  public static int INT32_MIN() {
+    return INT32_MIN;
+  }
+
+  private static final long INT64_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT64_MIN -9223372036854775808}.
+   */
+  public static long INT64_MIN() {
+    return INT64_MIN;
+  }
+
+  private static final int INT8_MAX = (int) 127L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT8_MAX 127}.
+   */
+  public static int INT8_MAX() {
+    return INT8_MAX;
+  }
+
+  private static final int INT16_MAX = (int) 32767L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT16_MAX 32767}.
+   */
+  public static int INT16_MAX() {
+    return INT16_MAX;
+  }
+
+  private static final int INT32_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT32_MAX 2147483647}.
+   */
+  public static int INT32_MAX() {
+    return INT32_MAX;
+  }
+
+  private static final long INT64_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT64_MAX 9223372036854775807}.
+   */
+  public static long INT64_MAX() {
+    return INT64_MAX;
+  }
+
+  private static final int UINT8_MAX = (int) 255L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define UINT8_MAX 255}.
+   */
+  public static int UINT8_MAX() {
+    return UINT8_MAX;
+  }
+
+  private static final int UINT16_MAX = (int) 65535L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define UINT16_MAX 65535}.
+   */
+  public static int UINT16_MAX() {
+    return UINT16_MAX;
+  }
+
+  private static final int UINT32_MAX = (int) 4294967295L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT32_MAX 4294967295}. The narrowing
+   * cast stores it as -1; {@code Integer.toUnsignedLong(UINT32_MAX())} recovers 4294967295.
+   */
+  public static int UINT32_MAX() {
+    return UINT32_MAX;
+  }
+
+  private static final long UINT64_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT64_MAX -1}. As a signed long, -1 has
+   * all 64 bits set; {@code Long.toUnsignedString(UINT64_MAX())} prints 18446744073709551615.
+   */
+  public static long UINT64_MAX() {
+    return UINT64_MAX;
+  }
+
+  private static final int INT_LEAST8_MIN = (int) -128L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST8_MIN -128}.
+   */
+  public static int INT_LEAST8_MIN() {
+    return INT_LEAST8_MIN;
+  }
+
+  private static final int INT_LEAST16_MIN = (int) -32768L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST16_MIN -32768}.
+   */
+  public static int INT_LEAST16_MIN() {
+    return INT_LEAST16_MIN;
+  }
+
+  private static final int INT_LEAST32_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST32_MIN -2147483648}.
+   */
+  public static int INT_LEAST32_MIN() {
+    return INT_LEAST32_MIN;
+  }
+
+  private static final long INT_LEAST64_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST64_MIN -9223372036854775808}.
+   */
+  public static long INT_LEAST64_MIN() {
+    return INT_LEAST64_MIN;
+  }
+
+  private static final int INT_LEAST8_MAX = (int) 127L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST8_MAX 127}.
+   */
+  public static int INT_LEAST8_MAX() {
+    return INT_LEAST8_MAX;
+  }
+
+  private static final int INT_LEAST16_MAX = (int) 32767L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST16_MAX 32767}.
+   */
+  public static int INT_LEAST16_MAX() {
+    return INT_LEAST16_MAX;
+  }
+
+  private static final int INT_LEAST32_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST32_MAX 2147483647}.
+   */
+  public static int INT_LEAST32_MAX() {
+    return INT_LEAST32_MAX;
+  }
+
+  private static final long INT_LEAST64_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_LEAST64_MAX 9223372036854775807}.
+   */
+  public static long INT_LEAST64_MAX() {
+    return INT_LEAST64_MAX;
+  }
+
+  private static final int UINT_LEAST8_MAX = (int) 255L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define UINT_LEAST8_MAX 255}.
+   */
+  public static int UINT_LEAST8_MAX() {
+    return UINT_LEAST8_MAX;
+  }
+
+  private static final int UINT_LEAST16_MAX = (int) 65535L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define UINT_LEAST16_MAX 65535}.
+   */
+  public static int UINT_LEAST16_MAX() {
+    return UINT_LEAST16_MAX;
+  }
+
+  private static final int UINT_LEAST32_MAX = (int) 4294967295L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT_LEAST32_MAX 4294967295}. The narrowing
+   * cast stores it as -1; {@code Integer.toUnsignedLong(UINT_LEAST32_MAX())} recovers 4294967295.
+   */
+  public static int UINT_LEAST32_MAX() {
+    return UINT_LEAST32_MAX;
+  }
+
+  private static final long UINT_LEAST64_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT_LEAST64_MAX -1}. As a signed long, -1
+   * has all 64 bits set; {@code Long.toUnsignedString(UINT_LEAST64_MAX())} prints 18446744073709551615.
+   */
+  public static long UINT_LEAST64_MAX() {
+    return UINT_LEAST64_MAX;
+  }
+
+  private static final int INT_FAST8_MIN = (int) -128L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST8_MIN -128}.
+   */
+  public static int INT_FAST8_MIN() {
+    return INT_FAST8_MIN;
+  }
+
+  private static final long INT_FAST16_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST16_MIN -9223372036854775808}.
+   */
+  public static long INT_FAST16_MIN() {
+    return INT_FAST16_MIN;
+  }
+
+  private static final long INT_FAST32_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST32_MIN -9223372036854775808}.
+   */
+  public static long INT_FAST32_MIN() {
+    return INT_FAST32_MIN;
+  }
+
+  private static final long INT_FAST64_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST64_MIN -9223372036854775808}.
+   */
+  public static long INT_FAST64_MIN() {
+    return INT_FAST64_MIN;
+  }
+
+  private static final int INT_FAST8_MAX = (int) 127L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST8_MAX 127}.
+   */
+  public static int INT_FAST8_MAX() {
+    return INT_FAST8_MAX;
+  }
+
+  private static final long INT_FAST16_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST16_MAX 9223372036854775807}.
+   */
+  public static long INT_FAST16_MAX() {
+    return INT_FAST16_MAX;
+  }
+
+  private static final long INT_FAST32_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST32_MAX 9223372036854775807}.
+   */
+  public static long INT_FAST32_MAX() {
+    return INT_FAST32_MAX;
+  }
+
+  private static final long INT_FAST64_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INT_FAST64_MAX 9223372036854775807}.
+   */
+  public static long INT_FAST64_MAX() {
+    return INT_FAST64_MAX;
+  }
+
+  private static final int UINT_FAST8_MAX = (int) 255L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define UINT_FAST8_MAX 255}.
+   */
+  public static int UINT_FAST8_MAX() {
+    return UINT_FAST8_MAX;
+  }
+
+  private static final long UINT_FAST16_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT_FAST16_MAX -1}. As a signed long, -1
+   * has all 64 bits set; {@code Long.toUnsignedString(UINT_FAST16_MAX())} prints 18446744073709551615.
+   */
+  public static long UINT_FAST16_MAX() {
+    return UINT_FAST16_MAX;
+  }
+
+  private static final long UINT_FAST32_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT_FAST32_MAX -1}. As a signed long, -1
+   * has all 64 bits set; {@code Long.toUnsignedString(UINT_FAST32_MAX())} prints 18446744073709551615.
+   */
+  public static long UINT_FAST32_MAX() {
+    return UINT_FAST32_MAX;
+  }
+
+  private static final long UINT_FAST64_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINT_FAST64_MAX -1}. As a signed long, -1
+   * has all 64 bits set; {@code Long.toUnsignedString(UINT_FAST64_MAX())} prints 18446744073709551615.
+   */
+  public static long UINT_FAST64_MAX() {
+    return UINT_FAST64_MAX;
+  }
+
+  private static final long INTPTR_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INTPTR_MIN -9223372036854775808}.
+   */
+  public static long INTPTR_MIN() {
+    return INTPTR_MIN;
+  }
+
+  private static final long INTPTR_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INTPTR_MAX 9223372036854775807}.
+   */
+  public static long INTPTR_MAX() {
+    return INTPTR_MAX;
+  }
+
+  private static final long UINTPTR_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINTPTR_MAX -1}. As a signed long, -1 has
+   * all 64 bits set; {@code Long.toUnsignedString(UINTPTR_MAX())} prints 18446744073709551615.
+   */
+  public static long UINTPTR_MAX() {
+    return UINTPTR_MAX;
+  }
+
+  private static final long INTMAX_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INTMAX_MIN -9223372036854775808}.
+   */
+  public static long INTMAX_MIN() {
+    return INTMAX_MIN;
+  }
+
+  private static final long INTMAX_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define INTMAX_MAX 9223372036854775807}.
+   */
+  public static long INTMAX_MAX() {
+    return INTMAX_MAX;
+  }
+
+  private static final long UINTMAX_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define UINTMAX_MAX -1}. As a signed long, -1 has
+   * all 64 bits set; {@code Long.toUnsignedString(UINTMAX_MAX())} prints 18446744073709551615.
+   */
+  public static long UINTMAX_MAX() {
+    return UINTMAX_MAX;
+  }
+
+  private static final long PTRDIFF_MIN = -9223372036854775808L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define PTRDIFF_MIN -9223372036854775808}.
+   */
+  public static long PTRDIFF_MIN() {
+    return PTRDIFF_MIN;
+  }
+
+  private static final long PTRDIFF_MAX = 9223372036854775807L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define PTRDIFF_MAX 9223372036854775807}.
+   */
+  public static long PTRDIFF_MAX() {
+    return PTRDIFF_MAX;
+  }
+
+  private static final int SIG_ATOMIC_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define SIG_ATOMIC_MIN -2147483648}.
+   */
+  public static int SIG_ATOMIC_MIN() {
+    return SIG_ATOMIC_MIN;
+  }
+
+  private static final int SIG_ATOMIC_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define SIG_ATOMIC_MAX 2147483647}.
+   */
+  public static int SIG_ATOMIC_MAX() {
+    return SIG_ATOMIC_MAX;
+  }
+
+  private static final long SIZE_MAX = -1L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define SIZE_MAX -1}. As a signed long, -1 has
+   * all 64 bits set; {@code Long.toUnsignedString(SIZE_MAX())} prints 18446744073709551615.
+   */
+  public static long SIZE_MAX() {
+    return SIZE_MAX;
+  }
+
+  private static final int WCHAR_MIN = (int) -2147483648L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define WCHAR_MIN -2147483648}.
+   */
+  public static int WCHAR_MIN() {
+    return WCHAR_MIN;
+  }
+
+  private static final int WCHAR_MAX = (int) 2147483647L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define WCHAR_MAX 2147483647}.
+   */
+  public static int WCHAR_MAX() {
+    return WCHAR_MAX;
+  }
+
+  private static final int WINT_MIN = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define WINT_MIN 0}.
+   */
+  public static int WINT_MIN() {
+    return WINT_MIN;
+  }
+
+  private static final int WINT_MAX = (int) 4294967295L;
+
+  /**
+   * Returns the Java constant for the C macro {@code #define WINT_MAX 4294967295}. The narrowing
+   * cast stores it as -1; {@code Integer.toUnsignedLong(WINT_MAX())} recovers 4294967295.
+   */
+  public static int WINT_MAX() {
+    return WINT_MAX;
+  }
+
+  private static final MemorySegment NULL = MemorySegment.ofAddress(0L);
+
+  /**
+   * Returns the segment created by {@code MemorySegment.ofAddress(0L)}, mirroring the C macro
+   * {@code #define NULL (void*) 0}.
+   */
+  public static MemorySegment NULL() {
+    return NULL;
+  }
+
+  private static final long DLPACK_FLAG_BITMASK_READ_ONLY = 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define DLPACK_FLAG_BITMASK_READ_ONLY 1}.
+   */
+  public static long DLPACK_FLAG_BITMASK_READ_ONLY() {
+    return DLPACK_FLAG_BITMASK_READ_ONLY;
+  }
+
+  private static final long DLPACK_FLAG_BITMASK_IS_COPIED = 2L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define DLPACK_FLAG_BITMASK_IS_COPIED 2}.
+   */
+  public static long DLPACK_FLAG_BITMASK_IS_COPIED() {
+    return DLPACK_FLAG_BITMASK_IS_COPIED;
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfPqH.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfPqH.java
new file mode 100644
index 000000000..d5f1e3cee
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfPqH.java
@@ -0,0 +1,3182 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.AddressLayout;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.PaddingLayout;
+import java.lang.foreign.SequenceLayout;
+import java.lang.foreign.StructLayout;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.foreign.ValueLayout.OfByte;
+import java.lang.foreign.ValueLayout.OfInt;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.lang.foreign.ValueLayout.OfShort;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+public class IvfPqH {
+
+  IvfPqH() {
+    // Should not be called directly: the members shown in this class are all static.
+  }
+
+  static final Arena LIBRARY_ARENA = Arena.ofAuto(); // automatic arena: never closed explicitly
+  static final boolean TRACE_DOWNCALLS = Boolean.getBoolean("jextract.trace.downcalls"); // system property, read once
+
+  static void traceDowncall(String name, Object... args) {
+    // Prints "name(arg, ...)" to stdout; String::valueOf renders a null argument as "null" instead of throwing.
+    System.out.printf("%s(%s)\n", name, Arrays.stream(args).map(String::valueOf).collect(Collectors.joining(", ")));
+  }
+
+  static MemorySegment findOrThrow(String symbol) { // address of a native symbol, or UnsatisfiedLinkError if absent
+    return SYMBOL_LOOKUP.find(symbol).orElseThrow(() -> new UnsatisfiedLinkError("unresolved symbol: " + symbol));
+  }
+
+  static MethodHandle upcallHandle(Class<?> fi, String name, FunctionDescriptor fdesc) {
+    try { // find the virtual method `name` on `fi` whose Java method type is derived from fdesc
+      return MethodHandles.lookup().findVirtual(fi, name, fdesc.toMethodType());
+    } catch (ReflectiveOperationException ex) {
+      throw new AssertionError(ex); // a failed lookup surfaces as AssertionError with the cause attached
+    }
+  }
+
+  static MemoryLayout align(MemoryLayout layout, long align) { // returns layout rebuilt with byte alignment `align`
+    return switch (layout) {
+    case PaddingLayout p -> p; // padding is returned unchanged
+    case ValueLayout v -> v.withByteAlignment(align);
+    case GroupLayout g -> { // struct/union: re-align each member recursively, then rebuild the same kind of group
+      MemoryLayout[] alignedMembers = g.memberLayouts().stream().map(m -> align(m, align)).toArray(MemoryLayout[]::new);
+      yield g instanceof StructLayout ? MemoryLayout.structLayout(alignedMembers)
+          : MemoryLayout.unionLayout(alignedMembers);
+    }
+    case SequenceLayout s -> MemoryLayout.sequenceLayout(s.elementCount(), align(s.elementLayout(), align));
+    };
+  }
+
+  static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup().or(Linker.nativeLinker().defaultLookup());
+  // Above: loader lookup first, then the linker's default lookup. Below: Java layouts used for the C scalar types.
+  public static final ValueLayout.OfBoolean C_BOOL = ValueLayout.JAVA_BOOLEAN;
+  public static final ValueLayout.OfByte C_CHAR = ValueLayout.JAVA_BYTE;
+  public static final ValueLayout.OfShort C_SHORT = ValueLayout.JAVA_SHORT;
+  public static final ValueLayout.OfInt C_INT = ValueLayout.JAVA_INT;
+  public static final ValueLayout.OfLong C_LONG_LONG = ValueLayout.JAVA_LONG;
+  public static final ValueLayout.OfFloat C_FLOAT = ValueLayout.JAVA_FLOAT;
+  public static final ValueLayout.OfDouble C_DOUBLE = ValueLayout.JAVA_DOUBLE;
+  public static final AddressLayout C_POINTER = ValueLayout.ADDRESS
+      .withTargetLayout(MemoryLayout.sequenceLayout(java.lang.Long.MAX_VALUE, JAVA_BYTE)); // target: byte sequence
+  public static final ValueLayout.OfLong C_LONG = ValueLayout.JAVA_LONG; // same layout as C_LONG_LONG here
+  private static final int _STDINT_H = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _STDINT_H 1}.
+   */
+  public static int _STDINT_H() {
+    return _STDINT_H;
+  }
+
+  private static final int _FEATURES_H = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _FEATURES_H 1}.
+   */
+  public static int _FEATURES_H() {
+    return _FEATURES_H;
+  }
+
+  private static final int _DEFAULT_SOURCE = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _DEFAULT_SOURCE 1}.
+   */
+  public static int _DEFAULT_SOURCE() {
+    return _DEFAULT_SOURCE;
+  }
+
+  private static final int __GLIBC_USE_ISOC2X = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_ISOC2X 0}.
+   */
+  public static int __GLIBC_USE_ISOC2X() {
+    return __GLIBC_USE_ISOC2X;
+  }
+
+  private static final int __USE_ISOC11 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_ISOC11 1}.
+   */
+  public static int __USE_ISOC11() {
+    return __USE_ISOC11;
+  }
+
+  private static final int __USE_ISOC99 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_ISOC99 1}.
+   */
+  public static int __USE_ISOC99() {
+    return __USE_ISOC99;
+  }
+
+  private static final int __USE_ISOC95 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_ISOC95 1}.
+   */
+  public static int __USE_ISOC95() {
+    return __USE_ISOC95;
+  }
+
+  private static final int __USE_POSIX_IMPLICITLY = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_POSIX_IMPLICITLY 1}.
+   */
+  public static int __USE_POSIX_IMPLICITLY() {
+    return __USE_POSIX_IMPLICITLY;
+  }
+
+  private static final int _POSIX_SOURCE = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _POSIX_SOURCE 1}.
+   */
+  public static int _POSIX_SOURCE() {
+    return _POSIX_SOURCE;
+  }
+
+  private static final int __USE_POSIX = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_POSIX 1}.
+   */
+  public static int __USE_POSIX() {
+    return __USE_POSIX;
+  }
+
+  private static final int __USE_POSIX2 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_POSIX2 1}.
+   */
+  public static int __USE_POSIX2() {
+    return __USE_POSIX2;
+  }
+
+  private static final int __USE_POSIX199309 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_POSIX199309 1}.
+   */
+  public static int __USE_POSIX199309() {
+    return __USE_POSIX199309;
+  }
+
+  private static final int __USE_POSIX199506 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_POSIX199506 1}.
+   */
+  public static int __USE_POSIX199506() {
+    return __USE_POSIX199506;
+  }
+
+  private static final int __USE_XOPEN2K = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_XOPEN2K 1}.
+   */
+  public static int __USE_XOPEN2K() {
+    return __USE_XOPEN2K;
+  }
+
+  private static final int __USE_XOPEN2K8 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_XOPEN2K8 1}.
+   */
+  public static int __USE_XOPEN2K8() {
+    return __USE_XOPEN2K8;
+  }
+
+  private static final int _ATFILE_SOURCE = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _ATFILE_SOURCE 1}.
+   */
+  public static int _ATFILE_SOURCE() {
+    return _ATFILE_SOURCE;
+  }
+
+  private static final int __WORDSIZE = (int) 64L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __WORDSIZE 64}.
+   */
+  public static int __WORDSIZE() {
+    return __WORDSIZE;
+  }
+
+  private static final int __WORDSIZE_TIME64_COMPAT32 = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __WORDSIZE_TIME64_COMPAT32 1}.
+   */
+  public static int __WORDSIZE_TIME64_COMPAT32() {
+    return __WORDSIZE_TIME64_COMPAT32;
+  }
+
+  private static final int __SYSCALL_WORDSIZE = (int) 64L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __SYSCALL_WORDSIZE 64}.
+   */
+  public static int __SYSCALL_WORDSIZE() {
+    return __SYSCALL_WORDSIZE;
+  }
+
+  private static final int __USE_MISC = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_MISC 1}.
+   */
+  public static int __USE_MISC() {
+    return __USE_MISC;
+  }
+
+  private static final int __USE_ATFILE = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_ATFILE 1}.
+   */
+  public static int __USE_ATFILE() {
+    return __USE_ATFILE;
+  }
+
+  private static final int __USE_FORTIFY_LEVEL = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __USE_FORTIFY_LEVEL 0}.
+   */
+  public static int __USE_FORTIFY_LEVEL() {
+    return __USE_FORTIFY_LEVEL;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_GETS = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_DEPRECATED_GETS 0}.
+   */
+  public static int __GLIBC_USE_DEPRECATED_GETS() {
+    return __GLIBC_USE_DEPRECATED_GETS;
+  }
+
+  private static final int __GLIBC_USE_DEPRECATED_SCANF = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_DEPRECATED_SCANF 0}.
+   */
+  public static int __GLIBC_USE_DEPRECATED_SCANF() {
+    return __GLIBC_USE_DEPRECATED_SCANF;
+  }
+
+  private static final int _STDC_PREDEF_H = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _STDC_PREDEF_H 1}.
+   */
+  public static int _STDC_PREDEF_H() {
+    return _STDC_PREDEF_H;
+  }
+
+  private static final int __STDC_IEC_559__ = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __STDC_IEC_559__ 1}.
+   */
+  public static int __STDC_IEC_559__() {
+    return __STDC_IEC_559__;
+  }
+
+  private static final int __STDC_IEC_559_COMPLEX__ = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __STDC_IEC_559_COMPLEX__ 1}.
+   */
+  public static int __STDC_IEC_559_COMPLEX__() {
+    return __STDC_IEC_559_COMPLEX__;
+  }
+
+  private static final int __GNU_LIBRARY__ = (int) 6L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GNU_LIBRARY__ 6}.
+   */
+  public static int __GNU_LIBRARY__() {
+    return __GNU_LIBRARY__;
+  }
+
+  private static final int __GLIBC__ = (int) 2L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC__ 2}.
+   */
+  public static int __GLIBC__() {
+    return __GLIBC__;
+  }
+
+  private static final int __GLIBC_MINOR__ = (int) 35L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_MINOR__ 35}.
+   */
+  public static int __GLIBC_MINOR__() {
+    return __GLIBC_MINOR__;
+  }
+
+  private static final int _SYS_CDEFS_H = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _SYS_CDEFS_H 1}.
+   */
+  public static int _SYS_CDEFS_H() {
+    return _SYS_CDEFS_H;
+  }
+
+  private static final int __glibc_c99_flexarr_available = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __glibc_c99_flexarr_available 1}.
+   */
+  public static int __glibc_c99_flexarr_available() {
+    return __glibc_c99_flexarr_available;
+  }
+
+  private static final int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI 0}.
+   */
+  public static int __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI() {
+    return __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI;
+  }
+
+  private static final int __HAVE_GENERIC_SELECTION = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __HAVE_GENERIC_SELECTION 1}.
+   */
+  public static int __HAVE_GENERIC_SELECTION() {
+    return __HAVE_GENERIC_SELECTION;
+  }
+
+  private static final int __GLIBC_USE_LIB_EXT2 = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_LIB_EXT2 0}.
+   */
+  public static int __GLIBC_USE_LIB_EXT2() {
+    return __GLIBC_USE_LIB_EXT2;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_IEC_60559_BFP_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_BFP_EXT_C2X = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_IEC_60559_BFP_EXT_C2X 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_BFP_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_BFP_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_EXT = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_IEC_60559_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_EXT() {
+    return __GLIBC_USE_IEC_60559_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X() {
+    return __GLIBC_USE_IEC_60559_FUNCS_EXT_C2X;
+  }
+
+  private static final int __GLIBC_USE_IEC_60559_TYPES_EXT = (int) 0L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __GLIBC_USE_IEC_60559_TYPES_EXT 0}.
+   */
+  public static int __GLIBC_USE_IEC_60559_TYPES_EXT() {
+    return __GLIBC_USE_IEC_60559_TYPES_EXT;
+  }
+
+  private static final int _BITS_TYPES_H = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _BITS_TYPES_H 1}.
+   */
+  public static int _BITS_TYPES_H() {
+    return _BITS_TYPES_H;
+  }
+
+  private static final int _BITS_TYPESIZES_H = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define _BITS_TYPESIZES_H 1}.
+   */
+  public static int _BITS_TYPESIZES_H() {
+    return _BITS_TYPESIZES_H;
+  }
+
+  private static final int __OFF_T_MATCHES_OFF64_T = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __OFF_T_MATCHES_OFF64_T 1}.
+   */
+  public static int __OFF_T_MATCHES_OFF64_T() {
+    return __OFF_T_MATCHES_OFF64_T;
+  }
+
+  private static final int __INO_T_MATCHES_INO64_T = (int) 1L;
+
+  /**
+   * Returns the Java constant that mirrors the C macro
+   * {@code #define __INO_T_MATCHES_INO64_T 1}.
+   */
+  public static int __INO_T_MATCHES_INO64_T() {
+    return __INO_T_MATCHES_INO64_T;
+  }
+
+  private static final int __RLIM_T_MATCHES_RLIM64_T = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define __RLIM_T_MATCHES_RLIM64_T 1}.
+   * @return the constant {@code 1}
+   */
+  public static int __RLIM_T_MATCHES_RLIM64_T() {
+    return __RLIM_T_MATCHES_RLIM64_T;
+  }
+
+  private static final int __STATFS_MATCHES_STATFS64 = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define __STATFS_MATCHES_STATFS64 1}.
+   * @return the constant {@code 1}
+   */
+  public static int __STATFS_MATCHES_STATFS64() {
+    return __STATFS_MATCHES_STATFS64;
+  }
+
+  private static final int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64 1}.
+   * @return the constant {@code 1}
+   */
+  public static int __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64() {
+    return __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64;
+  }
+
+  private static final int __FD_SETSIZE = (int) 1024L;
+
+  /**
+   * Accessor for the C macro {@code #define __FD_SETSIZE 1024}.
+   * @return the constant {@code 1024}
+   */
+  public static int __FD_SETSIZE() {
+    return __FD_SETSIZE;
+  }
+
+  private static final int _BITS_TIME64_H = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define _BITS_TIME64_H 1}.
+   * @return the constant {@code 1}
+   */
+  public static int _BITS_TIME64_H() {
+    return _BITS_TIME64_H;
+  }
+
+  private static final int _BITS_WCHAR_H = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define _BITS_WCHAR_H 1}.
+   * @return the constant {@code 1}
+   */
+  public static int _BITS_WCHAR_H() {
+    return _BITS_WCHAR_H;
+  }
+
+  private static final int _BITS_STDINT_INTN_H = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define _BITS_STDINT_INTN_H 1}.
+   * @return the constant {@code 1}
+   */
+  public static int _BITS_STDINT_INTN_H() {
+    return _BITS_STDINT_INTN_H;
+  }
+
+  private static final int _BITS_STDINT_UINTN_H = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define _BITS_STDINT_UINTN_H 1}.
+   * @return the constant {@code 1}
+   */
+  public static int _BITS_STDINT_UINTN_H() {
+    return _BITS_STDINT_UINTN_H;
+  }
+
+  private static final int DLPACK_MAJOR_VERSION = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define DLPACK_MAJOR_VERSION 1}.
+   * @return the constant {@code 1}
+   */
+  public static int DLPACK_MAJOR_VERSION() {
+    return DLPACK_MAJOR_VERSION;
+  }
+
+  private static final int DLPACK_MINOR_VERSION = (int) 0L;
+
+  /**
+   * Accessor for the C macro {@code #define DLPACK_MINOR_VERSION 0}.
+   * @return the constant {@code 0}
+   */
+  public static int DLPACK_MINOR_VERSION() {
+    return DLPACK_MINOR_VERSION;
+  }
+
+  private static final int true_ = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define true 1}; the Java name carries a trailing underscore.
+   * @return the constant {@code 1}
+   */
+  public static int true_() {
+    return true_;
+  }
+
+  private static final int false_ = (int) 0L;
+
+  /**
+   * Accessor for the C macro {@code #define false 0}; the Java name carries a trailing underscore.
+   * @return the constant {@code 0}
+   */
+  public static int false_() {
+    return false_;
+  }
+
+  private static final int __bool_true_false_are_defined = (int) 1L;
+
+  /**
+   * Accessor for the C macro {@code #define __bool_true_false_are_defined 1}.
+   * @return the constant {@code 1}
+   */
+  public static int __bool_true_false_are_defined() {
+    return __bool_true_false_are_defined;
+  }
+
+  /**
+   * Layout constant for the C alias {@code typedef unsigned char __u_char}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte __u_char = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned short __u_short}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort __u_short = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __u_int}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __u_int = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __u_long}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __u_long = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef signed char __int8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte __int8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned char __uint8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte __uint8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef short __int16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort __int16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned short __uint16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort __uint16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef int __int32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __int32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __uint32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __uint32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef long __int64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __int64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __uint64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __uint64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __int8_t __int_least8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte __int_least8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef __uint8_t __uint_least8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte __uint_least8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef __int16_t __int_least16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort __int_least16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef __uint16_t __uint_least16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort __uint_least16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef __int32_t __int_least32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __int_least32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __uint32_t __uint_least32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __uint_least32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __int64_t __int_least64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __int_least64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __uint64_t __uint_least64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __uint_least64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __quad_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __quad_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __u_quad_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __u_quad_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __intmax_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __intmax_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __uintmax_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __uintmax_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __dev_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __dev_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __uid_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __uid_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __gid_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __gid_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __ino_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __ino_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __ino64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __ino64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __mode_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __mode_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __nlink_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __nlink_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __off_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __off_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __off64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __off64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef int __pid_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __pid_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef long __clock_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __clock_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __rlim_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __rlim_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __rlim64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __rlim64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __id_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __id_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef long __time_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __time_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __useconds_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __useconds_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef long __suseconds_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __suseconds_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __suseconds64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __suseconds64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef int __daddr_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __daddr_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef int __key_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __key_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef int __clockid_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __clockid_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C pointer alias {@code typedef void *__timer_t}.
+   * Same object as {@code IvfPqH.C_POINTER}.
+   */
+  public static final AddressLayout __timer_t = IvfPqH.C_POINTER;
+  /**
+   * Layout constant for the C alias {@code typedef long __blksize_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __blksize_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __blkcnt_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __blkcnt_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __blkcnt64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __blkcnt64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __fsblkcnt_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __fsblkcnt_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __fsblkcnt64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __fsblkcnt64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __fsfilcnt_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __fsfilcnt_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __fsfilcnt64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __fsfilcnt64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __fsword_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __fsword_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __ssize_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __ssize_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long __syscall_slong_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __syscall_slong_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long __syscall_ulong_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __syscall_ulong_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __off64_t __loff_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __loff_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C pointer alias {@code typedef char *__caddr_t}.
+   * Same object as {@code IvfPqH.C_POINTER}.
+   */
+  public static final AddressLayout __caddr_t = IvfPqH.C_POINTER;
+  /**
+   * Layout constant for the C alias {@code typedef long __intptr_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong __intptr_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned int __socklen_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __socklen_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef int __sig_atomic_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt __sig_atomic_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __int8_t int8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte int8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef __int16_t int16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort int16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef __int32_t int32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt int32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __int64_t int64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong int64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __uint8_t uint8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte uint8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef __uint16_t uint16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort uint16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef __uint32_t uint32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt uint32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __uint64_t uint64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uint64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __int_least8_t int_least8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte int_least8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef __int_least16_t int_least16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort int_least16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef __int_least32_t int_least32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt int_least32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __int_least64_t int_least64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong int_least64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __uint_least8_t uint_least8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte uint_least8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef __uint_least16_t uint_least16_t}.
+   * Same object as {@code IvfPqH.C_SHORT}.
+   */
+  public static final OfShort uint_least16_t = IvfPqH.C_SHORT;
+  /**
+   * Layout constant for the C alias {@code typedef __uint_least32_t uint_least32_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt uint_least32_t = IvfPqH.C_INT;
+  /**
+   * Layout constant for the C alias {@code typedef __uint_least64_t uint_least64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uint_least64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef signed char int_fast8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte int_fast8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef long int_fast16_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong int_fast16_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long int_fast32_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong int_fast32_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long int_fast64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong int_fast64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned char uint_fast8_t}.
+   * Same object as {@code IvfPqH.C_CHAR}.
+   */
+  public static final OfByte uint_fast8_t = IvfPqH.C_CHAR;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long uint_fast16_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uint_fast16_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long uint_fast32_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uint_fast32_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long uint_fast64_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uint_fast64_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef long intptr_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong intptr_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long uintptr_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uintptr_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __intmax_t intmax_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong intmax_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef __uintmax_t uintmax_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong uintmax_t = IvfPqH.C_LONG;
+  private static final int CUVS_ERROR = (int) 0L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.CUVS_ERROR = 0}.
+   * @return the constant {@code 0}
+   */
+  public static int CUVS_ERROR() {
+    return CUVS_ERROR;
+  }
+
+  private static final int CUVS_SUCCESS = (int) 1L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.CUVS_SUCCESS = 1}.
+   * @return the constant {@code 1}
+   */
+  public static int CUVS_SUCCESS() {
+    return CUVS_SUCCESS;
+  }
+
+  /**
+   * Layout constant for the C alias {@code typedef uintptr_t cuvsResources_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong cuvsResources_t = IvfPqH.C_LONG;
+  private static final int L2Expanded = (int) 0L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.L2Expanded = 0}.
+   * @return the constant {@code 0}
+   */
+  public static int L2Expanded() {
+    return L2Expanded;
+  }
+
+  private static final int L2SqrtExpanded = (int) 1L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.L2SqrtExpanded = 1}.
+   * @return the constant {@code 1}
+   */
+  public static int L2SqrtExpanded() {
+    return L2SqrtExpanded;
+  }
+
+  private static final int CosineExpanded = (int) 2L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.CosineExpanded = 2}.
+   * @return the constant {@code 2}
+   */
+  public static int CosineExpanded() {
+    return CosineExpanded;
+  }
+
+  private static final int L1 = (int) 3L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.L1 = 3}.
+   * @return the constant {@code 3}
+   */
+  public static int L1() {
+    return L1;
+  }
+
+  private static final int L2Unexpanded = (int) 4L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.L2Unexpanded = 4}.
+   * @return the constant {@code 4}
+   */
+  public static int L2Unexpanded() {
+    return L2Unexpanded;
+  }
+
+  private static final int L2SqrtUnexpanded = (int) 5L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.L2SqrtUnexpanded = 5}.
+   * @return the constant {@code 5}
+   */
+  public static int L2SqrtUnexpanded() {
+    return L2SqrtUnexpanded;
+  }
+
+  private static final int InnerProduct = (int) 6L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.InnerProduct = 6}.
+   * @return the constant {@code 6}
+   */
+  public static int InnerProduct() {
+    return InnerProduct;
+  }
+
+  private static final int Linf = (int) 7L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.Linf = 7}.
+   * @return the constant {@code 7}
+   */
+  public static int Linf() {
+    return Linf;
+  }
+
+  private static final int Canberra = (int) 8L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.Canberra = 8}.
+   * @return the constant {@code 8}
+   */
+  public static int Canberra() {
+    return Canberra;
+  }
+
+  private static final int LpUnexpanded = (int) 9L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.LpUnexpanded = 9}.
+   * @return the constant {@code 9}
+   */
+  public static int LpUnexpanded() {
+    return LpUnexpanded;
+  }
+
+  private static final int CorrelationExpanded = (int) 10L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.CorrelationExpanded = 10}.
+   * @return the constant {@code 10}
+   */
+  public static int CorrelationExpanded() {
+    return CorrelationExpanded;
+  }
+
+  private static final int JaccardExpanded = (int) 11L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.JaccardExpanded = 11}.
+   * @return the constant {@code 11}
+   */
+  public static int JaccardExpanded() {
+    return JaccardExpanded;
+  }
+
+  private static final int HellingerExpanded = (int) 12L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.HellingerExpanded = 12}.
+   * @return the constant {@code 12}
+   */
+  public static int HellingerExpanded() {
+    return HellingerExpanded;
+  }
+
+  private static final int Haversine = (int) 13L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.Haversine = 13}.
+   * @return the constant {@code 13}
+   */
+  public static int Haversine() {
+    return Haversine;
+  }
+
+  private static final int BrayCurtis = (int) 14L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.BrayCurtis = 14}.
+   * @return the constant {@code 14}
+   */
+  public static int BrayCurtis() {
+    return BrayCurtis;
+  }
+
+  private static final int JensenShannon = (int) 15L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.JensenShannon = 15}.
+   * @return the constant {@code 15}
+   */
+  public static int JensenShannon() {
+    return JensenShannon;
+  }
+
+  private static final int HammingUnexpanded = (int) 16L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.HammingUnexpanded = 16}.
+   * @return the constant {@code 16}
+   */
+  public static int HammingUnexpanded() {
+    return HammingUnexpanded;
+  }
+
+  private static final int KLDivergence = (int) 17L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.KLDivergence = 17}.
+   * @return the constant {@code 17}
+   */
+  public static int KLDivergence() {
+    return KLDivergence;
+  }
+
+  private static final int RusselRaoExpanded = (int) 18L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.RusselRaoExpanded = 18}.
+   * @return the constant {@code 18}
+   */
+  public static int RusselRaoExpanded() {
+    return RusselRaoExpanded;
+  }
+
+  private static final int DiceExpanded = (int) 19L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.DiceExpanded = 19}.
+   * @return the constant {@code 19}
+   */
+  public static int DiceExpanded() {
+    return DiceExpanded;
+  }
+
+  private static final int Precomputed = (int) 100L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.Precomputed = 100}.
+   * @return the constant {@code 100}
+   */
+  public static int Precomputed() {
+    return Precomputed;
+  }
+
+  /**
+   * Layout constant for the C alias {@code typedef long ptrdiff_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong ptrdiff_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef unsigned long size_t}.
+   * Same object as {@code IvfPqH.C_LONG}.
+   */
+  public static final OfLong size_t = IvfPqH.C_LONG;
+  /**
+   * Layout constant for the C alias {@code typedef int wchar_t}.
+   * Same object as {@code IvfPqH.C_INT}.
+   */
+  public static final OfInt wchar_t = IvfPqH.C_INT;
+  private static final int kDLCPU = (int) 1L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLCPU = 1}.
+   * @return the constant {@code 1}
+   */
+  public static int kDLCPU() {
+    return kDLCPU;
+  }
+
+  private static final int kDLCUDA = (int) 2L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLCUDA = 2}.
+   * @return the constant {@code 2}
+   */
+  public static int kDLCUDA() {
+    return kDLCUDA;
+  }
+
+  private static final int kDLCUDAHost = (int) 3L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLCUDAHost = 3}.
+   * @return the constant {@code 3}
+   */
+  public static int kDLCUDAHost() {
+    return kDLCUDAHost;
+  }
+
+  private static final int kDLOpenCL = (int) 4L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLOpenCL = 4}.
+   * @return the constant {@code 4}
+   */
+  public static int kDLOpenCL() {
+    return kDLOpenCL;
+  }
+
+  private static final int kDLVulkan = (int) 7L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLVulkan = 7}.
+   * @return the constant {@code 7}
+   */
+  public static int kDLVulkan() {
+    return kDLVulkan;
+  }
+
+  private static final int kDLMetal = (int) 8L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLMetal = 8}.
+   * @return the constant {@code 8}
+   */
+  public static int kDLMetal() {
+    return kDLMetal;
+  }
+
+  private static final int kDLVPI = (int) 9L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLVPI = 9}.
+   * @return the constant {@code 9}
+   */
+  public static int kDLVPI() {
+    return kDLVPI;
+  }
+
+  private static final int kDLROCM = (int) 10L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLROCM = 10}.
+   * @return the constant {@code 10}
+   */
+  public static int kDLROCM() {
+    return kDLROCM;
+  }
+
+  private static final int kDLROCMHost = (int) 11L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLROCMHost = 11}.
+   * @return the constant {@code 11}
+   */
+  public static int kDLROCMHost() {
+    return kDLROCMHost;
+  }
+
+  private static final int kDLExtDev = (int) 12L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLExtDev = 12}.
+   * @return the constant {@code 12}
+   */
+  public static int kDLExtDev() {
+    return kDLExtDev;
+  }
+
+  private static final int kDLCUDAManaged = (int) 13L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLCUDAManaged = 13}.
+   * @return the constant {@code 13}
+   */
+  public static int kDLCUDAManaged() {
+    return kDLCUDAManaged;
+  }
+
+  private static final int kDLOneAPI = (int) 14L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLOneAPI = 14}.
+   * @return the constant {@code 14}
+   */
+  public static int kDLOneAPI() {
+    return kDLOneAPI;
+  }
+
+  private static final int kDLWebGPU = (int) 15L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLWebGPU = 15}.
+   * @return the constant {@code 15}
+   */
+  public static int kDLWebGPU() {
+    return kDLWebGPU;
+  }
+
+  private static final int kDLHexagon = (int) 16L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLHexagon = 16}.
+   * @return the constant {@code 16}
+   */
+  public static int kDLHexagon() {
+    return kDLHexagon;
+  }
+
+  private static final int kDLMAIA = (int) 17L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLMAIA = 17}.
+   * @return the constant {@code 17}
+   */
+  public static int kDLMAIA() {
+    return kDLMAIA;
+  }
+
+  private static final int kDLInt = (int) 0L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLInt = 0}.
+   * @return the constant {@code 0}
+   */
+  public static int kDLInt() {
+    return kDLInt;
+  }
+
+  private static final int kDLUInt = (int) 1L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLUInt = 1}.
+   * @return the constant {@code 1}
+   */
+  public static int kDLUInt() {
+    return kDLUInt;
+  }
+
+  private static final int kDLFloat = (int) 2L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLFloat = 2}.
+   * @return the constant {@code 2}
+   */
+  public static int kDLFloat() {
+    return kDLFloat;
+  }
+
+  private static final int kDLOpaqueHandle = (int) 3L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLOpaqueHandle = 3}.
+   * @return the constant {@code 3}
+   */
+  public static int kDLOpaqueHandle() {
+    return kDLOpaqueHandle;
+  }
+
+  private static final int kDLBfloat = (int) 4L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLBfloat = 4}.
+   * @return the constant {@code 4}
+   */
+  public static int kDLBfloat() {
+    return kDLBfloat;
+  }
+
+  private static final int kDLComplex = (int) 5L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLComplex = 5}.
+   * @return the constant {@code 5}
+   */
+  public static int kDLComplex() {
+    return kDLComplex;
+  }
+
+  private static final int kDLBool = (int) 6L;
+
+  /**
+   * Accessor for the C enumerator {@code enum <anonymous>.kDLBool = 6}.
+   * @return the constant {@code 6}
+   */
+  public static int kDLBool() {
+    return kDLBool;
+  }
+
+  private static final int CUDA_R_16F = (int) 2L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_16F = 2}.
+   * @return the constant {@code 2}
+   */
+  public static int CUDA_R_16F() {
+    return CUDA_R_16F;
+  }
+
+  private static final int CUDA_C_16F = (int) 6L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_16F = 6}.
+   * @return the constant {@code 6}
+   */
+  public static int CUDA_C_16F() {
+    return CUDA_C_16F;
+  }
+
+  private static final int CUDA_R_16BF = (int) 14L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_16BF = 14}.
+   * @return the constant {@code 14}
+   */
+  public static int CUDA_R_16BF() {
+    return CUDA_R_16BF;
+  }
+
+  private static final int CUDA_C_16BF = (int) 15L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_16BF = 15}.
+   * @return the constant {@code 15}
+   */
+  public static int CUDA_C_16BF() {
+    return CUDA_C_16BF;
+  }
+
+  private static final int CUDA_R_32F = (int) 0L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_32F = 0}.
+   * @return the constant {@code 0}
+   */
+  public static int CUDA_R_32F() {
+    return CUDA_R_32F;
+  }
+
+  private static final int CUDA_C_32F = (int) 4L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_32F = 4}.
+   * @return the constant {@code 4}
+   */
+  public static int CUDA_C_32F() {
+    return CUDA_C_32F;
+  }
+
+  private static final int CUDA_R_64F = (int) 1L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_64F = 1}.
+   * @return the constant {@code 1}
+   */
+  public static int CUDA_R_64F() {
+    return CUDA_R_64F;
+  }
+
+  private static final int CUDA_C_64F = (int) 5L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_64F = 5}.
+   * @return the constant {@code 5}
+   */
+  public static int CUDA_C_64F() {
+    return CUDA_C_64F;
+  }
+
+  private static final int CUDA_R_4I = (int) 16L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_4I = 16}.
+   * @return the constant {@code 16}
+   */
+  public static int CUDA_R_4I() {
+    return CUDA_R_4I;
+  }
+
+  private static final int CUDA_C_4I = (int) 17L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_4I = 17}.
+   * @return the constant {@code 17}
+   */
+  public static int CUDA_C_4I() {
+    return CUDA_C_4I;
+  }
+
+  private static final int CUDA_R_4U = (int) 18L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_4U = 18}.
+   * @return the constant {@code 18}
+   */
+  public static int CUDA_R_4U() {
+    return CUDA_R_4U;
+  }
+
+  private static final int CUDA_C_4U = (int) 19L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_4U = 19}.
+   * @return the constant {@code 19}
+   */
+  public static int CUDA_C_4U() {
+    return CUDA_C_4U;
+  }
+
+  private static final int CUDA_R_8I = (int) 3L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_8I = 3}.
+   * @return the constant {@code 3}
+   */
+  public static int CUDA_R_8I() {
+    return CUDA_R_8I;
+  }
+
+  private static final int CUDA_C_8I = (int) 7L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_8I = 7}.
+   * @return the constant {@code 7}
+   */
+  public static int CUDA_C_8I() {
+    return CUDA_C_8I;
+  }
+
+  private static final int CUDA_R_8U = (int) 8L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_8U = 8}.
+   * @return the constant {@code 8}
+   */
+  public static int CUDA_R_8U() {
+    return CUDA_R_8U;
+  }
+
+  private static final int CUDA_C_8U = (int) 9L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_8U = 9}.
+   * @return the constant {@code 9}
+   */
+  public static int CUDA_C_8U() {
+    return CUDA_C_8U;
+  }
+
+  private static final int CUDA_R_16I = (int) 20L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_16I = 20}.
+   * @return the constant {@code 20}
+   */
+  public static int CUDA_R_16I() {
+    return CUDA_R_16I;
+  }
+
+  private static final int CUDA_C_16I = (int) 21L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_16I = 21}.
+   * @return the constant {@code 21}
+   */
+  public static int CUDA_C_16I() {
+    return CUDA_C_16I;
+  }
+
+  private static final int CUDA_R_16U = (int) 22L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_16U = 22}.
+   * @return the constant {@code 22}
+   */
+  public static int CUDA_R_16U() {
+    return CUDA_R_16U;
+  }
+
+  private static final int CUDA_C_16U = (int) 23L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_16U = 23}.
+   * @return the constant {@code 23}
+   */
+  public static int CUDA_C_16U() {
+    return CUDA_C_16U;
+  }
+
+  private static final int CUDA_R_32I = (int) 10L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_32I = 10}.
+   * @return the constant {@code 10}
+   */
+  public static int CUDA_R_32I() {
+    return CUDA_R_32I;
+  }
+
+  private static final int CUDA_C_32I = (int) 11L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_32I = 11}.
+   * @return the constant {@code 11}
+   */
+  public static int CUDA_C_32I() {
+    return CUDA_C_32I;
+  }
+
+  private static final int CUDA_R_32U = (int) 12L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_32U = 12}.
+   * @return the constant {@code 12}
+   */
+  public static int CUDA_R_32U() {
+    return CUDA_R_32U;
+  }
+
+  private static final int CUDA_C_32U = (int) 13L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_32U = 13}.
+   * @return the constant {@code 13}
+   */
+  public static int CUDA_C_32U() {
+    return CUDA_C_32U;
+  }
+
+  private static final int CUDA_R_64I = (int) 24L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_64I = 24}.
+   * @return the constant {@code 24}
+   */
+  public static int CUDA_R_64I() {
+    return CUDA_R_64I;
+  }
+
+  private static final int CUDA_C_64I = (int) 25L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_64I = 25}.
+   * @return the constant {@code 25}
+   */
+  public static int CUDA_C_64I() {
+    return CUDA_C_64I;
+  }
+
+  private static final int CUDA_R_64U = (int) 26L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_R_64U = 26}.
+   * @return the constant {@code 26}
+   */
+  public static int CUDA_R_64U() {
+    return CUDA_R_64U;
+  }
+
+  private static final int CUDA_C_64U = (int) 27L;
+
+  /**
+   * Accessor for the C enumerator {@code enum cudaDataType_t.CUDA_C_64U = 27}.
+   * @return the constant {@code 27}
+   */
+  public static int CUDA_C_64U() {
+    return CUDA_C_64U;
+  }
+
+  private static final int MAJOR_VERSION = (int) 0L;
+
+  /**
+   * Accessor for the C enumerator {@code enum libraryPropertyType_t.MAJOR_VERSION = 0}.
+   * @return the constant {@code 0}
+   */
+  public static int MAJOR_VERSION() {
+    return MAJOR_VERSION;
+  }
+
+  private static final int MINOR_VERSION = (int) 1L;
+
+  /**
+   * Accessor for the C enumerator {@code enum libraryPropertyType_t.MINOR_VERSION = 1}.
+   * @return the constant {@code 1}
+   */
+  public static int MINOR_VERSION() {
+    return MINOR_VERSION;
+  }
+
+  private static final int PATCH_LEVEL = (int) 2L;
+
+  /**
+   * Accessor for the C enumerator {@code enum libraryPropertyType_t.PATCH_LEVEL = 2}.
+   * @return the constant {@code 2}
+   */
+  public static int PATCH_LEVEL() {
+    return PATCH_LEVEL;
+  }
+
+  private static final int PER_SUBSPACE = (int) 0L;
+
+  /**
+   * Accessor for the C enumerator {@code enum codebook_gen.PER_SUBSPACE = 0}.
+   * @return the constant {@code 0}
+   */
+  public static int PER_SUBSPACE() {
+    return PER_SUBSPACE;
+  }
+
+  private static final int PER_CLUSTER = (int) 1L;
+
+  /**
+   * {@snippet lang = c : * enum codebook_gen.PER_CLUSTER = 1
+   * }
+   */
+  public static int PER_CLUSTER() {
+    return PER_CLUSTER;
+  }
+
+  /** Address layout of the pointer typedef below; the same object as {@code IvfPqH.C_POINTER}.
+   * {@snippet lang=c :
+   * typedef struct cuvsIvfPqIndexParams {
+   *     cuvsDistanceType metric;
+   *     float metric_arg;
+   *     _Bool add_data_on_build;
+   *     uint32_t n_lists;
+   *     uint32_t kmeans_n_iters;
+   *     double kmeans_trainset_fraction;
+   *     uint32_t pq_bits;
+   *     uint32_t pq_dim;
+   *     enum codebook_gen codebook_kind;
+   *     _Bool force_random_rotation;
+   *     _Bool conservative_memory_allocation;
+   *     uint32_t max_train_points_per_pq_code;
+   * } *cuvsIvfPqIndexParams_t
+   * }
+   */
+  public static final AddressLayout cuvsIvfPqIndexParams_t = IvfPqH.C_POINTER;
+
+  private static class cuvsIvfPqIndexParamsCreate { // native binding state for cuvsIvfPqIndexParamsCreate
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqIndexParamsCreate");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t *index_params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqIndexParamsCreate$descriptor() {
+    return cuvsIvfPqIndexParamsCreate.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t *index_params)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqIndexParamsCreate$handle() {
+    return cuvsIvfPqIndexParamsCreate.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t *index_params)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqIndexParamsCreate$address() {
+    return cuvsIvfPqIndexParamsCreate.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t *index_params)
+   * }
+   */
+  public static int cuvsIvfPqIndexParamsCreate(MemorySegment index_params) {
+    var mh$ = cuvsIvfPqIndexParamsCreate.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqIndexParamsCreate", index_params);
+      }
+      return (int) mh$.invokeExact(index_params);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqIndexParamsDestroy { // native binding state for cuvsIvfPqIndexParamsDestroy
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqIndexParamsDestroy");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqIndexParamsDestroy$descriptor() {
+    return cuvsIvfPqIndexParamsDestroy.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqIndexParamsDestroy$handle() {
+    return cuvsIvfPqIndexParamsDestroy.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqIndexParamsDestroy$address() {
+    return cuvsIvfPqIndexParamsDestroy.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params)
+   * }
+   */
+  public static int cuvsIvfPqIndexParamsDestroy(MemorySegment index_params) {
+    var mh$ = cuvsIvfPqIndexParamsDestroy.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqIndexParamsDestroy", index_params);
+      }
+      return (int) mh$.invokeExact(index_params);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  /** Address layout of the pointer typedef below; the same object as {@code IvfPqH.C_POINTER}.
+   * {@snippet lang=c :
+   * typedef struct cuvsIvfPqSearchParams {
+   *     uint32_t n_probes;
+   *     cudaDataType_t lut_dtype;
+   *     cudaDataType_t internal_distance_dtype;
+   *     double preferred_shmem_carveout;
+   * } *cuvsIvfPqSearchParams_t
+   * }
+   */
+  public static final AddressLayout cuvsIvfPqSearchParams_t = IvfPqH.C_POINTER;
+
+  private static class cuvsIvfPqSearchParamsCreate { // native binding state for cuvsIvfPqSearchParamsCreate
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqSearchParamsCreate");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t *params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqSearchParamsCreate$descriptor() {
+    return cuvsIvfPqSearchParamsCreate.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t *params)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqSearchParamsCreate$handle() {
+    return cuvsIvfPqSearchParamsCreate.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t *params)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqSearchParamsCreate$address() {
+    return cuvsIvfPqSearchParamsCreate.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t *params)
+   * }
+   */
+  public static int cuvsIvfPqSearchParamsCreate(MemorySegment params) {
+    var mh$ = cuvsIvfPqSearchParamsCreate.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqSearchParamsCreate", params);
+      }
+      return (int) mh$.invokeExact(params);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqSearchParamsDestroy { // native binding state for cuvsIvfPqSearchParamsDestroy
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqSearchParamsDestroy");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqSearchParamsDestroy$descriptor() {
+    return cuvsIvfPqSearchParamsDestroy.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqSearchParamsDestroy$handle() {
+    return cuvsIvfPqSearchParamsDestroy.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqSearchParamsDestroy$address() {
+    return cuvsIvfPqSearchParamsDestroy.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params)
+   * }
+   */
+  public static int cuvsIvfPqSearchParamsDestroy(MemorySegment params) {
+    var mh$ = cuvsIvfPqSearchParamsDestroy.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqSearchParamsDestroy", params);
+      }
+      return (int) mh$.invokeExact(params);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  /** Address layout of the pointer typedef below; the same object as {@code IvfPqH.C_POINTER}. {@snippet lang=c :
+   * typedef cuvsIvfPq *cuvsIvfPqIndex_t
+   * }
+   */
+  public static final AddressLayout cuvsIvfPqIndex_t = IvfPqH.C_POINTER;
+
+  private static class cuvsIvfPqIndexCreate { // native binding state for cuvsIvfPqIndexCreate
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqIndexCreate");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t *index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqIndexCreate$descriptor() {
+    return cuvsIvfPqIndexCreate.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t *index)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqIndexCreate$handle() {
+    return cuvsIvfPqIndexCreate.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t *index)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqIndexCreate$address() {
+    return cuvsIvfPqIndexCreate.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t *index)
+   * }
+   */
+  public static int cuvsIvfPqIndexCreate(MemorySegment index) {
+    var mh$ = cuvsIvfPqIndexCreate.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqIndexCreate", index);
+      }
+      return (int) mh$.invokeExact(index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqIndexDestroy { // native binding state for cuvsIvfPqIndexDestroy
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqIndexDestroy");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqIndexDestroy$descriptor() {
+    return cuvsIvfPqIndexDestroy.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqIndexDestroy$handle() {
+    return cuvsIvfPqIndexDestroy.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqIndexDestroy$address() {
+    return cuvsIvfPqIndexDestroy.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static int cuvsIvfPqIndexDestroy(MemorySegment index) {
+    var mh$ = cuvsIvfPqIndexDestroy.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqIndexDestroy", index);
+      }
+      return (int) mh$.invokeExact(index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqBuild { // native binding state for cuvsIvfPqBuild
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_LONG,
+        IvfPqH.C_POINTER, IvfPqH.C_POINTER, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqBuild");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqBuild(cuvsResources_t res, cuvsIvfPqIndexParams_t params, DLManagedTensor *dataset, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqBuild$descriptor() {
+    return cuvsIvfPqBuild.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqBuild(cuvsResources_t res, cuvsIvfPqIndexParams_t params, DLManagedTensor *dataset, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqBuild$handle() {
+    return cuvsIvfPqBuild.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqBuild(cuvsResources_t res, cuvsIvfPqIndexParams_t params, DLManagedTensor *dataset, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqBuild$address() {
+    return cuvsIvfPqBuild.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqBuild(cuvsResources_t res, cuvsIvfPqIndexParams_t params, DLManagedTensor *dataset, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static int cuvsIvfPqBuild(long res, MemorySegment params, MemorySegment dataset, MemorySegment index) {
+    var mh$ = cuvsIvfPqBuild.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqBuild", res, params, dataset, index);
+      }
+      return (int) mh$.invokeExact(res, params, dataset, index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqSearch { // native binding state for cuvsIvfPqSearch
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_LONG,
+        IvfPqH.C_POINTER, IvfPqH.C_POINTER, IvfPqH.C_POINTER, IvfPqH.C_POINTER, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqSearch");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearch(cuvsResources_t res, cuvsIvfPqSearchParams_t search_params, cuvsIvfPqIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqSearch$descriptor() {
+    return cuvsIvfPqSearch.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearch(cuvsResources_t res, cuvsIvfPqSearchParams_t search_params, cuvsIvfPqIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqSearch$handle() {
+    return cuvsIvfPqSearch.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSearch(cuvsResources_t res, cuvsIvfPqSearchParams_t search_params, cuvsIvfPqIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqSearch$address() {
+    return cuvsIvfPqSearch.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqSearch(cuvsResources_t res, cuvsIvfPqSearchParams_t search_params, cuvsIvfPqIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)
+   * }
+   */
+  public static int cuvsIvfPqSearch(long res, MemorySegment search_params, MemorySegment index, MemorySegment queries,
+      MemorySegment neighbors, MemorySegment distances) {
+    var mh$ = cuvsIvfPqSearch.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqSearch", res, search_params, index, queries, neighbors, distances);
+      }
+      return (int) mh$.invokeExact(res, search_params, index, queries, neighbors, distances);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqSerialize { // native binding state for cuvsIvfPqSerialize
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_LONG,
+        IvfPqH.C_POINTER, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqSerialize");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqSerialize$descriptor() {
+    return cuvsIvfPqSerialize.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqSerialize$handle() {
+    return cuvsIvfPqSerialize.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqSerialize$address() {
+    return cuvsIvfPqSerialize.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static int cuvsIvfPqSerialize(long res, MemorySegment filename, MemorySegment index) {
+    var mh$ = cuvsIvfPqSerialize.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqSerialize", res, filename, index);
+      }
+      return (int) mh$.invokeExact(res, filename, index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqDeserialize { // native binding state for cuvsIvfPqDeserialize
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_LONG,
+        IvfPqH.C_POINTER, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqDeserialize");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqDeserialize$descriptor() {
+    return cuvsIvfPqDeserialize.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqDeserialize$handle() {
+    return cuvsIvfPqDeserialize.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqDeserialize$address() {
+    return cuvsIvfPqDeserialize.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char *filename, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static int cuvsIvfPqDeserialize(long res, MemorySegment filename, MemorySegment index) {
+    var mh$ = cuvsIvfPqDeserialize.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqDeserialize", res, filename, index);
+      }
+      return (int) mh$.invokeExact(res, filename, index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static class cuvsIvfPqExtend { // native binding state for cuvsIvfPqExtend
+    public static final FunctionDescriptor DESC = FunctionDescriptor.of(IvfPqH.C_INT, IvfPqH.C_LONG,
+        IvfPqH.C_POINTER, IvfPqH.C_POINTER, IvfPqH.C_POINTER);
+    // Address of the native symbol, looked up by name via IvfPqH.findOrThrow.
+    public static final MemorySegment ADDR = IvfPqH.findOrThrow("cuvsIvfPqExtend");
+    // Downcall handle that links ADDR with the signature described by DESC.
+    public static final MethodHandle HANDLE = Linker.nativeLinker().downcallHandle(ADDR, DESC);
+  }
+
+  /**
+   * Function descriptor for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static FunctionDescriptor cuvsIvfPqExtend$descriptor() {
+    return cuvsIvfPqExtend.DESC;
+  }
+
+  /**
+   * Downcall method handle for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MethodHandle cuvsIvfPqExtend$handle() {
+    return cuvsIvfPqExtend.HANDLE;
+  }
+
+  /**
+   * Address for:
+   * {@snippet lang=c :
+   * cuvsError_t cuvsIvfPqExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static MemorySegment cuvsIvfPqExtend$address() {
+    return cuvsIvfPqExtend.ADDR;
+  }
+
+  /**
+   * {@snippet lang = c
+   * : * cuvsError_t cuvsIvfPqExtend(cuvsResources_t res, DLManagedTensor *new_vectors, DLManagedTensor *new_indices, cuvsIvfPqIndex_t index)
+   * }
+   */
+  public static int cuvsIvfPqExtend(long res, MemorySegment new_vectors, MemorySegment new_indices,
+      MemorySegment index) {
+    var mh$ = cuvsIvfPqExtend.HANDLE;
+    try {
+      if (TRACE_DOWNCALLS) {
+        traceDowncall("cuvsIvfPqExtend", res, new_vectors, new_indices, index);
+      }
+      return (int) mh$.invokeExact(res, new_vectors, new_indices, index);
+    } catch (Throwable ex$) {
+      throw new AssertionError("should not reach here", ex$);
+    }
+  }
+
+  private static final long _POSIX_C_SOURCE = 200809L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define _POSIX_C_SOURCE 200809
+   * }
+   */
+  public static long _POSIX_C_SOURCE() {
+    return _POSIX_C_SOURCE;
+  }
+
+  private static final int __TIMESIZE = (int) 64L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define __TIMESIZE 64
+   * }
+   */
+  public static int __TIMESIZE() {
+    return __TIMESIZE;
+  }
+
+  private static final long __STDC_IEC_60559_BFP__ = 201404L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define __STDC_IEC_60559_BFP__ 201404
+   * }
+   */
+  public static long __STDC_IEC_60559_BFP__() {
+    return __STDC_IEC_60559_BFP__;
+  }
+
+  private static final long __STDC_IEC_60559_COMPLEX__ = 201404L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define __STDC_IEC_60559_COMPLEX__ 201404
+   * }
+   */
+  public static long __STDC_IEC_60559_COMPLEX__() {
+    return __STDC_IEC_60559_COMPLEX__;
+  }
+
+  private static final long __STDC_ISO_10646__ = 201706L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define __STDC_ISO_10646__ 201706
+   * }
+   */
+  public static long __STDC_ISO_10646__() {
+    return __STDC_ISO_10646__;
+  }
+
+  private static final int __WCHAR_MAX = (int) 2147483647L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define __WCHAR_MAX 2147483647
+   * }
+   */
+  public static int __WCHAR_MAX() {
+    return __WCHAR_MAX;
+  }
+
+  private static final int __WCHAR_MIN = (int) -2147483648L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define __WCHAR_MIN -2147483648
+   * }
+   */
+  public static int __WCHAR_MIN() {
+    return __WCHAR_MIN;
+  }
+
+  private static final int INT8_MIN = (int) -128L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT8_MIN -128
+   * }
+   */
+  public static int INT8_MIN() {
+    return INT8_MIN;
+  }
+
+  private static final int INT16_MIN = (int) -32768L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT16_MIN -32768
+   * }
+   */
+  public static int INT16_MIN() {
+    return INT16_MIN;
+  }
+
+  private static final int INT32_MIN = (int) -2147483648L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT32_MIN -2147483648
+   * }
+   */
+  public static int INT32_MIN() {
+    return INT32_MIN;
+  }
+
+  private static final long INT64_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT64_MIN() {
+    return INT64_MIN;
+  }
+
+  private static final int INT8_MAX = (int) 127L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT8_MAX 127
+   * }
+   */
+  public static int INT8_MAX() {
+    return INT8_MAX;
+  }
+
+  private static final int INT16_MAX = (int) 32767L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT16_MAX 32767
+   * }
+   */
+  public static int INT16_MAX() {
+    return INT16_MAX;
+  }
+
+  private static final int INT32_MAX = (int) 2147483647L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT32_MAX 2147483647
+   * }
+   */
+  public static int INT32_MAX() {
+    return INT32_MAX;
+  }
+
+  private static final long INT64_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT64_MAX() {
+    return INT64_MAX;
+  }
+
+  private static final int UINT8_MAX = (int) 255L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT8_MAX 255
+   * }
+   */
+  public static int UINT8_MAX() {
+    return UINT8_MAX;
+  }
+
+  private static final int UINT16_MAX = (int) 65535L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT16_MAX 65535
+   * }
+   */
+  public static int UINT16_MAX() {
+    return UINT16_MAX;
+  }
+
+  private static final int UINT32_MAX = (int) 4294967295L; // narrows to -1: the same 32 bits as unsigned 4294967295
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT32_MAX 4294967295
+   * }
+   */
+  public static int UINT32_MAX() {
+    return UINT32_MAX;
+  }
+
+  private static final long UINT64_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT64_MAX -1
+   * }
+   */
+  public static long UINT64_MAX() {
+    return UINT64_MAX;
+  }
+
+  private static final int INT_LEAST8_MIN = (int) -128L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST8_MIN -128
+   * }
+   */
+  public static int INT_LEAST8_MIN() {
+    return INT_LEAST8_MIN;
+  }
+
+  private static final int INT_LEAST16_MIN = (int) -32768L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST16_MIN -32768
+   * }
+   */
+  public static int INT_LEAST16_MIN() {
+    return INT_LEAST16_MIN;
+  }
+
+  private static final int INT_LEAST32_MIN = (int) -2147483648L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST32_MIN -2147483648
+   * }
+   */
+  public static int INT_LEAST32_MIN() {
+    return INT_LEAST32_MIN;
+  }
+
+  private static final long INT_LEAST64_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_LEAST64_MIN() {
+    return INT_LEAST64_MIN;
+  }
+
+  private static final int INT_LEAST8_MAX = (int) 127L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST8_MAX 127
+   * }
+   */
+  public static int INT_LEAST8_MAX() {
+    return INT_LEAST8_MAX;
+  }
+
+  private static final int INT_LEAST16_MAX = (int) 32767L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST16_MAX 32767
+   * }
+   */
+  public static int INT_LEAST16_MAX() {
+    return INT_LEAST16_MAX;
+  }
+
+  private static final int INT_LEAST32_MAX = (int) 2147483647L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST32_MAX 2147483647
+   * }
+   */
+  public static int INT_LEAST32_MAX() {
+    return INT_LEAST32_MAX;
+  }
+
+  private static final long INT_LEAST64_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_LEAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_LEAST64_MAX() {
+    return INT_LEAST64_MAX;
+  }
+
+  private static final int UINT_LEAST8_MAX = (int) 255L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_LEAST8_MAX 255
+   * }
+   */
+  public static int UINT_LEAST8_MAX() {
+    return UINT_LEAST8_MAX;
+  }
+
+  private static final int UINT_LEAST16_MAX = (int) 65535L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_LEAST16_MAX 65535
+   * }
+   */
+  public static int UINT_LEAST16_MAX() {
+    return UINT_LEAST16_MAX;
+  }
+
+  private static final int UINT_LEAST32_MAX = (int) 4294967295L; // narrows to -1: the same 32 bits as unsigned 4294967295
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_LEAST32_MAX 4294967295
+   * }
+   */
+  public static int UINT_LEAST32_MAX() {
+    return UINT_LEAST32_MAX;
+  }
+
+  private static final long UINT_LEAST64_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_LEAST64_MAX -1
+   * }
+   */
+  public static long UINT_LEAST64_MAX() {
+    return UINT_LEAST64_MAX;
+  }
+
+  private static final int INT_FAST8_MIN = (int) -128L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST8_MIN -128
+   * }
+   */
+  public static int INT_FAST8_MIN() {
+    return INT_FAST8_MIN;
+  }
+
+  private static final long INT_FAST16_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST16_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST16_MIN() {
+    return INT_FAST16_MIN;
+  }
+
+  private static final long INT_FAST32_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST32_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST32_MIN() {
+    return INT_FAST32_MIN;
+  }
+
+  private static final long INT_FAST64_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST64_MIN -9223372036854775808
+   * }
+   */
+  public static long INT_FAST64_MIN() {
+    return INT_FAST64_MIN;
+  }
+
+  private static final int INT_FAST8_MAX = (int) 127L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST8_MAX 127
+   * }
+   */
+  public static int INT_FAST8_MAX() {
+    return INT_FAST8_MAX;
+  }
+
+  private static final long INT_FAST16_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST16_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST16_MAX() {
+    return INT_FAST16_MAX;
+  }
+
+  private static final long INT_FAST32_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST32_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST32_MAX() {
+    return INT_FAST32_MAX;
+  }
+
+  private static final long INT_FAST64_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INT_FAST64_MAX 9223372036854775807
+   * }
+   */
+  public static long INT_FAST64_MAX() {
+    return INT_FAST64_MAX;
+  }
+
+  private static final int UINT_FAST8_MAX = (int) 255L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_FAST8_MAX 255
+   * }
+   */
+  public static int UINT_FAST8_MAX() {
+    return UINT_FAST8_MAX;
+  }
+
+  private static final long UINT_FAST16_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_FAST16_MAX -1
+   * }
+   */
+  public static long UINT_FAST16_MAX() {
+    return UINT_FAST16_MAX;
+  }
+
+  private static final long UINT_FAST32_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_FAST32_MAX -1
+   * }
+   */
+  public static long UINT_FAST32_MAX() {
+    return UINT_FAST32_MAX;
+  }
+
+  private static final long UINT_FAST64_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINT_FAST64_MAX -1
+   * }
+   */
+  public static long UINT_FAST64_MAX() {
+    return UINT_FAST64_MAX;
+  }
+
+  private static final long INTPTR_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INTPTR_MIN -9223372036854775808
+   * }
+   */
+  public static long INTPTR_MIN() {
+    return INTPTR_MIN;
+  }
+
+  private static final long INTPTR_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INTPTR_MAX 9223372036854775807
+   * }
+   */
+  public static long INTPTR_MAX() {
+    return INTPTR_MAX;
+  }
+
+  private static final long UINTPTR_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINTPTR_MAX -1
+   * }
+   */
+  public static long UINTPTR_MAX() {
+    return UINTPTR_MAX;
+  }
+
+  private static final long INTMAX_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INTMAX_MIN -9223372036854775808
+   * }
+   */
+  public static long INTMAX_MIN() {
+    return INTMAX_MIN;
+  }
+
+  private static final long INTMAX_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define INTMAX_MAX 9223372036854775807
+   * }
+   */
+  public static long INTMAX_MAX() {
+    return INTMAX_MAX;
+  }
+
+  private static final long UINTMAX_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define UINTMAX_MAX -1
+   * }
+   */
+  public static long UINTMAX_MAX() {
+    return UINTMAX_MAX;
+  }
+
+  private static final long PTRDIFF_MIN = -9223372036854775808L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define PTRDIFF_MIN -9223372036854775808
+   * }
+   */
+  public static long PTRDIFF_MIN() {
+    return PTRDIFF_MIN;
+  }
+
+  private static final long PTRDIFF_MAX = 9223372036854775807L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define PTRDIFF_MAX 9223372036854775807
+   * }
+   */
+  public static long PTRDIFF_MAX() {
+    return PTRDIFF_MAX;
+  }
+
+  private static final int SIG_ATOMIC_MIN = (int) -2147483648L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define SIG_ATOMIC_MIN -2147483648
+   * }
+   */
+  public static int SIG_ATOMIC_MIN() {
+    return SIG_ATOMIC_MIN;
+  }
+
+  private static final int SIG_ATOMIC_MAX = (int) 2147483647L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define SIG_ATOMIC_MAX 2147483647
+   * }
+   */
+  public static int SIG_ATOMIC_MAX() {
+    return SIG_ATOMIC_MAX;
+  }
+
+  private static final long SIZE_MAX = -1L; // all 64 bits set: an unsigned C maximum seen through a signed long
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define SIZE_MAX -1
+   * }
+   */
+  public static long SIZE_MAX() {
+    return SIZE_MAX;
+  }
+
+  private static final int WCHAR_MIN = (int) -2147483648L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define WCHAR_MIN -2147483648
+   * }
+   */
+  public static int WCHAR_MIN() {
+    return WCHAR_MIN;
+  }
+
+  private static final int WCHAR_MAX = (int) 2147483647L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define WCHAR_MAX 2147483647
+   * }
+   */
+  public static int WCHAR_MAX() {
+    return WCHAR_MAX;
+  }
+
+  private static final int WINT_MIN = (int) 0L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define WINT_MIN 0
+   * }
+   */
+  public static int WINT_MIN() {
+    return WINT_MIN;
+  }
+
+  private static final int WINT_MAX = (int) 4294967295L; // narrows to -1: the same 32 bits as unsigned 4294967295
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define WINT_MAX 4294967295
+   * }
+   */
+  public static int WINT_MAX() {
+    return WINT_MAX;
+  }
+
+  private static final MemorySegment NULL = MemorySegment.ofAddress(0L);
+
+  /** Returns the shared zero-address segment for the C macro: {@snippet lang=c :
+   * #define NULL (void*) 0
+   * }
+   */
+  public static MemorySegment NULL() {
+    return NULL;
+  }
+
+  private static final long DLPACK_FLAG_BITMASK_READ_ONLY = 1L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define DLPACK_FLAG_BITMASK_READ_ONLY 1
+   * }
+   */
+  public static long DLPACK_FLAG_BITMASK_READ_ONLY() {
+    return DLPACK_FLAG_BITMASK_READ_ONLY;
+  }
+
+  private static final long DLPACK_FLAG_BITMASK_IS_COPIED = 2L;
+
+  /** Value of the C constant: {@snippet lang=c :
+   * #define DLPACK_FLAG_BITMASK_IS_COPIED 2
+   * }
+   */
+  public static long DLPACK_FLAG_BITMASK_IS_COPIED() {
+    return DLPACK_FLAG_BITMASK_IS_COPIED;
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/MaxAlignT.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/MaxAlignT.java
new file mode 100644
index 000000000..1216df6f7
--- /dev/null
+++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/MaxAlignT.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.panama;
+
+import static java.lang.foreign.MemoryLayout.PathElement.groupElement;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.GroupLayout;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+import java.lang.foreign.ValueLayout.OfLong;
+import java.util.function.Consumer;
+
+/**
+ * {@snippet lang=c :
+ * struct {
+ *     long long __clang_max_align_nonce1;
+ *     long double __clang_max_align_nonce2;
+ * }
+ * }
+ */
+public class MaxAlignT {
+
+    MaxAlignT() {
+        // Not meant to be invoked directly
+    }
+
+    private static final GroupLayout $LAYOUT = MemoryLayout.structLayout(
+        DlpackH.C_LONG_LONG.withName("__clang_max_align_nonce1"),
+        MemoryLayout.paddingLayout(24) // presumably covers the long double member, which gets no layout here
+    ).withName("$anon$19:9");
+
+    /**
+     * Returns the memory layout describing this struct
+     */
+    public static final GroupLayout layout() {
+        return $LAYOUT;
+    }
+
+    private static final OfLong __clang_max_align_nonce1$LAYOUT = (OfLong)layout().select(groupElement("__clang_max_align_nonce1"));
+
+    /**
+     * Returns the value layout of field:
+     * {@snippet lang=c :
+     * long long __clang_max_align_nonce1
+     * }
+     */
+    public static final OfLong __clang_max_align_nonce1$layout() {
+        return __clang_max_align_nonce1$LAYOUT;
+    }
+
+    private static final long __clang_max_align_nonce1$OFFSET = 0;
+
+    /**
+     * Returns the byte offset, relative to the start of the struct, of field:
+     * {@snippet lang=c :
+     * long long __clang_max_align_nonce1
+     * }
+     */
+    public static final long __clang_max_align_nonce1$offset() {
+        return __clang_max_align_nonce1$OFFSET;
+    }
+
+    /**
+     * Reads from {@code struct} the field:
+     * {@snippet lang=c :
+     * long long __clang_max_align_nonce1
+     * }
+     */
+    public static long __clang_max_align_nonce1(MemorySegment struct) {
+        return struct.get(__clang_max_align_nonce1$layout(), __clang_max_align_nonce1$offset());
+    }
+
+    /**
+     * Writes {@code fieldValue} into {@code struct} for the field:
+     * {@snippet lang=c :
+     * long long __clang_max_align_nonce1
+     * }
+     */
+    public static void __clang_max_align_nonce1(MemorySegment struct, long fieldValue) {
+        struct.set(__clang_max_align_nonce1$layout(), __clang_max_align_nonce1$offset(), fieldValue);
+    }
+
+    /**
+     * Slices {@code array} from the element at {@code index} up to the end of {@code array}.
+     * The returned segment has address {@code array.address() + index * layout().byteSize()}
+     */
+    public static MemorySegment asSlice(MemorySegment array, long index) {
+        return array.asSlice(sizeof() * index);
+    }
+
+    /**
+     * Returns the size of this struct in bytes
+     */
+    public static long sizeof() { return $LAYOUT.byteSize(); }
+
+    /**
+     * Obtains one struct-sized segment ({@code layout().byteSize()} bytes) from {@code allocator}
+     */
+    public static MemorySegment allocate(SegmentAllocator allocator) {
+        return allocator.allocate($LAYOUT);
+    }
+
+    /**
+     * Obtains from {@code allocator} room for {@code elementCount} consecutive structs,
+     * i.e. a segment of {@code elementCount * layout().byteSize()} bytes.
+     */
+    public static MemorySegment allocateArray(long elementCount, SegmentAllocator allocator) {
+        return allocator.allocate(MemoryLayout.sequenceLayout(elementCount, $LAYOUT));
+    }
+
+    /**
+     * Re-sizes {@code addr} to exactly one struct, tied to {@code arena} and the {@code cleanup}
+     * action (if any). The returned segment has size {@code layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(sizeof(), arena, cleanup);
+    }
+
+    /**
+     * Re-sizes {@code addr} to {@code elementCount} structs, tied to {@code arena} and the {@code cleanup}
+     * action (if any). The returned segment has size {@code elementCount * layout().byteSize()}
+     */
+    public static MemorySegment reinterpret(MemorySegment addr, long elementCount, Arena arena, Consumer<MemorySegment> cleanup) {
+        return addr.reinterpret(elementCount * sizeof(), arena, cleanup);
+    }
+}
diff --git a/java/cuvs-java/src/main/java/module-info.java b/java/cuvs-java/src/main/java/module-info.java
new file mode 100644
index 000000000..468252f22
--- /dev/null
+++ b/java/cuvs-java/src/main/java/module-info.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+module com.nvidia.cuvs { // module descriptor under java/cuvs-java/src/main/java
+  exports com.nvidia.cuvs; // only exported package; NOTE(review): com.nvidia.cuvs.common and .panama stay encapsulated - confirm intended
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchTest.java
new file mode 100644
index 000000000..91e6825bc
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchTest.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.nvidia.cuvs.common.SearchResults;
+
+public class BruteForceAndSearchTest {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /**
+   * Builds a brute-force index, round-trips it through a file on disk and checks that both the
+   * original and the reloaded index return the expected results; the whole flow runs ten times.
+   *
+   * @throws Throwable if building, (de)serializing, searching or cleaning up fails
+   */
+  @Test
+  public void testIndexingAndSearchingFlow() throws Throwable {
+    // Sample data and query
+    float[][] dataset = {
+        { 0.74021935f, 0.9209938f },
+        { 0.03902049f, 0.9689629f },
+        { 0.92514056f, 0.4463501f },
+        { 0.6673192f, 0.10993068f }
+      };
+    List<Integer> map = List.of(0, 1, 2, 3);
+    float[][] queries = {
+        { 0.48216683f, 0.0428398f },
+        { 0.5084142f, 0.6545497f },
+        { 0.51260436f, 0.2643005f },
+        { 0.05198065f, 0.5789965f }
+      };
+
+    // Expected search results, one map (id -> distance) per query
+    List<Map<Integer, Float>> expectedResults = Arrays.asList(
+        Map.of(3, 0.038782537f, 2, 0.35904616f, 0, 0.83774555f),
+        Map.of(0, 0.12472606f, 2, 0.21700788f, 1, 0.3191862f),
+        Map.of(3, 0.047766685f, 2, 0.20332813f, 0, 0.48305476f),
+        Map.of(1, 0.15224183f, 0, 0.5906347f, 3, 0.5986643f)
+      );
+
+    for (int j = 0; j < 10; j++) {
+      // Randomly named scratch file that receives the serialized index
+      File indexFile = new File(UUID.randomUUID().toString() + ".bf");
+      try (CuVSResources resources = new CuVSResources()) {
+
+        // Create a query object with the query vectors
+        BruteForceQuery cuvsQuery = new BruteForceQuery.Builder()
+            .withTopK(3)
+            .withQueryVectors(queries)
+            .withMapping(map)
+            .build();
+
+        // Set index parameters
+        BruteForceIndexParams indexParams = new BruteForceIndexParams.Builder()
+            .withNumWriterThreads(32)
+            .build();
+
+        // Create the index with the dataset
+        BruteForceIndex index = new BruteForceIndex.Builder(resources)
+            .withDataset(dataset)
+            .withIndexParams(indexParams)
+            .build();
+        BruteForceIndex loadedIndex = null;
+        try {
+          // Save the index to disk; the stream is closed even if serialize() throws
+          try (FileOutputStream outputStream = new FileOutputStream(indexFile)) {
+            index.serialize(outputStream);
+          }
+
+          // Load a BRUTEFORCE index back from disk
+          try (InputStream inputStream = new FileInputStream(indexFile)) {
+            loadedIndex = new BruteForceIndex.Builder(resources)
+                .from(inputStream)
+                .build();
+          }
+
+          // Search the reloaded index and check its results
+          SearchResults resultsFromLoadedIndex = loadedIndex.search(cuvsQuery);
+          log.info(resultsFromLoadedIndex.getResults().toString());
+          assertEquals(expectedResults, resultsFromLoadedIndex.getResults());
+
+          // Search the original index and check its results
+          SearchResults results = index.search(cuvsQuery);
+          log.info(results.getResults().toString());
+          assertEquals(expectedResults, results.getResults());
+        } finally {
+          // Destroy both indexes even when an assertion above fails
+          index.destroyIndex();
+          if (loadedIndex != null) {
+            loadedIndex.destroyIndex();
+          }
+        }
+      } finally {
+        indexFile.delete(); // simply returns false if the file was never created
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedTest.java
new file mode 100644
index 000000000..64d1eb86e
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedTest.java
@@ -0,0 +1,99 @@
+package com.nvidia.cuvs;
+
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.carrotsearch.randomizedtesting.RandomizedRunner;
+
+@RunWith(RandomizedRunner.class)
+public class BruteForceRandomizedTest extends CuVSTestCase {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  @Before
+  public void setup() {
+    initializeRandom();
+    log.info("Random context initialized for test.");
+  }
+
+  @Test
+  public void testResultsTopKWithRandomValues() throws Throwable {
+    for (int i = 0; i < 10; i++) {
+      tmpResultsTopKWithRandomValues();
+    }
+  }
+
+  private void tmpResultsTopKWithRandomValues() throws Throwable {
+    int DATASET_SIZE_LIMIT = 10_000;
+    int DIMENSIONS_LIMIT = 2048;
+    int NUM_QUERIES_LIMIT = 10;
+    int TOP_K_LIMIT = 64; // nocommit This fails beyond 64
+
+    int datasetSize = random.nextInt(DATASET_SIZE_LIMIT) + 1;
+    int dimensions = random.nextInt(DIMENSIONS_LIMIT) + 1;
+    int numQueries = random.nextInt(NUM_QUERIES_LIMIT) + 1;
+    int topK = Math.min(random.nextInt(TOP_K_LIMIT) + 1, datasetSize);
+
+    if (datasetSize < topK)
+      datasetSize = topK;
+
+    // Generate a random dataset
+    float[][] dataset = generateData(random, datasetSize, dimensions);
+
+    // Generate random query vectors
+    float[][] queries = generateData(random, numQueries, dimensions);
+
+    log.info("Dataset size: {}x{}", datasetSize, dimensions);
+    log.info("Query size: {}x{}", numQueries, dimensions);
+    log.info("TopK: {}", topK);
+
+    // Debugging: Log dataset and queries
+    if (log.isDebugEnabled()) {
+      log.debug("Dataset:");
+      for (float[] row : dataset) {
+        log.debug(java.util.Arrays.toString(row));
+      }
+      log.debug("Queries:");
+      for (float[] query : queries) {
+        log.debug(java.util.Arrays.toString(query));
+      }
+    }
+    // Sanity checks
+    assert dataset.length > 0 : "Dataset is empty.";
+    assert queries.length > 0 : "Queries are empty.";
+    assert dimensions > 0 : "Invalid dimensions.";
+    assert topK > 0 && topK <= datasetSize : "Invalid topK value.";
+
+    // Generate expected results using brute force
+    List<List<Integer>> expected = generateExpectedResults(topK, dataset, queries, log);
+
+    // Create CuVS index and query
+    try (CuVSResources resources = new CuVSResources()) {
+
+      BruteForceQuery query = new BruteForceQuery.Builder()
+          .withTopK(topK)
+          .withQueryVectors(queries)
+          .build();
+
+      BruteForceIndexParams indexParams = new BruteForceIndexParams.Builder()
+          .withNumWriterThreads(32)
+          .build();
+
+      BruteForceIndex index = new BruteForceIndex.Builder(resources)
+          .withDataset(dataset)
+          .withIndexParams(indexParams)
+          .build();
+
+      log.info("Index built successfully. Executing search...");
+      BruteForceSearchResults results = index.search(query);
+
+      compareResults(results, expected, topK, datasetSize, numQueries);
+    }
+  }
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java
new file mode 100644
index 000000000..934e60b1c
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo;
+import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType;
+import com.nvidia.cuvs.common.SearchResults;
+
+public class CagraBuildAndSearchTest {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /**
+   * Builds a CAGRA index, round-trips it through a file on disk and checks that both the
+   * original and the reloaded index return the expected results; the whole flow runs ten times.
+   *
+   * @throws Throwable if building, (de)serializing, searching or cleaning up fails
+   */
+  @Test
+  public void testIndexingAndSearchingFlow() throws Throwable {
+    // Sample data and query
+    float[][] dataset = {
+        { 0.74021935f, 0.9209938f },
+        { 0.03902049f, 0.9689629f },
+        { 0.92514056f, 0.4463501f },
+        { 0.6673192f, 0.10993068f }
+      };
+    List<Integer> map = List.of(0, 1, 2, 3);
+    float[][] queries = {
+        { 0.48216683f, 0.0428398f },
+        { 0.5084142f, 0.6545497f },
+        { 0.51260436f, 0.2643005f },
+        { 0.05198065f, 0.5789965f }
+      };
+
+    // Expected search results, one map (id -> distance) per query
+    List<Map<Integer, Float>> expectedResults = Arrays.asList(
+        Map.of(3, 0.038782578f, 2, 0.3590463f, 0, 0.83774555f),
+        Map.of(0, 0.12472608f, 2, 0.21700792f, 1, 0.31918612f),
+        Map.of(3, 0.047766715f, 2, 0.20332818f, 0, 0.48305473f),
+        Map.of(1, 0.15224178f, 0, 0.59063464f, 3, 0.5986642f));
+
+    for (int j = 0; j < 10; j++) {
+      // Randomly named scratch file that receives the serialized index
+      File indexFile = new File(UUID.randomUUID().toString() + ".cag");
+      try (CuVSResources resources = new CuVSResources()) {
+        // Configure index parameters
+        CagraIndexParams indexParams = new CagraIndexParams.Builder(resources)
+            .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT)
+            .withGraphDegree(1)
+            .withIntermediateGraphDegree(2)
+            .withNumWriterThreads(32)
+            .withMetric(CuvsDistanceType.L2Expanded)
+            .build();
+
+        // Create the index with the dataset
+        CagraIndex index = new CagraIndex.Builder(resources)
+            .withDataset(dataset)
+            .withIndexParams(indexParams)
+            .build();
+        CagraIndex loadedIndex = null;
+        try {
+          // Save the index to disk; the stream is closed even if serialize() throws
+          try (FileOutputStream outputStream = new FileOutputStream(indexFile)) {
+            index.serialize(outputStream);
+          }
+
+          // Load a CAGRA index back from disk
+          try (InputStream inputStream = new FileInputStream(indexFile)) {
+            loadedIndex = new CagraIndex.Builder(resources)
+                .from(inputStream)
+                .build();
+          }
+
+          // Configure search parameters
+          CagraSearchParams searchParams = new CagraSearchParams.Builder(resources)
+              .build();
+          // Create a query object with the query vectors
+          CagraQuery cuvsQuery = new CagraQuery.Builder()
+              .withTopK(3)
+              .withSearchParams(searchParams)
+              .withQueryVectors(queries)
+              .withMapping(map)
+              .build();
+
+          // Search the original index and check its results
+          SearchResults results = index.search(cuvsQuery);
+          log.info(results.getResults().toString());
+          assertEquals(expectedResults, results.getResults());
+
+          // Search the deserialized index and check its results
+          results = loadedIndex.search(cuvsQuery);
+          log.info(results.getResults().toString());
+          assertEquals(expectedResults, results.getResults());
+        } finally {
+          // Destroy both indexes even when an assertion above fails
+          index.destroyIndex();
+          if (loadedIndex != null) {
+            loadedIndex.destroyIndex();
+          }
+        }
+      } finally {
+        indexFile.delete(); // simply returns false if the file was never created
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java
new file mode 100644
index 000000000..fc7570133
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java
@@ -0,0 +1,101 @@
+package com.nvidia.cuvs;
+
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.carrotsearch.randomizedtesting.RandomizedRunner;
+import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo;
+
+@RunWith(RandomizedRunner.class)
+public class CagraRandomizedTest extends CuVSTestCase {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /** Picks up the randomized-testing {@code Random} before every test. */
+  @Before
+  public void setup() {
+    initializeRandom();
+    log.info("Random context initialized for test.");
+  }
+
+  /** Runs ten randomized build-and-search rounds. */
+  @Test
+  public void testResultsTopKWithRandomValues() throws Throwable {
+    for (int i = 0; i < 10; i++) {
+      tmpResultsTopKWithRandomValues();
+    }
+  }
+
+  /**
+   * Builds a CAGRA index over a random dataset, searches it with random queries and compares the
+   * hits with neighbours computed in plain Java; the index is destroyed even if a check fails.
+   * @throws Throwable if index construction, search or cleanup fails
+   */
+  private void tmpResultsTopKWithRandomValues() throws Throwable {
+    int DATASET_SIZE_LIMIT = 10_000;
+    int DIMENSIONS_LIMIT = 2048;
+    int NUM_QUERIES_LIMIT = 10;
+    int TOP_K_LIMIT = 64; // TODO: topK values above 64 currently fail
+
+    int datasetSize = random.nextInt(DATASET_SIZE_LIMIT) + 1;
+    int dimensions = random.nextInt(DIMENSIONS_LIMIT) + 1;
+    int numQueries = random.nextInt(NUM_QUERIES_LIMIT) + 1;
+    int topK = Math.min(random.nextInt(TOP_K_LIMIT) + 1, datasetSize); // never exceeds datasetSize
+
+    // Generate a random dataset and random query vectors
+    float[][] dataset = generateData(random, datasetSize, dimensions);
+    float[][] queries = generateData(random, numQueries, dimensions);
+
+    log.info("Dataset size: {}x{}", datasetSize, dimensions);
+    log.info("Query size: {}x{}", numQueries, dimensions);
+    log.info("TopK: {}", topK);
+    // Debugging: Log dataset and queries
+    if (log.isDebugEnabled()) {
+      log.debug("Dataset:");
+      for (float[] row : dataset) {
+        log.debug(java.util.Arrays.toString(row));
+      }
+      log.debug("Queries:");
+      for (float[] query : queries) {
+        log.debug(java.util.Arrays.toString(query));
+      }
+    }
+    // Sanity checks (evaluated only when assertions are enabled)
+    assert dataset.length > 0 : "Dataset is empty.";
+    assert queries.length > 0 : "Queries are empty.";
+    assert dimensions > 0 : "Invalid dimensions.";
+    assert topK > 0 && topK <= datasetSize : "Invalid topK value.";
+    // Generate expected results using brute force
+    List<List<Integer>> expected = generateExpectedResults(topK, dataset, queries, log);
+    // Create CuVS index and query
+    try (CuVSResources resources = new CuVSResources()) {
+      CagraIndexParams indexParams = new CagraIndexParams.Builder(resources)
+          .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT)
+          .build();
+      CagraIndex index = new CagraIndex.Builder(resources)
+          .withDataset(dataset)
+          .withIndexParams(indexParams)
+          .build();
+      try {
+        log.info("Index built successfully.");
+        // Execute search and retrieve results
+        CagraQuery query = new CagraQuery.Builder()
+            .withQueryVectors(queries)
+            .withTopK(topK)
+            .withSearchParams(new CagraSearchParams.Builder(resources).build())
+            .build();
+        log.info("Query built successfully. Executing search...");
+        CagraSearchResults results = index.search(query);
+        compareResults(results, expected, topK, datasetSize, numQueries);
+      } finally {
+        index.destroyIndex();
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java
new file mode 100644
index 000000000..bd50dc759
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java
@@ -0,0 +1,88 @@
+package com.nvidia.cuvs;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.TreeMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.carrotsearch.randomizedtesting.RandomizedContext;
+import com.nvidia.cuvs.common.SearchResults;
+
+public abstract class CuVSTestCase {
+  protected Random random;
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /** Stores the {@link Random} of the current randomized-testing context and logs the runner seed. */
+  protected void initializeRandom() {
+    random = RandomizedContext.current().getRandom();
+    log.info("Test seed: {}", RandomizedContext.current().getRunnerSeedAsString());
+  }
+
+  /** Creates a {@code rows x cols} matrix whose entries are {@code random.nextFloat() * 100}. */
+  protected float[][] generateData(Random random, int rows, int cols) {
+    float[][] data = new float[rows][cols];
+    for (float[] row : data) {
+      for (int j = 0; j < cols; j++) {
+        row[j] = random.nextFloat() * 100;
+      }
+    }
+    return data;
+  }
+
+  /**
+   * For each query, lists dataset row indices by ascending Euclidean distance, cut to min(2 * topK, rows).
+   */
+  protected List<List<Integer>> generateExpectedResults(int topK, float[][] dataset, float[][] queries, Logger log) {
+    List<List<Integer>> neighborsResult = new ArrayList<>();
+    int dimensions = dataset[0].length;
+    for (float[] query : queries) {
+      Map<Integer, Double> distances = new TreeMap<>();
+      for (int j = 0; j < dataset.length; j++) {
+        double distance = 0;
+        for (int k = 0; k < dimensions; k++) {
+          distance += (query[k] - dataset[j][k]) * (query[k] - dataset[j][k]);
+        }
+        distances.put(j, Math.sqrt(distance));
+      }
+      // Sort by distance; the stream is ordered by row index and the sort is stable, so ties keep index order
+      List<Integer> neighbors = distances.entrySet().stream().sorted(Map.Entry.comparingByValue())
+          .map(Map.Entry::getKey).toList();
+      neighborsResult.add(neighbors.subList(0, Math.min(topK * 2, dataset.length)));
+    }
+    log.info("Expected results generated successfully.");
+    return neighborsResult;
+  }
+
+  /**
+   * Asserts one result set per query, min(topK, datasetSize) hits each, and that the (up to) five closest hits are expected.
+   */
+  protected void compareResults(SearchResults results, List<List<Integer>> expected, int topK, int datasetSize,
+      int numQueries) {
+    List<Map<Integer, Float>> hitsPerQuery = results.getResults();
+    assertEquals("Unexpected number of result sets.", numQueries, hitsPerQuery.size());
+    int expectedHits = Math.min(topK, datasetSize);
+    for (int i = 0; i < numQueries; i++) {
+      Map<Integer, Float> result = hitsPerQuery.get(i);
+      log.info("Results returned for query {}: {}", i, result.keySet());
+      log.info("Expected results for query {}: {}", i, expected.get(i).subList(0, expectedHits));
+      assertEquals("TopK mismatch for query.", expectedHits, result.size());
+      // Sort result by values (distances) and extract keys
+      List<Integer> sortedResultKeys = result.entrySet().stream().sorted(Map.Entry.comparingByValue())
+          .map(Map.Entry::getKey)
+          .toList();
+      // Only the first 5 hits are checked; each must occur in the expected list (up to 2*topK entries)
+      for (int j = 0; j < Math.min(5, sortedResultKeys.size()); j++) {
+        assertTrue("Not found in expected list: " + sortedResultKeys.get(j),
+            expected.get(i).contains(sortedResultKeys.get(j)));
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchTest.java
new file mode 100644
index 000000000..712e7edf9
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchTest.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo;
+import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType;
+
+public class HnswBuildAndSearchTest {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /**
+   * Builds a CAGRA index, serializes it in HNSW form to a file, loads that file as an HNSW index
+   * and checks its search results; both indexes and the file are cleaned up even on failure.
+   *
+   * @throws Throwable if building, (de)serializing, searching or cleaning up fails
+   */
+  @Test
+  public void testIndexingAndSearchingFlow() throws Throwable {
+    // Sample data and query
+    float[][] dataset = {
+        { 0.74021935f, 0.9209938f },
+        { 0.03902049f, 0.9689629f },
+        { 0.92514056f, 0.4463501f },
+        { 0.6673192f, 0.10993068f }
+      };
+    List<Integer> map = List.of(0, 1, 2, 3);
+    float[][] queries = {
+        { 0.48216683f, 0.0428398f },
+        { 0.5084142f, 0.6545497f },
+        { 0.51260436f, 0.2643005f },
+        { 0.05198065f, 0.5789965f }
+      };
+
+    // Expected search results, one map (id -> distance) per query
+    List<Map<Integer, Float>> expectedResults = Arrays.asList(
+        Map.of(3, 0.038782578f, 2, 0.35904628f, 0, 0.8377455f),
+        Map.of(0, 0.12472608f, 2, 0.21700794f, 1, 0.31918612f),
+        Map.of(3, 0.047766715f, 2, 0.20332818f, 0, 0.48305473f),
+        Map.of(1, 0.15224178f, 0, 0.59063464f, 3, 0.59866416f)
+      );
+
+    for (int j = 0; j < 10; j++) {
+      // Randomly named scratch file that receives the serialized HNSW index
+      File hnswIndexFile = new File(UUID.randomUUID().toString() + ".hnsw");
+      try (CuVSResources resources = new CuVSResources()) {
+        // Configure index parameters
+        CagraIndexParams indexParams = new CagraIndexParams.Builder(resources)
+            .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.IVF_PQ)
+            .withGraphDegree(64)
+            .withIntermediateGraphDegree(128)
+            .withNumWriterThreads(32)
+            .withMetric(CuvsDistanceType.L2Expanded)
+            .build();
+
+        // Create the index with the dataset
+        CagraIndex index = new CagraIndex.Builder(resources)
+            .withDataset(dataset)
+            .withIndexParams(indexParams)
+            .build();
+        HnswIndex hnswIndex = null;
+        try {
+          // Save the HNSW index to disk; the stream is closed even if serialization throws
+          try (FileOutputStream outputStream = new FileOutputStream(hnswIndexFile)) {
+            index.serializeToHNSW(outputStream);
+          }
+
+          // Load the HNSW index back from disk
+          HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder(resources)
+              .withVectorDimension(2)
+              .build();
+          try (InputStream inputStreamHNSW = new FileInputStream(hnswIndexFile)) {
+            hnswIndex = new HnswIndex.Builder(resources)
+                .from(inputStreamHNSW)
+                .withIndexParams(hnswIndexParams)
+                .build();
+          }
+
+          HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder(resources)
+              .build();
+          HnswQuery hnswQuery = new HnswQuery.Builder()
+              .withMapping(map)
+              .withQueryVectors(queries)
+              .withSearchParams(hnswSearchParams)
+              .withTopK(3)
+              .build();
+          // Search the HNSW index and check its results
+          HnswSearchResults results = hnswIndex.search(hnswQuery);
+          log.info(results.getResults().toString());
+          assertEquals(expectedResults, results.getResults());
+        } finally {
+          index.destroyIndex();
+          if (hnswIndex != null) {
+            hnswIndex.destroyIndex();
+          }
+        }
+      } finally {
+        hnswIndexFile.delete(); // simply returns false if the file was never created
+      }
+    }
+  }
+}
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedTest.java
new file mode 100644
index 000000000..c292309f7
--- /dev/null
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedTest.java
@@ -0,0 +1,136 @@
+package com.nvidia.cuvs;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+import java.util.UUID;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.carrotsearch.randomizedtesting.RandomizedRunner;
+import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo;
+import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType;
+
+@RunWith(RandomizedRunner.class)
+public class HnswRandomizedTest extends CuVSTestCase {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  @Before
+  public void setup() {
+    initializeRandom();
+    log.info("Random context initialized for test.");
+  }
+
+  @Test
+  public void testResultsTopKWithRandomValues() throws Throwable {
+    for (int i = 0; i < 10; i++) {
+      tmpResultsTopKWithRandomValues();
+    }
+  }
+
+  private void tmpResultsTopKWithRandomValues() throws Throwable {
+    int DATASET_SIZE_LIMIT = 10_000;
+    int DIMENSIONS_LIMIT = 2048;
+    int NUM_QUERIES_LIMIT = 10;
+    int TOP_K_LIMIT = 64; // nocommit This fails beyond 64
+
+    int datasetSize = random.nextInt(DATASET_SIZE_LIMIT) + 1;
+    int dimensions = random.nextInt(DIMENSIONS_LIMIT) + 1;
+    int numQueries = random.nextInt(NUM_QUERIES_LIMIT) + 1;
+    int topK = Math.min(random.nextInt(TOP_K_LIMIT) + 1, datasetSize);
+
+    if (datasetSize < topK)
+      datasetSize = topK;
+
+    // Generate a random dataset
+    float[][] dataset = generateData(random, datasetSize, dimensions);
+
+    // Generate random query vectors
+    float[][] queries = generateData(random, numQueries, dimensions);
+
+    log.info("Dataset size: {}x{}", datasetSize, dimensions);
+    log.info("Query size: {}x{}", numQueries, dimensions);
+    log.info("TopK: {}", topK);
+
+    // Debugging: Log dataset and queries
+    if (log.isDebugEnabled()) {
+      log.debug("Dataset:");
+      for (float[] row : dataset) {
+        log.debug(java.util.Arrays.toString(row));
+      }
+      log.debug("Queries:");
+      for (float[] query : queries) {
+        log.debug(java.util.Arrays.toString(query));
+      }
+    }
+    // Sanity checks
+    assert dataset.length > 0 : "Dataset is empty.";
+    assert queries.length > 0 : "Queries are empty.";
+    assert dimensions > 0 : "Invalid dimensions.";
+    assert topK > 0 && topK <= datasetSize : "Invalid topK value.";
+
+    // Generate expected results using brute force
+    List<List<Integer>> expected = generateExpectedResults(topK, dataset, queries, log);
+
+    // Create CuVS index and query
+    try (CuVSResources resources = new CuVSResources()) {
+
+      // Configure index parameters
+      CagraIndexParams indexParams = new CagraIndexParams.Builder(resources)
+          .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT)
+          .withGraphDegree(64)
+          .withIntermediateGraphDegree(128)
+          .withNumWriterThreads(32)
+          .withMetric(CuvsDistanceType.L2Expanded)
+          .build();
+
+      // Create the index with the dataset
+      CagraIndex index = new CagraIndex.Builder(resources)
+          .withDataset(dataset)
+          .withIndexParams(indexParams)
+          .build();
+
+      // Saving the HNSW index on to the disk.
+      String hnswIndexFileName = UUID.randomUUID().toString() + ".hnsw";
+      index.serializeToHNSW(new FileOutputStream(hnswIndexFileName));
+
+      HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder(resources)
+          .withVectorDimension(dimensions)
+          .build();
+      InputStream inputStreamHNSW = new FileInputStream(hnswIndexFileName);
+      File hnswIndexFile = new File(hnswIndexFileName);
+
+      HnswIndex hnswIndex = new HnswIndex.Builder(resources)
+          .from(inputStreamHNSW)
+          .withIndexParams(hnswIndexParams)
+          .build();
+
+      HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder(resources)
+          .withNumThreads(32)
+          .build();
+
+      HnswQuery hnswQuery = new HnswQuery.Builder()
+          .withQueryVectors(queries)
+          .withSearchParams(hnswSearchParams)
+          .withTopK(topK)
+          .build();
+
+      log.info("Index built successfully. Executing search...");
+      HnswSearchResults results = hnswIndex.search(hnswQuery);
+
+      if (hnswIndexFile.exists()) {
+        hnswIndexFile.delete();
+      }
+
+      compareResults(results, expected, topK, datasetSize, numQueries);
+    }
+  }
+}
diff --git a/java/examples/.gitignore b/java/examples/.gitignore
new file mode 100644
index 000000000..b83d22266
--- /dev/null
+++ b/java/examples/.gitignore
@@ -0,0 +1 @@
+/target/
diff --git a/java/examples/README.md b/java/examples/README.md
new file mode 100644
index 000000000..d05d7b911
--- /dev/null
+++ b/java/examples/README.md
@@ -0,0 +1,8 @@
+Building and Running
+--------------------
+
+Make sure to have JDK 22 and Maven 3.9.6+.
+
+    mvn clean compile assembly:single
+
+    java --enable-native-access=ALL-UNNAMED -jar ./target/cuvs-java-examples-25.04.0-jar-with-dependencies.jar
diff --git a/java/examples/pom.xml b/java/examples/pom.xml
new file mode 100644
index 000000000..41ce5a9c4
--- /dev/null
+++ b/java/examples/pom.xml
@@ -0,0 +1,105 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.nvidia.cuvs.examples</groupId>
+  <artifactId>cuvs-java-examples</artifactId>
+  <!-- NOTE: The version automatically gets updated when ci/release/update-version.sh is invoked. -->
+  <!--CUVS_JAVA#VERSION_UPDATE_MARKER_START--><version>25.04.0</version><!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->
+  <name>cuvs-java-examples</name>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <maven.compiler.release>22</maven.compiler.release>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.nvidia.cuvs</groupId>
+      <artifactId>cuvs-java</artifactId>
+      <!-- NOTE: The version automatically gets updated when ci/release/update-version.sh is invoked. -->
+      <!--CUVS_JAVA#VERSION_UPDATE_MARKER_START--><version>25.04.0</version><!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>2.0.13</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <version>2.0.13</version>
+      <scope>runtime</scope>
+   </dependency>
+  </dependencies>
+
+  <build>
+    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
+      <plugins>
+        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
+        <plugin>
+          <artifactId>maven-clean-plugin</artifactId>
+          <version>3.4.0</version>
+        </plugin>
+        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
+        <plugin>
+          <artifactId>maven-resources-plugin</artifactId>
+          <version>3.3.1</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>3.13.0</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <version>3.3.0</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>3.4.2</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-install-plugin</artifactId>
+          <version>3.1.2</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-deploy-plugin</artifactId>
+          <version>3.1.2</version>
+        </plugin>
+        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
+        <plugin>
+          <artifactId>maven-site-plugin</artifactId>
+          <version>3.12.1</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-project-info-reports-plugin</artifactId>
+          <version>3.6.1</version>
+        </plugin>
+        <plugin>
+          <artifactId>maven-assembly-plugin</artifactId>
+          <version>3.7.1</version>
+          <executions>
+            <execution>
+              <!-- the id is used for inheritance merges; the goal is bound to the package phase -->
+              <id>make-jar-with-dependencies</id>
+              <phase>package</phase>
+              <goals>
+                <goal>single</goal>
+              </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <archive>
+              <manifest>
+                <mainClass>com.nvidia.cuvs.examples.CagraExample</mainClass>
+              </manifest>
+            </archive>
+            <descriptorRefs>
+              <descriptorRef>jar-with-dependencies</descriptorRef>
+            </descriptorRefs>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+</project>
diff --git a/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java b/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java
new file mode 100644
index 000000000..5f72d92fc
--- /dev/null
+++ b/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java
@@ -0,0 +1,90 @@
+package com.nvidia.cuvs.examples;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.UUID;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.nvidia.cuvs.BruteForceIndex;
+import com.nvidia.cuvs.BruteForceIndexParams;
+import com.nvidia.cuvs.BruteForceQuery;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.common.SearchResults;
+
+/** Example: build a brute-force index, round-trip it through a file and search both copies. */
+public class BruteForceExample {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static void main(String[] args) throws Throwable {
+
+    // Sample data and query
+    float[][] dataset = {
+        { 0.74021935f, 0.9209938f },
+        { 0.03902049f, 0.9689629f },
+        { 0.92514056f, 0.4463501f },
+        { 0.6673192f, 0.10993068f }
+    };
+
+    float[][] queries = {
+        { 0.48216683f, 0.0428398f },
+        { 0.5084142f, 0.6545497f },
+        { 0.51260436f, 0.2643005f },
+        { 0.05198065f, 0.5789965f }
+    };
+
+    try (CuVSResources resources = new CuVSResources()) {
+      // Create a query object with the query vectors
+      BruteForceQuery cuvsQuery = new BruteForceQuery.Builder()
+          .withTopK(3)
+          .withQueryVectors(queries)
+          .build();
+
+      // Set index parameters
+      BruteForceIndexParams indexParams = new BruteForceIndexParams.Builder()
+          .build();
+
+      // Create the index with the dataset
+      BruteForceIndex index = new BruteForceIndex.Builder(resources)
+          .withDataset(dataset)
+          .withIndexParams(indexParams)
+          .build();
+
+      File indexFile = new File(UUID.randomUUID().toString() + ".bf");
+      BruteForceIndex loadedIndex = null;
+      try {
+        // Save the index to disk; closing the stream flushes it before it is read back.
+        try (FileOutputStream outputStream = new FileOutputStream(indexFile)) {
+          index.serialize(outputStream);
+        }
+
+        // Load a BRUTEFORCE index from disk; the stream stays open while it is searched.
+        try (InputStream inputStream = new FileInputStream(indexFile)) {
+          loadedIndex = new BruteForceIndex.Builder(resources)
+              .from(inputStream)
+              .build();
+
+          // Search the loaded index and log its results
+          SearchResults resultsFromLoadedIndex = loadedIndex.search(cuvsQuery);
+          log.info(resultsFromLoadedIndex.getResults().toString());
+        }
+
+        // Search the original index and log its results
+        SearchResults results = index.search(cuvsQuery);
+        log.info(results.getResults().toString());
+      } finally {
+        // Cleanup: runs even if serialization or a search throws.
+        indexFile.delete();
+        index.destroyIndex();
+        if (loadedIndex != null) {
+          loadedIndex.destroyIndex();
+        }
+      }
+    }
+  }
+}
diff --git a/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java b/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java
new file mode 100644
index 000000000..f561ce69d
--- /dev/null
+++ b/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java
@@ -0,0 +1,100 @@
+package com.nvidia.cuvs.examples;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.UUID;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.nvidia.cuvs.CagraIndex;
+import com.nvidia.cuvs.CagraIndexParams;
+import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo;
+import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType;
+import com.nvidia.cuvs.CagraQuery;
+import com.nvidia.cuvs.CagraSearchParams;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.common.SearchResults;
+
+/** Example: build a CAGRA index, round-trip it through a file and search both copies. */
+public class CagraExample {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static void main(String[] args) throws Throwable {
+
+    // Sample data and query
+    float[][] dataset = {
+        { 0.74021935f, 0.9209938f },
+        { 0.03902049f, 0.9689629f },
+        { 0.92514056f, 0.4463501f },
+        { 0.6673192f, 0.10993068f }
+    };
+
+    float[][] queries = {
+        { 0.48216683f, 0.0428398f },
+        { 0.5084142f, 0.6545497f },
+        { 0.51260436f, 0.2643005f },
+        { 0.05198065f, 0.5789965f }
+    };
+
+    try (CuVSResources resources = new CuVSResources()) {
+      // Configure index parameters
+      CagraIndexParams indexParams = new CagraIndexParams.Builder(resources)
+          .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT)
+          .withGraphDegree(1)
+          .withIntermediateGraphDegree(2)
+          .withMetric(CuvsDistanceType.L2Expanded)
+          .build();
+
+      // Create the index with the dataset
+      CagraIndex index = new CagraIndex.Builder(resources)
+          .withDataset(dataset)
+          .withIndexParams(indexParams)
+          .build();
+
+      File indexFile = new File(UUID.randomUUID().toString() + ".cag");
+      CagraIndex loadedIndex = null;
+      try {
+        // Save the index to disk; closing the stream flushes it before it is read back.
+        try (FileOutputStream outputStream = new FileOutputStream(indexFile)) {
+          index.serialize(outputStream);
+        }
+
+        // Load a CAGRA index from disk; the stream stays open while the loaded index is used.
+        try (InputStream inputStream = new FileInputStream(indexFile)) {
+          loadedIndex = new CagraIndex.Builder(resources)
+              .from(inputStream)
+              .build();
+          // Configure search parameters
+          CagraSearchParams searchParams = new CagraSearchParams.Builder(resources).build();
+
+          // Create a query object with the query vectors
+          CagraQuery cuvsQuery = new CagraQuery.Builder()
+              .withTopK(3)
+              .withSearchParams(searchParams)
+              .withQueryVectors(queries)
+              .build();
+
+          // Search the original index and log its results
+          SearchResults results = index.search(cuvsQuery);
+          log.info(results.getResults().toString());
+
+          // Search from deserialized index and log its results
+          results = loadedIndex.search(cuvsQuery);
+          log.info(results.getResults().toString());
+        }
+      } finally {
+        // Cleanup: runs even if serialization or a search throws.
+        indexFile.delete();
+        index.destroyIndex();
+        if (loadedIndex != null) {
+          loadedIndex.destroyIndex();
+        }
+      }
+    }
+  }
+}
diff --git a/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java b/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java
new file mode 100644
index 000000000..0f13be3bc
--- /dev/null
+++ b/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java
@@ -0,0 +1,99 @@
+package com.nvidia.cuvs.examples;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.UUID;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.nvidia.cuvs.CagraIndex;
+import com.nvidia.cuvs.CagraIndexParams;
+import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo;
+import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.HnswIndex;
+import com.nvidia.cuvs.HnswIndexParams;
+import com.nvidia.cuvs.HnswQuery;
+import com.nvidia.cuvs.HnswSearchParams;
+import com.nvidia.cuvs.HnswSearchResults;
+
+/** Example: build a CAGRA index, export it in HNSW format, load it as HNSW and search it. */
+public class HnswExample {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static void main(String[] args) throws Throwable {
+
+    // Sample data and query
+    float[][] dataset = {
+        { 0.74021935f, 0.9209938f },
+        { 0.03902049f, 0.9689629f },
+        { 0.92514056f, 0.4463501f },
+        { 0.6673192f, 0.10993068f }
+    };
+
+    float[][] queries = {
+        { 0.48216683f, 0.0428398f },
+        { 0.5084142f, 0.6545497f },
+        { 0.51260436f, 0.2643005f },
+        { 0.05198065f, 0.5789965f }
+    };
+
+    try (CuVSResources resources = new CuVSResources()) {
+      // Configure index parameters
+      CagraIndexParams indexParams = new CagraIndexParams.Builder(resources)
+          .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.IVF_PQ)
+          .withGraphDegree(64)
+          .withIntermediateGraphDegree(128)
+          .withNumWriterThreads(32)
+          .withMetric(CuvsDistanceType.L2Expanded)
+          .build();
+
+      // Create the index with the dataset
+      CagraIndex index = new CagraIndex.Builder(resources)
+          .withDataset(dataset)
+          .withIndexParams(indexParams)
+          .build();
+
+      File hnswIndexFile = new File(UUID.randomUUID().toString() + ".hnsw");
+      HnswIndex hnswIndex = null;
+      try {
+        // Save the index in HNSW format; closing the stream flushes it before it is read back.
+        try (FileOutputStream outputStreamHNSW = new FileOutputStream(hnswIndexFile)) {
+          index.serializeToHNSW(outputStreamHNSW);
+        }
+        HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder(resources)
+            .withVectorDimension(2).build();
+
+        // Load the HNSW index from disk; the stream stays open until the search has completed.
+        try (InputStream inputStreamHNSW = new FileInputStream(hnswIndexFile)) {
+          hnswIndex = new HnswIndex.Builder(resources)
+              .from(inputStreamHNSW)
+              .withIndexParams(hnswIndexParams)
+              .build();
+          HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder(resources).build();
+          HnswQuery hnswQuery = new HnswQuery.Builder()
+              .withQueryVectors(queries)
+              .withSearchParams(hnswSearchParams)
+              .withTopK(3)
+              .build();
+
+          // Search the HNSW index and log the results
+          HnswSearchResults results = hnswIndex.search(hnswQuery);
+          log.info(results.getResults().toString());
+        }
+      } finally {
+        // Cleanup: runs even if serialization or the search throws.
+        hnswIndexFile.delete();
+        index.destroyIndex();
+        if (hnswIndex != null) {
+          hnswIndex.destroyIndex();
+        }
+      }
+    }
+  }
+}
diff --git a/java/examples/src/main/resources/log4j2.xml b/java/examples/src/main/resources/log4j2.xml
new file mode 100644
index 000000000..bf0eb598c
--- /dev/null
+++ b/java/examples/src/main/resources/log4j2.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Configuration status="INFO">
+    <Appenders>
+        <Console name="Console" target="SYSTEM_OUT">
+            <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
+        </Console>
+    </Appenders>
+
+    <Loggers>
+        <Root level="debug">
+            <AppenderRef ref="Console"/>
+        </Root>
+    </Loggers>
+</Configuration>
diff --git a/java/internal/CMakeLists.txt b/java/internal/CMakeLists.txt
new file mode 100644
index 000000000..3bd316e2d
--- /dev/null
+++ b/java/internal/CMakeLists.txt
@@ -0,0 +1,62 @@
+# =============================================================================
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+
+include(rapids_config.cmake)
+include(rapids-cmake)
+include(rapids-cpm)
+include(rapids-export)
+include(rapids-find)
+rapids_cpm_init()
+
+project(
+  cuvs-java
+  VERSION "${RAPIDS_VERSION}"
+  LANGUAGES CXX C
+)
+
+# Both packages are linked unconditionally by cuvs_java below, so fail early if one is missing.
+find_package(OpenMP REQUIRED)
+find_package(Threads REQUIRED)
+
+option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulting to local files"
+       ON
+)
+
+if(FIND_CUVS_CPP)
+  find_package(cuvs "${RAPIDS_VERSION}" REQUIRED COMPONENTS c_api)
+  if(NOT TARGET cuvs::c_api)
+    message(FATAL_ERROR
+      "Building against a preexisting libcuvs library requires the compiled libcuvs to have been built!"
+    )
+  endif()
+else()
+  set(cuvs_FOUND OFF)
+endif()
+
+if(NOT cuvs_FOUND)
+  set(BUILD_TESTS OFF)
+  set(BUILD_C_LIBRARY ON)
+  add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL)
+endif()
+
+include(get_dlpack.cmake)
+
+add_library(cuvs_java SHARED src/cuvs_java.c)
+target_include_directories(cuvs_java PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>")
+target_link_libraries(
+  cuvs_java PRIVATE cuvs::c_api $<TARGET_NAME_IF_EXISTS:conda_env> OpenMP::OpenMP_CXX
+                    Threads::Threads
+)
diff --git a/java/internal/VERSION b/java/internal/VERSION
new file mode 120000
index 000000000..558194c5a
--- /dev/null
+++ b/java/internal/VERSION
@@ -0,0 +1 @@
+../../VERSION
\ No newline at end of file
diff --git a/java/internal/get_dlpack.cmake b/java/internal/get_dlpack.cmake
new file mode 120000
index 000000000..93a54ca4d
--- /dev/null
+++ b/java/internal/get_dlpack.cmake
@@ -0,0 +1 @@
+../../cpp/cmake/thirdparty/get_dlpack.cmake
\ No newline at end of file
diff --git a/java/internal/rapids_config.cmake b/java/internal/rapids_config.cmake
new file mode 120000
index 000000000..398eea52d
--- /dev/null
+++ b/java/internal/rapids_config.cmake
@@ -0,0 +1 @@
+../../rapids_config.cmake
\ No newline at end of file
diff --git a/java/internal/src/cuvs_java.c b/java/internal/src/cuvs_java.c
new file mode 100644
index 000000000..febde8463
--- /dev/null
+++ b/java/internal/src/cuvs_java.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuvs/core/c_api.h>
+#include <cuvs/neighbors/cagra.h>
+#include <cuvs/neighbors/brute_force.h>
+#include <cuvs/neighbors/hnsw.h>
+#include <dlpack/dlpack.h>
+#include <cuda_runtime.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include <string.h>
+
+#define try bool __HadError=false;
+#define catch(x) ExitJmp:if(__HadError)
+#define throw(x) {__HadError=true;goto ExitJmp;}
+
+/**
+ * @brief Create an initialized opaque cuVS resources handle
+ *
+ * @param[out] return_value return value of the cuvsResourcesCreate function call
+ * @return the new handle; 0 is returned if cuvsResourcesCreate did not set one
+ */
+cuvsResources_t create_resources(int *return_value) {
+  cuvsResources_t cuvs_resources = 0; // defined value for the caller if creation fails
+  *return_value = cuvsResourcesCreate(&cuvs_resources);
+  return cuvs_resources;
+}
+
+/**
+ * @brief Release an opaque cuVS resources handle
+ * @param[in] cuvs_resources the handle to destroy
+ * @param[out] return_value receives the status returned by cuvsResourcesDestroy
+ */
+void destroy_resources(cuvsResources_t cuvs_resources, int *return_value) {
+  const int status = cuvsResourcesDestroy(cuvs_resources);
+  *return_value = status;
+}
+
+/**
+ * @brief Helper function for creating a non-owning DLManagedTensor instance
+ *
+ * @param[in] data the data pointer points to the allocated data
+ * @param[in] shape the shape of the tensor (must outlive the returned tensor; only the pointer is stored)
+ * @param[in] code the type code of base types
+ * @param[in] bits the number of bits per element
+ * @param[in] ndim the number of dimensions
+ * @param[in] device_type the device type recorded for the data pointer
+ * @return DLManagedTensor with a single lane, no strides, and no deleter or manager context
+ */
+DLManagedTensor prepare_tensor(void *data, int64_t shape[], DLDataTypeCode code, int bits, int ndim, DLDeviceType device_type) {
+  // Zero-initialize so unset fields (device_id, byte_offset, manager_ctx, deleter) are defined.
+  DLManagedTensor tensor = {0};
+  tensor.dl_tensor.data = data;
+  tensor.dl_tensor.device.device_type = device_type;
+  tensor.dl_tensor.ndim = ndim;
+  tensor.dl_tensor.dtype.code = code;
+  tensor.dl_tensor.dtype.bits = bits;
+  tensor.dl_tensor.dtype.lanes = 1;
+  tensor.dl_tensor.shape = shape;
+  tensor.dl_tensor.strides = NULL;
+  return tensor;
+}
+
+/**
+ * @brief Function for building CAGRA index
+ *
+ * @param[in] dataset index dataset (float32 values, rows x dimensions)
+ * @param[in] rows number of dataset rows
+ * @param[in] dimensions vector dimension of the dataset
+ * @param[in] cuvs_resources reference of the underlying opaque C handle
+ * @param[out] return_value status of cuvsCagraIndexCreate if it fails, otherwise of cuvsCagraBuild
+ * @param[in] index_params a reference to the index parameters (its compression field is overwritten)
+ * @param[in] compression_params a reference to the compression parameters
+ * @param[in] n_writer_threads number of omp threads to use
+ * @return cuvsCagraIndex_t the index handle; not built if return_value reports a failure
+ */
+cuvsCagraIndex_t build_cagra_index(float *dataset, long rows, long dimensions, cuvsResources_t cuvs_resources, int *return_value,
+    cuvsCagraIndexParams_t index_params, cuvsCagraCompressionParams_t compression_params, int n_writer_threads) {
+  cuvsCagraIndex_t index = NULL;
+  *return_value = cuvsCagraIndexCreate(&index);
+  if (*return_value != CUVS_SUCCESS) {
+    return index; // no index object to build into
+  }
+
+  omp_set_num_threads(n_writer_threads);
+  cuvsRMMPoolMemoryResourceEnable(95, 95, false);
+
+  int64_t dataset_shape[2] = {rows, dimensions};
+  DLManagedTensor dataset_tensor = prepare_tensor(dataset, dataset_shape, kDLFloat, 32, 2, kDLCUDA);
+
+  index_params->compression = compression_params;
+  cuvsStreamSync(cuvs_resources);
+  *return_value = cuvsCagraBuild(cuvs_resources, index_params, &dataset_tensor, index);
+
+  // NOTE: the OpenMP thread count is reset to 1 here, not restored to its previous value.
+  omp_set_num_threads(1);
+
+  return index;
+}
+
+/**
+ * @brief De-allocate a CAGRA index
+ * @param[in] index the cuvsCagraIndex_t to destroy
+ * @param[out] return_value receives the status returned by cuvsCagraIndexDestroy
+ */
+void destroy_cagra_index(cuvsCagraIndex_t index, int *return_value) {
+  const int status = cuvsCagraIndexDestroy(index);
+  *return_value = status;
+}
+
+/**
+ * @brief Write a CAGRA index to a file
+ * @param[in] cuvs_resources reference of the underlying opaque C handle
+ * @param[in] index the cuvsCagraIndex_t to serialize
+ * @param[out] return_value receives the status returned by cuvsCagraSerialize
+ * @param[in] filename the filename of the index file to write
+ */
+void serialize_cagra_index(cuvsResources_t cuvs_resources, cuvsCagraIndex_t index, int *return_value, char* filename) {
+  const int status = cuvsCagraSerialize(cuvs_resources, filename, index, true);
+  *return_value = status;
+}
+
+/**
+ * @brief Read a CAGRA index from a file
+ * @param[in] cuvs_resources reference to the underlying opaque C handle
+ * @param[in] index the cuvsCagraIndex_t that receives the deserialized index
+ * @param[out] return_value receives the status returned by cuvsCagraDeserialize
+ * @param[in] filename the filename of the index file to read
+ */
+void deserialize_cagra_index(cuvsResources_t cuvs_resources, cuvsCagraIndex_t index, int *return_value, char* filename) {
+  const int status = cuvsCagraDeserialize(cuvs_resources, filename, index);
+  *return_value = status;
+}
+
+/**
+ * @brief A function to search a CAGRA index and return results
+ *
+ * @param[in] index reference to a CAGRA index to search on
+ * @param[in] queries query vectors (float32 values, n_queries x dimensions)
+ * @param[in] topk topK results to return
+ * @param[in] n_queries number of queries
+ * @param[in] dimensions vector dimension
+ * @param[in] cuvs_resources reference to the underlying opaque C handle
+ * @param[out] neighbors_h reference to the neighbor results on the host memory (n_queries x topk)
+ * @param[out] distances_h reference to the distance results on the host memory (n_queries x topk)
+ * @param[out] return_value return value for cuvsCagraSearch function call
+ * @param[in] search_params reference to cuvsCagraSearchParams_t holding the search parameters
+ */
+void search_cagra_index(cuvsCagraIndex_t index, float *queries, int topk, long n_queries, int dimensions,
+    cuvsResources_t cuvs_resources, int *neighbors_h, float *distances_h, int *return_value, cuvsCagraSearchParams_t search_params) {
+  const size_t queries_bytes = sizeof(float) * n_queries * dimensions;
+  const size_t neighbors_bytes = sizeof(uint32_t) * n_queries * topk;
+  const size_t distances_bytes = sizeof(float) * n_queries * topk;
+
+  uint32_t *neighbors = NULL;
+  float *distances = NULL, *queries_d = NULL;
+  cuvsRMMAlloc(cuvs_resources, (void**) &queries_d, queries_bytes);
+  cuvsRMMAlloc(cuvs_resources, (void**) &neighbors, neighbors_bytes);
+  cuvsRMMAlloc(cuvs_resources, (void**) &distances, distances_bytes);
+
+  cudaMemcpy(queries_d, queries, queries_bytes, cudaMemcpyDefault);
+
+  int64_t queries_shape[2] = {n_queries, dimensions};
+  DLManagedTensor queries_tensor = prepare_tensor(queries_d, queries_shape, kDLFloat, 32, 2, kDLCUDA);
+  int64_t neighbors_shape[2] = {n_queries, topk};
+  DLManagedTensor neighbors_tensor = prepare_tensor(neighbors, neighbors_shape, kDLUInt, 32, 2, kDLCUDA);
+  int64_t distances_shape[2] = {n_queries, topk};
+  DLManagedTensor distances_tensor = prepare_tensor(distances, distances_shape, kDLFloat, 32, 2, kDLCUDA);
+
+  cuvsStreamSync(cuvs_resources);
+  cuvsFilter filter; // TODO: Implement Cagra Pre-Filtering, but leave it as no-op for now
+  filter.type = NO_FILTER;
+  filter.addr = (uintptr_t)NULL;
+
+  *return_value = cuvsCagraSearch(cuvs_resources, search_params, index, &queries_tensor, &neighbors_tensor,
+                  &distances_tensor, filter);
+
+  // Make sure the search has finished on the resources' stream before copying the results back.
+  cuvsStreamSync(cuvs_resources);
+
+  cudaMemcpy(neighbors_h, neighbors, neighbors_bytes, cudaMemcpyDefault);
+  cudaMemcpy(distances_h, distances, distances_bytes, cudaMemcpyDefault);
+
+  cuvsRMMFree(cuvs_resources, distances, distances_bytes);
+  cuvsRMMFree(cuvs_resources, neighbors, neighbors_bytes);
+  cuvsRMMFree(cuvs_resources, queries_d, queries_bytes);
+}
+
+/**
+ * @brief De-allocate a BRUTEFORCE index
+ * @param[in] index the cuvsBruteForceIndex_t to destroy
+ * @param[out] return_value receives the status returned by cuvsBruteForceIndexDestroy
+ */
+void destroy_brute_force_index(cuvsBruteForceIndex_t index, int *return_value) {
+  const int status = cuvsBruteForceIndexDestroy(index);
+  *return_value = status;
+}
+
+/**
+ * @brief A function to build BRUTEFORCE index (the metric passed to the build is L2Expanded)
+ *
+ * @param[in] dataset the dataset to be indexed (float32 values, rows x dimensions)
+ * @param[in] rows the number of rows in the dataset
+ * @param[in] dimensions the vector dimension
+ * @param[in] cuvs_resources reference to the underlying opaque C handle
+ * @param[out] return_value status of cuvsBruteForceIndexCreate if it fails, otherwise of cuvsBruteForceBuild
+ * @param[in] n_writer_threads number of threads to use while indexing
+ * @return cuvsBruteForceIndex_t the index handle; not built if return_value reports a failure
+ */
+cuvsBruteForceIndex_t build_brute_force_index(float *dataset, long rows, long dimensions, cuvsResources_t cuvs_resources,
+  int *return_value, int n_writer_threads) {
+  cuvsBruteForceIndex_t index = NULL;
+  *return_value = cuvsBruteForceIndexCreate(&index);
+  if (*return_value != CUVS_SUCCESS) {
+    return index; // no index object to build into
+  }
+
+  omp_set_num_threads(n_writer_threads);
+  cuvsRMMPoolMemoryResourceEnable(95, 95, false);
+
+  // NOTE(review): dataset_d is not freed in this function; presumably the index keeps referring
+  // to it after the build -- confirm who releases it.
+  float *dataset_d = NULL;
+  cuvsRMMAlloc(cuvs_resources, (void**) &dataset_d, sizeof(float) * rows * dimensions);
+  cudaMemcpy(dataset_d, dataset, sizeof(float) * rows * dimensions, cudaMemcpyDefault);
+
+  int64_t dataset_shape[2] = {rows, dimensions};
+  DLManagedTensor dataset_tensor = prepare_tensor(dataset_d, dataset_shape, kDLFloat, 32, 2, kDLCUDA);
+
+  cuvsStreamSync(cuvs_resources);
+  *return_value = cuvsBruteForceBuild(cuvs_resources, &dataset_tensor, L2Expanded, 0.0f, index);
+
+  omp_set_num_threads(1);
+  return index;
+}
+
+/**
+ * @brief A function to search the BRUTEFORCE index
+ *
+ * @param[in] index reference to a BRUTEFORCE index to search on
+ * @param[in] queries reference to query vectors
+ * @param[in] topk the top k results to return
+ * @param[in] n_queries number of queries
+ * @param[in] dimensions vector dimension
+ * @param[in] cuvs_resources reference to the underlying opaque C handle
+ * @param[out] neighbors_h reference to the neighbor results on the host memory
+ * @param[out] distances_h reference to the distance results on the host memory
+ * @param[out] return_value return value for cuvsBruteForceSearch function call
+ * @param[in] prefilter_data bitmap packed into 64-bit words (unpacked here to 32-bit words, upper half first); NULL for no filter
+ * @param[in] prefilter_data_length number of 64-bit words in prefilter_data
+ * @param[in] n_rows number of rows in the dataset
+ */
+void search_brute_force_index(cuvsBruteForceIndex_t index, float *queries, int topk, long n_queries, int dimensions,
+    cuvsResources_t cuvs_resources, int64_t *neighbors_h, float *distances_h, int *return_value, long *prefilter_data,
+    long prefilter_data_length, long n_rows) {
+
+  int64_t *neighbors;
+  float *distances, *queries_d;
+  uint32_t *prefilter_data_32 = NULL, *prefilter_data_d = NULL;
+
+  size_t queries_size = sizeof(float) * n_queries * dimensions;
+  size_t neighbors_size = sizeof(int64_t) * n_queries * topk;
+  size_t distances_size = sizeof(float) * n_queries * topk;
+  size_t prefilter_size = sizeof(uint32_t) * prefilter_data_length * 2;
+
+  cuvsRMMAlloc(cuvs_resources, (void**) &queries_d, queries_size);
+  cuvsRMMAlloc(cuvs_resources, (void**) &neighbors, neighbors_size);
+  cuvsRMMAlloc(cuvs_resources, (void**) &distances, distances_size);
+
+  cudaMemcpy(queries_d, queries, queries_size, cudaMemcpyDefault);
+
+  int64_t queries_shape[2] = {n_queries, dimensions};
+  DLManagedTensor queries_tensor = prepare_tensor(queries_d, queries_shape, kDLFloat, 32, 2, kDLCUDA);
+  int64_t neighbors_shape[2] = {n_queries, topk};
+  DLManagedTensor neighbors_tensor = prepare_tensor(neighbors, neighbors_shape, kDLInt, 64, 2, kDLCUDA);
+  int64_t distances_shape[2] = {n_queries, topk};
+  DLManagedTensor distances_tensor = prepare_tensor(distances, distances_shape, kDLFloat, 32, 2, kDLCUDA);
+
+  // Declared at function scope: prefilter.addr points at this tensor, which must still exist during the search call.
+  int64_t prefilter_shape[1] = {(n_queries * n_rows + 31) / 32};
+  DLManagedTensor prefilter_tensor;
+  cuvsFilter prefilter;
+  if (prefilter_data == NULL) {
+    prefilter.type = NO_FILTER;
+    prefilter.addr = (uintptr_t)NULL;
+  } else {
+    // unpack each incoming long into two 32bit ints BEFORE the words are copied to the device
+    prefilter_data_32 = (uint32_t *)malloc(prefilter_size);
+    for (long i = 0; i < prefilter_data_length; i++) {
+      prefilter_data_32[2 * i] = (uint32_t)(prefilter_data[i] >> 32);
+      prefilter_data_32[(2 * i) + 1] = (uint32_t)prefilter_data[i];
+    }
+    cuvsRMMAlloc(cuvs_resources, (void**) &prefilter_data_d, prefilter_size);
+    cudaMemcpy(prefilter_data_d, prefilter_data_32, prefilter_size, cudaMemcpyDefault);
+    prefilter_tensor = prepare_tensor(prefilter_data_d, prefilter_shape, kDLUInt, 32, 1, kDLCUDA);
+    prefilter.type = BITMAP;
+    prefilter.addr = (uintptr_t)&prefilter_tensor;
+  }
+
+  cuvsStreamSync(cuvs_resources);
+  *return_value = cuvsBruteForceSearch(cuvs_resources, index, &queries_tensor, &neighbors_tensor, &distances_tensor, prefilter);
+
+  cudaMemcpy(neighbors_h, neighbors, neighbors_size, cudaMemcpyDefault);
+  cudaMemcpy(distances_h, distances, distances_size, cudaMemcpyDefault);
+
+  cuvsRMMFree(cuvs_resources, neighbors, neighbors_size);
+  cuvsRMMFree(cuvs_resources, distances, distances_size);
+  cuvsRMMFree(cuvs_resources, queries_d, queries_size);
+  free(prefilter_data_32);  // free(NULL) is a no-op when no filter was given
+  if (prefilter_data_d != NULL) { cuvsRMMFree(cuvs_resources, prefilter_data_d, prefilter_size); }
+}
+
+/**
+ * @brief Serialize a BRUTEFORCE index under the given file name
+ * @param[in] cuvs_resources the underlying opaque C handle
+ * @param[in] index handle of the BRUTEFORCE index to serialize
+ * @param[out] return_value receives the status returned by cuvsBruteForceSerialize
+ * @param[in] filename the filename of the index file
+ */
+void serialize_brute_force_index(cuvsResources_t cuvs_resources, cuvsBruteForceIndex_t index, int *return_value, char* filename) {
+  cuvsError_t status = cuvsBruteForceSerialize(cuvs_resources, filename, index);
+  *return_value = status;
+}
+
+/**
+ * @brief De-serialize a BRUTEFORCE index from the given file name into an index handle
+ * @param[in] cuvs_resources the underlying opaque C handle
+ * @param[in] index handle of the BRUTEFORCE index passed on to cuvsBruteForceDeserialize
+ * @param[out] return_value receives the status returned by cuvsBruteForceDeserialize
+ * @param[in] filename the filename of the index file
+ */
+void deserialize_brute_force_index(cuvsResources_t cuvs_resources, cuvsBruteForceIndex_t index, int *return_value, char* filename) {
+  cuvsError_t status = cuvsBruteForceDeserialize(cuvs_resources, filename, index);
+  *return_value = status;
+}
+
+/**
+ * @brief Create an HNSW index from a CAGRA index and serialize it to a file
+ * @param[in] cuvs_resources the underlying opaque C handle
+ * @param[in] file_path the path to the file of the created HNSW index
+ * @param[in] index handle of the existing CAGRA index
+ * @param[out] return_value receives the status returned by cuvsCagraSerializeToHnswlib
+ */
+void serialize_cagra_index_to_hnsw(cuvsResources_t cuvs_resources, char *file_path, cuvsCagraIndex_t index, int *return_value) {
+  cuvsError_t status = cuvsCagraSerializeToHnswlib(cuvs_resources, file_path, index);
+  *return_value = status;
+}
+
+/**
+ * @brief A function to deserialize the persisted HNSW index (declared as float32 data, L2Expanded metric)
+ * @param[in] cuvs_resources reference to the underlying opaque C handle
+ * @param[in] file_path the path to the persisted HNSW index file
+ * @param[in] hnsw_params reference to the HNSW index params
+ * @param[out] return_value return value of cuvsHnswIndexCreate if it fails, otherwise of cuvsHnswDeserialize
+ * @param[in] vector_dimension the dimension of the vectors in the HNSW index
+ * @returns cuvsHnswIndex_t reference to the created HNSW index, or NULL if the index could not be created
+ */
+cuvsHnswIndex_t deserialize_hnsw_index(cuvsResources_t cuvs_resources, char *file_path,
+  cuvsHnswIndexParams_t hnsw_params, int *return_value, int vector_dimension) {
+  cuvsHnswIndex_t hnsw_index = NULL;
+  *return_value = cuvsHnswIndexCreate(&hnsw_index);
+  if (*return_value != CUVS_SUCCESS) { return NULL; }  // never write through a handle that was not created
+  hnsw_index->dtype.bits = 32;
+  hnsw_index->dtype.code = kDLFloat;
+  hnsw_index->dtype.lanes = 1;
+  *return_value = cuvsHnswDeserialize(cuvs_resources, hnsw_params, file_path, vector_dimension, L2Expanded, hnsw_index);
+  return hnsw_index;
+}
+
+/**
+ * @brief Search an HNSW index using host-side query and result buffers
+ *
+ * @param[in] cuvs_resources the underlying opaque C handle
+ * @param[in] hnsw_index handle of the HNSW index to search
+ * @param[in] search_params handle of the HNSW search parameters
+ * @param[out] return_value receives the status returned by cuvsHnswSearch
+ * @param[out] neighbors_h host buffer for the neighbor ids, wrapped as an n_queries x topk uint64 tensor
+ * @param[out] distances_h host buffer for the distances, wrapped as an n_queries x topk float32 tensor
+ * @param[in] queries the query vectors, wrapped as an n_queries x query_dimension float32 tensor
+ * @param[in] topk the top k results to return
+ * @param[in] query_dimension the dimension of the query vectors
+ * @param[in] n_queries the number of queries passed to the function
+ */
+void search_hnsw_index(cuvsResources_t cuvs_resources, cuvsHnswIndex_t hnsw_index, cuvsHnswSearchParams_t search_params,
+  int *return_value, uint64_t *neighbors_h, float *distances_h, float *queries, int topk, int query_dimension, int n_queries) {
+  // shapes first: queries are n_queries x query_dimension, both result buffers are n_queries x topk
+  int64_t q_shape[2] = {n_queries, query_dimension};
+  int64_t nbr_shape[2] = {n_queries, topk};
+  int64_t dist_shape[2] = {n_queries, topk};
+
+  // every buffer is described to cuVS as a kDLCPU tensor
+  DLManagedTensor q_tensor = prepare_tensor(queries, q_shape, kDLFloat, 32, 2, kDLCPU);
+  DLManagedTensor nbr_tensor = prepare_tensor(neighbors_h, nbr_shape, kDLUInt, 64, 2, kDLCPU);
+  DLManagedTensor dist_tensor = prepare_tensor(distances_h, dist_shape, kDLFloat, 32, 2, kDLCPU);
+
+  cuvsError_t status = cuvsHnswSearch(cuvs_resources, search_params, hnsw_index, &q_tensor, &nbr_tensor, &dist_tensor);
+  *return_value = status;
+}
+
+/**
+ * @brief Release an HNSW index
+ * @param[in] hnsw_index handle of the HNSW index to destroy
+ * @param[out] return_value receives the status returned by cuvsHnswIndexDestroy
+ */
+void destroy_hnsw_index(cuvsHnswIndex_t hnsw_index, int *return_value) {
+  cuvsError_t status = cuvsHnswIndexDestroy(hnsw_index);
+  *return_value = status;
+}
+
+/**
+ * @brief struct for containing gpu information (one entry per device, filled by get_gpu_info)
+ */
+typedef struct gpuInfo {
+  int gpu_id;                // device index, the value passed to cudaSetDevice
+  char name[256];            // copied from cudaDeviceProp.name
+  long free_memory;          // "free" value reported by cudaMemGetInfo
+  long total_memory;         // "total" value reported by cudaMemGetInfo
+  float compute_capability;  // "major.minor" of the device, parsed as a float
+} gpuInfo;
+
+/**
+ * @brief A function to get GPU details
+ * @param[out] return_value status of cudaMemGetInfo: the first failure if any device fails; untouched when no device is found
+ * @param[out] num_gpus the number of devices found, limited to the range 0..1024
+ * @param[out] gpu_info_arr caller-provided array that receives one gpuInfo entry per reported device
+ */
+void get_gpu_info(int *return_value, int *num_gpus, gpuInfo *gpu_info_arr) {
+  int device_count = 0;  // stays 0 if cudaGetDeviceCount fails without writing a count
+  cudaGetDeviceCount(&device_count);
+  // Limiting the num_gpus to 1024. For more details please see comments in Util.availableGPUs()
+  *num_gpus = (device_count > 1024) ? 1024 : ((device_count < 0) ? 0 : device_count);
+  // https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaDeviceProp.html#structcudaDeviceProp
+  struct cudaDeviceProp deviceProp;
+  for (int i = 0; i < *num_gpus; i++) {
+    gpuInfo *info = gpu_info_arr + i;  // filled in place, no variable-length stack copy
+    size_t mem_free = 0, mem_total = 0;
+    char buffer[32];
+    cudaSetDevice(i);
+    cudaGetDeviceProperties(&deviceProp, i);
+    snprintf(buffer, sizeof(buffer), "%d.%d", deviceProp.major, deviceProp.minor);
+    int mem_status = cudaMemGetInfo(&mem_free, &mem_total);
+    if (i == 0 || *return_value == cudaSuccess) { *return_value = mem_status; }  // keep the first failure
+    info->gpu_id = i;
+    snprintf(info->name, sizeof(info->name), "%s", deviceProp.name);
+    info->free_memory = mem_free;
+    info->total_memory = mem_total;
+    info->compute_capability = atof(buffer);
+  }
+}
diff --git a/python/cuvs/.coveragerc b/python/cuvs/.coveragerc
index fc087fb9c..3269e10b8 100644
--- a/python/cuvs/.coveragerc
+++ b/python/cuvs/.coveragerc
@@ -1,3 +1,3 @@
 # Configuration file for Python coverage tests
 [run]
-source = pylibraft
\ No newline at end of file
+source = pylibraft
diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt
index f3feae9a7..465a2ba8a 100644
--- a/python/cuvs/CMakeLists.txt
+++ b/python/cuvs/CMakeLists.txt
@@ -62,3 +62,4 @@ target_link_libraries(cuvs_rmm_logger PRIVATE rmm::rmm_logger_impl)
 add_subdirectory(cuvs/common)
 add_subdirectory(cuvs/distance)
 add_subdirectory(cuvs/neighbors)
+add_subdirectory(cuvs/preprocessing)
diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd
index a0f811480..fba7e3d1e 100644
--- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd
+++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd
@@ -29,6 +29,7 @@ from libcpp cimport bool
 from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
 from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
 from cuvs.distance_type cimport cuvsDistanceType
+from cuvs.neighbors.filters.filters cimport cuvsFilter
 
 
 cdef extern from "cuvs/neighbors/cagra.h" nogil:
@@ -115,7 +116,8 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil:
                                 cuvsCagraIndex_t index,
                                 DLManagedTensor* queries,
                                 DLManagedTensor* neighbors,
-                                DLManagedTensor* distances) except +
+                                DLManagedTensor* distances,
+                                cuvsFilter filter) except +
 
     cuvsError_t cuvsCagraSerialize(cuvsResources_t res,
                                    const char * filename,
diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx
index fd55905cf..f62563f61 100644
--- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx
+++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx
@@ -48,6 +48,7 @@ from libc.stdint cimport (
 )
 
 from cuvs.common.exceptions import check_cuvs
+from cuvs.neighbors.filters import no_filter
 
 
 cdef class CompressionParams:
@@ -484,7 +485,8 @@ def search(SearchParams search_params,
            k,
            neighbors=None,
            distances=None,
-           resources=None):
+           resources=None,
+           filter=None):
     """
     Find the k nearest neighbors for each query.
 
@@ -503,6 +505,9 @@ def search(SearchParams search_params,
     distances : Optional CUDA array interface compliant matrix shape
                 (n_queries, k) If supplied, the distances to the
                 neighbors will be written here in-place. (default None)
+    filter : Optional cuvs.neighbors.filters.Prefilter used to filter
+             neighbors based on a given bitset, e.g. one created with
+             filters.from_bitset. (default None, meaning no filtering)
     {resources_docstring}
 
     Examples
@@ -557,6 +562,9 @@ def search(SearchParams search_params,
     _check_input_array(distances_cai, [np.dtype('float32')],
                        exp_rows=n_queries, exp_cols=k)
 
+    if filter is None:
+        filter = no_filter()
+
     cdef cuvsCagraSearchParams* params = &search_params.params
     cdef cydlpack.DLManagedTensor* queries_dlpack = \
         cydlpack.dlpack_c(queries_cai)
@@ -573,7 +581,8 @@ def search(SearchParams search_params,
             index.index,
             queries_dlpack,
             neighbors_dlpack,
-            distances_dlpack
+            distances_dlpack,
+            filter.prefilter
         ))
 
     return (distances, neighbors)
diff --git a/python/cuvs/cuvs/neighbors/filters/__init__.py b/python/cuvs/cuvs/neighbors/filters/__init__.py
index 2ad118965..0ddf809c9 100644
--- a/python/cuvs/cuvs/neighbors/filters/__init__.py
+++ b/python/cuvs/cuvs/neighbors/filters/__init__.py
@@ -13,6 +13,6 @@
 # limitations under the License.
 
 
-from .filters import Prefilter, from_bitmap, no_filter
+from .filters import Prefilter, from_bitmap, from_bitset, no_filter
 
-__all__ = ["no_filter", "from_bitmap", "Prefilter"]
+__all__ = ["no_filter", "from_bitmap", "from_bitset", "Prefilter"]
diff --git a/python/cuvs/cuvs/neighbors/filters/filters.pyx b/python/cuvs/cuvs/neighbors/filters/filters.pyx
index 9bc2a905c..16042e966 100644
--- a/python/cuvs/cuvs/neighbors/filters/filters.pyx
+++ b/python/cuvs/cuvs/neighbors/filters/filters.pyx
@@ -22,7 +22,7 @@ from libc.stdint cimport uintptr_t
 from cuvs.common cimport cydlpack
 from cuvs.neighbors.common import _check_input_array
 
-from .filters cimport BITMAP, NO_FILTER, cuvsFilter
+from .filters cimport BITMAP, BITSET, NO_FILTER, cuvsFilter
 
 from pylibraft.common.cai_wrapper import wrap_array
 
@@ -95,3 +95,52 @@ def from_bitmap(bitmap):
     filter.addr = <uintptr_t> bitmap_dlpack
 
     return Prefilter(filter, parent=bitmap)
+
+
+def from_bitset(bitset):
+    """
+    Create a pre-filter from a bitset stored in an array of type uint32.
+
+    Parameters
+    ----------
+    bitset : array with dtype `uint32`
+        Each bit in the array states whether one sample of the dataset is
+        greenlit (not filtered) or filtered, so a single `uint32` element
+        covers 32 different samples. The example below passes a cupy
+        array.
+
+        - Bit value of 1: The sample is greenlit (allowed).
+        - Bit value of 0: The sample is filtered.
+
+    Returns
+    -------
+    filter : cuvs.neighbors.filters.Prefilter
+        An instance of `Prefilter` that can be used to filter neighbors
+        based on the given bitset. It is created with `parent=bitset`.
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+    >>> import numpy as np
+    >>> from cuvs.neighbors import filters
+    >>>
+    >>> n_samples = 50000
+    >>>
+    >>> # one bit per sample, rounded up to whole uint32 words
+    >>> n_bitset = np.ceil(n_samples / 32).astype(int)
+    >>> bitset = cp.random.randint(1, 100, size=(n_bitset,), dtype=cp.uint32)
+    >>> prefilter = filters.from_bitset(bitset)
+    """
+    bitset_cai = wrap_array(bitset)
+    _check_input_array(bitset_cai, [np.dtype('uint32')])
+
+    cdef cydlpack.DLManagedTensor* bitset_dlpack = \
+        cydlpack.dlpack_c(bitset_cai)
+
+    # named bitset_filter so the builtin `filter` is not shadowed
+    cdef cuvsFilter bitset_filter
+    bitset_filter.type = BITSET
+    bitset_filter.addr = <uintptr_t> bitset_dlpack
+
+    return Prefilter(bitset_filter, parent=bitset)
diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx
index 4c44350e8..72a3617bd 100644
--- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx
+++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx
@@ -52,12 +52,10 @@ cdef class IndexParams:
     ef_construction : int, default = 200 (optional)
         Maximum number of candidate list size used during construction
         when hierarchy is `cpu`.
-    num_threads : int, default = 2 (optional)
+    num_threads : int, default = 0 (optional)
         Number of CPU threads used to increase construction parallelism
-        when hierarchy is `cpu`.
-        NOTE: Constructing the hierarchy when converting from a CAGRA graph
-        is highly sensitive to parallelism, and increasing the number of
-        threads can reduce the quality of the index.
+        when hierarchy is `cpu`. When the value is 0, the number of threads is
+        automatically set to the maximum number of threads available.
     """
 
     cdef cuvsHnswIndexParams* params
@@ -71,7 +69,7 @@ cdef class IndexParams:
     def __init__(self, *,
                  hierarchy="none",
                  ef_construction=200,
-                 num_threads=2):
+                 num_threads=0):
         if hierarchy == "none":
             self.params.hierarchy = cuvsHnswHierarchy.NONE
         elif hierarchy == "cpu":
diff --git a/python/cuvs/cuvs/preprocessing/CMakeLists.txt b/python/cuvs/cuvs/preprocessing/CMakeLists.txt
new file mode 100644
index 000000000..be31760f9
--- /dev/null
+++ b/python/cuvs/cuvs/preprocessing/CMakeLists.txt
@@ -0,0 +1,15 @@
+# =============================================================================
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+add_subdirectory(quantize/scalar)
diff --git a/python/cuvs/cuvs/test/__init__py b/python/cuvs/cuvs/preprocessing/__init__.py
similarity index 100%
rename from python/cuvs/cuvs/test/__init__py
rename to python/cuvs/cuvs/preprocessing/__init__.py
diff --git a/python/cuvs/cuvs/preprocessing/quantize/__init__.py b/python/cuvs/cuvs/preprocessing/quantize/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/preprocessing/quantize/scalar/CMakeLists.txt b/python/cuvs/cuvs/preprocessing/quantize/scalar/CMakeLists.txt
new file mode 100644
index 000000000..dc55edbf0
--- /dev/null
+++ b/python/cuvs/cuvs/preprocessing/quantize/scalar/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Set the list of Cython files to build and the libraries they link against
+set(cython_sources scalar.pyx)
+set(linked_libraries cuvs::cuvs cuvs::c_api)
+
+# Build all of the Cython targets; modules get the preprocessing_quantize_scalar_ prefix
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX
+                   preprocessing_quantize_scalar_
+)
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.pxd b/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.pxd
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py b/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py
new file mode 100644
index 000000000..f24510bab
--- /dev/null
+++ b/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .scalar import (
+    Quantizer,
+    QuantizerParams,
+    inverse_transform,
+    train,
+    transform,
+)
diff --git a/python/cuvs/cuvs/preprocessing/quantize/scalar/scalar.pxd b/python/cuvs/cuvs/preprocessing/quantize/scalar/scalar.pxd
new file mode 100644
index 000000000..f195c4b0d
--- /dev/null
+++ b/python/cuvs/cuvs/preprocessing/quantize/scalar/scalar.pxd
@@ -0,0 +1,59 @@
+#
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: language_level=3
+
+from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
+from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
+
+
+cdef extern from "cuvs/preprocessing/quantize/scalar.h" nogil:
+    ctypedef struct cuvsScalarQuantizerParams:
+        float quantile  # see QuantizerParams in scalar.pyx: range (0, 1]
+
+    ctypedef cuvsScalarQuantizerParams* cuvsScalarQuantizerParams_t
+
+    cuvsError_t cuvsScalarQuantizerParamsCreate(
+        cuvsScalarQuantizerParams_t* params)
+
+    cuvsError_t cuvsScalarQuantizerParamsDestroy(
+        cuvsScalarQuantizerParams_t params)
+
+    ctypedef struct cuvsScalarQuantizer:
+        double min_  # read back through Quantizer.min in scalar.pyx
+        double max_  # read back through Quantizer.max in scalar.pyx
+
+    ctypedef cuvsScalarQuantizer* cuvsScalarQuantizer_t
+
+    cuvsError_t cuvsScalarQuantizerCreate(
+        cuvsScalarQuantizer_t* quantizer)
+
+    cuvsError_t cuvsScalarQuantizerDestroy(
+        cuvsScalarQuantizer_t quantizer)
+
+    cuvsError_t cuvsScalarQuantizerTrain(cuvsResources_t res,
+                                         cuvsScalarQuantizerParams_t params,
+                                         DLManagedTensor* dataset,
+                                         cuvsScalarQuantizer_t quantizer)
+
+    cuvsError_t cuvsScalarQuantizerTransform(cuvsResources_t res,
+                                             cuvsScalarQuantizer_t quantizer,
+                                             DLManagedTensor* dataset,
+                                             DLManagedTensor* out)
+
+    cuvsError_t cuvsScalarQuantizerInverseTransform(cuvsResources_t res,
+                                                    cuvsScalarQuantizer_t q,
+                                                    DLManagedTensor* dataset,
+                                                    DLManagedTensor* out)
diff --git a/python/cuvs/cuvs/preprocessing/quantize/scalar/scalar.pyx b/python/cuvs/cuvs/preprocessing/quantize/scalar/scalar.pyx
new file mode 100644
index 000000000..c4cc53f25
--- /dev/null
+++ b/python/cuvs/cuvs/preprocessing/quantize/scalar/scalar.pyx
@@ -0,0 +1,236 @@
+#
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: language_level=3
+
+import numpy as np
+
+from cuvs.common cimport cydlpack
+
+from pylibraft.common import auto_convert_output, device_ndarray
+from pylibraft.common.cai_wrapper import wrap_array
+
+from cuvs.common.exceptions import check_cuvs
+from cuvs.common.resources import auto_sync_resources
+from cuvs.neighbors.common import _check_input_array
+
+
+cdef class QuantizerParams:
+    """
+    Parameters for scalar quantization.
+
+    Parameters
+    ----------
+    quantile : float, optional
+        Specifies how many outliers at top & bottom will be ignored; needs
+        to be within range of (0, 1]. Left unchanged when None is passed.
+    """
+
+    cdef cuvsScalarQuantizerParams * params  # created/destroyed below
+
+    def __cinit__(self):
+        check_cuvs(cuvsScalarQuantizerParamsCreate(&self.params))
+
+    def __dealloc__(self):
+        check_cuvs(cuvsScalarQuantizerParamsDestroy(self.params))
+
+    def __init__(self, *, quantile=None):
+        if quantile is not None:  # None keeps the value already in params
+            self.params.quantile = quantile
+
+    @property
+    def quantile(self):
+        return self.params.quantile
+
+
+cdef class Quantizer:
+    """
+    Holds the scalar quantizer state (`min` and `max`) used after training.
+
+    The quantization is performed by a linear mapping of an interval in the
+    float data type to the full range of the quantized int type.
+    """
+    cdef cuvsScalarQuantizer * quantizer  # created/destroyed below
+
+    def __cinit__(self):
+        check_cuvs(cuvsScalarQuantizerCreate(&self.quantizer))
+
+    def __dealloc__(self):
+        check_cuvs(cuvsScalarQuantizerDestroy(self.quantizer))
+
+    @property
+    def min(self):
+        return self.quantizer.min_
+
+    @property
+    def max(self):
+        return self.quantizer.max_
+
+    def __repr__(self):
+        return f"scalar.Quantizer(min={self.min}, max={self.max})"
+
+
+@auto_sync_resources
+def train(QuantizerParams params, dataset, resources=None):
+    """
+    Initializes a scalar quantizer to be used later for quantizing the dataset.
+
+    Parameters
+    ----------
+    params : QuantizerParams object
+    dataset : row major host or device dataset, dtype float32 or float64
+    {resources_docstring}
+
+    Returns
+    -------
+    quantizer: cuvs.preprocessing.quantize.scalar.Quantizer
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+    >>> from cuvs.preprocessing.quantize import scalar
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> params = scalar.QuantizerParams(quantile=0.99)
+    >>> quantizer = scalar.train(params, dataset)
+    >>> transformed = scalar.transform(quantizer, dataset)
+    """
+    dataset_ai = wrap_array(dataset)
+
+    # Validate before building the DLPack view, as transform() and
+    # inverse_transform() do, so bad input is rejected first.
+    _check_input_array(dataset_ai, [np.dtype("float32"), np.dtype("float64")])
+
+    cdef cydlpack.DLManagedTensor* dataset_dlpack = \
+        cydlpack.dlpack_c(dataset_ai)
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+    cdef Quantizer ret = Quantizer()
+
+    check_cuvs(cuvsScalarQuantizerTrain(res,
+                                        params.params,
+                                        dataset_dlpack,
+                                        ret.quantizer))
+    return ret
+
+
+@auto_sync_resources
+@auto_convert_output
+def transform(Quantizer quantizer, dataset, output=None, resources=None):
+    """
+    Applies quantization transform to given dataset
+
+    Parameters
+    ----------
+    quantizer : trained Quantizer object
+    dataset : row major host or device dataset to transform
+    output : optional preallocated int8 output memory, on host or device
+        memory, with the same shape as dataset
+    {resources_docstring}
+
+    Returns
+    -------
+    output : transformed dataset quantized into int8
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> from cuvs.preprocessing.quantize import scalar
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> params = scalar.QuantizerParams(quantile=0.99)
+    >>> quantizer = scalar.train(params, dataset)
+    >>> transformed = scalar.transform(quantizer, dataset)
+    """
+    dataset_ai = wrap_array(dataset)
+    _check_input_array(dataset_ai, [np.dtype("float32"), np.dtype("float64")])
+    n_rows, n_cols = dataset_ai.shape[0], dataset_ai.shape[1]
+
+    if output is None:
+        on_device = hasattr(dataset, "__cuda_array_interface__")
+        ndarray = device_ndarray if on_device else np
+        output = ndarray.empty((n_rows, n_cols), dtype="int8")
+
+    # A caller-supplied buffer must match the dataset's shape, since the
+    # result is written into it.
+    output_ai = wrap_array(output)
+    _check_input_array(output_ai, [np.dtype("int8")],
+                       exp_rows=n_rows, exp_cols=n_cols)
+
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+    cdef cydlpack.DLManagedTensor* dataset_dlpack = \
+        cydlpack.dlpack_c(dataset_ai)
+    cdef cydlpack.DLManagedTensor* output_dlpack = \
+        cydlpack.dlpack_c(output_ai)
+
+    check_cuvs(cuvsScalarQuantizerTransform(res,
+                                            quantizer.quantizer,
+                                            dataset_dlpack,
+                                            output_dlpack))
+    return output
+
+
+@auto_sync_resources
+@auto_convert_output
+def inverse_transform(Quantizer quantizer, dataset, output=None,
+                      resources=None):
+    """
+    Perform inverse quantization step on previously quantized dataset
+
+    Note that, depending on the data type of the dataset used for training,
+    the conversion is not lossless.
+
+    Parameters
+    ----------
+    quantizer : trained Quantizer object
+    dataset : row major host or device int8 dataset to transform
+    output : optional preallocated float32 or float64 output memory, on host
+        or device, with the same shape as dataset (float32 when omitted)
+    {resources_docstring}
+
+    Returns
+    -------
+    output : transformed dataset with scalar quantization reversed
+    """
+    dataset_ai = wrap_array(dataset)
+    _check_input_array(dataset_ai, [np.dtype("int8")])
+    n_rows, n_cols = dataset_ai.shape[0], dataset_ai.shape[1]
+
+    if output is None:
+        on_device = hasattr(dataset, "__cuda_array_interface__")
+        ndarray = device_ndarray if on_device else np
+        output = ndarray.empty((n_rows, n_cols), dtype="float32")
+
+    # A caller-supplied buffer must match the dataset's shape, since the
+    # result is written into it.
+    output_ai = wrap_array(output)
+    _check_input_array(output_ai, [np.dtype("float32"), np.dtype("float64")],
+                       exp_rows=n_rows, exp_cols=n_cols)
+
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+    cdef cydlpack.DLManagedTensor* dataset_dlpack = \
+        cydlpack.dlpack_c(dataset_ai)
+    cdef cydlpack.DLManagedTensor* output_dlpack = \
+        cydlpack.dlpack_c(output_ai)
+
+    check_cuvs(cuvsScalarQuantizerInverseTransform(res,
+                                                   quantizer.quantizer,
+                                                   dataset_dlpack,
+                                                   output_dlpack))
+    return output
diff --git a/python/cuvs/cuvs/tests/__init__py b/python/cuvs/cuvs/tests/__init__py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/test/ann_utils.py b/python/cuvs/cuvs/tests/ann_utils.py
similarity index 100%
rename from python/cuvs/cuvs/test/ann_utils.py
rename to python/cuvs/cuvs/tests/ann_utils.py
diff --git a/python/cuvs/cuvs/test/conftest.py b/python/cuvs/cuvs/tests/conftest.py
similarity index 100%
rename from python/cuvs/cuvs/test/conftest.py
rename to python/cuvs/cuvs/tests/conftest.py
diff --git a/python/cuvs/cuvs/test/test_brute_force.py b/python/cuvs/cuvs/tests/test_brute_force.py
similarity index 98%
rename from python/cuvs/cuvs/test/test_brute_force.py
rename to python/cuvs/cuvs/tests/test_brute_force.py
index 0b37ad885..a234794f9 100644
--- a/python/cuvs/cuvs/test/test_brute_force.py
+++ b/python/cuvs/cuvs/tests/test_brute_force.py
@@ -134,7 +134,7 @@ def test_prefiltered_brute_force_knn(
     index = np.random.random_sample((n_index_rows, n_cols)).astype(dtype)
     queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype)
     bitmap = create_sparse_array(
-        (np.ceil(n_query_rows * n_index_rows / 32).astype(int)), sparsity
+        (np.ceil(n_query_rows * n_index_rows / 32).astype(np.uint32)), sparsity
     )
 
     is_min = metric != "inner_product"
diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/tests/test_cagra.py
similarity index 67%
rename from python/cuvs/cuvs/test/test_cagra.py
rename to python/cuvs/cuvs/tests/test_cagra.py
index d3b03a5d0..ab3ac4a37 100644
--- a/python/cuvs/cuvs/test/test_cagra.py
+++ b/python/cuvs/cuvs/tests/test_cagra.py
@@ -19,8 +19,8 @@
 from sklearn.neighbors import NearestNeighbors
 from sklearn.preprocessing import normalize
 
-from cuvs.neighbors import cagra
-from cuvs.test.ann_utils import calc_recall, generate_data
+from cuvs.neighbors import cagra, filters
+from cuvs.tests.ann_utils import calc_recall, generate_data
 
 
 def run_cagra_build_search_test(
@@ -139,6 +139,99 @@ def test_cagra_dataset_dtype_host_device(
     )
 
 
+def create_sparse_bitset(n_size, sparsity):
+    """Build a uint32 bitset with int(n_size * sparsity) random bits set."""
+    word_bits = 32
+    n_words = (n_size + word_bits - 1) // word_bits
+    n_set = int(n_size * sparsity)
+
+    words = np.zeros(n_words, dtype=np.uint32)
+    chosen = np.random.choice(n_size, n_set, replace=False)
+
+    # Bit b lives in word b // 32 at offset b % 32 (least-significant first).
+    for bit in chosen:
+        word, offset = divmod(bit, word_bits)
+        words[word] |= 1 << offset
+
+    return words
+
+
+@pytest.mark.parametrize("sparsity", [0.2, 0.5, 0.7, 1.0])
+def test_filtered_cagra(
+    sparsity,
+    n_rows=10000,
+    n_cols=10,
+    n_queries=10,
+    k=10,
+):
+    """Search CAGRA through a bitset filter and validate the results.
+
+    Builds an index on generated data, searches it with a filter made from
+    a random bitset, then asserts that no returned index has its bit
+    cleared and that recall against brute-force sklearn kNN over the kept
+    rows exceeds 0.7.
+    """
+    dataset = generate_data((n_rows, n_cols), np.float32)
+    queries = generate_data((n_queries, n_cols), np.float32)
+    bitset = create_sparse_bitset(n_rows, sparsity)
+
+    dataset_device = device_ndarray(dataset)
+    queries_device = device_ndarray(queries)
+    bitset_device = device_ndarray(bitset)
+
+    index = cagra.build(cagra.IndexParams(), dataset_device)
+    bitset_filter = filters.from_bitset(bitset_device)
+
+    # Preallocated outputs handed to the search call.
+    neighbors_device = device_ndarray(
+        np.zeros((n_queries, k), dtype=np.uint32)
+    )
+    distances_device = device_ndarray(
+        np.zeros((n_queries, k), dtype=np.float32)
+    )
+
+    _distances, _neighbors = cagra.search(
+        cagra.SearchParams(),
+        index,
+        queries_device,
+        k,
+        neighbors=neighbors_device,
+        distances=distances_device,
+        filter=bitset_filter,
+    )
+
+    found = neighbors_device.copy_to_host()
+
+    # Expand the uint32 words into one flag per dataset row. Bits are read
+    # least-significant first within each byte, the same order in which
+    # create_sparse_bitset sets them (assumes a little-endian host).
+    host_bytes = bitset.view(np.uint8)
+    bits = np.unpackbits(host_bytes, bitorder="little")
+    keep_mask = bits[:n_rows].astype(bool)  # True: row may be returned
+    dropped_rows = np.where(~keep_mask)[0]
+
+    # Exact reference neighbors, computed over the kept rows only.
+    nn_skl = NearestNeighbors(
+        n_neighbors=k, algorithm="brute", metric="euclidean"
+    )
+    nn_skl.fit(dataset[keep_mask])
+    skl_idx = nn_skl.kneighbors(queries, return_distance=False)
+
+    # sklearn numbers the kept rows 0..m-1, while CAGRA reports positions
+    # in the full dataset; map each dataset row to its rank among the kept
+    # rows so that the two index sets are comparable.
+    compact_ids = np.cumsum(keep_mask) - 1
+    found_compact = np.take(compact_ids, found, mode="clip")
+
+    # No returned neighbor may be a row that the bitset excludes.
+    for row in found:
+        assert not np.intersect1d(dropped_rows, row).size
+
+    recall = calc_recall(found_compact, skl_idx)
+
+    assert recall > 0.7
+
+
 @pytest.mark.parametrize(
     "params",
     [
diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/tests/test_distance.py
similarity index 100%
rename from python/cuvs/cuvs/test/test_distance.py
rename to python/cuvs/cuvs/tests/test_distance.py
diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/tests/test_doctests.py
similarity index 97%
rename from python/cuvs/cuvs/test/test_doctests.py
rename to python/cuvs/cuvs/tests/test_doctests.py
index 4407bda52..381a077fb 100644
--- a/python/cuvs/cuvs/test/test_doctests.py
+++ b/python/cuvs/cuvs/tests/test_doctests.py
@@ -23,6 +23,7 @@
 
 import cuvs.distance
 import cuvs.neighbors
+import cuvs.preprocessing.quantize.scalar
 
 # Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py  # noqa
 
@@ -96,6 +97,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None):
 DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.ivf_flat))
 DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.common))
 DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.distance))
+DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.preprocessing.quantize.scalar))
 
 
 def _test_name_from_docstring(docstring):
diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/tests/test_hnsw.py
similarity index 96%
rename from python/cuvs/cuvs/test/test_hnsw.py
rename to python/cuvs/cuvs/tests/test_hnsw.py
index 20f583ae8..23a0920ef 100644
--- a/python/cuvs/cuvs/test/test_hnsw.py
+++ b/python/cuvs/cuvs/tests/test_hnsw.py
@@ -19,7 +19,7 @@
 from sklearn.preprocessing import normalize
 
 from cuvs.neighbors import cagra, hnsw
-from cuvs.test.ann_utils import calc_recall, generate_data
+from cuvs.tests.ann_utils import calc_recall, generate_data
 
 
 def run_hnsw_build_search_test(
@@ -54,7 +54,7 @@ def run_hnsw_build_search_test(
 
     assert index.trained
 
-    hnsw_params = hnsw.IndexParams(hierarchy=hierarchy, num_threads=1)
+    hnsw_params = hnsw.IndexParams(hierarchy=hierarchy)
     hnsw_index = hnsw.from_cagra(hnsw_params, index)
 
     queries = generate_data((n_queries, n_cols), dtype)
@@ -135,7 +135,7 @@ def run_hnsw_extend_test(
 
     assert index.trained
 
-    hnsw_params = hnsw.IndexParams(hierarchy="cpu", num_threads=1)
+    hnsw_params = hnsw.IndexParams(hierarchy="cpu")
     hnsw_index = hnsw.from_cagra(hnsw_params, index)
     hnsw.extend(hnsw.ExtendParams(), hnsw_index, add_dataset)
 
@@ -158,7 +158,6 @@ def run_hnsw_extend_test(
     skl_dist, skl_idx = nn_skl.kneighbors(queries, return_distance=True)
 
     recall = calc_recall(out_idx, skl_idx)
-    print(recall)
     assert recall > 0.95
 
 
diff --git a/python/cuvs/cuvs/test/test_ivf_flat.py b/python/cuvs/cuvs/tests/test_ivf_flat.py
similarity index 98%
rename from python/cuvs/cuvs/test/test_ivf_flat.py
rename to python/cuvs/cuvs/tests/test_ivf_flat.py
index 9dd4097dc..c3ec0252a 100644
--- a/python/cuvs/cuvs/test/test_ivf_flat.py
+++ b/python/cuvs/cuvs/tests/test_ivf_flat.py
@@ -20,7 +20,7 @@
 from sklearn.preprocessing import normalize
 
 from cuvs.neighbors import ivf_flat
-from cuvs.test.ann_utils import calc_recall, generate_data
+from cuvs.tests.ann_utils import calc_recall, generate_data
 
 
 def run_ivf_flat_build_search_test(
diff --git a/python/cuvs/cuvs/test/test_ivf_pq.py b/python/cuvs/cuvs/tests/test_ivf_pq.py
similarity index 98%
rename from python/cuvs/cuvs/test/test_ivf_pq.py
rename to python/cuvs/cuvs/tests/test_ivf_pq.py
index dff65b18c..6661be7de 100644
--- a/python/cuvs/cuvs/test/test_ivf_pq.py
+++ b/python/cuvs/cuvs/tests/test_ivf_pq.py
@@ -20,7 +20,7 @@
 from sklearn.preprocessing import normalize
 
 from cuvs.neighbors import ivf_pq
-from cuvs.test.ann_utils import calc_recall, generate_data
+from cuvs.tests.ann_utils import calc_recall, generate_data
 
 
 def run_ivf_pq_build_search_test(
diff --git a/python/cuvs/cuvs/test/test_refine.py b/python/cuvs/cuvs/tests/test_refine.py
similarity index 100%
rename from python/cuvs/cuvs/test/test_refine.py
rename to python/cuvs/cuvs/tests/test_refine.py
diff --git a/python/cuvs/cuvs/tests/test_scalar_quantizer.py b/python/cuvs/cuvs/tests/test_scalar_quantizer.py
new file mode 100644
index 000000000..0dbcada85
--- /dev/null
+++ b/python/cuvs/cuvs/tests/test_scalar_quantizer.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import pytest
+from pylibraft.common import device_ndarray
+
+from cuvs.preprocessing.quantize import scalar
+
+
+@pytest.mark.parametrize("n_rows", [50, 100])
+@pytest.mark.parametrize("n_cols", [10, 50])
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("device_memory", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_scalar_quantizer(n_rows, n_cols, inplace, device_memory, dtype):
+    """Compare scalar.transform against a naive NumPy int8 quantization."""
+    input1 = np.random.random_sample((n_rows, n_cols)).astype(dtype)
+    output = np.zeros((n_rows, n_cols), dtype="int8") if inplace else None
+
+    input1_device = device_ndarray(input1)
+    output_device = device_ndarray(output) if inplace else None
+
+    params = scalar.QuantizerParams(quantile=0.99)
+    data = input1_device if device_memory else input1
+    quantizer = scalar.train(params, data)
+    transformed = scalar.transform(
+        quantizer, data, output=output_device if device_memory else output
+    )
+    if device_memory:
+        actual = (output_device if inplace else transformed).copy_to_host()
+    else:
+        actual = output if inplace else transformed
+
+    # Naive reference. With quantile=0.99 some inputs may lie outside
+    # [start, end]; clip to the int8 range before casting so they saturate
+    # rather than wrap, and compare with atol only: rtol=2 tolerated errors
+    # of 2 + 2 * |actual|, enough to hide such wrap-around near -128 / 127.
+    start, end = quantizer.min, quantizer.max
+    scaled = np.clip(255 * (input1 - start) / (end - start) - 128, -128, 127)
+    assert np.allclose(np.int8(scaled), actual, atol=2, rtol=0)
diff --git a/python/cuvs/cuvs/test/test_serialization.py b/python/cuvs/cuvs/tests/test_serialization.py
similarity index 98%
rename from python/cuvs/cuvs/test/test_serialization.py
rename to python/cuvs/cuvs/tests/test_serialization.py
index 1f4a54e87..dafaed4c7 100644
--- a/python/cuvs/cuvs/test/test_serialization.py
+++ b/python/cuvs/cuvs/tests/test_serialization.py
@@ -18,7 +18,7 @@
 from pylibraft.common import device_ndarray
 
 from cuvs.neighbors import brute_force, cagra, ivf_flat, ivf_pq
-from cuvs.test.ann_utils import generate_data
+from cuvs.tests.ann_utils import generate_data
 
 
 @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte])
diff --git a/python/cuvs/cuvs/test/test_version.py b/python/cuvs/cuvs/tests/test_version.py
similarity index 100%
rename from python/cuvs/cuvs/test/test_version.py
rename to python/cuvs/cuvs/tests/test_version.py
diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml
index 91a8cde1a..2193f7dcb 100644
--- a/python/cuvs/pyproject.toml
+++ b/python/cuvs/pyproject.toml
@@ -32,6 +32,7 @@ license = { text = "Apache 2.0" }
 requires-python = ">=3.10"
 dependencies = [
     "cuda-python",
+    "libcuvs==25.4.*,>=0.0.0a0",
     "numpy>=1.23,<3.0a0",
     "pylibraft==25.4.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml
index 357517933..609a8378e 100644
--- a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml
@@ -10,6 +10,9 @@ faiss_gpu_ivf_pq:
 faiss_gpu_ivf_sq:
   executable: FAISS_GPU_IVF_PQ_ANN_BENCH
   requires_gpu: true
+faiss_gpu_cagra:
+  executable: FAISS_GPU_CAGRA_ANN_BENCH
+  requires_gpu: true
 faiss_cpu_flat:
   executable: FAISS_CPU_FLAT_ANN_BENCH
   requires_gpu: false
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py
index de05bd752..71a105bc8 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py
+++ b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py
@@ -72,9 +72,7 @@ def faiss_gpu_ivf_pq_build(params, dims):
     ret = params["M"] <= dims and dims % params["M"] == 0
     if "use_cuvs" in params and params["use_cuvs"]:
         return ret
-    pq_bits = 8
-    if "bitsPerCode" in params:
-        pq_bits = params["bitsPerCode"]
+    pq_bits = params.get("bitsPerCode", 8)
     lookup_table_size = 4
     if "useFloat16" in params and params["useFloat16"]:
         lookup_table_size = 2
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
index 90a561bca..630dc94ff 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
@@ -1,5 +1,6 @@
 name: cuvs_cagra_hnswlib
 constraints:
+  build: cuvs_bench.config.algos.constraints.cuvs_cagra_build
   search: cuvs_bench.config.algos.constraints.hnswlib_search
 groups:
   base:
@@ -9,6 +10,5 @@ groups:
       graph_build_algo: ["NN_DESCENT"]
       hierarchy: ["none", "cpu"]
       ef_construction: [64, 128, 256, 512]
-      num_threads: [2, 5, 10]
     search:
       ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800]
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml
index 29c145f86..aa7409dce 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml
@@ -7,4 +7,4 @@ groups:
       useFloat16: [False]
     search:
       nprobe: [1, 5, 10, 50, 100, 200]
-      refine_ratio: [1]
\ No newline at end of file
+      refine_ratio: [1]
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml
new file mode 100644
index 000000000..578885096
--- /dev/null
+++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml
@@ -0,0 +1,13 @@
+name: faiss_gpu_cagra
+constraints:
+  build: cuvs_bench.config.algos.constraints.cuvs_cagra_build
+  search: cuvs_bench.config.algos.constraints.cuvs_cagra_search
+groups:
+  base:
+    build:
+      graph_degree: [32, 64, 96, 128]
+      intermediate_graph_degree: [32, 64, 96, 128]
+      graph_build_algo: ["NN_DESCENT"]
+    search:
+      itopk: [32, 64, 128, 256, 512]
+      search_width: [1, 2, 4, 8, 16, 32, 64]
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml
index e4abc35f5..20329e60d 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml
@@ -2,20 +2,29 @@ name: faiss_gpu_ivf_flat
 groups:
   base:
     build:
-      nlist: [2048]
-      ratio: [10]
-      useFloat16: [False, True]
-      use_raft: [False]
+      nlist: [1024, 2048, 4096]
+      ratio: [4]
+      use_cuvs: [False]
     search:
       nprobe: [1, 5, 10, 50, 100, 200]
-      refine_ratio: [1]
-groups:
-  baseraft:
+  basecuvs:
     build:
-      nlist: [2048]
-      ratio: [10]
-      useFloat16: [False, True]
-      use_raft: [True]
+      nlist: [1024, 2048, 4096]
+      ratio: [4]
+      use_cuvs: [True]
     search:
       nprobe: [1, 5, 10, 50, 100, 200]
-      refine_ratio: [1]
\ No newline at end of file
+  large:
+    build:
+      nlist: [8192, 16384, 32768]
+      ratio: [4]
+      use_cuvs: [False]
+    search:
+      nprobe: [10, 20, 30, 50, 100, 200, 500, 1000]
+  largecuvs:
+    build:
+      nlist: [8192, 16384, 32768]
+      ratio: [4]
+      use_cuvs: [True]
+    search:
+      nprobe: [10, 20, 30, 50, 100, 200, 500, 1000]
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml
index 782f3aed1..d6cfe0569 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml
@@ -5,73 +5,72 @@ constraints:
 groups:
   base:
     build:
-      nlist: [1024, 2048, 4096, 8192]
-      M: [64, 32, 16]
-      ratio: [10]
-      usePrecomputed: [False, True]
-      useFloat16: [False, True]
-      use_raft: [False]
+      nlist: [1024, 2048, 4096]
+      M: [96, 64]
+      ratio: [4]
+      usePrecomputed: [True]
+      useFloat16: [True]
+      use_cuvs: [False]
       bitsPerCode: [8]
     search:
-      nprobe: [1, 5, 10, 50, 100, 200]
+      nprobe: [10, 20, 50, 100, 200]
       refine_ratio: [1, 2, 4]
-  baseraft:
+  basecuvs:
     build:
-      nlist: [1024, 2048, 4096, 8192]
-      M: [64, 32, 16]
-      ratio: [10]
+      nlist: [1024, 2048, 4096]
+      M: [96, 64]
+      ratio: [4]
       usePrecomputed: [False]
-      useFloat16: [False, True]
-      use_raft: [True]
+      useFloat16: [False]
+      use_cuvs: [True]
       bitsPerCode: [8, 6, 5, 4]
     search:
-      nprobe: [1, 5, 10, 50, 100, 200]
+      nprobe: [10, 20, 50, 100, 200]
       refine_ratio: [1, 2, 4]
   large:
     build:
-      nlist: [8192, 16384, 32768, 65536]
-      M: [48, 32, 16]
+      nlist: [8192, 16384, 32768]
+      M: [96, 48, 32]
       ratio: [4]
-      usePrecomputed: [False, True]
+      usePrecomputed: [False]
       useFloat16: [False, True]
-      use_raft: [False]
+      use_cuvs: [False]
       bitsPerCode: [8]
     search:
-      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      nprobe: [20, 30, 50, 100, 200, 500, 1000]
       refine_ratio: [1, 2, 4]
-  largeraft:
+  largecuvs:
     build:
-      nlist: [8192, 16384, 32768, 65536]
-      M: [48, 32, 16]
+      nlist: [8192, 16384, 32768]
+      M: [96, 48, 32]
       ratio: [4]
       usePrecomputed: [False]
-      useFloat16: [False, True]
-      use_raft: [True]
+      useFloat16: [False]
+      use_cuvs: [True]
       bitsPerCode: [8, 6, 5, 4]
     search:
-      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      nprobe: [20, 30, 50, 100, 200, 500, 1000]
       refine_ratio: [1, 2, 4]
   100M:
     build:
-      nlist: [50000]
-      M: [48]
-      ratio: [10]
+      nlist: [100000]
+      M: [96, 64]
+      ratio: [4]
       usePrecomputed: [False, True]
-      useFloat16: [False, True]
-      use_raft: [False]
+      useFloat16: [True]
+      use_cuvs: [False]
       bitsPerCode: [8]
     search:
       nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
-      refine_ratio: [1]
-  100Mraft:
+      refine_ratio: [1, 2, 4]
+  100Mcuvs:
     build:
-      nlist: [50000]
-      M: [48]
-      ratio: [10]
-      usePrecomputed: [False, True]
+      nlist: [100000]
+      M: [96, 64]
+      ratio: [4]
       useFloat16: [False, True]
-      use_raft: [True]
+      use_cuvs: [True]
       bitsPerCode: [8, 6, 5, 4]
     search:
       nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
-      refine_ratio: [1]
+      refine_ratio: [1, 2, 4]
diff --git a/python/cuvs_bench/cuvs_bench/plot/__main__.py b/python/cuvs_bench/cuvs_bench/plot/__main__.py
index 93deb69c7..340738f89 100644
--- a/python/cuvs_bench/cuvs_bench/plot/__main__.py
+++ b/python/cuvs_bench/cuvs_bench/plot/__main__.py
@@ -17,7 +17,7 @@
 # 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py
 # 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py  # noqa: E501
 # 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py  # noqa: E501
-# License: https://github.com/rapidsai/cuvs/blob/branch-24.10/thirdparty/LICENSES/LICENSE.ann-benchmark # noqa: E501
+# License: https://github.com/rapidsai/cuvs/blob/branch-25.04/thirdparty/LICENSES/LICENSE.ann-benchmark # noqa: E501
 
 import itertools
 import os
diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py
index 58fc5291b..3246eb991 100644
--- a/python/cuvs_bench/cuvs_bench/run/__main__.py
+++ b/python/cuvs_bench/cuvs_bench/run/__main__.py
@@ -150,16 +150,6 @@
     "were interrupted, use this option to convert those intermediate "
     "files manually.",
 )
-@click.option(
-    "--raft-log-level",
-    default="info",
-    show_default=True,
-    prompt="Enter the log level",
-    help="Log level, possible values are [off, error, warn, info, debug, "
-    "trace]. Default: 'info'. Note that 'debug' or more detailed "
-    "logging level requires that the library is compiled with "
-    "-DRAFT_ACTIVE_LEVEL=<L> where <L> >= <requested log level>.",
-)
 def main(
     subset_size: Optional[int],
     count: int,
@@ -178,7 +168,6 @@ def main(
     search_threads: Optional[str],
     dry_run: bool,
     data_export: bool,
-    raft_log_level: str,
 ) -> None:
     """
     Main function to run the benchmark with the provided options.
@@ -217,8 +206,6 @@ def main(
         The number of threads to use for throughput benchmark.
     dry_run : bool
         Whether to perform a dry run without actual execution.
-    raft_log_level : str
-        The logging level for the RAFT library.
 
     """
 
diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py
index d7827a096..a16f01b94 100644
--- a/python/cuvs_bench/cuvs_bench/run/run.py
+++ b/python/cuvs_bench/cuvs_bench/run/run.py
@@ -586,7 +586,6 @@ def run_benchmark(
     search_threads: int,
     dry_run: bool,
     data_export: bool,
-    raft_log_level: int,
 ) -> None:
     """
     Runs a benchmarking process based on the provided configurations.
@@ -625,8 +624,6 @@ def run_benchmark(
         The number of threads to use for searching.
     dry_run : bool
         Whether to perform a dry run without actual execution.
-    raft_log_level : int
-        The logging level for the RAFT library.
 
     Returns
     -------
@@ -689,5 +686,4 @@ def run_benchmark(
         batch_size,
         search_threads,
         search_mode,
-        raft_log_level,
     )
diff --git a/python/cuvs_bench/cuvs_bench/run/runners.py b/python/cuvs_bench/cuvs_bench/run/runners.py
index 5a540d2e5..522636e6c 100644
--- a/python/cuvs_bench/cuvs_bench/run/runners.py
+++ b/python/cuvs_bench/cuvs_bench/run/runners.py
@@ -37,7 +37,6 @@ def cuvs_bench_cpp(
     batch_size: int,
     search_threads: Optional[int],
     mode: str = "throughput",
-    raft_log_level: str = "info",
 ) -> None:
     """
     Run the CUVS benchmarking tool with the provided configuration.
@@ -72,8 +71,6 @@ def cuvs_bench_cpp(
     mode : str, optional
         The mode of search to perform ('latency' or 'throughput'),
         by default 'throughput'.
-    raft_log_level : str, optional
-        The logging level for the RAFT library, by default 'info'.
 
     Returns
     -------
@@ -117,7 +114,6 @@ def cuvs_bench_cpp(
                 "--benchmark_out_format=json",
                 "--benchmark_counters_tabular=true",
                 f"--benchmark_out={os.path.join(benchmark_out)}",
-                f"--raft_log_level={parse_log_level(raft_log_level)}",
             ]
             if force:
                 cmd.append("--force")
@@ -156,7 +152,6 @@ def cuvs_bench_cpp(
                 "--benchmark_out_format=json",
                 f"--mode={mode}",
                 f"--benchmark_out={os.path.join(search_folder, search_file)}",
-                f"--raft_log_level={parse_log_level(raft_log_level)}",
             ]
             if force:
                 cmd.append("--force")
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 6aa9a1e10..9e857806c 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -13,4 +13,3 @@ homepage = "https://github.com/rapidsai/cuvs"
 description = "RAPIDS vector search library"
 authors = ["NVIDIA Corporation"]
 license = "Apache-2.0"
-
diff --git a/rust/cuvs/src/cagra/index.rs b/rust/cuvs/src/cagra/index.rs
index 959959f60..bf316b4d7 100644
--- a/rust/cuvs/src/cagra/index.rs
+++ b/rust/cuvs/src/cagra/index.rs
@@ -78,6 +78,11 @@ impl Index {
         distances: &ManagedTensor,
     ) -> Result<()> {
         unsafe {
+            let prefilter = ffi::cuvsFilter {
+                addr: 0,
+                type_: ffi::cuvsFilterType::NO_FILTER,
+            };
+
             check_cuvs(ffi::cuvsCagraSearch(
                 res.0,
                 params.0,
@@ -85,6 +90,7 @@ impl Index {
                 queries.as_ptr(),
                 neighbors.as_ptr(),
                 distances.as_ptr(),
+                prefilter,
             ))
         }
     }
@@ -167,7 +173,8 @@ mod tests {
     #[test]
     fn test_cagra_compression() {
         use crate::cagra::CompressionParams;
-        let build_params = IndexParams::new().unwrap()
+        let build_params = IndexParams::new()
+            .unwrap()
             .set_compression(CompressionParams::new().unwrap());
         test_cagra(build_params);
     }
diff --git a/rust/cuvs/src/distance/mod.rs b/rust/cuvs/src/distance/mod.rs
index 566bf0129..f6911c683 100644
--- a/rust/cuvs/src/distance/mod.rs
+++ b/rust/cuvs/src/distance/mod.rs
@@ -71,7 +71,7 @@ mod tests {
         let distances = ManagedTensor::from(&distances_host)
             .to_device(&res)
             .unwrap();
-    
+
         pairwise_distance(&res, &dataset_device, &dataset_device, &distances, DistanceType::L2Expanded,
         None).unwrap();
 
diff --git a/rust/cuvs/src/ivf_flat/mod.rs b/rust/cuvs/src/ivf_flat/mod.rs
index 32aa70784..cee4e9454 100644
--- a/rust/cuvs/src/ivf_flat/mod.rs
+++ b/rust/cuvs/src/ivf_flat/mod.rs
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-//! The IVF-Flat method is an ANN algorithm. It uses an inverted file index (IVF) with 
-//! unmodified (that is, flat) vectors. This algorithm provides simple knobs to reduce 
+//! The IVF-Flat method is an ANN algorithm. It uses an inverted file index (IVF) with
+//! unmodified (that is, flat) vectors. This algorithm provides simple knobs to reduce
 //! the overall search space and to trade-off accuracy for speed.
 //!
 //! Example:
diff --git a/thirdparty/LICENSES/LICENSE.ann-benchmark b/thirdparty/LICENSES/LICENSE.ann-benchmark
index 9f8e4222f..4d04745ab 100644
--- a/thirdparty/LICENSES/LICENSE.ann-benchmark
+++ b/thirdparty/LICENSES/LICENSE.ann-benchmark
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/thirdparty/LICENSES/LICENSE.faiss b/thirdparty/LICENSES/LICENSE.faiss
index 87cbf536c..b96dcb048 100644
--- a/thirdparty/LICENSES/LICENSE.faiss
+++ b/thirdparty/LICENSES/LICENSE.faiss
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/thirdparty/LICENSES/LICENSE.pytorch b/thirdparty/LICENSES/LICENSE.pytorch
index 7ad3d737a..04f9ad110 100644
--- a/thirdparty/LICENSES/LICENSE.pytorch
+++ b/thirdparty/LICENSES/LICENSE.pytorch
@@ -74,4 +74,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
+POSSIBILITY OF SUCH DAMAGE.