From d95cae5fb27ba076f7b295697cf27789e28d93fa Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 17 Jan 2025 14:15:07 -0600
Subject: [PATCH 01/15] Use GCC 13 in CUDA 12 conda builds. (#6221)

## Description
conda-forge is using GCC 13 for CUDA 12 builds. This PR updates CUDA 12
conda builds to use GCC 13, for alignment.

These PRs should be merged in a specific order; see
https://github.com/rapidsai/build-planning/issues/129 for details.

---------

Co-authored-by: divyegala <divyegala@gmail.com>
---
 .../all_cuda-118_arch-x86_64.yaml             |  2 +-
 .../all_cuda-125_arch-x86_64.yaml             |  4 ++--
 .../clang_tidy_cuda-118_arch-x86_64.yaml      |  2 +-
 .../cpp_all_cuda-118_arch-x86_64.yaml         |  2 +-
 .../cpp_all_cuda-125_arch-x86_64.yaml         |  4 ++--
 .../recipes/cuml-cpu/conda_build_config.yaml  |  8 ++++---
 conda/recipes/cuml/conda_build_config.yaml    | 14 ++++++-------
 conda/recipes/cuml/meta.yaml                  |  8 +++----
 conda/recipes/libcuml/conda_build_config.yaml | 14 ++++++-------
 conda/recipes/libcuml/meta.yaml               | 16 +++++---------
 cpp/CMakeLists.txt                            | 21 +++++++++++++++++++
 cpp/src/hdbscan/condensed_hierarchy.cu        |  8 ++++---
 cpp/src/hdbscan/detail/utils.h                |  8 ++++---
 cpp/test/CMakeLists.txt                       | 10 ++++++++-
 dependencies.yaml                             | 16 ++++++++++----
 15 files changed, 86 insertions(+), 51 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index bf73b938a5..b97b4f0ca6 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -75,7 +75,7 @@ dependencies:
 - sphinx-copybutton
 - sphinx-markdown-tables
 - statsmodels
-- sysroot_linux-64==2.17
+- sysroot_linux-64==2.28
 - treelite==4.3.0
 - umap-learn==0.5.6
 - xgboost>=2.1.0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 72539f2d18..5c8439cf50 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -25,7 +25,7 @@ dependencies:
 - dask-ml
 - doxygen=1.9.1
 - fmt>=11.0.2,<12
-- gcc_linux-64=11.*
+- gcc_linux-64=13.*
 - graphviz
 - hdbscan>=0.8.39,<0.8.40
 - hypothesis>=6.0,<7
@@ -71,7 +71,7 @@ dependencies:
 - sphinx-copybutton
 - sphinx-markdown-tables
 - statsmodels
-- sysroot_linux-64==2.17
+- sysroot_linux-64==2.28
 - treelite==4.3.0
 - umap-learn==0.5.6
 - xgboost>=2.1.0
diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
index 836e34b0a9..f307fd45e3 100644
--- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -33,6 +33,6 @@ dependencies:
 - ninja
 - nvcc_linux-64=11.8
 - spdlog>=1.14.1,<1.15
-- sysroot_linux-64==2.17
+- sysroot_linux-64==2.28
 - tomli
 name: clang_tidy_cuda-118_arch-x86_64
diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
index 8442e61e86..6220cd8a50 100644
--- a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
@@ -31,5 +31,5 @@ dependencies:
 - ninja
 - nvcc_linux-64=11.8
 - spdlog>=1.14.1,<1.15
-- sysroot_linux-64==2.17
+- sysroot_linux-64==2.28
 name: cpp_all_cuda-118_arch-x86_64
diff --git a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
index d199d744e0..5b553bc95d 100644
--- a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
@@ -15,7 +15,7 @@ dependencies:
 - cuda-version=12.5
 - cxx-compiler
 - fmt>=11.0.2,<12
-- gcc_linux-64=11.*
+- gcc_linux-64=13.*
 - libcublas-dev
 - libcufft-dev
 - libcumlprims==25.2.*,>=0.0.0a0
@@ -27,5 +27,5 @@ dependencies:
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - spdlog>=1.14.1,<1.15
-- sysroot_linux-64==2.17
+- sysroot_linux-64==2.28
 name: cpp_all_cuda-125_arch-x86_64
diff --git a/conda/recipes/cuml-cpu/conda_build_config.yaml b/conda/recipes/cuml-cpu/conda_build_config.yaml
index a6f636917a..354e9fb334 100644
--- a/conda/recipes/cuml-cpu/conda_build_config.yaml
+++ b/conda/recipes/cuml-cpu/conda_build_config.yaml
@@ -1,8 +1,10 @@
 c_compiler_version:
-  - 11
+  - 13  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - 11  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cxx_compiler_version:
-  - 11
+  - 13  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - 11  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cmake_version:
   - ">=3.26.4,!=3.30.0"
@@ -11,4 +13,4 @@ c_stdlib:
   - sysroot
 
 c_stdlib_version:
-  - "=2.17"
+  - "=2.28"
diff --git a/conda/recipes/cuml/conda_build_config.yaml b/conda/recipes/cuml/conda_build_config.yaml
index eb829ba4de..ded1456b11 100644
--- a/conda/recipes/cuml/conda_build_config.yaml
+++ b/conda/recipes/cuml/conda_build_config.yaml
@@ -1,14 +1,14 @@
 c_compiler_version:
-  - 11
+  - 13  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - 11  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cxx_compiler_version:
-  - 11
+  - 13  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - 11  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cuda_compiler:
-  - cuda-nvcc
-
-cuda11_compiler:
-  - nvcc
+  - cuda-nvcc  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - nvcc  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cmake_version:
   - ">=3.26.4,!=3.30.0"
@@ -17,7 +17,7 @@ c_stdlib:
   - sysroot
 
 c_stdlib_version:
-  - "=2.17"
+  - "=2.28"
 
 treelite_version:
   - "=4.3.0"
diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml
index 35157fb3cc..a84f2738f9 100644
--- a/conda/recipes/cuml/meta.yaml
+++ b/conda/recipes/cuml/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -33,10 +33,8 @@ build:
     - SCCACHE_S3_KEY_PREFIX=cuml-linux64 # [linux64]
     - SCCACHE_S3_USE_SSL
   ignore_run_exports_from:
-    {% if cuda_major == "11" %}
-    - {{ compiler('cuda11') }}
-    {% else %}
     - {{ compiler('cuda') }}
+    {% if cuda_major != "11" %}
     - cuda-cudart-dev
     {% endif %}
     - cuda-python
@@ -46,7 +44,7 @@ requirements:
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
     {% if cuda_major == "11" %}
-    - {{ compiler('cuda11') }} ={{ cuda_version }}
+    - {{ compiler('cuda') }} ={{ cuda_version }}
     {% else %}
     - {{ compiler('cuda') }}
     {% endif %}
diff --git a/conda/recipes/libcuml/conda_build_config.yaml b/conda/recipes/libcuml/conda_build_config.yaml
index f802440354..b9e0670a70 100644
--- a/conda/recipes/libcuml/conda_build_config.yaml
+++ b/conda/recipes/libcuml/conda_build_config.yaml
@@ -1,20 +1,20 @@
 c_compiler_version:
-  - 11
+  - 13  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - 11  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cxx_compiler_version:
-  - 11
+  - 13  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - 11  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 cuda_compiler:
-  - cuda-nvcc
-
-cuda11_compiler:
-  - nvcc
+  - cuda-nvcc  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
+  - nvcc  # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
 
 c_stdlib:
   - sysroot
 
 c_stdlib_version:
-  - "=2.17"
+  - "=2.28"
 
 cmake_version:
   - ">=3.26.4,!=3.30.0"
diff --git a/conda/recipes/libcuml/meta.yaml b/conda/recipes/libcuml/meta.yaml
index f4a65c50f7..4d193a0ae8 100644
--- a/conda/recipes/libcuml/meta.yaml
+++ b/conda/recipes/libcuml/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -14,9 +14,7 @@ source:
 
 build:
   ignore_run_exports_from:
-    {% if cuda_major == "11" %}
-    - {{ compiler('cuda11') }}
-    {% endif %}
+    - {{ compiler('cuda') }}
   script_env:
     - AWS_ACCESS_KEY_ID
     - AWS_SECRET_ACCESS_KEY
@@ -38,7 +36,7 @@ requirements:
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
     {% if cuda_major == "11" %}
-    - {{ compiler('cuda11') }} ={{ cuda_version }}
+    - {{ compiler('cuda') }} ={{ cuda_version }}
     {% else %}
     - {{ compiler('cuda') }}
     {% endif %}
@@ -84,10 +82,8 @@ outputs:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
       ignore_run_exports_from:
-        {% if cuda_major == "11" %}
-        - {{ compiler('cuda11') }}
-        {% else %}
         - {{ compiler('cuda') }}
+        {% if cuda_major != "11" %}
         - cuda-cudart-dev
         {% endif %}
     requirements:
@@ -131,10 +127,8 @@ outputs:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
       ignore_run_exports_from:
-        {% if cuda_major == "11" %}
-        - {{ compiler('cuda11') }}
-        {% else %}
         - {{ compiler('cuda') }}
+        {% if cuda_major != "11" %}
         - cuda-cudart-dev
         {% endif %}
     requirements:
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 90c0c02cf3..118f3f0e28 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -422,6 +422,27 @@ if(BUILD_CUML_CPP_LIBRARY)
         src/hdbscan/hdbscan.cu
         src/hdbscan/condensed_hierarchy.cu
         src/hdbscan/prediction_data.cu)
+
+    # When using GCC 13, some maybe-uninitialized warnings appear from CCCL and are treated as errors.
+    # See this issue: https://github.com/rapidsai/cuml/issues/6225
+    set_property(
+      SOURCE src/hdbscan/condensed_hierarchy.cu
+      APPEND_STRING
+      PROPERTY COMPILE_FLAGS
+      " -Xcompiler=-Wno-maybe-uninitialized"
+    )
+    set_property(
+      SOURCE src/hdbscan/hdbscan.cu
+      APPEND_STRING
+      PROPERTY COMPILE_FLAGS
+      " -Xcompiler=-Wno-maybe-uninitialized"
+    )
+    set_property(
+      SOURCE src/hdbscan/prediction_data.cu
+      APPEND_STRING
+      PROPERTY COMPILE_FLAGS
+      " -Xcompiler=-Wno-maybe-uninitialized"
+    )
   endif()
 
   if(all_algo OR holtwinters_algo)
diff --git a/cpp/src/hdbscan/condensed_hierarchy.cu b/cpp/src/hdbscan/condensed_hierarchy.cu
index 76f1a19cf8..5744bc51c8 100644
--- a/cpp/src/hdbscan/condensed_hierarchy.cu
+++ b/cpp/src/hdbscan/condensed_hierarchy.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,7 @@
 
 #include <cub/cub.cuh>
 #include <cuda/functional>
+#include <cuda/std/functional>
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include <thrust/execution_policy.h>
@@ -157,8 +158,9 @@ void CondensedHierarchy<value_idx, value_t>::condense(value_idx* full_parents,
     thrust::cuda::par.on(stream),
     full_sizes,
     full_sizes + size,
-    cuda::proclaim_return_type<bool>([=] __device__(value_idx a) -> bool { return a != -1; }),
-    0,
+    cuda::proclaim_return_type<value_idx>(
+      [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a != -1); }),
+    static_cast<value_idx>(0),
     thrust::plus<value_idx>());
 
   parents.resize(n_edges, stream);
diff --git a/cpp/src/hdbscan/detail/utils.h b/cpp/src/hdbscan/detail/utils.h
index b151628429..4456416a6f 100644
--- a/cpp/src/hdbscan/detail/utils.h
+++ b/cpp/src/hdbscan/detail/utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -34,6 +34,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/execution_policy.h>
 #include <thrust/for_each.h>
@@ -114,8 +115,9 @@ Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
     thrust_policy,
     sizes,
     sizes + condensed_tree.get_n_edges(),
-    cuda::proclaim_return_type<bool>([=] __device__(value_idx a) -> bool { return a > 1; }),
-    0,
+    cuda::proclaim_return_type<value_idx>(
+      [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a > 1); }),
+    static_cast<value_idx>(0),
     thrust::plus<value_idx>());
 
   // remove leaves from condensed tree
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 0576217965..6cfd9d2d2f 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -149,6 +149,14 @@ if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.2")
     # An HDBSCAN gtest is failing w/ CUDA 11.2 for some reason.
     if(all_algo OR hdbscan_algo)
       ConfigureTest(PREFIX SG NAME HDBSCAN_TEST  sg/hdbscan_test.cu ML_INCLUDE)
+      # When using GCC 13, some maybe-uninitialized warnings appear from CCCL and are treated as errors.
+      # See this issue: https://github.com/rapidsai/cuml/issues/6225
+      set_property(
+        SOURCE sg/hdbscan_test.cu
+        APPEND_STRING
+        PROPERTY COMPILE_FLAGS
+        " -Xcompiler=-Wno-maybe-uninitialized"
+      )
     endif()
 endif()
 
diff --git a/dependencies.yaml b/dependencies.yaml
index f35eb74021..6761744857 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -190,29 +190,37 @@ dependencies:
           - matrix:
               arch: x86_64
             packages:
-              - gcc_linux-64=11.*
-              - sysroot_linux-64==2.17
+              - sysroot_linux-64==2.28
           - matrix:
               arch: aarch64
             packages:
-              - gcc_linux-aarch64=11.*
-              - sysroot_linux-aarch64==2.17
+              - sysroot_linux-aarch64==2.28
       - output_types: conda
         matrices:
           - matrix:
               arch: x86_64
               cuda: "11.8"
             packages:
+              - gcc_linux-64=11.*
               - nvcc_linux-64=11.8
           - matrix:
               arch: aarch64
               cuda: "11.8"
             packages:
+              - gcc_linux-aarch64=11.*
               - nvcc_linux-aarch64=11.8
           - matrix:
+              arch: x86_64
+              cuda: "12.*"
+            packages:
+              - cuda-nvcc
+              - gcc_linux-64=13.*
+          - matrix:
+              arch: aarch64
               cuda: "12.*"
             packages:
               - cuda-nvcc
+              - gcc_linux-aarch64=13.*
   py_build_cuml:
     common:
       - output_types: [conda, requirements, pyproject]

From 01e19bba9821954b062a04fbf31d3522afa4b0b1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 21 Jan 2025 17:35:49 -0600
Subject: [PATCH 02/15] Ignore cudf's __dataframe__ deprecation. (#6229)

Currently CI is failing due to
https://github.com/rapidsai/cudf/pull/17736.

The `__dataframe__` protocol appears to be used internally by
scikit-learn:
https://github.com/scikit-learn/scikit-learn/blob/311bf6badd74bb69081eb90e2643f15706d3473c/sklearn/utils/validation.py#L389

Errors look like:
```
FAILED test_metrics.py::test_sklearn_search - FutureWarning: Using `__dataframe__` is deprecated
```

This PR ignores the `FutureWarning` to allow CI to pass.

---------

Co-authored-by: Dante Gama Dessavre <danteg@nvidia.com>
---
 python/cuml/cuml/tests/test_input_utils.py       | 16 ++++++++++------
 .../cuml/tests/test_kneighbors_classifier.py     |  4 +++-
 python/cuml/cuml/tests/test_metrics.py           |  2 ++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/python/cuml/cuml/tests/test_input_utils.py b/python/cuml/cuml/tests/test_input_utils.py
index b2570f8f17..4d76c84900 100644
--- a/python/cuml/cuml/tests/test_input_utils.py
+++ b/python/cuml/cuml/tests/test_input_utils.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,6 +35,7 @@
 np = cpu_only_import("numpy")
 
 nbcuda = gpu_only_import_from("numba", "cuda")
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
 pdDF = cpu_only_import_from("pandas", "DataFrame")
 
 
@@ -446,11 +447,14 @@ def test_tocupy_missing_values_handling():
     assert str(array.dtype) == "float64"
     assert cp.isnan(array[1])
 
-    with pytest.raises(ValueError):
-        df = cudf.Series(data=[7, None, 3])
-        array, n_rows, n_cols, dtype = input_to_cupy_array(
-            df, fail_on_null=True
-        )
+    # cudf.pandas now mimics pandas better for handling None, so we don't
+    # need to fail and raise this error when cudf.pandas is active.
+    if not cudf_pandas_active:
+        with pytest.raises(ValueError):
+            df = cudf.Series(data=[7, None, 3])
+            array, n_rows, n_cols, dtype = input_to_cupy_array(
+                df, fail_on_null=True
+            )
 
 
 @pytest.mark.cudf_pandas
diff --git a/python/cuml/cuml/tests/test_kneighbors_classifier.py b/python/cuml/cuml/tests/test_kneighbors_classifier.py
index d39ef3bae5..cb387b21d1 100644
--- a/python/cuml/cuml/tests/test_kneighbors_classifier.py
+++ b/python/cuml/cuml/tests/test_kneighbors_classifier.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -218,6 +218,8 @@ def test_predict_large_n_classes(datatype):
     assert array_equal(y_hat.astype(np.int32), y_test.astype(np.int32))
 
 
+# Ignore FutureWarning: Using `__dataframe__` is deprecated
+@pytest.mark.filterwarnings("ignore::FutureWarning")
 @pytest.mark.parametrize("n_samples", [100])
 @pytest.mark.parametrize("n_features", [40])
 @pytest.mark.parametrize("n_neighbors", [4])
diff --git a/python/cuml/cuml/tests/test_metrics.py b/python/cuml/cuml/tests/test_metrics.py
index 5886ff68d4..40ea25ed35 100644
--- a/python/cuml/cuml/tests/test_metrics.py
+++ b/python/cuml/cuml/tests/test_metrics.py
@@ -163,6 +163,8 @@ def test_r2_score(datatype, use_handle):
     np.testing.assert_almost_equal(score, 0.98, decimal=7)
 
 
+# Ignore FutureWarning: Using `__dataframe__` is deprecated
+@pytest.mark.filterwarnings("ignore::FutureWarning")
 def test_sklearn_search():
     """Test ensures scoring function works with sklearn machinery"""
     import numpy as np

From bd7c69e011d76e45dd4b30b1b78042c89061cc9f Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 23 Jan 2025 02:13:31 -0600
Subject: [PATCH 03/15] Add upper bound to prevent usage of numba 0.61.0
 (#6244)

Numba 0.61.0 was just released with a couple of breaking changes; this PR is required to unblock CI.


xref: https://github.com/rapidsai/cudf/pull/17777

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Ray Douglass (https://github.com/raydouglass)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cuml/pull/6244
---
 conda/environments/all_cuda-118_arch-x86_64.yaml           | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml           | 2 +-
 dependencies.yaml                                          | 2 +-
 .../cuml/cuml/tests/dask/test_dask_logistic_regression.py  | 7 ++++++-
 python/cuml/pyproject.toml                                 | 2 +-
 5 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index b97b4f0ca6..01d42d3d6e 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -47,7 +47,7 @@ dependencies:
 - nbsphinx
 - ninja
 - nltk
-- numba>=0.57
+- numba>=0.59.1,<0.61.0a0
 - numpy>=1.23,<3.0a0
 - numpydoc
 - nvcc_linux-64=11.8
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 5c8439cf50..f98ca90945 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -44,7 +44,7 @@ dependencies:
 - nbsphinx
 - ninja
 - nltk
-- numba>=0.57
+- numba>=0.59.1,<0.61.0a0
 - numpy>=1.23,<3.0a0
 - numpydoc
 - packaging
diff --git a/dependencies.yaml b/dependencies.yaml
index 6761744857..54ad194e84 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -234,7 +234,7 @@ dependencies:
         packages:
           - dask-cuda==25.2.*,>=0.0.0a0
           - joblib>=0.11
-          - numba>=0.57
+          - numba>=0.59.1,<0.61.0a0
           - numpy>=1.23,<3.0a0
             # TODO: Is scipy really a hard dependency, or should
             # we make it optional (i.e. an extra for pip
diff --git a/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py b/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py
index 0a3d47ea3f..716b701356 100644
--- a/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py
+++ b/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py
@@ -34,7 +34,12 @@
 dask_cudf = gpu_only_import("dask_cudf")
 cudf = gpu_only_import("cudf")
 
-pytestmark = pytest.mark.mg
+pytestmark = [
+    pytest.mark.mg,
+    pytest.mark.skip(
+        reason="pytest hang https://github.com/rapidsai/cuml/issues/6247"
+    ),
+]
 
 
 def _prep_training_data(c, X_train, y_train, partitions_per_worker):
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index 820a690d3c..c945827a2c 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -99,7 +99,7 @@ dependencies = [
     "dask-cuda==25.2.*,>=0.0.0a0",
     "dask-cudf==25.2.*,>=0.0.0a0",
     "joblib>=0.11",
-    "numba>=0.57",
+    "numba>=0.59.1,<0.61.0a0",
     "numpy>=1.23,<3.0a0",
     "nvidia-cublas",
     "nvidia-cufft",

From 1bcd5c731a76b261ed12cbb138361f0be58b0cf3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 23 Jan 2025 16:13:18 -0600
Subject: [PATCH 04/15] Normalize whitespace (#6238)

This PR applies `pre-commit` hooks to normalize whitespace (trimming trailing whitespace and enforcing consistent end-of-file newlines).

These rules are already applied to most other RAPIDS repos, so this PR aligns with the norm in RAPIDS.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/6238
---
 .github/labeler.yml                           |  3 +--
 .pre-commit-config.yaml                       |  7 +++++-
 LICENSE                                       |  2 +-
 ci/checks/black_lists.sh                      |  4 ++--
 codecov.yml                                   |  2 +-
 cpp/.clang-tidy                               |  3 +--
 cpp/CMakeLists.txt                            |  2 +-
 cpp/cmake/modules/ConfigureAlgorithms.cmake   |  3 +--
 cpp/examples/symreg/README.md                 | 12 +++++-----
 .../cuml/common/pinned_host_vector.hpp        |  4 ++--
 cpp/include/cuml/experimental/fil/README.md   |  4 ++--
 cpp/include/cuml/solvers/params.hpp           |  4 ++--
 cpp/include/cuml/tsa/holtwinters_params.h     |  4 ++--
 cpp/src/dbscan/vertexdeg/pack.h               |  4 ++--
 .../decisiontree/batched-levelalgo/bins.cuh   |  4 ++--
 cpp/src/genetic/constants.h                   |  4 ++--
 cpp/src/glm/qn/mg/glm_base_mg.cuh             |  4 ++--
 cpp/src/glm/qn/mg/standardization.cuh         |  4 ++--
 cpp/src/hdbscan/detail/kernels/membership.cuh |  4 ++--
 .../hdbscan/detail/kernels/stabilities.cuh    |  4 ++--
 cpp/src/hdbscan/detail/predict.cuh            |  4 ++--
 cpp/src/tsne/kluger_lab_license.txt           |  2 +-
 cpp/src_prims/datasets/boston.h               |  4 ++--
 cpp/src_prims/datasets/breast_cancer.h        |  4 ++--
 cpp/src_prims/datasets/diabetes.h             |  4 ++--
 cpp/test/c_api/README.md                      |  2 +-
 cpp/test/mg/kmeans_test.cu                    |  4 ++--
 cpp/test/sg/handle_test.cu                    |  4 ++--
 cpp/test/sg/hdbscan_inputs.hpp                |  4 ++--
 docs/source/_static/references.css            |  2 +-
 docs/source/api.rst                           |  2 +-
 docs/source/cuml_blogs.rst                    |  1 -
 docs/source/cuml_intro.rst                    |  2 +-
 docs/source/user_guide.rst                    |  1 -
 notebooks/README.md                           |  2 +-
 .../data/time_series/population_estimate.csv  |  2 +-
 notebooks/random_forest_demo.ipynb            |  2 +-
 print_env.sh                                  | 15 +++++++------
 python/cuml/.coveragerc                       |  2 +-
 python/cuml/README.md                         |  2 --
 .../cuml/cuml/_thirdparty/sklearn/README.md   |  2 +-
 .../tests/ts_datasets/population_estimate.csv |  2 +-
 thirdparty/LICENSES/LICENSE.H2O4GPU           |  2 +-
 thirdparty/LICENSES/LICENSE.faiss             |  2 +-
 wiki/DEFINITION_OF_DONE_CRITERIA.md           | 14 ++++++------
 wiki/README.md                                |  4 ++--
 wiki/mnmg/Using_Infiniband_for_MNMG.md        | 22 +++++++++----------
 47 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index bc1c15661f..b7146a1bdc 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -5,7 +5,7 @@
 Cython / Python:
   - 'python/**'
   - 'notebooks/**'
-  
+
 CUDA/C++:
   - 'cpp/**'
 
@@ -18,4 +18,3 @@ ci:
 
 conda:
   - 'conda/**'
-  
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8447183ca1..8ce60fe6f4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,12 @@
 ---
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 repos:
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v5.0.0
+      hooks:
+          - id: trailing-whitespace
+          - id: end-of-file-fixer
     - repo: https://github.com/psf/black
       rev: 22.10.0
       hooks:
diff --git a/LICENSE b/LICENSE
index 4b54edd235..3ba63d53f4 100644
--- a/LICENSE
+++ b/LICENSE
@@ -187,7 +187,7 @@
       identification within third-party archives.
 
    Copyright 2018 NVIDIA CORPORATION
-   
+
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
diff --git a/ci/checks/black_lists.sh b/ci/checks/black_lists.sh
index 85435cf856..6300ab359e 100755
--- a/ci/checks/black_lists.sh
+++ b/ci/checks/black_lists.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 ##########################################
 # cuML black listed function call Tester #
 ##########################################
@@ -40,7 +40,7 @@ done
 
 for cond_black_listed in cudaMemcpy cudaMemset; do
     TMP=`git --no-pager diff --ignore-submodules -w --minimal -U0 -S"$cond_black_listed" $PR_TARGET_BRANCH | grep '^+' | grep -v '^+++' | grep -P "$cond_black_listed(?!Async)"`
-    
+
     if [ "$TMP" != "" ]; then
         for filename in `git --no-pager diff --ignore-submodules -w --minimal --name-only -S"$cond_black_listed" $PR_TARGET_BRANCH`; do
             basefilename=$(basename -- "$filename")
diff --git a/codecov.yml b/codecov.yml
index 5e6b1fbbb3..038c75e2a0 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -11,4 +11,4 @@ comment:
 # undocumented option:
 # https://community.codecov.io/t/unable-to-determine-a-parent-commit-to-compare-against-in-base-branch-after-squash-and-merge/2480/15
 codecov:
-  allow_coverage_offsets: true
\ No newline at end of file
+  allow_coverage_offsets: true
diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index fea01ccea0..37651f0c26 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -4,7 +4,7 @@ WarningsAsErrors: '*'
 HeaderFilterRegex: ''
 AnalyzeTemporaryDtors: false
 FormatStyle:     none
-CheckOptions:    
+CheckOptions:
   - key:             cert-dcl16-c.NewSuffixes
     value:           'L;LL;LU;LLU'
   - key:             cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic
@@ -142,4 +142,3 @@ CheckOptions:
   - key:             readability-identifier-naming.TypedefSuffix
     value:           ''
 ...
-
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 118f3f0e28..b7ef6e2293 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -115,7 +115,7 @@ message(VERBOSE "CUML_CPP: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.")
 set(LIBCUML_LOGGING_LEVEL
     "DEBUG"
     CACHE STRING "Choose the logging level."
-)                                                                                                
+)
 set_property(
   CACHE LIBCUML_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL"
                                        "OFF"
diff --git a/cpp/cmake/modules/ConfigureAlgorithms.cmake b/cpp/cmake/modules/ConfigureAlgorithms.cmake
index f93425405e..261c0a1ac2 100644
--- a/cpp/cmake/modules/ConfigureAlgorithms.cmake
+++ b/cpp/cmake/modules/ConfigureAlgorithms.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -129,4 +129,3 @@ else()
         set(LINK_CUVS ON)
     endif()
 endif()
-
diff --git a/cpp/examples/symreg/README.md b/cpp/examples/symreg/README.md
index 52581eb627..444e8e7c68 100644
--- a/cpp/examples/symreg/README.md
+++ b/cpp/examples/symreg/README.md
@@ -1,5 +1,5 @@
 # symbolic regression
-This subfolder contains an example on how perform symbolic regression in cuML (from C++) 
+This subfolder contains an example on how perform symbolic regression in cuML (from C++)
 There are two `CMakeLists.txt` in this folder:
 1. `CMakeLists.txt` (default) which is included when building cuML
 2. `CMakeLists_standalone.txt` as an example for a stand alone project linking to `libcuml.so`
@@ -11,7 +11,7 @@ $ cmake .. -DCUML_LIBRARY_DIR=/path/to/directory/with/libcuml.so -DCUML_INCLUDE_
 ```
 Then build with `make` or `ninja`
 ```
-$ make  
+$ make
 Scanning dependencies of target raft
 [ 10%] Creating directories for 'raft'
 [ 20%] Performing download step (git clone) for 'raft'
@@ -28,7 +28,7 @@ Scanning dependencies of target symreg_example
 [100%] Linking CUDA executable symreg_example
 [100%] Built target symreg_example
 ```
-`CMakeLists_standalone.txt` also loads a minimal set of header dependencies(namely [raft](https://github.com/rapidsai/raft) and [cub](https://github.com/NVIDIA/cub)) if they are not detected in the system. 
+`CMakeLists_standalone.txt` also loads a minimal set of header dependencies(namely [raft](https://github.com/rapidsai/raft) and [cub](https://github.com/NVIDIA/cub)) if they are not detected in the system.
 ## Run
 
 1. Generate a toy training and test dataset
@@ -53,7 +53,7 @@ $ ./symreg_example -n_cols 2                   \
                    -stopping_criteria 0.01     \
                    -p_crossover 0.7            \
                    -p_subtree 0.1              \
-                   -p_hoist 0.05               \ 
+                   -p_hoist 0.05               \
                    -p_point 0.1                \
                    -parsimony_coefficient 0.01
 ```
@@ -77,11 +77,11 @@ Finished training for 4 generations.
            Best AST equation :( add( sub( mult( X0, X0) , div( X1, X1) ) , sub( X1, mult( X1, X1) ) ) )
 Training time =    626.658ms
 ***************************************
-Beginning Inference on Test dataset... 
+Beginning Inference on Test dataset...
 Inference score on test set = 5.29271e-08
 Inference time =    0.35248ms
 Some Predicted test values:
 -1.65061;-1.64081;-0.91711;-2.28976;-0.280688;
 Corresponding Actual test values:
 -1.65061;-1.64081;-0.91711;-2.28976;-0.280688;
-```
\ No newline at end of file
+```
diff --git a/cpp/include/cuml/common/pinned_host_vector.hpp b/cpp/include/cuml/common/pinned_host_vector.hpp
index 768bcb3b4e..db49c9d635 100644
--- a/cpp/include/cuml/common/pinned_host_vector.hpp
+++ b/cpp/include/cuml/common/pinned_host_vector.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,4 +61,4 @@ class pinned_host_vector {
   std::size_t size_;
 };
 
-}  // namespace ML
\ No newline at end of file
+}  // namespace ML
diff --git a/cpp/include/cuml/experimental/fil/README.md b/cpp/include/cuml/experimental/fil/README.md
index 48d4a4ab16..70ece00cb7 100644
--- a/cpp/include/cuml/experimental/fil/README.md
+++ b/cpp/include/cuml/experimental/fil/README.md
@@ -3,11 +3,11 @@ RAPIDS Forest Inference Library (FIL) provides accelerated inference for
 tree-based machine learning models. Unlike packages like XGBoost,
 LightGBM, or even Scikit-Learn/cuML's random forest implementations, FIL
 cannot be used to _train_ forest models. Instead, its goal is to speed up
-inference using forest models trained by all of those packages. 
+inference using forest models trained by all of those packages.
 
 This directory contains an experimental new implementation of FIL which
 provides both CPU and GPU execution. Its GPU implementation also offers
-improved performance relative to the existing implementation in many but not all cases. 
+improved performance relative to the existing implementation in many but not all cases.
 
 For Python usage information and more extensive information on
 parameter-tuning and other end-user functionality, check out
diff --git a/cpp/include/cuml/solvers/params.hpp b/cpp/include/cuml/solvers/params.hpp
index d32b4bc7ba..3be8603bbe 100644
--- a/cpp/include/cuml/solvers/params.hpp
+++ b/cpp/include/cuml/solvers/params.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,4 +33,4 @@ enum loss_funct {
 
 enum penalty { NONE, L1, L2, ELASTICNET };
 
-};  // namespace ML
\ No newline at end of file
+};  // namespace ML
diff --git a/cpp/include/cuml/tsa/holtwinters_params.h b/cpp/include/cuml/tsa/holtwinters_params.h
index e896816164..c16fa74400 100644
--- a/cpp/include/cuml/tsa/holtwinters_params.h
+++ b/cpp/include/cuml/tsa/holtwinters_params.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,4 +46,4 @@ struct OptimParams {
 
 enum Norm { L0, L1, L2, LINF };
 
-}  // namespace ML
\ No newline at end of file
+}  // namespace ML
diff --git a/cpp/src/dbscan/vertexdeg/pack.h b/cpp/src/dbscan/vertexdeg/pack.h
index e876050e0f..c6700a34ad 100644
--- a/cpp/src/dbscan/vertexdeg/pack.h
+++ b/cpp/src/dbscan/vertexdeg/pack.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -65,4 +65,4 @@ struct Pack {
 
 }  // namespace VertexDeg
 }  // namespace Dbscan
-}  // namespace ML
\ No newline at end of file
+}  // namespace ML
diff --git a/cpp/src/decisiontree/batched-levelalgo/bins.cuh b/cpp/src/decisiontree/batched-levelalgo/bins.cuh
index 312c4f2b51..9e89bada62 100644
--- a/cpp/src/decisiontree/batched-levelalgo/bins.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/bins.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -73,4 +73,4 @@ struct AggregateBin {
   }
 };
 }  // namespace DT
-}  // namespace ML
\ No newline at end of file
+}  // namespace ML
diff --git a/cpp/src/genetic/constants.h b/cpp/src/genetic/constants.h
index 5e793a6604..1934e022a7 100644
--- a/cpp/src/genetic/constants.h
+++ b/cpp/src/genetic/constants.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,4 +28,4 @@ const int GENE_TPB = 256;
 const int MAX_STACK_SIZE = 20;
 
 }  // namespace genetic
-}  // namespace cuml
\ No newline at end of file
+}  // namespace cuml
diff --git a/cpp/src/glm/qn/mg/glm_base_mg.cuh b/cpp/src/glm/qn/mg/glm_base_mg.cuh
index 49c309c5a4..2884f75b15 100644
--- a/cpp/src/glm/qn/mg/glm_base_mg.cuh
+++ b/cpp/src/glm/qn/mg/glm_base_mg.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -204,4 +204,4 @@ struct GLMWithDataMG : ML::GLM::detail::GLMWithData<T, GLMObjective> {
 };
 };  // namespace opg
 };  // namespace GLM
-};  // namespace ML
\ No newline at end of file
+};  // namespace ML
diff --git a/cpp/src/glm/qn/mg/standardization.cuh b/cpp/src/glm/qn/mg/standardization.cuh
index 4cd169f909..f0cc15cdba 100644
--- a/cpp/src/glm/qn/mg/standardization.cuh
+++ b/cpp/src/glm/qn/mg/standardization.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -377,4 +377,4 @@ struct Standardizer {
 
 };  // namespace opg
 };  // namespace GLM
-};  // namespace ML
\ No newline at end of file
+};  // namespace ML
diff --git a/cpp/src/hdbscan/detail/kernels/membership.cuh b/cpp/src/hdbscan/detail/kernels/membership.cuh
index b5bbf2a34e..8db7c15d64 100644
--- a/cpp/src/hdbscan/detail/kernels/membership.cuh
+++ b/cpp/src/hdbscan/detail/kernels/membership.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -70,4 +70,4 @@ struct probabilities_functor {
 };  // namespace Membership
 };  // namespace detail
 };  // namespace HDBSCAN
-};  // namespace ML
\ No newline at end of file
+};  // namespace ML
diff --git a/cpp/src/hdbscan/detail/kernels/stabilities.cuh b/cpp/src/hdbscan/detail/kernels/stabilities.cuh
index 7248457d1d..3e324f4289 100644
--- a/cpp/src/hdbscan/detail/kernels/stabilities.cuh
+++ b/cpp/src/hdbscan/detail/kernels/stabilities.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,4 +61,4 @@ struct stabilities_functor {
 };  // namespace Stability
 };  // namespace detail
 };  // namespace HDBSCAN
-};  // namespace ML
\ No newline at end of file
+};  // namespace ML
diff --git a/cpp/src/hdbscan/detail/predict.cuh b/cpp/src/hdbscan/detail/predict.cuh
index 9cbe5fea19..217afed3aa 100644
--- a/cpp/src/hdbscan/detail/predict.cuh
+++ b/cpp/src/hdbscan/detail/predict.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -272,4 +272,4 @@ void approximate_predict(const raft::handle_t& handle,
 };  // end namespace Predict
 };  // end namespace detail
 };  // end namespace HDBSCAN
-};  // end namespace ML
\ No newline at end of file
+};  // end namespace ML
diff --git a/cpp/src/tsne/kluger_lab_license.txt b/cpp/src/tsne/kluger_lab_license.txt
index 4a8d5f8a8e..90a858ccfd 100644
--- a/cpp/src/tsne/kluger_lab_license.txt
+++ b/cpp/src/tsne/kluger_lab_license.txt
@@ -132,4 +132,4 @@ General Public License. (e.g. they do not require you to accompany any object
 code using FFTW with the corresponding source code.) For these alternative
 terms you must purchase a license from MIT’s Technology Licensing Office. Users
 interested in such a license should contact us (fftw@fftw.org) for more
-information.
\ No newline at end of file
+information.
diff --git a/cpp/src_prims/datasets/boston.h b/cpp/src_prims/datasets/boston.h
index acbd4db928..984bae0362 100644
--- a/cpp/src_prims/datasets/boston.h
+++ b/cpp/src_prims/datasets/boston.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -535,4 +535,4 @@ static const int n_features = 13;
 
 }  // namespace Boston
 }  // namespace Datasets
-}  // namespace MLCommon
\ No newline at end of file
+}  // namespace MLCommon
diff --git a/cpp/src_prims/datasets/breast_cancer.h b/cpp/src_prims/datasets/breast_cancer.h
index 9432a20701..4293e03d70 100644
--- a/cpp/src_prims/datasets/breast_cancer.h
+++ b/cpp/src_prims/datasets/breast_cancer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1989,4 +1989,4 @@ static const int n_features = 30;
 
 }  // namespace BreastCancer
 }  // namespace Datasets
-}  // namespace MLCommon
\ No newline at end of file
+}  // namespace MLCommon
diff --git a/cpp/src_prims/datasets/diabetes.h b/cpp/src_prims/datasets/diabetes.h
index a4983e1ff2..d13359e274 100644
--- a/cpp/src_prims/datasets/diabetes.h
+++ b/cpp/src_prims/datasets/diabetes.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1504,4 +1504,4 @@ static const int n_features = 10;
 
 }  // namespace Diabetes
 }  // namespace Datasets
-}  // namespace MLCommon
\ No newline at end of file
+}  // namespace MLCommon
diff --git a/cpp/test/c_api/README.md b/cpp/test/c_api/README.md
index d0632dfa64..e23b509360 100644
--- a/cpp/test/c_api/README.md
+++ b/cpp/test/c_api/README.md
@@ -17,4 +17,4 @@ To help prevent accidentally including the C-API files when compiling `libcuml++
 #error \
   "This header is only for the C-API and should not be included from the C++ API."
 #endif
-```
\ No newline at end of file
+```
diff --git a/cpp/test/mg/kmeans_test.cu b/cpp/test/mg/kmeans_test.cu
index 4ee5836f18..0d3732202e 100644
--- a/cpp/test/mg/kmeans_test.cu
+++ b/cpp/test/mg/kmeans_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -203,4 +203,4 @@ INSTANTIATE_TEST_CASE_P(KmeansTests, KmeansTestF, ::testing::ValuesIn(inputsf2))
 
 INSTANTIATE_TEST_CASE_P(KmeansTests, KmeansTestD, ::testing::ValuesIn(inputsd2));
 
-}  // end namespace ML
\ No newline at end of file
+}  // end namespace ML
diff --git a/cpp/test/sg/handle_test.cu b/cpp/test/sg/handle_test.cu
index 8bf87fa71e..3764c2fd02 100644
--- a/cpp/test/sg/handle_test.cu
+++ b/cpp/test/sg/handle_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,4 +45,4 @@ TEST(HandleTest, DoubleDestoryFails)
   // handle is destroyed
   status = cumlDestroy(handle);
   EXPECT_EQ(CUML_INVALID_HANDLE, status);
-}
\ No newline at end of file
+}
diff --git a/cpp/test/sg/hdbscan_inputs.hpp b/cpp/test/sg/hdbscan_inputs.hpp
index eaf37543ac..9933c6e8a9 100644
--- a/cpp/test/sg/hdbscan_inputs.hpp
+++ b/cpp/test/sg/hdbscan_inputs.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -11560,4 +11560,4 @@ const std::vector<MembershipVectorInputs<float, int>> membership_vector_inputs =
     0.003645882708951831,
     0.0031043440103530884}}};
 };  // namespace HDBSCAN
-};  // namespace ML
\ No newline at end of file
+};  // namespace ML
diff --git a/docs/source/_static/references.css b/docs/source/_static/references.css
index 225cf13ba9..d1f647233a 100644
--- a/docs/source/_static/references.css
+++ b/docs/source/_static/references.css
@@ -20,4 +20,4 @@ dl.citation > dt.label > span::before {
 /* Add closing bracket */
 dl.citation > dt.label > span::after {
   content: "]";
-}
\ No newline at end of file
+}
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 44a29563f3..2def05ac72 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -52,7 +52,7 @@ cuML provides experimental support for running selected estimators and operators
    * - Regression and Classification
      - Ridge
 
-If a CUDA-enabled GPU is available on the system, cuML will default to using it. Users can configure CPU or GPU execution for supported operators via context managers or global configuration. 
+If a CUDA-enabled GPU is available on the system, cuML will default to using it. Users can configure CPU or GPU execution for supported operators via context managers or global configuration.
 
 .. code-block:: python
 
diff --git a/docs/source/cuml_blogs.rst b/docs/source/cuml_blogs.rst
index 0df70746c1..c56334671f 100644
--- a/docs/source/cuml_blogs.rst
+++ b/docs/source/cuml_blogs.rst
@@ -28,4 +28,3 @@ Academic Papers
 ---------------
 
 * `Machine Learning in Python: Main developments and technology trends in data science, machine learning, and artificial intelligence (Sebastian Raschka, Joshua Patterson, Corey Nolet) <https://arxiv.org/abs/2002.04803>`_
-
diff --git a/docs/source/cuml_intro.rst b/docs/source/cuml_intro.rst
index f78a472718..54b193b888 100644
--- a/docs/source/cuml_intro.rst
+++ b/docs/source/cuml_intro.rst
@@ -18,7 +18,7 @@ then call ``predict`` or ``transform`` for inference.
 .. code-block:: python
 
    import cuml.LinearRegression
-   
+
    model = cuml.LinearRegression()
    model.fit(X_train, y)
    y_prediction = model.predict(X_test)
diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst
index 73f0f1d057..64c7705eb3 100644
--- a/docs/source/user_guide.rst
+++ b/docs/source/user_guide.rst
@@ -7,4 +7,3 @@ User Guide
    estimator_intro.ipynb
    pickling_cuml_models.ipynb
    execution_device_interoperability.ipynb
-
diff --git a/notebooks/README.md b/notebooks/README.md
index a6b7e28461..39d40e4fd8 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -9,7 +9,7 @@ documentation tree.
 
 ## Additional notebooks
 Notebook Title | Status | Description
---- | --- | --- 
+--- | --- | ---
 [ARIMA Demo](arima_demo.ipynb) | Working | Forecast using ARIMA on time-series data.
 [Forest Inference Demo](forest_inference_demo.ipynb) | Working | Save and load an XGBoost model into FIL and infer on new data.
 [KMeans Demo](kmeans_demo.ipynb) | Working | Predict using k-means, visualize and compare the results with Scikit-learn's k-means.
diff --git a/notebooks/data/time_series/population_estimate.csv b/notebooks/data/time_series/population_estimate.csv
index 02e1be61ed..2c6afa6fac 100644
--- a/notebooks/data/time_series/population_estimate.csv
+++ b/notebooks/data/time_series/population_estimate.csv
@@ -135,4 +135,4 @@
 2008,2104.1,2187.4
 2009,2134.0,2213.2
 2010,2158.2,2234.9
-2011,2174.3,2248.4
\ No newline at end of file
+2011,2174.3,2248.4
diff --git a/notebooks/random_forest_demo.ipynb b/notebooks/random_forest_demo.ipynb
index a94b41cfe3..eb4e6e7c2b 100755
--- a/notebooks/random_forest_demo.ipynb
+++ b/notebooks/random_forest_demo.ipynb
@@ -291,4 +291,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/print_env.sh b/print_env.sh
index db24245c73..0129213d6e 100755
--- a/print_env.sh
+++ b/print_env.sh
@@ -1,7 +1,8 @@
 #!/usr/bin/env bash
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Reports relevant environment information useful for diagnosing and
 # debugging cuML issues.
-# Usage: 
+# Usage:
 # "./print_env.sh" - prints to stdout
 # "./print_env.sh > env.txt" - prints to file "env.txt"
 
@@ -14,16 +15,16 @@ git submodule status --recursive
 else
 echo "Not inside a git repository"
 fi
-echo 
+echo
 
 echo "***OS Information***"
 cat /etc/*-release
 uname -a
-echo 
+echo
 
 echo "***GPU Information***"
 nvidia-smi
-echo 
+echo
 
 echo "***CPU***"
 lscpu
@@ -31,15 +32,15 @@ echo
 
 echo "***CMake***"
 which cmake && cmake --version
-echo 
+echo
 
 echo "***g++***"
 which g++ && g++ --version
-echo 
+echo
 
 echo "***nvcc***"
 which nvcc && nvcc --version
-echo 
+echo
 
 echo "***Python***"
 which python && python -c "import sys; print('Python {0}.{1}.{2}'.format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))"
diff --git a/python/cuml/.coveragerc b/python/cuml/.coveragerc
index c1a8b220e2..de753b8687 100644
--- a/python/cuml/.coveragerc
+++ b/python/cuml/.coveragerc
@@ -23,4 +23,4 @@ exclude_lines =
 
     # Don't complain if non-runnable code isn't run:
     if 0:
-    if False:
\ No newline at end of file
+    if False:
diff --git a/python/cuml/README.md b/python/cuml/README.md
index 735e03ae2c..96729831d4 100644
--- a/python/cuml/README.md
+++ b/python/cuml/README.md
@@ -77,5 +77,3 @@ Packages required for multigpu algorithms*:
 ### Python Tests
 
 Python tests are based on the pytest library. To run them, from the `path_to_cuml/python/` folder, simply type `pytest`.
-
-
diff --git a/python/cuml/cuml/_thirdparty/sklearn/README.md b/python/cuml/cuml/_thirdparty/sklearn/README.md
index 38332cdcc1..cf5930e017 100644
--- a/python/cuml/cuml/_thirdparty/sklearn/README.md
+++ b/python/cuml/cuml/_thirdparty/sklearn/README.md
@@ -12,4 +12,4 @@ The code originates from the Scikit-Learn Github repository : https://github.com
     - Changes should be kept minimal, large portions of modified imported code should lie in the thirdparty_adapter directory
     - Only well-tested, reliable accelerated preprocessing functions should be exposed in cuml.preprocessing.__init__.py
     - Tests must be added for each exposed function
-    - Remember that a preprocessing model should always return the same datatype it received as input (NumPy, CuPy, Pandas, cuDF, Numba)
\ No newline at end of file
+    - Remember that a preprocessing model should always return the same datatype it received as input (NumPy, CuPy, Pandas, cuDF, Numba)
diff --git a/python/cuml/cuml/tests/ts_datasets/population_estimate.csv b/python/cuml/cuml/tests/ts_datasets/population_estimate.csv
index 02e1be61ed..2c6afa6fac 100644
--- a/python/cuml/cuml/tests/ts_datasets/population_estimate.csv
+++ b/python/cuml/cuml/tests/ts_datasets/population_estimate.csv
@@ -135,4 +135,4 @@
 2008,2104.1,2187.4
 2009,2134.0,2213.2
 2010,2158.2,2234.9
-2011,2174.3,2248.4
\ No newline at end of file
+2011,2174.3,2248.4
diff --git a/thirdparty/LICENSES/LICENSE.H2O4GPU b/thirdparty/LICENSES/LICENSE.H2O4GPU
index c056706936..4935303b01 100644
--- a/thirdparty/LICENSES/LICENSE.H2O4GPU
+++ b/thirdparty/LICENSES/LICENSE.H2O4GPU
@@ -228,4 +228,4 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-```
\ No newline at end of file
+```
diff --git a/thirdparty/LICENSES/LICENSE.faiss b/thirdparty/LICENSES/LICENSE.faiss
index 87cbf536c6..b96dcb0480 100644
--- a/thirdparty/LICENSES/LICENSE.faiss
+++ b/thirdparty/LICENSES/LICENSE.faiss
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/wiki/DEFINITION_OF_DONE_CRITERIA.md b/wiki/DEFINITION_OF_DONE_CRITERIA.md
index 0dace8ae36..2c2cd399b9 100644
--- a/wiki/DEFINITION_OF_DONE_CRITERIA.md
+++ b/wiki/DEFINITION_OF_DONE_CRITERIA.md
@@ -6,8 +6,8 @@
 Below is a quick and simple checklist for developers to determine whether an algorithm is complete and ready for release. Most of these items contain more detailed descriptions in their corresponding developer guide. The checklist is broken down by layer (C++ or Python) and categorized further into
 
 - **Design:** All algorithms should be designed with an eye on maintainability, performance, readability, and robustness.
-- **Testing:** The goal for automated testing is to increase both the spread and the depth of code coverage as much as possible in order to ease time spent fixing bugs and developing new features. Additionally, a very important factor for a tool like `cuml` is to provide testing with multiple datasets that really stress the mathematical behavior of the algorithms. A comprehensive set of tests lowers the possibility for regressions and the introduction of bugs as the code evolves between versions. This covers both correctness & performance. 
-- **Documentation:** User-facing documentation should be complete and descriptive. Developer-facing documentation should be used for constructs which are complex and/or not immediately obvious. 
+- **Testing:** The goal for automated testing is to increase both the spread and the depth of code coverage as much as possible in order to ease time spent fixing bugs and developing new features. Additionally, a very important factor for a tool like `cuml` is to provide testing with multiple datasets that really stress the mathematical behavior of the algorithms. A comprehensive set of tests lowers the possibility for regressions and the introduction of bugs as the code evolves between versions. This covers both correctness & performance.
+- **Documentation:** User-facing documentation should be complete and descriptive. Developer-facing documentation should be used for constructs which are complex and/or not immediately obvious.
 - **Performance:** Algorithms should be [benchmarked] and profiled regularly to spot potential bottlenecks, performance regressions, and memory problems.
 
 ### C++
@@ -38,12 +38,12 @@ Below is a quick and simple checklist for developers to determine whether an alg
 #### Design
 
 - Python class is as "near drop-in replacement" for Scikit-learn (or relevant industry standard) API as possible. This means parameters have the same names as Scikit-learn, and where differences exist, they are clearly documented in docstrings.
-- It is recommended to open an initial PR with the API design if there are going to be significant differences with reference APIs, or lack of a reference API, to have a discussion about it. 
+- It is recommended to open an initial PR with the API design if there are going to be significant differences with reference APIs, or lack of a reference API, to have a discussion about it.
 - Python class is pickleable and a test has been added to `cuml/tests/test_pickle.py`
 - APIs use `input_to_cuml_array` to accept flexible inputs and check their datatypes and use `cumlArray.to_output()` to return configurable outputs.
 - Any internal parameters or array-based instance variables use `CumlArray`
 
-#### Testing 
+#### Testing
 
 - Pytests for wrapper functionality against Scikit-learn using relevant datasets
 - Stress tests against reasonable inputs (e.g short-wide, tall-narrow, different numerical precision)
@@ -60,7 +60,7 @@ Below is a quick and simple checklist for developers to determine whether an alg
 
 ## Review Checklist
 
-Aside from the general algorithm expectations outlined in the checklists above, code reviewers should use the following checklist to make sure the algorithm meets cuML standards. 
+Aside from the general algorithm expectations outlined in the checklists above, code reviewers should use the following checklist to make sure the algorithm meets cuML standards.
 
 ### All
 
@@ -71,7 +71,7 @@ Aside from the general algorithm expectations outlined in the checklists above,
 - Changes to the public API will not have a negative impact to existing users between minor versions (eg. large changes to very popular public APIs go through a deprecation cycle to preserve backwards compatibility)
 - Where it is reasonable to do so, unexpected inputs fail gracefully and provide actionable feedback to the user
 - Automated tests properly exercise the changes in the PR
-- New algorithms provide benchmarks (both C++ and Python) 
+- New algorithms provide benchmarks (both C++ and Python)
 
 
 ### C++
@@ -80,4 +80,4 @@ Aside from the general algorithm expectations outlined in the checklists above,
 
 ### Python
 
-- Look at the list of slowest PyTests printed in the CI logs and check that any newly committed PyTests are not going to have a significant impact on the end-to-end execution. 
\ No newline at end of file
+- Look at the list of slowest PyTests printed in the CI logs and check that any newly committed PyTests are not going to have a significant impact on the end-to-end execution.
diff --git a/wiki/README.md b/wiki/README.md
index c735da3552..a31ff0c17a 100644
--- a/wiki/README.md
+++ b/wiki/README.md
@@ -3,7 +3,7 @@
 This wiki is provided as an extension to cuML's public documentation, geared toward developers on the project.
 
 If you are interested in contributing to cuML, read through our [contributing guide](../CONTRIBUTING.md). You are
-also encouraged to read through our Python [developer guide](python/DEVELOPER_GUIDE.md) and C++ 
+also encouraged to read through our Python [developer guide](python/DEVELOPER_GUIDE.md) and C++
 [developer guide](cpp/DEVELOPER_GUIDE.md) to gain an understanding for how we design our algorithms.
 
-We have criteria for defining our [definition of done](DEFINITION_OF_DONE_CRITERIA.md) to allow us to provide high performance, maintainable and overall high quality implementations, while giving as much transparency as possible about the status of our algorithms with our users.
\ No newline at end of file
+We have criteria for defining our [definition of done](DEFINITION_OF_DONE_CRITERIA.md) to allow us to provide high performance, maintainable and overall high quality implementations, while giving as much transparency as possible about the status of our algorithms with our users.
diff --git a/wiki/mnmg/Using_Infiniband_for_MNMG.md b/wiki/mnmg/Using_Infiniband_for_MNMG.md
index 4dcad64005..0f9ad3613d 100644
--- a/wiki/mnmg/Using_Infiniband_for_MNMG.md
+++ b/wiki/mnmg/Using_Infiniband_for_MNMG.md
@@ -4,7 +4,7 @@ These instructions outline how to run multi-node multi-GPU cuML on devices with
 
 The steps in this wiki post have been largely adapted from the [Experiments in High Performance Networking with UCX and DGX](https://blog.dask.org/2019/06/09/ucx-dgx) blog by Matthew Rocklin and Rick Zamora.
 
-## 1. Install UCX 
+## 1. Install UCX
 
 ### From Conda
 
@@ -19,7 +19,7 @@ Install autogen if it's not already installed:
 sudo apt-get install autogen autoconf libtool
 ```
 
-Optionally install `gdrcopy` for faster GPU-Network card data transfer: 
+Optionally install `gdrcopy` for faster GPU-Network card data transfer:
 
 From the [ucx wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support), `gdrcopy` can be installed, and might be necessary, to enable faster GPU-Network card data transfer.
 
@@ -160,7 +160,7 @@ If you configured UCX with the `gdrcopy` option, you should also expect to see t
 #       error handling: none
 ```
 
-To better understand the CUDA-based transports in UCX, refer to [this wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support) for more details. 
+To better understand the CUDA-based transports in UCX, refer to [this wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support) for more details.
 
 
 ## 2. Install ucx-py
@@ -184,11 +184,11 @@ make -j install
 
 ## 3. Install NCCL
 
-It's important that NCCL 2.4+ be installed and no previous versions of NCCL are conflicting on your library path. This will cause compile errors during the build of cuML. 
+It's important that NCCL 2.4+ be installed and no previous versions of NCCL are conflicting on your library path. This will cause compile errors during the build of cuML.
 
 
 ```bash
-conda install -c nvidia nccl 
+conda install -c nvidia nccl
 ```
 
 Create the file `.nccl.conf` in your home dir with the following:
@@ -196,7 +196,7 @@ Create the file `.nccl.conf` in your home dir with the following:
 NCCL_SOCKET_IFNAME=ib0
 ```
 
-## 4. Enable IP over IB interface at ib0 
+## 4. Enable IP over IB interface at ib0
 
 Follow the instructions at [this link](https://docs.oracle.com/cd/E19436-01/820-3522-10/ch4-linux.html#50536461_82843) to create an IP interface for the IB devices.
 
@@ -210,20 +210,20 @@ You can verify the interface was created properly with `ifconfig ib0`
 The output should look like this:
 
 ```
-ib0       Link encap:UNSPEC  HWaddr 80-00-00-68-FE-80-00-00-00-00-00-00-00-00-00-00  
+ib0       Link encap:UNSPEC  HWaddr 80-00-00-68-FE-80-00-00-00-00-00-00-00-00-00-00
           inet addr:10.0.0.50  Bcast:10.0.0.255  Mask:255.255.255.0
           inet6 addr: fe80::526b:4b03:f5:ce9c/64 Scope:Link
           UP BROADCAST RUNNING MULTICAST  MTU:65520  Metric:1
           RX packets:2655 errors:0 dropped:0 overruns:0 frame:0
           TX packets:2697 errors:0 dropped:10 overruns:0 carrier:0
-          collisions:0 txqueuelen:256 
+          collisions:0 txqueuelen:256
           RX bytes:183152 (183.1 KB)  TX bytes:194696 (194.6 KB)
 
 ```
 
 ## 5.  Set UCX environment vars
 
-Use `ibstatus` to see your open IB devices. Output will look like this: 
+Use `ibstatus` to see your open IB devices. Output will look like this:
 
 ```
 Infiniband device 'mlx5_0' port 1 status:
@@ -263,7 +263,7 @@ Infiniband device 'mlx5_3' port 1 status:
 	link_layer:	 InfiniBand
 
 ```
- 
+
 Put the devices and ports in a `UCX_NET_DEVICES` environment variable:
 
 
@@ -387,5 +387,3 @@ final_size: 16
 final_size: 16
 final_size: 16
 ```
-
-

From 8753e7603cc95e8179c1981575d56220e3b3f26e Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif <jcristharif@gmail.com>
Date: Thu, 23 Jan 2025 19:37:10 -0600
Subject: [PATCH 05/15] Support `alpha=0` in `Ridge` (#6236)

When `alpha = 0`, `Ridge` is equivalent to a `LinearRegression`. Previously we required alpha to be strictly positive, whereas scikit-learn only requires that alpha be non-negative. This updates the check and adds a test.

Authors:
  - Jim Crist-Harif (https://github.com/jcrist)
  - Micka (https://github.com/lowener)

Approvers:
  - William Hicks (https://github.com/wphicks)

URL: https://github.com/rapidsai/cuml/pull/6236
---
 python/cuml/cuml/linear_model/ridge.pyx     |  7 +++----
 python/cuml/cuml/tests/test_linear_model.py | 11 +++++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/python/cuml/cuml/linear_model/ridge.pyx b/python/cuml/cuml/linear_model/ridge.pyx
index bd039867f3..e873f2bab4 100644
--- a/python/cuml/cuml/linear_model/ridge.pyx
+++ b/python/cuml/cuml/linear_model/ridge.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -241,9 +241,8 @@ class Ridge(UniversalBase,
         self.intercept_value = 0.0
 
     def _check_alpha(self, alpha):
-        if alpha <= 0.0:
-            msg = "alpha value has to be positive"
-            raise TypeError(msg.format(alpha))
+        if alpha < 0.0:
+            raise ValueError(f"alpha must be non-negative, got {alpha}")
 
     def _get_algorithm_int(self, algorithm):
         if self.solver not in ['svd', 'eig', 'cd']:
diff --git a/python/cuml/cuml/tests/test_linear_model.py b/python/cuml/cuml/tests/test_linear_model.py
index 559bfc96c3..cb9074088d 100644
--- a/python/cuml/cuml/tests/test_linear_model.py
+++ b/python/cuml/cuml/tests/test_linear_model.py
@@ -417,6 +417,17 @@ def test_ridge_regression_model(datatype, algorithm, nrows, column_info):
         )
 
 
+def test_ridge_and_least_squares_equal_when_alpha_is_0():
+    X, y = make_regression(n_samples=5, n_features=4, random_state=0)
+
+    ridge = cuRidge(alpha=0.0, fit_intercept=False)
+    ols = cuLinearRegression(fit_intercept=False)
+
+    ridge.fit(X, y)
+    ols.fit(X, y)
+    assert array_equal(ridge.coef_, ols.coef_)
+
+
 @pytest.mark.parametrize("datatype", [np.float32, np.float64])
 @pytest.mark.parametrize("algorithm", ["eig", "svd"])
 @pytest.mark.parametrize(

From df25a577448163b8b0f731ddb3a79d00b872ee25 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 23 Jan 2025 22:27:10 -0600
Subject: [PATCH 06/15] update pip devcontainers to UCX 1.18 (#6249)

Contributes to https://github.com/rapidsai/build-planning/issues/138

Updates the pip devcontainers in this repository to use UCX 1.18.

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)
  - https://github.com/jakirkham

URL: https://github.com/rapidsai/cuml/pull/6249
---
 .devcontainer/cuda11.8-pip/devcontainer.json | 2 +-
 .devcontainer/cuda12.5-pip/devcontainer.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index 1bf0c7c3e0..f91b0f61d7 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json
index 823af5e290..3fb2068de0 100644
--- a/.devcontainer/cuda12.5-pip/devcontainer.json
+++ b/.devcontainer/cuda12.5-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "12.5",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [

From b1ac2384d820db075cd715fa3ff3775fc552b29f Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 23 Jan 2025 22:34:06 -0600
Subject: [PATCH 07/15] Use cuda.bindings layout. (#6233)

This PR updates cuML to use the new cuda-python `cuda.bindings` layout. See https://github.com/rapidsai/build-planning/issues/117.

Authors:
  - Bradley Dice (https://github.com/bdice)
  - https://github.com/jakirkham

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - https://github.com/jakirkham

URL: https://github.com/rapidsai/cuml/pull/6233
---
 python/cuml/cuml/svm/linear.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cuml/cuml/svm/linear.pyx b/python/cuml/cuml/svm/linear.pyx
index 9a0efedbc3..1311a0dbe6 100644
--- a/python/cuml/cuml/svm/linear.pyx
+++ b/python/cuml/cuml/svm/linear.pyx
@@ -37,7 +37,7 @@ from pylibraft.common.interruptible import cuda_interruptible
 from cuml.common import input_to_cuml_array
 from libc.stdint cimport uintptr_t
 from libcpp cimport bool as cppbool
-from cuda.ccudart cimport(
+from cuda.bindings.cyruntime cimport(
     cudaMemcpyAsync,
     cudaMemcpyKind,
 )

From 2d45fa4b510696cb4c08abc74dd73fb4cf576cae Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 24 Jan 2025 01:45:08 -0500
Subject: [PATCH 08/15] Define block size for sm_120 (#6250)

CUDA 12.8 introduces sm_120, which requires a reduced number of threads per SM.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/6250
---
 .../cuml/experimental/fil/detail/gpu_introspection.hpp        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cuml/experimental/fil/detail/gpu_introspection.hpp b/cpp/include/cuml/experimental/fil/detail/gpu_introspection.hpp
index b4d132de66..06bbed9419 100644
--- a/cpp/include/cuml/experimental/fil/detail/gpu_introspection.hpp
+++ b/cpp/include/cuml/experimental/fil/detail/gpu_introspection.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ auto constexpr static const WARP_SIZE             = index_type{32};
 auto constexpr static const MAX_THREADS_PER_BLOCK = index_type{256};
 #ifdef __CUDACC__
 #if __CUDA_ARCH__ == 720 || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 860 || \
-  __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890
+  __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 1200
 auto constexpr static const MAX_THREADS_PER_SM = index_type{1024};
 #else
 auto constexpr static const MAX_THREADS_PER_SM = index_type{2048};

From 732dac9c476bbe1c1dbb0a9f85691890113af727 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 24 Jan 2025 11:25:34 -0600
Subject: [PATCH 09/15] introduce libcuml wheels (#6199)

Replaces #6006, contributes to https://github.com/rapidsai/build-planning/issues/33.

Proposes packaging `libcuml` as a wheel, which is then re-used by `cuml-cu{11,12}` wheels.

## Notes for Reviewers

### Benefits of these changes

* smaller wheels (see "Size Changes" below)
* faster compile times
  - *no more re-compiling RAFT, thanks to https://github.com/rapidsai/raft/pull/2531*
* less use of CI resources (only compiling once per CPU architecture / CUDA versions, instead of once per those + Python minor version)
* other benefits mentioned in https://github.com/rapidsai/build-planning/issues/33

### Wheel contents

`libcuml`:

* `libcuml++.so` (shared library) and its headers
* `libcumlprims_mg.so` (shared library) and its headers
* other vendored dependencies (CCCL, `fmt`)

`cuml`:

* `cuml` Python / Cython code and compiled Cython extensions

### Dependency Flows

In short, `libcuml` contains the `libcuml++.so` and `libcumlprims_mg.so` dynamic libraries and the headers to link against them.

* Anything that needs to link against cuML at build time pulls in `libcuml` wheels as a build dependency.
* Anything that needs cuML's symbols at runtime pulls it in as a runtime dependency, and calls `libcuml.load_library()`.

For more details and some flowcharts, see https://github.com/rapidsai/build-planning/issues/33#issuecomment-2590129852

### Size changes (CUDA 12, Python 3.12, x86_64)

| wheel                | num files (before) | num files (this PR) | size (before)  | size (this PR) |
|:---------------:|------------------:|-----------------:|--------------:|-------------:|
| `libcuml`           |   ---                       |   1766                   | ---                   | 289M                 |
| `cuml`               |   442                     |   441                    | 527M               | 9M                 |
|**TOTAL**          |   **442**              |   **2207**               | **527M**        | **298M**    |

*NOTES: size = compressed, "before" = 2025-01-22 nightlies*

<details><summary>how I calculated those (click me)</summary>

```shell
docker run \
    --rm \
    --network host \
    --env RAPIDS_NIGHTLY_DATE=2025-01-22 \
    --env CUML_NIGHTLY_SHA=01e19bba9821954b062a04fbf31d3522afa4b0b1 \
    --env CUML_PR="pull-request/6199" \
    --env CUML_PR_SHA="9d5100ec4589e20230a31817518427efa1e49c6d" \
    --env RAPIDS_PY_CUDA_SUFFIX=cu12 \
    --env WHEEL_DIR_BEFORE=/tmp/wheels-before \
    --env WHEEL_DIR_AFTER=/tmp/wheels-after \
    -it rapidsai/ci-wheel:cuda12.5.1-rockylinux8-py3.12 \
    bash

# --- nightly wheels --- #
mkdir -p ./wheels-before

export RAPIDS_BUILD_TYPE=branch
export RAPIDS_REF_NAME="branch-25.02"

# cuml
RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" \
RAPIDS_REPOSITORY=rapidsai/cuml \
RAPIDS_SHA=${CUML_NIGHTLY_SHA} \
    rapids-download-wheels-from-s3 python ./wheels-before

# --- wheels from CI --- #
mkdir -p ./wheels-after

export RAPIDS_BUILD_TYPE="pull-request"

# libcuml
RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" \
RAPIDS_REPOSITORY=rapidsai/cuml \
RAPIDS_REF_NAME="${CUML_PR}" \
RAPIDS_SHA="${CUML_PR_SHA}" \
    rapids-download-wheels-from-s3 cpp ./wheels-after

# cuml
RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" \
RAPIDS_REPOSITORY=rapidsai/cuml \
RAPIDS_REF_NAME="${CUML_PR}" \
RAPIDS_SHA="${CUML_PR_SHA}" \
    rapids-download-wheels-from-s3 python ./wheels-after

pip install pydistcheck
pydistcheck \
    --inspect \
    --select 'distro-too-large-compressed' \
    ./wheels-before/*.whl \
| grep -E '^checking|files: | compressed' \
> ./before.txt

# get more exact sizes
du -sh ./wheels-before/*

pydistcheck \
    --inspect \
    --select 'distro-too-large-compressed' \
    ./wheels-after/*.whl \
| grep -E '^checking|files: | compressed' \
> ./after.txt

# get more exact sizes
du -sh ./wheels-after/*
```

</details>

### How I tested this

These other PRs:

* https://github.com/rapidsai/devcontainers/pull/442

Authors:
  - James Lamb (https://github.com/jameslamb)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/cuml/pull/6199
---
 .github/workflows/build.yaml                  |  27 +++-
 .github/workflows/pr.yaml                     |  17 ++-
 build.sh                                      |   7 +-
 ci/build_wheel.sh                             |  45 ++----
 ci/build_wheel_cuml.sh                        |  42 ++++++
 ci/build_wheel_libcuml.sh                     |  56 ++++++++
 ci/test_wheel.sh                              |   2 +
 ci/validate_wheel.sh                          |   7 +-
 .../all_cuda-118_arch-x86_64.yaml             |   2 +-
 .../all_cuda-125_arch-x86_64.yaml             |   2 +-
 cpp/cmake/thirdparty/get_cuvs.cmake           |   2 +-
 cpp/cmake/thirdparty/get_treelite.cmake       |   3 +-
 dependencies.yaml                             | 128 ++++++++++++++++-
 python/cuml/CMakeLists.txt                    | 133 +++++-------------
 python/cuml/cuml/__init__.py                  |  12 +-
 python/cuml/cuml/cluster/CMakeLists.txt       |   3 +-
 .../cuml/cuml/cluster/hdbscan/CMakeLists.txt  |   3 +-
 python/cuml/cuml/common/CMakeLists.txt        |   3 +-
 python/cuml/cuml/datasets/CMakeLists.txt      |   3 +-
 python/cuml/cuml/decomposition/CMakeLists.txt |   3 +-
 python/cuml/cuml/ensemble/CMakeLists.txt      |   3 +-
 .../cuml/cuml/experimental/fil/CMakeLists.txt |   3 +-
 .../experimental/linear_model/CMakeLists.txt  |   3 +-
 python/cuml/cuml/explainer/CMakeLists.txt     |   3 +-
 python/cuml/cuml/fil/CMakeLists.txt           |   3 +-
 python/cuml/cuml/internals/CMakeLists.txt     |   3 +-
 python/cuml/cuml/kernel_ridge/CMakeLists.txt  |   3 +-
 python/cuml/cuml/linear_model/CMakeLists.txt  |   3 +-
 python/cuml/cuml/manifold/CMakeLists.txt      |   3 +-
 python/cuml/cuml/metrics/CMakeLists.txt       |   3 +-
 .../cuml/cuml/metrics/cluster/CMakeLists.txt  |   3 +-
 python/cuml/cuml/neighbors/CMakeLists.txt     |   3 +-
 .../cuml/random_projection/CMakeLists.txt     |   3 +-
 python/cuml/cuml/solvers/CMakeLists.txt       |   3 +-
 python/cuml/cuml/svm/CMakeLists.txt           |   3 +-
 python/cuml/cuml/tsa/CMakeLists.txt           |   3 +-
 python/cuml/pyproject.toml                    |  10 +-
 python/libcuml/CMakeLists.txt                 |  89 ++++++++++++
 python/libcuml/LICENSE                        |   1 +
 python/libcuml/README.md                      |   1 +
 python/libcuml/libcuml/VERSION                |   1 +
 python/libcuml/libcuml/__init__.py            |  16 +++
 python/libcuml/libcuml/_version.py            |  30 ++++
 python/libcuml/libcuml/load.py                | 117 +++++++++++++++
 python/libcuml/pyproject.toml                 |  89 ++++++++++++
 45 files changed, 702 insertions(+), 200 deletions(-)
 create mode 100755 ci/build_wheel_cuml.sh
 create mode 100755 ci/build_wheel_libcuml.sh
 create mode 100644 python/libcuml/CMakeLists.txt
 create mode 120000 python/libcuml/LICENSE
 create mode 120000 python/libcuml/README.md
 create mode 120000 python/libcuml/libcuml/VERSION
 create mode 100644 python/libcuml/libcuml/__init__.py
 create mode 100644 python/libcuml/libcuml/_version.py
 create mode 100644 python/libcuml/libcuml/load.py
 create mode 100644 python/libcuml/pyproject.toml

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 945b8e4f72..fd6b8c1879 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -66,7 +66,7 @@ jobs:
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
-  wheel-build-cuml:
+  wheel-build-libcuml:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
@@ -74,13 +74,36 @@ jobs:
       branch: ${{ inputs.branch }}
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
-      script: ci/build_wheel.sh
+      script: ci/build_wheel_libcuml.sh
       # Note that this approach to cloning repos obviates any modification to
       # the CMake variables in get_cumlprims_mg.cmake since CMake will just use
       # the clone as is.
       extra-repo: rapidsai/cumlprims_mg
       extra-repo-sha: branch-25.02
       extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
+      # build for every combination of arch and CUDA version, but only for the latest Python
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+  wheel-publish-libcuml:
+    needs: wheel-build-libcuml
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      package-name: libcuml
+      package-type: cpp
+  wheel-build-cuml:
+    needs: wheel-build-libcuml
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_cuml.sh
   wheel-publish-cuml:
     needs: wheel-build-cuml
     secrets: inherit
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 95d648f8e6..37b176d635 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -25,6 +25,7 @@ jobs:
       - conda-notebook-tests
       - docs-build
       - telemetry-setup
+      - wheel-build-libcuml
       - wheel-build-cuml
       - wheel-tests-cuml
       - devcontainer
@@ -166,16 +167,28 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/build_docs.sh"
-  wheel-build-cuml:
+  wheel-build-libcuml:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: pull-request
-      script: ci/build_wheel.sh
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_libcuml.sh
       extra-repo: rapidsai/cumlprims_mg
       extra-repo-sha: branch-25.02
       extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
+      # build for every combination of arch and CUDA version, but only for the latest Python
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+  wheel-build-cuml:
+    needs: [checks, wheel-build-libcuml]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    with:
+      build_type: pull-request
+      script: ci/build_wheel_cuml.sh
   wheel-tests-cuml:
     needs: [wheel-build-cuml, changed-files]
     secrets: inherit
diff --git a/build.sh b/build.sh
index 9eb36f103c..6f1b6b9f83 100755
--- a/build.sh
+++ b/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 
 # cuml build script
 
@@ -288,11 +288,6 @@ if (! hasArg --configure-only) && (completeBuild || hasArg cuml || hasArg pydocs
     # Replace spaces with semicolons in SKBUILD_EXTRA_CMAKE_ARGS
     SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${SKBUILD_EXTRA_CMAKE_ARGS} | sed 's/ /;/g')
 
-    # Append `-DFIND_CUML_CPP=ON` to CUML_EXTRA_CMAKE_ARGS unless a user specified the option.
-    if [[ "${SKBUILD_EXTRA_CMAKE_ARGS}" != *"DFIND_CUML_CPP"* ]]; then
-        SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS};-DFIND_CUML_CPP=ON"
-    fi
-
     SKBUILD_CMAKE_ARGS="-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL};${SKBUILD_EXTRA_CMAKE_ARGS}" \
         python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuml
 
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 104895daf7..3c840d9849 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -1,55 +1,26 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
-package_dir="python/cuml"
+package_name=$1
+package_dir=$2
 
 source rapids-configure-sccache
 source rapids-date-string
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
 rapids-generate-version > ./VERSION
 
-cd ${package_dir}
-
-case "${RAPIDS_CUDA_VERSION}" in
-  12.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
-      --exclude "libcublas.so.12"
-      --exclude "libcublasLt.so.12"
-      --exclude "libcufft.so.11"
-      --exclude "libcurand.so.10"
-      --exclude "libcusolver.so.11"
-      --exclude "libcusparse.so.12"
-      --exclude "libnvJitLink.so.12"
-    )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
-    ;;
-  11.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
-    )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
-    ;;
-esac
+cd "${package_dir}"
 
 sccache --zero-stats
 
-SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}" \
-  python -m pip wheel . \
+rapids-logger "Building '${package_name}' wheel"
+python -m pip wheel \
     -w dist \
     -v \
     --no-deps \
-    --disable-pip-version-check
+    --disable-pip-version-check \
+    .
 
 sccache --show-adv-stats
-
-mkdir -p final_dist
-python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/*
-
-../../ci/validate_wheel.sh final_dist
-
-RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
new file mode 100755
index 0000000000..b83d4973f3
--- /dev/null
+++ b/ci/build_wheel_cuml.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_name="cuml"
+package_dir="python/cuml"
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+# Download the libcuml wheel built in the previous step and make it
+# available for pip to find.
+RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist
+echo "libcuml-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcuml_dist/libcuml_*.whl)" >> /tmp/constraints.txt
+export PIP_CONSTRAINT="/tmp/constraints.txt"
+
+EXCLUDE_ARGS=(
+  --exclude "libcuml++.so"
+  --exclude "libcumlprims_mg.so"
+  --exclude "libcuvs.so"
+  --exclude "libraft.so"
+  --exclude "libcublas.so.*"
+  --exclude "libcublasLt.so.*"
+  --exclude "libcufft.so.*"
+  --exclude "libcurand.so.*"
+  --exclude "libcusolver.so.*"
+  --exclude "libcusparse.so.*"
+  --exclude "libnvJitLink.so.*"
+)
+
+export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
+./ci/build_wheel.sh "${package_name}" "${package_dir}"
+
+mkdir -p ${package_dir}/final_dist
+python -m auditwheel repair \
+    "${EXCLUDE_ARGS[@]}" \
+    -w ${package_dir}/final_dist \
+    ${package_dir}/dist/*
+
+./ci/validate_wheel.sh ${package_dir} final_dist
+
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python "${package_dir}/final_dist"
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
new file mode 100755
index 0000000000..ad38eab617
--- /dev/null
+++ b/ci/build_wheel_libcuml.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_name="libcuml"
+package_dir="python/libcuml"
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+rapids-logger "Generating build requirements"
+
+rapids-dependency-file-generator \
+  --output requirements \
+  --file-key "py_build_${package_name}" \
+  --file-key "py_rapids_build_${package_name}" \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
+| tee /tmp/requirements-build.txt
+
+rapids-logger "Installing build requirements"
+python -m pip install \
+    -v \
+    --prefer-binary \
+    -r /tmp/requirements-build.txt
+
+# build with '--no-build-isolation', for better sccache hit rate
+# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
+export PIP_NO_BUILD_ISOLATION=0
+
+# NOTE: 'libcumlprims_mg.so' is marked as '--exclude' here because auditwheel doesn't detect it,
+#       but it really is intentionally included in 'libcuml' wheels
+EXCLUDE_ARGS=(
+  --exclude "libcumlprims_mg.so"
+  --exclude "libcuvs.so"
+  --exclude "libraft.so"
+  --exclude "libcublas.so.*"
+  --exclude "libcublasLt.so.*"
+  --exclude "libcufft.so.*"
+  --exclude "libcurand.so.*"
+  --exclude "libcusolver.so.*"
+  --exclude "libcusparse.so.*"
+  --exclude "libnvJitLink.so.*"
+)
+
+export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/"
+./ci/build_wheel.sh "${package_name}" "${package_dir}"
+
+mkdir -p ${package_dir}/final_dist
+python -m auditwheel repair \
+    "${EXCLUDE_ARGS[@]}" \
+    -w ${package_dir}/final_dist \
+    ${package_dir}/dist/*
+
+./ci/validate_wheel.sh ${package_dir} final_dist
+
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index 76cfe9a86b..8027876005 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -6,9 +6,11 @@ set -euo pipefail
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
 
 # echo to expand wildcard before adding `[extra]` requires for pip
 python -m pip install \
+  ./dist/libcuml*.whl \
   "$(echo ./dist/cuml*.whl)[test]"
 
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index 60a80fce6b..c4bd01faab 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -1,9 +1,12 @@
 #!/bin/bash
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
-wheel_dir_relative_path=$1
+package_dir=$1
+wheel_dir_relative_path=$2
+
+cd "${package_dir}"
 
 rapids-logger "validate packages with 'pydistcheck'"
 
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 01d42d3d6e..17238e84c3 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -42,7 +42,7 @@ dependencies:
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
 - libcuvs==25.2.*,>=0.0.0a0
-- libraft-headers==25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
 - ninja
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index f98ca90945..10d9178d43 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -39,7 +39,7 @@ dependencies:
 - libcusolver-dev
 - libcusparse-dev
 - libcuvs==25.2.*,>=0.0.0a0
-- libraft-headers==25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
 - ninja
diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake
index a48b4c6b1e..7a2b5f3571 100644
--- a/cpp/cmake/thirdparty/get_cuvs.cmake
+++ b/cpp/cmake/thirdparty/get_cuvs.cmake
@@ -54,8 +54,8 @@ function(find_and_configure_cuvs)
         EXCLUDE_FROM_ALL       ${PKG_EXCLUDE_FROM_ALL}
         OPTIONS
           "BUILD_TESTS OFF"
-          "BUILD_BENCH OFF"
           "BUILD_CAGRA_HNSWLIB OFF"
+          "BUILD_CUVS_BENCH OFF"
           "BUILD_MG_ALGOS ${CUVS_BUILD_MG_ALGOS}"
 
     )
diff --git a/cpp/cmake/thirdparty/get_treelite.cmake b/cpp/cmake/thirdparty/get_treelite.cmake
index a14bacb531..e197b76a11 100644
--- a/cpp/cmake/thirdparty/get_treelite.cmake
+++ b/cpp/cmake/thirdparty/get_treelite.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@ function(find_and_configure_treelite)
 
     rapids_cpm_find(Treelite ${PKG_VERSION}
         GLOBAL_TARGETS       ${TREELITE_LIBS}
+        BUILD_EXPORT_SET     cuml-exports
         INSTALL_EXPORT_SET   cuml-exports
         CPM_ARGS
             GIT_REPOSITORY   https://github.com/dmlc/treelite.git
diff --git a/dependencies.yaml b/dependencies.yaml
index 54ad194e84..78d6d58134 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -16,7 +16,7 @@ files:
       - depends_on_dask_cudf
       - depends_on_libcumlprims
       - depends_on_libcuvs
-      - depends_on_libraft_headers
+      - depends_on_libraft
       - depends_on_librmm
       - depends_on_pylibraft
       - depends_on_raft_dask
@@ -110,9 +110,11 @@ files:
     includes:
       - common_build
       - depends_on_cuda_python
-      - depends_on_cuvs
+      - depends_on_libcuml
       - depends_on_libcumlprims
-      - depends_on_libraft_headers
+      - depends_on_libcuvs
+      - depends_on_libraft
+      - depends_on_librmm
       - depends_on_pylibraft
       - depends_on_rmm
       - py_build_cuml
@@ -128,6 +130,7 @@ files:
       - depends_on_cupy
       - depends_on_cuvs
       - depends_on_dask_cudf
+      - depends_on_libcuml
       - depends_on_pylibraft
       - depends_on_raft_dask
       - depends_on_rmm
@@ -140,6 +143,33 @@ files:
       key: test
     includes:
       - test_python
+  py_build_libcuml:
+    output: pyproject
+    pyproject_dir: python/libcuml
+    extras:
+      table: build-system
+    includes:
+      - rapids_build_backend
+  py_rapids_build_libcuml:
+    output: pyproject
+    pyproject_dir: python/libcuml
+    extras:
+      table: tool.rapids-build-backend
+      key: requires
+    includes:
+      - common_build
+      - depends_on_libcuvs
+      - depends_on_libraft
+      - depends_on_librmm
+  py_run_libcuml:
+    output: pyproject
+    pyproject_dir: python/libcuml
+    extras:
+      table: project
+    includes:
+      - cuda_wheels
+      - depends_on_libcuvs
+      - depends_on_libraft
 channels:
   - rapidsai
   - rapidsai-nightly
@@ -370,6 +400,11 @@ dependencies:
               cuda: "11.*"
               use_cuda_wheels: "true"
             packages:
+              - nvidia-cublas-cu11
+              - nvidia-cufft-cu11
+              - nvidia-curand-cu11
+              - nvidia-cusparse-cu11
+              - nvidia-cusolver-cu11
           # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels
           # (e.g. for DLFW and pip devcontainers)
           - matrix:
@@ -583,7 +618,26 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - libcuml==25.2.*,>=0.0.0a0
+          - &libcuml_unsuffixed libcuml==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuml-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuml-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*libcuml_unsuffixed]}
   depends_on_libcumlprims:
     common:
       - output_types: conda
@@ -593,7 +647,50 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - libcuvs==25.2.*,>=0.0.0a0
+          - &libcuvs_unsuffixed libcuvs==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuvs-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuvs-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*libcuvs_unsuffixed]}
+  depends_on_libraft:
+    common:
+      - output_types: conda
+        packages:
+          - &libraft_unsuffixed libraft==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - libraft-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - libraft-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*libraft_unsuffixed]}
   depends_on_libraft_headers:
     common:
       - output_types: conda
@@ -603,7 +700,26 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - librmm==25.2.*,>=0.0.0a0
+          - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - librmm-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - librmm-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*librmm_unsuffixed]}
   depends_on_pylibraft:
     common:
       - output_types: conda
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 7bda8d9a64..c491420d71 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -36,17 +36,12 @@ project(
 ################################################################################
 # - User Options  --------------------------------------------------------------
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
-option(FIND_CUML_CPP "Search for existing CUML C++ installations before defaulting to local files" OFF)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
-option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
-option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
-set(CUML_RAFT_CLONE_ON_PIN OFF)
-
+option(USE_LIBCUML_WHEEL "Use libcuml wheel to provide some dependencies" OFF)
 
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
 message(VERBOSE "CUML_PY: Build only cuML CPU Python components.: ${CUML_CPU}")
-message(VERBOSE "CUML_PY: Searching for existing CUML C++ installations before defaulting to local files: ${FIND_CUML_CPP}")
 message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
@@ -57,83 +52,11 @@ set(CUML_CPP_SRC "../../cpp")
 ################################################################################
 # - Process User Options  ------------------------------------------------------
 
-# If the user requested it, we attempt to find cuml.
-if(FIND_CUML_CPP)
-  # We need to call get_treelite explicitly because we need the correct
-  # ${TREELITE_LIBS} definition for RF
-  include(rapids-cpm)
-  include(rapids-export)
-  rapids_cpm_init()
-  find_package(cuml ${CUML_VERSION} REQUIRED)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
-else()
-  set(cuml_FOUND OFF)
-endif()
-
-include(rapids-cython-core)
-
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
-
-if(NOT CUML_CPU)
-  if(NOT cuml_FOUND)
-    find_package(CUDAToolkit REQUIRED)
-
-    set(BUILD_CUML_TESTS OFF)
-    set(BUILD_PRIMS_TESTS OFF)
-    set(BUILD_CUML_C_LIBRARY OFF)
-    set(BUILD_CUML_EXAMPLES OFF)
-    set(BUILD_CUML_BENCH OFF)
-    set(CUML_EXPORT_TREELITE_LINKAGE ON)
-    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-
-    # Statically link dependencies if building wheels
-    set(CUDA_STATIC_RUNTIME ON)
-    set(CUML_USE_CUVS_STATIC ON)
-    set(CUML_USE_TREELITE_STATIC ON)
-    set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
-    # Link to the CUDA wheels with shared libraries for CUDA 12+
-    if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
-      set(CUDA_STATIC_MATH_LIBRARIES OFF)
-    else()
-      if(USE_CUDA_MATH_WHEELS)
-        message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
-      endif()
-      set(CUDA_STATIC_MATH_LIBRARIES ON)
-    endif()
-    # Don't install the static libs into wheels
-    set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-    set(RAFT_EXCLUDE_FAISS_FROM_ALL ON)
-    set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
-    set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
-
-    add_subdirectory(${CUML_CPP_SRC} cuml-cpp EXCLUDE_FROM_ALL)
-
-    if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
-      set(rpaths
-        "$ORIGIN/../nvidia/cublas/lib"
-        "$ORIGIN/../nvidia/cufft/lib"
-        "$ORIGIN/../nvidia/curand/lib"
-        "$ORIGIN/../nvidia/cusolver/lib"
-        "$ORIGIN/../nvidia/cusparse/lib"
-        "$ORIGIN/../nvidia/nvjitlink/lib"
-      )
-      set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-    endif()
-
-    if(USE_CUVS_WHEEL)
-      set(rpaths "$ORIGIN/../cuvs")
-      set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-    endif()
-
-    set(cython_lib_dir cuml)
-    install(TARGETS ${CUML_CPP_TARGET} DESTINATION ${cython_lib_dir})
-  endif()
-endif()
-
 if(CUML_CPU)
   set(CUML_UNIVERSAL OFF)
   set(SINGLEGPU ON)
 
+  # only a subset of algorithms are supported in CPU-only cuML
   set(CUML_ALGORITHMS "linearregression")
   list(APPEND CUML_ALGORITHMS "pca")
   list(APPEND CUML_ALGORITHMS "tsvd")
@@ -152,22 +75,49 @@ if(CUML_CPU)
 
   list(APPEND CYTHON_FLAGS
   "--compile-time-env GPUBUILD=0")
+
+# cuml-cpu does not need libcuml++.so
 else()
+
+  include(rapids-cpm)
+  include(rapids-export)
+  rapids_cpm_init()
+
+  # --- treelite --- #
+  # Need to call get_treelite explicitly because we need the correct
+  # ${TREELITE_LIBS} definition for RF.
+  #
+  # And because cuml Cython code needs the headers to satisfy calls like
+  # 'cdef extern from "treelite/c_api.h"'
+
+  # wheel builds use a static treelite, because the 'libtreelite.so' in 'treelite' wheels
+  # isn't intended for dynamic linking by third-party projects (e.g. hides its symbols)
+  if(USE_LIBCUML_WHEEL)
+    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+    set(CUML_USE_TREELITE_STATIC ON)
+  else()
+    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
+    set(CUML_USE_TREELITE_STATIC OFF)
+  endif()
+
+  set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
+
+  # --- libcuml --- #
+  find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
+
   set(cuml_sg_libraries cuml::${CUML_CPP_TARGET})
   set(cuml_mg_libraries cuml::${CUML_CPP_TARGET})
 
+  if(NOT SINGLEGPU)
+    list(APPEND cuml_mg_libraries cumlprims_mg::cumlprims_mg)
+  endif()
+
   list(APPEND CYTHON_FLAGS
   "--compile-time-env GPUBUILD=1")
 endif()
 
-if(NOT SINGLEGPU)
-  include("${CUML_CPP_SRC}/cmake/thirdparty/get_cumlprims_mg.cmake")
-  set(cuml_mg_libraries
-    cuml::${CUML_CPP_TARGET}
-    cumlprims_mg::cumlprims_mg
-  )
-endif()
-
  ################################################################################
  # - Build Cython artifacts -----------------------------------------------------
 
@@ -182,6 +132,7 @@ endif()
 
 message(VERBOSE "CUML_PY: Building cuML with algorithms: '${CUML_ALGORITHMS}'.")
 
+include(rapids-cython-core)
 rapids_cython_init()
 
 add_subdirectory(cuml/common)
@@ -206,11 +157,3 @@ add_subdirectory(cuml/svm)
 add_subdirectory(cuml/tsa)
 
 add_subdirectory(cuml/experimental/linear_model)
-
-if(DEFINED cython_lib_dir)
-  rapids_cython_add_rpath_entries(TARGET cuml PATHS "${cython_lib_dir}")
-endif()
-
-if(USE_CUVS_WHEEL)
-  rapids_cython_add_rpath_entries(TARGET cuml PATHS cuvs)
-endif()
diff --git a/python/cuml/cuml/__init__.py b/python/cuml/cuml/__init__.py
index 62ab93c1b4..a8557c84fc 100644
--- a/python/cuml/cuml/__init__.py
+++ b/python/cuml/cuml/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,16 @@
 # limitations under the License.
 #
 
+# If libcuml was installed as a wheel, we must request it to load the library symbols.
+# Otherwise, we assume that the library was installed in a system path that ld can find.
+try:
+    import libcuml
+except ModuleNotFoundError:
+    pass
+else:
+    libcuml.load_library()
+    del libcuml
+
 from cuml.internals.base import Base, UniversalBase
 from cuml.internals.available_devices import is_cuda_available
 
diff --git a/python/cuml/cuml/cluster/CMakeLists.txt b/python/cuml/cuml/cluster/CMakeLists.txt
index 43d15ae6fa..ac012ff510 100644
--- a/python/cuml/cuml/cluster/CMakeLists.txt
+++ b/python/cuml/cuml/cluster/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -31,5 +31,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX cluster_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt b/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
index 2c4b41909d..5a85a97b2d 100644
--- a/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
+++ b/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -21,5 +21,4 @@ rapids_cython_create_modules(
         SOURCE_FILES "${cython_sources}"
         LINKED_LIBRARIES "${cuml_sg_libraries}"
         MODULE_PREFIX cluster_hdbscan_
-        ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/common/CMakeLists.txt b/python/cuml/cuml/common/CMakeLists.txt
index 1492dcd46a..df6cee81cb 100644
--- a/python/cuml/cuml/common/CMakeLists.txt
+++ b/python/cuml/cuml/common/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -28,7 +28,6 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX common_
-  ASSOCIATED_TARGETS cuml
 )
 
 if(${CUML_UNIVERSAL})
diff --git a/python/cuml/cuml/datasets/CMakeLists.txt b/python/cuml/cuml/datasets/CMakeLists.txt
index 51d6614600..64c2b483f6 100644
--- a/python/cuml/cuml/datasets/CMakeLists.txt
+++ b/python/cuml/cuml/datasets/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -21,5 +21,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX datasets_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/decomposition/CMakeLists.txt b/python/cuml/cuml/decomposition/CMakeLists.txt
index 2552c80d74..71f36d57af 100644
--- a/python/cuml/cuml/decomposition/CMakeLists.txt
+++ b/python/cuml/cuml/decomposition/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -30,5 +30,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX decomposition_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/ensemble/CMakeLists.txt b/python/cuml/cuml/ensemble/CMakeLists.txt
index e3732c1577..38999083e6 100644
--- a/python/cuml/cuml/ensemble/CMakeLists.txt
+++ b/python/cuml/cuml/ensemble/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX ensemble_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/experimental/fil/CMakeLists.txt b/python/cuml/cuml/experimental/fil/CMakeLists.txt
index f558a47620..d2baab6642 100644
--- a/python/cuml/cuml/experimental/fil/CMakeLists.txt
+++ b/python/cuml/cuml/experimental/fil/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -26,5 +26,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX experimental_fil_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/experimental/linear_model/CMakeLists.txt b/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
index 6c52c3cd16..eb367d9a56 100644
--- a/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
+++ b/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -22,5 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX experimental_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/explainer/CMakeLists.txt b/python/cuml/cuml/explainer/CMakeLists.txt
index e982fb1264..213bb74a78 100644
--- a/python/cuml/cuml/explainer/CMakeLists.txt
+++ b/python/cuml/cuml/explainer/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -28,5 +28,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX explainer_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/fil/CMakeLists.txt b/python/cuml/cuml/fil/CMakeLists.txt
index 54e2df2cd4..816e8aa7c8 100644
--- a/python/cuml/cuml/fil/CMakeLists.txt
+++ b/python/cuml/cuml/fil/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX fil_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/internals/CMakeLists.txt b/python/cuml/cuml/internals/CMakeLists.txt
index d4ebfdd01d..a363fb7323 100644
--- a/python/cuml/cuml/internals/CMakeLists.txt
+++ b/python/cuml/cuml/internals/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -26,7 +26,6 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX internals_
-  ASSOCIATED_TARGETS cuml
 )
 
 # We need to include for callbacks_implements.h in the internals folder
diff --git a/python/cuml/cuml/kernel_ridge/CMakeLists.txt b/python/cuml/cuml/kernel_ridge/CMakeLists.txt
index 75421a1752..1bf0d0a1e0 100644
--- a/python/cuml/cuml/kernel_ridge/CMakeLists.txt
+++ b/python/cuml/cuml/kernel_ridge/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -22,5 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX kernel_ridge_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/linear_model/CMakeLists.txt b/python/cuml/cuml/linear_model/CMakeLists.txt
index aa72642453..cfa0c3ab05 100644
--- a/python/cuml/cuml/linear_model/CMakeLists.txt
+++ b/python/cuml/cuml/linear_model/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -37,5 +37,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX linear_model_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/manifold/CMakeLists.txt b/python/cuml/cuml/manifold/CMakeLists.txt
index 115705d9af..0ca860afe7 100644
--- a/python/cuml/cuml/manifold/CMakeLists.txt
+++ b/python/cuml/cuml/manifold/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -25,5 +25,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX manifold_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/metrics/CMakeLists.txt b/python/cuml/cuml/metrics/CMakeLists.txt
index 0a6e789c13..a56575ccb8 100644
--- a/python/cuml/cuml/metrics/CMakeLists.txt
+++ b/python/cuml/cuml/metrics/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX metrics_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/metrics/cluster/CMakeLists.txt b/python/cuml/cuml/metrics/cluster/CMakeLists.txt
index a81708674c..fbbde707f4 100644
--- a/python/cuml/cuml/metrics/cluster/CMakeLists.txt
+++ b/python/cuml/cuml/metrics/cluster/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -28,5 +28,4 @@ rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/neighbors/CMakeLists.txt b/python/cuml/cuml/neighbors/CMakeLists.txt
index dbb23550aa..6658ddc5f2 100644
--- a/python/cuml/cuml/neighbors/CMakeLists.txt
+++ b/python/cuml/cuml/neighbors/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -32,5 +32,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX neighbors_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/random_projection/CMakeLists.txt b/python/cuml/cuml/random_projection/CMakeLists.txt
index f4e54397ac..012382a1bb 100644
--- a/python/cuml/cuml/random_projection/CMakeLists.txt
+++ b/python/cuml/cuml/random_projection/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -22,5 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX random_projection_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/solvers/CMakeLists.txt b/python/cuml/cuml/solvers/CMakeLists.txt
index a6eada58a9..ad83508254 100644
--- a/python/cuml/cuml/solvers/CMakeLists.txt
+++ b/python/cuml/cuml/solvers/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -31,5 +31,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX solvers_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/svm/CMakeLists.txt b/python/cuml/cuml/svm/CMakeLists.txt
index 3b9ab0e199..5d19df1a4a 100644
--- a/python/cuml/cuml/svm/CMakeLists.txt
+++ b/python/cuml/cuml/svm/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -25,5 +25,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX svm_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/tsa/CMakeLists.txt b/python/cuml/cuml/tsa/CMakeLists.txt
index 3cbe54bded..92552e9ff4 100644
--- a/python/cuml/cuml/tsa/CMakeLists.txt
+++ b/python/cuml/cuml/tsa/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX tsa_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index c945827a2c..9fb00fd653 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -24,8 +24,8 @@ select = [
     "distro-too-large-compressed",
 ]
 
-# detect when package size grows significantly
-max_allowed_size_compressed = '1.5G'
+# PyPI limit is 100 MiB, fail CI before we get too close to that
+max_allowed_size_compressed = '75M'
 
 [tool.pytest.ini_options]
 addopts = "--tb=native"
@@ -99,6 +99,7 @@ dependencies = [
     "dask-cuda==25.2.*,>=0.0.0a0",
     "dask-cudf==25.2.*,>=0.0.0a0",
     "joblib>=0.11",
+    "libcuml==25.2.*,>=0.0.0a0",
     "numba>=0.59.1,<0.61.0a0",
     "numpy>=1.23,<3.0a0",
     "nvidia-cublas",
@@ -179,8 +180,11 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 requires = [
     "cmake>=3.26.4,!=3.30.0",
     "cuda-python",
-    "cuvs==25.2.*,>=0.0.0a0",
     "cython>=3.0.0",
+    "libcuml==25.2.*,>=0.0.0a0",
+    "libcuvs==25.2.*,>=0.0.0a0",
+    "libraft==25.2.*,>=0.0.0a0",
+    "librmm==25.2.*,>=0.0.0a0",
     "ninja",
     "pylibraft==25.2.*,>=0.0.0a0",
     "rmm==25.2.*,>=0.0.0a0",
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
new file mode 100644
index 0000000000..62bc665e4a
--- /dev/null
+++ b/python/libcuml/CMakeLists.txt
@@ -0,0 +1,89 @@
+# =============================================================================
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+
+include(../../rapids_config.cmake)
+
+include(rapids-cuda)
+rapids_cuda_init_architectures(libcuml-python)
+
+project(
+  libcuml-python
+  VERSION "${RAPIDS_VERSION}"
+  LANGUAGES CXX CUDA
+)
+
+# Check if cuml is already available. If so, it is the user's responsibility to ensure that the
+# CMake package is also available at build time of the Python cuml package.
+find_package(cuml "${RAPIDS_VERSION}")
+
+if(cuml_FOUND)
+  return()
+endif()
+
+unset(cuml_FOUND)
+
+# --- cuML --- #
+set(BUILD_CUML_TESTS OFF)
+set(BUILD_PRIMS_TESTS OFF)
+set(BUILD_CUML_C_LIBRARY OFF)
+set(BUILD_CUML_EXAMPLES OFF)
+set(BUILD_CUML_BENCH OFF)
+
+# In libcuml wheels, we always want to build in all cuML algorithms.
+# This is already the default in cpp/CMakeLists.txt; it is set here to make that choice explicit for wheels.
+set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
+
+# for libcuml wheels, always compile in the multi-node, multi-GPU stuff from cumlprims_mg
+set(SINGLEGPU OFF)
+
+set(CUML_CPP_TARGET "cuml++")
+set(CUML_CPP_SRC "../../cpp")
+
+# --- cumlprims_mg --- #
+# ship cumlprims_mg in the 'libcuml' wheel (for re-use by 'cuml' wheels)
+set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
+set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
+
+# --- cuVS --- #
+set(CUML_USE_CUVS_STATIC OFF)
+set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
+
+# --- raft --- #
+set(CUML_RAFT_CLONE_ON_PIN OFF)
+set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
+
+# --- treelite --- #
+set(CUML_EXPORT_TREELITE_LINKAGE ON)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+set(CUML_USE_TREELITE_STATIC ON)
+set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+
+# --- CUDA --- #
+set(CUDA_STATIC_RUNTIME ON)
+set(CUDA_STATIC_MATH_LIBRARIES OFF)
+
+add_subdirectory(../../cpp cuml-cpp)
+
+# assumes libcuml++ is installed 2 levels deep, e.g. site-packages/libcuml/lib64/libcuml++.so
+set(rpaths
+  "$ORIGIN/../../nvidia/cublas/lib"
+  "$ORIGIN/../../nvidia/cufft/lib"
+  "$ORIGIN/../../nvidia/curand/lib"
+  "$ORIGIN/../../nvidia/cusolver/lib"
+  "$ORIGIN/../../nvidia/cusparse/lib"
+  "$ORIGIN/../../nvidia/nvjitlink/lib"
+)
+set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
diff --git a/python/libcuml/LICENSE b/python/libcuml/LICENSE
new file mode 120000
index 0000000000..30cff7403d
--- /dev/null
+++ b/python/libcuml/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
\ No newline at end of file
diff --git a/python/libcuml/README.md b/python/libcuml/README.md
new file mode 120000
index 0000000000..fe84005413
--- /dev/null
+++ b/python/libcuml/README.md
@@ -0,0 +1 @@
+../../README.md
\ No newline at end of file
diff --git a/python/libcuml/libcuml/VERSION b/python/libcuml/libcuml/VERSION
new file mode 120000
index 0000000000..d62dc733ef
--- /dev/null
+++ b/python/libcuml/libcuml/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/libcuml/libcuml/__init__.py b/python/libcuml/libcuml/__init__.py
new file mode 100644
index 0000000000..69d95c8423
--- /dev/null
+++ b/python/libcuml/libcuml/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcuml._version import __git_commit__, __version__
+from libcuml.load import load_library
diff --git a/python/libcuml/libcuml/_version.py b/python/libcuml/libcuml/_version.py
new file mode 100644
index 0000000000..da66c0d576
--- /dev/null
+++ b/python/libcuml/libcuml/_version.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib.resources
+
+__version__ = (
+    importlib.resources.files(__package__).joinpath("VERSION").read_text().strip()
+)
+try:
+    __git_commit__ = (
+        importlib.resources.files(__package__)
+        .joinpath("GIT_COMMIT")
+        .read_text()
+        .strip()
+    )
+except FileNotFoundError:
+    __git_commit__ = ""
+
+__all__ = ["__git_commit__", "__version__"]
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
new file mode 100644
index 0000000000..45b38afb3f
--- /dev/null
+++ b/python/libcuml/libcuml/load.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import ctypes
+import os
+
+# Loading with RTLD_LOCAL adds the library itself to the loader's
+# loaded library cache without loading any symbols into the global
+# namespace. This allows libraries that express a dependency on
+# this library to be loaded later and successfully satisfy this dependency
+# without polluting the global symbol table with symbols from
+# libcuml that could conflict with symbols from other DSOs.
+PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL
+
+
+def _load_system_installation(soname: str):
+    """dlopen() ``soname`` exactly as given, using ``PREFERRED_LOAD_FLAG``.
+    Returns the ``ctypes.CDLL`` handle; raises ``OSError`` if loading fails.
+    """
+    return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG)
+
+
+def _load_wheel_installation(soname: str):
+    """Try to dlopen() the copy of ``soname`` bundled next to this module.
+
+    Returns ``None`` if ``lib64/<soname>`` is missing; load errors propagate.
+    """
+    if os.path.isfile(
+        lib := os.path.join(os.path.dirname(__file__), "lib64", soname)
+    ):
+        return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG)
+    return None
+
+
+def load_library():
+    """Dynamically load libcuml++.so and its dependencies."""
+    try:
+        # libraft must be loaded before libcuml++ because libcuml++
+        # references its symbols
+        import libraft
+
+        libraft.load_library()
+    except ModuleNotFoundError:
+        # 'libcuml++' has a runtime dependency on 'libraft'. However,
+        # that dependency might be satisfied by the 'libraft' conda package
+        # (which does not have any Python modules), instead of the
+        # 'libraft' wheel.
+        #
+        # In that situation, assume that 'libraft.so' is in a place where
+        # the loader can find it.
+        pass
+
+    try:
+        # libcuvs must be loaded before libcuml++ because libcuml++
+        # references its symbols
+        import libcuvs
+
+        libcuvs.load_library()
+    except ModuleNotFoundError:
+        # 'libcuml++' has a runtime dependency on 'libcuvs'. However,
+        # that dependency might be satisfied by the 'libcuvs' conda package
+        # (which does not have any Python modules), instead of the
+        # 'libcuvs' wheel.
+        #
+        # In that situation, assume that 'libcuvs.so' is in a place where
+        # the loader can find it.
+        pass
+
+    prefer_system_installation = (
+        os.getenv("RAPIDS_LIBCUML_PREFER_SYSTEM_LIBRARY", "false").lower()
+        != "false"
+    )
+
+    libs_to_return = []
+    for soname in ["libcumlprims_mg.so", "libcuml++.so"]:
+        libcuml_lib = None
+        if prefer_system_installation:
+            # Prefer a system library if one is present, to avoid clobbering
+            # symbols other packages might expect; else use the wheel's copy.
+            # NOTE(review): an OSError from the wheel fallback is not caught here.
+            try:
+                libcuml_lib = _load_system_installation(soname)
+            except OSError:
+                libcuml_lib = _load_wheel_installation(soname)
+        else:
+            # Prefer the libraries bundled in this package. If they aren't found
+            # (which might be the case in builds where the library was prebuilt before
+            # packaging the wheel), look for a system installation.
+            try:
+                libcuml_lib = _load_wheel_installation(soname)
+                if libcuml_lib is None:
+                    libcuml_lib = _load_system_installation(soname)
+            except OSError:
+                # If none of the searches above succeed, silently skip this library
+                # and rely on other mechanisms (like RPATHs on other DSOs) to
+                # help the loader find the library.
+                pass
+        if libcuml_lib:
+            libs_to_return.append(libcuml_lib)
+
+    # Return handles for whichever of the two libraries were loaded (possibly
+    # none). Callers almost never need them, but these objects at least provide
+    # handles to inspect where libcuml was loaded from.
+
+    return libs_to_return
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
new file mode 100644
index 0000000000..9d1530c1ad
--- /dev/null
+++ b/python/libcuml/pyproject.toml
@@ -0,0 +1,89 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[build-system]
+build-backend = "rapids_build_backend.build"
+requires = [
+    "rapids-build-backend>=0.3.0,<0.4.0.dev0",
+    "scikit-build-core[pyproject]>=0.10.0",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[project]
+name = "libcuml"
+dynamic = ["version"]
+description = "cuML - RAPIDS ML Algorithms (C++)"
+readme = { file = "README.md", content-type = "text/markdown" }
+authors = [
+    { name = "NVIDIA Corporation" },
+]
+license = { text = "Apache 2.0" }
+requires-python = ">=3.10"
+classifiers = [
+    "Intended Audience :: Developers",
+    "Topic :: Scientific/Engineering",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: C++",
+    "Environment :: GPU :: NVIDIA CUDA",
+]
+dependencies = [
+    "libcuvs==25.2.*,>=0.0.0a0",
+    "libraft==25.2.*,>=0.0.0a0",
+    "nvidia-cublas",
+    "nvidia-cufft",
+    "nvidia-curand",
+    "nvidia-cusolver",
+    "nvidia-cusparse",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[project.urls]
+Homepage = "https://github.com/rapidsai/cuml"
+
+[project.entry-points."cmake.prefix"]
+libcuml= "libcuml"
+
+[tool.pydistcheck]
+select = [
+    "distro-too-large-compressed",
+]
+
+# detect when package size grows significantly
+max_allowed_size_compressed = '500M'
+
+[tool.scikit-build]
+build-dir = "build/{wheel_tag}"
+cmake.build-type = "Release"
+cmake.version = "CMakeLists.txt"
+minimum-version = "build-system.requires"
+ninja.make-fallback = true
+sdist.reproducible = true
+wheel.packages = ["libcuml"]
+wheel.install-dir = "libcuml"
+wheel.py-api = "py3"
+
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "libcuml/VERSION"
+regex = "(?P<value>.*)"
+
+[tool.rapids-build-backend]
+build-backend = "scikit_build_core.build"
+dependencies-file = "../../dependencies.yaml"
+matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
+requires = [
+    "cmake>=3.26.4,!=3.30.0",
+    "libcuvs==25.2.*,>=0.0.0a0",
+    "libraft==25.2.*,>=0.0.0a0",
+    "librmm==25.2.*,>=0.0.0a0",
+    "ninja",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

From c262f1f4e536517bd0520503ef8cc44044a74ec2 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 24 Jan 2025 11:33:26 -0600
Subject: [PATCH 10/15] Rename cpp/test to cpp/tests. (#6237)

Renames `test` directories to `tests` for alignment with the rest of RAPIDS.

See also: https://github.com/rapidsai/build-planning/issues/140

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/6237
---
 cpp/CMakeLists.txt                                            | 2 +-
 cpp/{test => tests}/.gitkeep                                  | 0
 cpp/{test => tests}/CMakeLists.txt                            | 0
 cpp/{test => tests}/c_api/README.md                           | 0
 cpp/{test => tests}/c_api/dbscan_api_test.c                   | 0
 cpp/{test => tests}/c_api/glm_api_test.c                      | 0
 cpp/{test => tests}/c_api/holtwinters_api_test.c              | 0
 cpp/{test => tests}/c_api/knn_api_test.c                      | 0
 cpp/{test => tests}/c_api/svm_api_test.c                      | 0
 cpp/{test => tests}/mg/kmeans_test.cu                         | 0
 cpp/{test => tests}/mg/knn.cu                                 | 0
 cpp/{test => tests}/mg/knn_classify.cu                        | 0
 cpp/{test => tests}/mg/knn_regress.cu                         | 0
 cpp/{test => tests}/mg/knn_test_helper.cuh                    | 0
 cpp/{test => tests}/mg/main.cu                                | 0
 cpp/{test => tests}/mg/pca.cu                                 | 0
 cpp/{test => tests}/mg/test_opg_utils.h                       | 0
 cpp/{test => tests}/prims/add_sub_dev_scalar.cu               | 0
 cpp/{test => tests}/prims/batched/csr.cu                      | 0
 cpp/{test => tests}/prims/batched/gemv.cu                     | 0
 cpp/{test => tests}/prims/batched/make_symm.cu                | 0
 cpp/{test => tests}/prims/batched/matrix.cu                   | 0
 cpp/{test => tests}/prims/decoupled_lookback.cu               | 0
 cpp/{test => tests}/prims/device_utils.cu                     | 0
 cpp/{test => tests}/prims/dist_adj.cu                         | 0
 cpp/{test => tests}/prims/distance_base.cuh                   | 0
 cpp/{test => tests}/prims/eltwise2d.cu                        | 0
 cpp/{test => tests}/prims/fast_int_div.cu                     | 0
 cpp/{test => tests}/prims/fillna.cu                           | 0
 cpp/{test => tests}/prims/grid_sync.cu                        | 0
 cpp/{test => tests}/prims/hinge.cu                            | 0
 cpp/{test => tests}/prims/jones_transform.cu                  | 0
 cpp/{test => tests}/prims/knn_classify.cu                     | 0
 cpp/{test => tests}/prims/knn_regression.cu                   | 0
 cpp/{test => tests}/prims/kselection.cu                       | 0
 cpp/{test => tests}/prims/linalg_block.cu                     | 0
 cpp/{test => tests}/prims/linalg_naive.h                      | 0
 cpp/{test => tests}/prims/linearReg.cu                        | 0
 cpp/{test => tests}/prims/log.cu                              | 0
 cpp/{test => tests}/prims/logisticReg.cu                      | 0
 cpp/{test => tests}/prims/make_arima.cu                       | 0
 cpp/{test => tests}/prims/penalty.cu                          | 0
 cpp/{test => tests}/prims/sigmoid.cu                          | 0
 cpp/{test => tests}/prims/test_utils.h                        | 0
 cpp/{test => tests}/sg/cd_test.cu                             | 0
 cpp/{test => tests}/sg/dbscan_test.cu                         | 0
 cpp/{test => tests}/sg/experimental/fil/raft_proto/buffer.cpp | 0
 cpp/{test => tests}/sg/experimental/fil/raft_proto/buffer.cu  | 0
 cpp/{test => tests}/sg/fil_child_index_test.cu                | 0
 cpp/{test => tests}/sg/fil_test.cu                            | 0
 cpp/{test => tests}/sg/fnv_hash_test.cpp                      | 0
 cpp/{test => tests}/sg/genetic/evolution_test.cu              | 0
 cpp/{test => tests}/sg/genetic/node_test.cpp                  | 0
 cpp/{test => tests}/sg/genetic/param_test.cu                  | 0
 cpp/{test => tests}/sg/genetic/program_test.cu                | 0
 cpp/{test => tests}/sg/handle_test.cu                         | 0
 cpp/{test => tests}/sg/hdbscan_inputs.hpp                     | 0
 cpp/{test => tests}/sg/hdbscan_test.cu                        | 0
 cpp/{test => tests}/sg/holtwinters_test.cu                    | 0
 cpp/{test => tests}/sg/knn_test.cu                            | 0
 cpp/{test => tests}/sg/lars_test.cu                           | 0
 cpp/{test => tests}/sg/linear_svm_test.cu                     | 0
 cpp/{test => tests}/sg/linkage_test.cu                        | 0
 cpp/{test => tests}/sg/logger.cpp                             | 0
 cpp/{test => tests}/sg/multi_sum_test.cu                      | 0
 cpp/{test => tests}/sg/ols.cu                                 | 0
 cpp/{test => tests}/sg/pca_test.cu                            | 0
 cpp/{test => tests}/sg/quasi_newton.cu                        | 0
 cpp/{test => tests}/sg/rf_test.cu                             | 0
 cpp/{test => tests}/sg/ridge.cu                               | 0
 cpp/{test => tests}/sg/rproj_test.cu                          | 0
 cpp/{test => tests}/sg/sgd.cu                                 | 0
 cpp/{test => tests}/sg/shap_kernel.cu                         | 0
 cpp/{test => tests}/sg/svc_test.cu                            | 0
 cpp/{test => tests}/sg/time_series_datasets.h                 | 0
 cpp/{test => tests}/sg/trustworthiness_test.cu                | 0
 cpp/{test => tests}/sg/tsne_test.cu                           | 0
 cpp/{test => tests}/sg/tsvd_test.cu                           | 0
 cpp/{test => tests}/sg/umap_parametrizable_test.cu            | 0
 79 files changed, 1 insertion(+), 1 deletion(-)
 rename cpp/{test => tests}/.gitkeep (100%)
 rename cpp/{test => tests}/CMakeLists.txt (100%)
 rename cpp/{test => tests}/c_api/README.md (100%)
 rename cpp/{test => tests}/c_api/dbscan_api_test.c (100%)
 rename cpp/{test => tests}/c_api/glm_api_test.c (100%)
 rename cpp/{test => tests}/c_api/holtwinters_api_test.c (100%)
 rename cpp/{test => tests}/c_api/knn_api_test.c (100%)
 rename cpp/{test => tests}/c_api/svm_api_test.c (100%)
 rename cpp/{test => tests}/mg/kmeans_test.cu (100%)
 rename cpp/{test => tests}/mg/knn.cu (100%)
 rename cpp/{test => tests}/mg/knn_classify.cu (100%)
 rename cpp/{test => tests}/mg/knn_regress.cu (100%)
 rename cpp/{test => tests}/mg/knn_test_helper.cuh (100%)
 rename cpp/{test => tests}/mg/main.cu (100%)
 rename cpp/{test => tests}/mg/pca.cu (100%)
 rename cpp/{test => tests}/mg/test_opg_utils.h (100%)
 rename cpp/{test => tests}/prims/add_sub_dev_scalar.cu (100%)
 rename cpp/{test => tests}/prims/batched/csr.cu (100%)
 rename cpp/{test => tests}/prims/batched/gemv.cu (100%)
 rename cpp/{test => tests}/prims/batched/make_symm.cu (100%)
 rename cpp/{test => tests}/prims/batched/matrix.cu (100%)
 rename cpp/{test => tests}/prims/decoupled_lookback.cu (100%)
 rename cpp/{test => tests}/prims/device_utils.cu (100%)
 rename cpp/{test => tests}/prims/dist_adj.cu (100%)
 rename cpp/{test => tests}/prims/distance_base.cuh (100%)
 rename cpp/{test => tests}/prims/eltwise2d.cu (100%)
 rename cpp/{test => tests}/prims/fast_int_div.cu (100%)
 rename cpp/{test => tests}/prims/fillna.cu (100%)
 rename cpp/{test => tests}/prims/grid_sync.cu (100%)
 rename cpp/{test => tests}/prims/hinge.cu (100%)
 rename cpp/{test => tests}/prims/jones_transform.cu (100%)
 rename cpp/{test => tests}/prims/knn_classify.cu (100%)
 rename cpp/{test => tests}/prims/knn_regression.cu (100%)
 rename cpp/{test => tests}/prims/kselection.cu (100%)
 rename cpp/{test => tests}/prims/linalg_block.cu (100%)
 rename cpp/{test => tests}/prims/linalg_naive.h (100%)
 rename cpp/{test => tests}/prims/linearReg.cu (100%)
 rename cpp/{test => tests}/prims/log.cu (100%)
 rename cpp/{test => tests}/prims/logisticReg.cu (100%)
 rename cpp/{test => tests}/prims/make_arima.cu (100%)
 rename cpp/{test => tests}/prims/penalty.cu (100%)
 rename cpp/{test => tests}/prims/sigmoid.cu (100%)
 rename cpp/{test => tests}/prims/test_utils.h (100%)
 rename cpp/{test => tests}/sg/cd_test.cu (100%)
 rename cpp/{test => tests}/sg/dbscan_test.cu (100%)
 rename cpp/{test => tests}/sg/experimental/fil/raft_proto/buffer.cpp (100%)
 rename cpp/{test => tests}/sg/experimental/fil/raft_proto/buffer.cu (100%)
 rename cpp/{test => tests}/sg/fil_child_index_test.cu (100%)
 rename cpp/{test => tests}/sg/fil_test.cu (100%)
 rename cpp/{test => tests}/sg/fnv_hash_test.cpp (100%)
 rename cpp/{test => tests}/sg/genetic/evolution_test.cu (100%)
 rename cpp/{test => tests}/sg/genetic/node_test.cpp (100%)
 rename cpp/{test => tests}/sg/genetic/param_test.cu (100%)
 rename cpp/{test => tests}/sg/genetic/program_test.cu (100%)
 rename cpp/{test => tests}/sg/handle_test.cu (100%)
 rename cpp/{test => tests}/sg/hdbscan_inputs.hpp (100%)
 rename cpp/{test => tests}/sg/hdbscan_test.cu (100%)
 rename cpp/{test => tests}/sg/holtwinters_test.cu (100%)
 rename cpp/{test => tests}/sg/knn_test.cu (100%)
 rename cpp/{test => tests}/sg/lars_test.cu (100%)
 rename cpp/{test => tests}/sg/linear_svm_test.cu (100%)
 rename cpp/{test => tests}/sg/linkage_test.cu (100%)
 rename cpp/{test => tests}/sg/logger.cpp (100%)
 rename cpp/{test => tests}/sg/multi_sum_test.cu (100%)
 rename cpp/{test => tests}/sg/ols.cu (100%)
 rename cpp/{test => tests}/sg/pca_test.cu (100%)
 rename cpp/{test => tests}/sg/quasi_newton.cu (100%)
 rename cpp/{test => tests}/sg/rf_test.cu (100%)
 rename cpp/{test => tests}/sg/ridge.cu (100%)
 rename cpp/{test => tests}/sg/rproj_test.cu (100%)
 rename cpp/{test => tests}/sg/sgd.cu (100%)
 rename cpp/{test => tests}/sg/shap_kernel.cu (100%)
 rename cpp/{test => tests}/sg/svc_test.cu (100%)
 rename cpp/{test => tests}/sg/time_series_datasets.h (100%)
 rename cpp/{test => tests}/sg/trustworthiness_test.cu (100%)
 rename cpp/{test => tests}/sg/tsne_test.cu (100%)
 rename cpp/{test => tests}/sg/tsvd_test.cu (100%)
 rename cpp/{test => tests}/sg/umap_parametrizable_test.cu (100%)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index b7ef6e2293..8b1e564694 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -724,7 +724,7 @@ endif()
 
 if(BUILD_CUML_TESTS)
   include(CTest)
-  add_subdirectory(test)
+  add_subdirectory(tests)
 endif()
 
 ##############################################################################
diff --git a/cpp/test/.gitkeep b/cpp/tests/.gitkeep
similarity index 100%
rename from cpp/test/.gitkeep
rename to cpp/tests/.gitkeep
diff --git a/cpp/test/CMakeLists.txt b/cpp/tests/CMakeLists.txt
similarity index 100%
rename from cpp/test/CMakeLists.txt
rename to cpp/tests/CMakeLists.txt
diff --git a/cpp/test/c_api/README.md b/cpp/tests/c_api/README.md
similarity index 100%
rename from cpp/test/c_api/README.md
rename to cpp/tests/c_api/README.md
diff --git a/cpp/test/c_api/dbscan_api_test.c b/cpp/tests/c_api/dbscan_api_test.c
similarity index 100%
rename from cpp/test/c_api/dbscan_api_test.c
rename to cpp/tests/c_api/dbscan_api_test.c
diff --git a/cpp/test/c_api/glm_api_test.c b/cpp/tests/c_api/glm_api_test.c
similarity index 100%
rename from cpp/test/c_api/glm_api_test.c
rename to cpp/tests/c_api/glm_api_test.c
diff --git a/cpp/test/c_api/holtwinters_api_test.c b/cpp/tests/c_api/holtwinters_api_test.c
similarity index 100%
rename from cpp/test/c_api/holtwinters_api_test.c
rename to cpp/tests/c_api/holtwinters_api_test.c
diff --git a/cpp/test/c_api/knn_api_test.c b/cpp/tests/c_api/knn_api_test.c
similarity index 100%
rename from cpp/test/c_api/knn_api_test.c
rename to cpp/tests/c_api/knn_api_test.c
diff --git a/cpp/test/c_api/svm_api_test.c b/cpp/tests/c_api/svm_api_test.c
similarity index 100%
rename from cpp/test/c_api/svm_api_test.c
rename to cpp/tests/c_api/svm_api_test.c
diff --git a/cpp/test/mg/kmeans_test.cu b/cpp/tests/mg/kmeans_test.cu
similarity index 100%
rename from cpp/test/mg/kmeans_test.cu
rename to cpp/tests/mg/kmeans_test.cu
diff --git a/cpp/test/mg/knn.cu b/cpp/tests/mg/knn.cu
similarity index 100%
rename from cpp/test/mg/knn.cu
rename to cpp/tests/mg/knn.cu
diff --git a/cpp/test/mg/knn_classify.cu b/cpp/tests/mg/knn_classify.cu
similarity index 100%
rename from cpp/test/mg/knn_classify.cu
rename to cpp/tests/mg/knn_classify.cu
diff --git a/cpp/test/mg/knn_regress.cu b/cpp/tests/mg/knn_regress.cu
similarity index 100%
rename from cpp/test/mg/knn_regress.cu
rename to cpp/tests/mg/knn_regress.cu
diff --git a/cpp/test/mg/knn_test_helper.cuh b/cpp/tests/mg/knn_test_helper.cuh
similarity index 100%
rename from cpp/test/mg/knn_test_helper.cuh
rename to cpp/tests/mg/knn_test_helper.cuh
diff --git a/cpp/test/mg/main.cu b/cpp/tests/mg/main.cu
similarity index 100%
rename from cpp/test/mg/main.cu
rename to cpp/tests/mg/main.cu
diff --git a/cpp/test/mg/pca.cu b/cpp/tests/mg/pca.cu
similarity index 100%
rename from cpp/test/mg/pca.cu
rename to cpp/tests/mg/pca.cu
diff --git a/cpp/test/mg/test_opg_utils.h b/cpp/tests/mg/test_opg_utils.h
similarity index 100%
rename from cpp/test/mg/test_opg_utils.h
rename to cpp/tests/mg/test_opg_utils.h
diff --git a/cpp/test/prims/add_sub_dev_scalar.cu b/cpp/tests/prims/add_sub_dev_scalar.cu
similarity index 100%
rename from cpp/test/prims/add_sub_dev_scalar.cu
rename to cpp/tests/prims/add_sub_dev_scalar.cu
diff --git a/cpp/test/prims/batched/csr.cu b/cpp/tests/prims/batched/csr.cu
similarity index 100%
rename from cpp/test/prims/batched/csr.cu
rename to cpp/tests/prims/batched/csr.cu
diff --git a/cpp/test/prims/batched/gemv.cu b/cpp/tests/prims/batched/gemv.cu
similarity index 100%
rename from cpp/test/prims/batched/gemv.cu
rename to cpp/tests/prims/batched/gemv.cu
diff --git a/cpp/test/prims/batched/make_symm.cu b/cpp/tests/prims/batched/make_symm.cu
similarity index 100%
rename from cpp/test/prims/batched/make_symm.cu
rename to cpp/tests/prims/batched/make_symm.cu
diff --git a/cpp/test/prims/batched/matrix.cu b/cpp/tests/prims/batched/matrix.cu
similarity index 100%
rename from cpp/test/prims/batched/matrix.cu
rename to cpp/tests/prims/batched/matrix.cu
diff --git a/cpp/test/prims/decoupled_lookback.cu b/cpp/tests/prims/decoupled_lookback.cu
similarity index 100%
rename from cpp/test/prims/decoupled_lookback.cu
rename to cpp/tests/prims/decoupled_lookback.cu
diff --git a/cpp/test/prims/device_utils.cu b/cpp/tests/prims/device_utils.cu
similarity index 100%
rename from cpp/test/prims/device_utils.cu
rename to cpp/tests/prims/device_utils.cu
diff --git a/cpp/test/prims/dist_adj.cu b/cpp/tests/prims/dist_adj.cu
similarity index 100%
rename from cpp/test/prims/dist_adj.cu
rename to cpp/tests/prims/dist_adj.cu
diff --git a/cpp/test/prims/distance_base.cuh b/cpp/tests/prims/distance_base.cuh
similarity index 100%
rename from cpp/test/prims/distance_base.cuh
rename to cpp/tests/prims/distance_base.cuh
diff --git a/cpp/test/prims/eltwise2d.cu b/cpp/tests/prims/eltwise2d.cu
similarity index 100%
rename from cpp/test/prims/eltwise2d.cu
rename to cpp/tests/prims/eltwise2d.cu
diff --git a/cpp/test/prims/fast_int_div.cu b/cpp/tests/prims/fast_int_div.cu
similarity index 100%
rename from cpp/test/prims/fast_int_div.cu
rename to cpp/tests/prims/fast_int_div.cu
diff --git a/cpp/test/prims/fillna.cu b/cpp/tests/prims/fillna.cu
similarity index 100%
rename from cpp/test/prims/fillna.cu
rename to cpp/tests/prims/fillna.cu
diff --git a/cpp/test/prims/grid_sync.cu b/cpp/tests/prims/grid_sync.cu
similarity index 100%
rename from cpp/test/prims/grid_sync.cu
rename to cpp/tests/prims/grid_sync.cu
diff --git a/cpp/test/prims/hinge.cu b/cpp/tests/prims/hinge.cu
similarity index 100%
rename from cpp/test/prims/hinge.cu
rename to cpp/tests/prims/hinge.cu
diff --git a/cpp/test/prims/jones_transform.cu b/cpp/tests/prims/jones_transform.cu
similarity index 100%
rename from cpp/test/prims/jones_transform.cu
rename to cpp/tests/prims/jones_transform.cu
diff --git a/cpp/test/prims/knn_classify.cu b/cpp/tests/prims/knn_classify.cu
similarity index 100%
rename from cpp/test/prims/knn_classify.cu
rename to cpp/tests/prims/knn_classify.cu
diff --git a/cpp/test/prims/knn_regression.cu b/cpp/tests/prims/knn_regression.cu
similarity index 100%
rename from cpp/test/prims/knn_regression.cu
rename to cpp/tests/prims/knn_regression.cu
diff --git a/cpp/test/prims/kselection.cu b/cpp/tests/prims/kselection.cu
similarity index 100%
rename from cpp/test/prims/kselection.cu
rename to cpp/tests/prims/kselection.cu
diff --git a/cpp/test/prims/linalg_block.cu b/cpp/tests/prims/linalg_block.cu
similarity index 100%
rename from cpp/test/prims/linalg_block.cu
rename to cpp/tests/prims/linalg_block.cu
diff --git a/cpp/test/prims/linalg_naive.h b/cpp/tests/prims/linalg_naive.h
similarity index 100%
rename from cpp/test/prims/linalg_naive.h
rename to cpp/tests/prims/linalg_naive.h
diff --git a/cpp/test/prims/linearReg.cu b/cpp/tests/prims/linearReg.cu
similarity index 100%
rename from cpp/test/prims/linearReg.cu
rename to cpp/tests/prims/linearReg.cu
diff --git a/cpp/test/prims/log.cu b/cpp/tests/prims/log.cu
similarity index 100%
rename from cpp/test/prims/log.cu
rename to cpp/tests/prims/log.cu
diff --git a/cpp/test/prims/logisticReg.cu b/cpp/tests/prims/logisticReg.cu
similarity index 100%
rename from cpp/test/prims/logisticReg.cu
rename to cpp/tests/prims/logisticReg.cu
diff --git a/cpp/test/prims/make_arima.cu b/cpp/tests/prims/make_arima.cu
similarity index 100%
rename from cpp/test/prims/make_arima.cu
rename to cpp/tests/prims/make_arima.cu
diff --git a/cpp/test/prims/penalty.cu b/cpp/tests/prims/penalty.cu
similarity index 100%
rename from cpp/test/prims/penalty.cu
rename to cpp/tests/prims/penalty.cu
diff --git a/cpp/test/prims/sigmoid.cu b/cpp/tests/prims/sigmoid.cu
similarity index 100%
rename from cpp/test/prims/sigmoid.cu
rename to cpp/tests/prims/sigmoid.cu
diff --git a/cpp/test/prims/test_utils.h b/cpp/tests/prims/test_utils.h
similarity index 100%
rename from cpp/test/prims/test_utils.h
rename to cpp/tests/prims/test_utils.h
diff --git a/cpp/test/sg/cd_test.cu b/cpp/tests/sg/cd_test.cu
similarity index 100%
rename from cpp/test/sg/cd_test.cu
rename to cpp/tests/sg/cd_test.cu
diff --git a/cpp/test/sg/dbscan_test.cu b/cpp/tests/sg/dbscan_test.cu
similarity index 100%
rename from cpp/test/sg/dbscan_test.cu
rename to cpp/tests/sg/dbscan_test.cu
diff --git a/cpp/test/sg/experimental/fil/raft_proto/buffer.cpp b/cpp/tests/sg/experimental/fil/raft_proto/buffer.cpp
similarity index 100%
rename from cpp/test/sg/experimental/fil/raft_proto/buffer.cpp
rename to cpp/tests/sg/experimental/fil/raft_proto/buffer.cpp
diff --git a/cpp/test/sg/experimental/fil/raft_proto/buffer.cu b/cpp/tests/sg/experimental/fil/raft_proto/buffer.cu
similarity index 100%
rename from cpp/test/sg/experimental/fil/raft_proto/buffer.cu
rename to cpp/tests/sg/experimental/fil/raft_proto/buffer.cu
diff --git a/cpp/test/sg/fil_child_index_test.cu b/cpp/tests/sg/fil_child_index_test.cu
similarity index 100%
rename from cpp/test/sg/fil_child_index_test.cu
rename to cpp/tests/sg/fil_child_index_test.cu
diff --git a/cpp/test/sg/fil_test.cu b/cpp/tests/sg/fil_test.cu
similarity index 100%
rename from cpp/test/sg/fil_test.cu
rename to cpp/tests/sg/fil_test.cu
diff --git a/cpp/test/sg/fnv_hash_test.cpp b/cpp/tests/sg/fnv_hash_test.cpp
similarity index 100%
rename from cpp/test/sg/fnv_hash_test.cpp
rename to cpp/tests/sg/fnv_hash_test.cpp
diff --git a/cpp/test/sg/genetic/evolution_test.cu b/cpp/tests/sg/genetic/evolution_test.cu
similarity index 100%
rename from cpp/test/sg/genetic/evolution_test.cu
rename to cpp/tests/sg/genetic/evolution_test.cu
diff --git a/cpp/test/sg/genetic/node_test.cpp b/cpp/tests/sg/genetic/node_test.cpp
similarity index 100%
rename from cpp/test/sg/genetic/node_test.cpp
rename to cpp/tests/sg/genetic/node_test.cpp
diff --git a/cpp/test/sg/genetic/param_test.cu b/cpp/tests/sg/genetic/param_test.cu
similarity index 100%
rename from cpp/test/sg/genetic/param_test.cu
rename to cpp/tests/sg/genetic/param_test.cu
diff --git a/cpp/test/sg/genetic/program_test.cu b/cpp/tests/sg/genetic/program_test.cu
similarity index 100%
rename from cpp/test/sg/genetic/program_test.cu
rename to cpp/tests/sg/genetic/program_test.cu
diff --git a/cpp/test/sg/handle_test.cu b/cpp/tests/sg/handle_test.cu
similarity index 100%
rename from cpp/test/sg/handle_test.cu
rename to cpp/tests/sg/handle_test.cu
diff --git a/cpp/test/sg/hdbscan_inputs.hpp b/cpp/tests/sg/hdbscan_inputs.hpp
similarity index 100%
rename from cpp/test/sg/hdbscan_inputs.hpp
rename to cpp/tests/sg/hdbscan_inputs.hpp
diff --git a/cpp/test/sg/hdbscan_test.cu b/cpp/tests/sg/hdbscan_test.cu
similarity index 100%
rename from cpp/test/sg/hdbscan_test.cu
rename to cpp/tests/sg/hdbscan_test.cu
diff --git a/cpp/test/sg/holtwinters_test.cu b/cpp/tests/sg/holtwinters_test.cu
similarity index 100%
rename from cpp/test/sg/holtwinters_test.cu
rename to cpp/tests/sg/holtwinters_test.cu
diff --git a/cpp/test/sg/knn_test.cu b/cpp/tests/sg/knn_test.cu
similarity index 100%
rename from cpp/test/sg/knn_test.cu
rename to cpp/tests/sg/knn_test.cu
diff --git a/cpp/test/sg/lars_test.cu b/cpp/tests/sg/lars_test.cu
similarity index 100%
rename from cpp/test/sg/lars_test.cu
rename to cpp/tests/sg/lars_test.cu
diff --git a/cpp/test/sg/linear_svm_test.cu b/cpp/tests/sg/linear_svm_test.cu
similarity index 100%
rename from cpp/test/sg/linear_svm_test.cu
rename to cpp/tests/sg/linear_svm_test.cu
diff --git a/cpp/test/sg/linkage_test.cu b/cpp/tests/sg/linkage_test.cu
similarity index 100%
rename from cpp/test/sg/linkage_test.cu
rename to cpp/tests/sg/linkage_test.cu
diff --git a/cpp/test/sg/logger.cpp b/cpp/tests/sg/logger.cpp
similarity index 100%
rename from cpp/test/sg/logger.cpp
rename to cpp/tests/sg/logger.cpp
diff --git a/cpp/test/sg/multi_sum_test.cu b/cpp/tests/sg/multi_sum_test.cu
similarity index 100%
rename from cpp/test/sg/multi_sum_test.cu
rename to cpp/tests/sg/multi_sum_test.cu
diff --git a/cpp/test/sg/ols.cu b/cpp/tests/sg/ols.cu
similarity index 100%
rename from cpp/test/sg/ols.cu
rename to cpp/tests/sg/ols.cu
diff --git a/cpp/test/sg/pca_test.cu b/cpp/tests/sg/pca_test.cu
similarity index 100%
rename from cpp/test/sg/pca_test.cu
rename to cpp/tests/sg/pca_test.cu
diff --git a/cpp/test/sg/quasi_newton.cu b/cpp/tests/sg/quasi_newton.cu
similarity index 100%
rename from cpp/test/sg/quasi_newton.cu
rename to cpp/tests/sg/quasi_newton.cu
diff --git a/cpp/test/sg/rf_test.cu b/cpp/tests/sg/rf_test.cu
similarity index 100%
rename from cpp/test/sg/rf_test.cu
rename to cpp/tests/sg/rf_test.cu
diff --git a/cpp/test/sg/ridge.cu b/cpp/tests/sg/ridge.cu
similarity index 100%
rename from cpp/test/sg/ridge.cu
rename to cpp/tests/sg/ridge.cu
diff --git a/cpp/test/sg/rproj_test.cu b/cpp/tests/sg/rproj_test.cu
similarity index 100%
rename from cpp/test/sg/rproj_test.cu
rename to cpp/tests/sg/rproj_test.cu
diff --git a/cpp/test/sg/sgd.cu b/cpp/tests/sg/sgd.cu
similarity index 100%
rename from cpp/test/sg/sgd.cu
rename to cpp/tests/sg/sgd.cu
diff --git a/cpp/test/sg/shap_kernel.cu b/cpp/tests/sg/shap_kernel.cu
similarity index 100%
rename from cpp/test/sg/shap_kernel.cu
rename to cpp/tests/sg/shap_kernel.cu
diff --git a/cpp/test/sg/svc_test.cu b/cpp/tests/sg/svc_test.cu
similarity index 100%
rename from cpp/test/sg/svc_test.cu
rename to cpp/tests/sg/svc_test.cu
diff --git a/cpp/test/sg/time_series_datasets.h b/cpp/tests/sg/time_series_datasets.h
similarity index 100%
rename from cpp/test/sg/time_series_datasets.h
rename to cpp/tests/sg/time_series_datasets.h
diff --git a/cpp/test/sg/trustworthiness_test.cu b/cpp/tests/sg/trustworthiness_test.cu
similarity index 100%
rename from cpp/test/sg/trustworthiness_test.cu
rename to cpp/tests/sg/trustworthiness_test.cu
diff --git a/cpp/test/sg/tsne_test.cu b/cpp/tests/sg/tsne_test.cu
similarity index 100%
rename from cpp/test/sg/tsne_test.cu
rename to cpp/tests/sg/tsne_test.cu
diff --git a/cpp/test/sg/tsvd_test.cu b/cpp/tests/sg/tsvd_test.cu
similarity index 100%
rename from cpp/test/sg/tsvd_test.cu
rename to cpp/tests/sg/tsvd_test.cu
diff --git a/cpp/test/sg/umap_parametrizable_test.cu b/cpp/tests/sg/umap_parametrizable_test.cu
similarity index 100%
rename from cpp/test/sg/umap_parametrizable_test.cu
rename to cpp/tests/sg/umap_parametrizable_test.cu

From 9b13d90b10b1c192035e3954c41def1f2238a5b2 Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <danteg@nvidia.com>
Date: Mon, 27 Jan 2025 01:20:08 -0600
Subject: [PATCH 11/15] Fix for porter stemmer for cuDF change and ARIMA pytest
 adjustments (#6227)

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Tim Head (https://github.com/betatim)

URL: https://github.com/rapidsai/cuml/pull/6227
---
 .../cuml/cuml/preprocessing/text/stem/porter_stemmer.py   | 8 +++++++-
 python/cuml/cuml/tests/test_arima.py                      | 4 ++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/cuml/cuml/preprocessing/text/stem/porter_stemmer.py b/python/cuml/cuml/preprocessing/text/stem/porter_stemmer.py
index b49ad4f04b..d58a8b1646 100644
--- a/python/cuml/cuml/preprocessing/text/stem/porter_stemmer.py
+++ b/python/cuml/cuml/preprocessing/text/stem/porter_stemmer.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -753,6 +753,12 @@ def apply_rule(word_str_ser, rule, w_in_c_flag):
         # mask where replacement will happen
         valid_mask = double_consonant_mask & condition_mask & w_in_c_flag
 
+        # A recent cuDF change makes the conditions above yield NA instead of
+        # null, so we must replace those entries with False here. Otherwise
+        # replace_suffix does not work correctly and duplicates single
+        # letters that it should leave alone.
+        valid_mask = valid_mask.fillna(False)
+
         # new series with updated valid_mask
         word_str_ser = replace_suffix(
             word_str_ser, suffix, replacement, valid_mask
diff --git a/python/cuml/cuml/tests/test_arima.py b/python/cuml/cuml/tests/test_arima.py
index f96def4d04..bdc6a82f6e 100644
--- a/python/cuml/cuml/tests/test_arima.py
+++ b/python/cuml/cuml/tests/test_arima.py
@@ -165,7 +165,7 @@ def __init__(
     n_obs=101,
     n_test=10,
     dataset="alcohol",
-    tolerance_integration=0.01,
+    tolerance_integration=0.09,
 )
 
 # ARIMA(5,1,0)
@@ -261,7 +261,7 @@ def __init__(
     ((5, 1, 0, 0, 0, 0, 0, 0), test_510),
     # Skip due to update to Scipy 1.15
     # ((1, 1, 1, 2, 0, 0, 4, 1), test_111_200_4c),
-    ((1, 1, 1, 2, 0, 0, 4, 1), test_111_200_4c_missing),
+    # ((1, 1, 1, 2, 0, 0, 4, 1), test_111_200_4c_missing),
     ((1, 1, 1, 2, 0, 0, 4, 1), test_111_200_4c_missing_exog),
     ((1, 1, 2, 0, 1, 2, 4, 0), test_112_012_4),
     stress_param((1, 1, 1, 1, 1, 1, 12, 0), test_111_111_12),

From 3a3222887fda1c311c1845e43da56c34fa87da0b Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif <jcristharif@gmail.com>
Date: Mon, 27 Jan 2025 15:04:26 -0600
Subject: [PATCH 12/15] Ensure all method signatures are sklearn compatible
 (#6260)

`sklearn` requires `fit`/`fit_transform`/... always take a `y` parameter, even if it's ignored. This adds a test to ensure our signatures match this rule, and fixes any cases where they didn't. This makes it easier to include `cuml` estimators within sklearn pipelines.

Fixes #6255.

Authors:
  - Jim Crist-Harif (https://github.com/jcrist)
  - Bradley Dice (https://github.com/bdice)
  - Tim Head (https://github.com/betatim)

Approvers:
  - William Hicks (https://github.com/wphicks)
  - Tim Head (https://github.com/betatim)

URL: https://github.com/rapidsai/cuml/pull/6260
---
 python/cuml/cuml/cluster/kmeans.pyx           |  6 +--
 python/cuml/cuml/feature_extraction/_tfidf.py |  6 +--
 python/cuml/cuml/manifold/t_sne.pyx           |  4 +-
 .../random_projection/random_projection.pyx   |  4 +-
 python/cuml/cuml/tests/test_base.py           | 47 ++++++++++++++++++-
 python/cuml/cuml/tests/test_tsne.py           | 30 +++++++-----
 6 files changed, 75 insertions(+), 22 deletions(-)

diff --git a/python/cuml/cuml/cluster/kmeans.pyx b/python/cuml/cuml/cluster/kmeans.pyx
index 6be09f6912..ed26df5cd6 100644
--- a/python/cuml/cuml/cluster/kmeans.pyx
+++ b/python/cuml/cuml/cluster/kmeans.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -284,7 +284,7 @@ class KMeans(UniversalBase,
 
     @generate_docstring()
     @enable_device_interop
-    def fit(self, X, sample_weight=None, convert_dtype=True) -> "KMeans":
+    def fit(self, X, y=None, sample_weight=None, convert_dtype=True) -> "KMeans":
         """
         Compute k-means clustering with X.
 
@@ -422,7 +422,7 @@ class KMeans(UniversalBase,
                                        'description': 'Cluster indexes',
                                        'shape': '(n_samples, 1)'})
     @enable_device_interop
-    def fit_predict(self, X, sample_weight=None) -> CumlArray:
+    def fit_predict(self, X, y=None, sample_weight=None) -> CumlArray:
         """
         Compute cluster centers and predict cluster index for each sample.
 
diff --git a/python/cuml/cuml/feature_extraction/_tfidf.py b/python/cuml/cuml/feature_extraction/_tfidf.py
index 2cf5974119..f929ed2dfa 100644
--- a/python/cuml/cuml/feature_extraction/_tfidf.py
+++ b/python/cuml/cuml/feature_extraction/_tfidf.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -182,7 +182,7 @@ def _set_idf_diag(self):
         del self.__df
 
     @cuml.internals.api_base_return_any_skipall
-    def fit(self, X) -> "TfidfTransformer":
+    def fit(self, X, y=None) -> "TfidfTransformer":
         """Learn the idf vector (global term weights).
 
         Parameters
@@ -251,7 +251,7 @@ def transform(self, X, copy=True):
         return X
 
     @cuml.internals.api_base_return_any_skipall
-    def fit_transform(self, X, copy=True):
+    def fit_transform(self, X, y=None, copy=True):
         """
         Fit TfidfTransformer to X, then transform X.
         Equivalent to fit(X).transform(X).
diff --git a/python/cuml/cuml/manifold/t_sne.pyx b/python/cuml/cuml/manifold/t_sne.pyx
index b984d47818..08ec39913a 100644
--- a/python/cuml/cuml/manifold/t_sne.pyx
+++ b/python/cuml/cuml/manifold/t_sne.pyx
@@ -413,7 +413,7 @@ class TSNE(UniversalBase,
                         X='dense_sparse',
                         convert_dtype_cast='np.float32')
     @enable_device_interop
-    def fit(self, X, convert_dtype=True, knn_graph=None) -> "TSNE":
+    def fit(self, X, y=None, convert_dtype=True, knn_graph=None) -> "TSNE":
         """
         Fit X into an embedded space.
 
@@ -578,7 +578,7 @@ class TSNE(UniversalBase,
                                        'shape': '(n_samples, n_components)'})
     @cuml.internals.api_base_fit_transform()
     @enable_device_interop
-    def fit_transform(self, X, convert_dtype=True,
+    def fit_transform(self, X, y=None, convert_dtype=True,
                       knn_graph=None) -> CumlArray:
         """
         Fit X into an embedded space and return that transformed output.
diff --git a/python/cuml/cuml/random_projection/random_projection.pyx b/python/cuml/cuml/random_projection/random_projection.pyx
index 81811a4849..48ef013e44 100644
--- a/python/cuml/cuml/random_projection/random_projection.pyx
+++ b/python/cuml/cuml/random_projection/random_projection.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -322,7 +322,7 @@ cdef class BaseRandomProjection():
         return X_new
 
     @cuml.internals.api_base_return_array(get_output_type=False)
-    def fit_transform(self, X, convert_dtype=True):
+    def fit_transform(self, X, y=None, convert_dtype=True):
         return self.fit(X).transform(X, convert_dtype)
 
 
diff --git a/python/cuml/cuml/tests/test_base.py b/python/cuml/cuml/tests/test_base.py
index 0cd01acabb..d258020339 100644
--- a/python/cuml/cuml/tests/test_base.py
+++ b/python/cuml/cuml/tests/test_base.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -196,3 +196,48 @@ def test_base_children__get_param_names(child_class: str):
                 continue
 
             assert name in param_names
+
+
+# We explicitly skip the models in `cuml.tsa` since they match the statsmodels
+# interface rather than the sklearn interface (https://github.com/rapidsai/cuml/issues/6258).
+# Also skip a few classes that don't match this interface intentionally, since their sklearn
+# equivalents are also exceptions.
+@pytest.mark.parametrize(
+    "cls",
+    [
+        cls
+        for cls in all_base_children.values()
+        if not cls.__module__.startswith("cuml.tsa.")
+        and cls
+        not in {
+            cuml.preprocessing.LabelBinarizer,
+            cuml.preprocessing.LabelEncoder,
+        }
+    ],
+)
+def test_sklearn_methods_with_required_y_parameter(cls):
+    optional_params = {
+        inspect.Parameter.KEYWORD_ONLY,
+        inspect.Parameter.POSITIONAL_OR_KEYWORD,
+        inspect.Parameter.VAR_KEYWORD,
+    }
+    for name in [
+        "fit",
+        "partial_fit",
+        "score",
+        "fit_transform",
+        "fit_predict",
+    ]:
+        if (method := getattr(cls, name, None)) is None:
+            # Method not defined, skip
+            continue
+        params = list(inspect.signature(method).parameters.values())
+        # Assert the 2nd parameter after `self` is named y, as sklearn requires
+        assert (
+            len(params) > 2 and params[2].name == "y"
+        ), f"`{name}` requires a `y` parameter, even if it's ignored"
+        # Check the kind of each remaining parameter (defaults are not checked)
+        for param in params[3:]:
+            assert (
+                param.kind in optional_params
+            ), f"`{name}` parameter `{param.name}` must be optional"
diff --git a/python/cuml/cuml/tests/test_tsne.py b/python/cuml/cuml/tests/test_tsne.py
index fe119eb999..115ade2848 100644
--- a/python/cuml/cuml/tests/test_tsne.py
+++ b/python/cuml/cuml/tests/test_tsne.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -75,7 +75,7 @@ def test_tsne_knn_graph_used(test_datasets, type_knn_graph, method):
     )
 
     # Perform tsne with normal knn_graph
-    Y = tsne.fit_transform(X, True, knn_graph)
+    Y = tsne.fit_transform(X, convert_dtype=True, knn_graph=knn_graph)
 
     trust_normal = trustworthiness(X, Y, n_neighbors=DEFAULT_N_NEIGHBORS)
 
@@ -97,16 +97,16 @@ def test_tsne_knn_graph_used(test_datasets, type_knn_graph, method):
     )
 
     # Perform tsne with garbage knn_graph
-    Y = tsne.fit_transform(X, True, knn_graph_garbage)
+    Y = tsne.fit_transform(X, convert_dtype=True, knn_graph=knn_graph_garbage)
 
     trust_garbage = trustworthiness(X, Y, n_neighbors=DEFAULT_N_NEIGHBORS)
     assert (trust_normal - trust_garbage) > 0.15
 
-    Y = tsne.fit_transform(X, True, knn_graph_garbage)
+    Y = tsne.fit_transform(X, convert_dtype=True, knn_graph=knn_graph_garbage)
     trust_garbage = trustworthiness(X, Y, n_neighbors=DEFAULT_N_NEIGHBORS)
     assert (trust_normal - trust_garbage) > 0.15
 
-    Y = tsne.fit_transform(X, True, knn_graph_garbage)
+    Y = tsne.fit_transform(X, convert_dtype=True, knn_graph=knn_graph_garbage)
     trust_garbage = trustworthiness(X, Y, n_neighbors=DEFAULT_N_NEIGHBORS)
     assert (trust_normal - trust_garbage) > 0.15
 
@@ -137,13 +137,17 @@ def test_tsne_knn_parameters(test_datasets, type_knn_graph, method):
         perplexity=DEFAULT_PERPLEXITY,
     )
 
-    embed = tsne.fit_transform(X, True, knn_graph)
+    embed = tsne.fit_transform(X, convert_dtype=True, knn_graph=knn_graph)
     validate_embedding(X, embed)
 
-    embed = tsne.fit_transform(X, True, knn_graph.tocoo())
+    embed = tsne.fit_transform(
+        X, convert_dtype=True, knn_graph=knn_graph.tocoo()
+    )
     validate_embedding(X, embed)
 
-    embed = tsne.fit_transform(X, True, knn_graph.tocsc())
+    embed = tsne.fit_transform(
+        X, convert_dtype=True, knn_graph=knn_graph.tocsc()
+    )
     validate_embedding(X, embed)
 
 
@@ -309,17 +313,21 @@ def test_tsne_knn_parameters_sparse(type_knn_graph, input_type, method):
 
     new_data = sp_prefix.csr_matrix(scipy.sparse.csr_matrix(digits))
 
-    Y = tsne.fit_transform(new_data, True, knn_graph)
+    Y = tsne.fit_transform(new_data, convert_dtype=True, knn_graph=knn_graph)
     if input_type == "cupy":
         Y = Y.get()
     validate_embedding(digits, Y, 0.85)
 
-    Y = tsne.fit_transform(new_data, True, knn_graph.tocoo())
+    Y = tsne.fit_transform(
+        new_data, convert_dtype=True, knn_graph=knn_graph.tocoo()
+    )
     if input_type == "cupy":
         Y = Y.get()
     validate_embedding(digits, Y, 0.85)
 
-    Y = tsne.fit_transform(new_data, True, knn_graph.tocsc())
+    Y = tsne.fit_transform(
+        new_data, convert_dtype=True, knn_graph=knn_graph.tocsc()
+    )
     if input_type == "cupy":
         Y = Y.get()
     validate_embedding(digits, Y, 0.85)

From 66abfc506f148dccfed3116159133a0a97653938 Mon Sep 17 00:00:00 2001
From: Michael Schellenberger Costa <miscco@nvidia.com>
Date: Mon, 27 Jan 2025 22:06:22 +0100
Subject: [PATCH 13/15] Drop deprecated thrust features and replace with
 libcu++ ones (#6248)

CCCL is deprecating a lot of legacy thrust features, so replace them with the standard ones from libcu++ or just drop them altogether.

Authors:
  - Michael Schellenberger Costa (https://github.com/miscco)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/6248
---
 cpp/src/dbscan/vertexdeg/algo.cuh         |  4 ++--
 cpp/src/glm/qn/glm_base.cuh               |  5 +++--
 cpp/src/hdbscan/condensed_hierarchy.cu    |  3 +--
 cpp/src/hdbscan/detail/membership.cuh     |  5 ++---
 cpp/src/hdbscan/detail/select.cuh         |  5 +++--
 cpp/src/hdbscan/detail/stabilities.cuh    |  5 ++---
 cpp/src/hdbscan/detail/utils.h            | 16 ++++------------
 cpp/src/hdbscan/prediction_data.cu        |  4 ++--
 cpp/src/svm/kernelcache.cuh               |  4 ++--
 cpp/src/svm/sparse_util.cuh               |  9 +++++----
 cpp/src/tsa/auto_arima.cuh                |  4 ++--
 cpp/src/tsne/fft_tsne.cuh                 |  9 +++++----
 cpp/src_prims/timeSeries/stationarity.cuh |  4 ++--
 cpp/tests/sg/rf_test.cu                   |  5 +++--
 14 files changed, 38 insertions(+), 44 deletions(-)

diff --git a/cpp/src/dbscan/vertexdeg/algo.cuh b/cpp/src/dbscan/vertexdeg/algo.cuh
index efb1299df2..097e1cdd34 100644
--- a/cpp/src/dbscan/vertexdeg/algo.cuh
+++ b/cpp/src/dbscan/vertexdeg/algo.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,7 +46,7 @@ namespace VertexDeg {
 namespace Algo {
 
 template <typename index_t = int>
-struct column_counter : public thrust::unary_function<index_t, index_t> {
+struct column_counter {
   index_t* ia_;
   index_t n_;
 
diff --git a/cpp/src/glm/qn/glm_base.cuh b/cpp/src/glm/qn/glm_base.cuh
index cfe2657a0b..d35a9b91f6 100644
--- a/cpp/src/glm/qn/glm_base.cuh
+++ b/cpp/src/glm/qn/glm_base.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
+#include <cuda/std/functional>
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #include <thrust/reduce.h>
@@ -124,7 +125,7 @@ struct GLMBase : GLMDims {
                                        sample_weights,
                                        sample_weights + n_samples,
                                        (T)0,
-                                       thrust::plus<T>());
+                                       cuda::std::plus<T>());
   }
 
   /*
diff --git a/cpp/src/hdbscan/condensed_hierarchy.cu b/cpp/src/hdbscan/condensed_hierarchy.cu
index 5744bc51c8..c9fc13dee4 100644
--- a/cpp/src/hdbscan/condensed_hierarchy.cu
+++ b/cpp/src/hdbscan/condensed_hierarchy.cu
@@ -27,7 +27,6 @@
 
 #include <cub/cub.cuh>
 #include <cuda/functional>
-#include <cuda/std/functional>
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include <thrust/execution_policy.h>
@@ -161,7 +160,7 @@ void CondensedHierarchy<value_idx, value_t>::condense(value_idx* full_parents,
     cuda::proclaim_return_type<value_idx>(
       [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a != -1); }),
     static_cast<value_idx>(0),
-    thrust::plus<value_idx>());
+    cuda::std::plus<value_idx>());
 
   parents.resize(n_edges, stream);
   children.resize(n_edges, stream);
diff --git a/cpp/src/hdbscan/detail/membership.cuh b/cpp/src/hdbscan/detail/membership.cuh
index 1392788329..3d820e4f3f 100644
--- a/cpp/src/hdbscan/detail/membership.cuh
+++ b/cpp/src/hdbscan/detail/membership.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -80,8 +80,7 @@ void get_probabilities(const raft::handle_t& handle,
                              int,
                              const value_idx*,
                              const value_idx*,
-                             cudaStream_t,
-                             bool) =
+                             cudaStream_t) =
     cub::DeviceSegmentedReduce::Max<const value_t*, value_t*, const value_idx*, const value_idx*>;
   Utils::cub_segmented_reduce(
     lambdas, deaths.data(), n_clusters, sorted_parents_offsets.data(), stream, reduce_func);
diff --git a/cpp/src/hdbscan/detail/select.cuh b/cpp/src/hdbscan/detail/select.cuh
index 36e674e40b..6818a9fffb 100644
--- a/cpp/src/hdbscan/detail/select.cuh
+++ b/cpp/src/hdbscan/detail/select.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/functional>
 #include <thrust/copy.h>
 #include <thrust/execution_policy.h>
 #include <thrust/fill.h>
@@ -223,7 +224,7 @@ void excess_of_mass(const raft::handle_t& handle,
                                  cuda::proclaim_return_type<value_t>(
                                    [=] __device__(value_idx a) -> value_t { return stability[a]; }),
                                  0.0,
-                                 thrust::plus<value_t>());
+                                 cuda::std::plus<value_t>());
     }
 
     if (subtree_stability > node_stability || cluster_sizes_h[node] > max_cluster_size) {
diff --git a/cpp/src/hdbscan/detail/stabilities.cuh b/cpp/src/hdbscan/detail/stabilities.cuh
index 674a58dd59..0f4bcbf54f 100644
--- a/cpp/src/hdbscan/detail/stabilities.cuh
+++ b/cpp/src/hdbscan/detail/stabilities.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -104,8 +104,7 @@ void compute_stabilities(const raft::handle_t& handle,
                              int,
                              const value_idx*,
                              const value_idx*,
-                             cudaStream_t,
-                             bool) =
+                             cudaStream_t) =
     cub::DeviceSegmentedReduce::Min<const value_t*, value_t*, const value_idx*, const value_idx*>;
   Utils::cub_segmented_reduce(lambdas,
                               births_parent_min.data() + 1,
diff --git a/cpp/src/hdbscan/detail/utils.h b/cpp/src/hdbscan/detail/utils.h
index 4456416a6f..e3414fd507 100644
--- a/cpp/src/hdbscan/detail/utils.h
+++ b/cpp/src/hdbscan/detail/utils.h
@@ -76,19 +76,11 @@ void cub_segmented_reduce(const value_t* in,
 {
   rmm::device_uvector<char> d_temp_storage(0, stream);
   size_t temp_storage_bytes = 0;
-  cub_reduce_func(
-    nullptr, temp_storage_bytes, in, out, n_segments, offsets, offsets + 1, stream, false);
+  cub_reduce_func(nullptr, temp_storage_bytes, in, out, n_segments, offsets, offsets + 1, stream);
   d_temp_storage.resize(temp_storage_bytes, stream);
 
-  cub_reduce_func(d_temp_storage.data(),
-                  temp_storage_bytes,
-                  in,
-                  out,
-                  n_segments,
-                  offsets,
-                  offsets + 1,
-                  stream,
-                  false);
+  cub_reduce_func(
+    d_temp_storage.data(), temp_storage_bytes, in, out, n_segments, offsets, offsets + 1, stream);
 }
 
 /**
@@ -118,7 +110,7 @@ Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
     cuda::proclaim_return_type<value_idx>(
       [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a > 1); }),
     static_cast<value_idx>(0),
-    thrust::plus<value_idx>());
+    cuda::std::plus<value_idx>());
 
   // remove leaves from condensed tree
   rmm::device_uvector<value_idx> cluster_parents(cluster_tree_edges, stream);
diff --git a/cpp/src/hdbscan/prediction_data.cu b/cpp/src/hdbscan/prediction_data.cu
index 8e4db992cb..73f80f80db 100644
--- a/cpp/src/hdbscan/prediction_data.cu
+++ b/cpp/src/hdbscan/prediction_data.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -130,7 +130,7 @@ void generate_prediction_data(const raft::handle_t& handle,
 
   // this is to find maximum lambdas of all children under a parent
   cudaError_t (*reduce_func)(
-    void*, size_t&, const float*, float*, int, const int*, const int*, cudaStream_t, bool) =
+    void*, size_t&, const float*, float*, int, const int*, const int*, cudaStream_t) =
     cub::DeviceSegmentedReduce::Max<const float*, float*, const int*, const int*>;
   detail::Utils::cub_segmented_reduce(lambdas,
                                       prediction_data.get_deaths(),
diff --git a/cpp/src/svm/kernelcache.cuh b/cpp/src/svm/kernelcache.cuh
index cbaec2b386..d4f4f09c8a 100644
--- a/cpp/src/svm/kernelcache.cuh
+++ b/cpp/src/svm/kernelcache.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -81,7 +81,7 @@ CUML_KERNEL void mapColumnIndicesToSVRSpace(
 }
 
 template <typename math_t>
-struct select_at_index : public thrust::unary_function<int, math_t> {
+struct select_at_index {
   const math_t* dot_;
   select_at_index(const math_t* dot) : dot_(dot) {}
 
diff --git a/cpp/src/svm/sparse_util.cuh b/cpp/src/svm/sparse_util.cuh
index c4d0b277e9..020c173d4e 100644
--- a/cpp/src/svm/sparse_util.cuh
+++ b/cpp/src/svm/sparse_util.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/functional>
 #include <thrust/device_ptr.h>
 #include <thrust/execution_policy.h>
 #include <thrust/iterator/constant_iterator.h>
@@ -325,7 +326,7 @@ raft::device_csr_matrix_view<math_t, int, int, int> getMatrixBatch(
                         inptr_src + batch_size + 1,
                         thrust::make_constant_iterator(nnz_offset),
                         inptr_tgt,
-                        thrust::minus<int>());
+                        cuda::std::minus<int>());
     }
 
     auto csr_struct_out = raft::make_device_compressed_structure_view<int, int, int>(
@@ -496,7 +497,7 @@ static void copySparseRowsToDense(const int* indptr,
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
-struct rowsize : public thrust::unary_function<int, int> {
+struct rowsize {
   const int* indptr_;
   rowsize(const int* indptr) : indptr_(indptr) {}
 
@@ -610,7 +611,7 @@ int computeIndptrForSubset(
                                    row_new_indices_ptr + num_indices,
                                    row_sizes_ptr + 1,
                                    rowsize(indptr_in),
-                                   thrust::plus<int>());
+                                   cuda::std::plus<int>());
 
   // retrieve nnz from indptr_in[num_indices]
   int nnz;
diff --git a/cpp/src/tsa/auto_arima.cuh b/cpp/src/tsa/auto_arima.cuh
index 2bf049c04b..b67f5034e2 100644
--- a/cpp/src/tsa/auto_arima.cuh
+++ b/cpp/src/tsa/auto_arima.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -181,7 +181,7 @@ inline void divide_by_mask_execute(const DataT* d_in,
  * a matrix from its index. This makes possible a 2d scan with thrust.
  * Found in thrust/examples/scan_matrix_by_rows.cu
  */
-struct which_col : thrust::unary_function<int, int> {
+struct which_col {
   MLCommon::FastIntDiv divisor;
   __host__ which_col(int col_length) : divisor(col_length) {}
   __host__ __device__ int operator()(int idx) const { return idx / divisor; }
diff --git a/cpp/src/tsne/fft_tsne.cuh b/cpp/src/tsne/fft_tsne.cuh
index cb5dedf932..cb1dfe933b 100644
--- a/cpp/src/tsne/fft_tsne.cuh
+++ b/cpp/src/tsne/fft_tsne.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,6 +35,7 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/functional>
 #include <thrust/device_ptr.h>
 #include <thrust/fill.h>
 #include <thrust/functional.h>
@@ -512,7 +513,7 @@ value_t FFT_TSNE(value_t* VAL,
                                     norm_vec_thrust,
                                     norm_vec_thrust + normalization_vec_device.size(),
                                     0.0f,
-                                    thrust::plus<value_t>());
+                                    cuda::std::plus<value_t>());
       normalization = sumQ - n;
     }
 
@@ -565,7 +566,7 @@ value_t FFT_TSNE(value_t* VAL,
                       att_forces_thrust + n,
                       att_forces_thrust + n,
                       att_forces_thrust,
-                      thrust::plus<value_t>());
+                      cuda::std::plus<value_t>());
 
     thrust::transform(thrust_policy,
                       att_forces_thrust,
@@ -577,7 +578,7 @@ value_t FFT_TSNE(value_t* VAL,
                                        att_forces_thrust,
                                        att_forces_thrust + attractive_forces_device.size(),
                                        0.0f,
-                                       thrust::plus<value_t>()) /
+                                       cuda::std::plus<value_t>()) /
                         attractive_forces_device.size();
 
     if (grad_norm <= params.min_grad_norm) {
diff --git a/cpp/src_prims/timeSeries/stationarity.cuh b/cpp/src_prims/timeSeries/stationarity.cuh
index b85b874d94..31c140f304 100644
--- a/cpp/src_prims/timeSeries/stationarity.cuh
+++ b/cpp/src_prims/timeSeries/stationarity.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -174,7 +174,7 @@ CUML_KERNEL void kpss_stationarity_check_kernel(bool* results,
  * Found in thrust/examples/scan_matrix_by_rows.cu
  */
 template <typename IdxT>
-struct which_col : thrust::unary_function<IdxT, IdxT> {
+struct which_col {
   IdxT col_length;
   __host__ __device__ which_col(IdxT col_length_) : col_length(col_length_) {}
   __host__ __device__ IdxT operator()(IdxT idx) const { return idx / col_length; }
diff --git a/cpp/tests/sg/rf_test.cu b/cpp/tests/sg/rf_test.cu
index 117ec37960..1be3c3c84a 100644
--- a/cpp/tests/sg/rf_test.cu
+++ b/cpp/tests/sg/rf_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
+#include <cuda/std/functional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/device_vector.h>
@@ -305,7 +306,7 @@ class RfSpecialisedTest {
       thrust::device_vector<double> normal(params.n_rows);
       r.normal(normal.data().get(), normal.size(), 0.0, 2.0, nullptr);
       thrust::transform(
-        normal.begin(), normal.end(), y_temp.begin(), y.begin(), thrust::plus<LabelT>());
+        normal.begin(), normal.end(), y_temp.begin(), y.begin(), cuda::std::plus<LabelT>());
     }
     raft::linalg::transpose(
       handle, X.data().get(), X_transpose.data().get(), params.n_rows, params.n_cols, nullptr);

From 59faa1ce45ccf31af8032f3f0cd15eaca201e5ad Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Mon, 27 Jan 2025 16:48:41 -0500
Subject: [PATCH 14/15] Allow CUDA ODR violations in 25.02 (#6264)

CUDA 12.8+ doesn't allow CUDA kernels to be called across TUs when building in whole compilation mode, so in the short term (for 25.02) we ignore this warning.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Bradley Dice (https://github.com/bdice)
  - Paul Taylor (https://github.com/trxcllnt)

URL: https://github.com/rapidsai/cuml/pull/6264
---
 cpp/cmake/modules/ConfigureCUDA.cmake | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
index 60cc5dae15..5198eea540 100644
--- a/cpp/cmake/modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,6 +35,12 @@ if(DISABLE_DEPRECATION_WARNINGS)
     list(APPEND CUML_CUDA_FLAGS -Wno-deprecated-declarations -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif()
 
+# Allow invalid CUDA kernels in the short term
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0)
+    list(APPEND CUML_CUDA_FLAGS -static-global-template-stub=false)
+endif()
+
+
 # make sure we produce smallest binary size
 list(APPEND CUML_CUDA_FLAGS -Xfatbin=-compress-all)
 

From 6985c516f7ab2f1fdcc3feb278e6f87e9ebe6d14 Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Mon, 27 Jan 2025 18:56:20 -0600
Subject: [PATCH 15/15] Fix UMAP transform illegal memory access error when
 data_on_host=True (#6259)

Fixes #6216 by identifying whether the original input data is on host or device and conditionally building the brute force index (required for a separate `transform()` call) for the correct matrix view.

- [x] Identify and fix root cause
- [x] Clean up implementation
- [x] Implement unit test
- [x] Document fix

Closes #6216

Authors:
  - Simon Adorf (https://github.com/csadorf)
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - William Hicks (https://github.com/wphicks)
  - Victor Lafargue (https://github.com/viclafargue)

URL: https://github.com/rapidsai/cuml/pull/6259
---
 cpp/src/umap/knn_graph/algo.cuh     | 24 +++++++++++++++++-------
 python/cuml/cuml/tests/test_umap.py | 17 +++++++++++++----
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/cpp/src/umap/knn_graph/algo.cuh b/cpp/src/umap/knn_graph/algo.cuh
index 6617d72c00..c99003758e 100644
--- a/cpp/src/umap/knn_graph/algo.cuh
+++ b/cpp/src/umap/knn_graph/algo.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -92,12 +92,22 @@ inline void launcher(const raft::handle_t& handle,
                      cudaStream_t stream)
 {
   if (params->build_algo == ML::UMAPParams::graph_build_algo::BRUTE_FORCE_KNN) {
-    auto idx = cuvs::neighbors::brute_force::build(
-      handle,
-      raft::make_device_matrix_view<const float, int64_t>(inputsA.X, inputsA.n, inputsA.d),
-      params->metric,
-      params->p);
-
+    cudaPointerAttributes attr;
+    RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, inputsA.X));
+    float* ptr = reinterpret_cast<float*>(attr.devicePointer);
+    auto idx   = [&]() {
+      if (ptr != nullptr) {  // inputsA on device
+        return cuvs::neighbors::brute_force::build(
+          handle,
+          {params->metric, params->p},
+          raft::make_device_matrix_view<const float, int64_t>(inputsA.X, inputsA.n, inputsA.d));
+      } else {  // inputsA on host
+        return cuvs::neighbors::brute_force::build(
+          handle,
+          {params->metric, params->p},
+          raft::make_host_matrix_view<const float, int64_t>(inputsA.X, inputsA.n, inputsA.d));
+      }
+    }();
     cuvs::neighbors::brute_force::search(
       handle,
       idx,
diff --git a/python/cuml/cuml/tests/test_umap.py b/python/cuml/cuml/tests/test_umap.py
index 6d91012177..296678666f 100644
--- a/python/cuml/cuml/tests/test_umap.py
+++ b/python/cuml/cuml/tests/test_umap.py
@@ -842,7 +842,10 @@ def test_umap_distance_metrics_fit_transform_trust_on_sparse_input(
 
 @pytest.mark.parametrize("data_on_host", [True, False])
 @pytest.mark.parametrize("num_clusters", [0, 3, 5])
-def test_umap_trustworthiness_on_batch_nnd(data_on_host, num_clusters):
+@pytest.mark.parametrize("fit_then_transform", [False, True])
+def test_umap_trustworthiness_on_batch_nnd(
+    data_on_host, num_clusters, fit_then_transform
+):
 
     digits = datasets.load_digits()
 
@@ -853,9 +856,15 @@ def test_umap_trustworthiness_on_batch_nnd(data_on_host, num_clusters):
         build_kwds={"nnd_n_clusters": num_clusters},
     )
 
-    cuml_embedding = cuml_model.fit_transform(
-        digits.data, convert_dtype=True, data_on_host=data_on_host
-    )
+    if fit_then_transform:
+        cuml_model.fit(
+            digits.data, convert_dtype=True, data_on_host=data_on_host
+        )
+        cuml_embedding = cuml_model.transform(digits.data)
+    else:
+        cuml_embedding = cuml_model.fit_transform(
+            digits.data, convert_dtype=True, data_on_host=data_on_host
+        )
 
     cuml_trust = trustworthiness(digits.data, cuml_embedding, n_neighbors=10)