From cd365e974f472ee2381819eb77717a8085cdfad4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 30 Apr 2024 08:02:48 -0500 Subject: [PATCH 01/41] [R-package] [ci] Manually install 'Matrix' (fixes #6433) (#6434) --- .ci/test_r_package.sh | 13 +++++++++++-- docs/FAQ.rst | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index c1f933bfc8a0..66a3ecaa663d 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -110,15 +110,24 @@ fi # "Warning: dependency ‘lattice’ is not available" if [[ "${R_MAJOR_VERSION}" == "3" ]]; then Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')" +else + # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. + # This should be unnecessary on R >=4.4.0 + # ref: https://github.com/microsoft/LightGBM/issues/6433 + Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" fi +# manually install {Matrix}, as {Matrix}=1.7-0 raised its R floor all the way to R 4.4.0 +# ref: https://github.com/microsoft/LightGBM/issues/6433 +Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.6-5.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')" + # Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat' # to avoid a CI-time dependency on devtools (for devtools::install_deps()) # NOTE: testthat is not required when running rchk if [[ "${TASK}" == "r-rchk" ]]; then - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'R6', 'RhpcBLASctl')" + packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl')" else - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'R6', 'RhpcBLASctl', 'testthat')" + packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" fi compile_from_source="both" if [[ $OS_NAME == "macos" ]]; then diff --git a/docs/FAQ.rst b/docs/FAQ.rst index 43999931ca07..8f0024b45730 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -236,6 +236,22 @@ As of LightGBM v4.0.0, ``setinfo()`` has been replaced by a new method, ``set_fi If you are experiencing this error when running ``lightgbm``, you may be facing the same issue reported in `#2715 `_ and later in `#2989 `_. We have seen that in some situations, using ``data.table`` 1.11.x results in this error. To get around this, you can upgrade your version of ``data.table`` to at least version 1.12.0. +4. package ‘Matrix’ is not available + +In April 2024, ``Matrix==1.7-0`` was published to CRAN. +That version had a floor of ``R (>=4.4.0)``. +``{Matrix}`` is a hard runtime dependency of ``{lightgbm}``, so on any version of R older than ``4.4.0``, running ``install.packages("lightgbm")`` results in something like the following. + +.. code-block:: text + + package ‘Matrix’ is not available for this version of R + +To fix that without upgrading to R 4.4.0 or greater, manually install an older version of ``{Matrix}``. + +.. 
code-block:: R + + install.packages('https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.6-5.tar.gz', repos = NULL) + ------ Python-package From d7fcb3c2000518922f1df59775b9ebb5628b6559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AA=E3=82=8B=E3=81=BF?= <4680567+narumiruna@users.noreply.github.com> Date: Wed, 1 May 2024 00:31:00 +0800 Subject: [PATCH 02/41] upgrade CMake in dockerfile-cli (fixes #6420) (#6426) --- docker/dockerfile-cli | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/dockerfile-cli b/docker/dockerfile-cli index 51e7bd9627e5..63c3e1f3d32c 100644 --- a/docker/dockerfile-cli +++ b/docker/dockerfile-cli @@ -8,7 +8,7 @@ ENV \ RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ ca-certificates \ - cmake \ + curl \ build-essential \ gcc \ g++ \ @@ -16,12 +16,17 @@ RUN apt-get update -y && \ libomp-dev && \ rm -rf /var/lib/apt/lists/* +RUN curl -L -o cmake.sh https://github.com/Kitware/CMake/releases/download/v3.29.2/cmake-3.29.2-linux-x86_64.sh && \ + chmod +x cmake.sh && \ + sh ./cmake.sh --prefix=/usr/local --skip-license && \ + rm cmake.sh + RUN git clone \ --recursive \ --branch stable \ --depth 1 \ https://github.com/Microsoft/LightGBM && \ - cd ./Lightgbm && \ + cd ./LightGBM && \ cmake -B build -S . && \ cmake --build build -j4 && \ cmake --install build && \ From da9bb5fd7292e993dfff036f6e579495085add18 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 1 May 2024 12:55:33 -0500 Subject: [PATCH 03/41] [R-package] always name the shared library 'lightgbm', not 'lib_lightgbm' (#6432) --- CMakeLists.txt | 13 ++++++++++++- R-package/NAMESPACE | 2 +- R-package/R/lightgbm.R | 2 +- R-package/src/install.libs.R | 6 +++--- build-cran-package.sh | 20 +------------------- build_r.R | 36 ------------------------------------ 6 files changed, 18 insertions(+), 61 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4136575e43eb..ad7e44067490 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -461,11 +461,22 @@ if(BUILD_STATIC_LIB) else() add_library(_lightgbm SHARED) endif() + +# R expects libraries of the form .{dll,dylib,so}, not lib_.{dll,dylib,so} +if(__BUILD_FOR_R) + set_target_properties( + _lightgbm + PROPERTIES + PREFIX "" + OUTPUT_NAME "lightgbm" + ) +endif() + # LightGBM headers include openmp, cuda, R etc. headers, # thus PUBLIC is required for building _lightgbm_swig target. 
target_link_libraries(_lightgbm PUBLIC lightgbm_capi_objs lightgbm_objs) -if(MSVC) +if(MSVC AND NOT __BUILD_FOR_R) set_target_properties(_lightgbm PROPERTIES OUTPUT_NAME "lib_lightgbm") endif() diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 1f6ed248811c..49ef2b5cb8fc 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -62,4 +62,4 @@ importFrom(parallel,detectCores) importFrom(stats,quantile) importFrom(utils,modifyList) importFrom(utils,read.delim) -useDynLib(lib_lightgbm , .registration = TRUE) +useDynLib(lightgbm , .registration = TRUE) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index e5df7a93fc97..f1a0090f950a 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -325,7 +325,7 @@ NULL #' @import methods #' @importFrom Matrix Matrix #' @importFrom R6 R6Class -#' @useDynLib lib_lightgbm , .registration = TRUE +#' @useDynLib lightgbm , .registration = TRUE NULL # Suppress false positive warnings from R CMD CHECK about diff --git a/R-package/src/install.libs.R b/R-package/src/install.libs.R index cda512e08549..f9d4233046db 100644 --- a/R-package/src/install.libs.R +++ b/R-package/src/install.libs.R @@ -227,9 +227,9 @@ if (!makefiles_already_generated) { } # build the library -message("Building lib_lightgbm") +message(paste0("Building lightgbm", SHLIB_EXT)) .run_shell_command(build_cmd, build_args) -src <- file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/") +src <- file.path(lib_folder, paste0("lightgbm", SHLIB_EXT), fsep = "/") # Packages with install.libs.R need to copy some artifacts into the # expected places in the package structure. @@ -247,7 +247,7 @@ if (file.exists(src)) { } } else { - stop(paste0("Cannot find lib_lightgbm", SHLIB_EXT)) + stop(paste0("Cannot find lightgbm", SHLIB_EXT)) } # clean up the "build" directory diff --git a/build-cran-package.sh b/build-cran-package.sh index 767c128d9693..89c6606803c5 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -165,23 +165,6 @@ cd "${TEMP_R_DIR}" -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \ src/include/LightGBM/utils/common.h - # When building an R package with 'configure', it seems - # you're guaranteed to get a shared library called - # .so/dll/dylib. The package source code expects - # 'lib_lightgbm.so', not 'lightgbm.so', to comply with the way - # this project has historically handled installation - echo "Changing lib_lightgbm to lightgbm" - for file in R/*.R; do - sed \ - -i.bak \ - -e 's/lib_lightgbm/lightgbm/' \ - "${file}" - done - sed \ - -i.bak \ - -e 's/lib_lightgbm/lightgbm/' \ - NAMESPACE - # 'processx' is listed as a 'Suggests' dependency in DESCRIPTION # because it is used in install.libs.R, a file that is not # included in the CRAN distribution of the package @@ -191,8 +174,7 @@ cd "${TEMP_R_DIR}" DESCRIPTION echo "Cleaning sed backup files" - rm R/*.R.bak - rm NAMESPACE.bak + rm *.bak cd "${ORIG_WD}" diff --git a/build_r.R b/build_r.R index 50d61e550ace..c2703778a7dd 100644 --- a/build_r.R +++ b/build_r.R @@ -398,42 +398,6 @@ description_contents <- gsub( ) writeLines(description_contents, DESCRIPTION_FILE) -# CMake-based builds can't currently use R's builtin routine registration, -# so have to update NAMESPACE manually, with a statement like this: -# -# useDynLib(lib_lightgbm, LGBM_DatasetCreateFromFile_R, ...) 
-# -# See https://cran.r-project.org/doc/manuals/r-release/R-exts.html#useDynLib for -# documentation of this approach, where the NAMESPACE file uses a statement like -# useDynLib(foo, myRoutine, myOtherRoutine) -NAMESPACE_FILE <- file.path(TEMP_R_DIR, "NAMESPACE") -namespace_contents <- readLines(NAMESPACE_FILE) -dynlib_line <- grep( - pattern = "^useDynLib" - , x = namespace_contents -) - -c_api_contents <- readLines(file.path(TEMP_SOURCE_DIR, "src", "lightgbm_R.h")) -c_api_contents <- c_api_contents[startsWith(c_api_contents, "LIGHTGBM_C_EXPORT")] -c_api_contents <- gsub( - pattern = "LIGHTGBM_C_EXPORT SEXP " - , replacement = "" - , x = c_api_contents - , fixed = TRUE -) -c_api_symbols <- gsub( - pattern = "\\(.*" - , replacement = "" - , x = c_api_contents -) -dynlib_statement <- paste0( - "useDynLib(lib_lightgbm, " - , toString(c_api_symbols) - , ")" -) -namespace_contents[dynlib_line] <- dynlib_statement -writeLines(namespace_contents, NAMESPACE_FILE) - # NOTE: --keep-empty-dirs is necessary to keep the deep paths expected # by CMake while also meeting the CRAN req to create object files # on demand From 9f5fbb647920ed2f163fec68f3535ebbf7eaeea5 Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Wed, 1 May 2024 19:57:55 +0200 Subject: [PATCH 04/41] [python-package] Allow to pass early stopping min delta in params (#6274) * [python-package] Allow to pass early stopping min delta in params * Fix test * Add separate test * Fix * Add to cpp config * Adjust test * Adjust test * Debug * Revert * Apply suggestions from code review --------- Co-authored-by: James Lamb --- R-package/tests/testthat/test_lgb.Booster.R | 1 + docs/Parameters.rst | 4 ++++ include/LightGBM/config.h | 4 ++++ python-package/lightgbm/engine.py | 2 ++ src/boosting/gbdt.cpp | 4 +++- src/boosting/gbdt.h | 2 ++ src/io/config_auto.cpp | 7 ++++++ tests/python_package_test/test_engine.py | 24 +++++++++++++++++++++ 8 files changed, 47 insertions(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index e6b0e8abda64..7bf0a1bf43d2 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -850,6 +850,7 @@ test_that("all parameters are stored correctly with save_model_to_string()", { , "[extra_trees: 0]" , "[extra_seed: 6642]" , "[early_stopping_round: 0]" + , "[early_stopping_min_delta: 0]" , "[first_metric_only: 0]" , "[max_delta_step: 0]" , "[lambda_l1: 0]" diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 94f7e36d8ef2..02f01ae4408b 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -410,6 +410,10 @@ Learning Control Parameters - can be used to speed up training +- ``early_stopping_min_delta`` :raw-html:`🔗︎`, default = ``0.0``, type = double, constraints: ``early_stopping_min_delta >= 0.0`` + + - when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement + - ``first_metric_only`` :raw-html:`🔗︎`, default = ``false``, type = bool - LightGBM allows you to provide multiple evaluation metrics. 
Set this to ``true``, if you want to use only the first metric for early stopping diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index a2f1a02370b7..b626e1b1bcc2 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -394,6 +394,10 @@ struct Config { // desc = can be used to speed up training int early_stopping_round = 0; + // check = >=0.0 + // desc = when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement + double early_stopping_min_delta = 0.0; + // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping bool first_metric_only = false; diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index a19b29e7b584..4a4ab8b4fd13 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -241,6 +241,7 @@ def train( callback.early_stopping( stopping_rounds=params["early_stopping_round"], # type: ignore[arg-type] first_metric_only=first_metric_only, + min_delta=params.get("early_stopping_min_delta", 0.0), verbose=_choose_param_value( main_param_name="verbosity", params=params, @@ -765,6 +766,7 @@ def cv( callback.early_stopping( stopping_rounds=params["early_stopping_round"], # type: ignore[arg-type] first_metric_only=first_metric_only, + min_delta=params.get("early_stopping_min_delta", 0.0), verbose=_choose_param_value( main_param_name="verbosity", params=params, diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp index 5be3b9765bc4..86a8a5a3ca65 100644 --- a/src/boosting/gbdt.cpp +++ b/src/boosting/gbdt.cpp @@ -30,6 +30,7 @@ GBDT::GBDT() config_(nullptr), objective_function_(nullptr), early_stopping_round_(0), + early_stopping_min_delta_(0.0), es_first_metric_only_(false), max_feature_idx_(0), num_tree_per_iteration_(1), @@ -65,6 +66,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective num_class_ = config->num_class; config_ = std::unique_ptr(new Config(*config)); early_stopping_round_ = config_->early_stopping_round; + early_stopping_min_delta_ = config->early_stopping_min_delta; es_first_metric_only_ = config_->first_metric_only; shrinkage_rate_ = config_->learning_rate; @@ -576,7 +578,7 @@ std::string GBDT::OutputMetric(int iter) { if (es_first_metric_only_ && j > 0) { continue; } if (ret.empty() && early_stopping_round_ > 0) { auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back(); - if (cur_score > best_score_[i][j]) { + if (cur_score - best_score_[i][j] > early_stopping_min_delta_) { best_score_[i][j] = cur_score; best_iter_[i][j] = iter; meet_early_stopping_pairs.emplace_back(i, j); diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h index 28ebee446fad..4557830fa863 100644 --- a/src/boosting/gbdt.h +++ b/src/boosting/gbdt.h @@ -532,6 +532,8 @@ class GBDT : public GBDTBase { std::vector> valid_metrics_; /*! \brief Number of rounds for early stopping */ int early_stopping_round_; + /*! \brief Minimum improvement for early stopping */ + double early_stopping_min_delta_; /*! \brief Only use first metric for early stopping */ bool es_first_metric_only_; /*! 
\brief Best iteration(s) for early stopping */
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 394614af3f33..ca4fda1c3d4c 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -214,6 +214,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "extra_trees",
   "extra_seed",
   "early_stopping_round",
+  "early_stopping_min_delta",
   "first_metric_only",
   "max_delta_step",
   "lambda_l1",
@@ -392,6 +393,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::string>& params) {
 
   GetInt(params, "early_stopping_round", &early_stopping_round);
 
+  GetDouble(params, "early_stopping_min_delta", &early_stopping_min_delta);
+  CHECK_GE(early_stopping_min_delta, 0.0);
+
   GetBool(params, "first_metric_only", &first_metric_only);
 
   GetDouble(params, "max_delta_step", &max_delta_step);
@@ -522,6 +526,7 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[extra_trees: " << extra_trees << "]\n";
   str_buf << "[extra_seed: " << extra_seed << "]\n";
   str_buf << "[early_stopping_round: " << early_stopping_round << "]\n";
+  str_buf << "[early_stopping_min_delta: " << early_stopping_min_delta << "]\n";
   str_buf << "[first_metric_only: " << first_metric_only << "]\n";
   str_buf << "[max_delta_step: " << max_delta_step << "]\n";
   str_buf << "[lambda_l1: " << lambda_l1 << "]\n";
@@ -686,6 +691,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::parameter2aliases() {
   {"extra_trees", {"extra_tree"}},
   {"extra_seed", {}},
   {"early_stopping_round", {"early_stopping_rounds", "early_stopping", "n_iter_no_change"}},
+  {"early_stopping_min_delta", {}},
   {"first_metric_only", {}},
   {"max_delta_step", {"max_tree_output", "max_leaf_output"}},
   {"lambda_l1", {"reg_alpha", "l1_regularization"}},
@@ -957,6 +963,7 @@ const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
   {"extra_trees", "bool"},
   {"extra_seed", "int"},
   {"early_stopping_round", "int"},
+  {"early_stopping_min_delta", "double"},
   {"first_metric_only", "bool"},
   {"max_delta_step", "double"},
   {"lambda_l1", "double"},
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 05c5792b1836..29210b94b4a1 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1067,6 +1067,29 @@ def test_early_stopping_min_delta(first_only, single_metric, greater_is_better):
     assert np.greater_equal(last_score, best_score - min_delta).any()
 
 
+@pytest.mark.parametrize("early_stopping_min_delta", [1e3, 0.0])
+def test_early_stopping_min_delta_via_global_params(early_stopping_min_delta):
+    X, y = load_breast_cancer(return_X_y=True)
+    num_trees = 5
+    params = {
+        "num_trees": num_trees,
+        "num_leaves": 5,
+        "objective": "binary",
+        "metric": "None",
+        "verbose": -1,
+        "early_stopping_round": 2,
+        "early_stopping_min_delta": early_stopping_min_delta,
+    }
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+    gbm = lgb.train(params, lgb_train, feval=decreasing_metric, valid_sets=lgb_eval)
+    if early_stopping_min_delta == 0:
+        assert gbm.best_iteration == num_trees
+    else:
+        assert gbm.best_iteration == 1
+
+
 def test_early_stopping_can_be_triggered_via_custom_callback():
     X, y = make_synthetic_regression()
 
@@ -1556,6 +1579,7 @@ def test_all_expected_params_are_written_out_to_model_text(tmp_path):
         "[extra_trees: 0]",
         "[extra_seed: 6642]",
         "[early_stopping_round: 0]",
+        "[early_stopping_min_delta: 0]",
         "[first_metric_only: 0]",
         "[max_delta_step: 0]",
         "[lambda_l1: 0]",

From 6e78e697f8e27d4bbf493a0ebd9134c3254a2ecd Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Fri, 3 May 2024 22:37:27 -0500
Subject: [PATCH 05/41] [c++] remove uses of '..'
in headers (#6409) --- CMakeLists.txt | 20 +++++++++++++++++++- build-cran-package.sh | 16 +++------------- include/LightGBM/utils/common.h | 4 ++-- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad7e44067490..6ae40950d750 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,6 +114,12 @@ include_directories(${EIGEN_DIR}) add_definitions(-DEIGEN_MPL2_ONLY) add_definitions(-DEIGEN_DONT_PARALLELIZE) +set(FAST_DOUBLE_PARSER_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external_libs/fast_double_parser/include") +include_directories(${FAST_DOUBLE_PARSER_INCLUDE_DIR}) + +set(FMT_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external_libs/fmt/include") +include_directories(${FMT_INCLUDE_DIR}) + if(__BUILD_FOR_R) find_package(LibR REQUIRED) message(STATUS "LIBR_EXECUTABLE: ${LIBR_EXECUTABLE}") @@ -702,5 +708,17 @@ install( ) if(INSTALL_HEADERS) - install(DIRECTORY ${LightGBM_HEADER_DIR}/LightGBM DESTINATION ${CMAKE_INSTALL_PREFIX}/include) + install( + DIRECTORY ${LightGBM_HEADER_DIR}/LightGBM + DESTINATION ${CMAKE_INSTALL_PREFIX}/include + ) + install( + FILES ${FAST_DOUBLE_PARSER_INCLUDE_DIR}/fast_double_parser.h + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/LightGBM/utils + ) + install( + DIRECTORY ${FMT_INCLUDE_DIR}/ + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/LightGBM/utils + FILES_MATCHING PATTERN "*.h" + ) endif() diff --git a/build-cran-package.sh b/build-cran-package.sh index 89c6606803c5..2ed2e6e9c4ca 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -76,12 +76,12 @@ fi cp \ external_libs/fast_double_parser/include/fast_double_parser.h \ - "${TEMP_R_DIR}/src/include/LightGBM" + "${TEMP_R_DIR}/src/include/LightGBM/utils" -mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt" +mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/utils/fmt" cp \ external_libs/fmt/include/fmt/*.h \ - "${TEMP_R_DIR}/src/include/LightGBM/fmt/" + "${TEMP_R_DIR}/src/include/LightGBM/utils/fmt" # including only specific files from Eigen, to keep the R package # small and avoid redistributing code with licenses incompatible with @@ -155,16 +155,6 @@ cd "${TEMP_R_DIR}" done find . 
-name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' -exec rm {} \; - sed \ - -i.bak \ - -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \ - src/include/LightGBM/utils/common.h - - sed \ - -i.bak \ - -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \ - src/include/LightGBM/utils/common.h - # 'processx' is listed as a 'Suggests' dependency in DESCRIPTION # because it is used in install.libs.R, a file that is not # included in the CRAN distribution of the package diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index f38375fb7370..6c3ebf5d0096 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -30,8 +30,8 @@ #include #define FMT_HEADER_ONLY -#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" -#include "../../../external_libs/fmt/include/fmt/format.h" +#include "fast_double_parser.h" +#include "fmt/format.h" #ifdef _MSC_VER #include From f539536073f49dd2d266d8bd5cf7bbed6441a862 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 6 May 2024 12:50:48 -0500 Subject: [PATCH 06/41] [docs] ensure pkgdown site index accurately reflects R-package public API (fixes #6444) (#6445) --- R-package/pkgdown/_pkgdown.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml index 99a3b1010d41..c2d6718a2926 100644 --- a/R-package/pkgdown/_pkgdown.yml +++ b/R-package/pkgdown/_pkgdown.yml @@ -63,7 +63,6 @@ reference: - '`dimnames.lgb.Dataset`' - '`get_field`' - '`set_field`' - - '`slice`' - '`lgb.Dataset`' - '`lgb.Dataset.construct`' - '`lgb.Dataset.create.valid`' @@ -71,6 +70,7 @@ reference: - '`lgb.Dataset.set.categorical`' - '`lgb.Dataset.set.reference`' - '`lgb.convert_with_rules`' + - '`lgb.slice.Dataset`' - title: Machine Learning desc: Train models with LightGBM and then use them to make predictions on new data contents: @@ -78,6 +78,7 @@ reference: - '`lgb.train`' - '`predict.lgb.Booster`' - '`lgb.cv`' + - '`lgb.configure_fast_predict`' - title: Saving / Loading Models desc: Save and load LightGBM models contents: @@ -85,6 +86,9 @@ reference: - '`lgb.save`' - '`lgb.load`' - '`lgb.model.dt.tree`' + - '`lgb.drop_serialized`' + - '`lgb.make_serializable`' + - '`lgb.restore_handle`' - title: Model Interpretation desc: Analyze your models contents: @@ -93,3 +97,10 @@ reference: - '`lgb.interprete`' - '`lgb.plot.importance`' - '`lgb.plot.interpretation`' + - '`print.lgb.Booster`' + - '`summary.lgb.Booster`' + - title: Multithreading Control + desc: Manage degree of parallelism used by LightGBM + contents: + - '`getLGBMThreads`' + - '`setLGBMThreads`' From 88cec4776e621ac93f9ba03aa0015035570545da Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 7 May 2024 12:36:59 -0500 Subject: [PATCH 07/41] deprecate HDFS support (#6443) --- CMakeLists.txt | 6 ++++++ docs/Installation-Guide.rst | 4 ++++ python-package/README.rst | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ae40950d750..2bb8e6b41794 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,6 +286,12 @@ if(USE_CUDA) endif() if(USE_HDFS) + message( + DEPRECATION + "HDFS support in LightGBM is deprecated, and will be removed in a future release.\ + See https://github.com/microsoft/LightGBM/issues/6436. 
+ " + ) find_package(JNI REQUIRED) find_path(HDFS_INCLUDE_DIR hdfs.h REQUIRED) find_library(HDFS_LIB NAMES hdfs REQUIRED) diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index d6362f1c2103..c59898032b70 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -631,6 +631,10 @@ Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows. Build HDFS Version ~~~~~~~~~~~~~~~~~~ +.. warning:: + HDFS support in LightGBM is deprecated, and will be removed in a future release. + See https://github.com/microsoft/LightGBM/issues/6436. + The HDFS version of LightGBM was tested on CDH-5.14.4 cluster. Linux diff --git a/python-package/README.rst b/python-package/README.rst index c3b73ffdf5d1..68f267ec659e 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -160,6 +160,10 @@ To use the CUDA version within Python, pass ``{"device": "cuda"}`` respectively Build HDFS Version ~~~~~~~~~~~~~~~~~~ +.. warning:: + HDFS support in LightGBM is deprecated, and will be removed in a future release. + See https://github.com/microsoft/LightGBM/issues/6436. + .. code:: sh pip install lightgbm --config-settings=cmake.define.USE_HDFS=ON From 20f20928f016e197da74f104a76870c5883cc778 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 8 May 2024 16:08:29 -0500 Subject: [PATCH 08/41] [python-package] upgrade to scikit-build-core 0.9.3 (#6263) --- python-package/pyproject.toml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 0f07c8978532..387fbd2e407a 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -57,26 +57,30 @@ changelog = "https://github.com/microsoft/LightGBM/releases" # start:build-system [build-system] -requires = ["scikit-build-core>=0.4.4"] +requires = ["scikit-build-core>=0.9.3"] build-backend = "scikit_build_core.build" # based on https://github.com/scikit-build/scikit-build-core#configuration [tool.scikit-build] -cmake.minimum-version = "3.18" -ninja.minimum-version = "1.11" +cmake.version = ">=3.18" +ninja.version = ">=1.11" ninja.make-fallback = true cmake.args = [ "-D__BUILD_FOR_PYTHON:BOOL=ON" ] cmake.verbose = false cmake.build-type = "Release" +cmake.targets = ["_lightgbm"] +# stripping binaries should be turned back on once this is fixed: +# https://github.com/jameslamb/pydistcheck/issues/235 +install.strip = false logging.level = "INFO" sdist.reproducible = true wheel.py-api = "py3" experimental = false strict-config = true -minimum-version = "0.4.4" +minimum-version = "0.9.3" # end:build-system From ae55f32b5ab86bbc9f35673eee9ba93966c6cf12 Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Fri, 10 May 2024 05:55:40 +0200 Subject: [PATCH 09/41] [ci] Add dependabot configuration (#6449) --- .github/dependabot.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000000..9447519369c0 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,14 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: monthly + groups: + ci-dependencies: + patterns: + - "*" + commit-message: + prefix: "[ci]" + labels: + - maintenance From a70e832782445e5f49f2a5400896f2dc21475037 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 10 May 2024 19:26:39 -0500 Subject: [PATCH 10/41] [R-package] [python-package] deprecate Dataset arguments 
to cv() and train() (#6446) --- R-package/R/lgb.cv.R | 37 ++++++++++++++++--- R-package/R/lgb.train.R | 23 ++++++++++-- R-package/R/lightgbm.R | 21 ++++++++--- R-package/R/utils.R | 16 ++++++++ R-package/man/lgb.cv.Rd | 19 +++++++--- R-package/man/lgb.train.Rd | 15 ++++++-- R-package/man/lightgbm.Rd | 13 +++++-- R-package/tests/testthat/test_basic.R | 22 ++++++----- examples/python-guide/advanced_example.py | 12 +++--- .../notebooks/interactive_plot_example.ipynb | 9 +++-- examples/python-guide/plot_example.py | 9 +++-- python-package/lightgbm/engine.py | 29 +++++++++++++++ python-package/lightgbm/sklearn.py | 2 +- tests/python_package_test/test_engine.py | 11 +++--- tests/python_package_test/test_utilities.py | 8 ++-- 15 files changed, 185 insertions(+), 61 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 13685e7f2204..c22d0ea848bb 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -25,8 +25,8 @@ CVBooster <- R6::R6Class( #' @description Cross validation logic used by LightGBM #' @inheritParams lgb_shared_params #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. -#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} -#' @param weight vector of response values. If not NULL, will set to dataset +#' @param label Deprecated. See "Deprecated Arguments" section below. +#' @param weight Deprecated. See "Deprecated Arguments" section below. #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param showsd \code{boolean}, whether to show standard deviation of cross validation. #' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a @@ -36,10 +36,8 @@ CVBooster <- R6::R6Class( #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds #' (each element must be a vector of test fold's indices). When folds are supplied, #' the \code{nfold} and \code{stratified} parameters are ignored. -#' @param colnames feature names, if not null, will use this to overwrite the names in dataset -#' @param categorical_feature categorical features. This can either be a character vector of feature -#' names or an integer vector with the indices of the features (e.g. -#' \code{c(1L, 10L)} to say "the first and tenth columns"). +#' @param colnames Deprecated. See "Deprecated Arguments" section below. +#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below. #' @param callbacks List of callback functions that are applied at each iteration. #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model #' into a predictor model which frees up memory and the original datasets @@ -70,6 +68,13 @@ CVBooster <- R6::R6Class( #' , nfold = 3L #' ) #' } +#' +#' @section Deprecated Arguments: +#' +#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset} +#' to argument \code{'data'}. It will also remove support for passing arguments +#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}. +#' #' @importFrom data.table data.table setorderv #' @export lgb.cv <- function(params = list() @@ -102,12 +107,32 @@ lgb.cv <- function(params = list() # If 'data' is not an lgb.Dataset, try to construct one using 'label' if (!.is_Dataset(x = data)) { + warning(paste0( + "Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. 
" + , "Either pass an lgb.Dataset object, or use lightgbm()." + )) if (is.null(label)) { stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'") } data <- lgb.Dataset(data = data, label = label) } + # raise deprecation warnings if necessary + # ref: https://github.com/microsoft/LightGBM/issues/6435 + args <- names(match.call()) + if ("categorical_feature" %in% args) { + .emit_dataset_kwarg_warning("categorical_feature", "lgb.cv") + } + if ("colnames" %in% args) { + .emit_dataset_kwarg_warning("colnames", "lgb.cv") + } + if ("label" %in% args) { + .emit_dataset_kwarg_warning("label", "lgb.cv") + } + if ("weight" %in% args) { + .emit_dataset_kwarg_warning("weight", "lgb.cv") + } + # set some parameters, resolving the way they were passed in with other parameters # in `params`. # this ensures that the model stored with Booster$save() correctly represents diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 8a299fb6b8ac..dafb4d83b66b 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -6,10 +6,8 @@ #' @inheritParams lgb_shared_params #' @param valids a list of \code{lgb.Dataset} objects, used for validation #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} -#' @param colnames feature names, if not null, will use this to overwrite the names in dataset -#' @param categorical_feature categorical features. This can either be a character vector of feature -#' names or an integer vector with the indices of the features (e.g. -#' \code{c(1L, 10L)} to say "the first and tenth columns"). +#' @param colnames Deprecated. See "Deprecated Arguments" section below. +#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below. #' @param callbacks List of callback functions that are applied at each iteration. #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the #' booster model into a predictor model which frees up memory and the @@ -43,6 +41,13 @@ #' , early_stopping_rounds = 3L #' ) #' } +#' +#' @section Deprecated Arguments: +#' +#' A future release of \code{lightgbm} will remove support for passing arguments +#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to +#' \code{lgb.Dataset} instead. +#' #' @export lgb.train <- function(params = list(), data, @@ -78,6 +83,16 @@ lgb.train <- function(params = list(), } } + # raise deprecation warnings if necessary + # ref: https://github.com/microsoft/LightGBM/issues/6435 + args <- names(match.call()) + if ("categorical_feature" %in% args) { + .emit_dataset_kwarg_warning("categorical_feature", "lgb.train") + } + if ("colnames" %in% args) { + .emit_dataset_kwarg_warning("colnames", "lgb.train") + } + # set some parameters, resolving the way they were passed in with other parameters # in `params`. # this ensures that the model stored with Booster$save() correctly represents diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index f1a0090f950a..efa593ffe12f 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -144,6 +144,12 @@ NULL #' #' \emph{New in version 4.0.0} #' +#' @param colnames Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}. +#' @param categorical_feature categorical features. This can either be a character vector of feature +#' names or an integer vector with the indices of the features (e.g. +#' \code{c(1L, 10L)} to say "the first and tenth columns"). 
+#' Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
+#'
 #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
 #' \itemize{
 #'    \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@@ -152,10 +158,6 @@ NULL
 #'                 \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
 #'    \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
 #'    \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
-#'    \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
-#'    \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
-#'                 names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
-#'                 say "the first and tenth columns").}
 #'    \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'                             into a predictor model which frees up memory and the original datasets}
 #' }
@@ -176,6 +178,8 @@ lightgbm <- function(data,
                      objective = "auto",
                      init_score = NULL,
                      num_threads = NULL,
+                     colnames = NULL,
+                     categorical_feature = NULL,
                      ...) {
 
   # validate inputs early to avoid unnecessary computation
@@ -221,7 +225,14 @@ lightgbm <- function(data,
 
   # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
   if (!.is_Dataset(x = dtrain)) {
-    dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score)
+    dtrain <- lgb.Dataset(
+      data = data
+      , label = label
+      , weight = weights
+      , init_score = init_score
+      , categorical_feature = categorical_feature
+      , colnames = colnames
+    )
   }
 
   train_args <- list(
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 1ac6f197ca77..646a306c97f6 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -260,3 +260,19 @@
     return(a == b)
   }
 }
+
+# ref: https://github.com/microsoft/LightGBM/issues/6435
+.emit_dataset_kwarg_warning <- function(argname, calling_function) {
+  msg <- sprintf(
+    paste0(
+      "Argument '%s' to %s() is deprecated and will be removed in a future release. "
+      , "Set '%s' with lgb.Dataset() instead. "
+      , "See https://github.com/microsoft/LightGBM/issues/6435."
+    )
+    , argname
+    , calling_function
+    , argname
+  )
+  warning(msg)
+  return(invisible(NULL))
+}
diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
index 7ea2928c6166..cee059d494ca 100644
--- a/R-package/man/lgb.cv.Rd
+++ b/R-package/man/lgb.cv.Rd
@@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
 
 \item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
 
-\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
+\item{label}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{weight}{vector of response values. If not NULL, will set to dataset}
+\item{weight}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{obj}{objective function, can be character or custom objective function. Examples include
 \code{regression}, \code{regression_l1}, \code{huber},
@@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
 
 \item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
 
-\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
+\item{colnames}{Deprecated.
See "Deprecated Arguments" section below.} -\item{categorical_feature}{categorical features. This can either be a character vector of feature -names or an integer vector with the indices of the features (e.g. -\code{c(1L, 10L)} to say "the first and tenth columns").} +\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.} \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null, training will stop if the evaluation of any metric on any validation set @@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}. \description{ Cross validation logic used by LightGBM } +\section{Deprecated Arguments}{ + + +A future release of \code{lightgbm} will require passing an \code{lgb.Dataset} +to argument \code{'data'}. It will also remove support for passing arguments +\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}. +} + \section{Early Stopping}{ @@ -171,4 +177,5 @@ model <- lgb.cv( , nfold = 3L ) } + } diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 557c85b7f9dc..ebbfc206998e 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -82,11 +82,9 @@ printing of evaluation during training} \item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model} -\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset} +\item{colnames}{Deprecated. See "Deprecated Arguments" section below.} -\item{categorical_feature}{categorical features. This can either be a character vector of feature -names or an integer vector with the indices of the features (e.g. -\code{c(1L, 10L)} to say "the first and tenth columns").} +\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.} \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null, training will stop if the evaluation of any metric on any validation set @@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}}, this function is focused on performance (e.g. speed, memory efficiency). It is also less likely to have breaking API changes in new releases than \code{\link{lightgbm}}. } +\section{Deprecated Arguments}{ + + +A future release of \code{lightgbm} will remove support for passing arguments +\code{'categorical_feature'} and \code{'colnames'}. Pass those things to +\code{lgb.Dataset} instead. +} + \section{Early Stopping}{ @@ -154,4 +160,5 @@ model <- lgb.train( , early_stopping_rounds = 3L ) } + } diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 09d7704605c1..90cb3166bf5c 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -19,6 +19,8 @@ lightgbm( objective = "auto", init_score = NULL, num_threads = NULL, + colnames = NULL, + categorical_feature = NULL, ... ) } @@ -96,6 +98,13 @@ set to the iteration number of the best iteration.} \emph{New in version 4.0.0}} +\item{colnames}{Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.} + +\item{categorical_feature}{categorical features. This can either be a character vector of feature +names or an integer vector with the indices of the features (e.g. +\code{c(1L, 10L)} to say "the first and tenth columns"). +Only used if \code{data} is not an \code{\link{lgb.Dataset}}.} + \item{...}{Additional arguments passed to \code{\link{lgb.train}}. 
For example \itemize{ \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} @@ -104,10 +113,6 @@ set to the iteration number of the best iteration.} \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function} \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}} - \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset} - \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature - names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to - say "the first and tenth columns").} \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets} }} diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 75abd26dd152..74c46dcef141 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", { } }) -test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", { +test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", { bad_values <- list( 4L , "hello" @@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th test_that("lgb.train() supports non-ASCII feature names", { - dtrain <- lgb.Dataset( - data = matrix(rnorm(400L), ncol = 4L) - , label = rnorm(100L) - , params = list(num_threads = .LGB_MAX_THREADS) - ) # content below is equivalent to # # feature_names <- c("F_零", "F_一", "F_二", "F_三") @@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", { , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c))) , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89))) ) + dtrain <- lgb.Dataset( + data = matrix(rnorm(400L), ncol = 4L) + , label = rnorm(100L) + , params = list(num_threads = .LGB_MAX_THREADS) + , colnames = feature_names + ) bst <- lgb.train( data = dtrain , nrounds = 5L @@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", { , verbose = .LGB_VERBOSITY , num_threads = .LGB_MAX_THREADS ) - , colnames = feature_names ) expect_true(.is_Booster(bst)) dumped_model <- jsonlite::fromJSON(bst$dump_model()) @@ -2838,7 +2838,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), { set.seed(1L) - dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS)) + dtrain <- lgb.Dataset( + train$data + , label = train$label + , params = list(num_threads = .LGB_MAX_THREADS) + ) params <- list( objective = "regression" @@ -2854,6 +2858,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai pred1 <- bst$predict(test$data) new_colnames <- paste0(colnames(train$data), "_x") + dtrain$set_colnames(new_colnames) params <- list( objective = "regression" , interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L]) @@ -2864,7 +2869,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai data = dtrain , params 
= params , nrounds = 2L - , colnames = new_colnames ) pred2 <- bst$predict(test$data) diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py index 4f0263286237..601a04d01481 100644 --- a/examples/python-guide/advanced_example.py +++ b/examples/python-guide/advanced_example.py @@ -25,9 +25,14 @@ num_train, num_feature = X_train.shape +# generate feature names +feature_name = [f"feature_{col}" for col in range(num_feature)] + # create dataset for lightgbm # if you want to re-use data, remember to set free_raw_data=False -lgb_train = lgb.Dataset(X_train, y_train, weight=W_train, free_raw_data=False) +lgb_train = lgb.Dataset( + X_train, y_train, weight=W_train, feature_name=feature_name, categorical_feature=[21], free_raw_data=False +) lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False) # specify your configurations as a dict @@ -43,9 +48,6 @@ "verbose": 0, } -# generate feature names -feature_name = [f"feature_{col}" for col in range(num_feature)] - print("Starting training...") # feature_name and categorical_feature gbm = lgb.train( @@ -53,8 +55,6 @@ lgb_train, num_boost_round=10, valid_sets=lgb_train, # eval training data - feature_name=feature_name, - categorical_feature=[21], ) print("Finished first 10 rounds...") diff --git a/examples/python-guide/notebooks/interactive_plot_example.ipynb b/examples/python-guide/notebooks/interactive_plot_example.ipynb index 2cab2ff43881..cc8efa2c187b 100644 --- a/examples/python-guide/notebooks/interactive_plot_example.ipynb +++ b/examples/python-guide/notebooks/interactive_plot_example.ipynb @@ -78,7 +78,12 @@ "metadata": {}, "outputs": [], "source": [ - "lgb_train = lgb.Dataset(X_train, y_train)\n", + "lgb_train = lgb.Dataset(\n", + " X_train,\n", + " y_train,\n", + " feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n", + " categorical_feature=[21],\n", + ")\n", "lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)" ] }, @@ -144,8 +149,6 @@ " lgb_train,\n", " num_boost_round=100,\n", " valid_sets=[lgb_train, lgb_test],\n", - " feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n", - " categorical_feature=[21],\n", " callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],\n", ")" ] diff --git a/examples/python-guide/plot_example.py b/examples/python-guide/plot_example.py index efbb971d52a4..eaef1e91b466 100644 --- a/examples/python-guide/plot_example.py +++ b/examples/python-guide/plot_example.py @@ -22,7 +22,12 @@ X_test = df_test.drop(0, axis=1) # create dataset for lightgbm -lgb_train = lgb.Dataset(X_train, y_train) +lgb_train = lgb.Dataset( + X_train, + y_train, + feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])], + categorical_feature=[21], +) lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train) # specify your configurations as a dict @@ -37,8 +42,6 @@ lgb_train, num_boost_round=100, valid_sets=[lgb_train, lgb_test], - feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])], - categorical_feature=[21], callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)], ) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 4a4ab8b4fd13..74b211f4a426 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -2,6 +2,7 @@ """Library with training routines of LightGBM.""" import copy import json +import warnings from collections import OrderedDict, defaultdict from operator import attrgetter from pathlib import Path @@ 
-13,6 +14,7 @@ from .basic import ( Booster, Dataset, + LGBMDeprecationWarning, LightGBMError, _choose_param_value, _ConfigAliases, @@ -51,6 +53,15 @@ ] +def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None: + msg = ( + f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in " + f"a future release. Set '{argname}' when calling lightgbm.Dataset() instead. " + "See https://github.com/microsoft/LightGBM/issues/6435." + ) + warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2) + + def train( params: Dict[str, Any], train_set: Dataset, @@ -103,9 +114,11 @@ def train( init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. feature_name : list of str, or 'auto', optional (default="auto") + **Deprecated.** Set ``feature_name`` on ``train_set`` instead. Feature names. If 'auto' and data is pandas DataFrame, data columns names are used. categorical_feature : list of str or int, or 'auto', optional (default="auto") + **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. Categorical features. If list of int, interpreted as indices. If list of str, interpreted as feature names (need to specify ``feature_name`` as well). @@ -166,6 +179,13 @@ def train( f"Item {i} has type '{type(valid_item).__name__}'." ) + # raise deprecation warnings if necessary + # ref: https://github.com/microsoft/LightGBM/issues/6435 + if categorical_feature != "auto": + _emit_dataset_kwarg_warning("train", "categorical_feature") + if feature_name != "auto": + _emit_dataset_kwarg_warning("train", "feature_name") + # create predictor first params = copy.deepcopy(params) params = _choose_param_value( @@ -625,9 +645,11 @@ def cv( init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. feature_name : list of str, or 'auto', optional (default="auto") + **Deprecated.** Set ``feature_name`` on ``train_set`` instead. Feature names. If 'auto' and data is pandas DataFrame, data columns names are used. categorical_feature : list of str or int, or 'auto', optional (default="auto") + **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. Categorical features. If list of int, interpreted as indices. If list of str, interpreted as feature names (need to specify ``feature_name`` as well). @@ -693,6 +715,13 @@ def cv( if num_boost_round <= 0: raise ValueError(f"num_boost_round must be greater than 0. 
Got {num_boost_round}.") + # raise deprecation warnings if necessary + # ref: https://github.com/microsoft/LightGBM/issues/6435 + if categorical_feature != "auto": + _emit_dataset_kwarg_warning("cv", "categorical_feature") + if feature_name != "auto": + _emit_dataset_kwarg_warning("cv", "feature_name") + params = copy.deepcopy(params) params = _choose_param_value( main_param_name="objective", diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 0b4c99933652..1ea7b47c5462 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -862,6 +862,7 @@ def fit( group=group, init_score=init_score, categorical_feature=categorical_feature, + feature_name=feature_name, params=params, ) @@ -928,7 +929,6 @@ def _get_meta_data(collection, name, i): valid_names=eval_names, feval=eval_metrics_callable, # type: ignore[arg-type] init_model=init_model, - feature_name=feature_name, callbacks=callbacks, ) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 29210b94b4a1..7b1009632626 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1421,13 +1421,14 @@ def test_cvbooster_picklable(serializer): def test_feature_name(): X_train, y_train = make_synthetic_regression() params = {"verbose": -1} - lgb_train = lgb.Dataset(X_train, y_train) feature_names = [f"f_{i}" for i in range(X_train.shape[-1])] - gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names) + lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names) + gbm = lgb.train(params, lgb_train, num_boost_round=5) assert feature_names == gbm.feature_name() # test feature_names with whitespaces feature_names_with_space = [f"f {i}" for i in range(X_train.shape[-1])] - gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names_with_space) + lgb_train.set_feature_name(feature_names_with_space) + gbm = lgb.train(params, lgb_train, num_boost_round=5) assert feature_names == gbm.feature_name() @@ -1437,9 +1438,9 @@ def test_feature_name_with_non_ascii(): # This has non-ascii strings. 
feature_names = ["F_零", "F_一", "F_二", "F_三"] params = {"verbose": -1} - lgb_train = lgb.Dataset(X_train, y_train) + lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names) - gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names) + gbm = lgb.train(params, lgb_train, num_boost_round=5) assert feature_names == gbm.feature_name() gbm.save_model("lgb.model") diff --git a/tests/python_package_test/test_utilities.py b/tests/python_package_test/test_utilities.py index 08208ccfbf4a..3359d060e109 100644 --- a/tests/python_package_test/test_utilities.py +++ b/tests/python_package_test/test_utilities.py @@ -25,8 +25,8 @@ def dummy_metric(_, __): X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) y = np.array([0, 1, 1, 0]) - lgb_train = lgb.Dataset(X, y) - lgb_valid = lgb.Dataset(X, y) # different object for early-stopping + lgb_train = lgb.Dataset(X, y, categorical_feature=[1]) + lgb_valid = lgb.Dataset(X, y, categorical_feature=[1]) # different object for early-stopping eval_records = {} callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)] @@ -36,7 +36,6 @@ def dummy_metric(_, __): num_boost_round=10, feval=dummy_metric, valid_sets=[lgb_valid], - categorical_feature=[1], callbacks=callbacks, ) @@ -151,12 +150,11 @@ def custom_warning(self, msg: str) -> None: logged_messages = [] X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) y = np.array([0, 1, 1, 0]) - lgb_data = lgb.Dataset(X, y) + lgb_data = lgb.Dataset(X, y, categorical_feature=[1]) lgb.train( {"objective": "binary", "metric": "auc"}, lgb_data, num_boost_round=10, valid_sets=[lgb_data], - categorical_feature=[1], ) assert logged_messages, "custom logger was not called" From e0ac63568c900405e5e438d95c539f3490919aab Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 16 May 2024 05:35:11 +0200 Subject: [PATCH 11/41] [R-package] expose start_iteration to dump/save/lgb.model.dt.tree (#6398) --- R-package/R/lgb.Booster.R | 46 ++++++++-- R-package/R/lgb.model.dt.tree.R | 21 +++-- R-package/man/lgb.dump.Rd | 8 +- R-package/man/lgb.model.dt.tree.Rd | 11 ++- R-package/man/lgb.save.Rd | 10 +- R-package/src/lightgbm_R.cpp | 27 +++--- R-package/src/lightgbm_R.h | 12 ++- R-package/tests/testthat/test_lgb.Booster.R | 92 +++++++++++++++++++ .../tests/testthat/test_lgb.model.dt.tree.R | 26 ++++++ 9 files changed, 214 insertions(+), 39 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 4437c6fa552e..7f0ccc31dd49 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -416,7 +416,12 @@ Booster <- R6::R6Class( }, # Save model - save_model = function(filename, num_iteration = NULL, feature_importance_type = 0L) { + save_model = function( + filename + , num_iteration = NULL + , feature_importance_type = 0L + , start_iteration = 1L + ) { self$restore_handle() @@ -432,12 +437,18 @@ Booster <- R6::R6Class( , as.integer(num_iteration) , as.integer(feature_importance_type) , filename + , as.integer(start_iteration) - 1L # Turn to 0-based ) return(invisible(self)) }, - save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L, as_char = TRUE) { + save_model_to_string = function( + num_iteration = NULL + , feature_importance_type = 0L + , as_char = TRUE + , start_iteration = 1L + ) { self$restore_handle() @@ -450,6 +461,7 @@ Booster <- R6::R6Class( , private$handle , as.integer(num_iteration) , as.integer(feature_importance_type) + , 
as.integer(start_iteration) - 1L # Turn to 0-based ) if (as_char) { @@ -461,7 +473,9 @@ Booster <- R6::R6Class( }, # Dump model in memory - dump_model = function(num_iteration = NULL, feature_importance_type = 0L) { + dump_model = function( + num_iteration = NULL, feature_importance_type = 0L, start_iteration = 1L + ) { self$restore_handle() @@ -474,6 +488,7 @@ Booster <- R6::R6Class( , private$handle , as.integer(num_iteration) , as.integer(feature_importance_type) + , as.integer(start_iteration) - 1L # Turn to 0-based ) return(model_str) @@ -1288,8 +1303,11 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' @title Save LightGBM model #' @description Save LightGBM model #' @param booster Object of class \code{lgb.Booster} -#' @param filename saved filename -#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration +#' @param filename Saved filename +#' @param num_iteration Number of iterations to save, NULL or <= 0 means use best iteration +#' @param start_iteration Index (1-based) of the first boosting round to save. +#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model +#' means "save the fifth, sixth, and seventh tree" #' #' @return lgb.Booster #' @@ -1322,7 +1340,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' lgb.save(model, tempfile(fileext = ".txt")) #' } #' @export -lgb.save <- function(booster, filename, num_iteration = NULL) { +lgb.save <- function( + booster, filename, num_iteration = NULL, start_iteration = 1L + ) { if (!.is_Booster(x = booster)) { stop("lgb.save: booster should be an ", sQuote("lgb.Booster")) @@ -1338,6 +1358,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { invisible(booster$save_model( filename = filename , num_iteration = num_iteration + , start_iteration = start_iteration )) ) @@ -1347,7 +1368,10 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' @title Dump LightGBM model to json #' @description Dump LightGBM model to json #' @param booster Object of class \code{lgb.Booster} -#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration +#' @param num_iteration Number of iterations to be dumped. NULL or <= 0 means use best iteration +#' @param start_iteration Index (1-based) of the first boosting round to dump. +#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model +#' means "dump the fifth, sixth, and seventh tree" #' #' @return json format of model #' @@ -1380,14 +1404,18 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' json_model <- lgb.dump(model) #' } #' @export -lgb.dump <- function(booster, num_iteration = NULL) { +lgb.dump <- function(booster, num_iteration = NULL, start_iteration = 1L) { if (!.is_Booster(x = booster)) { stop("lgb.dump: booster should be an ", sQuote("lgb.Booster")) } # Return booster at requested iteration - return(booster$dump_model(num_iteration = num_iteration)) + return( + booster$dump_model( + num_iteration = num_iteration, start_iteration = start_iteration + ) + ) } diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index be877c40de0c..cc58011a2d11 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -1,9 +1,11 @@ #' @name lgb.model.dt.tree #' @title Parse a LightGBM model json dump #' @description Parse a LightGBM model json dump into a \code{data.table} structure. 
-#' @param model object of class \code{lgb.Booster}
-#' @param num_iteration number of iterations you want to predict with. NULL or
-#' <= 0 means use best iteration
+#' @param model object of class \code{lgb.Booster}.
+#' @param num_iteration Number of iterations to include. NULL or <= 0 means use best iteration.
+#' @param start_iteration Index (1-based) of the first boosting round to include in the output.
+#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
+#' means "return information about the fifth, sixth, and seventh trees".
 #' @return
 #' A \code{data.table} with detailed information about model trees' nodes and leaves.
 #'
@@ -51,9 +53,15 @@
 #' @importFrom data.table := rbindlist
 #' @importFrom jsonlite fromJSON
 #' @export
-lgb.model.dt.tree <- function(model, num_iteration = NULL) {
-
-  json_model <- lgb.dump(booster = model, num_iteration = num_iteration)
+lgb.model.dt.tree <- function(
+    model, num_iteration = NULL, start_iteration = 1L
+  ) {
+
+  json_model <- lgb.dump(
+    booster = model
+    , num_iteration = num_iteration
+    , start_iteration = start_iteration
+  )
 
   parsed_json_model <- jsonlite::fromJSON(
     txt = json_model
@@ -84,7 +92,6 @@
   tree_dt[, split_feature := feature_names]
 
   return(tree_dt)
-
 }
 
diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd
index 39f0e3018ac7..e1790946d8f6 100644
--- a/R-package/man/lgb.dump.Rd
+++ b/R-package/man/lgb.dump.Rd
@@ -4,12 +4,16 @@
 \alias{lgb.dump}
 \title{Dump LightGBM model to json}
 \usage{
-lgb.dump(booster, num_iteration = NULL)
+lgb.dump(booster, num_iteration = NULL, start_iteration = 1L)
 }
 \arguments{
 \item{booster}{Object of class \code{lgb.Booster}}
 
-\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
+\item{num_iteration}{Number of iterations to be dumped. NULL or <= 0 means use best iteration}
+
+\item{start_iteration}{Index (1-based) of the first boosting round to dump.
+For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
+means "dump the fifth, sixth, and seventh tree"}
 }
 \value{
 json format of model
 
diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd
index 60ef8cdac133..126ecc7b23cc 100644
--- a/R-package/man/lgb.model.dt.tree.Rd
+++ b/R-package/man/lgb.model.dt.tree.Rd
@@ -4,13 +4,16 @@
 \alias{lgb.model.dt.tree}
 \title{Parse a LightGBM model json dump}
 \usage{
-lgb.model.dt.tree(model, num_iteration = NULL)
+lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L)
 }
 \arguments{
 \item{model}{object of class \code{lgb.Booster}.}
 
-\item{num_iteration}{number of iterations you want to predict with. NULL or
-<= 0 means use best iteration}
+\item{num_iteration}{Number of iterations to include. NULL or <= 0 means use best iteration.}
+
+\item{start_iteration}{Index (1-based) of the first boosting round to include in the output.
+For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
+means "return information about the fifth, sixth, and seventh trees".}
 }
 \value{
 A \code{data.table} with detailed information about model trees' nodes and leaves.
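
A minimal usage sketch of how the new 1-based `start_iteration` composes with
`num_iteration` across `lgb.dump()`, `lgb.save()`, and `lgb.model.dt.tree()`.
This is illustration only, assuming the patched package is installed; it reuses
the same agaricus data as the tests added later in this patch:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    # train a small 5-round binary classifier
    bst <- lightgbm(
      data = as.matrix(agaricus.train$data)
      , label = agaricus.train$label
      , params = list(objective = "binary")
      , nrounds = 5L
    )
    # trees 3, 4, and 5, dumped as JSON
    json_last3 <- lgb.dump(bst, num_iteration = 3L, start_iteration = 3L)
    # the same slice, parsed into a data.table
    dt_last3 <- lgb.model.dt.tree(bst, num_iteration = 3L, start_iteration = 3L)
    # write only the first two trees to disk
    lgb.save(bst, filename = tempfile(fileext = ".txt"), num_iteration = 2L)
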
diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd
index 62ec0ed462f6..118c008464d7 100644
--- a/R-package/man/lgb.save.Rd
+++ b/R-package/man/lgb.save.Rd
@@ -4,14 +4,18 @@
 \alias{lgb.save}
 \title{Save LightGBM model}
 \usage{
-lgb.save(booster, filename, num_iteration = NULL)
+lgb.save(booster, filename, num_iteration = NULL, start_iteration = 1L)
 }
 \arguments{
 \item{booster}{Object of class \code{lgb.Booster}}
 
-\item{filename}{saved filename}
+\item{filename}{Saved filename}
 
-\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
+\item{num_iteration}{Number of iterations to save, NULL or <= 0 means use best iteration}
+
+\item{start_iteration}{Index (1-based) of the first boosting round to save.
+For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
+means "save the fifth, sixth, and seventh tree"}
 }
 \value{
 lgb.Booster
diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index a76a56c06b24..91c7c915abe5 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -1093,11 +1093,12 @@ SEXP LGBM_BoosterPredictForMatSingleRowFast_R(SEXP handle_fastConfig,
 SEXP LGBM_BoosterSaveModel_R(SEXP handle,
                              SEXP num_iteration,
                              SEXP feature_importance_type,
-                             SEXP filename) {
+                             SEXP filename,
+                             SEXP start_iteration) {
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
-  CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), 0, Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
+  CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
   UNPROTECT(1);
   return R_NilValue;
   R_API_END();
@@ -1105,20 +1106,22 @@ SEXP LGBM_BoosterSaveModel_R(SEXP handle,
 
 SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
                                      SEXP num_iteration,
-                                     SEXP feature_importance_type) {
+                                     SEXP feature_importance_type,
+                                     SEXP start_iteration) {
   SEXP cont_token = PROTECT(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int64_t out_len = 0;
   int64_t buf_len = 1024 * 1024;
   int num_iter = Rf_asInteger(num_iteration);
+  int start_iter = Rf_asInteger(start_iteration);
   int importance_type = Rf_asInteger(feature_importance_type);
   std::vector<char> inner_char_buf(buf_len);
-  CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
+  CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
   SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token));
   // if the model string was larger than the initial buffer, call the function again, writing directly to the R object
   if (out_len > buf_len) {
-    CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
+    CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
   } else {
     std::copy(inner_char_buf.begin(), inner_char_buf.begin() + out_len, reinterpret_cast<char*>(RAW(model_str)));
   }
@@ -1129,7 +1132,8 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
 
 SEXP LGBM_BoosterDumpModel_R(SEXP handle,
                              SEXP num_iteration,
-                             SEXP feature_importance_type) {
+                             SEXP feature_importance_type,
+                             SEXP start_iteration) {
   SEXP cont_token = PROTECT(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
@@ -1137,13 +1141,14 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
   int64_t out_len = 0;
   int64_t buf_len = 1024 * 1024;
   int num_iter = Rf_asInteger(num_iteration);
+  int start_iter = Rf_asInteger(start_iteration);
   int importance_type = Rf_asInteger(feature_importance_type);
   std::vector<char> inner_char_buf(buf_len);
-  CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
+  CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
   // if the model string was larger than the initial buffer, allocate a bigger buffer and try again
   if (out_len > buf_len) {
     inner_char_buf.resize(out_len);
-    CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
+    CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
   }
   model_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
   SET_STRING_ELT(model_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
@@ -1261,9 +1266,9 @@ static const R_CallMethodDef CallEntries[] = {
   {"LGBM_BoosterPredictForMatSingleRow_R"        , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRow_R        , 9},
   {"LGBM_BoosterPredictForMatSingleRowFastInit_R", (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFastInit_R, 8},
   {"LGBM_BoosterPredictForMatSingleRowFast_R"    , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFast_R    , 3},
-  {"LGBM_BoosterSaveModel_R"                     , (DL_FUNC) &LGBM_BoosterSaveModel_R                     , 4},
-  {"LGBM_BoosterSaveModelToString_R"             , (DL_FUNC) &LGBM_BoosterSaveModelToString_R             , 3},
-  {"LGBM_BoosterDumpModel_R"                     , (DL_FUNC) &LGBM_BoosterDumpModel_R                     , 3},
+  {"LGBM_BoosterSaveModel_R"                     , (DL_FUNC) &LGBM_BoosterSaveModel_R                     , 5},
+  {"LGBM_BoosterSaveModelToString_R"             , (DL_FUNC) &LGBM_BoosterSaveModelToString_R             , 4},
+  {"LGBM_BoosterDumpModel_R"                     , (DL_FUNC) &LGBM_BoosterDumpModel_R                     , 4},
   {"LGBM_NullBoosterHandleError_R"               , (DL_FUNC) &LGBM_NullBoosterHandleError_R               , 0},
   {"LGBM_DumpParamAliases_R"                     , (DL_FUNC) &LGBM_DumpParamAliases_R                     , 0},
   {"LGBM_GetMaxThreads_R"                        , (DL_FUNC) &LGBM_GetMaxThreads_R                        , 1},
diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h
index 4f0407e8f2ec..574c9733acd9 100644
--- a/R-package/src/lightgbm_R.h
+++ b/R-package/src/lightgbm_R.h
@@ -809,13 +809,15 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRowFast_R(
 * \param num_iteration, <= 0 means save all
 * \param feature_importance_type type of feature importance, 0: split, 1: gain
 * \param filename file name
+* \param start_iteration Starting iteration (0 based)
 * \return R NULL value
 */
 LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModel_R(
     SEXP handle,
     SEXP num_iteration,
     SEXP feature_importance_type,
-    SEXP filename
+    SEXP filename,
+    SEXP start_iteration
 );
 
 /*!
@@ -823,12 +825,14 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModel_R(
 * \param handle Booster handle
 * \param num_iteration, <= 0 means save all
 * \param feature_importance_type type of feature importance, 0: split, 1: gain
+* \param start_iteration Starting iteration (0 based)
 * \return R character vector (length=1) with model string
 */
 LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModelToString_R(
     SEXP handle,
     SEXP num_iteration,
-    SEXP feature_importance_type
+    SEXP feature_importance_type,
+    SEXP start_iteration
 );
 
 /*!
@@ -836,12 +840,14 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModelToString_R( * \param handle Booster handle * \param num_iteration, <= 0 means save all * \param feature_importance_type type of feature importance, 0: split, 1: gain +* \param start_iteration Index of starting iteration (0 based) * \return R character vector (length=1) with model JSON */ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R( SEXP handle, SEXP num_iteration, - SEXP feature_importance_type + SEXP feature_importance_type, + SEXP start_iteration ); /*! diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 7bf0a1bf43d2..8e49c7b7069b 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -1519,3 +1519,95 @@ test_that("LGBM_BoosterGetNumFeature_R returns correct outputs", { ncols <- .Call(LGBM_BoosterGetNumFeature_R, model$.__enclos_env__$private$handle) expect_equal(ncols, ncol(iris) - 1L) }) + +# Helper function that creates a fitted model with nrounds boosting rounds +.get_test_model <- function(nrounds) { + set.seed(1L) + data(agaricus.train, package = "lightgbm") + train <- agaricus.train + bst <- lightgbm( + data = as.matrix(train$data) + , label = train$label + , params = list(objective = "binary", num_threads = .LGB_MAX_THREADS) + , nrounds = nrounds + , verbose = .LGB_VERBOSITY + ) + return(bst) +} + +# Simplified version of lgb.model.dt.tree() +.get_trees_from_dump <- function(x) { + parsed <- jsonlite::fromJSON( + txt = x + , simplifyVector = TRUE + , simplifyDataFrame = FALSE + , simplifyMatrix = FALSE + , flatten = FALSE + ) + return(lapply(parsed$tree_info, FUN = .single_tree_parse)) +} + +test_that("num_iteration and start_iteration work for lgb.dump()", { + bst <- .get_test_model(5L) + + first2 <- .get_trees_from_dump(lgb.dump(bst, num_iteration = 2L)) + last3 <- .get_trees_from_dump( + lgb.dump(bst, num_iteration = 3L, start_iteration = 3L) + ) + all5 <- .get_trees_from_dump(lgb.dump(bst)) + too_many <- .get_trees_from_dump(lgb.dump(bst, num_iteration = 10L)) + + expect_equal( + data.table::rbindlist(c(first2, last3)), data.table::rbindlist(all5) + ) + expect_equal(too_many, all5) +}) + +test_that("num_iteration and start_iteration work for lgb.save()", { + .get_n_trees <- function(x) { + return(length(.get_trees_from_dump(lgb.dump(x)))) + } + + .save_and_load <- function(bst, ...) { + model_file <- tempfile(fileext = ".model") + lgb.save(bst, model_file, ...) 
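+    # reload from disk, so the tree counts below reflect exactly what lgb.save() wrote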
+ return(lgb.load(model_file)) + } + + bst <- .get_test_model(5L) + n_first2 <- .get_n_trees(.save_and_load(bst, num_iteration = 2L)) + n_last3 <- .get_n_trees( + .save_and_load(bst, num_iteration = 3L, start_iteration = 3L) + ) + n_all5 <- .get_n_trees(.save_and_load(bst)) + n_too_many <- .get_n_trees(.save_and_load(bst, num_iteration = 10L)) + + expect_equal(n_first2, 2L) + expect_equal(n_last3, 3L) + expect_equal(n_all5, 5L) + expect_equal(n_too_many, 5L) +}) + +test_that("num_iteration and start_iteration work for save_model_to_string()", { + .get_n_trees_from_string <- function(x) { + return(sum(gregexpr("Tree=", x, fixed = TRUE)[[1L]] > 0L)) + } + + bst <- .get_test_model(5L) + + n_first2 <- .get_n_trees_from_string( + bst$save_model_to_string(num_iteration = 2L) + ) + n_last3 <- .get_n_trees_from_string( + bst$save_model_to_string(num_iteration = 3L, start_iteration = 3L) + ) + n_all5 <- .get_n_trees_from_string(bst$save_model_to_string()) + n_too_many <- .get_n_trees_from_string( + bst$save_model_to_string(num_iteration = 10L) + ) + + expect_equal(n_first2, 2L) + expect_equal(n_last3, 3L) + expect_equal(n_all5, 5L) + expect_equal(n_too_many, 5L) +}) diff --git a/R-package/tests/testthat/test_lgb.model.dt.tree.R b/R-package/tests/testthat/test_lgb.model.dt.tree.R index 2c26474afd6b..c27703ee490b 100644 --- a/R-package/tests/testthat/test_lgb.model.dt.tree.R +++ b/R-package/tests/testthat/test_lgb.model.dt.tree.R @@ -156,3 +156,29 @@ for (model_name in names(models)) { expect_true(all(counts > 1L & counts <= N)) }) } + +test_that("num_iteration and start_iteration work as expected", { + set.seed(1L) + data(agaricus.train, package = "lightgbm") + train <- agaricus.train + bst <- lightgbm( + data = as.matrix(train$data) + , label = train$label + , params = list(objective = "binary", num_threads = .LGB_MAX_THREADS) + , nrounds = 5L + , verbose = .LGB_VERBOSITY + ) + + first2 <- lgb.model.dt.tree(bst, num_iteration = 2L) + last3 <- lgb.model.dt.tree(bst, num_iteration = 3L, start_iteration = 3L) + all5 <- lgb.model.dt.tree(bst) + too_many <- lgb.model.dt.tree(bst, num_iteration = 10L) + + expect_equal(data.table::rbindlist(list(first2, last3)), all5) + expect_equal(too_many, all5) + + # Check tree indices + expect_equal(unique(first2[["tree_index"]]), 0L:1L) + expect_equal(unique(last3[["tree_index"]]), 2L:4L) + expect_equal(unique(all5[["tree_index"]]), 0L:4L) +}) From 3e9ab53c7d0c03258c947b34d99669c5bb887525 Mon Sep 17 00:00:00 2001 From: Marco Vela <36453977+characat0@users.noreply.github.com> Date: Thu, 16 May 2024 22:29:52 -0500 Subject: [PATCH 12/41] [cmake] switch to FindCUDAToolkit (#6457) --- CMakeLists.txt | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bb8e6b41794..ea25c7787098 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -200,32 +200,28 @@ if(__INTEGRATE_OPENCL) endif() if(USE_CUDA) - find_package(CUDA 11.0 REQUIRED) - include_directories(${CUDA_INCLUDE_DIRS}) + find_package(CUDAToolkit 11.0 REQUIRED) + include_directories(${CUDAToolkit_INCLUDE_DIRS}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS} -Xcompiler=-fPIC -Xcompiler=-Wall") # reference for mapping of CUDA toolkit component versions to supported architectures ("compute capabilities"): # https://en.wikipedia.org/wiki/CUDA#GPUs_supported - set(CUDA_ARCHS "6.0" "6.1" "6.2" "7.0" "7.5") - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") - list(APPEND CUDA_ARCHS "8.0") + set(CUDA_ARCHS "60" "61" 
"62" "70" "75") + if(CUDA_VERSION VERSION_GREATER_EQUAL "110") + list(APPEND CUDA_ARCHS "80") endif() - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") - list(APPEND CUDA_ARCHS "8.6") + if(CUDA_VERSION VERSION_GREATER_EQUAL "111") + list(APPEND CUDA_ARCHS "86") endif() - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.5") - list(APPEND CUDA_ARCHS "8.7") + if(CUDA_VERSION VERSION_GREATER_EQUAL "115") + list(APPEND CUDA_ARCHS "87") endif() - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") - list(APPEND CUDA_ARCHS "8.9") - list(APPEND CUDA_ARCHS "9.0") + if(CUDA_VERSION VERSION_GREATER_EQUAL "118") + list(APPEND CUDA_ARCHS "89") + list(APPEND CUDA_ARCHS "90") endif() list(POP_BACK CUDA_ARCHS CUDA_LAST_SUPPORTED_ARCH) list(APPEND CUDA_ARCHS "${CUDA_LAST_SUPPORTED_ARCH}+PTX") - cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_ARCHS}) - string(REPLACE ";" " " CUDA_ARCH_FLAGS "${CUDA_ARCH_FLAGS}") - - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH_FLAGS}") if(USE_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g") else() @@ -262,7 +258,7 @@ if(USE_CUDA) function(add_histogram hsize hname hadd hconst hdir) add_library(histo${hsize}${hname} OBJECT src/treelearner/kernels/histogram${hsize}.cu) set_target_properties(histo${hsize}${hname} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) - set_target_properties(histo${hsize}${hname} PROPERTIES CUDA_ARCHITECTURES OFF) + set_target_properties(histo${hsize}${hname} PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCHS}) if(hadd) list(APPEND histograms histo${hsize}${hname}) set(histograms ${histograms} PARENT_SCOPE) @@ -608,13 +604,10 @@ if(__INTEGRATE_OPENCL) endif() if(USE_CUDA) - # Disable cmake warning about policy CMP0104. Refer to issue #3754 and PR #4268. - # Custom target properties does not propagate, thus we need to specify for - # each target that contains or depends on cuda source. - set_target_properties(lightgbm_objs PROPERTIES CUDA_ARCHITECTURES OFF) - set_target_properties(_lightgbm PROPERTIES CUDA_ARCHITECTURES OFF) + set_target_properties(lightgbm_objs PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCHS}) + set_target_properties(_lightgbm PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCHS}) if(BUILD_CLI) - set_target_properties(lightgbm PROPERTIES CUDA_ARCHITECTURES OFF) + set_target_properties(lightgbm PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCHS}) endif() set_target_properties(lightgbm_objs PROPERTIES CUDA_SEPARABLE_COMPILATION ON) From dd9da91fae9f06593c8be6071032f6195918adb1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 May 2024 00:12:13 +0200 Subject: [PATCH 13/41] [ci]: Bump actions/checkout from 1 to 4 in the ci-dependencies group (#6453) * [ci]: Bump actions/checkout from 1 to 4 in the ci-dependencies group Bumps the ci-dependencies group with 1 update: [actions/checkout](https://github.com/actions/checkout). Updates `actions/checkout` from 1 to 4 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v1...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major dependency-group: ci-dependencies ... 
Signed-off-by: dependabot[bot] * downgrade a few --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: James Lamb --- .github/workflows/linkchecker.yml | 2 +- .github/workflows/optional_checks.yml | 2 +- .github/workflows/python_package.yml | 4 ++-- .github/workflows/r_configure.yml | 2 +- .github/workflows/r_package.yml | 4 ++-- .github/workflows/r_valgrind.yml | 2 +- .github/workflows/static_analysis.yml | 4 ++-- .github/workflows/triggering_comments.yml | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml index fe0176ac6b9d..28c73e12149a 100644 --- a/.github/workflows/linkchecker.yml +++ b/.github/workflows/linkchecker.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: false diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml index 645d40d1c2ed..10d6bd27c8c3 100644 --- a/.github/workflows/optional_checks.yml +++ b/.github/workflows/optional_checks.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: false diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 8b2ed1dcc5c1..ee7d01ad8fff 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -55,7 +55,7 @@ jobs: # python_version: '3.8' steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true @@ -84,7 +84,7 @@ jobs: timeout-minutes: 60 steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true diff --git a/.github/workflows/r_configure.yml b/.github/workflows/r_configure.yml index b8781cc87b17..27329c761dd4 100644 --- a/.github/workflows/r_configure.yml +++ b/.github/workflows/r_configure.yml @@ -21,7 +21,7 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index c0346866deb9..ec1b4de77308 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -237,7 +237,7 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true @@ -280,7 +280,7 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true diff --git a/.github/workflows/r_valgrind.yml b/.github/workflows/r_valgrind.yml index d00611e4f128..b7008a07f801 100644 --- a/.github/workflows/r_valgrind.yml +++ b/.github/workflows/r_valgrind.yml @@ -24,7 +24,7 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index eb84e41d4a83..2ff52b21fa4b 100644 --- 
a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -36,7 +36,7 @@ jobs: - task: check-docs steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: false @@ -59,7 +59,7 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true diff --git a/.github/workflows/triggering_comments.yml b/.github/workflows/triggering_comments.yml index f2ee395fddf5..220a0287772b 100644 --- a/.github/workflows/triggering_comments.yml +++ b/.github/workflows/triggering_comments.yml @@ -12,7 +12,7 @@ jobs: SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }} steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: false From 4b5d549d0d2c61c99059ee4df86308a3619908dc Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 22 May 2024 21:03:55 -0500 Subject: [PATCH 14/41] [ci] simplify CI configurations, parallelize compilation, test CUDA on Ubuntu 22.04 (#6458) --- .appveyor.yml | 2 +- .ci/test.sh | 2 + .ci/test_windows.ps1 | 2 + .github/workflows/cuda.yml | 139 +++++++++++++++----------- .github/workflows/linkchecker.yml | 1 - .github/workflows/python_package.yml | 3 +- .github/workflows/r_package.yml | 3 +- .github/workflows/static_analysis.yml | 1 - .vsts-ci.yml | 5 +- 9 files changed, 88 insertions(+), 70 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index bb15958136c3..da16fd6e50f8 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -27,10 +27,10 @@ install: - set PYTHON_VERSION=%CONFIGURATION% - set CONDA_ENV="test-env" - ps: | + $env:CMAKE_BUILD_PARALLEL_LEVEL = 4 $env:MINICONDA = "C:\Miniconda3-x64" $env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH" $env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER" - $env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim() build: false diff --git a/.ci/test.sh b/.ci/test.sh index 1b8a31340130..4e632320a5c6 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -10,6 +10,8 @@ SANITIZERS=${SANITIZERS:-""} ARCH=$(uname -m) +LGB_VER=$(head -n 1 ${BUILD_DIRECTORY}/VERSION.txt) + if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then export CXX=g++-11 export CC=gcc-11 diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 12f90f71cd05..1942dfaf7c6d 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -6,6 +6,8 @@ function Check-Output { } } +$env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim() + # unify environment variable for Azure DevOps and AppVeyor if (Test-Path env:APPVEYOR) { $env:APPVEYOR = "true" diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index b7f825b245b8..39ecde4e1e7e 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -8,53 +8,41 @@ on: branches: - master - release/* + # Run manually by clicking a button in the UI + workflow_dispatch: + inputs: + restart_docker: + description: 'Restart nvidia-docker on the runner before building?' 
+ required: true + type: boolean + default: false # automatically cancel in-progress builds if another commit is pushed concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true -env: - github_actions: 'true' - os_name: linux - conda_env: test-env - jobs: - test: - name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }}) + # Optionally reinstall + restart docker on the runner before building. + # This is safe as long as only 1 of these jobs runs at a time. + restart-docker: + name: set up docker runs-on: [self-hosted, linux] - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - include: - - method: wheel - compiler: gcc - python_version: "3.11" - cuda_version: "11.8.0" - task: cuda - - method: source - compiler: gcc - python_version: "3.9" - cuda_version: "12.2.0" - task: cuda - - method: pip - compiler: clang - python_version: "3.10" - cuda_version: "11.8.0" - task: cuda + timeout-minutes: 30 steps: - name: Setup or update software on host machine + if: ${{ inputs.restart_docker }} run: | + # install core packages sudo apt-get update sudo apt-get install --no-install-recommends -y \ apt-transport-https \ ca-certificates \ curl \ - git \ gnupg-agent \ lsb-release \ software-properties-common + # set up nvidia-docker curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - @@ -67,43 +55,76 @@ jobs: nvidia-docker2 sudo chmod a+rw /var/run/docker.sock sudo systemctl restart docker - - name: Remove old folder with repository - run: sudo rm -rf $GITHUB_WORKSPACE + - name: mark job successful + run: | + exit 0 + test: + name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }}) + runs-on: [self-hosted, linux] + needs: [restart-docker] + container: + image: ${{ matrix.image }} + env: + CMAKE_BUILD_PARALLEL_LEVEL: 4 + COMPILER: ${{ matrix.compiler }} + CONDA: /tmp/miniforge + CONDA_ENV: test-env + DEBIAN_FRONTEND: noninteractive + METHOD: ${{ matrix.method }} + OS_NAME: linux + PYTHON_VERSION: ${{ matrix.python_version }} + TASK: ${{ matrix.task }} + options: --gpus all + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + include: + - method: wheel + compiler: gcc + python_version: "3.11" + cuda_version: "11.8.0" + image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 + task: cuda + - method: source + compiler: gcc + python_version: "3.9" + cuda_version: "12.2.0" + image: nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04 + task: cuda + - method: pip + compiler: clang + python_version: "3.10" + cuda_version: "11.8.0" + image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 + task: cuda + steps: + - name: Install latest git + run: | + apt-get update + apt-get install --no-install-recommends -y \ + ca-certificates \ + software-properties-common + add-apt-repository ppa:git-core/ppa -y + apt-get update + apt-get install --no-install-recommends -y \ + git - name: Checkout repository - uses: actions/checkout@v1 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true - name: Setup and run tests run: | - export ROOT_DOCKER_FOLDER=/LightGBM - cat > docker.env < docker-script.sh < Date: Mon, 27 May 2024 10:03:34 -0500 Subject: [PATCH 15/41] [ci] fix CUDA 11.8 builds (fixes #6466) (#6465) 
--- .ci/test.sh | 17 +++++++---------- .ci/test_windows.ps1 | 6 +++--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index 4e632320a5c6..675b86571dfc 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -117,7 +117,7 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then make html || exit 1 if [[ $TASK == "check-links" ]]; then # check docs for broken links - pip install --user linkchecker + pip install linkchecker linkchecker --config=.linkcheckerrc ./_build/html/*.html || exit 1 exit 0 fi @@ -155,7 +155,7 @@ fi if [[ $TASK == "sdist" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz -v || exit 1 + pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz -v || exit 1 if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1 fi @@ -200,7 +200,7 @@ elif [[ $TASK == "bdist" ]]; then # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py export LIGHTGBM_TEST_DUAL_CPU_GPU=1 fi - pip install --user $BUILD_DIRECTORY/dist/*.whl || exit 1 + pip install -v $BUILD_DIRECTORY/dist/*.whl || exit 1 pytest $BUILD_DIRECTORY/tests || exit 1 exit 0 fi @@ -212,7 +212,6 @@ if [[ $TASK == "gpu" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 pip install \ - --user \ -v \ --config-settings=cmake.define.USE_GPU=ON \ $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \ @@ -222,7 +221,7 @@ if [[ $TASK == "gpu" ]]; then elif [[ $METHOD == "wheel" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --gpu || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 + pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 pytest $BUILD_DIRECTORY/tests || exit 1 exit 0 elif [[ $METHOD == "source" ]]; then @@ -238,7 +237,6 @@ elif [[ $TASK == "cuda" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 pip install \ - --user \ -v \ --config-settings=cmake.define.USE_CUDA=ON \ $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \ @@ -248,7 +246,7 @@ elif [[ $TASK == "cuda" ]]; then elif [[ $METHOD == "wheel" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --cuda || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 + pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 pytest $BUILD_DIRECTORY/tests || exit 1 exit 0 elif [[ $METHOD == "source" ]]; then @@ -259,7 +257,6 @@ elif [[ $TASK == "mpi" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 pip install \ - --user \ -v \ --config-settings=cmake.define.USE_MPI=ON \ $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \ @@ -269,7 +266,7 @@ elif [[ $TASK == "mpi" ]]; then elif [[ $METHOD == "wheel" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --mpi || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install --user 
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 + pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 pytest $BUILD_DIRECTORY/tests || exit 1 exit 0 elif [[ $METHOD == "source" ]]; then @@ -281,7 +278,7 @@ fi cmake --build build --target _lightgbm -j4 || exit 1 -cd $BUILD_DIRECTORY && sh ./build-python.sh install --precompile --user || exit 1 +cd $BUILD_DIRECTORY && sh ./build-python.sh install --precompile || exit 1 pytest $BUILD_DIRECTORY/tests || exit 1 if [[ $TASK == "regular" ]]; then diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 1942dfaf7c6d..5a362bac9309 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -97,14 +97,14 @@ elseif ($env:TASK -eq "bdist") { cd $env:BUILD_SOURCESDIRECTORY sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $? sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $? - cd dist; pip install --user @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $? + cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $? cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { cd $env:BUILD_SOURCESDIRECTORY if ($env:COMPILER -eq "MINGW") { - sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user --mingw ; Check-Output $? + sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --mingw ; Check-Output $? } else { - sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user; Check-Output $? + sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install; Check-Output $? } } From 69b5bd311ddaae1ee1877735b1a6bf4e8b823231 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 27 May 2024 15:27:39 -0500 Subject: [PATCH 16/41] [python-package] drop Python 3.6 support, add Python 3.12 (#6459) --- .ci/conda-envs/ci-core-py38.txt | 51 +++++++++++++++++++++++++++ .ci/test-python-oldest.sh | 13 +++---- .ci/test.sh | 2 ++ .ci/test_windows.ps1 | 2 ++ .github/workflows/cuda.yml | 6 ++-- .github/workflows/linkchecker.yml | 2 +- .github/workflows/python_package.yml | 16 ++++----- .github/workflows/static_analysis.yml | 2 +- .vsts-ci.yml | 22 ++++++------ python-package/pyproject.toml | 4 +-- 10 files changed, 88 insertions(+), 32 deletions(-) create mode 100644 .ci/conda-envs/ci-core-py38.txt diff --git a/.ci/conda-envs/ci-core-py38.txt b/.ci/conda-envs/ci-core-py38.txt new file mode 100644 index 000000000000..7fcd986b40f8 --- /dev/null +++ b/.ci/conda-envs/ci-core-py38.txt @@ -0,0 +1,51 @@ +# [description] +# +# Similar to ci-core.txt, but specific to Python 3.8. +# +# Unlike ci-core.txt, this includes a Python version and uses +# `=` and `<=` pins to make solves faster and prevent against +# issues like https://github.com/microsoft/LightGBM/pull/6370. 
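+# (pins ending in `.*` still allow conda to pick up new patch releases
+# within each pinned minor version)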
+# +# [usage] +# +# conda create \ +# --name test-env \ +# --file ./.ci/conda-envs/ci-core-py38.txt +# + +# python +python=3.8.* + +# direct imports +cffi=1.15.* +dask=2023.5.* +distributed=2023.5.* +joblib=1.4.* +matplotlib-base=3.7.* +numpy=1.24.* +pandas=1.5.* +pyarrow-core=16.1.* +python-graphviz=0.20.* +scikit-learn=1.3.* +scipy=1.10.* + +# testing-only dependencies +cloudpickle=3.0.* +pluggy=1.5.* +psutil=5.9.8 +pytest=8.2.* + +# other recursive dependencies, just +# pinned here to help speed up solves +bokeh=3.1.* +fsspec=2024.5.* +msgpack-python=1.0.* +pluggy=1.5.* +pytz=2024.1 +setuptools=69.5.* +snappy=1.2.* +tomli=2.0.* +tornado=6.4.* +wheel=0.43.* +zict=3.0.* +zipp=3.17.* diff --git a/.ci/test-python-oldest.sh b/.ci/test-python-oldest.sh index 7f1c586e1f22..c6de079351e3 100644 --- a/.ci/test-python-oldest.sh +++ b/.ci/test-python-oldest.sh @@ -3,19 +3,20 @@ set -e -E -u -o pipefail # oldest versions of dependencies published after -# minimum supported Python version's first release +# minimum supported Python version's first release, +# for which there are wheels compatible with the +# python:{version} image # # see https://devguide.python.org/versions/ # echo "installing lightgbm's dependencies" pip install \ 'cffi==1.15.1' \ - 'dataclasses' \ - 'numpy==1.16.6' \ - 'pandas==0.24.0' \ + 'numpy==1.19.0' \ + 'pandas==1.1.3' \ 'pyarrow==6.0.1' \ - 'scikit-learn==0.18.2' \ - 'scipy==0.19.0' \ + 'scikit-learn==0.24.0' \ + 'scipy==1.6.0' \ || exit 1 echo "done installing lightgbm's dependencies" diff --git a/.ci/test.sh b/.ci/test.sh index 675b86571dfc..8ac54bc7ac9e 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -132,6 +132,8 @@ fi if [[ $PYTHON_VERSION == "3.7" ]]; then CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py37.txt" +elif [[ $PYTHON_VERSION == "3.8" ]]; then + CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py38.txt" else CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core.txt" fi diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 5a362bac9309..64cf5d048387 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -55,6 +55,8 @@ conda update -q -y conda if ($env:PYTHON_VERSION -eq "3.7") { $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" +} elseif ($env:PYTHON_VERSION -eq "3.8") { + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" } else { $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" } diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 39ecde4e1e7e..e13e96be3c26 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -82,19 +82,19 @@ jobs: include: - method: wheel compiler: gcc - python_version: "3.11" + python_version: "3.10" cuda_version: "11.8.0" image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 task: cuda - method: source compiler: gcc - python_version: "3.9" + python_version: "3.12" cuda_version: "12.2.0" image: nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04 task: cuda - method: pip compiler: clang - python_version: "3.10" + python_version: "3.11" cuda_version: "11.8.0" image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 task: cuda diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml index cfef16469f1b..b43d151a5cdc 100644 --- a/.github/workflows/linkchecker.yml +++ b/.github/workflows/linkchecker.yml @@ -10,7 +10,7 @@ on: env: CONDA_ENV: test-env OS_NAME: 'linux' - 
PYTHON_VERSION: '3.11' + PYTHON_VERSION: '3.12' TASK: 'check-links' jobs: diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 8941e0c73471..648d911e485a 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -29,30 +29,30 @@ jobs: include: - os: macos-13 task: regular - python_version: '3.9' + python_version: '3.10' - os: macos-13 task: sdist - python_version: '3.10' + python_version: '3.11' - os: macos-13 task: bdist - python_version: '3.7' + python_version: '3.8' - os: macos-13 task: if-else - python_version: '3.9' + python_version: '3.10' # We're currently skipping MPI jobs on macOS, see https://github.com/microsoft/LightGBM/pull/6425 # for further details. # - os: macos-13 # task: mpi # method: source - # python_version: '3.10' + # python_version: '3.11' # - os: macos-13 # task: mpi # method: pip - # python_version: '3.11' + # python_version: '3.12' # - os: macos-13 # task: mpi # method: wheel - # python_version: '3.8' + # python_version: '3.9' steps: - name: Checkout repository uses: actions/checkout@v4 @@ -101,7 +101,7 @@ jobs: --rm \ -v $(pwd):/opt/lgb-build \ -w /opt/lgb-build \ - python:3.6 \ + python:3.7 \ /bin/bash ./.ci/test-python-oldest.sh all-python-package-jobs-successful: if: always() diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 6146499a0915..6e0b46b15618 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -20,7 +20,7 @@ env: COMPILER: 'gcc' CONDA_ENV: test-env OS_NAME: 'linux' - PYTHON_VERSION: '3.11' + PYTHON_VERSION: '3.12' jobs: test: diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 67be0dd4082c..24ec95f45d07 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -10,8 +10,8 @@ pr: - release/* variables: AZURE: 'true' - PYTHON_VERSION: '3.11' CMAKE_BUILD_PARALLEL_LEVEL: 4 + PYTHON_VERSION: '3.12' CONDA_ENV: test-env runCodesignValidationInjection: false skipComponentGovernanceDetection: true @@ -62,19 +62,19 @@ jobs: matrix: regular: TASK: regular - PYTHON_VERSION: '3.9' + PYTHON_VERSION: '3.10' sdist: TASK: sdist - PYTHON_VERSION: '3.7' + PYTHON_VERSION: '3.8' bdist: TASK: bdist - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' inference: TASK: if-else mpi_source: TASK: mpi METHOD: source - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' gpu_source: TASK: gpu METHOD: source @@ -127,7 +127,7 @@ jobs: TASK: sdist bdist: TASK: bdist - PYTHON_VERSION: '3.9' + PYTHON_VERSION: '3.10' inference: TASK: if-else mpi_source: @@ -136,23 +136,23 @@ jobs: mpi_pip: TASK: mpi METHOD: pip - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' mpi_wheel: TASK: mpi METHOD: wheel - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' gpu_source: TASK: gpu METHOD: source - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' gpu_pip: TASK: gpu METHOD: pip - PYTHON_VERSION: '3.9' + PYTHON_VERSION: '3.10' gpu_wheel: TASK: gpu METHOD: wheel - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' cpp_tests: TASK: cpp-tests METHOD: with-sanitizers diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 387fbd2e407a..b7bff79edfc8 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -15,10 +15,10 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Artificial Intelligence" ] dependencies = [ - "dataclasses ; python_version < '3.7'", 
"numpy", "scipy" ] @@ -29,7 +29,7 @@ maintainers = [ ] name = "lightgbm" readme = "README.rst" -requires-python = ">=3.6" +requires-python = ">=3.7" version = "4.3.0.99" [project.optional-dependencies] From ceb9986192e19a6fc41d75a58b5f5f824fd2bbff Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 28 May 2024 16:06:22 -0500 Subject: [PATCH 17/41] [ci] further simplify CI configurations (#6463) --- .appveyor.yml | 9 +++---- .ci/check_python_dists.sh | 5 ++-- .ci/get_workflow_status.py | 6 +---- .ci/setup.sh | 14 +++-------- .ci/test.sh | 1 + .ci/test_r_package_valgrind.sh | 7 +++++- .ci/test_windows.ps1 | 7 +----- .github/workflows/cuda.yml | 12 ++++----- .github/workflows/linkchecker.yml | 1 - .github/workflows/optional_checks.yml | 1 - .github/workflows/python_package.yml | 2 -- .github/workflows/r_package.yml | 1 - .github/workflows/static_analysis.yml | 2 -- .vsts-ci.yml | 35 +++++++++++---------------- 14 files changed, 39 insertions(+), 64 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index da16fd6e50f8..3c12ebaa36f8 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -2,15 +2,13 @@ version: 4.3.0.99.{build} image: Visual Studio 2015 platform: x64 -configuration: # a trick to construct a build matrix with multiple Python versions +configuration: - '3.8' -# only build pull requests and -# commits to 'master' or any branch starting with 'release' +# only build on 'master' and pull requests targeting it branches: only: - master - - /^release/ environment: matrix: @@ -25,8 +23,9 @@ install: - git submodule update --init --recursive # get `external_libs` folder - set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH% - set PYTHON_VERSION=%CONFIGURATION% - - set CONDA_ENV="test-env" - ps: | + $env:ALLOW_SKIP_ARROW_TESTS = "1" + $env:APPVEYOR = "true" $env:CMAKE_BUILD_PARALLEL_LEVEL = 4 $env:MINICONDA = "C:\Miniconda3-x64" $env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH" diff --git a/.ci/check_python_dists.sh b/.ci/check_python_dists.sh index 59547722508b..c291bdc38745 100644 --- a/.ci/check_python_dists.sh +++ b/.ci/check_python_dists.sh @@ -26,11 +26,12 @@ fi PY_MINOR_VER=$(python -c "import sys; print(sys.version_info.minor)") if [ $PY_MINOR_VER -gt 7 ]; then echo "pydistcheck..." 
- pip install pydistcheck + pip install 'pydistcheck>=0.7.0' if { test "${TASK}" = "cuda" || test "${METHOD}" = "wheel"; }; then pydistcheck \ --inspect \ - --ignore 'compiled-objects-have-debug-symbols,distro-too-large-compressed' \ + --ignore 'compiled-objects-have-debug-symbols'\ + --ignore 'distro-too-large-compressed' \ --max-allowed-size-uncompressed '100M' \ --max-allowed-files 800 \ ${DIST_DIR}/* || exit 1 diff --git a/.ci/get_workflow_status.py b/.ci/get_workflow_status.py index b2cb714b3e36..e2e8926bd692 100644 --- a/.ci/get_workflow_status.py +++ b/.ci/get_workflow_status.py @@ -10,11 +10,7 @@ from os import environ from sys import argv, exit from time import sleep - -try: - from urllib import request -except ImportError: - import urllib2 as request +from urllib import request def get_runs(trigger_phrase): diff --git a/.ci/setup.sh b/.ci/setup.sh index 104648789bf8..7fe54db9c2fc 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -30,10 +30,6 @@ if [[ $OS_NAME == "macos" ]]; then if [[ $TASK == "swig" ]]; then brew install swig fi - curl \ - -sL \ - -o miniforge.sh \ - https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-${ARCH}.sh else # Linux if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then # fixes error "unable to initialize frontend: Dialog" @@ -144,16 +140,14 @@ else # Linux apt-get install --no-install-recommends -y \ cmake fi - if [[ $SETUP_CONDA != "false" ]]; then - curl \ - -sL \ - -o miniforge.sh \ - https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${ARCH}.sh - fi fi if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then if [[ $SETUP_CONDA != "false" ]]; then + curl \ + -sL \ + -o miniforge.sh \ + https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-${ARCH}.sh sh miniforge.sh -b -p $CONDA fi conda config --set always_yes yes --set changeps1 no diff --git a/.ci/test.sh b/.ci/test.sh index 8ac54bc7ac9e..1b0d5a745ae0 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -3,6 +3,7 @@ set -e -E -o -u pipefail # defaults +CONDA_ENV="test-env" IN_UBUNTU_BASE_CONTAINER=${IN_UBUNTU_BASE_CONTAINER:-"false"} METHOD=${METHOD:-""} PRODUCES_ARTIFACTS=${PRODUCES_ARTIFACTS:-"false"} diff --git a/.ci/test_r_package_valgrind.sh b/.ci/test_r_package_valgrind.sh index 97a7ac942cfe..5e3d977e2f8b 100755 --- a/.ci/test_r_package_valgrind.sh +++ b/.ci/test_r_package_valgrind.sh @@ -72,10 +72,14 @@ bytes_possibly_lost=$( | tr -d "," ) echo "valgrind found ${bytes_possibly_lost} bytes possibly lost" -if [[ ${bytes_possibly_lost} -gt 1056 ]]; then +if [[ ${bytes_possibly_lost} -gt 1104 ]]; then exit 1 fi +# ensure 'grep --count' doesn't cause failures +set +e + +echo "checking for invalid reads" invalid_reads=$( cat ${VALGRIND_LOGS_FILE} \ | grep --count -i "Invalid read" @@ -85,6 +89,7 @@ if [[ ${invalid_reads} -gt 0 ]]; then exit 1 fi +echo "checking for invalid writes" invalid_writes=$( cat ${VALGRIND_LOGS_FILE} \ | grep --count -i "Invalid write" diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 64cf5d048387..fe006601b236 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -6,14 +6,9 @@ function Check-Output { } } +$env:CONDA_ENV = "test-env" $env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim() -# unify environment variable for Azure DevOps and AppVeyor -if (Test-Path env:APPVEYOR) { - $env:APPVEYOR = "true" - $env:ALLOW_SKIP_ARROW_TESTS = "1" -} - if ($env:TASK -eq "r-package") { & $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 
; Check-Output $? Exit 0 diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index e13e96be3c26..a6005ec0344b 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -7,7 +7,6 @@ on: pull_request: branches: - master - - release/* # Run manually by clicking a button in the UI workflow_dispatch: inputs: @@ -59,16 +58,15 @@ jobs: run: | exit 0 test: - name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }}) + name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (${{ matrix.linux_version }}, ${{ matrix.compiler }}, Python ${{ matrix.python_version }}) runs-on: [self-hosted, linux] needs: [restart-docker] container: - image: ${{ matrix.image }} + image: nvcr.io/nvidia/cuda:${{ matrix.cuda_version }}-devel-${{ matrix.linux_version }} env: CMAKE_BUILD_PARALLEL_LEVEL: 4 COMPILER: ${{ matrix.compiler }} CONDA: /tmp/miniforge - CONDA_ENV: test-env DEBIAN_FRONTEND: noninteractive METHOD: ${{ matrix.method }} OS_NAME: linux @@ -84,19 +82,19 @@ jobs: compiler: gcc python_version: "3.10" cuda_version: "11.8.0" - image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 + linux_version: "ubuntu20.04" task: cuda - method: source compiler: gcc python_version: "3.12" cuda_version: "12.2.0" - image: nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04 + linux_version: "ubuntu22.04" task: cuda - method: pip compiler: clang python_version: "3.11" cuda_version: "11.8.0" - image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 + linux_version: "ubuntu20.04" task: cuda steps: - name: Install latest git diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml index b43d151a5cdc..5055d5ca9399 100644 --- a/.github/workflows/linkchecker.yml +++ b/.github/workflows/linkchecker.yml @@ -8,7 +8,6 @@ on: - cron: '0 8 * * *' env: - CONDA_ENV: test-env OS_NAME: 'linux' PYTHON_VERSION: '3.12' TASK: 'check-links' diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml index 10d6bd27c8c3..fe20fb1f53df 100644 --- a/.github/workflows/optional_checks.yml +++ b/.github/workflows/optional_checks.yml @@ -4,7 +4,6 @@ on: pull_request: branches: - master - - release/* jobs: all-optional-checks-successful: diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 648d911e485a..73fbc78a2ea1 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -7,7 +7,6 @@ on: pull_request: branches: - master - - release/* # automatically cancel in-progress builds if another commit is pushed concurrency: @@ -16,7 +15,6 @@ concurrency: env: CMAKE_BUILD_PARALLEL_LEVEL: 4 - CONDA_ENV: test-env jobs: test: diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 6f3650a928f0..c72ec7fae112 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -7,7 +7,6 @@ on: pull_request: branches: - master - - release/* # automatically cancel in-progress builds if another commit is pushed concurrency: diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 6e0b46b15618..380a71b492d9 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -9,7 +9,6 @@ on: pull_request: branches: - master - - release/* # automatically cancel in-progress builds if another commit is pushed concurrency: @@ -18,7 +17,6 @@ concurrency: env: COMPILER: 'gcc' - CONDA_ENV: test-env OS_NAME: 'linux' 
  PYTHON_VERSION: '3.12'
 
 jobs:
diff --git a/.vsts-ci.yml b/.vsts-ci.yml
index 24ec95f45d07..9b440cb9fac1 100644
--- a/.vsts-ci.yml
+++ b/.vsts-ci.yml
@@ -7,12 +7,10 @@ trigger:
 - v*
 pr:
 - master
-- release/*
 variables:
   AZURE: 'true'
   CMAKE_BUILD_PARALLEL_LEVEL: 4
   PYTHON_VERSION: '3.12'
-  CONDA_ENV: test-env
   runCodesignValidationInjection: false
   skipComponentGovernanceDetection: true
   DOTNET_CLI_TELEMETRY_OPTOUT: true
@@ -187,8 +185,8 @@ jobs:
 - job: QEMU_multiarch
 ###########################################
   variables:
+    BUILD_DIRECTORY: /LightGBM
     COMPILER: gcc
-    OS_NAME: 'linux'
     PRODUCES_ARTIFACTS: 'true'
   pool:
     vmImage: ubuntu-22.04
@@ -214,25 +212,12 @@ jobs:
       git clean -d -f -x
     displayName: 'Clean source directory'
   - script: |
-      export ROOT_DOCKER_FOLDER=/LightGBM
-      cat > docker.env < docker-script.sh <
Date: Tue, 28 May 2024 19:12:42 -0500
Subject: [PATCH 18/41] [python-package] make LGBMDeprecationWarning inherit from FutureWarning (#6447)

---
 python-package/lightgbm/basic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index ee55b642ffa0..8e33d0ab56f2 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -556,7 +556,8 @@ class LightGBMError(Exception):
 
 
 # DeprecationWarning is not shown by default, so let's create our own with higher level
-class LGBMDeprecationWarning(UserWarning):
+# ref: https://peps.python.org/pep-0565/#additional-use-case-for-futurewarning
+class LGBMDeprecationWarning(FutureWarning):
     """Custom deprecation warning."""
 
     pass
 
From dee8a1888915e6f08582eabb38dc55c408f77aa4 Mon Sep 17 00:00:00 2001
From: david-cortes
Date: Wed, 29 May 2024 06:19:38 +0200
Subject: [PATCH 19/41] [R-package] Use ALTREP system to return C++-allocated arrays (#6213)

---
 R-package/src/lightgbm_R.cpp | 222 +++++++++++++++++++++++++++++++++--
 1 file changed, 211 insertions(+), 11 deletions(-)

diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 91c7c915abe5..4bef9cefe1e9 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include <type_traits>
 
 #define R_NO_REMAP
 #define R_USE_C99_IN_CXX
@@ -24,6 +25,150 @@
 #include 
 #include 
 #include 
+#include <R_ext/Altrep.h>
+
+R_altrep_class_t lgb_altrepped_char_vec;
+R_altrep_class_t lgb_altrepped_int_arr;
+R_altrep_class_t lgb_altrepped_dbl_arr;
+
+template <typename T>
+void delete_cpp_array(SEXP R_ptr) {
+  T *ptr_to_cpp_obj = static_cast<T*>(R_ExternalPtrAddr(R_ptr));
+  delete[] ptr_to_cpp_obj;
+  R_ClearExternalPtr(R_ptr);
+}
+
+void delete_cpp_char_vec(SEXP R_ptr) {
+  std::vector<char> *ptr_to_cpp_obj = static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_ptr));
+  delete ptr_to_cpp_obj;
+  R_ClearExternalPtr(R_ptr);
+}
+
+// Note: MSVC has issues with Altrep classes, so they are disabled for it.
+// See: https://github.com/microsoft/LightGBM/pull/6213#issuecomment-2111025768 +#ifdef _MSC_VER +# define LGB_NO_ALTREP +#endif + +#ifndef LGB_NO_ALTREP +SEXP make_altrepped_raw_vec(void *void_ptr) { + std::unique_ptr> *ptr_to_cpp_vec = static_cast>*>(void_ptr); + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP R_raw = PROTECT(R_new_altrep(lgb_altrepped_char_vec, R_NilValue, R_NilValue)); + + R_SetExternalPtrAddr(R_ptr, ptr_to_cpp_vec->get()); + R_RegisterCFinalizerEx(R_ptr, delete_cpp_char_vec, TRUE); + ptr_to_cpp_vec->release(); + + R_set_altrep_data1(R_raw, R_ptr); + UNPROTECT(2); + return R_raw; +} +#else +SEXP make_r_raw_vec(void *void_ptr) { + std::unique_ptr> *ptr_to_cpp_vec = static_cast>*>(void_ptr); + R_xlen_t len = ptr_to_cpp_vec->get()->size(); + SEXP out = PROTECT(Rf_allocVector(RAWSXP, len)); + std::copy(ptr_to_cpp_vec->get()->begin(), ptr_to_cpp_vec->get()->end(), reinterpret_cast(RAW(out))); + UNPROTECT(1); + return out; +} +#define make_altrepped_raw_vec make_r_raw_vec +#endif + +std::vector* get_ptr_from_altrepped_raw(SEXP R_raw) { + return static_cast*>(R_ExternalPtrAddr(R_altrep_data1(R_raw))); +} + +R_xlen_t get_altrepped_raw_len(SEXP R_raw) { + return get_ptr_from_altrepped_raw(R_raw)->size(); +} + +const void* get_altrepped_raw_dataptr_or_null(SEXP R_raw) { + return get_ptr_from_altrepped_raw(R_raw)->data(); +} + +void* get_altrepped_raw_dataptr(SEXP R_raw, Rboolean writeable) { + return get_ptr_from_altrepped_raw(R_raw)->data(); +} + +#ifndef LGB_NO_ALTREP +template +R_altrep_class_t get_altrep_class_for_type() { + if (std::is_same::value) { + return lgb_altrepped_dbl_arr; + } else { + return lgb_altrepped_int_arr; + } +} +#else +template +SEXPTYPE get_sexptype_class_for_type() { + if (std::is_same::value) { + return REALSXP; + } else { + return INTSXP; + } +} + +template +T* get_r_vec_ptr(SEXP x) { + if (std::is_same::value) { + return static_cast(static_cast(REAL(x))); + } else { + return static_cast(static_cast(INTEGER(x))); + } +} +#endif + +template +struct arr_and_len { + T *arr; + int64_t len; +}; + +#ifndef LGB_NO_ALTREP +template +SEXP make_altrepped_vec_from_arr(void *void_ptr) { + T *arr = static_cast*>(void_ptr)->arr; + uint64_t len = static_cast*>(void_ptr)->len; + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP R_len = PROTECT(Rf_allocVector(REALSXP, 1)); + SEXP R_vec = PROTECT(R_new_altrep(get_altrep_class_for_type(), R_NilValue, R_NilValue)); + + REAL(R_len)[0] = static_cast(len); + R_SetExternalPtrAddr(R_ptr, arr); + R_RegisterCFinalizerEx(R_ptr, delete_cpp_array, TRUE); + + R_set_altrep_data1(R_vec, R_ptr); + R_set_altrep_data2(R_vec, R_len); + UNPROTECT(3); + return R_vec; +} +#else +template +SEXP make_R_vec_from_arr(void *void_ptr) { + T *arr = static_cast*>(void_ptr)->arr; + uint64_t len = static_cast*>(void_ptr)->len; + SEXP out = PROTECT(Rf_allocVector(get_sexptype_class_for_type(), len)); + std::copy(arr, arr + len, get_r_vec_ptr(out)); + UNPROTECT(1); + return out; +} +#define make_altrepped_vec_from_arr make_R_vec_from_arr +#endif + +R_xlen_t get_altrepped_vec_len(SEXP R_vec) { + return static_cast(Rf_asReal(R_altrep_data2(R_vec))); +} + +const void* get_altrepped_vec_dataptr_or_null(SEXP R_vec) { + return R_ExternalPtrAddr(R_altrep_data1(R_vec)); +} + +void* get_altrepped_vec_dataptr(SEXP R_vec, Rboolean writeable) { + return R_ExternalPtrAddr(R_altrep_data1(R_vec)); +} #define COL_MAJOR (0) @@ -964,8 +1109,6 @@ struct SparseOutputPointers { void* indptr; int32_t* 
@@ -964,8 +1109,6 @@ struct SparseOutputPointers {
   void* indptr;
   int32_t* indices;
   void* data;
-  int indptr_type;
-  int data_type;
   SparseOutputPointers(void* indptr, int32_t* indices, void* data)
     : indptr(indptr), indices(indices), data(data) {}
 };
@@ -1015,15 +1158,26 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
     &delete_SparseOutputPointers
   };

-  SEXP out_indptr_R = safe_R_int(out_len[1], &cont_token);
-  SET_VECTOR_ELT(out, 0, out_indptr_R);
-  SEXP out_indices_R = safe_R_int(out_len[0], &cont_token);
-  SET_VECTOR_ELT(out, 1, out_indices_R);
-  SEXP out_data_R = safe_R_real(out_len[0], &cont_token);
-  SET_VECTOR_ELT(out, 2, out_data_R);
-  std::memcpy(INTEGER(out_indptr_R), out_indptr, out_len[1]*sizeof(int));
-  std::memcpy(INTEGER(out_indices_R), out_indices, out_len[0]*sizeof(int));
-  std::memcpy(REAL(out_data_R), out_data, out_len[0]*sizeof(double));
+  arr_and_len<int> indptr_str{static_cast<int*>(out_indptr), out_len[1]};
+  SET_VECTOR_ELT(
+    out, 0,
+    R_UnwindProtect(make_altrepped_vec_from_arr<int>,
+                    static_cast<void*>(&indptr_str), throw_R_memerr, &cont_token, cont_token));
+  pointers_struct->indptr = nullptr;
+
+  arr_and_len<int> indices_str{static_cast<int*>(out_indices), out_len[0]};
+  SET_VECTOR_ELT(
+    out, 1,
+    R_UnwindProtect(make_altrepped_vec_from_arr<int>,
+                    static_cast<void*>(&indices_str), throw_R_memerr, &cont_token, cont_token));
+  pointers_struct->indices = nullptr;
+
+  arr_and_len<double> data_str{static_cast<double*>(out_data), out_len[0]};
+  SET_VECTOR_ELT(
+    out, 2,
+    R_UnwindProtect(make_altrepped_vec_from_arr<double>,
+                    static_cast<void*>(&data_str), throw_R_memerr, &cont_token, cont_token));
+  pointers_struct->data = nullptr;

   UNPROTECT(3);
   return out;
@@ -1104,6 +1258,34 @@ SEXP LGBM_BoosterSaveModel_R(SEXP handle,
   R_API_END();
 }

+// Note: for some reason, MSVC crashes when an error is thrown here
+// if the buffer variable is defined as 'std::unique_ptr<std::vector<char>>',
+// but not if it is defined as 'std::vector<char>'.
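Editor's note (an illustrative sketch, not part of the patches): both versions of LGBM_BoosterSaveModelToString_R below rely on LGBM_BoosterSaveModelToString reporting the full required length, so a too-small first buffer can be grown and the call repeated. The same two-pass pattern in standalone form; fill_buffer here is a hypothetical stand-in for the C API call:

    def fill_buffer(buf: bytearray, payload: bytes) -> int:
        """Copy what fits into buf; return the full required length."""
        n = min(len(buf), len(payload))
        buf[:n] = payload[:n]
        return len(payload)

    def save_with_retry(payload: bytes, initial_size: int = 1024 * 1024) -> bytes:
        buf = bytearray(initial_size)
        needed = fill_buffer(buf, payload)
        if needed > len(buf):
            buf = bytearray(needed)  # grow to the reported size and retry
            needed = fill_buffer(buf, payload)
        return bytes(buf[:needed])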
+#ifndef _MSC_VER
+SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
+                                     SEXP num_iteration,
+                                     SEXP feature_importance_type,
+                                     SEXP start_iteration) {
+  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  R_API_BEGIN();
+  _AssertBoosterHandleNotNull(handle);
+  int64_t out_len = 0;
+  int64_t buf_len = 1024 * 1024;
+  int num_iter = Rf_asInteger(num_iteration);
+  int start_iter = Rf_asInteger(start_iteration);
+  int importance_type = Rf_asInteger(feature_importance_type);
+  std::unique_ptr<std::vector<char>> inner_char_buf(new std::vector<char>(buf_len));
+  CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf->data()));
+  inner_char_buf->resize(out_len);
+  if (out_len > buf_len) {
+    CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf->data()));
+  }
+  SEXP out = R_UnwindProtect(make_altrepped_raw_vec, &inner_char_buf, throw_R_memerr, &cont_token, cont_token);
+  UNPROTECT(1);
+  return out;
+  R_API_END();
+}
+#else
 SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
                                      SEXP num_iteration,
                                      SEXP feature_importance_type,
                                      SEXP start_iteration) {
@@ -1129,6 +1311,7 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
   return model_str;
   R_API_END();
 }
+#endif

 SEXP LGBM_BoosterDumpModel_R(SEXP handle,
                              SEXP num_iteration,
@@ -1281,4 +1464,21 @@ LIGHTGBM_C_EXPORT void R_init_lightgbm(DllInfo *dll);
 void R_init_lightgbm(DllInfo *dll) {
   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
   R_useDynamicSymbols(dll, FALSE);
+
+#ifndef LGB_NO_ALTREP
+  lgb_altrepped_char_vec = R_make_altraw_class("lgb_altrepped_char_vec", "lightgbm", dll);
+  R_set_altrep_Length_method(lgb_altrepped_char_vec, get_altrepped_raw_len);
+  R_set_altvec_Dataptr_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr);
+  R_set_altvec_Dataptr_or_null_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr_or_null);
+
+  lgb_altrepped_int_arr = R_make_altinteger_class("lgb_altrepped_int_arr", "lightgbm", dll);
+  R_set_altrep_Length_method(lgb_altrepped_int_arr, get_altrepped_vec_len);
+  R_set_altvec_Dataptr_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr);
+  R_set_altvec_Dataptr_or_null_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr_or_null);
+
+  lgb_altrepped_dbl_arr = R_make_altreal_class("lgb_altrepped_dbl_arr", "lightgbm", dll);
+  R_set_altrep_Length_method(lgb_altrepped_dbl_arr, get_altrepped_vec_len);
+  R_set_altvec_Dataptr_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr);
+  R_set_altvec_Dataptr_or_null_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr_or_null);
+#endif
 }

From c07694bac369795a85cd31285471cfa505541a9f Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 29 May 2024 21:36:01 -0500
Subject: [PATCH 20/41] [cmake] always target Windows SDK v10.x when using
 Visual Studio generators (fixes #6448) (#6451)

---
 CMakeLists.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea25c7787098..d4eefbf3d840 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,6 +25,14 @@ option(__INTEGRATE_OPENCL "Set to ON if building LightGBM with the OpenCL ICD Lo

 cmake_minimum_required(VERSION 3.18)

+# If using Visual Studio generators, always target v10.x of the Windows SDK.
+# Doing this avoids lookups that could fall back to very old versions, e.g. by finding
+# outdated registry entries.
+# ref: https://cmake.org/cmake/help/latest/variable/CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION.html +if(CMAKE_GENERATOR MATCHES "Visual Studio") + set(CMAKE_SYSTEM_VERSION 10.0 CACHE INTERNAL "target Windows SDK version" FORCE) +endif() + project(lightgbm LANGUAGES C CXX) if(BUILD_CPP_TEST) From 89a470466d0fe8ff0cb0ee4593a121d835e89f8c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 2 Jun 2024 22:12:27 -0500 Subject: [PATCH 21/41] [ci] [R-package] add new linters from {lintr} 3.1.1 (#6471) --- .ci/lint_r_code.R | 3 +++ .ci/test.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R index d5b1217a5b04..8de09c0ff1ac 100755 --- a/.ci/lint_r_code.R +++ b/.ci/lint_r_code.R @@ -52,6 +52,8 @@ LINTERS_TO_USE <- list( , "inner_combine" = lintr::inner_combine_linter() , "is_numeric" = lintr::is_numeric_linter() , "lengths" = lintr::lengths_linter() + , "length_levels" = lintr::length_levels_linter() + , "length_test" = lintr::length_test_linter() , "line_length" = lintr::line_length_linter(length = 120L) , "literal_coercion" = lintr::literal_coercion_linter() , "matrix" = lintr::matrix_apply_linter() @@ -66,6 +68,7 @@ LINTERS_TO_USE <- list( , "redundant_equals" = lintr::redundant_equals_linter() , "regex_subset" = lintr::regex_subset_linter() , "routine_registration" = lintr::routine_registration_linter() + , "scalar_in" = lintr::scalar_in_linter() , "semicolon" = lintr::semicolon_linter() , "seq" = lintr::seq_linter() , "spaces_inside" = lintr::spaces_inside_linter() diff --git a/.ci/test.sh b/.ci/test.sh index 1b0d5a745ae0..c71c54ed906c 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -86,7 +86,7 @@ if [[ $TASK == "lint" ]]; then 'mypy>=1.8.0' \ 'pre-commit>=3.6.0' \ 'pyarrow>=6.0' \ - 'r-lintr>=3.1' + 'r-lintr>=3.1.2' source activate $CONDA_ENV echo "Linting Python code" bash ${BUILD_DIRECTORY}/.ci/lint-python.sh || exit 1 From ebac9e8e279a25f241c5a1a8af476db0cea84159 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 2 Jun 2024 22:33:53 -0500 Subject: [PATCH 22/41] [ci] upgrade linters to latest version (#6472) --- .ci/get_workflow_status.py | 1 + .nuget/create_nuget.py | 1 + .pre-commit-config.yaml | 4 ++-- docs/conf.py | 1 + helpers/check_dynamic_dependencies.py | 1 + helpers/parameter_generator.py | 1 + python-package/lightgbm/__init__.py | 1 + python-package/lightgbm/basic.py | 1 + python-package/lightgbm/callback.py | 1 + python-package/lightgbm/dask.py | 1 + python-package/lightgbm/engine.py | 1 + python-package/lightgbm/libpath.py | 1 + python-package/lightgbm/plotting.py | 1 + python-package/lightgbm/sklearn.py | 1 + 14 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.ci/get_workflow_status.py b/.ci/get_workflow_status.py index e2e8926bd692..875345c2efde 100644 --- a/.ci/get_workflow_status.py +++ b/.ci/get_workflow_status.py @@ -6,6 +6,7 @@ TRIGGER_PHRASE: Code phrase that triggers workflow. 
""" + import json from os import environ from sys import argv, exit diff --git a/.nuget/create_nuget.py b/.nuget/create_nuget.py index ec954dae7ca3..1e6258dbcdb3 100644 --- a/.nuget/create_nuget.py +++ b/.nuget/create_nuget.py @@ -1,5 +1,6 @@ # coding: utf-8 """Script for generating files with NuGet package metadata.""" + import datetime import sys from pathlib import Path diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ffb6a8f8f2f..6c8732a01416 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ exclude: | repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace @@ -25,7 +25,7 @@ repos: args: ["--settings-path", "python-package/pyproject.toml"] - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.2.1 + rev: v0.4.7 hooks: # Run the linter. - id: ruff diff --git a/docs/conf.py b/docs/conf.py index 2780f47d913f..f8bd29a69922 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,6 +17,7 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute. """Sphinx configuration file.""" + import datetime import os import sys diff --git a/helpers/check_dynamic_dependencies.py b/helpers/check_dynamic_dependencies.py index fcf41ea52701..087061d9575c 100644 --- a/helpers/check_dynamic_dependencies.py +++ b/helpers/check_dynamic_dependencies.py @@ -12,6 +12,7 @@ * GLIBCXX: https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html * OMP/GOMP: https://github.com/gcc-mirror/gcc/blob/master/libgomp/libgomp.map """ + import re import sys from pathlib import Path diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py index fb1749638bca..15db36da2bb2 100644 --- a/helpers/parameter_generator.py +++ b/helpers/parameter_generator.py @@ -6,6 +6,7 @@ along with parameters description in LightGBM/docs/Parameters.rst file from the information in LightGBM/include/LightGBM/config.h file. """ + import re from collections import defaultdict from pathlib import Path diff --git a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py index ae38a6169a22..600f71284159 100644 --- a/python-package/lightgbm/__init__.py +++ b/python-package/lightgbm/__init__.py @@ -3,6 +3,7 @@ Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. """ + from pathlib import Path from .basic import Booster, Dataset, Sequence, register_logger diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 8e33d0ab56f2..5bfb8dcbbb58 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1,5 +1,6 @@ # coding: utf-8 """Wrapper for C API of LightGBM.""" + import abc import ctypes import inspect diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py index e776ea953bd1..ae1e72c549d4 100644 --- a/python-package/lightgbm/callback.py +++ b/python-package/lightgbm/callback.py @@ -1,5 +1,6 @@ # coding: utf-8 """Callbacks library.""" + from collections import OrderedDict from dataclasses import dataclass from functools import partial diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index 928fe51bddce..e15979bc40db 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -6,6 +6,7 @@ It is based on dask-lightgbm, which was based on dask-xgboost. 
""" + import operator import socket from collections import defaultdict diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 74b211f4a426..5f93824458d4 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -1,5 +1,6 @@ # coding: utf-8 """Library with training routines of LightGBM.""" + import copy import json import warnings diff --git a/python-package/lightgbm/libpath.py b/python-package/lightgbm/libpath.py index 09eb946e3c45..a55e7362ab44 100644 --- a/python-package/lightgbm/libpath.py +++ b/python-package/lightgbm/libpath.py @@ -1,5 +1,6 @@ # coding: utf-8 """Find the path to LightGBM dynamic library files.""" + from pathlib import Path from platform import system from typing import List diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 9bcc1b928ffd..1c7bda5c68b6 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -1,5 +1,6 @@ # coding: utf-8 """Plotting library.""" + import math from copy import deepcopy from io import BytesIO diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 1ea7b47c5462..9f1a62f542ca 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -1,5 +1,6 @@ # coding: utf-8 """Scikit-learn wrapper interface for LightGBM.""" + import copy from inspect import signature from pathlib import Path From e0cda880fc74ca6d1b7d6cb425a24e3a69764bb1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 3 Jun 2024 20:17:40 -0500 Subject: [PATCH 23/41] [python-package] remove uses of deprecated NumPy random number generation APIs, require 'numpy>=1.17.0' (#6468) --- .gitignore | 2 +- docs/Python-Intro.rst | 14 +- examples/python-guide/logistic_regression.py | 9 +- python-package/lightgbm/compat.py | 12 -- python-package/lightgbm/sklearn.py | 5 +- python-package/pyproject.toml | 4 +- tests/python_package_test/conftest.py | 12 ++ tests/python_package_test/test_basic.py | 107 +++++----- tests/python_package_test/test_engine.py | 212 +++++++++---------- tests/python_package_test/test_sklearn.py | 64 +++--- 10 files changed, 223 insertions(+), 218 deletions(-) create mode 100644 tests/python_package_test/conftest.py diff --git a/.gitignore b/.gitignore index 9403475cc190..efa59fdfc962 100644 --- a/.gitignore +++ b/.gitignore @@ -405,7 +405,7 @@ python-package/lightgbm/VERSION.txt # R build artefacts **/autom4te.cache/ -conftest* +R-package/conftest* R-package/config.status !R-package/data/agaricus.test.rda !R-package/data/agaricus.train.rda diff --git a/docs/Python-Intro.rst b/docs/Python-Intro.rst index 3c1cb1557e3f..a1c62c5858e4 100644 --- a/docs/Python-Intro.rst +++ b/docs/Python-Intro.rst @@ -59,8 +59,9 @@ Many of the examples in this page use functionality from ``numpy``. To run the e .. code:: python - data = np.random.rand(500, 10) # 500 entities, each contains 10 features - label = np.random.randint(2, size=500) # binary target + rng = np.random.default_rng() + data = rng.uniform(size=(500, 10)) # 500 entities, each contains 10 features + label = rng.integers(low=0, high=2, size=(500, )) # binary target train_data = lgb.Dataset(data, label=label) **To load a scipy.sparse.csr\_matrix array into Dataset:** @@ -139,7 +140,8 @@ It doesn't need to convert to one-hot encoding, and is much faster than one-hot .. 
code:: python - w = np.random.rand(500, ) + rng = np.random.default_rng() + w = rng.uniform(size=(500, )) train_data = lgb.Dataset(data, label=label, weight=w) or @@ -147,7 +149,8 @@ or .. code:: python train_data = lgb.Dataset(data, label=label) - w = np.random.rand(500, ) + rng = np.random.default_rng() + w = rng.uniform(size=(500, )) train_data.set_weight(w) And you can use ``Dataset.set_init_score()`` to set initial score, and ``Dataset.set_group()`` to set group/query data for ranking tasks. @@ -249,7 +252,8 @@ A model that has been trained or loaded can perform predictions on datasets: .. code:: python # 7 entities, each contains 10 features - data = np.random.rand(7, 10) + rng = np.random.default_rng() + data = rng.uniform(size=(7, 10)) ypred = bst.predict(data) If early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``: diff --git a/examples/python-guide/logistic_regression.py b/examples/python-guide/logistic_regression.py index ea02382eb293..c73155da59ef 100644 --- a/examples/python-guide/logistic_regression.py +++ b/examples/python-guide/logistic_regression.py @@ -22,15 +22,15 @@ ################# # Simulate some binary data with a single categorical and # single continuous predictor -np.random.seed(0) +rng = np.random.default_rng(seed=0) N = 1000 X = pd.DataFrame({"continuous": range(N), "categorical": np.repeat([0, 1, 2, 3, 4], N / 5)}) CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2] LINEAR_TERM = np.array( [-0.5 + 0.01 * X["continuous"][k] + CATEGORICAL_EFFECTS[X["categorical"][k]] for k in range(X.shape[0])] -) + np.random.normal(0, 1, X.shape[0]) +) + rng.normal(loc=0, scale=1, size=X.shape[0]) TRUE_PROB = expit(LINEAR_TERM) -Y = np.random.binomial(1, TRUE_PROB, size=N) +Y = rng.binomial(n=1, p=TRUE_PROB, size=N) DATA = { "X": X, "probability_labels": TRUE_PROB, @@ -65,10 +65,9 @@ def experiment(objective, label_type, data): result : dict Experiment summary stats. 
""" - np.random.seed(0) nrounds = 5 lgb_data = data[f"lgb_with_{label_type}_labels"] - params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1} + params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1, "seed": 123} time_zero = time.time() gbm = lgb.train(params, lgb_data, num_boost_round=nrounds) y_fitted = gbm.predict(data["X"]) diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 9eed61a66a6c..113960609058 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -37,18 +37,6 @@ def __init__(self, *args: Any, **kwargs: Any): concat = None -"""numpy""" -try: - from numpy.random import Generator as np_random_Generator -except ImportError: - - class np_random_Generator: # type: ignore - """Dummy class for np.random.Generator.""" - - def __init__(self, *args: Any, **kwargs: Any): - pass - - """matplotlib""" try: import matplotlib # noqa: F401 diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 9f1a62f542ca..46f41a428348 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -41,7 +41,6 @@ _LGBMModelBase, _LGBMRegressorBase, dt_DataTable, - np_random_Generator, pd_DataFrame, ) from .engine import train @@ -476,7 +475,7 @@ def __init__( colsample_bytree: float = 1.0, reg_alpha: float = 0.0, reg_lambda: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None, + random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None, n_jobs: Optional[int] = None, importance_type: str = "split", **kwargs: Any, @@ -739,7 +738,7 @@ def _process_params(self, stage: str) -> Dict[str, Any]: if isinstance(params["random_state"], np.random.RandomState): params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max) - elif isinstance(params["random_state"], np_random_Generator): + elif isinstance(params["random_state"], np.random.Generator): params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max)) if self._n_classes > 2: for alias in _ConfigAliases.get("num_class"): diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index b7bff79edfc8..d671b5456ae6 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Artificial Intelligence" ] dependencies = [ - "numpy", + "numpy>=1.17.0", "scipy" ] description = "LightGBM Python Package" @@ -156,6 +156,8 @@ select = [ "E", # pyflakes "F", + # NumPy-specific rules + "NPY", # pylint "PL", # flake8-return: unnecessary assignment before return diff --git a/tests/python_package_test/conftest.py b/tests/python_package_test/conftest.py new file mode 100644 index 000000000000..7d9c5b27079f --- /dev/null +++ b/tests/python_package_test/conftest.py @@ -0,0 +1,12 @@ +import numpy as np +import pytest + + +@pytest.fixture(scope="function") +def rng(): + return np.random.default_rng() + + +@pytest.fixture(scope="function") +def rng_fixed_seed(): + return np.random.default_rng(seed=42) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 92f5593ef7c1..e2f379dad9d3 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -136,7 +136,7 @@ def _create_sequence_from_ndarray(data, num_seq, batch_size): @pytest.mark.parametrize("batch_size", [3, None]) 
@pytest.mark.parametrize("include_0_and_nan", [False, True]) @pytest.mark.parametrize("num_seq", [1, 3]) -def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq): +def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq, rng): params = {"bin_construct_sample_cnt": sample_count} nrow = 50 @@ -175,7 +175,6 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq): # Test for validation set. # Select some random rows as valid data. - rng = np.random.default_rng() # Pass integer to set seed when needed. valid_idx = (rng.random(10) * nrow).astype(np.int32) valid_data = data[valid_idx, :] valid_X = valid_data[:, :-1] @@ -201,7 +200,7 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq): @pytest.mark.parametrize("num_seq", [1, 2]) -def test_sequence_get_data(num_seq): +def test_sequence_get_data(num_seq, rng): nrow = 20 ncol = 11 data = np.arange(nrow * ncol, dtype=np.float64).reshape((nrow, ncol)) @@ -212,7 +211,7 @@ def test_sequence_get_data(num_seq): seq_ds = lgb.Dataset(seqs, label=Y, params=None, free_raw_data=False).construct() assert seq_ds.get_data() == seqs - used_indices = np.random.choice(np.arange(nrow), nrow // 3, replace=False) + used_indices = rng.choice(a=np.arange(nrow), size=nrow // 3, replace=False) subset_data = seq_ds.subset(used_indices).construct() np.testing.assert_array_equal(subset_data.get_data(), X[sorted(used_indices)]) @@ -246,8 +245,8 @@ def test_chunked_dataset_linear(): valid_data.construct() -def test_save_dataset_subset_and_load_from_file(tmp_path): - data = np.random.rand(100, 2) +def test_save_dataset_subset_and_load_from_file(tmp_path, rng): + data = rng.standard_normal(size=(100, 2)) params = {"max_bin": 50, "min_data_in_bin": 10} ds = lgb.Dataset(data, params=params) ds.subset([1, 2, 3, 5, 8]).save_binary(tmp_path / "subset.bin") @@ -267,18 +266,18 @@ def test_subset_group(): assert subset_group[1] == 9 -def test_add_features_throws_if_num_data_unequal(): - X1 = np.random.random((100, 1)) - X2 = np.random.random((10, 1)) +def test_add_features_throws_if_num_data_unequal(rng): + X1 = rng.uniform(size=(100, 1)) + X2 = rng.uniform(size=(10, 1)) d1 = lgb.Dataset(X1).construct() d2 = lgb.Dataset(X2).construct() with pytest.raises(lgb.basic.LightGBMError): d1.add_features_from(d2) -def test_add_features_throws_if_datasets_unconstructed(): - X1 = np.random.random((100, 1)) - X2 = np.random.random((100, 1)) +def test_add_features_throws_if_datasets_unconstructed(rng): + X1 = rng.uniform(size=(100, 1)) + X2 = rng.uniform(size=(100, 1)) with pytest.raises(ValueError): d1 = lgb.Dataset(X1) d2 = lgb.Dataset(X2) @@ -293,8 +292,8 @@ def test_add_features_throws_if_datasets_unconstructed(): d1.add_features_from(d2) -def test_add_features_equal_data_on_alternating_used_unused(tmp_path): - X = np.random.random((100, 5)) +def test_add_features_equal_data_on_alternating_used_unused(tmp_path, rng): + X = rng.uniform(size=(100, 5)) X[:, [1, 3]] = 0 names = [f"col_{i}" for i in range(5)] for j in range(1, 5): @@ -313,8 +312,8 @@ def test_add_features_equal_data_on_alternating_used_unused(tmp_path): assert dtxt == d1txt -def test_add_features_same_booster_behaviour(tmp_path): - X = np.random.random((100, 5)) +def test_add_features_same_booster_behaviour(tmp_path, rng): + X = rng.uniform(size=(100, 5)) X[:, [1, 3]] = 0 names = [f"col_{i}" for i in range(5)] for j in range(1, 5): @@ -322,7 +321,7 @@ def test_add_features_same_booster_behaviour(tmp_path): d2 = lgb.Dataset(X[:, j:], 
feature_name=names[j:]).construct() d1.add_features_from(d2) d = lgb.Dataset(X, feature_name=names).construct() - y = np.random.random(100) + y = rng.uniform(size=(100,)) d1.set_label(y) d.set_label(y) b1 = lgb.Booster(train_set=d1) @@ -341,11 +340,11 @@ def test_add_features_same_booster_behaviour(tmp_path): assert dtxt == d1txt -def test_add_features_from_different_sources(): +def test_add_features_from_different_sources(rng): pd = pytest.importorskip("pandas") n_row = 100 n_col = 5 - X = np.random.random((n_row, n_col)) + X = rng.uniform(size=(n_row, n_col)) xxs = [X, sparse.csr_matrix(X), pd.DataFrame(X)] names = [f"col_{i}" for i in range(n_col)] seq = _create_sequence_from_ndarray(X, 1, 30) @@ -380,9 +379,9 @@ def test_add_features_from_different_sources(): assert d1.feature_name == res_feature_names -def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys): +def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys, rng): arr_a = np.zeros((100, 1), dtype=np.float32) - arr_b = np.random.normal(size=(100, 5)) + arr_b = rng.uniform(size=(100, 5)) dataset_a = lgb.Dataset(arr_a).construct() expected_msg = ( @@ -402,10 +401,10 @@ def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_feat assert dataset_a._handle.value == original_handle -def test_cegb_affects_behavior(tmp_path): - X = np.random.random((100, 5)) +def test_cegb_affects_behavior(tmp_path, rng): + X = rng.uniform(size=(100, 5)) X[:, [1, 3]] = 0 - y = np.random.random(100) + y = rng.uniform(size=(100,)) names = [f"col_{i}" for i in range(5)] ds = lgb.Dataset(X, feature_name=names).construct() ds.set_label(y) @@ -433,10 +432,10 @@ def test_cegb_affects_behavior(tmp_path): assert basetxt != casetxt -def test_cegb_scaling_equalities(tmp_path): - X = np.random.random((100, 5)) +def test_cegb_scaling_equalities(tmp_path, rng): + X = rng.uniform(size=(100, 5)) X[:, [1, 3]] = 0 - y = np.random.random(100) + y = rng.uniform(size=(100,)) names = [f"col_{i}" for i in range(5)] ds = lgb.Dataset(X, feature_name=names).construct() ds.set_label(y) @@ -573,10 +572,10 @@ def test_dataset_construction_overwrites_user_provided_metadata_fields(): np_assert_array_equal(dtrain.get_field("weight"), expected_weight, strict=True) -def test_dataset_construction_with_high_cardinality_categorical_succeeds(): +def test_dataset_construction_with_high_cardinality_categorical_succeeds(rng): pd = pytest.importorskip("pandas") - X = pd.DataFrame({"x1": np.random.randint(0, 5_000, 10_000)}) - y = np.random.rand(10_000) + X = pd.DataFrame({"x1": rng.integers(low=0, high=5_000, size=(10_000,))}) + y = rng.uniform(size=(10_000,)) ds = lgb.Dataset(X, y, categorical_feature=["x1"]) ds.construct() assert ds.num_data() == 10_000 @@ -663,11 +662,11 @@ def test_choose_param_value_objective(objective_alias): @pytest.mark.parametrize("collection", ["1d_np", "2d_np", "pd_float", "pd_str", "1d_list", "2d_list"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_list_to_1d_numpy(collection, dtype): +def test_list_to_1d_numpy(collection, dtype, rng): collection2y = { - "1d_np": np.random.rand(10), - "2d_np": np.random.rand(10, 1), - "pd_float": np.random.rand(10), + "1d_np": rng.uniform(size=(10,)), + "2d_np": rng.uniform(size=(10, 1)), + "pd_float": rng.uniform(size=(10,)), "pd_str": ["a", "b"], "1d_list": [1] * 10, "2d_list": [[1], [2]], @@ -696,7 +695,7 @@ def test_list_to_1d_numpy(collection, dtype): @pytest.mark.parametrize("init_score_type", ["array", 
"dataframe", "list"]) -def test_init_score_for_multiclass_classification(init_score_type): +def test_init_score_for_multiclass_classification(init_score_type, rng): init_score = [[i * 10 + j for j in range(3)] for i in range(10)] if init_score_type == "array": init_score = np.array(init_score) @@ -704,7 +703,7 @@ def test_init_score_for_multiclass_classification(init_score_type): if not PANDAS_INSTALLED: pytest.skip("Pandas is not installed.") init_score = pd_DataFrame(init_score) - data = np.random.rand(10, 2) + data = rng.uniform(size=(10, 2)) ds = lgb.Dataset(data, init_score=init_score).construct() np.testing.assert_equal(ds.get_field("init_score"), init_score) np.testing.assert_equal(ds.init_score, init_score) @@ -741,16 +740,20 @@ def test_param_aliases(): def _bad_gradients(preds, _): - return np.random.randn(len(preds) + 1), np.random.rand(len(preds) + 1) + rng = np.random.default_rng() + # "bad" = 1 element too many + size = (len(preds) + 1,) + return rng.standard_normal(size=size), rng.uniform(size=size) def _good_gradients(preds, _): - return np.random.randn(*preds.shape), np.random.rand(*preds.shape) + rng = np.random.default_rng() + return rng.standard_normal(size=preds.shape), rng.uniform(size=preds.shape) -def test_custom_objective_safety(): +def test_custom_objective_safety(rng): nrows = 100 - X = np.random.randn(nrows, 5) + X = rng.standard_normal(size=(nrows, 5)) y_binary = np.arange(nrows) % 2 classes = [0, 1, 2] nclass = len(classes) @@ -771,9 +774,9 @@ def test_custom_objective_safety(): @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("feature_name", [["x1", "x2"], "auto"]) -def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name): +def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng): pd = pytest.importorskip("pandas") - X = np.random.rand(10, 2).astype(dtype) + X = rng.uniform(size=(10, 2)).astype(dtype) df = pd.DataFrame(X) built_data = lgb.basic._data_from_pandas( data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None @@ -784,9 +787,9 @@ def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name): @pytest.mark.parametrize("feature_name", [["x1"], [42], "auto"]) @pytest.mark.parametrize("categories", ["seen", "unseen"]) -def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories): +def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories, rng): pd = pytest.importorskip("pandas") - X = np.random.choice(["a", "b"], 100).reshape(-1, 1) + X = rng.choice(a=["a", "b"], size=(100, 1)) column_name = "a" if feature_name == "auto" else feature_name[0] df = pd.DataFrame(X.copy(), columns=[column_name], dtype="category") if categories == "seen": @@ -814,15 +817,15 @@ def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, c @pytest.mark.parametrize("min_data_in_bin", [2, 10]) -def test_feature_num_bin(min_data_in_bin): +def test_feature_num_bin(min_data_in_bin, rng): X = np.vstack( [ - np.random.rand(100), + rng.uniform(size=(100,)), np.array([1, 2] * 50), np.array([0, 1, 2] * 33 + [0]), np.array([1, 2] * 49 + 2 * [np.nan]), np.zeros(100), - np.random.choice([0, 1], 100), + rng.choice(a=[0, 1], size=(100,)), ] ).T n_continuous = X.shape[1] - 1 @@ -862,9 +865,9 @@ def test_feature_num_bin(min_data_in_bin): ds.feature_num_bin(num_features) -def test_feature_num_bin_with_max_bin_by_feature(): - X = np.random.rand(100, 3) - max_bin_by_feature = np.random.randint(3, 30, 
size=X.shape[1]) +def test_feature_num_bin_with_max_bin_by_feature(rng): + X = rng.uniform(size=(100, 3)) + max_bin_by_feature = rng.integers(low=3, high=30, size=X.shape[1]) ds = lgb.Dataset(X, params={"max_bin_by_feature": max_bin_by_feature}).construct() actual_num_bins = [ds.feature_num_bin(i) for i in range(X.shape[1])] np.testing.assert_equal(actual_num_bins, max_bin_by_feature) @@ -882,8 +885,8 @@ def test_set_leaf_output(): np.testing.assert_allclose(bst.predict(X), y_pred + 1) -def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Dataset(): +def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Dataset(rng): ds = lgb.Dataset( - data=np.random.randn(100, 3), + data=rng.standard_normal(size=(100, 3)), ) assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"] diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 7b1009632626..a0706e401c71 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -550,7 +550,7 @@ def test_multi_class_error(): @pytest.mark.skipif( getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version" ) -def test_auc_mu(): +def test_auc_mu(rng): # should give same result as binary auc for 2 classes X, y = load_digits(n_class=10, return_X_y=True) y_new = np.zeros((len(y))) @@ -578,7 +578,7 @@ def test_auc_mu(): assert results_auc_mu["training"]["auc_mu"][-1] == pytest.approx(0.5) # test that weighted data gives different auc_mu lgb_X = lgb.Dataset(X, label=y) - lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(np.random.normal(size=y.shape))) + lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(rng.standard_normal(size=y.shape))) results_unweighted = {} results_weighted = {} params = dict(params, num_classes=10, num_leaves=5) @@ -1432,9 +1432,9 @@ def test_feature_name(): assert feature_names == gbm.feature_name() -def test_feature_name_with_non_ascii(): - X_train = np.random.normal(size=(100, 4)) - y_train = np.random.random(100) +def test_feature_name_with_non_ascii(rng): + X_train = rng.normal(size=(100, 4)) + y_train = rng.normal(size=(100,)) # This has non-ascii strings. feature_names = ["F_零", "F_一", "F_二", "F_三"] params = {"verbose": -1} @@ -1448,9 +1448,14 @@ def test_feature_name_with_non_ascii(): assert feature_names == gbm2.feature_name() -def test_parameters_are_loaded_from_model_file(tmp_path, capsys): - X = np.hstack([np.random.rand(100, 1), np.random.randint(0, 5, (100, 2))]) - y = np.random.rand(100) +def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): + X = np.hstack( + [ + rng.uniform(size=(100, 1)), + rng.integers(low=0, high=5, size=(100, 2)), + ] + ) + y = rng.uniform(size=(100,)) ds = lgb.Dataset(X, y) params = { "bagging_fraction": 0.8, @@ -1702,29 +1707,29 @@ def test_all_expected_params_are_written_out_to_model_text(tmp_path): assert param_str in model_txt_from_memory -def test_pandas_categorical(): +# why fixed seed? 
+# sometimes there is no difference how cols are treated (cat or not cat) +def test_pandas_categorical(rng_fixed_seed): pd = pytest.importorskip("pandas") - np.random.seed(42) # sometimes there is no difference how cols are treated (cat or not cat) X = pd.DataFrame( { - "A": np.random.permutation(["a", "b", "c", "d"] * 75), # str - "B": np.random.permutation([1, 2, 3] * 100), # int - "C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float - "D": np.random.permutation([True, False] * 150), # bool - "E": pd.Categorical(np.random.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True), + "A": rng_fixed_seed.permutation(["a", "b", "c", "d"] * 75), # str + "B": rng_fixed_seed.permutation([1, 2, 3] * 100), # int + "C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float + "D": rng_fixed_seed.permutation([True, False] * 150), # bool + "E": pd.Categorical(rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True), } ) # str and ordered categorical - y = np.random.permutation([0, 1] * 150) + y = rng_fixed_seed.permutation([0, 1] * 150) X_test = pd.DataFrame( { - "A": np.random.permutation(["a", "b", "e"] * 20), # unseen category - "B": np.random.permutation([1, 3] * 30), - "C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15), - "D": np.random.permutation([True, False] * 30), - "E": pd.Categorical(np.random.permutation(["z", "y"] * 30), ordered=True), + "A": rng_fixed_seed.permutation(["a", "b", "e"] * 20), # unseen category + "B": rng_fixed_seed.permutation([1, 3] * 30), + "C": rng_fixed_seed.permutation([0.1, -0.1, 0.2, 0.2] * 15), + "D": rng_fixed_seed.permutation([True, False] * 30), + "E": pd.Categorical(rng_fixed_seed.permutation(["z", "y"] * 30), ordered=True), } ) - np.random.seed() # reset seed cat_cols_actual = ["A", "B", "C", "D"] cat_cols_to_store = cat_cols_actual + ["E"] X[cat_cols_actual] = X[cat_cols_actual].astype("category") @@ -1786,21 +1791,21 @@ def test_pandas_categorical(): assert gbm7.pandas_categorical == cat_values -def test_pandas_sparse(): +def test_pandas_sparse(rng): pd = pytest.importorskip("pandas") X = pd.DataFrame( { - "A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)), - "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)), - "C": pd.arrays.SparseArray(np.random.permutation([True, False] * 150)), + "A": pd.arrays.SparseArray(rng.permutation([0, 1, 2] * 100)), + "B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)), + "C": pd.arrays.SparseArray(rng.permutation([True, False] * 150)), } ) - y = pd.Series(pd.arrays.SparseArray(np.random.permutation([0, 1] * 150))) + y = pd.Series(pd.arrays.SparseArray(rng.permutation([0, 1] * 150))) X_test = pd.DataFrame( { - "A": pd.arrays.SparseArray(np.random.permutation([0, 2] * 30)), - "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)), - "C": pd.arrays.SparseArray(np.random.permutation([True, False] * 30)), + "A": pd.arrays.SparseArray(rng.permutation([0, 2] * 30)), + "B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1] * 15)), + "C": pd.arrays.SparseArray(rng.permutation([True, False] * 30)), } ) for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]): @@ -1816,9 +1821,9 @@ def test_pandas_sparse(): np.testing.assert_allclose(pred_sparse, pred_dense) -def test_reference_chain(): - X = np.random.normal(size=(100, 2)) - y = np.random.normal(size=100) +def test_reference_chain(rng): + X = rng.normal(size=(100, 2)) + y = rng.normal(size=(100,)) tmp_dat = 
lgb.Dataset(X, y)
     # take subsets and train
     tmp_dat_train = tmp_dat.subset(np.arange(80))
@@ -1940,28 +1945,28 @@ def test_contribs_sparse_multiclass():
     np.testing.assert_allclose(contribs_csc_array, contribs_dense)


-@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason="not enough RAM")
-def test_int32_max_sparse_contribs():
-    params = {"objective": "binary"}
-    train_features = np.random.rand(100, 1000)
-    train_targets = [0] * 50 + [1] * 50
-    lgb_train = lgb.Dataset(train_features, train_targets)
-    gbm = lgb.train(params, lgb_train, num_boost_round=2)
-    csr_input_shape = (3000000, 1000)
-    test_features = csr_matrix(csr_input_shape)
-    for i in range(0, csr_input_shape[0], csr_input_shape[0] // 6):
-        for j in range(0, 1000, 100):
-            test_features[i, j] = random.random()
-    y_pred_csr = gbm.predict(test_features, pred_contrib=True)
-    # Note there is an extra column added to the output for the expected value
-    csr_output_shape = (csr_input_shape[0], csr_input_shape[1] + 1)
-    assert y_pred_csr.shape == csr_output_shape
-    y_pred_csc = gbm.predict(test_features.tocsc(), pred_contrib=True)
-    # Note output CSC shape should be same as CSR output shape
-    assert y_pred_csc.shape == csr_output_shape
-
-
-def test_sliced_data():
+@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason="not enough RAM")
+def test_int32_max_sparse_contribs(rng):
+    params = {"objective": "binary"}
+    train_features = rng.uniform(size=(100, 1000))
+    train_targets = [0] * 50 + [1] * 50
+    lgb_train = lgb.Dataset(train_features, train_targets)
+    gbm = lgb.train(params, lgb_train, num_boost_round=2)
+    csr_input_shape = (3000000, 1000)
+    test_features = csr_matrix(csr_input_shape)
+    for i in range(0, csr_input_shape[0], csr_input_shape[0] // 6):
+        for j in range(0, 1000, 100):
+            test_features[i, j] = random.random()
+    y_pred_csr = gbm.predict(test_features, pred_contrib=True)
+    # Note there is an extra column added to the output for the expected value
+    csr_output_shape = (csr_input_shape[0], csr_input_shape[1] + 1)
+    assert y_pred_csr.shape == csr_output_shape
+    y_pred_csc = gbm.predict(test_features.tocsc(), pred_contrib=True)
+    # Note output CSC shape should be same as CSR output shape
+    assert y_pred_csc.shape == csr_output_shape
+
+
+def test_sliced_data(rng):
     def train_and_get_predictions(features, labels):
         dataset = lgb.Dataset(features, label=labels)
         lgb_params = {
@@ -1977,7 +1982,7 @@ def train_and_get_predictions(features, labels):
         return gbm.predict(features)

     num_samples = 100
-    features = np.random.rand(num_samples, 5)
+    features = rng.uniform(size=(num_samples, 5))
     positive_samples = int(num_samples * 0.25)
     labels = np.append(
         np.ones(positive_samples, dtype=np.float32), np.zeros(num_samples - positive_samples, dtype=np.float32)
@@ -2011,13 +2016,13 @@ def train_and_get_predictions(features, labels):
     np.testing.assert_allclose(origin_pred, sliced_pred)


-def test_init_with_subset():
-    data = np.random.random((50, 2))
+def test_init_with_subset(rng):
+    data = rng.uniform(size=(50, 2))
     y = [1] * 25 + [0] * 25
     lgb_train = lgb.Dataset(data, y, free_raw_data=False)
-    subset_index_1 = np.random.choice(np.arange(50), 30, replace=False)
+    subset_index_1 = rng.choice(a=np.arange(50), size=30, replace=False)
     subset_data_1 = lgb_train.subset(subset_index_1)
-    subset_index_2 = np.random.choice(np.arange(50), 20, replace=False)
+    subset_index_2 = rng.choice(a=np.arange(50), size=20, replace=False)
     subset_data_2 = lgb_train.subset(subset_index_2)
params = {"objective": "binary", "verbose": -1} init_gbm = lgb.train(params=params, train_set=subset_data_1, num_boost_round=10, keep_training_booster=True) @@ -2037,9 +2042,9 @@ def test_init_with_subset(): assert subset_data_4.get_data() == "lgb_train_data.bin" -def test_training_on_constructed_subset_without_params(): - X = np.random.random((100, 10)) - y = np.random.random(100) +def test_training_on_constructed_subset_without_params(rng): + X = rng.uniform(size=(100, 10)) + y = rng.uniform(size=(100,)) lgb_data = lgb.Dataset(X, y) subset_indices = [1, 2, 3, 4] subset = lgb_data.subset(subset_indices).construct() @@ -2051,9 +2056,10 @@ def test_training_on_constructed_subset_without_params(): def generate_trainset_for_monotone_constraints_tests(x3_to_category=True): number_of_dpoints = 3000 - x1_positively_correlated_with_y = np.random.random(size=number_of_dpoints) - x2_negatively_correlated_with_y = np.random.random(size=number_of_dpoints) - x3_negatively_correlated_with_y = np.random.random(size=number_of_dpoints) + rng = np.random.default_rng() + x1_positively_correlated_with_y = rng.uniform(size=number_of_dpoints) + x2_negatively_correlated_with_y = rng.uniform(size=number_of_dpoints) + x3_negatively_correlated_with_y = rng.uniform(size=number_of_dpoints) x = np.column_stack( ( x1_positively_correlated_with_y, @@ -2062,8 +2068,8 @@ def generate_trainset_for_monotone_constraints_tests(x3_to_category=True): ) ) - zs = np.random.normal(loc=0.0, scale=0.01, size=number_of_dpoints) - scales = 10.0 * (np.random.random(6) + 0.5) + zs = rng.normal(loc=0.0, scale=0.01, size=number_of_dpoints) + scales = 10.0 * (rng.uniform(size=6) + 0.5) y = ( scales[0] * x1_positively_correlated_with_y + np.sin(scales[1] * np.pi * x1_positively_correlated_with_y) @@ -2265,9 +2271,8 @@ def test_max_bin_by_feature(): assert len(np.unique(est.predict(X))) == 3 -def test_small_max_bin(): - np.random.seed(0) - y = np.random.choice([0, 1], 100) +def test_small_max_bin(rng_fixed_seed): + y = rng_fixed_seed.choice([0, 1], 100) x = np.ones((100, 1)) x[:30, 0] = -1 x[60:, 0] = 2 @@ -2278,7 +2283,6 @@ def test_small_max_bin(): params["max_bin"] = 3 lgb_x = lgb.Dataset(x, label=y) lgb.train(params, lgb_x, num_boost_round=5) - np.random.seed() # reset seed def test_refit(): @@ -2293,14 +2297,14 @@ def test_refit(): assert err_pred > new_err_pred -def test_refit_dataset_params(): +def test_refit_dataset_params(rng): # check refit accepts dataset_params X, y = load_breast_cancer(return_X_y=True) lgb_train = lgb.Dataset(X, y, init_score=np.zeros(y.size)) train_params = {"objective": "binary", "verbose": -1, "seed": 123} gbm = lgb.train(train_params, lgb_train, num_boost_round=10) non_weight_err_pred = log_loss(y, gbm.predict(X)) - refit_weight = np.random.rand(y.shape[0]) + refit_weight = rng.uniform(size=(y.shape[0],)) dataset_params = { "max_bin": 260, "min_data_in_bin": 5, @@ -3011,7 +3015,7 @@ def test_model_size(): @pytest.mark.skipif( getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version" ) -def test_get_split_value_histogram(): +def test_get_split_value_histogram(rng_fixed_seed): X, y = make_synthetic_regression() X = np.repeat(X, 3, axis=0) y = np.repeat(y, 3, axis=0) @@ -3351,7 +3355,7 @@ def test_binning_same_sign(): assert predicted[1] == pytest.approx(predicted[2]) -def test_dataset_update_params(): +def test_dataset_update_params(rng): default_params = { "max_bin": 100, "max_bin_by_feature": [20, 10], @@ -3400,8 +3404,8 @@ def test_dataset_update_params(): 
"linear_tree": True, "precise_float_parser": False, } - X = np.random.random((100, 2)) - y = np.random.random(100) + X = rng.uniform(size=(100, 2)) + y = rng.uniform(size=(100,)) # decreasing without freeing raw data is allowed lgb_data = lgb.Dataset(X, y, params=default_params, free_raw_data=False).construct() @@ -3443,12 +3447,12 @@ def test_dataset_update_params(): lgb.train(new_params, lgb_data, num_boost_round=3) -def test_dataset_params_with_reference(): +def test_dataset_params_with_reference(rng): default_params = {"max_bin": 100} - X = np.random.random((100, 2)) - y = np.random.random(100) - X_val = np.random.random((100, 2)) - y_val = np.random.random(100) + X = rng.uniform(size=(100, 2)) + y = rng.uniform(size=(100,)) + X_val = rng.uniform(size=(100, 2)) + y_val = rng.uniform(size=(100,)) lgb_train = lgb.Dataset(X, y, params=default_params, free_raw_data=False).construct() lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train, free_raw_data=False).construct() assert lgb_train.get_params() == default_params @@ -3486,7 +3490,7 @@ def test_path_smoothing(): assert err < err_new -def test_trees_to_dataframe(): +def test_trees_to_dataframe(rng): pytest.importorskip("pandas") def _imptcs_to_numpy(X, impcts_dict): @@ -3516,7 +3520,7 @@ def _imptcs_to_numpy(X, impcts_dict): # test edge case with one leaf X = np.ones((10, 2)) - y = np.random.rand(10) + y = rng.uniform(size=(10,)) data = lgb.Dataset(X, label=y) bst = lgb.train({"objective": "binary", "verbose": -1}, data, num_trees) tree_df = bst.trees_to_dataframe() @@ -3574,11 +3578,10 @@ def test_interaction_constraints(): ) -def test_linear_trees_num_threads(): +def test_linear_trees_num_threads(rng_fixed_seed): # check that number of threads does not affect result - np.random.seed(0) x = np.arange(0, 1000, 0.1) - y = 2 * x + np.random.normal(0, 0.1, len(x)) + y = 2 * x + rng_fixed_seed.normal(loc=0, scale=0.1, size=(len(x),)) x = x[:, np.newaxis] lgb_train = lgb.Dataset(x, label=y) params = {"verbose": -1, "objective": "regression", "seed": 0, "linear_tree": True, "num_threads": 2} @@ -3590,11 +3593,10 @@ def test_linear_trees_num_threads(): np.testing.assert_allclose(pred1, pred2) -def test_linear_trees(tmp_path): +def test_linear_trees(tmp_path, rng_fixed_seed): # check that setting linear_tree=True fits better than ordinary trees when data has linear relationship - np.random.seed(0) x = np.arange(0, 100, 0.1) - y = 2 * x + np.random.normal(0, 0.1, len(x)) + y = 2 * x + rng_fixed_seed.normal(0, 0.1, len(x)) x = x[:, np.newaxis] lgb_train = lgb.Dataset(x, label=y) params = {"verbose": -1, "metric": "mse", "seed": 0, "num_leaves": 2} @@ -4099,21 +4101,20 @@ def test_record_evaluation_with_cv(train_metric): np.testing.assert_allclose(cv_hist[key], eval_result[dataset][f"{metric}-{agg}"]) -def test_pandas_with_numpy_regular_dtypes(): +def test_pandas_with_numpy_regular_dtypes(rng_fixed_seed): pd = pytest.importorskip("pandas") uints = ["uint8", "uint16", "uint32", "uint64"] ints = ["int8", "int16", "int32", "int64"] bool_and_floats = ["bool", "float16", "float32", "float64"] - rng = np.random.RandomState(42) n_samples = 100 # data as float64 df = pd.DataFrame( { - "x1": rng.randint(0, 2, n_samples), - "x2": rng.randint(1, 3, n_samples), - "x3": 10 * rng.randint(1, 3, n_samples), - "x4": 100 * rng.randint(1, 3, n_samples), + "x1": rng_fixed_seed.integers(low=0, high=2, size=n_samples), + "x2": rng_fixed_seed.integers(low=1, high=3, size=n_samples), + "x3": 10 * rng_fixed_seed.integers(low=1, high=3, size=n_samples), + "x4": 100 * 
rng_fixed_seed.integers(low=1, high=3, size=n_samples), } ) df = df.astype(np.float64) @@ -4139,15 +4140,14 @@ def test_pandas_with_numpy_regular_dtypes(): np.testing.assert_allclose(preds, preds2) -def test_pandas_nullable_dtypes(): +def test_pandas_nullable_dtypes(rng_fixed_seed): pd = pytest.importorskip("pandas") - rng = np.random.RandomState(0) df = pd.DataFrame( { - "x1": rng.randint(1, 3, size=100), + "x1": rng_fixed_seed.integers(low=1, high=3, size=100), "x2": np.linspace(-1, 1, 100), - "x3": pd.arrays.SparseArray(rng.randint(0, 11, size=100)), - "x4": rng.rand(100) < 0.5, + "x3": pd.arrays.SparseArray(rng_fixed_seed.integers(low=0, high=11, size=100)), + "x4": rng_fixed_seed.uniform(size=(100,)) < 0.5, } ) # introduce some missing values @@ -4219,7 +4219,7 @@ def test_boost_from_average_with_single_leaf_trees(): assert y.min() <= mean_preds <= y.max() -def test_cegb_split_buffer_clean(): +def test_cegb_split_buffer_clean(rng_fixed_seed): # modified from https://github.com/microsoft/LightGBM/issues/3679#issuecomment-938652811 # and https://github.com/microsoft/LightGBM/pull/5087 # test that the ``splits_per_leaf_`` of CEGB is cleaned before training a new tree @@ -4228,11 +4228,9 @@ def test_cegb_split_buffer_clean(): # Check failed: (best_split_info.left_count) > (0) R, C = 1000, 100 - seed = 29 - np.random.seed(seed) - data = np.random.randn(R, C) + data = rng_fixed_seed.standard_normal(size=(R, C)) for i in range(1, C): - data[i] += data[0] * np.random.randn() + data[i] += data[0] * rng_fixed_seed.standard_normal() N = int(0.8 * len(data)) train_data = data[:N] diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 2fc127b5232d..a995bfcae6b2 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -340,7 +340,7 @@ def test_grid_search(): assert evals_result == grid.best_estimator_.evals_result_ -def test_random_search(): +def test_random_search(rng): X, y = load_iris(return_X_y=True) y = y.astype(str) # utilize label encoder at it's max power X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) @@ -349,8 +349,8 @@ def test_random_search(): params = {"subsample": 0.8, "subsample_freq": 1} param_dist = { "boosting_type": ["rf", "gbdt"], - "n_estimators": [np.random.randint(low=3, high=10) for i in range(n_iter)], - "reg_alpha": [np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)], + "n_estimators": rng.integers(low=3, high=10, size=(n_iter,)).tolist(), + "reg_alpha": rng.uniform(low=0.01, high=0.06, size=(n_iter,)).tolist(), } fit_params = {"eval_set": [(X_val, y_val)], "eval_metric": constant_metric, "callbacks": [lgb.early_stopping(2)]} rand = RandomizedSearchCV( @@ -556,29 +556,29 @@ def test_feature_importances_type(): assert importance_split_top1 != importance_gain_top1 -def test_pandas_categorical(): +# why fixed seed? 
+# sometimes there is no difference how cols are treated (cat or not cat) +def test_pandas_categorical(rng_fixed_seed): pd = pytest.importorskip("pandas") - np.random.seed(42) # sometimes there is no difference how cols are treated (cat or not cat) X = pd.DataFrame( { - "A": np.random.permutation(["a", "b", "c", "d"] * 75), # str - "B": np.random.permutation([1, 2, 3] * 100), # int - "C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float - "D": np.random.permutation([True, False] * 150), # bool - "E": pd.Categorical(np.random.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True), + "A": rng_fixed_seed.permutation(["a", "b", "c", "d"] * 75), # str + "B": rng_fixed_seed.permutation([1, 2, 3] * 100), # int + "C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float + "D": rng_fixed_seed.permutation([True, False] * 150), # bool + "E": pd.Categorical(rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True), } ) # str and ordered categorical - y = np.random.permutation([0, 1] * 150) + y = rng_fixed_seed.permutation([0, 1] * 150) X_test = pd.DataFrame( { - "A": np.random.permutation(["a", "b", "e"] * 20), # unseen category - "B": np.random.permutation([1, 3] * 30), - "C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15), - "D": np.random.permutation([True, False] * 30), - "E": pd.Categorical(np.random.permutation(["z", "y"] * 30), ordered=True), + "A": rng_fixed_seed.permutation(["a", "b", "e"] * 20), # unseen category + "B": rng_fixed_seed.permutation([1, 3] * 30), + "C": rng_fixed_seed.permutation([0.1, -0.1, 0.2, 0.2] * 15), + "D": rng_fixed_seed.permutation([True, False] * 30), + "E": pd.Categorical(rng_fixed_seed.permutation(["z", "y"] * 30), ordered=True), } ) - np.random.seed() # reset seed cat_cols_actual = ["A", "B", "C", "D"] cat_cols_to_store = cat_cols_actual + ["E"] X[cat_cols_actual] = X[cat_cols_actual].astype("category") @@ -620,21 +620,21 @@ def test_pandas_categorical(): assert gbm6.booster_.pandas_categorical == cat_values -def test_pandas_sparse(): +def test_pandas_sparse(rng): pd = pytest.importorskip("pandas") X = pd.DataFrame( { - "A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)), - "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)), - "C": pd.arrays.SparseArray(np.random.permutation([True, False] * 150)), + "A": pd.arrays.SparseArray(rng.permutation([0, 1, 2] * 100)), + "B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)), + "C": pd.arrays.SparseArray(rng.permutation([True, False] * 150)), } ) - y = pd.Series(pd.arrays.SparseArray(np.random.permutation([0, 1] * 150))) + y = pd.Series(pd.arrays.SparseArray(rng.permutation([0, 1] * 150))) X_test = pd.DataFrame( { - "A": pd.arrays.SparseArray(np.random.permutation([0, 2] * 30)), - "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)), - "C": pd.arrays.SparseArray(np.random.permutation([True, False] * 30)), + "A": pd.arrays.SparseArray(rng.permutation([0, 2] * 30)), + "B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1] * 15)), + "C": pd.arrays.SparseArray(rng.permutation([True, False] * 30)), } ) for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]): @@ -1073,11 +1073,11 @@ def test_multiple_eval_metrics(): assert "binary_logloss" in gbm.evals_result_["training"] -def test_nan_handle(): +def test_nan_handle(rng): nrows = 100 ncols = 10 - X = np.random.randn(nrows, ncols) - y = np.random.randn(nrows) + np.full(nrows, 1e30) + X = 
rng.standard_normal(size=(nrows, ncols)) + y = rng.standard_normal(size=(nrows,)) + np.full(nrows, 1e30) weight = np.zeros(nrows) params = {"n_estimators": 20, "verbose": -1} params_fit = {"X": X, "y": y, "sample_weight": weight, "eval_set": (X, y), "callbacks": [lgb.early_stopping(5)]} @@ -1410,13 +1410,13 @@ def test_validate_features(task): @pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"]) @pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_Series", "pd_DataFrame"]) @pytest.mark.parametrize("task", ["binary-classification", "multiclass-classification", "regression"]) -def test_classification_and_regression_minimally_work_with_all_all_accepted_data_types(X_type, y_type, task): +def test_classification_and_regression_minimally_work_with_all_all_accepted_data_types(X_type, y_type, task, rng): if any(t.startswith("pd_") for t in [X_type, y_type]) and not PANDAS_INSTALLED: pytest.skip("pandas is not installed") if any(t.startswith("dt_") for t in [X_type, y_type]) and not DATATABLE_INSTALLED: pytest.skip("datatable is not installed") X, y, g = _create_data(task, n_samples=2_000) - weights = np.abs(np.random.randn(y.shape[0])) + weights = np.abs(rng.standard_normal(size=(y.shape[0],))) if task == "binary-classification" or task == "regression": init_score = np.full_like(y, np.mean(y)) @@ -1487,13 +1487,13 @@ def test_classification_and_regression_minimally_work_with_all_all_accepted_data @pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"]) @pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_DataFrame", "pd_Series"]) @pytest.mark.parametrize("g_type", ["list1d_float", "list1d_int", "numpy", "pd_Series"]) -def test_ranking_minimally_works_with_all_all_accepted_data_types(X_type, y_type, g_type): +def test_ranking_minimally_works_with_all_all_accepted_data_types(X_type, y_type, g_type, rng): if any(t.startswith("pd_") for t in [X_type, y_type, g_type]) and not PANDAS_INSTALLED: pytest.skip("pandas is not installed") if any(t.startswith("dt_") for t in [X_type, y_type, g_type]) and not DATATABLE_INSTALLED: pytest.skip("datatable is not installed") X, y, g = _create_data(task="ranking", n_samples=1_000) - weights = np.abs(np.random.randn(y.shape[0])) + weights = np.abs(rng.standard_normal(size=(y.shape[0],))) init_score = np.full_like(y, np.mean(y)) X_valid = X * 2 From 8579d5e34f97d797e10cf1e87e5093330992f520 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 4 Jun 2024 19:46:07 -0500 Subject: [PATCH 24/41] [python-package] clarify max_depth warning and limit when it is emitted (#6402) --- docs/Parameters-Tuning.rst | 3 +- include/LightGBM/config.h | 2 +- python-package/lightgbm/sklearn.py | 1 + src/io/config.cpp | 28 ++++++++---- tests/python_package_test/test_basic.py | 56 ++++++++++++++++++++++- tests/python_package_test/test_sklearn.py | 14 ++++++ 6 files changed, 92 insertions(+), 12 deletions(-) diff --git a/docs/Parameters-Tuning.rst b/docs/Parameters-Tuning.rst index ece235f6e6c0..9a3593f7e891 100644 --- a/docs/Parameters-Tuning.rst +++ b/docs/Parameters-Tuning.rst @@ -22,7 +22,7 @@ To get good results using a leaf-wise tree, these are some important parameters: 1. ``num_leaves``. This is the main parameter to control the complexity of the tree model. Theoretically, we can set ``num_leaves = 2^(max_depth)`` to obtain the same number of leaves as depth-wise tree. However, this simple conversion is not good in practice. 
- The reason is that a leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting. + A leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting. Thus, when trying to tune the ``num_leaves``, we should let it be smaller than ``2^(max_depth)``. For example, when the ``max_depth=7`` the depth-wise tree can get good accuracy, but setting ``num_leaves`` to ``127`` may cause over-fitting, and setting it to ``70`` or ``80`` may get better accuracy than depth-wise. @@ -33,6 +33,7 @@ To get good results using a leaf-wise tree, these are some important parameters: In practice, setting it to hundreds or thousands is enough for a large dataset. 3. ``max_depth``. You also can use ``max_depth`` to limit the tree depth explicitly. + If you set ``max_depth``, also explicitly set ``num_leaves`` to some value ``<= 2^max_depth``. For Faster Speed ---------------- diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index b626e1b1bcc2..b9d71aaa9882 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -1142,7 +1142,7 @@ struct Config { static const std::string DumpAliases(); private: - void CheckParamConflict(); + void CheckParamConflict(const std::unordered_map& params); void GetMembersFromString(const std::unordered_map& params); std::string SaveMembersToString() const; void GetAucMuWeights(); diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 46f41a428348..cb577c18c265 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -492,6 +492,7 @@ def __init__( Maximum tree leaves for base learners. max_depth : int, optional (default=-1) Maximum tree depth for base learners, <=0 means no limit. + If setting this to a positive value, consider also changing ``num_leaves`` to ``<= 2^max_depth``. learning_rate : float, optional (default=0.1) Boosting learning rate. 
You can use ``callbacks`` parameter of ``fit`` method to shrink/adapt learning rate diff --git a/src/io/config.cpp b/src/io/config.cpp index e25bb6d4fd70..7516ddbd4ac6 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -289,14 +289,14 @@ void Config::Set(const std::unordered_map& params) { } // check for conflicts - CheckParamConflict(); + CheckParamConflict(params); } bool CheckMultiClassObjective(const std::string& objective) { return (objective == std::string("multiclass") || objective == std::string("multiclassova")); } -void Config::CheckParamConflict() { +void Config::CheckParamConflict(const std::unordered_map& params) { // check if objective, metric, and num_class match int num_class_check = num_class; bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1); @@ -356,14 +356,24 @@ void Config::CheckParamConflict() { tree_learner.c_str()); } } - // Check max_depth and num_leaves - if (max_depth > 0) { + + // max_depth defaults to -1, so max_depth>0 implies "you explicitly overrode the default" + // + // Changing max_depth while leaving num_leaves at its default (31) can lead to 2 undesirable situations: + // + // * (0 <= max_depth <= 4) it's not possible to produce a tree with 31 leaves + // - this block reduces num_leaves to 2^max_depth + // * (max_depth > 4) 31 leaves is less than a full depth-wise tree, which might lead to underfitting + // - this block warns about that + // ref: https://github.com/microsoft/LightGBM/issues/2898#issuecomment-1002860601 + if (max_depth > 0 && (params.count("num_leaves") == 0 || params.at("num_leaves").empty())) { double full_num_leaves = std::pow(2, max_depth); - if (full_num_leaves > num_leaves - && num_leaves == kDefaultNumLeaves) { - Log::Warning("Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves." - " (num_leaves=%d).", - num_leaves); + if (full_num_leaves > num_leaves) { + Log::Warning("Provided parameters constrain tree depth (max_depth=%d) without explicitly setting 'num_leaves'. " + "This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<=%.0f) in params. 
" + "Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity.", + max_depth, + full_num_leaves); } if (full_num_leaves < num_leaves) { diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index e2f379dad9d3..14a621a1604f 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -9,7 +9,7 @@ import numpy as np import pytest from scipy import sparse -from sklearn.datasets import dump_svmlight_file, load_svmlight_file +from sklearn.datasets import dump_svmlight_file, load_svmlight_file, make_blobs from sklearn.model_selection import train_test_split import lightgbm as lgb @@ -890,3 +890,57 @@ def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Datas data=rng.standard_normal(size=(100, 3)), ) assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"] + + +# NOTE: this intentionally contains values where num_leaves <, ==, and > (max_depth^2) +@pytest.mark.parametrize(("max_depth", "num_leaves"), [(-1, 3), (-1, 50), (5, 3), (5, 31), (5, 32), (8, 3), (8, 31)]) +def test_max_depth_warning_is_not_raised_if_num_leaves_is_also_provided(capsys, num_leaves, max_depth): + X, y = make_blobs(n_samples=1_000, n_features=1, centers=2) + lgb.Booster( + params={ + "objective": "binary", + "max_depth": max_depth, + "num_leaves": num_leaves, + "num_iterations": 1, + "verbose": 0, + }, + train_set=lgb.Dataset(X, label=y), + ) + assert "Provided parameters constrain tree depth" not in capsys.readouterr().out + + +# NOTE: max_depth < 5 is significant here because the default for num_leaves=31. With max_depth=5, +# a full depth-wise tree would have 2^5 = 32 leaves. +@pytest.mark.parametrize("max_depth", [1, 2, 3, 4]) +def test_max_depth_warning_is_not_raised_if_max_depth_gt_1_and_lt_5_and_num_leaves_omitted(capsys, max_depth): + X, y = make_blobs(n_samples=1_000, n_features=1, centers=2) + lgb.Booster( + params={ + "objective": "binary", + "max_depth": max_depth, + "num_iterations": 1, + "verbose": 0, + }, + train_set=lgb.Dataset(X, label=y), + ) + assert "Provided parameters constrain tree depth" not in capsys.readouterr().out + + +@pytest.mark.parametrize("max_depth", [5, 6, 7, 8, 9]) +def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(capsys, max_depth): + X, y = make_blobs(n_samples=1_000, n_features=1, centers=2) + lgb.Booster( + params={ + "objective": "binary", + "max_depth": max_depth, + "num_iterations": 1, + "verbose": 0, + }, + train_set=lgb.Dataset(X, label=y), + ) + expected_warning = ( + f"[LightGBM] [Warning] Provided parameters constrain tree depth (max_depth={max_depth}) without explicitly " + f"setting 'num_leaves'. This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<={2**max_depth}) " + "in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity." 
+    )
+    assert expected_warning in capsys.readouterr().out
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index a995bfcae6b2..b458192a2ee0 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1276,6 +1276,20 @@ def test_check_is_fitted():
     check_is_fitted(model)


+@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
+@pytest.mark.parametrize("max_depth", [3, 4, 5, 8])
+def test_max_depth_warning_is_never_raised(capsys, estimator_class, max_depth):
+    X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
+    params = {"n_estimators": 1, "max_depth": max_depth, "verbose": 0}
+    if estimator_class is lgb.LGBMModel:
+        estimator_class(**{**params, "objective": "binary"}).fit(X, y)
+    elif estimator_class is lgb.LGBMRanker:
+        estimator_class(**params).fit(X, y, group=np.ones(X.shape[0]))
+    else:
+        estimator_class(**params).fit(X, y)
+    assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
+
+
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
 def test_sklearn_integration(estimator, check):
     estimator.set_params(min_child_samples=1, min_data_in_bin=1)

From 44014015536574743b5d0eb7b4791258268090b7 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 5 Jun 2024 07:54:16 -0500
Subject: [PATCH 25/41] [python-package] add a few type hints in LGBMModel.fit() (#6470)

---
 python-package/lightgbm/sklearn.py | 59 ++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index cb577c18c265..8fb998984720 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -454,6 +454,30 @@ def __call__(
     """


+def _extract_evaluation_meta_data(
+    *,
+    collection: Optional[Union[Dict[Any, Any], List[Any]]],
+    name: str,
+    i: int,
+) -> Optional[Any]:
+    """Try to extract the ith element of one of the ``eval_*`` inputs."""
+    if collection is None:
+        return None
+    elif isinstance(collection, list):
+        # It's possible, for example, to pass 3 eval sets through `eval_set`,
+        # but only 1 init_score through `eval_init_score`.
+        #
+        # This if-else accounts for that possibility.
+ if len(collection) > i: + return collection[i] + else: + return None + elif isinstance(collection, dict): + return collection.get(i, None) + else: + raise TypeError(f"{name} should be dict or list") + + class LGBMModel(_LGBMModelBase): """Implementation of the scikit-learn API for LightGBM.""" @@ -869,17 +893,6 @@ def fit( valid_sets: List[Dataset] = [] if eval_set is not None: - - def _get_meta_data(collection, name, i): - if collection is None: - return None - elif isinstance(collection, list): - return collection[i] if len(collection) > i else None - elif isinstance(collection, dict): - return collection.get(i, None) - else: - raise TypeError(f"{name} should be dict or list") - if isinstance(eval_set, tuple): eval_set = [eval_set] for i, valid_data in enumerate(eval_set): @@ -887,8 +900,16 @@ def _get_meta_data(collection, name, i): if valid_data[0] is X and valid_data[1] is y: valid_set = train_set else: - valid_weight = _get_meta_data(eval_sample_weight, "eval_sample_weight", i) - valid_class_weight = _get_meta_data(eval_class_weight, "eval_class_weight", i) + valid_weight = _extract_evaluation_meta_data( + collection=eval_sample_weight, + name="eval_sample_weight", + i=i, + ) + valid_class_weight = _extract_evaluation_meta_data( + collection=eval_class_weight, + name="eval_class_weight", + i=i, + ) if valid_class_weight is not None: if isinstance(valid_class_weight, dict) and self._class_map is not None: valid_class_weight = {self._class_map[k]: v for k, v in valid_class_weight.items()} @@ -897,8 +918,16 @@ def _get_meta_data(collection, name, i): valid_weight = valid_class_sample_weight else: valid_weight = np.multiply(valid_weight, valid_class_sample_weight) - valid_init_score = _get_meta_data(eval_init_score, "eval_init_score", i) - valid_group = _get_meta_data(eval_group, "eval_group", i) + valid_init_score = _extract_evaluation_meta_data( + collection=eval_init_score, + name="eval_init_score", + i=i, + ) + valid_group = _extract_evaluation_meta_data( + collection=eval_group, + name="eval_group", + i=i, + ) valid_set = Dataset( data=valid_data[0], label=valid_data[1], From 63926827d2009beb8afe1d84042a83e956a44221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Wed, 12 Jun 2024 21:26:17 -0600 Subject: [PATCH 26/41] [R-package] skip integer categorical feature check when building dataset subset (fixes #6412) (#6442) --- .ci/test_r_package.sh | 4 +-- R-package/R/lgb.Dataset.R | 7 ++++- R-package/tests/testthat/test_dataset.R | 38 +++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 66a3ecaa663d..0ed581b9bf75 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -106,10 +106,10 @@ if [[ $OS_NAME == "macos" ]]; then -target / || exit 1 fi -# fix for issue where CRAN was not returning {lattice} when using R 3.6 +# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6 # "Warning: dependency ‘lattice’ is not available" if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')" + Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')" else # {Matrix} needs {lattice}, so this needs to run before manually installing 
{Matrix}. # This should be unnecessary on R >=4.4.0 diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index 98b9505120de..8a4a7246b16c 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -170,7 +170,12 @@ Dataset <- R6::R6Class( # Check if more categorical features were output over the feature space data_is_not_filename <- !is.character(private$raw_data) - if (data_is_not_filename && max(private$categorical_feature) > ncol(private$raw_data)) { + if ( + data_is_not_filename + && !is.null(private$raw_data) + && is.null(private$used_indices) + && max(private$categorical_feature) > ncol(private$raw_data) + ) { stop( "lgb.Dataset.construct: supplied a too large value in categorical_feature: " , max(private$categorical_feature) diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R index 65255a730017..cbd2e7e076f7 100644 --- a/R-package/tests/testthat/test_dataset.R +++ b/R-package/tests/testthat/test_dataset.R @@ -440,6 +440,35 @@ test_that("lgb.Dataset: should be able to run lgb.cv() immediately after using l expect_true(methods::is(bst, "lgb.CVBooster")) }) +test_that("lgb.Dataset: should be able to be used in lgb.cv() when constructed with categorical feature indices", { + data("mtcars") + y <- mtcars$mpg + x <- as.matrix(mtcars[, -1L]) + categorical_feature <- which(names(mtcars) %in% c("cyl", "vs", "am", "gear", "carb")) - 1L + dtrain <- lgb.Dataset( + data = x + , label = y + , categorical_feature = categorical_feature + , free_raw_data = TRUE + , params = list(num_threads = .LGB_MAX_THREADS) + ) + # constructing the Dataset frees the raw data + dtrain$construct() + params <- list( + objective = "regression" + , num_leaves = 2L + , verbose = .LGB_VERBOSITY + , num_threads = .LGB_MAX_THREADS + ) + # cv should reuse the same categorical features without checking the indices + bst <- lgb.cv(params = params, data = dtrain, stratified = FALSE, nrounds = 1L) + expect_equal( + unlist(bst$boosters[[1L]]$booster$params$categorical_feature) + , categorical_feature - 1L # 0-based + ) +}) + + test_that("lgb.Dataset: should be able to use and retrieve long feature names", { # set one feature to a value longer than the default buffer size used # in LGBM_DatasetGetFeatureNames_R @@ -621,3 +650,12 @@ test_that("lgb.Dataset can be constructed with categorical features and without lgb.Dataset(raw_mat, categorical_feature = 2L)$construct() }, regexp = "supplied a too large value in categorical_feature: 2 but only 1 features") }) + +test_that("lgb.Dataset.slice fails with a categorical feature index greater than the number of features", { + data <- matrix(runif(100L), nrow = 50L, ncol = 2L) + ds <- lgb.Dataset(data = data, categorical_feature = 3L) + subset <- ds$slice(1L:20L) + expect_error({ + subset$construct() + }, regexp = "supplied a too large value in categorical_feature: 3 but only 2 features") +}) From 1e7ebc51943f51ef09dcf025bdb081d3b97938de Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 12 Jun 2024 23:22:22 -0500 Subject: [PATCH 27/41] [python-package] Add support for NumPy 2.0, test against nightly versions of dependencies (fixes #6454) (#6467) --- .ci/test-python-latest.sh | 50 +++++++++++++++++++++++++ .github/workflows/python_package.yml | 30 ++++++++++++++- python-package/lightgbm/basic.py | 16 ++++---- python-package/lightgbm/engine.py | 4 +- tests/c_api_test/test_.py | 4 +- tests/python_package_test/test_arrow.py | 4 ++ tests/python_package_test/test_basic.py | 5 ++- 7 files changed, 99 insertions(+), 14 
deletions(-) create mode 100755 .ci/test-python-latest.sh diff --git a/.ci/test-python-latest.sh b/.ci/test-python-latest.sh new file mode 100755 index 000000000000..08fc8558ef3e --- /dev/null +++ b/.ci/test-python-latest.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -e -E -u -o pipefail + +# latest versions of lightgbm's dependencies, +# including pre-releases and nightlies +# +# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml +echo "installing testing dependencies" +python -m pip install \ + cloudpickle \ + psutil \ + pytest +echo "done installing testing dependencies" + +echo "installing lightgbm's dependencies" +python -m pip install \ + --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ + --prefer-binary \ + --pre \ + --upgrade \ + 'numpy>=2.0.0.dev0' \ + 'matplotlib>=3.10.0.dev0' \ + 'pandas>=3.0.0.dev0' \ + 'scikit-learn>=1.6.dev0' \ + 'scipy>=1.15.0.dev0' + +python -m pip install \ + --extra-index-url https://pypi.fury.io/arrow-nightlies/ \ + --prefer-binary \ + --pre \ + --upgrade \ + 'pyarrow>=17.0.0.dev0' + +python -m pip install \ + 'cffi>=1.15.1' + +echo "done installing lightgbm's dependencies" + +echo "installing lightgbm" +pip install --no-deps dist/*.whl +echo "done installing lightgbm" + +echo "installed package versions:" +pip freeze + +echo "" +echo "running tests" +pytest tests/c_api_test/ +pytest tests/python_package_test/ diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 73fbc78a2ea1..83149a078cf6 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -75,6 +75,33 @@ jobs: export PATH=${CONDA}/bin:${PATH} $GITHUB_WORKSPACE/.ci/setup.sh || exit 1 $GITHUB_WORKSPACE/.ci/test.sh || exit 1 + test-latest-versions: + name: Python - latest versions (ubuntu-latest) + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 5 + submodules: true + - name: Create wheel + run: | + docker run \ + --rm \ + --env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \ + -v $(pwd):/opt/lgb-build \ + -w /opt/lgb-build \ + lightgbm/vsts-agent:manylinux_2_28_x86_64 \ + /bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp' + - name: Test compatibility + run: | + docker run \ + --rm \ + -v $(pwd):/opt/lgb-build \ + -w /opt/lgb-build \ + python:3.11 \ + /bin/bash ./.ci/test-python-latest.sh test-oldest-versions: name: Python - oldest supported versions (ubuntu-latest) runs-on: ubuntu-latest @@ -89,6 +116,7 @@ jobs: run: | docker run \ --rm \ + --env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \ -v $(pwd):/opt/lgb-build \ -w /opt/lgb-build \ lightgbm/vsts-agent:manylinux_2_28_x86_64 \ @@ -104,7 +132,7 @@ jobs: all-python-package-jobs-successful: if: always() runs-on: ubuntu-latest - needs: [test, test-oldest-versions] + needs: [test, test-latest-versions, test-oldest-versions] steps: - name: Note that all tests succeeded uses: re-actors/alls-green@v1.2.2 diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 5bfb8dcbbb58..373c9911303a 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -356,10 +356,10 @@ def _list_to_1d_numpy( array = data.ravel() return _cast_numpy_array_to_dtype(array, dtype) elif _is_1d_list(data): - return np.array(data, dtype=dtype, copy=False) + return np.asarray(data, 
dtype=dtype) elif isinstance(data, pd_Series): _check_for_bad_pandas_dtypes(data.to_frame().dtypes) - return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well + return np.asarray(data, dtype=dtype) # SparseArray should be supported as well else: raise TypeError( f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series" @@ -728,7 +728,7 @@ def _convert_from_sliced_object(data: np.ndarray) -> np.ndarray: def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]: """Get pointer of float numpy array / list.""" if _is_1d_list(data): - data = np.array(data, copy=False) + data = np.asarray(data) if _is_numpy_1d_array(data): data = _convert_from_sliced_object(data) assert data.flags.c_contiguous @@ -749,7 +749,7 @@ def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray def _c_int_array(data: np.ndarray) -> Tuple[_ctypes_int_ptr, int, np.ndarray]: """Get pointer of int numpy array / list.""" if _is_1d_list(data): - data = np.array(data, copy=False) + data = np.asarray(data) if _is_numpy_1d_array(data): data = _convert_from_sliced_object(data) assert data.flags.c_contiguous @@ -1270,7 +1270,7 @@ def __inner_predict_np2d( preds: Optional[np.ndarray], ) -> Tuple[np.ndarray, int]: if mat.dtype == np.float32 or mat.dtype == np.float64: - data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False) + data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) else: # change non-float data to float data, need to copy data = np.array(mat.reshape(mat.size), dtype=np.float32) ptr_data, type_ptr_data, _ = _c_float_array(data) @@ -2285,9 +2285,9 @@ def __init_from_np2d( self._handle = ctypes.c_void_p() if mat.dtype == np.float32 or mat.dtype == np.float64: - data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False) + data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) else: # change non-float data to float data, need to copy - data = np.array(mat.reshape(mat.size), dtype=np.float32) + data = np.asarray(mat.reshape(mat.size), dtype=np.float32) ptr_data, type_ptr_data, _ = _c_float_array(data) _safe_call( @@ -2332,7 +2332,7 @@ def __init_from_list_np2d( nrow[i] = mat.shape[0] if mat.dtype == np.float32 or mat.dtype == np.float64: - mats[i] = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False) + mats[i] = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) else: # change non-float data to float data, need to copy mats[i] = np.array(mat.reshape(mat.size), dtype=np.float32) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 5f93824458d4..9325cf50c203 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -512,7 +512,7 @@ def _make_n_folds( if hasattr(folds, "split"): group_info = full_data.get_group() if group_info is not None: - group_info = np.array(group_info, dtype=np.int32, copy=False) + group_info = np.asarray(group_info, dtype=np.int32) flatted_group = np.repeat(range(len(group_info)), repeats=group_info) else: flatted_group = np.zeros(num_data, dtype=np.int32) @@ -526,7 +526,7 @@ def _make_n_folds( if not SKLEARN_INSTALLED: raise LightGBMError("scikit-learn is required for ranking cv") # ranking task, split according to groups - group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False) + group_info = np.asarray(full_data.get_group(), dtype=np.int32) flatted_group = np.repeat(range(len(group_info)), repeats=group_info) group_kfold = _LGBMGroupKFold(n_splits=nfold) 
folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group) diff --git a/tests/c_api_test/test_.py b/tests/c_api_test/test_.py index 0abd40ecec31..77fb7f6e8ead 100644 --- a/tests/c_api_test/test_.py +++ b/tests/c_api_test/test_.py @@ -125,7 +125,7 @@ def load_from_mat(filename, reference): mat = np.loadtxt(str(filename), dtype=np.float64) label = mat[:, 0].astype(np.float32) mat = mat[:, 1:] - data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False) + data = np.asarray(mat.reshape(mat.size), dtype=np.float64) handle = ctypes.c_void_p() ref = None if reference is not None: @@ -203,7 +203,7 @@ def test_booster(): mat = data[:, 1:] preb = np.empty(mat.shape[0], dtype=np.float64) num_preb = ctypes.c_int64(0) - data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False) + data = np.asarray(mat.reshape(mat.size), dtype=np.float64) LIB.LGBM_BoosterPredictForMat( booster2, data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py index ab871048a949..3a7e0f8d4fce 100644 --- a/tests/python_package_test/test_arrow.py +++ b/tests/python_package_test/test_arrow.py @@ -20,6 +20,10 @@ else: import pyarrow as pa # type: ignore + assert ( + lgb.compat.PYARROW_INSTALLED is True + ), "'pyarrow' and its dependencies must be installed to run the arrow tests" + # ----------------------------------------------------------------------------------------------- # # UTILITIES # # ----------------------------------------------------------------------------------------------- # diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 14a621a1604f..f3367c59f911 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -777,7 +777,10 @@ def test_custom_objective_safety(rng): def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng): pd = pytest.importorskip("pandas") X = rng.uniform(size=(10, 2)).astype(dtype) - df = pd.DataFrame(X) + # copy=False is necessary because starting with pandas 3.0, pd.DataFrame() creates + # a copy of the input numpy array by default + # ref: https://github.com/pandas-dev/pandas/issues/58913 + df = pd.DataFrame(X, copy=False) built_data = lgb.basic._data_from_pandas( data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None )[0] From 4e744033221f5366007130da33ef74f7a465f587 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 13 Jun 2024 14:49:14 +0200 Subject: [PATCH 28/41] [R-package] ensure use of interaction_constraints does not lead to features being ignored (#6377) --- .ci/lint-python.sh | 8 +- R-package/R/utils.R | 100 ++++++++++---------- R-package/tests/testthat/test_basic.R | 45 +++++++-- R-package/tests/testthat/test_lgb.Booster.R | 2 +- R-package/tests/testthat/test_utils.R | 18 ++++ 5 files changed, 110 insertions(+), 63 deletions(-) diff --git a/.ci/lint-python.sh b/.ci/lint-python.sh index e1e9e306c883..edab8993a799 100755 --- a/.ci/lint-python.sh +++ b/.ci/lint-python.sh @@ -2,9 +2,11 @@ set -e -E -u -o pipefail -echo "running pre-commit checks" -pre-commit run --all-files || exit 1 -echo "done running pre-commit checks" +# this can be re-enabled when this is fixed: +# https://github.com/tox-dev/filelock/issues/337 +# echo "running pre-commit checks" +# pre-commit run --all-files || exit 1 +# echo "done running pre-commit checks" echo "running mypy" mypy \ diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 
646a306c97f6..9fbdba778cc4 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -59,68 +59,66 @@ } +# [description] +# +# Besides applying checks, this function +# +# 1. turns feature *names* into 1-based integer positions, then +# 2. adds an extra list element with skipped features, then +# 3. turns 1-based integer positions into 0-based positions, and finally +# 4. collapses the values of each list element into a string like "[0, 1]". +# .check_interaction_constraints <- function(interaction_constraints, column_names) { + if (is.null(interaction_constraints)) { + return(list()) + } + if (!identical(class(interaction_constraints), "list")) { + stop("interaction_constraints must be a list") + } - # Convert interaction constraints to feature numbers - string_constraints <- list() + column_indices <- seq_along(column_names) - if (!is.null(interaction_constraints)) { + # Convert feature names to 1-based integer positions and apply checks + for (j in seq_along(interaction_constraints)) { + constraint <- interaction_constraints[[j]] - if (!methods::is(interaction_constraints, "list")) { - stop("interaction_constraints must be a list") - } - constraint_is_character_or_numeric <- sapply( - X = interaction_constraints - , FUN = function(x) { - return(is.character(x) || is.numeric(x)) - } - ) - if (!all(constraint_is_character_or_numeric)) { - stop("every element in interaction_constraints must be a character vector or numeric vector") + if (is.character(constraint)) { + constraint_indices <- match(constraint, column_names) + } else if (is.numeric(constraint)) { + constraint_indices <- as.integer(constraint) + } else { + stop("every element in interaction_constraints must be a character vector or numeric vector") } - for (constraint in interaction_constraints) { - - # Check for character name - if (is.character(constraint)) { - - constraint_indices <- as.integer(match(constraint, column_names) - 1L) - - # Provided indices, but some indices are not existing? - if (sum(is.na(constraint_indices)) > 0L) { - stop( - "supplied an unknown feature in interaction_constraints " - , sQuote(constraint[is.na(constraint_indices)]) - ) - } - - } else { - - # Check that constraint indices are at most number of features - if (max(constraint) > length(column_names)) { - stop( - "supplied a too large value in interaction_constraints: " - , max(constraint) - , " but only " - , length(column_names) - , " features" - ) - } - - # Store indices as [0, n-1] indexed instead of [1, n] indexed - constraint_indices <- as.integer(constraint - 1L) - - } - - # Convert constraint to string - constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]") - string_constraints <- append(string_constraints, constraint_string) + # Features outside range? 
+ bad <- !(constraint_indices %in% column_indices) + if (any(bad)) { + stop( + "unknown feature(s) in interaction_constraints: " + , toString(sQuote(constraint[bad], q = "'")) + ) } + interaction_constraints[[j]] <- constraint_indices } - return(string_constraints) + # Add missing features as new interaction set + remaining_indices <- setdiff( + column_indices, sort(unique(unlist(interaction_constraints))) + ) + if (length(remaining_indices) > 0L) { + interaction_constraints <- c( + interaction_constraints, list(remaining_indices) + ) + } + # Turn indices 0-based and convert to string + for (j in seq_along(interaction_constraints)) { + interaction_constraints[[j]] <- paste0( + "[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]" + ) + } + return(interaction_constraints) } diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 74c46dcef141..ed477a42c00b 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -2776,14 +2776,12 @@ test_that(paste0("lgb.train() throws an informative error if the members of inte test_that("lgb.train() throws an informative error if interaction_constraints contains a too large index", { dtrain <- lgb.Dataset(train$data, label = train$label) params <- list(objective = "regression", - interaction_constraints = list(c(1L, length(colnames(train$data)) + 1L), 3L)) - expect_error({ - bst <- lightgbm( - data = dtrain - , params = params - , nrounds = 2L - ) - }, "supplied a too large value in interaction_constraints") + interaction_constraints = list(c(1L, ncol(train$data) + 1L:2L), 3L)) + expect_error( + lightgbm(data = dtrain, params = params, nrounds = 2L) + , "unknown feature(s) in interaction_constraints: '127', '128'" + , fixed = TRUE + ) }) test_that(paste0("lgb.train() gives same result when interaction_constraints is specified as a list of ", @@ -2876,6 +2874,37 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai }) +test_that("Interaction constraints add missing features correctly as new group", { + dtrain <- lgb.Dataset( + train$data[, 1L:6L] # Pick only some columns + , label = train$label + , params = list(num_threads = .LGB_MAX_THREADS) + ) + + list_of_constraints <- list( + list(3L, 1L:2L) + , list("cap-shape=convex", c("cap-shape=bell", "cap-shape=conical")) + ) + + for (constraints in list_of_constraints) { + params <- list( + objective = "regression" + , interaction_constraints = constraints + , verbose = .LGB_VERBOSITY + , num_threads = .LGB_MAX_THREADS + ) + bst <- lightgbm(data = dtrain, params = params, nrounds = 10L) + + expected_list <- list("[2]", "[0,1]", "[3,4,5]") + expect_equal(bst$params$interaction_constraints, expected_list) + + expected_string <- "[interaction_constraints: [2],[0,1],[3,4,5]]" + expect_true( + grepl(expected_string, bst$save_model_to_string(), fixed = TRUE) + ) + } +}) + .generate_trainset_for_monotone_constraints_tests <- function(x3_to_categorical) { n_samples <- 3000L x1_positively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0) diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 8e49c7b7069b..e81dc89673e0 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -174,7 +174,7 @@ test_that("Loading a Booster from a text file works", { , bagging_freq = 1L , boost_from_average = FALSE , categorical_feature = c(1L, 2L) - , interaction_constraints = list(c(1L, 2L), 1L) + 
, interaction_constraints = list(1L:2L, 3L, 4L:ncol(train$data)) , feature_contri = rep(0.5, ncol(train$data)) , metric = c("mape", "average_precision") , learning_rate = 1.0 diff --git a/R-package/tests/testthat/test_utils.R b/R-package/tests/testthat/test_utils.R index 898aed9b0915..2534cb24cb13 100644 --- a/R-package/tests/testthat/test_utils.R +++ b/R-package/tests/testthat/test_utils.R @@ -147,3 +147,21 @@ test_that(".equal_or_both_null produces expected results", { expect_false(.equal_or_both_null(10.0, 1L)) expect_true(.equal_or_both_null(0L, 0L)) }) + +test_that(".check_interaction_constraints() adds skipped features", { + ref <- letters[1L:5L] + ic_num <- list(1L, c(2L, 3L)) + ic_char <- list("a", c("b", "c")) + expected <- list("[0]", "[1,2]", "[3,4]") + + ic_checked_num <- .check_interaction_constraints( + interaction_constraints = ic_num, column_names = ref + ) + + ic_checked_char <- .check_interaction_constraints( + interaction_constraints = ic_char, column_names = ref + ) + + expect_equal(ic_checked_num, expected) + expect_equal(ic_checked_char, expected) +}) From ad1237d90830d795f450f40a8697f8bc6faa4d1a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 13 Jun 2024 20:03:42 -0500 Subject: [PATCH 29/41] [ci] [python-package] add arm64 macOS wheels (#6391) --- .ci/lint-python.sh | 8 ++- .ci/test.sh | 3 +- .github/workflows/python_package.yml | 16 +++++- CMakeLists.txt | 77 ++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 8 deletions(-) diff --git a/.ci/lint-python.sh b/.ci/lint-python.sh index edab8993a799..e1e9e306c883 100755 --- a/.ci/lint-python.sh +++ b/.ci/lint-python.sh @@ -2,11 +2,9 @@ set -e -E -u -o pipefail -# this can be re-enabled when this is fixed: -# https://github.com/tox-dev/filelock/issues/337 -# echo "running pre-commit checks" -# pre-commit run --all-files || exit 1 -# echo "done running pre-commit checks" +echo "running pre-commit checks" +pre-commit run --all-files || exit 1 +echo "done running pre-commit checks" echo "running mypy" mypy \ diff --git a/.ci/test.sh b/.ci/test.sh index c71c54ed906c..4f3de6a6f1ea 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -174,8 +174,7 @@ elif [[ $TASK == "bdist" ]]; then if [[ $ARCH == "x86_64" ]]; then PLATFORM="macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64" else - echo "ERROR: macos wheels not supported yet on architecture '${ARCH}'" - exit 1 + PLATFORM="macosx_14_0_arm64" fi mv \ ./dist/tmp.whl \ diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 83149a078cf6..035340e2ec1b 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -36,6 +36,10 @@ jobs: python_version: '3.8' - os: macos-13 task: if-else + python_version: '3.9' + - os: macos-14 + task: bdist + method: wheel python_version: '3.10' # We're currently skipping MPI jobs on macOS, see https://github.com/microsoft/LightGBM/pull/6425 # for further details. 
@@ -63,7 +67,11 @@ jobs:
           export TASK="${{ matrix.task }}"
           export METHOD="${{ matrix.method }}"
           export PYTHON_VERSION="${{ matrix.python_version }}"
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
+          if [[ "${{ matrix.os }}" == "macos-14" ]]; then
+            # use clang when creating macOS release artifacts
+            export COMPILER="clang"
+            export OS_NAME="macos"
+          elif [[ "${{ matrix.os }}" == "macos-13" ]]; then
             export COMPILER="gcc"
             export OS_NAME="macos"
           elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
@@ -75,6 +83,12 @@ jobs:
           export PATH=${CONDA}/bin:${PATH}
           $GITHUB_WORKSPACE/.ci/setup.sh || exit 1
           $GITHUB_WORKSPACE/.ci/test.sh || exit 1
+      - name: upload wheels
+        if: ${{ matrix.method == 'wheel' && matrix.os == 'macos-14' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: macosx-arm64-wheel
+          path: dist/*.whl
   test-latest-versions:
     name: Python - latest versions (ubuntu-latest)
     runs-on: ubuntu-latest
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4eefbf3d840..3492289be078 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -707,6 +707,83 @@ if(__BUILD_FOR_PYTHON)
   set(CMAKE_INSTALL_PREFIX "lightgbm")
 endif()

+# The macOS linker puts an absolute path to linked libraries in lib_lightgbm.dylib.
+# This block overrides that information for LightGBM's OpenMP dependency, to allow
+# finding that library in more places.
+#
+# This reduces the risk of runtime issues resulting from multiple libomp.dylib being loaded.
+#
+if(APPLE AND USE_OPENMP)
+  # store path to libomp found at build time in a variable
+  get_target_property(
+    OpenMP_LIBRARY_LOCATION
+    OpenMP::OpenMP_CXX
+    INTERFACE_LINK_LIBRARIES
+  )
+  # get just the filename of that path
+  # (to deal with the possibility that it might be 'libomp.dylib' or 'libgomp.dylib' or 'libiomp.dylib')
+  get_filename_component(
+    OpenMP_LIBRARY_NAME
+    ${OpenMP_LIBRARY_LOCATION}
+    NAME
+  )
+  # get directory of that path
+  get_filename_component(
+    OpenMP_LIBRARY_DIR
+    ${OpenMP_LIBRARY_LOCATION}
+    DIRECTORY
+  )
+  # get exact name of the library in a variable
+  get_target_property(
+    __LIB_LIGHTGBM_OUTPUT_NAME
+    _lightgbm
+    OUTPUT_NAME
+  )
+  if(NOT __LIB_LIGHTGBM_OUTPUT_NAME)
+    set(__LIB_LIGHTGBM_OUTPUT_NAME "lib_lightgbm")
+  endif()
+
+  if(CMAKE_SHARED_LIBRARY_SUFFIX_CXX)
+    set(
+      __LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX_CXX}"
+      CACHE INTERNAL "lightgbm shared library filename"
+    )
+  else()
+    set(
+      __LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}.dylib"
+      CACHE INTERNAL "lightgbm shared library filename"
+    )
+  endif()
+
+  # Override the absolute path to OpenMP with a relative one using @rpath.
+  #
+  # This also ensures that if a libomp.dylib has already been loaded, it'll just use that.
+  add_custom_command(
+    TARGET _lightgbm
+    POST_BUILD
+    COMMAND
+      install_name_tool
+        -change
+        ${OpenMP_LIBRARY_LOCATION}
+        "@rpath/${OpenMP_LIBRARY_NAME}"
+        "${__LIB_LIGHTGBM_FILENAME}"
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "Replacing hard-coded OpenMP install_name with '@rpath/${OpenMP_LIBRARY_NAME}'..."
+ ) + # add RPATH entries to ensure the loader looks in the following, in the following order: + # + # - /opt/homebrew/opt/libomp/lib (where 'brew install' / 'brew link' puts libomp.dylib) + # - ${OpenMP_LIBRARY_DIR} (wherever find_package(OpenMP) found OpenMP at build time) + # + set_target_properties( + _lightgbm + PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH "/opt/homebrew/opt/libomp/lib;${OpenMP_LIBRARY_DIR}" + INSTALL_RPATH_USE_LINK_PATH FALSE + ) +endif() + install( TARGETS _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin From 271d1ffdf847aeda0d7aa3351bd3fc1ee952dbe6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 14 Jun 2024 16:14:06 -0500 Subject: [PATCH 30/41] release v4.4.0 (#6439) --- .appveyor.yml | 2 +- R-package/R/lgb.Booster.R | 4 ++++ R-package/R/lgb.Dataset.R | 3 +++ R-package/R/lgb.model.dt.tree.R | 3 +++ R-package/configure | 18 +++++++++--------- R-package/cran-comments.md | 6 ++++++ R-package/man/lgb.dump.Rd | 6 ++++-- R-package/man/lgb.model.dt.tree.Rd | 6 ++++-- R-package/man/lgb.save.Rd | 6 ++++-- R-package/man/lgb.slice.Dataset.Rd | 2 ++ R-package/pkgdown/_pkgdown.yml | 2 +- VERSION.txt | 2 +- docs/Parameters.rst | 2 ++ include/LightGBM/config.h | 1 + python-package/pyproject.toml | 2 +- 15 files changed, 46 insertions(+), 19 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3c12ebaa36f8..33b860bf8a49 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,4 +1,4 @@ -version: 4.3.0.99.{build} +version: 4.4.0.{build} image: Visual Studio 2015 platform: x64 diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 7f0ccc31dd49..bce9e785bf4e 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -1309,6 +1309,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model #' means "save the fifth, sixth, and seventh tree" #' +#' \emph{New in version 4.4.0} +#' #' @return lgb.Booster #' #' @examples @@ -1373,6 +1375,8 @@ lgb.save <- function( #' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model #' means "dump the fifth, sixth, and seventh tree" #' +#' \emph{New in version 4.4.0} +#' #' @return json format of model #' #' @examples diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index 8a4a7246b16c..ed1216c7986a 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -1054,6 +1054,9 @@ dimnames.lgb.Dataset <- function(x) { #' @title Slice a dataset #' @description Get a new \code{lgb.Dataset} containing the specified rows of #' original \code{lgb.Dataset} object +#' +#' \emph{Renamed from} \code{slice()} \emph{in 4.4.0} +#' #' @param dataset Object of class \code{lgb.Dataset} #' @param idxset an integer vector of indices of rows needed #' @return constructed sub dataset diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index cc58011a2d11..db4ef955f866 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -6,6 +6,9 @@ #' @param start_iteration Index (1-based) of the first boosting round to include in the output. #' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model #' means "return information about the fifth, sixth, and seventh trees". +#' +#' \emph{New in version 4.4.0} +#' #' @return #' A \code{data.table} with detailed information about model trees' nodes and leafs. 
#' diff --git a/R-package/configure b/R-package/configure index b84169b14336..32ffa32a18ee 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for lightgbm 4.3.0.99. +# Generated by GNU Autoconf 2.71 for lightgbm 4.4.0. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='lightgbm' PACKAGE_TARNAME='lightgbm' -PACKAGE_VERSION='4.3.0.99' -PACKAGE_STRING='lightgbm 4.3.0.99' +PACKAGE_VERSION='4.4.0' +PACKAGE_STRING='lightgbm 4.4.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1211,7 +1211,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures lightgbm 4.3.0.99 to adapt to many kinds of systems. +\`configure' configures lightgbm 4.4.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1273,7 +1273,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of lightgbm 4.3.0.99:";; + short | recursive ) echo "Configuration of lightgbm 4.4.0:";; esac cat <<\_ACEOF @@ -1341,7 +1341,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -lightgbm configure 4.3.0.99 +lightgbm configure 4.4.0 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1378,7 +1378,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by lightgbm $as_me 4.3.0.99, which was +It was created by lightgbm $as_me 4.4.0, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -2454,7 +2454,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by lightgbm $as_me 4.3.0.99, which was +This file was extended by lightgbm $as_me 4.4.0, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2509,7 +2509,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -lightgbm config.status 4.3.0.99 +lightgbm config.status 4.4.0 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 404c471c1c16..137df4912c04 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,5 +1,11 @@ # CRAN Submission History +## v4.4.0 - Submission 1 - (TBD) + +### CRAN response + +### Maintainer Notes + ## v4.3.0 - Submission 1 - (January 18, 2024) ### CRAN response diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index e1790946d8f6..e356251466e5 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -12,8 +12,10 @@ lgb.dump(booster, num_iteration = NULL, start_iteration = 1L) \item{num_iteration}{Number of iterations to be dumped. NULL or <= 0 means use best iteration} \item{start_iteration}{Index (1-based) of the first boosting round to dump. 
-For example, passing \code{start_iteration=5, num_iteration=3} for a regression model -means "dump the fifth, sixth, and seventh tree"} + For example, passing \code{start_iteration=5, num_iteration=3} for a regression model + means "dump the fifth, sixth, and seventh tree" + + \emph{New in version 4.4.0}} } \value{ json format of model diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index 126ecc7b23cc..ecfee17332f5 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -12,8 +12,10 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L) \item{num_iteration}{Number of iterations to include. NULL or <= 0 means use best iteration.} \item{start_iteration}{Index (1-based) of the first boosting round to include in the output. -For example, passing \code{start_iteration=5, num_iteration=3} for a regression model -means "return information about the fifth, sixth, and seventh trees".} + For example, passing \code{start_iteration=5, num_iteration=3} for a regression model + means "return information about the fifth, sixth, and seventh trees". + + \emph{New in version 4.4.0}} } \value{ A \code{data.table} with detailed information about model trees' nodes and leafs. diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index 118c008464d7..3a532e9f0c32 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -14,8 +14,10 @@ lgb.save(booster, filename, num_iteration = NULL, start_iteration = 1L) \item{num_iteration}{Number of iterations to save, NULL or <= 0 means use best iteration} \item{start_iteration}{Index (1-based) of the first boosting round to save. -For example, passing \code{start_iteration=5, num_iteration=3} for a regression model -means "save the fifth, sixth, and seventh tree"} + For example, passing \code{start_iteration=5, num_iteration=3} for a regression model + means "save the fifth, sixth, and seventh tree" + + \emph{New in version 4.4.0}} } \value{ lgb.Booster diff --git a/R-package/man/lgb.slice.Dataset.Rd b/R-package/man/lgb.slice.Dataset.Rd index f2a47bcb1885..c40ec0d5bb32 100644 --- a/R-package/man/lgb.slice.Dataset.Rd +++ b/R-package/man/lgb.slice.Dataset.Rd @@ -17,6 +17,8 @@ constructed sub dataset \description{ Get a new \code{lgb.Dataset} containing the specified rows of original \code{lgb.Dataset} object + + \emph{Renamed from} \code{slice()} \emph{in 4.4.0} } \examples{ \donttest{ diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml index c2d6718a2926..be3563210330 100644 --- a/R-package/pkgdown/_pkgdown.yml +++ b/R-package/pkgdown/_pkgdown.yml @@ -14,7 +14,7 @@ repo: user: https://github.com/ development: - mode: unreleased + mode: release authors: Yu Shi: diff --git a/VERSION.txt b/VERSION.txt index c42a8a79f6d1..fdc6698807a9 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -4.3.0.99 +4.4.0 diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 02f01ae4408b..40551cfd5858 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -414,6 +414,8 @@ Learning Control Parameters - when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement + - *New in 4.4.0* + - ``first_metric_only`` :raw-html:`🔗︎`, default = ``false``, type = bool - LightGBM allows you to provide multiple evaluation metrics. 
Set this to ``true``, if you want to use only the first metric for early stopping diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index b9d71aaa9882..a2a352669460 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -396,6 +396,7 @@ struct Config { // check = >=0.0 // desc = when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement + // desc = *New in 4.4.0* double early_stopping_min_delta = 0.0; // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index d671b5456ae6..9b9dcc0195f2 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -30,7 +30,7 @@ maintainers = [ name = "lightgbm" readme = "README.rst" requires-python = ">=3.7" -version = "4.3.0.99" +version = "4.4.0" [project.optional-dependencies] arrow = [ From b96d02404a36eb0c8b10c7ebda3cdf0cd85725d7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 16 Jun 2024 21:33:28 -0500 Subject: [PATCH 31/41] bump development version to 4.4.0.99 (#6485) --- .appveyor.yml | 2 +- R-package/configure | 18 +++++++++--------- R-package/pkgdown/_pkgdown.yml | 2 +- VERSION.txt | 2 +- python-package/pyproject.toml | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 33b860bf8a49..aae1c3a074fd 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,4 +1,4 @@ -version: 4.4.0.{build} +version: 4.4.0.99.{build} image: Visual Studio 2015 platform: x64 diff --git a/R-package/configure b/R-package/configure index 32ffa32a18ee..d3fdcbbb3f67 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for lightgbm 4.4.0. +# Generated by GNU Autoconf 2.71 for lightgbm 4.4.0.99. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='lightgbm' PACKAGE_TARNAME='lightgbm' -PACKAGE_VERSION='4.4.0' -PACKAGE_STRING='lightgbm 4.4.0' +PACKAGE_VERSION='4.4.0.99' +PACKAGE_STRING='lightgbm 4.4.0.99' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1211,7 +1211,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures lightgbm 4.4.0 to adapt to many kinds of systems. +\`configure' configures lightgbm 4.4.0.99 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1273,7 +1273,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of lightgbm 4.4.0:";; + short | recursive ) echo "Configuration of lightgbm 4.4.0.99:";; esac cat <<\_ACEOF @@ -1341,7 +1341,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -lightgbm configure 4.4.0 +lightgbm configure 4.4.0.99 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1378,7 +1378,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by lightgbm $as_me 4.4.0, which was +It was created by lightgbm $as_me 4.4.0.99, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -2454,7 +2454,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by lightgbm $as_me 4.4.0, which was +This file was extended by lightgbm $as_me 4.4.0.99, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2509,7 +2509,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -lightgbm config.status 4.4.0 +lightgbm config.status 4.4.0.99 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml index be3563210330..c2d6718a2926 100644 --- a/R-package/pkgdown/_pkgdown.yml +++ b/R-package/pkgdown/_pkgdown.yml @@ -14,7 +14,7 @@ repo: user: https://github.com/ development: - mode: release + mode: unreleased authors: Yu Shi: diff --git a/VERSION.txt b/VERSION.txt index fdc6698807a9..50e035a516aa 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -4.4.0 +4.4.0.99 diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 9b9dcc0195f2..3f016a841f80 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -30,7 +30,7 @@ maintainers = [ name = "lightgbm" readme = "README.rst" requires-python = ">=3.7" -version = "4.4.0" +version = "4.4.0.99" [project.optional-dependencies] arrow = [ From d56a7a3c3603589852c8d2d02f4ad0ea80fd4349 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Mon, 17 Jun 2024 11:33:43 +0900 Subject: [PATCH 32/41] chore: update dataset_loader.cpp (#6483) --- src/io/dataset_loader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 84bf3907a43c..9c8a0417b118 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -274,7 +274,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), rank, num_machines, &num_global_data, &used_data_indices)); // checks whether there's a initial score file when loaded from binary data files - // the intial score file should with suffix ".bin.init" + // the initial score file should with suffix ".bin.init" dataset->metadata_.LoadInitialScore(bin_filename); dataset->device_type_ = config_.device_type; @@ -344,7 +344,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, // load data from binary file dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), 0, 1, &num_global_data, &used_data_indices)); // checks whether there's a initial score file when loaded from binary data files - // the intial score file should with suffix ".bin.init" + // the initial score file should with suffix ".bin.init" dataset->metadata_.LoadInitialScore(bin_filename); } // not need to check validation data From 7cb189235914cf5211e70c514488d5e7d25e94a2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 17 Jun 2024 09:12:17 -0500 Subject: [PATCH 33/41] [ci] [python-package] run macOS x86_64 tests on macOS 12 (Monterey), stop manually setting macOS wheel tags (#6487) --- .ci/setup.sh | 2 
+-
 .ci/test.sh  | 24 +++++++++++++-----------
 .vsts-ci.yml |  2 +-
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/.ci/setup.sh b/.ci/setup.sh
index 7fe54db9c2fc..3fd3356e34d7 100755
--- a/.ci/setup.sh
+++ b/.ci/setup.sh
@@ -14,7 +14,7 @@ if [[ $OS_NAME == "macos" ]]; then
     if [[ $COMPILER == "clang" ]]; then
         brew install libomp
         if [[ $AZURE == "true" ]]; then
-            sudo xcode-select -s /Applications/Xcode_11.7.app/Contents/Developer || exit 1
+            sudo xcode-select -s /Applications/Xcode_13.1.0.app/Contents/Developer || exit 1
         fi
     else  # gcc
         # Check https://github.com/actions/runner-images/tree/main/images/macos for available
diff --git a/.ci/test.sh b/.ci/test.sh
index 4f3de6a6f1ea..d950f01e989f 100755
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -29,6 +29,19 @@ if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
     export LC_ALL="en_US.UTF-8"
 fi
 
+# Setting MACOSX_DEPLOYMENT_TARGET prevents CMake from building against too-new
+# macOS features, and helps Python build tools determine the appropriate
+# wheel compatibility tags.
+#
+# ref:
+#     * https://cmake.org/cmake/help/latest/envvar/MACOSX_DEPLOYMENT_TARGET.html
+#     * https://github.com/scikit-build/scikit-build-core/blob/acb7d0346e4a05bcb47a4ea3939c705ab71e3145/src/scikit_build_core/builder/macos.py#L36
+if [[ $ARCH == "x86_64" ]]; then
+    export MACOSX_DEPLOYMENT_TARGET=10.15
+else
+    export MACOSX_DEPLOYMENT_TARGET=12.0
+fi
+
 if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then
     bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit 1
     exit 0
@@ -168,17 +181,6 @@ elif [[ $TASK == "bdist" ]]; then
     if [[ $OS_NAME == "macos" ]]; then
         cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel || exit 1
         sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
-        mv \
-            ./dist/*.whl \
-            ./dist/tmp.whl || exit 1
-        if [[ $ARCH == "x86_64" ]]; then
-            PLATFORM="macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64"
-        else
-            PLATFORM="macosx_14_0_arm64"
-        fi
-        mv \
-            ./dist/tmp.whl \
-            dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1
         if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
             cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
         fi
diff --git a/.vsts-ci.yml b/.vsts-ci.yml
index 9b440cb9fac1..2b1cdaa4058c 100644
--- a/.vsts-ci.yml
+++ b/.vsts-ci.yml
@@ -254,7 +254,7 @@ jobs:
       OS_NAME: 'macos'
       PRODUCES_ARTIFACTS: 'true'
     pool:
-      vmImage: 'macOS-11'
+      vmImage: 'macOS-12'
     strategy:
       matrix:
         regular:

From 7d33879e224476aabb8d06cde644d1a11d97ec7c Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 17 Jun 2024 15:04:30 -0500
Subject: [PATCH 34/41] [ci] remove unnecessary package installations in CI (#6488)

---
 .ci/setup.sh | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/.ci/setup.sh b/.ci/setup.sh
index 3fd3356e34d7..fe918d68a78c 100755
--- a/.ci/setup.sh
+++ b/.ci/setup.sh
@@ -20,9 +20,7 @@ if [[ $OS_NAME == "macos" ]]; then
         # Check https://github.com/actions/runner-images/tree/main/images/macos for available
         # versions of Xcode
         sudo xcode-select -s /Applications/Xcode_14.3.1.app/Contents/Developer || exit 1
-        if [[ $TASK != "mpi" ]]; then
-            brew install gcc
-        fi
+        brew install gcc
     fi
     if [[ $TASK == "mpi" ]]; then
         brew install open-mpi
@@ -41,35 +39,30 @@ else  # Linux
             software-properties-common
 
         sudo apt-get install --no-install-recommends -y \
-            apt-utils \
             build-essential \
             ca-certificates \
             cmake \
             curl \
             git \
-            iputils-ping \
-            jq \
             libcurl4 \
             libicu-dev \
             libssl-dev \
-            libunwind8 \
             locales \
-            locales-all \
-            netcat \
-
unzip \ - zip || exit 1 + locales-all || exit 1 if [[ $COMPILER == "clang" ]]; then sudo apt-get install --no-install-recommends -y \ clang \ libomp-dev elif [[ $COMPILER == "clang-17" ]]; then - sudo apt-get install wget + sudo apt-get install --no-install-recommends -y \ + wget wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc sudo apt-add-repository deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main sudo apt-add-repository deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main sudo apt-get update - sudo apt-get install -y clang-17 - sudo apt-get install --no-install-recommends -y libomp-17-dev + sudo apt-get install -y \ + clang-17 \ + libomp-17-dev fi export LANG="en_US.UTF-8" From 5cd95a5b161d7630731d50e9ac529c6bf3dc809f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 18 Jun 2024 08:50:20 -0500 Subject: [PATCH 35/41] [docs] remove pmml redirect, simplify some other docs (#6490) --- .ci/test_windows.ps1 | 8 +------- README.md | 4 +--- pmml/README.md | 6 ------ python-package/README.rst | 6 ------ 4 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 pmml/README.md diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index fe006601b236..8ee20a245b38 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -40,13 +40,7 @@ if ($env:TASK -eq "swig") { conda init powershell conda activate conda config --set always_yes yes --set changeps1 no - -# ref: -# * https://stackoverflow.com/a/62897729/3986677 -# * https://github.com/microsoft/LightGBM/issues/5899 -conda install "brotlipy>=0.7" - -conda update -q -y conda +conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]" if ($env:PYTHON_VERSION -eq "3.7") { $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" diff --git a/README.md b/README.md index 3b3fe40790db..abd99075e3ad 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ Support ------- - Ask a question [on Stack Overflow with the `lightgbm` tag](https://stackoverflow.com/questions/ask?tags=lightgbm), we monitor this for new questions. -- Open **bug reports** and **feature requests** (not questions) on [GitHub issues](https://github.com/microsoft/LightGBM/issues). +- Open **bug reports** and **feature requests** on [GitHub issues](https://github.com/microsoft/LightGBM/issues). How to Contribute ----------------- @@ -156,8 +156,6 @@ Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tie-Yan Liu. Huan Zhang, Si Si and Cho-Jui Hsieh. "[GPU Acceleration for Large-scale Tree Boosting](https://arxiv.org/abs/1706.08359)". SysML Conference, 2018. -**Note**: If you use LightGBM in your GitHub projects, please add `lightgbm` in the `requirements.txt`. - License ------- diff --git a/pmml/README.md b/pmml/README.md deleted file mode 100644 index 759340b61118..000000000000 --- a/pmml/README.md +++ /dev/null @@ -1,6 +0,0 @@ -PMML Generator -============== - -The old Python convert script is removed due to it cannot support the new format of categorical features. - -Please refer to https://github.com/jpmml/jpmml-lightgbm. diff --git a/python-package/README.rst b/python-package/README.rst index 68f267ec659e..86d5da32b755 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -11,8 +11,6 @@ Preparation 32-bit Python is not supported. Please install 64-bit version. 
If you have a strong need to install with 32-bit Python, refer to `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__. -`setuptools `_ is needed. - Install from `PyPI `_ '''''''''''''''''''''''''''''''''''''''''''''''''''''''' @@ -299,10 +297,6 @@ Refer to the walk through examples in `Python guide folder `_. - -The package's documentation strings (docstrings) are written in the `numpydoc style `_. - To check that a contribution to the package matches its style expectations, run the following from the root of the repo. .. code:: sh From d88dc49e440af8d5f6038f2510850259aeeaed88 Mon Sep 17 00:00:00 2001 From: Jorrick Sleijster Date: Fri, 21 Jun 2024 03:54:05 +0200 Subject: [PATCH 36/41] [python-package] Change build settings to set strict-config to false (#6493) --- .github/workflows/cuda.yml | 1 + .github/workflows/python_package.yml | 1 + .vsts-ci.yml | 1 + python-package/pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index a6005ec0344b..735bcef4a0d7 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -72,6 +72,7 @@ jobs: OS_NAME: linux PYTHON_VERSION: ${{ matrix.python_version }} TASK: ${{ matrix.task }} + SKBUILD_STRICT_CONFIG: true options: --gpus all timeout-minutes: 30 strategy: diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 035340e2ec1b..cd16696336c7 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -15,6 +15,7 @@ concurrency: env: CMAKE_BUILD_PARALLEL_LEVEL: 4 + SKBUILD_STRICT_CONFIG: true jobs: test: diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 2b1cdaa4058c..169b04fffe13 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -15,6 +15,7 @@ variables: skipComponentGovernanceDetection: true DOTNET_CLI_TELEMETRY_OPTOUT: true DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true + SKBUILD_STRICT_CONFIG: true resources: # The __work/ directory, where Azure DevOps writes the source files, needs to be read-write because # LightGBM's CI jobs write files in the source directory. diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 3f016a841f80..39fc6d8f0566 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -79,7 +79,7 @@ logging.level = "INFO" sdist.reproducible = true wheel.py-api = "py3" experimental = false -strict-config = true +strict-config = false minimum-version = "0.9.3" # end:build-system From 6fa5dc8f060e4a2de1171d7e09a3b8b38fda2f05 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 21 Jun 2024 10:26:35 -0500 Subject: [PATCH 37/41] [ci] [docs] fix link-checks job (#6495) --- .github/workflows/linkchecker.yml | 1 + docs/FAQ.rst | 4 ++-- docs/GPU-Windows.rst | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml index 5055d5ca9399..42391bb31da0 100644 --- a/.github/workflows/linkchecker.yml +++ b/.github/workflows/linkchecker.yml @@ -8,6 +8,7 @@ on: - cron: '0 8 * * *' env: + COMPILER: gcc OS_NAME: 'linux' PYTHON_VERSION: '3.12' TASK: 'check-links' diff --git a/docs/FAQ.rst b/docs/FAQ.rst index 8f0024b45730..43141480cff5 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -149,7 +149,7 @@ and copy memory as required by creating new processes instead of forking (or, us Cloud platform container services may cause LightGBM to hang, if they use Linux fork to run multiple containers on a single instance. 
For example, LightGBM hangs in AWS Batch array jobs, which `use the ECS agent -`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue. +`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue. 12. Why is early stopping not enabled by default in LightGBM? ------------------------------------------------------------- @@ -321,7 +321,7 @@ We are doing our best to provide universal wheels which have high running speed However, sometimes it's just impossible to guarantee the possibility of usage of LightGBM in any specific environment (see `Microsoft/LightGBM#1743 `__). Therefore, the first thing you should try in case of segfaults is **compiling from the source** using ``pip install --no-binary lightgbm lightgbm``. -For the OS-specific prerequisites see `this guide `__. +For the OS-specific prerequisites see https://github.com/microsoft/LightGBM/blob/master/python-package/README.rst. Also, feel free to post a new issue in our GitHub repository. We always look at each case individually and try to find a root cause. diff --git a/docs/GPU-Windows.rst b/docs/GPU-Windows.rst index c4c2ca818433..8536f49d6731 100644 --- a/docs/GPU-Windows.rst +++ b/docs/GPU-Windows.rst @@ -602,9 +602,9 @@ And open an issue in GitHub `here`_ with that log. .. _Boost: https://www.boost.org/users/history/ -.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-4.fc38.noarch.rpm +.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/40/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-9.fc40.noarch.rpm -.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-4.fc38.noarch.rpm +.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/40/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-9.fc40.noarch.rpm .. _7zip: https://www.7-zip.org/download.html From c4e70f3ce1005eeb084e571020a5941f977b87f7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 21 Jun 2024 10:27:16 -0500 Subject: [PATCH 38/41] [ci] remove OpenMP / MKL workaround in CI jobs (#6496) --- .ci/test.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index d950f01e989f..b73d7ea64189 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -163,11 +163,6 @@ source activate $CONDA_ENV cd $BUILD_DIRECTORY -if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then - # fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." 
(OpenMP library conflict due to conda's MKL) - for LIBOMP_ALIAS in libgomp.dylib libiomp5.dylib libomp.dylib; do sudo ln -sf "$(brew --cellar libomp)"/*/lib/libomp.dylib $CONDA_PREFIX/lib/$LIBOMP_ALIAS || exit 1; done -fi - if [[ $TASK == "sdist" ]]; then cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 From fd8f79944955cf83fcefb9ed598b9e0bcd07ad09 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 21 Jun 2024 10:28:47 -0500 Subject: [PATCH 39/41] [docs] [R-package] update cran-comments.md (#6499) --- R-package/cran-comments.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 137df4912c04..32916a0c827e 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,11 +1,15 @@ # CRAN Submission History -## v4.4.0 - Submission 1 - (TBD) +## v4.4.0 - Submission 1 - (June 14, 2024) ### CRAN response +Accepted to CRAN + ### Maintainer Notes +This was a standard release of `{lightgbm}`, not intended to fix any particular R-specific issues. + ## v4.3.0 - Submission 1 - (January 18, 2024) ### CRAN response From bf2641d8cb37abf9db6eba760b3d11f3fb43c873 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 21 Jun 2024 12:16:41 -0500 Subject: [PATCH 40/41] [ci] reduce repetition of BUILD_DIRECTORY in CI scripts (#6497) --- .ci/test.sh | 145 ++++++++++++++++++++++-------------------- .ci/test_r_package.sh | 4 +- .ci/test_windows.ps1 | 25 ++++---- 3 files changed, 88 insertions(+), 86 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index b73d7ea64189..6e862ba0248f 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -11,7 +11,7 @@ SANITIZERS=${SANITIZERS:-""} ARCH=$(uname -m) -LGB_VER=$(head -n 1 ${BUILD_DIRECTORY}/VERSION.txt) +LGB_VER=$(head -n 1 "${BUILD_DIRECTORY}/VERSION.txt") if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then export CXX=g++-11 @@ -43,7 +43,7 @@ else fi if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then - bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit 1 + bash "${BUILD_DIRECTORY}/.ci/test_r_package.sh" || exit 1 exit 0 fi @@ -70,27 +70,31 @@ if [[ $TASK == "if-else" ]]; then source activate $CONDA_ENV cmake -B build -S . || exit 1 cmake --build build --target lightgbm -j4 || exit 1 - cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp && ../../lightgbm config=predict.conf output_result=origin.pred || exit 1 - cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=predict.conf output_result=ifelse.pred && python test.py || exit 1 + cd "$BUILD_DIRECTORY/tests/cpp_tests" + ../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp + ../../lightgbm config=predict.conf output_result=origin.pred + ../../lightgbm config=predict.conf output_result=ifelse.pred + python test.py exit 0 fi +cd "${BUILD_DIRECTORY}" + if [[ $TASK == "swig" ]]; then cmake -B build -S . 
-DUSE_SWIG=ON cmake --build build -j4 || exit 1 if [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "gcc" ]]; then - objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1 - objdump -T $BUILD_DIRECTORY/lib_lightgbm_swig.so >> $BUILD_DIRECTORY/objdump.log || exit 1 - python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1 + objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1 + objdump -T ./lib_lightgbm_swig.so >> ./objdump.log || exit 1 + python ./helpers/check_dynamic_dependencies.py ./objdump.log || exit 1 fi if [[ $PRODUCES_ARTIFACTS == "true" ]]; then - cp $BUILD_DIRECTORY/build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar + cp ./build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar fi exit 0 fi if [[ $TASK == "lint" ]]; then - cd ${BUILD_DIRECTORY} mamba create -q -y -n $CONDA_ENV \ ${CONDA_PYTHON_REQUIREMENT} \ 'cmakelint>=1.4.2' \ @@ -102,16 +106,16 @@ if [[ $TASK == "lint" ]]; then 'r-lintr>=3.1.2' source activate $CONDA_ENV echo "Linting Python code" - bash ${BUILD_DIRECTORY}/.ci/lint-python.sh || exit 1 + bash ./.ci/lint-python.sh || exit 1 echo "Linting R code" - Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit 1 + Rscript ./.ci/lint_r_code.R "${BUILD_DIRECTORY}" || exit 1 echo "Linting C++ code" - bash ${BUILD_DIRECTORY}/.ci/lint-cpp.sh || exit 1 + bash ./.ci/lint-cpp.sh || exit 1 exit 0 fi if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then - cd $BUILD_DIRECTORY/docs + cd "${BUILD_DIRECTORY}/docs" mamba env create \ -n $CONDA_ENV \ --file ./env.yml || exit 1 @@ -123,9 +127,9 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then 'rstcheck>=6.2.0' || exit 1 source activate $CONDA_ENV # check reStructuredText formatting - cd $BUILD_DIRECTORY/python-package + cd "${BUILD_DIRECTORY}/python-package" rstcheck --report-level warning $(find . -type f -name "*.rst") || exit 1 - cd $BUILD_DIRECTORY/docs + cd "${BUILD_DIRECTORY}/docs" rstcheck --report-level warning --ignore-directives=autoclass,autofunction,autosummary,doxygenfile $(find . 
-type f -name "*.rst") || exit 1 # build docs make html || exit 1 @@ -136,11 +140,12 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then exit 0 fi # check the consistency of parameters' descriptions and other stuff - cp $BUILD_DIRECTORY/docs/Parameters.rst $BUILD_DIRECTORY/docs/Parameters-backup.rst - cp $BUILD_DIRECTORY/src/io/config_auto.cpp $BUILD_DIRECTORY/src/io/config_auto-backup.cpp - python $BUILD_DIRECTORY/helpers/parameter_generator.py || exit 1 - diff $BUILD_DIRECTORY/docs/Parameters-backup.rst $BUILD_DIRECTORY/docs/Parameters.rst || exit 1 - diff $BUILD_DIRECTORY/src/io/config_auto-backup.cpp $BUILD_DIRECTORY/src/io/config_auto.cpp || exit 1 + cd "${BUILD_DIRECTORY}" + cp ./docs/Parameters.rst ./docs/Parameters-backup.rst + cp ./src/io/config_auto.cpp ./src/io/config_auto-backup.cpp + python ./helpers/parameter_generator.py || exit 1 + diff ./docs/Parameters-backup.rst ./docs/Parameters.rst || exit 1 + diff ./src/io/config_auto-backup.cpp ./src/io/config_auto.cpp || exit 1 exit 0 fi @@ -161,21 +166,21 @@ mamba create \ source activate $CONDA_ENV -cd $BUILD_DIRECTORY +cd "${BUILD_DIRECTORY}" if [[ $TASK == "sdist" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz -v || exit 1 + sh ./build-python.sh sdist || exit 1 + sh .ci/check_python_dists.sh ./dist || exit 1 + pip install ./dist/lightgbm-$LGB_VER.tar.gz -v || exit 1 if [[ $PRODUCES_ARTIFACTS == "true" ]]; then - cp $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1 + cp ./dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1 fi - pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1 + pytest ./tests/python_package_test || exit 1 exit 0 elif [[ $TASK == "bdist" ]]; then if [[ $OS_NAME == "macos" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 + sh ./build-python.sh bdist_wheel || exit 1 + sh .ci/check_python_dists.sh ./dist || exit 1 if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1 fi @@ -185,88 +190,88 @@ elif [[ $TASK == "bdist" ]]; then else PLATFORM="manylinux2014_$ARCH" fi - cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1 + sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1 mv \ ./dist/*.whl \ ./dist/tmp.whl || exit 1 mv \ ./dist/tmp.whl \ ./dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 + sh .ci/check_python_dists.sh ./dist || exit 1 if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1 fi # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py export LIGHTGBM_TEST_DUAL_CPU_GPU=1 fi - pip install -v $BUILD_DIRECTORY/dist/*.whl || exit 1 - pytest $BUILD_DIRECTORY/tests || exit 1 + pip install -v ./dist/*.whl || exit 1 + pytest ./tests || exit 1 exit 0 fi if [[ $TASK == "gpu" ]]; then - sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' $BUILD_DIRECTORY/include/LightGBM/config.h - grep -q 'std::string device_type = "gpu"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done + 
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ./include/LightGBM/config.h + grep -q 'std::string device_type = "gpu"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done if [[ $METHOD == "pip" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 + sh ./build-python.sh sdist || exit 1 + sh .ci/check_python_dists.sh ./dist || exit 1 pip install \ -v \ --config-settings=cmake.define.USE_GPU=ON \ - $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \ + ./dist/lightgbm-$LGB_VER.tar.gz \ || exit 1 - pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1 + pytest ./tests/python_package_test || exit 1 exit 0 elif [[ $METHOD == "wheel" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --gpu || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 - pytest $BUILD_DIRECTORY/tests || exit 1 + sh ./build-python.sh bdist_wheel --gpu || exit 1 + sh ./.ci/check_python_dists.sh ./dist || exit 1 + pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1 + pytest ./tests || exit 1 exit 0 elif [[ $METHOD == "source" ]]; then cmake -B build -S . -DUSE_GPU=ON fi elif [[ $TASK == "cuda" ]]; then - sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h - grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done + sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' ./include/LightGBM/config.h + grep -q 'std::string device_type = "cuda"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done # by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests - sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h - grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done + sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' ./include/LightGBM/config.h + grep -q 'gpu_use_dp = true' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done if [[ $METHOD == "pip" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 + sh ./build-python.sh sdist || exit 1 + sh ./.ci/check_python_dists.sh ./dist || exit 1 pip install \ -v \ --config-settings=cmake.define.USE_CUDA=ON \ - $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \ + ./dist/lightgbm-$LGB_VER.tar.gz \ || exit 1 - pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1 + pytest ./tests/python_package_test || exit 1 exit 0 elif [[ $METHOD == "wheel" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --cuda || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 - pytest $BUILD_DIRECTORY/tests || exit 1 + sh ./build-python.sh bdist_wheel --cuda || exit 1 + sh ./.ci/check_python_dists.sh ./dist || exit 1 + pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1 + pytest ./tests || exit 1 exit 0 elif [[ $METHOD == "source" ]]; then cmake -B build -S . 
-DUSE_CUDA=ON fi elif [[ $TASK == "mpi" ]]; then if [[ $METHOD == "pip" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 + sh ./build-python.sh sdist || exit 1 + sh ./.ci/check_python_dists.sh ./dist || exit 1 pip install \ -v \ --config-settings=cmake.define.USE_MPI=ON \ - $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \ + ./dist/lightgbm-$LGB_VER.tar.gz \ || exit 1 - pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1 + pytest ./tests/python_package_test || exit 1 exit 0 elif [[ $METHOD == "wheel" ]]; then - cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --mpi || exit 1 - sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1 - pip install $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1 - pytest $BUILD_DIRECTORY/tests || exit 1 + sh ./build-python.sh bdist_wheel --mpi || exit 1 + sh ./.ci/check_python_dists.sh ./dist || exit 1 + pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1 + pytest ./tests || exit 1 exit 0 elif [[ $METHOD == "source" ]]; then cmake -B build -S . -DUSE_MPI=ON -DUSE_DEBUG=ON @@ -277,22 +282,22 @@ fi cmake --build build --target _lightgbm -j4 || exit 1 -cd $BUILD_DIRECTORY && sh ./build-python.sh install --precompile || exit 1 -pytest $BUILD_DIRECTORY/tests || exit 1 +sh ./build-python.sh install --precompile || exit 1 +pytest ./tests || exit 1 if [[ $TASK == "regular" ]]; then if [[ $PRODUCES_ARTIFACTS == "true" ]]; then if [[ $OS_NAME == "macos" ]]; then - cp $BUILD_DIRECTORY/lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib + cp ./lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib else if [[ $COMPILER == "gcc" ]]; then - objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1 - python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1 + objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1 + python ./helpers/check_dynamic_dependencies.py ./objdump.log || exit 1 fi - cp $BUILD_DIRECTORY/lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so + cp ./lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so fi fi - cd $BUILD_DIRECTORY/examples/python-guide + cd "$BUILD_DIRECTORY/examples/python-guide" sed -i'.bak' '/import lightgbm as lgb/a\ import matplotlib\ matplotlib.use\(\"Agg\"\)\ @@ -304,7 +309,7 @@ matplotlib.use\(\"Agg\"\)\ 'ipywidgets>=8.1.2' \ 'notebook>=7.1.2' for f in *.py **/*.py; do python $f || exit 1; done # run all examples - cd $BUILD_DIRECTORY/examples/python-guide/notebooks + cd "$BUILD_DIRECTORY/examples/python-guide/notebooks" sed -i'.bak' 's/INTERACTIVE = False/assert False, \\"Interactive mode disabled\\"/' interactive_plot_example.ipynb jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb || exit 1 # run all notebooks diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 0ed581b9bf75..6c4ac4b73833 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -136,7 +136,7 @@ if [[ $OS_NAME == "macos" ]]; then fi Rscript --vanilla -e "options(install.packages.compile.from.source = '${compile_from_source}'); install.packages(${packages}, repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), Ncpus = parallel::detectCores())" || exit 1 -cd ${BUILD_DIRECTORY} +cd "${BUILD_DIRECTORY}" PKG_TARBALL="lightgbm_*.tar.gz" LOG_FILE_NAME="lightgbm.Rcheck/00check.log" @@ -147,7 
+147,7 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then # on Linux, we recreate configure in CI to test if # a change in a PR has changed configure.ac if [[ $OS_NAME == "linux" ]]; then - ${BUILD_DIRECTORY}/R-package/recreate-configure.sh + ./R-package/recreate-configure.sh num_files_changed=$( git diff --name-only | wc -l diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 8ee20a245b38..3a130e1af6da 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -10,7 +10,7 @@ $env:CONDA_ENV = "test-env" $env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim() if ($env:TASK -eq "r-package") { - & $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $? + & .\.ci\test_r_package_windows.ps1 ; Check-Output $? Exit 0 } @@ -31,7 +31,7 @@ if ($env:TASK -eq "swig") { cmake -B build -S . -A x64 -DUSE_SWIG=ON ; Check-Output $? cmake --build build --target ALL_BUILD --config Release ; Check-Output $? if ($env:AZURE -eq "true") { - cp $env:BUILD_SOURCESDIRECTORY/build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $? + cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $? } Exit 0 } @@ -60,18 +60,17 @@ if ($env:TASK -ne "bdist") { conda activate $env:CONDA_ENV } +cd $env:BUILD_SOURCESDIRECTORY if ($env:TASK -eq "regular") { cmake -B build -S . -A x64 ; Check-Output $? cmake --build build --target ALL_BUILD --config Release ; Check-Output $? - cd $env:BUILD_SOURCESDIRECTORY - sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --precompile ; Check-Output $? - cp $env:BUILD_SOURCESDIRECTORY/Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY - cp $env:BUILD_SOURCESDIRECTORY/Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY + sh ./build-python.sh install --precompile ; Check-Output $? + cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY + cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif ($env:TASK -eq "sdist") { - cd $env:BUILD_SOURCESDIRECTORY - sh $env:BUILD_SOURCESDIRECTORY/build-python.sh sdist ; Check-Output $? - sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $? + sh ./build-python.sh sdist ; Check-Output $? + sh ./.ci/check_python_dists.sh ./dist ; Check-Output $? cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $? } elseif ($env:TASK -eq "bdist") { @@ -85,17 +84,15 @@ elseif ($env:TASK -eq "bdist") { Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors conda activate $env:CONDA_ENV - cd $env:BUILD_SOURCESDIRECTORY sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $? - sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $? + sh ./.ci/check_python_dists.sh ./dist ; Check-Output $? cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $? cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { - cd $env:BUILD_SOURCESDIRECTORY if ($env:COMPILER -eq "MINGW") { - sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --mingw ; Check-Output $? + sh ./build-python.sh install --mingw ; Check-Output $? } else { - sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install; Check-Output $? + sh ./build-python.sh install; Check-Output $? 
    }
}

From e9a6c798071632f0e9bdf4c5fa2fdf16bfd5eb36 Mon Sep 17 00:00:00 2001
From: david-cortes
Date: Fri, 21 Jun 2024 20:14:28 +0200
Subject: [PATCH 41/41] [R-package] Use `Rf_`-prefixed versions of
 protect/unprotect (#6502)

---
 R-package/src/lightgbm_R.cpp | 188 +++++++++++++++++------------------
 1 file changed, 94 insertions(+), 94 deletions(-)

diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 4bef9cefe1e9..e8383c5c366e 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -53,24 +53,24 @@ void delete_cpp_char_vec(SEXP R_ptr) {
 
 #ifndef LGB_NO_ALTREP
 SEXP make_altrepped_raw_vec(void *void_ptr) {
   std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
-  SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
-  SEXP R_raw = PROTECT(R_new_altrep(lgb_altrepped_char_vec, R_NilValue, R_NilValue));
+  SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP R_raw = Rf_protect(R_new_altrep(lgb_altrepped_char_vec, R_NilValue, R_NilValue));
   R_SetExternalPtrAddr(R_ptr, ptr_to_cpp_vec->get());
   R_RegisterCFinalizerEx(R_ptr, delete_cpp_char_vec, TRUE);
   ptr_to_cpp_vec->release();
   R_set_altrep_data1(R_raw, R_ptr);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return R_raw;
 }
 #else
 SEXP make_r_raw_vec(void *void_ptr) {
   std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
   R_xlen_t len = ptr_to_cpp_vec->get()->size();
-  SEXP out = PROTECT(Rf_allocVector(RAWSXP, len));
+  SEXP out = Rf_protect(Rf_allocVector(RAWSXP, len));
   std::copy(ptr_to_cpp_vec->get()->begin(), ptr_to_cpp_vec->get()->end(), reinterpret_cast<char*>(RAW(out)));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return out;
 }
 #define make_altrepped_raw_vec make_r_raw_vec
@@ -132,9 +132,9 @@ template <class T>
 SEXP make_altrepped_vec_from_arr(void *void_ptr) {
   T *arr = static_cast*>(void_ptr)->arr;
   uint64_t len = static_cast*>(void_ptr)->len;
-  SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
-  SEXP R_len = PROTECT(Rf_allocVector(REALSXP, 1));
-  SEXP R_vec = PROTECT(R_new_altrep(get_altrep_class_for_type<T>(), R_NilValue, R_NilValue));
+  SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP R_len = Rf_protect(Rf_allocVector(REALSXP, 1));
+  SEXP R_vec = Rf_protect(R_new_altrep(get_altrep_class_for_type<T>(), R_NilValue, R_NilValue));
   REAL(R_len)[0] = static_cast<double>(len);
 
   R_SetExternalPtrAddr(R_ptr, arr);
@@ -142,7 +142,7 @@ SEXP make_altrepped_vec_from_arr(void *void_ptr) {
   R_set_altrep_data1(R_vec, R_ptr);
   R_set_altrep_data2(R_vec, R_len);
 
-  UNPROTECT(3);
+  Rf_unprotect(3);
   return R_vec;
 }
 #else
@@ -150,9 +150,9 @@ template <class T>
 SEXP make_R_vec_from_arr(void *void_ptr) {
   T *arr = static_cast*>(void_ptr)->arr;
   uint64_t len = static_cast*>(void_ptr)->len;
-  SEXP out = PROTECT(Rf_allocVector(get_sexptype_class_for_type<T>(), len));
+  SEXP out = Rf_protect(Rf_allocVector(get_sexptype_class_for_type<T>(), len));
   std::copy(arr, arr + len, get_r_vec_ptr(out));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return out;
 }
 #define make_altrepped_vec_from_arr make_R_vec_from_arr
@@ -288,18 +288,18 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
                                   SEXP parameters,
                                   SEXP reference) {
   R_API_BEGIN();
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
   DatasetHandle handle = nullptr;
   DatasetHandle ref = nullptr;
   if (!Rf_isNull(reference)) {
     ref = R_ExternalPtrAddr(reference);
   }
-  const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
-  const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
+  const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
+  const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
   CHECK_CALL(LGBM_DatasetCreateFromFile(filename_ptr, parameters_ptr, ref, &handle));
   R_SetExternalPtrAddr(ret, handle);
   R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
-  UNPROTECT(3);
+  Rf_unprotect(3);
   return ret;
   R_API_END();
 }
@@ -313,14 +313,14 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
                                  SEXP parameters,
                                  SEXP reference) {
   R_API_BEGIN();
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
   const int* p_indptr = INTEGER(indptr);
   const int* p_indices = INTEGER(indices);
   const double* p_data = REAL(data);
   int64_t nindptr = static_cast<int64_t>(Rf_asInteger(num_indptr));
   int64_t ndata = static_cast<int64_t>(Rf_asInteger(nelem));
   int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
-  const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
+  const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
   DatasetHandle handle = nullptr;
   DatasetHandle ref = nullptr;
   if (!Rf_isNull(reference)) {
@@ -331,7 +331,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
     nrow, parameters_ptr, ref, &handle));
   R_SetExternalPtrAddr(ret, handle);
   R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
@@ -342,11 +342,11 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
                                  SEXP parameters,
                                  SEXP reference) {
   R_API_BEGIN();
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
   int32_t nrow = static_cast<int32_t>(Rf_asInteger(num_row));
   int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
   double* p_mat = REAL(data);
-  const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
+  const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
   DatasetHandle handle = nullptr;
   DatasetHandle ref = nullptr;
   if (!Rf_isNull(reference)) {
@@ -356,7 +356,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
     parameters_ptr, ref, &handle));
   R_SetExternalPtrAddr(ret, handle);
   R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
@@ -367,7 +367,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
                              SEXP parameters) {
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
   int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
   std::unique_ptr<int32_t[]> idxvec(new int32_t[len]);
   // convert from one-based to zero-based index
@@ -378,14 +378,14 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
   for (int32_t i = 0; i < len; ++i) {
     idxvec[i] = static_cast<int32_t>(used_row_indices_[i] - 1);
   }
-  const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
+  const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
   DatasetHandle res = nullptr;
   CHECK_CALL(LGBM_DatasetGetSubset(R_ExternalPtrAddr(handle),
     idxvec.get(), len, parameters_ptr,
     &res));
   R_SetExternalPtrAddr(ret, res);
   R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
@@ -394,7 +394,7 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
                                    SEXP feature_names) {
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
-  auto vec_names = Split(CHAR(PROTECT(Rf_asChar(feature_names))), '\t');
+  auto vec_names = Split(CHAR(Rf_protect(Rf_asChar(feature_names))), '\t');
   int len = static_cast<int>(vec_names.size());
   std::unique_ptr<const char*[]> vec_sptr(new const char*[len]);
   for (int i = 0; i < len; ++i) {
@@ -402,13 +402,13 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
   }
   CHECK_CALL(LGBM_DatasetSetFeatureNames(R_ExternalPtrAddr(handle),
     vec_sptr.get(), len));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
 
 SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
   SEXP feature_names;
@@ -446,11 +446,11 @@ SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
       ptr_names.data()));
   }
   CHECK_EQ(len, out_len);
-  feature_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
+  feature_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
   for (int i = 0; i < len; ++i) {
     SET_STRING_ELT(feature_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
   }
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return feature_names;
   R_API_END();
 }
@@ -459,10 +459,10 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
                               SEXP filename) {
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
-  const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
+  const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
   CHECK_CALL(LGBM_DatasetSaveBinary(R_ExternalPtrAddr(handle),
     filename_ptr));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -484,7 +484,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
   int len = Rf_asInteger(num_element);
-  const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
+  const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
   if (!strcmp("group", name) || !strcmp("query", name)) {
     CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, INTEGER(field_data), len, C_API_DTYPE_INT32));
   } else if (!strcmp("init_score", name)) {
@@ -494,7 +494,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
     std::unique_ptr<float[]> vec(new float[len]);
     std::copy(REAL(field_data), REAL(field_data) + len, vec.get());
     CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.get(), len, C_API_DTYPE_FLOAT32));
   }
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -504,7 +504,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
                             SEXP field_data) {
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
-  const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
+  const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
   int out_len = 0;
   int out_type = 0;
   const void* res;
@@ -526,7 +526,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
     auto p_data = reinterpret_cast<const double*>(res);
     std::copy(p_data, p_data + out_len, REAL(field_data));
   }
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -536,7 +536,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
                                 SEXP out) {
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(handle);
-  const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
+  const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
   int out_len = 0;
   int out_type = 0;
   const void* res;
@@ -545,7 +545,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
     out_len -= 1;
   }
   INTEGER(out)[0] = out_len;
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -553,10 +553,10 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
 SEXP LGBM_DatasetUpdateParamChecking_R(SEXP old_params,
                                        SEXP new_params) {
   R_API_BEGIN();
-  const char* old_params_ptr = CHAR(PROTECT(Rf_asChar(old_params)));
-  const char* new_params_ptr = CHAR(PROTECT(Rf_asChar(new_params)));
+  const char* old_params_ptr = CHAR(Rf_protect(Rf_asChar(old_params)));
+  const char* new_params_ptr = CHAR(Rf_protect(Rf_asChar(new_params)));
   CHECK_CALL(LGBM_DatasetUpdateParamChecking(old_params_ptr, new_params_ptr));
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return R_NilValue;
   R_API_END();
 }
@@ -613,34 +613,34 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data,
                           SEXP parameters) {
   R_API_BEGIN();
   _AssertDatasetHandleNotNull(train_data);
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
-  const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
   BoosterHandle handle = nullptr;
   CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), parameters_ptr, &handle));
   R_SetExternalPtrAddr(ret, handle);
   R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
 
 SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
   R_API_BEGIN();
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
   int out_num_iterations = 0;
-  const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
+  const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
   BoosterHandle handle = nullptr;
   CHECK_CALL(LGBM_BoosterCreateFromModelfile(filename_ptr, &out_num_iterations, &handle));
   R_SetExternalPtrAddr(ret, handle);
   R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
 
 SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
   R_API_BEGIN();
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
   SEXP temp = NULL;
   int n_protected = 1;
   int out_num_iterations = 0;
@@ -655,7 +655,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
       break;
     }
     case STRSXP: {
-      temp = PROTECT(STRING_ELT(model_str, 0));
+      temp = Rf_protect(STRING_ELT(model_str, 0));
       n_protected++;
       model_str_ptr = reinterpret_cast<const char*>(CHAR(temp));
     }
@@ -664,7 +664,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
   CHECK_CALL(LGBM_BoosterLoadModelFromString(model_str_ptr, &out_num_iterations, &handle));
   R_SetExternalPtrAddr(ret, handle);
   R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
-  UNPROTECT(n_protected);
+  Rf_unprotect(n_protected);
   return ret;
   R_API_END();
 }
@@ -703,9 +703,9 @@ SEXP LGBM_BoosterResetParameter_R(SEXP handle,
                                   SEXP parameters) {
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
-  const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
+  const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
   CHECK_CALL(LGBM_BoosterResetParameter(R_ExternalPtrAddr(handle), parameters_ptr));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -795,7 +795,7 @@ SEXP LGBM_BoosterGetLowerBoundValue_R(SEXP handle,
 }
 
 SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   SEXP eval_names;
@@ -834,11 +834,11 @@ SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
       ptr_names.data()));
   }
   CHECK_EQ(out_len, len);
-  eval_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
+  eval_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
   for (int i = 0; i < len; ++i) {
     SET_STRING_ELT(eval_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
   }
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return eval_names;
   R_API_END();
 }
@@ -908,14 +908,14 @@ SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
                                   SEXP result_filename) {
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
-  const char* data_filename_ptr = CHAR(PROTECT(Rf_asChar(data_filename)));
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
-  const char* result_filename_ptr = CHAR(PROTECT(Rf_asChar(result_filename)));
+  const char* data_filename_ptr = CHAR(Rf_protect(Rf_asChar(data_filename)));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
+  const char* result_filename_ptr = CHAR(Rf_protect(Rf_asChar(result_filename)));
   int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
   CHECK_CALL(LGBM_BoosterPredictForFile(R_ExternalPtrAddr(handle), data_filename_ptr,
     Rf_asInteger(data_has_header), pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
     parameter_ptr, result_filename_ptr));
-  UNPROTECT(3);
+  Rf_unprotect(3);
   return R_NilValue;
   R_API_END();
 }
@@ -964,12 +964,12 @@ SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
   int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
   double* ptr_ret = REAL(out_result);
   int64_t out_len;
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   CHECK_CALL(LGBM_BoosterPredictForCSC(R_ExternalPtrAddr(handle),
     p_indptr, C_API_DTYPE_INT32, p_indices,
     p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
     nrow, pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -989,7 +989,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   int64_t out_len;
   CHECK_CALL(LGBM_BoosterPredictForCSR(R_ExternalPtrAddr(handle),
     INTEGER(indptr), C_API_DTYPE_INT32, INTEGER(indices),
@@ -997,7 +997,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
     Rf_xlength(indptr), Rf_xlength(data), Rf_asInteger(ncols),
     pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, REAL(out_result)));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -1016,7 +1016,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   int nnz = static_cast<int>(Rf_xlength(data));
   const int indptr[] = {0, nnz};
   int64_t out_len;
@@ -1026,7 +1026,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
     2, nnz, Rf_asInteger(ncols),
     pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, REAL(out_result)));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -1046,8 +1046,8 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   FastConfigHandle out_fastConfig;
   CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFastInit(R_ExternalPtrAddr(handle),
     pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
@@ -1055,7 +1055,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
     parameter_ptr, &out_fastConfig));
   R_SetExternalPtrAddr(ret, out_fastConfig);
   R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
@@ -1095,12 +1095,12 @@ SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
   int32_t nrow = static_cast<int32_t>(Rf_asInteger(num_row));
   int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
   const double* p_mat = REAL(data);
   double* ptr_ret = REAL(out_result);
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   int64_t out_len;
   CHECK_CALL(LGBM_BoosterPredictForMat(R_ExternalPtrAddr(handle),
     p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
     pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -1128,12 +1128,12 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
                                        SEXP start_iteration,
                                        SEXP num_iteration,
                                        SEXP parameter) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   const char* out_names[] = {"indptr", "indices", "data", ""};
-  SEXP out = PROTECT(Rf_mkNamed(VECSXP, out_names));
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   int64_t out_len[2];
   void *out_indptr;
@@ -1179,7 +1179,7 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
     static_cast<void*>(&data_str), throw_R_memerr, &cont_token, cont_token));
   pointers_struct->data = nullptr;
-  UNPROTECT(3);
+  Rf_unprotect(3);
   return out;
   R_API_END();
 }
@@ -1196,14 +1196,14 @@ SEXP LGBM_BoosterPredictForMatSingleRow_R(SEXP handle,
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   double* ptr_ret = REAL(out_result);
   int64_t out_len;
   CHECK_CALL(LGBM_BoosterPredictForMatSingleRow(R_ExternalPtrAddr(handle),
     REAL(data), C_API_DTYPE_FLOAT64, Rf_xlength(data), 1,
     pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
     parameter_ptr, &out_len, ptr_ret));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -1219,8 +1219,8 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
-  const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
   FastConfigHandle out_fastConfig;
   CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFastInit(R_ExternalPtrAddr(handle),
     pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
@@ -1228,7 +1228,7 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
     parameter_ptr, &out_fastConfig));
   R_SetExternalPtrAddr(ret, out_fastConfig);
   R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return ret;
   R_API_END();
 }
@@ -1251,9 +1251,9 @@ SEXP LGBM_BoosterSaveModel_R(SEXP handle,
                              SEXP start_iteration) {
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
-  const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
+  const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
   CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return R_NilValue;
   R_API_END();
 }
@@ -1266,7 +1266,7 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
                                      SEXP num_iteration,
                                      SEXP feature_importance_type,
                                      SEXP start_iteration) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int64_t out_len = 0;
@@ -1281,7 +1281,7 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
     CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf->data()));
   }
   SEXP out = R_UnwindProtect(make_altrepped_raw_vec, &inner_char_buf, throw_R_memerr, &cont_token, cont_token);
-  UNPROTECT(1);
+  Rf_unprotect(1);
   return out;
   R_API_END();
 }
@@ -1290,7 +1290,7 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
                                      SEXP num_iteration,
                                      SEXP feature_importance_type,
                                      SEXP start_iteration) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   int64_t out_len = 0;
@@ -1300,14 +1300,14 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
   int importance_type = Rf_asInteger(feature_importance_type);
   std::vector<char> inner_char_buf(buf_len);
   CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
-  SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token));
+  SEXP model_str = Rf_protect(safe_R_raw(out_len, &cont_token));
   // if the model string was larger than the initial buffer, call the function again, writing directly to the R object
   if (out_len > buf_len) {
     CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
   } else {
     std::copy(inner_char_buf.begin(), inner_char_buf.begin() + out_len, reinterpret_cast<char*>(RAW(model_str)));
   }
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return model_str;
   R_API_END();
 }
@@ -1317,7 +1317,7 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
                              SEXP num_iteration,
                              SEXP feature_importance_type,
                              SEXP start_iteration) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   SEXP model_str;
@@ -1333,15 +1333,15 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
     inner_char_buf.resize(out_len);
     CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
   }
-  model_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
+  model_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
   SET_STRING_ELT(model_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return model_str;
   R_API_END();
 }
 
 SEXP LGBM_DumpParamAliases_R() {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   SEXP aliases_str;
   int64_t out_len = 0;
@@ -1353,15 +1353,15 @@ SEXP LGBM_DumpParamAliases_R() {
     inner_char_buf.resize(out_len);
     CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data()));
   }
-  aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
+  aliases_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
   SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return aliases_str;
   R_API_END();
 }
 
 SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  SEXP cont_token = Rf_protect(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
   SEXP params_str;
@@ -1374,9 +1374,9 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
     inner_char_buf.resize(out_len);
     CHECK_CALL(LGBM_BoosterGetLoadedParam(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
   }
-  params_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
+  params_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
   SET_STRING_ELT(params_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
-  UNPROTECT(2);
+  Rf_unprotect(2);
   return params_str;
   R_API_END();
 }