From 56770b53cb89f24718a79e59e9bf616ba6f607db Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 12:59:55 +0200
Subject: [PATCH 01/16] w_threshold doc + reference

---
 R/explanation.R  | 9 ++++++++-
 R/observations.R | 3 ---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/R/explanation.R b/R/explanation.R
index e0aedb103..9d274c789 100644
--- a/R/explanation.R
+++ b/R/explanation.R
@@ -62,6 +62,10 @@
 #'
 #' @author Camilla Lingjaerde, Nikolai Sellereite, Martin Jullum, Annabelle Redelmeier
 #'
+#'@references
+#'   Aas, K., Jullum, M., & Løland, A. (2021). Explaining individual predictions when features are dependent:
+#'   More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502.
+#'
 #' @examples
 #' if (requireNamespace("MASS", quietly = TRUE)) {
 #'   # Load example data
@@ -186,7 +190,10 @@ explain <- function(x, explainer, approach, prediction_zero, ...) {
 #' is only applicable when \code{approach = "empirical"}, and \code{type} is either equal to
 #' \code{"AICc_each_k"} or \code{"AICc_full"}
 #'
-#' @param w_threshold Positive integer between 0 and 1.
+#' @param w_threshold Numeric vector of length 1, with \code{0 < w_threshold <= 1} representing the minimum proportion
+#' of the total empirical weight that data samples should use. If e.g. \code{w_threshold = .8} we will choose the
+#' \code{K} samples with the largest weight so that the sum of the weights accounts for 80\% of the total weight.
+#' \code{w_threshold} is the \eqn{\eta} parameter in equation (15) of Aas et al (2021).
 #'
 #' @rdname explain
 #'
diff --git a/R/observations.R b/R/observations.R
index ed96a2add..37a6e177f 100644
--- a/R/observations.R
+++ b/R/observations.R
@@ -7,9 +7,6 @@
 #' the total number of unique features, respectively. Note that \code{m = ncol(x_train)}.
 #' @param x_train Numeric matrix
 #' @param x_test Numeric matrix
-#' @param w_threshold Numeric vector of length 1, where \code{w_threshold > 0} and
-#' \code{w_threshold <= 1}. If \code{w_threshold = .8} we will choose the \code{K} samples with
-#' the largest weight so that the sum of the weights accounts for 80\% of the total weight.
 #'
 #' @return data.table
 #'

From 585bf5a8798163e1706409b39c1ec5feac4f1a8c Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 13:06:43 +0200
Subject: [PATCH 02/16] update rd

---
 R/observations.R          |  3 +++
 man/explain.Rd            |  9 ++++++++-
 man/observation_impute.Rd | 11 ++++++++---
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/R/observations.R b/R/observations.R
index 37a6e177f..5df93d570 100644
--- a/R/observations.R
+++ b/R/observations.R
@@ -8,6 +8,9 @@
 #' @param x_train Numeric matrix
 #' @param x_test Numeric matrix
 #'
+#' @inheritParams explain
+#' @inherit explain references
+#'
 #' @return data.table
 #'
 #' @keywords internal
diff --git a/man/explain.Rd b/man/explain.Rd
index afdf14e7f..8a27840d8 100644
--- a/man/explain.Rd
+++ b/man/explain.Rd
@@ -98,7 +98,10 @@ optimizing the AICc. Note that this argument is only applicable when
 is only applicable when \code{approach = "empirical"}, and \code{type} is either equal to
 \code{"AICc_each_k"} or \code{"AICc_full"}}
 
-\item{w_threshold}{Positive integer between 0 and 1.}
+\item{w_threshold}{Numeric vector of length 1, with \code{0 < w_threshold <= 1} representing the minimum proportion
+of the total empirical weight that data samples should use. If e.g. \code{w_threshold = .8} we will choose the
+\code{K} samples with the largest weight so that the sum of the weights accounts for 80\% of the total weight.
+\code{w_threshold} is the \eqn{\eta} parameter in equation (15) of Aas et al (2021).}
 
 \item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
 If \code{NULL} the expected values are estimated from the data. Note that this is only used
@@ -241,6 +244,10 @@ if (requireNamespace("MASS", quietly = TRUE)) {
   print(explain_groups$dt)
 }
 }
+\references{
+Aas, K., Jullum, M., & Løland, A. (2021). Explaining individual predictions when features are dependent:
+  More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502.
+}
 \author{
 Camilla Lingjaerde, Nikolai Sellereite, Martin Jullum, Annabelle Redelmeier
 }
diff --git a/man/observation_impute.Rd b/man/observation_impute.Rd
index e437f39c3..a9a40ab03 100644
--- a/man/observation_impute.Rd
+++ b/man/observation_impute.Rd
@@ -25,9 +25,10 @@ the total number of unique features, respectively. Note that \code{m = ncol(x_tr
 
 \item{x_test}{Numeric matrix}
 
-\item{w_threshold}{Numeric vector of length 1, where \code{w_threshold > 0} and
-\code{w_threshold <= 1}. If \code{w_threshold = .8} we will choose the \code{K} samples with
-the largest weight so that the sum of the weights accounts for 80\% of the total weight.}
+\item{w_threshold}{Numeric vector of length 1, with \code{0 < w_threshold <= 1} representing the minimum proportion
+of the total empirical weight that data samples should use. If e.g. \code{w_threshold = .8} we will choose the
+\code{K} samples with the largest weight so that the sum of the weights accounts for 80\% of the total weight.
+\code{w_threshold} is the \eqn{\eta} parameter in equation (15) of Aas et al (2021).}
 }
 \value{
 data.table
@@ -35,6 +36,10 @@ data.table
 \description{
 Generate permutations of training data using test observations
 }
+\references{
+Aas, K., Jullum, M., & Løland, A. (2021). Explaining individual predictions when features are dependent:
+  More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502.
+}
 \author{
 Nikolai Sellereite
 }

From 4ae95c89e10c57c28d9432663db897b126e006be Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 14:12:05 +0200
Subject: [PATCH 03/16] move n_samples to explain

---
 R/explanation.R  | 30 +++++++++++++++++++++++-------
 R/observations.R | 29 ++++++++++++-----------------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/R/explanation.R b/R/explanation.R
index 9d274c789..e8cd237f3 100644
--- a/R/explanation.R
+++ b/R/explanation.R
@@ -11,6 +11,9 @@
 #' either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, or \code{"ctree"}. See details for more
 #' information.
 #'
+#' @param n_samples Positive integer. Indicating the maximum number of samples to use in the
+#' Monte Carlo integration for every conditional expectation. See also details.
+#'
 #' @param prediction_zero Numeric. The prediction value for unseen data, typically equal to the mean of
 #' the response.
 #'
@@ -19,7 +22,6 @@
 #' @details The most important thing to notice is that \code{shapr} has implemented four different
 #' approaches for estimating the conditional distributions of the data, namely \code{"empirical"},
 #' \code{"gaussian"}, \code{"copula"} and \code{"ctree"}.
-#'
 #' In addition, the user also has the option of combining the four approaches.
 #' E.g. if you're in a situation where you have trained a model the consists of 10 features,
 #' and you'd like to use the \code{"gaussian"} approach when you condition on a single feature,
@@ -29,6 +31,13 @@
 #' \code{"approach[i]" = "gaussian"} it means that you'd like to use the \code{"gaussian"} approach
 #' when conditioning on \code{i} features.
 #'
+#' For \code{approach="ctree"}, \code{n_samples} corresponds to the number of samples
+#' from the leaf node (see an exception related to the \code{sample} argument).
+#' For \code{approach="empirical"}, \code{n_samples} is  the \eqn{K} parameter in equations (14-15) of
+#' Aas et al. (2021), i.e. the maximum number of observations (with largest weights) that is used, see also the
+#' \code{w_threshold} argument.
+#'
+#'
 #' @return Object of class \code{c("shapr", "list")}. Contains the following items:
 #' \describe{
 #'   \item{dt}{data.table}
@@ -135,7 +144,7 @@
 #'   )
 #'   print(explain_groups$dt)
 #' }
-explain <- function(x, explainer, approach, prediction_zero, ...) {
+explain <- function(x, explainer, approach, prediction_zero, n_samples = 1e3,...) {
   extras <- list(...)
 
   # Check input for x
@@ -199,9 +208,10 @@ explain <- function(x, explainer, approach, prediction_zero, ...) {
 #'
 #' @export
 explain.empirical <- function(x, explainer, approach, prediction_zero,
+                              n_samples = 1e3, w_threshold = 0.95,
                               type = "fixed_sigma", fixed_sigma_vec = 0.1,
                               n_samples_aicc = 1000, eval_max_aicc = 20,
-                              start_aicc = 0.1, w_threshold = 0.95, ...) {
+                              start_aicc = 0.1, ...) {
 
   # Add arguments to explainer object
   explainer$x_test <- as.matrix(preprocess_data(x, explainer$feature_list)$x_dt)
@@ -212,6 +222,7 @@ explain.empirical <- function(x, explainer, approach, prediction_zero,
   explainer$eval_max_aicc <- eval_max_aicc
   explainer$start_aicc <- start_aicc
   explainer$w_threshold <- w_threshold
+  explainer$n_samples <- n_samples
 
   # Generate data
   dt <- prepare_data(explainer, ...)
@@ -236,12 +247,14 @@ explain.empirical <- function(x, explainer, approach, prediction_zero,
 #' @rdname explain
 #'
 #' @export
-explain.gaussian <- function(x, explainer, approach, prediction_zero, mu = NULL, cov_mat = NULL, ...) {
+explain.gaussian <- function(x, explainer, approach, prediction_zero, n_samples = 1e3, mu = NULL, cov_mat = NULL, ...) {
 
 
   # Add arguments to explainer object
   explainer$x_test <- as.matrix(preprocess_data(x, explainer$feature_list)$x_dt)
   explainer$approach <- approach
+  explainer$n_samples <- n_samples
+
 
   # If mu is not provided directly, use mean of training data
   if (is.null(mu)) {
@@ -277,11 +290,12 @@ explain.gaussian <- function(x, explainer, approach, prediction_zero, mu = NULL,
 
 #' @rdname explain
 #' @export
-explain.copula <- function(x, explainer, approach, prediction_zero, ...) {
+explain.copula <- function(x, explainer, approach, prediction_zero, n_samples = 1e3, ...) {
 
   # Setup
   explainer$x_test <- as.matrix(preprocess_data(x, explainer$feature_list)$x_dt)
   explainer$approach <- approach
+  explainer$n_samples <- n_samples
 
   # Prepare transformed data
   x_train <- apply(
@@ -341,7 +355,7 @@ explain.copula <- function(x, explainer, approach, prediction_zero, ...) {
 #' @name explain
 #'
 #' @export
-explain.ctree <- function(x, explainer, approach, prediction_zero,
+explain.ctree <- function(x, explainer, approach, prediction_zero, n_samples = 1e3,
                           mincriterion = 0.95, minsplit = 20,
                           minbucket = 7, sample = TRUE, ...) {
   # Checks input argument
@@ -356,6 +370,7 @@ explain.ctree <- function(x, explainer, approach, prediction_zero,
   explainer$minsplit <- minsplit
   explainer$minbucket <- minbucket
   explainer$sample <- sample
+  explainer$n_samples <- n_samples
 
   # Generate data
   dt <- prepare_data(explainer, ...)
@@ -374,12 +389,13 @@ explain.ctree <- function(x, explainer, approach, prediction_zero,
 #' @name explain
 #'
 #' @export
-explain.combined <- function(x, explainer, approach, prediction_zero,
+explain.combined <- function(x, explainer, approach, prediction_zero, n_samples = 1e3,
                              mu = NULL, cov_mat = NULL, ...) {
   # Get indices of combinations
   l <- get_list_approaches(explainer$X$n_features, approach)
   explainer$return <- TRUE
   explainer$x_test <- as.matrix(preprocess_data(x, explainer$feature_list)$x_dt)
+  explainer$n_samples <- n_samples
 
   dt_l <- list()
   for (i in seq_along(l)) {
diff --git a/R/observations.R b/R/observations.R
index 5df93d570..8ba92bced 100644
--- a/R/observations.R
+++ b/R/observations.R
@@ -71,9 +71,6 @@ observation_impute <- function(W_kernel, S, x_train, x_test, w_threshold = .7, n
 #'
 #' @param x Explainer object. See \code{\link{explain}} for more information.
 #'
-#' @param n_samples Positive integer. Indicating the maximum number of samples to use in the
-#' Monte Carlo integration for every conditional expectation.
-#'
 #' @param seed Positive integer. If \code{NULL} the seed will be inherited from the calling environment.
 #'
 #' @param index_features Positive integer vector. Specifies the indices of combinations to apply to the present method.
@@ -94,7 +91,7 @@ prepare_data <- function(x, ...) {
 
 #' @rdname prepare_data
 #' @export
-prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
+prepare_data.empirical <- function(x, seed = 1, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
 
   # Get distance matrix ----------------
@@ -158,7 +155,7 @@ prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features
       x_train = as.matrix(x$x_train),
       x_test = x$x_test[i, , drop = FALSE],
       w_threshold = x$w_threshold,
-      n_samples = n_samples
+      n_samples = x$n_samples
     )
 
     dt_l[[i]][, id := i]
@@ -171,7 +168,7 @@ prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features
 
 #' @rdname prepare_data
 #' @export
-prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
+prepare_data.gaussian <- function(x, seed = 1, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
 
   n_xtest <- nrow(x$x_test)
@@ -187,7 +184,7 @@ prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features =
     l <- lapply(
       X = features,
       FUN = sample_gaussian,
-      n_samples = n_samples,
+      n_samples = x$n_samples,
       mu = x$mu,
       cov_mat = x$cov_mat,
       m = ncol(x$x_test),
@@ -195,7 +192,7 @@ prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features =
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / n_samples]
+    dt_l[[i]][, w := 1 / x$n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
@@ -206,7 +203,7 @@ prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features =
 
 #' @rdname prepare_data
 #' @export
-prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
+prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
   n_xtest <- nrow(x$x_test)
   dt_l <- list()
@@ -221,7 +218,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
     l <- lapply(
       X = features,
       FUN = sample_copula,
-      n_samples = n_samples,
+      n_samples = x$n_samples,
       mu = x$mu,
       cov_mat = x$cov_mat,
       m = ncol(x$x_test),
@@ -231,7 +228,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / n_samples]
+    dt_l[[i]][, w := 1 / x$n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
@@ -239,9 +236,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
   return(dt)
 }
 
-#' @param n_samples Integer. The number of obs to sample from the leaf if \code{sample} = TRUE or if \code{sample}
-#' = FALSE but \code{n_samples} is less than the number of obs in the leaf.
-#'
+
 #' @param index_features List. Default is NULL but if either various methods are being used or various mincriterion are
 #' used for different numbers of conditioned features, this will be a list with the features to pass.
 #'
@@ -258,7 +253,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
 #'
 #' @rdname prepare_data
 #' @export
-prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NULL,
+prepare_data.ctree <- function(x, seed = 1, index_features = NULL,
                                mc_cores = 1, mc_cores_create_ctree = mc_cores,
                                mc_cores_sample_ctree = mc_cores, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
@@ -290,7 +285,7 @@ prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NU
     l <- parallel::mclapply(
       X = all_trees,
       FUN = sample_ctree,
-      n_samples = n_samples,
+      n_samples = x$n_samples,
       x_test = x$x_test[i, , drop = FALSE],
       x_train = x$x_train,
       p = ncol(x$x_test),
@@ -300,7 +295,7 @@ prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NU
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / n_samples]
+    dt_l[[i]][, w := 1 / x$n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }

From 38e2635944f90b849232d465f96b8deffe4cb5f1 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 14:12:14 +0200
Subject: [PATCH 04/16] update rd

---
 R/observations.R          | 29 +++++++++++++++++------------
 man/explain.Rd            | 30 +++++++++++++++++++++---------
 man/observation_impute.Rd |  3 +++
 man/prepare_data.Rd       | 17 +++--------------
 4 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/R/observations.R b/R/observations.R
index 8ba92bced..5df93d570 100644
--- a/R/observations.R
+++ b/R/observations.R
@@ -71,6 +71,9 @@ observation_impute <- function(W_kernel, S, x_train, x_test, w_threshold = .7, n
 #'
 #' @param x Explainer object. See \code{\link{explain}} for more information.
 #'
+#' @param n_samples Positive integer. Indicating the maximum number of samples to use in the
+#' Monte Carlo integration for every conditional expectation.
+#'
 #' @param seed Positive integer. If \code{NULL} the seed will be inherited from the calling environment.
 #'
 #' @param index_features Positive integer vector. Specifies the indices of combinations to apply to the present method.
@@ -91,7 +94,7 @@ prepare_data <- function(x, ...) {
 
 #' @rdname prepare_data
 #' @export
-prepare_data.empirical <- function(x, seed = 1, index_features = NULL, ...) {
+prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
 
   # Get distance matrix ----------------
@@ -155,7 +158,7 @@ prepare_data.empirical <- function(x, seed = 1, index_features = NULL, ...) {
       x_train = as.matrix(x$x_train),
       x_test = x$x_test[i, , drop = FALSE],
       w_threshold = x$w_threshold,
-      n_samples = x$n_samples
+      n_samples = n_samples
     )
 
     dt_l[[i]][, id := i]
@@ -168,7 +171,7 @@ prepare_data.empirical <- function(x, seed = 1, index_features = NULL, ...) {
 
 #' @rdname prepare_data
 #' @export
-prepare_data.gaussian <- function(x, seed = 1, index_features = NULL, ...) {
+prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
 
   n_xtest <- nrow(x$x_test)
@@ -184,7 +187,7 @@ prepare_data.gaussian <- function(x, seed = 1, index_features = NULL, ...) {
     l <- lapply(
       X = features,
       FUN = sample_gaussian,
-      n_samples = x$n_samples,
+      n_samples = n_samples,
       mu = x$mu,
       cov_mat = x$cov_mat,
       m = ncol(x$x_test),
@@ -192,7 +195,7 @@ prepare_data.gaussian <- function(x, seed = 1, index_features = NULL, ...) {
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / x$n_samples]
+    dt_l[[i]][, w := 1 / n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
@@ -203,7 +206,7 @@ prepare_data.gaussian <- function(x, seed = 1, index_features = NULL, ...) {
 
 #' @rdname prepare_data
 #' @export
-prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features = NULL, ...) {
+prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
   n_xtest <- nrow(x$x_test)
   dt_l <- list()
@@ -218,7 +221,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features
     l <- lapply(
       X = features,
       FUN = sample_copula,
-      n_samples = x$n_samples,
+      n_samples = n_samples,
       mu = x$mu,
       cov_mat = x$cov_mat,
       m = ncol(x$x_test),
@@ -228,7 +231,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / x$n_samples]
+    dt_l[[i]][, w := 1 / n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
@@ -236,7 +239,9 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features
   return(dt)
 }
 
-
+#' @param n_samples Integer. The number of obs to sample from the leaf if \code{sample} = TRUE or if \code{sample}
+#' = FALSE but \code{n_samples} is less than the number of obs in the leaf.
+#'
 #' @param index_features List. Default is NULL but if either various methods are being used or various mincriterion are
 #' used for different numbers of conditioned features, this will be a list with the features to pass.
 #'
@@ -253,7 +258,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features
 #'
 #' @rdname prepare_data
 #' @export
-prepare_data.ctree <- function(x, seed = 1, index_features = NULL,
+prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NULL,
                                mc_cores = 1, mc_cores_create_ctree = mc_cores,
                                mc_cores_sample_ctree = mc_cores, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
@@ -285,7 +290,7 @@ prepare_data.ctree <- function(x, seed = 1, index_features = NULL,
     l <- parallel::mclapply(
       X = all_trees,
       FUN = sample_ctree,
-      n_samples = x$n_samples,
+      n_samples = n_samples,
       x_test = x$x_test[i, , drop = FALSE],
       x_train = x$x_train,
       p = ncol(x$x_test),
@@ -295,7 +300,7 @@ prepare_data.ctree <- function(x, seed = 1, index_features = NULL,
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / x$n_samples]
+    dt_l[[i]][, w := 1 / n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
diff --git a/man/explain.Rd b/man/explain.Rd
index 8a27840d8..3e4d1c67f 100644
--- a/man/explain.Rd
+++ b/man/explain.Rd
@@ -10,19 +10,20 @@
 \alias{explain.ctree_comb_mincrit}
 \title{Explain the output of machine learning models with more accurately estimated Shapley values}
 \usage{
-explain(x, explainer, approach, prediction_zero, ...)
+explain(x, explainer, approach, prediction_zero, n_samples = 1000, ...)
 
 \method{explain}{empirical}(
   x,
   explainer,
   approach,
   prediction_zero,
+  n_samples = 1000,
+  w_threshold = 0.95,
   type = "fixed_sigma",
   fixed_sigma_vec = 0.1,
   n_samples_aicc = 1000,
   eval_max_aicc = 20,
   start_aicc = 0.1,
-  w_threshold = 0.95,
   ...
 )
 
@@ -31,18 +32,20 @@ explain(x, explainer, approach, prediction_zero, ...)
   explainer,
   approach,
   prediction_zero,
+  n_samples = 1000,
   mu = NULL,
   cov_mat = NULL,
   ...
 )
 
-\method{explain}{copula}(x, explainer, approach, prediction_zero, ...)
+\method{explain}{copula}(x, explainer, approach, prediction_zero, n_samples = 1000, ...)
 
 \method{explain}{ctree}(
   x,
   explainer,
   approach,
   prediction_zero,
+  n_samples = 1000,
   mincriterion = 0.95,
   minsplit = 20,
   minbucket = 7,
@@ -55,6 +58,7 @@ explain(x, explainer, approach, prediction_zero, ...)
   explainer,
   approach,
   prediction_zero,
+  n_samples = 1000,
   mu = NULL,
   cov_mat = NULL,
   ...
@@ -77,8 +81,16 @@ information.}
 \item{prediction_zero}{Numeric. The prediction value for unseen data, typically equal to the mean of
 the response.}
 
+\item{n_samples}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation. See also details.}
+
 \item{...}{Additional arguments passed to \code{\link{prepare_data}}}
 
+\item{w_threshold}{Numeric vector of length 1, with \code{0 < w_threshold <= 1} representing the minimum proportion
+of the total empirical weight that data samples should use. If e.g. \code{w_threshold = .8} we will choose the
+\code{K} samples with the largest weight so that the sum of the weights accounts for 80\% of the total weight.
+\code{w_threshold} is the \eqn{\eta} parameter in equation (15) of Aas et al (2021).}
+
 \item{type}{Character. Should be equal to either \code{"independence"},
 \code{"fixed_sigma"}, \code{"AICc_each_k"} or \code{"AICc_full"}.}
 
@@ -98,11 +110,6 @@ optimizing the AICc. Note that this argument is only applicable when
 is only applicable when \code{approach = "empirical"}, and \code{type} is either equal to
 \code{"AICc_each_k"} or \code{"AICc_full"}}
 
-\item{w_threshold}{Numeric vector of length 1, with \code{0 < w_threshold <= 1} representing the minimum proportion
-of the total empirical weight that data samples should use. If e.g. \code{w_threshold = .8} we will choose the
-\code{K} samples with the largest weight so that the sum of the weights accounts for 80\% of the total weight.
-\code{w_threshold} is the \eqn{\eta} parameter in equation (15) of Aas et al (2021).}
-
 \item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
 If \code{NULL} the expected values are estimated from the data. Note that this is only used
 when \code{approach = "gaussian"}.}
@@ -164,7 +171,6 @@ Explain the output of machine learning models with more accurately estimated Sha
 The most important thing to notice is that \code{shapr} has implemented four different
 approaches for estimating the conditional distributions of the data, namely \code{"empirical"},
 \code{"gaussian"}, \code{"copula"} and \code{"ctree"}.
-
 In addition, the user also has the option of combining the four approaches.
 E.g. if you're in a situation where you have trained a model the consists of 10 features,
 and you'd like to use the \code{"gaussian"} approach when you condition on a single feature,
@@ -173,6 +179,12 @@ if you condition on more than 5 features this can be done by simply passing
 \code{approach = c("gaussian", rep("empirical", 4), rep("copula", 5))}. If
 \code{"approach[i]" = "gaussian"} it means that you'd like to use the \code{"gaussian"} approach
 when conditioning on \code{i} features.
+
+For \code{approach="ctree"}, \code{n_samples} corresponds to the number of samples
+from the leaf node (see an exception related to the \code{sample} argument).
+For \code{approach="empirical"}, \code{n_samples} is  the \eqn{K} parameter in equations (14-15) of
+Aas et al. (2021), i.e. the maximum number of observations (with largest weights) that is used, see also the
+\code{w_threshold} argument.
 }
 \examples{
 if (requireNamespace("MASS", quietly = TRUE)) {
diff --git a/man/observation_impute.Rd b/man/observation_impute.Rd
index a9a40ab03..dd48d6c1f 100644
--- a/man/observation_impute.Rd
+++ b/man/observation_impute.Rd
@@ -29,6 +29,9 @@ the total number of unique features, respectively. Note that \code{m = ncol(x_tr
 of the total empirical weight that data samples should use. If e.g. \code{w_threshold = .8} we will choose the
 \code{K} samples with the largest weight so that the sum of the weights accounts for 80\% of the total weight.
 \code{w_threshold} is the \eqn{\eta} parameter in equation (15) of Aas et al (2021).}
+
+\item{n_samples}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation. See also details.}
 }
 \value{
 data.table
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index 49977759a..15a1f07e9 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -10,23 +10,15 @@
 \usage{
 prepare_data(x, ...)
 
-\method{prepare_data}{empirical}(x, seed = 1, n_samples = 1000, index_features = NULL, ...)
+\method{prepare_data}{empirical}(x, seed = 1, index_features = NULL, ...)
 
-\method{prepare_data}{gaussian}(x, seed = 1, n_samples = 1000, index_features = NULL, ...)
+\method{prepare_data}{gaussian}(x, seed = 1, index_features = NULL, ...)
 
-\method{prepare_data}{copula}(
-  x,
-  x_test_gaussian = 1,
-  seed = 1,
-  n_samples = 1000,
-  index_features = NULL,
-  ...
-)
+\method{prepare_data}{copula}(x, x_test_gaussian = 1, seed = 1, index_features = NULL, ...)
 
 \method{prepare_data}{ctree}(
   x,
   seed = 1,
-  n_samples = 1000,
   index_features = NULL,
   mc_cores = 1,
   mc_cores_create_ctree = mc_cores,
@@ -41,9 +33,6 @@ prepare_data(x, ...)
 
 \item{seed}{Positive integer. If \code{NULL} the seed will be inherited from the calling environment.}
 
-\item{n_samples}{Integer. The number of obs to sample from the leaf if \code{sample} = TRUE or if \code{sample}
-= FALSE but \code{n_samples} is less than the number of obs in the leaf.}
-
 \item{index_features}{List. Default is NULL but if either various methods are being used or various mincriterion are
 used for different numbers of conditioned features, this will be a list with the features to pass.}
 

From 153b66fd894c08c6f5cc47c6121b172e631c55d8 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 14:20:12 +0200
Subject: [PATCH 05/16] more

---
 R/observations.R | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/R/observations.R b/R/observations.R
index 5df93d570..8ba92bced 100644
--- a/R/observations.R
+++ b/R/observations.R
@@ -71,9 +71,6 @@ observation_impute <- function(W_kernel, S, x_train, x_test, w_threshold = .7, n
 #'
 #' @param x Explainer object. See \code{\link{explain}} for more information.
 #'
-#' @param n_samples Positive integer. Indicating the maximum number of samples to use in the
-#' Monte Carlo integration for every conditional expectation.
-#'
 #' @param seed Positive integer. If \code{NULL} the seed will be inherited from the calling environment.
 #'
 #' @param index_features Positive integer vector. Specifies the indices of combinations to apply to the present method.
@@ -94,7 +91,7 @@ prepare_data <- function(x, ...) {
 
 #' @rdname prepare_data
 #' @export
-prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
+prepare_data.empirical <- function(x, seed = 1, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
 
   # Get distance matrix ----------------
@@ -158,7 +155,7 @@ prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features
       x_train = as.matrix(x$x_train),
       x_test = x$x_test[i, , drop = FALSE],
       w_threshold = x$w_threshold,
-      n_samples = n_samples
+      n_samples = x$n_samples
     )
 
     dt_l[[i]][, id := i]
@@ -171,7 +168,7 @@ prepare_data.empirical <- function(x, seed = 1, n_samples = 1e3, index_features
 
 #' @rdname prepare_data
 #' @export
-prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
+prepare_data.gaussian <- function(x, seed = 1, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
 
   n_xtest <- nrow(x$x_test)
@@ -187,7 +184,7 @@ prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features =
     l <- lapply(
       X = features,
       FUN = sample_gaussian,
-      n_samples = n_samples,
+      n_samples = x$n_samples,
       mu = x$mu,
       cov_mat = x$cov_mat,
       m = ncol(x$x_test),
@@ -195,7 +192,7 @@ prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features =
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / n_samples]
+    dt_l[[i]][, w := 1 / x$n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
@@ -206,7 +203,7 @@ prepare_data.gaussian <- function(x, seed = 1, n_samples = 1e3, index_features =
 
 #' @rdname prepare_data
 #' @export
-prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e3, index_features = NULL, ...) {
+prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, index_features = NULL, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
   n_xtest <- nrow(x$x_test)
   dt_l <- list()
@@ -221,7 +218,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
     l <- lapply(
       X = features,
       FUN = sample_copula,
-      n_samples = n_samples,
+      n_samples = x$n_samples,
       mu = x$mu,
       cov_mat = x$cov_mat,
       m = ncol(x$x_test),
@@ -231,7 +228,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / n_samples]
+    dt_l[[i]][, w := 1 / x$n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }
@@ -239,9 +236,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
   return(dt)
 }
 
-#' @param n_samples Integer. The number of obs to sample from the leaf if \code{sample} = TRUE or if \code{sample}
-#' = FALSE but \code{n_samples} is less than the number of obs in the leaf.
-#'
+
 #' @param index_features List. Default is NULL but if either various methods are being used or various mincriterion are
 #' used for different numbers of conditioned features, this will be a list with the features to pass.
 #'
@@ -258,7 +253,7 @@ prepare_data.copula <- function(x, x_test_gaussian = 1, seed = 1, n_samples = 1e
 #'
 #' @rdname prepare_data
 #' @export
-prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NULL,
+prepare_data.ctree <- function(x, seed = 1, index_features = NULL,
                                mc_cores = 1, mc_cores_create_ctree = mc_cores,
                                mc_cores_sample_ctree = mc_cores, ...) {
   id <- id_combination <- w <- NULL # due to NSE notes in R CMD check
@@ -290,7 +285,7 @@ prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NU
     l <- parallel::mclapply(
       X = all_trees,
       FUN = sample_ctree,
-      n_samples = n_samples,
+      n_samples = x$n_samples,
       x_test = x$x_test[i, , drop = FALSE],
       x_train = x$x_train,
       p = ncol(x$x_test),
@@ -300,7 +295,7 @@ prepare_data.ctree <- function(x, seed = 1, n_samples = 1e3, index_features = NU
     )
 
     dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
-    dt_l[[i]][, w := 1 / n_samples]
+    dt_l[[i]][, w := 1 / x$n_samples]
     dt_l[[i]][, id := i]
     if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
   }

From bb535e148e1a0d98019dfa190d7e21c0c3130d7a Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 14:25:24 +0200
Subject: [PATCH 06/16] bug

---
 R/explanation.R | 2 +-
 man/explain.Rd  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/explanation.R b/R/explanation.R
index e8cd237f3..649d8cfe7 100644
--- a/R/explanation.R
+++ b/R/explanation.R
@@ -458,7 +458,7 @@ get_list_approaches <- function(n_features, approach) {
 #'
 #' @export
 explain.ctree_comb_mincrit <- function(x, explainer, approach,
-                                       prediction_zero, mincriterion, ...) {
+                                       prediction_zero, n_samples, mincriterion, ...) {
 
   # Get indices of combinations
   l <- get_list_ctree_mincrit(explainer$X$n_features, mincriterion)
diff --git a/man/explain.Rd b/man/explain.Rd
index 3e4d1c67f..2491332e9 100644
--- a/man/explain.Rd
+++ b/man/explain.Rd
@@ -64,7 +64,7 @@ explain(x, explainer, approach, prediction_zero, n_samples = 1000, ...)
   ...
 )
 
-\method{explain}{ctree_comb_mincrit}(x, explainer, approach, prediction_zero, mincriterion, ...)
+\method{explain}{ctree_comb_mincrit}(x, explainer, approach, prediction_zero, n_samples, mincriterion, ...)
 }
 \arguments{
 \item{x}{A matrix or data.frame. Contains the the features, whose

From d14b3ddf04bf807fef5ddc3bf7ed67e39a392899 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 14:48:14 +0200
Subject: [PATCH 07/16] Add "Success with message:" to all messages for clarity

---
 R/features.R        |  3 ++-
 R/preprocess_data.R | 15 +++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/R/features.R b/R/features.R
index c51ae9ab0..d120917f3 100644
--- a/R/features.R
+++ b/R/features.R
@@ -73,7 +73,8 @@ feature_combinations <- function(m, exact = TRUE, n_combinations = 200, weight_z
     exact <- TRUE
     message(
       paste0(
-        "\nn_combinations is larger than or equal to 2^m = ", 2^m, ". \n",
+        "\nSuccess with message:\n",
+        "n_combinations is larger than or equal to 2^m = ", 2^m, ". \n",
         "Using exact instead."
       )
     )
diff --git a/R/preprocess_data.R b/R/preprocess_data.R
index 746a575c6..49283972f 100644
--- a/R/preprocess_data.R
+++ b/R/preprocess_data.R
@@ -156,6 +156,7 @@ check_features <- function(f_list_1, f_list_2,
   }
   if (NULL_1 & use_1_as_truth) {
     message(paste0(
+      "\nSuccess with message:\n",
       "The specified ", name_1, " provides NULL feature labels. ",
       "The labels of ", name_2, " are taken as the truth."
     ))
@@ -170,6 +171,7 @@ check_features <- function(f_list_1, f_list_2,
   }
   if ((NA_1 & use_1_as_truth)) {
     message(paste0(
+      "\nSuccess with message:\n",
       "The specified ", name_1, " provides feature labels that are NA. ",
       "The labels of ", name_2, " are taken as the truth."
     ))
@@ -245,6 +247,7 @@ check_features <- function(f_list_1, f_list_2,
   #### Checking classes ####
   if (any(is.na(f_list_1$classes)) & use_1_as_truth) { # Only relevant when f_list_1 is a model
     message(paste0(
+      "\nSuccess with message:\n",
       "The specified ", name_1, " provides feature classes that are NA. ",
       "The classes of ", name_2, " are taken as the truth."
     ))
@@ -272,6 +275,7 @@ check_features <- function(f_list_1, f_list_2,
     is_NULL <- any(is.null(relevant_factor_levels))
     if ((is_NA | is_NULL) & use_1_as_truth) {
       message(paste0(
+        "\nSuccess with message:\n",
         "The specified ", name_1, " provides factor feature levels that are NULL or NA. ",
         "The factor levels of ", name_2, " are taken as the truth."
       ))
@@ -330,9 +334,9 @@ update_data <- function(data, updater) {
   # Reorder and delete unused columns
   cnms_remove <- setdiff(colnames(data), new_labels)
   if (length(cnms_remove) > 0) {
-    message(
-      paste0(
-        "The columns(s) ",
+    message(paste0(
+      "\nSuccess with message:\n",
+      "The columns(s) ",
         paste0(cnms_remove, collapse = ", "),
         " is not used by the model and thus removed from the data."
       )
@@ -348,6 +352,7 @@ update_data <- function(data, updater) {
     if (any(!identical_levels)) {
       changed_levels <- which(!identical_levels)
       message(paste0(
+        "\nSuccess with message:\n",
         "Levels are reordered for the factor feature(s) ",
         paste0(new_labels[changed_levels], collapse = ", "), "."
       ))
@@ -383,7 +388,9 @@ process_groups <- function(group, feature_labels) {
 
   # Make group names if not existing
   if (is.null(names(group))) {
-    message("Group names not provided. Assigning them the default names 'group1', 'group2', 'group3' etc.")
+    message(
+      "\nSuccess with message:\n
+      Group names not provided. Assigning them the default names 'group1', 'group2', 'group3' etc.")
     names(group) <- paste0("group", seq_along(group))
   }
 

From bf2d2e69fde3b11c90069df622dfb83688379cd4 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 15:02:26 +0200
Subject: [PATCH 08/16] update GHA cmd checking

---
 .github/workflows/R-CMD-check.yaml | 64 ++++++++++++++----------------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 5e10ea6be..8818274f0 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -1,12 +1,17 @@
+# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
+# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
+# This is slightly modified verison of this https://github.com/r-lib/actions/blob/master/examples/check-full.yaml
+(added aug 10th 2021)
+
 on:
   push:
     branches:
+      - main
       - master
-      - cranversion
   pull_request:
     branches:
+      - main
       - master
-      - cranversion
 
 name: R-CMD-check
 
@@ -20,12 +25,12 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: macOS-latest,   r: 'release'}
-          - {os: macOS-latest,   r: '4.0'}
-          - {os: windows-latest, r: '4.0'}
-          - {os: ubuntu-16.04,   r: '4.0', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
+          - {os: windows-latest, r: 'release'}
+          - {os: macOS-latest, r: 'release'}
+          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
+          - {os: ubuntu-20.04,   r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"} # Added manually by martinju
+          - {os: ubuntu-20.04,   r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} # Added manually by martinju, remove some time in 2022
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -35,34 +40,33 @@ jobs:
     steps:
       - uses: actions/checkout@v2
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v1
         with:
           r-version: ${{ matrix.config.r }}
 
-      - uses: r-lib/actions/setup-pandoc@master
+      - uses: r-lib/actions/setup-pandoc@v1
 
       - name: Query dependencies
         run: |
           install.packages('remotes')
           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
+          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
         shell: Rscript {0}
 
-      - name: Cache R packages
-        if: runner.os != 'Windows'
-        uses: actions/cache@v1
+      - name: Restore R package cache
+        uses: actions/cache@v2
         with:
           path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-r-${{ matrix.config.r }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-1-
+          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
 
       - name: Install system dependencies
         if: runner.os == 'Linux'
-        env:
-          RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
         run: |
-          Rscript -e "remotes::install_github('r-hub/sysreqs')"
-          sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
-          sudo -s eval "$sysreqs"
+          while read -r cmd
+          do
+            eval sudo $cmd
+          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
 
       - name: Install dependencies
         run: |
@@ -70,27 +74,17 @@ jobs:
           remotes::install_cran("rcmdcheck")
         shell: Rscript {0}
 
-      - name: Session info
-        run: |
-          options(width = 100)
-          pkgs <- installed.packages()[, "Package"]
-          sessioninfo::session_info(pkgs, include_base = TRUE)
-        shell: Rscript {0}
-
       - name: Check
         env:
-          _R_CHECK_CRAN_INCOMING_: false
-        run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
+          _R_CHECK_CRAN_INCOMING_REMOTE_: false
+        run: |
+          options(crayon.enabled = TRUE)
+          rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
         shell: Rscript {0}
 
-      - name: Show testthat output
-        if: always()
-        run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
-        shell: bash
-
       - name: Upload check results
         if: failure()
-        uses: actions/upload-artifact@master
+        uses: actions/upload-artifact@main
         with:
           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
           path: check

From cafe69fbd7cce74cba738bbda2d5fa8df7e2830b Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 15:08:29 +0200
Subject: [PATCH 09/16] bugfix

---
 .github/workflows/R-CMD-check.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 8818274f0..0fc731373 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -1,17 +1,17 @@
 # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
 # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
 # This is slightly modified verison of this https://github.com/r-lib/actions/blob/master/examples/check-full.yaml
-(added aug 10th 2021)
+# (added aug 10th 2021)
 
 on:
   push:
     branches:
-      - main
       - master
+      - cranversion
   pull_request:
     branches:
-      - main
       - master
+      -cranversion
 
 name: R-CMD-check
 

From e0941ea0bcea08631b9fcc97b66898037f69e019 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 15:10:22 +0200
Subject: [PATCH 10/16] .

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 0fc731373..1f93255ab 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -11,7 +11,7 @@ on:
   pull_request:
     branches:
       - master
-      -cranversion
+      - cranversion
 
 name: R-CMD-check
 

From 2a6e60d3c8afa03c507e14868503ba83979b4074 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 15:30:43 +0200
Subject: [PATCH 11/16] adding no-vignette version of R-CMD-check

---
 .../workflows/R-CMD-check-no-vignette.yaml    | 89 +++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 .github/workflows/R-CMD-check-no-vignette.yaml

diff --git a/.github/workflows/R-CMD-check-no-vignette.yaml b/.github/workflows/R-CMD-check-no-vignette.yaml
new file mode 100644
index 000000000..e74ab31b8
--- /dev/null
+++ b/.github/workflows/R-CMD-check-no-vignette.yaml
@@ -0,0 +1,89 @@
+# This is duplicate of R-CMD-check, but with arguments skipping the building of vignettes which often tends to fail
+# on older R versions.
+
+on:
+  push:
+    branches:
+      - master
+      - cranversion
+  pull_request:
+    branches:
+      - master
+      - cranversion
+
+name: R-CMD-check-no-vignette
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: windows-latest, r: 'release'}
+          - {os: macOS-latest, r: 'release'}
+          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
+          - {os: ubuntu-20.04,   r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"} # Added manually by martinju
+          - {os: ubuntu-20.04,   r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} # Added manually by martinju, remove some time in 2022
+
+    env:
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      RSPM: ${{ matrix.config.rspm }}
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-r@v1
+        with:
+          r-version: ${{ matrix.config.r }}
+
+      - uses: r-lib/actions/setup-pandoc@v1
+
+      - name: Query dependencies
+        run: |
+          install.packages('remotes')
+          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
+          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
+        shell: Rscript {0}
+
+      - name: Restore R package cache
+        uses: actions/cache@v2
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
+
+      - name: Install system dependencies
+        if: runner.os == 'Linux'
+        run: |
+          while read -r cmd
+          do
+            eval sudo $cmd
+          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
+
+      - name: Install dependencies
+        run: |
+          remotes::install_deps(dependencies = TRUE)
+          remotes::install_cran("rcmdcheck")
+        shell: Rscript {0}
+
+      - name: Check
+        env:
+          _R_CHECK_CRAN_INCOMING_REMOTE_: false
+        run: |
+          options(crayon.enabled = TRUE)
+          rcmdcheck::rcmdcheck(args = c("--no-manual", "--ignore-vignettes", "--no-build-vignettes"),
+          build_args = c("--no-manual", "--ignore-vignettes", "--no-build-vignettes"), error_on = "warning", check_dir = "check")
+        shell: Rscript {0}
+
+      - name: Upload check results
+        if: failure()
+        uses: actions/upload-artifact@main
+        with:
+          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
+          path: check

From ecb7970c41aee4bda6ee32e1fc67563a0910ddcc Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Tue, 10 Aug 2021 15:59:01 +0200
Subject: [PATCH 12/16] smoother version

just running the manually added version without vignette, the others the standard way
---
 .github/workflows/R-CMD-check-no-vignette.yaml | 8 ++++----
 .github/workflows/R-CMD-check.yaml             | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/R-CMD-check-no-vignette.yaml b/.github/workflows/R-CMD-check-no-vignette.yaml
index e74ab31b8..5f5e261bd 100644
--- a/.github/workflows/R-CMD-check-no-vignette.yaml
+++ b/.github/workflows/R-CMD-check-no-vignette.yaml
@@ -23,10 +23,10 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: windows-latest, r: 'release'}
-          - {os: macOS-latest, r: 'release'}
-          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
-          - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
+#          - {os: windows-latest, r: 'release'}
+#          - {os: macOS-latest, r: 'release'}
+#          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+#          - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
           - {os: ubuntu-20.04,   r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"} # Added manually by martinju
           - {os: ubuntu-20.04,   r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} # Added manually by martinju, remove some time in 2022
 
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 1f93255ab..a0952ddd4 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -29,8 +29,8 @@ jobs:
           - {os: macOS-latest, r: 'release'}
           - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
           - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
-          - {os: ubuntu-20.04,   r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"} # Added manually by martinju
-          - {os: ubuntu-20.04,   r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} # Added manually by martinju, remove some time in 2022
+#          - {os: ubuntu-20.04,   r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"} # Added manually by martinju
+#          - {os: ubuntu-20.04,   r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} # Added manually by martinju, remove some time in 2022
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

From 2480134476a953ff9c1f23f3928b75b1c1adf94a Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Fri, 13 Aug 2021 13:57:26 +0200
Subject: [PATCH 13/16] testing lint.yaml updating

---
 .github/workflows/lint.yaml | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 92a57554d..41c78890b 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -18,27 +18,32 @@ jobs:
     steps:
       - uses: actions/checkout@v2
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v1
 
       - name: Query dependencies
         run: |
           install.packages('remotes')
           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
+          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
         shell: Rscript {0}
 
-      - name: Cache R packages
-        uses: actions/cache@v1
+      - name: Restore R package cache
+        uses: actions/cache@v2
         with:
           path: ${{ env.R_LIBS_USER }}
-          key: macOS-r-4.0-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: macOS-r-4.0-1-
+          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
 
       - name: Install dependencies
         run: |
-          remotes::install_deps(dependencies = TRUE, type = "binary")
-          remotes::install_cran("lintr", type = "binary")
+          install.packages(c("remotes"))
+          remotes::install_deps(dependencies = TRUE)
+          remotes::install_cran("lintr")
         shell: Rscript {0}
 
+      - name: Install package
+        run: R CMD INSTALL .
+
       - name: Lint
         run: lintr::lint_package()
         shell: Rscript {0}

From f419659d1e29ff63dfec5e7ff12dbb3a5d23a6a1 Mon Sep 17 00:00:00 2001
From: Martin <jullum@nr.no>
Date: Fri, 13 Aug 2021 14:04:34 +0200
Subject: [PATCH 14/16] linting

---
 R/explanation.R                   | 2 +-
 tests/testthat/test-explanation.R | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/explanation.R b/R/explanation.R
index 649d8cfe7..ce0ef04ff 100644
--- a/R/explanation.R
+++ b/R/explanation.R
@@ -144,7 +144,7 @@
 #'   )
 #'   print(explain_groups$dt)
 #' }
-explain <- function(x, explainer, approach, prediction_zero, n_samples = 1e3,...) {
+explain <- function(x, explainer, approach, prediction_zero, n_samples = 1e3, ...) {
   extras <- list(...)
 
   # Check input for x
diff --git a/tests/testthat/test-explanation.R b/tests/testthat/test-explanation.R
index c82584c41..ce2917838 100644
--- a/tests/testthat/test-explanation.R
+++ b/tests/testthat/test-explanation.R
@@ -33,8 +33,8 @@ test_that("Test functions in explanation.R", {
     # Test way to insert test data (shapr<v0.2.1 threw error for line 2-4 below on R>=4.0)
     expect_silent(explain(x_test, explainer, approach = "gaussian", prediction_zero = p0))
     expect_silent(explain(head(x_test), explainer, approach = "gaussian", prediction_zero = p0))
-    expect_silent(explain(x_test[,1:4], explainer, approach = "gaussian", prediction_zero = p0))
-    expect_silent(explain(x_test[1:2,], explainer, approach = "gaussian", prediction_zero = p0))
+    expect_silent(explain(x_test[, 1:4], explainer, approach = "gaussian", prediction_zero = p0))
+    expect_silent(explain(x_test[1:2, ], explainer, approach = "gaussian", prediction_zero = p0))
 
 
     # Creating list with lots of different explainer objects

From bd695769cef615f03b1dd9cb13441041084a4ba8 Mon Sep 17 00:00:00 2001
From: Martin Jullum <jullum@nr.no>
Date: Tue, 17 Aug 2021 08:27:02 +0200
Subject: [PATCH 15/16] Update R/explanation.R

Co-authored-by: Annabelle Redelmeier <aredelmeier@gmail.com>
---
 R/explanation.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/explanation.R b/R/explanation.R
index ce0ef04ff..5443195d6 100644
--- a/R/explanation.R
+++ b/R/explanation.R
@@ -23,7 +23,7 @@
 #' approaches for estimating the conditional distributions of the data, namely \code{"empirical"},
 #' \code{"gaussian"}, \code{"copula"} and \code{"ctree"}.
 #' In addition, the user also has the option of combining the four approaches.
-#' E.g. if you're in a situation where you have trained a model the consists of 10 features,
+#' E.g., if you're in a situation where you have trained a model that consists of 10 features,
 #' and you'd like to use the \code{"gaussian"} approach when you condition on a single feature,
 #' the \code{"empirical"} approach if you condition on 2-5 features, and \code{"copula"} version
 #' if you condition on more than 5 features this can be done by simply passing

From 277e15b2e63ac0e7d19cce6ec5f59e0eff3f35c6 Mon Sep 17 00:00:00 2001
From: Martin Jullum <jullum@nr.no>
Date: Tue, 17 Aug 2021 08:27:10 +0200
Subject: [PATCH 16/16] Update R/explanation.R

Co-authored-by: Annabelle Redelmeier <aredelmeier@gmail.com>
---
 R/explanation.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/explanation.R b/R/explanation.R
index 5443195d6..374aba5c1 100644
--- a/R/explanation.R
+++ b/R/explanation.R
@@ -28,7 +28,7 @@
 #' the \code{"empirical"} approach if you condition on 2-5 features, and \code{"copula"} version
 #' if you condition on more than 5 features this can be done by simply passing
 #' \code{approach = c("gaussian", rep("empirical", 4), rep("copula", 5))}. If
-#' \code{"approach[i]" = "gaussian"} it means that you'd like to use the \code{"gaussian"} approach
+#' \code{"approach[i]" = "gaussian"} means that you'd like to use the \code{"gaussian"} approach
 #' when conditioning on \code{i} features.
 #'
 #' For \code{approach="ctree"}, \code{n_samples} corresponds to the number of samples