egouldo · egouldo · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ManyEcoEvo
 Title: Meta-analyse data from 'Many-Analysts' style studies
-Version: 2.7.1
+Version: 2.7.2
 Authors@R: c(
     person("Elliot", "Gould", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")),
@@ -27,7 +27,7 @@ Imports:
     betapart,
     cli,
     data.table,
-    dplyr,
+    dplyr (>= 1.1.4),
     forcats,
     fs,
     glue,
@@ -67,6 +67,6 @@ Remotes:
     daniel1noble/orchaRd,
     NightingaleHealth/ggforestplot
 Encoding: UTF-8
-LazyData: false
+LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -120,6 +120,7 @@ importFrom(EnvStats,stat_n_text)
 importFrom(NatParksPalettes,scale_color_natparks_d)
 importFrom(betapart,beta.pair)
 importFrom(broom,tidy)
+importFrom(broom.mixed,tidy)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_alert)
 importFrom(cli,cli_alert_danger)

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,12 @@
-# ManyEcoEvo (development version)
+# ManyEcoEvo 2.7.2
 
 <!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->
 
+- separated column creation to occur under three conditions: NULL outcome_variable supplied, character string supplied, and expression argument supplied
+- separated subset creation to occur separately on results of conditional evaluation
+- Added conditional behaviour for when character vector supplied
+- feat!: added arg checks #116 and cli output for when this condition is triggered
+- explicitly supply `outcome_variable` and `outcome_SE` args for Zr
 - #118 docs: Add explanation about updated behaviour when `estimate_type` is missing in `ManyEcoEvo` dataframe
 - #118 build: devtools::document()
 

diff --git a/R/fit_uni_mixed_effects.R b/R/fit_uni_mixed_effects.R
@@ -1,31 +1,27 @@
 #' Fit model of Box-Cox transformed deviation scores as a function random-effects inclusion in analyses
-#' @description Fits a univariate glm of box-cox transformed absolute deviation from the meta-analytic mean scores as a function of whether the analysis was a mixed effects model \(i.e. included random effects\) or not.
-#'
-#' @param data Dataframe containing box-cox transformed absolute deviation scores and binary column called `mixed_model` describing whether or not the analysis used a mixed-effects model.
+#' @description Fits a univariate glm of Box-Cox transformed absolute deviation from the meta-analytic mean scores as a function of whether the analysis was a mixed effects model (i.e. included random effects) or not.
 #'
+#' @param data Dataframe containing Box-Cox transformed absolute deviation scores and binary column called `mixed_model` describing whether or not the analysis used a mixed-effects model.
+#' @param N threshold number of analyses in each predictor category for fitting model
 #' @return A fitted model object of class `glm` and `parsnip`
 #' @export
 #' @family Model fitting and meta-analysis
 #' @examples
 #' # library(tidyverse);library(targets);library(metafor);library(tidymodels)
 #' # tar_load(meta_analysis_outputs)
 #' # fit_uni_mixed_effects(meta_analysis_results$data[[1]])
-#' # Note: used tidymodels approach for dynamic outcome var selection
-#' # base R approach will be more succinct.
 #' @import dplyr
 #' @importFrom cli cli_h2 cli_warn cli_alert_warning
 #' @importFrom pointblank test_col_exists
 #' @importFrom recipes recipe update_role step_mutate step_naomit
 #' @importFrom parsnip fit linear_reg
 #' @importFrom workflows workflow add_model add_recipe extract_fit_parsnip
 #' @seealso [parsnip::details_linear_reg_glm] for details on the [parsnip::linear_reg] engine. 
-fit_uni_mixed_effects <- function(data) {
-
-  cli::cli_h2(c("Fitting glm for box-cox transformed outcome with inclusion of random effects (binary variable) as predictor"))
+fit_uni_mixed_effects <- function(data, N = 5) {
 
-  if (pointblank::test_col_exists(data,
-                                  columns = c("mixed_model", 
-                                              starts_with("box_cox_abs_")))) {
+  if (!pointblank::test_col_exists(data,
+                                   columns = c("mixed_model", 
+                                               starts_with("box_cox_abs_")))) {
 
     cli::cli_alert_warning(
       c("Columns {.var mixed_model} and ", 
@@ -39,13 +35,25 @@
 
   }  else if ( length(unique(data$mixed_model)) == 1) {
 
-    cli::cli_warn(message = "More than 1 unique value of {.var mixed_model} ",
+    cli::cli_warn(message = c("More than 1 unique value of {.var mixed_model} ",
                   "is needed to fit model with {.var mixed_model} ",
-                  "as predictor variable. Returning {.val {NA}}")
+                  "as predictor variable. Returning {.val {NA}}"))
+
+    return(NA)
 
+  } else if (!pointblank::test_col_vals_gte(data, 
+                                            columns = n, 
+                                            value = N, 
+                                            preconditions = \(x) count(x, mixed_model))) {
+
+    cli::cli_warn(message = "Less than {.arg N} = {.val {N}} observations in ",
+                  "each level of {.var mixed_model}. Returning {.val {NA}}.")
+    print(data %>% count(mixed_model))
     return(NA)
 
-  } else {
+  } else{
+
+
 
     data <- data %>%
       dplyr::select(dplyr::starts_with("box_cox_abs_"), 
@@ -58,9 +66,11 @@
      recipes::update_role(starts_with("box_cox_abs_"), new_role = "outcome") %>%
       recipes::step_mutate(mixed_model = as.factor(mixed_model)) %>%
       recipes::step_naomit()
 
+    cli::cli_h2(c("Fitting glm for Box-Cox transformed outcome with inclusion of random effects (binary variable) as predictor"))
+
     glm_mod <- parsnip::linear_reg(engine = "glm")
 
     fitted_mod <-
      workflows::workflow() %>%
      workflows::add_model(glm_mod) %>%
@@ -68,7 +78,7 @@
      parsnip::fit(data = data) %>%
      workflows::extract_fit_parsnip()
  }   

  return(fitted_mod)
 }


diff --git a/R/generate_outlier_subsets.R b/R/generate_outlier_subsets.R
@@ -62,21 +62,21 @@ generate_outlier_subsets <- function(data, outcome_variable = NULL, n_min = NULL
                         "estimate_type", 
                         "dataset")
 
-  #TODO consider switching to exprs instead of list as input
-  # see meta_analyse_datasets
-  if (!is.null(enexpr(ignore_subsets))) { 
-    ignore_subsets_columns <- rlang::call_args(enquo(ignore_subsets)) %>% 
-      map(rlang::f_lhs) %>% 
-      map(rlang::as_string) %>% 
-      list_c() %>% 
-      append(values = required_columns) %>% 
-      unique()
-  } else {
-    ignore_subsets_columns <- required_columns
+  if (rlang::is_list(ignore_subsets)) {
+    if (!all(map_lgl(ignore_subsets, rlang::is_call))) {
+      cli_abort("{.arg filter_vars} must be a list of calls")
+    } else {
+      required_columns <-  ignore_subsets %>% 
+        map(rlang::f_lhs) %>% 
+        map(rlang::as_string) %>% 
+        list_c() %>%
+        append(values = required_columns) %>% 
+        unique()
+    }
   }
 
   pointblank::expect_col_exists(data, 
-                                columns = ignore_subsets_columns)
+                                columns = required_columns)
 
   if (is.list(n_min)) {
     map(n_min, ~ {
@@ -125,59 +125,117 @@ generate_outlier_subsets <- function(data, outcome_variable = NULL, n_min = NULL
     formulae_match_n_max <- formulae_match(unique(data$dataset), n_min)
   }
 
-  matched_formulae <-  map(outcome_variable, ~ formulae_match(x = names(.x), y = .x))
 
-  # ----- Generate Outlier Subsets -----
   if (str_detect(data$estimate_type, "Zr") %>% any(na.rm = TRUE)) {
 
-    if (!is.null(enexpr(ignore_subsets))) {
-      filter_vars <- quos(estimate_type == "Zr", 
-                          !!!rlang::call_args(enquo(ignore_subsets)))
-    } else {
-      filter_vars <- quo(estimate_type == "Zr")
-    }
-
     data_Zr <- data %>%
       filter(estimate_type == "Zr")
+
+    # ---- assign outcome variables ----
+
+    if (rlang::is_null(outcome_variable)) {
+      # NULL value supplied 
+      outcome_variable <- "Zr"
+
+      cli::cli_alert_warning(
+        "Assigning default {.arg outcome_variable} = {.val {outcome_variable}}"
+        )
+
+      data_Zr <- data_Zr %>% 
+        ungroup %>% 
+        mutate(outcome_colname = outcome_variable)
+
+    } else if (rlang::is_character(outcome_variable)) {
+      # Single Value Supplied
+      stopifnot(length(outcome_variable) == 1)
+      pointblank::expect_col_exists(data_Zr, columns = {{outcome_variable}})
+      data_Zr <- data_Zr %>% 
+        ungroup %>% 
+        mutate(outcome_colname = outcome_variable)
+    } 
+    else{
+      # expression argument supplied
+      matched_formulae <-  map(outcome_variable, 
+                               ~ formulae_match(x = names(.x), y = .x))
+
+      data_Zr <- 
+        map2(
+          names(matched_formulae), 
+          matched_formulae,
+          .f = ~ map_match_formulae(data_Zr, .x, .y)) %>% 
+        bind_rows() %>% 
+        drop_na(outcome_colname)
+    }
 
-    data_Zr <- 
-      map2(names(matched_formulae), matched_formulae,
-           .f = ~ map_match_formulae(data_Zr, .x, .y)) %>% 
-      bind_rows() %>% 
-      drop_na(outcome_colname) %>% 
-      map_match_formulae(variable_name = "dataset", formulae_match_n_min, col_name = "n_min") %>%
-      map_match_formulae(variable_name = "dataset", formulae_match_n_max, col_name = "n_max") %>% 
+    # ----- Generate Outlier Subsets for Zr datasets -----
+
+    data_Zr <- data_Zr %>% 
+      map_match_formulae(variable_name = "dataset", 
+                         formulae_match_n_min, 
+                         col_name = "n_min") %>%
+      map_match_formulae(variable_name = "dataset", 
+                         formulae_match_n_max, 
+                         col_name = "n_max") %>% 
       apply_slice_conditionally(
         x = .,
-        filter_vars = filter_vars) %>% 
+        filter_vars = ignore_subsets) %>% 
       select(-outcome_colname, -n_min, -n_max)
+
   }
 
+  # ---- Generate Outlier Subsets for yi datasets -----
   if (str_detect(data$estimate_type, "y") %>% 
       any(na.rm = TRUE)) {
 
-    if (!is.null(enexpr(ignore_subsets))) {
-      filter_vars <- quos(str_detect(estimate_type, "y"), 
-                          !!!rlang::call_args(enquo(ignore_subsets)))
-    } else {
-      filter_vars <- quo(str_detect(estimate_type, "y"))
-    }
-
     data_yi <- data %>%
       filter(str_detect(estimate_type, "y"))
+
+    # ---- assign outcome variables ----
+
+    if (rlang::is_null(outcome_variable)) {
+      # NULL value supplied 
+      outcome_variable <- "Z"
+
+      cli::cli_alert_warning(
+        "Assigning default {.arg outcome_variable} = {.val {outcome_variable}}"
+      )
+
+      data_yi <- data_yi %>% 
+        ungroup %>% 
+        mutate(outcome_colname = outcome_variable)
+    } else if (rlang::is_character(outcome_variable)) {
+      # Single Value Supplied
+      stopifnot(length(outcome_variable) == 1)
+      pointblank::expect_col_exists(data_yi, columns = {{outcome_variable}})
+      data_yi <- data_yi %>% 
+        ungroup %>% 
+        mutate(outcome_colname = outcome_variable)
+    } else {
+      # expression argument supplied
+      matched_formulae <-  map(outcome_variable, 
+                               ~ formulae_match(x = names(.x), y = .x))
+
+      data_yi <-  
+        map2(
+          names(matched_formulae), 
+          matched_formulae,
+          .f = ~ map_match_formulae(data_yi, .x, .y)) %>% 
+        bind_rows() %>% 
+        drop_na(outcome_colname) 
+    }
 
-    data_yi <-  map2(
-      names(matched_formulae), 
-      matched_formulae,
-      .f = ~ map_match_formulae(data_yi, .x, .y)) %>% 
-      bind_rows() %>% 
-      drop_na(outcome_colname) %>% 
-      map_match_formulae(variable_name = "dataset", formulae_match_n_min, col_name = "n_min") %>%
-      map_match_formulae(variable_name = "dataset", formulae_match_n_max, col_name = "n_max") %>% 
+    data_yi <- data_yi %>% 
+      map_match_formulae(variable_name = "dataset", 
+                         formulae_match_n_min, 
+                         col_name = "n_min") %>%
+      map_match_formulae(variable_name = "dataset", 
+                         formulae_match_n_max, 
+                         col_name = "n_max") %>% 
       apply_slice_conditionally(
         x = .,
         filter_vars = filter_vars) %>% 
       select(-outcome_colname, -n_min, -n_max)
+
   }
 
   out <- if (exists(x = "data_Zr") & exists(x = "data_yi")) {
@@ -220,37 +278,37 @@ apply_slice_conditionally <- function(x, filter_vars){
                                if ("exclusion_set" %in% colnames(.)) {
                                  exclusion_set } 
                              else {"complete"}), {
-    x %>%
-      filter(!!!filter_vars) %>%
-      mutate(data = 
-               pmap(list(data, outcome_colname, n_min, n_max),
-                    .f = ~ slice_conditionally(..1, 
-                                               n_min = ..3, 
-                                               n_max = ..4, 
-                                               outcome_variable = ..2
-                    ))) %>%
-      mutate( exclusion_set = 
-                if ("exclusion_set" %in% colnames(.)) {
-                  paste0(exclusion_set, "-rm_outliers") } 
-              else {"complete-rm_outliers"},
-        data =
-          map2(
-            .x = data,
-            .y = data,
-            .f = ~ semi_join(.x, .y, 
-                             by = join_by(id_col)) %>% 
-              distinct()
-          ),
-        diversity_data =
-          map2(
-            .x = diversity_data,
-            .y = data,
-            .f = ~ semi_join(.x, .y, 
-                             by = join_by(id_col)) %>% 
-              distinct()
-          )
-      )
-  })
+                               x %>%
+                                 filter(!!!filter_vars) %>%
+                                 mutate(data = 
+                                          pmap(list(data, outcome_colname, n_min, n_max),
+                                               .f = ~ slice_conditionally(..1, 
+                                                                          n_min = ..3, 
+                                                                          n_max = ..4, 
+                                                                          outcome_variable = ..2
+                                               ))) %>%
+                                 mutate( exclusion_set = 
+                                           if ("exclusion_set" %in% colnames(.)) {
+                                             paste0(exclusion_set, "-rm_outliers") } 
+                                         else {"complete-rm_outliers"},
+                                         data =
+                                           map2(
+                                             .x = data,
+                                             .y = data,
+                                             .f = ~ semi_join(.x, .y, 
+                                                              by = join_by(id_col)) %>% 
+                                               distinct()
+                                           ),
+                                         diversity_data =
+                                           map2(
+                                             .x = diversity_data,
+                                             .y = data,
+                                             .f = ~ semi_join(.x, .y, 
+                                                              by = join_by(id_col)) %>% 
+                                               distinct()
+                                           )
+                                 )
+                             })
 
   return(out)
 }