Allow multiple sample sizes

* Allow input of multiple sample sizes (#81) * Use purrr and reformat output from run_simulations() Function now returns a list one-element deep for each iteration. The object in this list is a data frame containing run meta information and the associated p-values. * Set p-value tables to show results for all sample sizes Lists of data frames are row-bound into a single data table that can be displayed for the comparisons and group1/2 results. * Allow passing of multiple sample sizes New plot created Underlying functions expect tibbles now T1 error, T2 error/power calculating functions map over multiple sample sizes * Create function for plotting power Implement new plotting function in the UI and server of the Shiny app. * Suspend evaluation of vignette help docs * Remove and update package sample data Rerun the data simulation procedure with the formatting changes made to the run_simulations function, and deleted the data/*_formatted.rda files as they are no longer needed. * Clean up code and package for plotting functions * Match tests to new behaviors of functions Update output names of objects and select varaibles where needed * Fix note for non-visible binding by using .data * Remove unneeded packages and ggridges dependency Also add explicit calls to pkgload and markdown in the app.R file to ensure these packages are picked up by renv snapshots without including them in the Imports list. * Compress data files to smaller output size * Use LazyDataCompression in description file * Update renv lockfile
NeuroShepherd · Apr 21, 2024 · fda0f39 · fda0f39
1 parent 026dce5
commit fda0f39
Show file tree

Hide file tree

Showing 23 changed files with 293 additions and 175 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,7 +19,6 @@ Imports:
     glue,
     golem (>= 0.4.0),
     magrittr,
-    pkgload,
     purrr,
     rhandsontable,
     rlang,
@@ -30,10 +29,10 @@ Imports:
     shinydashboardPlus,
     stats,
     stringr,
-    tidyr,
-    markdown
+    tidyr
 Encoding: UTF-8
 LazyData: true
+LazyDataCompression: xz
 RoxygenNote: 7.2.3
 URL: https://github.com/NeuroShepherd/ordinalsimr, https://neuroshepherd.github.io/ordinalsimr/
 BugReports: https://github.com/NeuroShepherd/ordinalsimr/issues

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ export(format_simulation_data)
 export(ordinal_tests)
 export(parse_ratio_text)
 export(plot_distribution_results)
+export(plot_power)
 export(run_app)
 export(run_simulations)
 import(assertthat)

diff --git a/R/data.R b/R/data.R
@@ -11,18 +11,6 @@
 
 "simulation_data_two_groups"
 
-#' Formatted Simulation Data for Two Groups
-#'
-#' Simulated p-values and metadata for a two group comparison in a tidy format. Useful for Type II error and power calculations.
-#'
-#' @format ## `simulation_data_two_groups_formatted`
-#' A list
-#' \describe{
-#'   \item{p_values}{A data frame of p-values from each run of each test}
-#'   \item{initial_groups}{A nested list with information for each simulation run}
-#' }
-
-"simulation_data_two_groups_formatted"
 
 #' Simulation Data for One Group
 #'
@@ -37,15 +25,3 @@
 
 "simulation_data_one_group"
 
-#' Formatted Simulation Data for One Group
-#'
-#' Simulated p-values and metadata for a two group comparison in a tidy format. Useful for Type I error calculations.
-#'
-#' @format ## `simulation_data_one_group_formatted`
-#' A list
-#' \describe{
-#'   \item{p_values}{A data frame of p-values from each run of each test}
-#'   \item{initial_groups}{A nested list with information for each simulation run}
-#' }
-
-"simulation_data_one_group_formatted"
diff --git a/R/mod_plot_distributions.R b/R/mod_plot_distributions.R
@@ -29,7 +29,14 @@ mod_plot_distributions_ui <- function(id){
     ),
     box(
       width = 9,
-      shinycssloaders::withSpinner(plotOutput(ns("distribution_plot_results")), type = 2,color.background = "#0275D8"),
+      tabsetPanel(type = "tabs",
+                  tabPanel("Power Plot",
+                           shinycssloaders::withSpinner(plotOutput(ns("power_plot")),
+                                                        type = 2,color.background = "#0275D8")),
+                  tabPanel("p-value Plot",
+                           shinycssloaders::withSpinner(plotOutput(ns("distribution_plot_results")),
+                                                        type = 2,color.background = "#0275D8"))
+        ),
       br(),
       tabsetPanel(type = "tabs",
                   tabPanel("Power and Type II Error",
@@ -53,22 +60,26 @@ mod_plot_distributions_server <- function(id, p_value_table, n){
 
     outlier_percent_removal <- reactive({ (100 - input$remove_outlier_percentage)/100 })
     p_val_threshold <- reactive({ input$user_p_val })
-    p_value_reactive_table <- reactive({ as.data.frame(p_value_table$comparison_results()$p_values) })
+    p_value_reactive_table <- reactive({ bind_rows(p_value_table$comparison_results()) })
 
 
     # COMPARISON STATISTICS/GRAPHING
     # !!!plot!!!
     distribution_plot <- reactive({
       p_value_reactive_table() %>%
+        dplyr::select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
         plot_distribution_results(outlier_removal = outlier_percent_removal(),
                                   alpha = p_val_threshold())})
+
     output$distribution_plot_results <- renderPlot({
       distribution_plot()
     })
 
 
     # !!!statistics!!!
-    distribution_statistics <- reactive({p_value_reactive_table() %>%
+    distribution_statistics <- reactive({
+      p_value_reactive_table() %>%
+      select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
       calculate_power_t2error(alpha = p_val_threshold(),
                               n = n(),
                               power_confidence_int = input$power_confidence_int)
@@ -77,34 +88,49 @@ mod_plot_distributions_server <- function(id, p_value_table, n){
       distribution_statistics() %>%
         select(-.data$lower_power_bound, -.data$upper_power_bound,
                -.data$lower_t2error_bound, -.data$upper_t2error_bound) %>%
+        arrange(desc(.data$power)) %>%
         rename(`Statistical Test` = .data$test,
                "Power (1-\U03B2)" = .data$power,
                "Type II Error (\U03B2)" = .data$t2_error) %>%
         DT::datatable() %>%
-        DT::formatRound(c(2,4), 5)
+        DT::formatRound(c(3,5), 5)
+    })
+
+
+    # Plot Power
+    output$power_plot <- renderPlot({
+      distribution_statistics() %>%
+        plot_power()
     })
 
 
     # GROUP 1 TYPE 1 ERROR
-    group1_t1_reactive_table <- reactive({ p_value_table$group1_results()$p_values %>%
-        as.data.frame() %>%
+    group1_t1_reactive_table <- reactive({
+      p_value_table$group1_results() %>%
+        bind_rows() %>%
+        dplyr::select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
+        group_by(.data$sample_size) %>%
         calculate_t1_error(t1_error_confidence_int = input$t1_error_group1_confidence_int)
       })
     output$t1_error_group1 <- DT::renderDataTable({
+      # browser()
       group1_t1_reactive_table() %>%
         select(-.data$lower_t1_bound, -.data$upper_t1_bound) %>%
         rename(`Statistical Test` = .data$test,
                "Type I Error (\U003B1)" = .data$t1_error) %>%
         DT::datatable() %>%
-        DT::formatRound(c(2), 5)
+        DT::formatRound(c(3), 5)
 
     })
 
 
 
     # GROUP 2 TYPE 1 ERROR
-    group2_t1_reactive_table <- reactive({ p_value_table$group2_results()$p_values %>%
-        as.data.frame() %>%
+    group2_t1_reactive_table <- reactive({
+      p_value_table$group2_results() %>%
+        bind_rows() %>%
+        dplyr::select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
+        group_by(.data$sample_size) %>%
         calculate_t1_error(t1_error_confidence_int = input$t1_error_group2_confidence_int)
       })
     output$t1_error_group2 <- DT::renderDataTable({
@@ -113,7 +139,7 @@ mod_plot_distributions_server <- function(id, p_value_table, n){
         rename(`Statistical Test` = .data$test,
                "Type I Error (\U003B1)" = .data$t1_error) %>%
         DT::datatable() %>%
-        DT::formatRound(c(2), 5)
+        DT::formatRound(c(3), 5)
     })
 
 

diff --git a/R/mod_sample_size.R b/R/mod_sample_size.R
@@ -10,8 +10,7 @@
 mod_sample_size_ui <- function(id){
   ns <- NS(id)
   tagList(
-    shiny::numericInput(ns("sample_n"), "Sample Size",
-                        value = 80, min = 1, max = Inf)
+    shiny::textInput(ns("sample_n"), "Sample Size", value = 80)
   )
 }
 
@@ -22,7 +21,13 @@ mod_sample_size_server <- function(id){
   moduleServer( id, function(input, output, session){
     ns <- session$ns
 
-    sample_n <- reactive({input$sample_n})
+    sample_n <- reactive({
+      # should technically set up checks that only a numeric vector is
+      # being passed before evaluating the expression (or, that the string
+      # being passed is comma-separated values with `:` and seq*()
+      # also being allowed.)
+      unique(eval(parse(text = paste0("c(", input$sample_n, ")") )))
+      })
 
     return(sample_n)
 

diff --git a/R/mod_stats_calculations.R b/R/mod_stats_calculations.R
@@ -87,28 +87,31 @@ mod_stats_calculations_server <- function(id, probability_data, sample_prob, ite
 
 
     output$results_table <- DT::renderDataTable(
-      DT::datatable(data = as.data.frame(comparison_results()$p_values),
-                    options = list(scrollX = TRUE)
-      ) %>%
-        DT::formatRound(1:7, digits = 5)
+      comparison_results() %>%
+        bind_rows() %>%
+        dplyr::select(.data$sample_size, .data$wilcox:.data$coinasymp) %>%
+        DT::datatable(options = list(scrollX = TRUE)) %>%
+        DT::formatRound(2:8, digits = 5)
     )
     outputOptions(output, "results_table", suspendWhenHidden = FALSE)
 
     # if not keeping these output tables, use observe({group1_results()}) to
     # ensure evaluation
     output$group1_pvalues <- DT::renderDataTable(
-      DT::datatable(data = as.data.frame(group1_results()$p_values),
-                    options = list(scrollX = TRUE)
-      ) %>%
-        DT::formatRound(1:7, digits = 5)
+      group1_results() %>%
+        bind_rows() %>%
+        dplyr::select(.data$sample_size, .data$wilcox:.data$coinasymp) %>%
+        DT::datatable(options = list(scrollX = TRUE)) %>%
+        DT::formatRound(2:8, digits = 5)
     )
     outputOptions(output, "group1_pvalues", suspendWhenHidden = FALSE)
 
     output$group2_pvalues <- DT::renderDataTable(
-      DT::datatable(data = as.data.frame(group2_results()$p_values),
-                    options = list(scrollX = TRUE)
-      ) %>%
-        DT::formatRound(1:7, digits = 5)
+      group2_results() %>%
+        bind_rows() %>%
+        dplyr::select(.data$sample_size, .data$wilcox:.data$coinasymp) %>%
+        DT::datatable(options = list(scrollX = TRUE)) %>%
+        DT::formatRound(2:8, digits = 5)
     )
     outputOptions(output, "group2_pvalues", suspendWhenHidden = FALSE)
 

diff --git a/R/run_simulations.R b/R/run_simulations.R
@@ -6,12 +6,19 @@
 #' @param prob0 Vector of probabilities for control group
 #' @param prob1 Vector of probabilities for intervention group
 #' @param niter Number of simulation iterations to complete#'
-#' @return list with elements `p_values` which is a matrix of p values for tests at each iteration, and `initial_groups` which is the group assignment information for each iteration
+#' @return a list of lists; sub-list elements include `p_values` which is a matrix of p values for tests at each iteration, and `initial_groups` which is the group assignment information for each iteration
 #'
 #' @import assertthat
 #'
 #' @export
 #'
+#' @examples
+#' run_simulations(sample_size = c(40,50,60),
+#' sample_prob = c(0.5,0.5),
+#' prob0 = c(0.1,0.2,0.3,0.4),
+#' prob1 = c(0.6,0.2,0.1,0.1),
+#' niter = 100)
+#'
 #'
 run_simulations <- function(sample_size, sample_prob, prob0, prob1, niter) {
 
@@ -28,18 +35,32 @@ run_simulations <- function(sample_size, sample_prob, prob0, prob1, niter) {
     "Prop. Odds", "Kruskal-Wallis", "Coin Indep. Test")
 
 
-  initial_groups <- list()
+  purrr::map(sample_size,
+             ~{
+               sample_size_nested <- .x
+               initial_groups <- purrr::map(1:niter, ~assign_groups(sample_size = sample_size_nested,
+                                                                    sample_prob = sample_prob,
+                                                                    prob0 = prob0, prob1 = prob1,
+                                                                    seed = .x)   )
+
+               p_values <- initial_groups %>%
+                 sapply(.,function(x) ordinal_tests(x[["x"]], x[["y"]])) %>%
+                 t()
+
+               initial_groups_formatted <- initial_groups %>%
+                 purrr::map_df(~tibble(y = list(.x[["y"]]), x = list(.x[["x"]]),
+                                n_null = .x[["n_null"]], n_intervene = .x[["n_intervene"]],
+                                sample_size = .x[["sample_size"]], K = .x[["K"]])
+                 ) %>%
+                 mutate(run = row_number(), .before = y)
 
-  for (i in 1:niter) {
-    initial_groups[[i]] <- assign_groups(sample_size = sample_size,
-                                         sample_prob = sample_prob,
-                                         prob0 = prob0, prob1 = prob1,
-                                         seed = i)
+               return( sim_results_table = bind_cols(p_values, initial_groups_formatted) )
+             },
 
-    p_values[i, ] <- ordinal_tests(x = initial_groups[[i]]$x,
-                                   y = initial_groups[[i]]$y)
-  }
+             .progress = list(caller = environment(),
+                              format = "Running {niter} iterations on {length(sample_size)} sample sizes. Progress: {cli::pb_bar} {cli::pb_percent} {cli::pb_eta}")
 
-  return(list(p_values = p_values, initial_groups = initial_groups))
+  ) %>%
+    purrr::set_names(glue::glue("sample_size_{sample_size}"))
 
 }