Skip to content

Commit

Permalink
Allow multiple sample sizes
Browse files Browse the repository at this point in the history
* Allow input of multiple sample sizes (#81)

* Use purrr and reformat output from run_simulations()

Function now returns a list one-element deep for each iteration. The object in this list is a data frame containing run meta information and the associated p-values.

* Set p-value tables to show results for all sample sizes

Lists of data frames are row-bound into a single data table that can be displayed for the comparisons and group1/2 results.

* Allow passing of multiple sample sizes

New plot created

Underlying functions expect tibbles now

T1 error, T2 error/power calculating functions map over multiple sample sizes

* Create function for plotting power

Implement new plotting function in the UI and server of the Shiny app.

* Suspend evaluation of vignette help docs

* Remove and update package sample data

Rerun the data simulation procedure with the formatting changes made to the run_simulations function, and deleted the data/*_formatted.rda files as they are no longer needed.

* Clean up code and package for plotting functions

* Match tests to new behaviors of functions

Update output names of objects and select varaibles where needed

* Fix note for non-visible binding by using .data

* Remove unneeded packages and ggridges dependency

Also add explicit calls to pkgload and markdown in the app.R file to ensure these packages are picked up by renv snapshots without including them in the Imports list.

* Compress data files to smaller output size

* Use LazyDataCompression in description file

* Update renv lockfile
  • Loading branch information
NeuroShepherd authored Apr 21, 2024
1 parent 026dce5 commit fda0f39
Show file tree
Hide file tree
Showing 23 changed files with 293 additions and 175 deletions.
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ Imports:
glue,
golem (>= 0.4.0),
magrittr,
pkgload,
purrr,
rhandsontable,
rlang,
Expand All @@ -30,10 +29,10 @@ Imports:
shinydashboardPlus,
stats,
stringr,
tidyr,
markdown
tidyr
Encoding: UTF-8
LazyData: true
LazyDataCompression: xz
RoxygenNote: 7.2.3
URL: https://github.com/NeuroShepherd/ordinalsimr, https://neuroshepherd.github.io/ordinalsimr/
BugReports: https://github.com/NeuroShepherd/ordinalsimr/issues
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(format_simulation_data)
export(ordinal_tests)
export(parse_ratio_text)
export(plot_distribution_results)
export(plot_power)
export(run_app)
export(run_simulations)
import(assertthat)
Expand Down
24 changes: 0 additions & 24 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,6 @@

"simulation_data_two_groups"

#' Formatted Simulation Data for Two Groups
#'
#' Simulated p-values and metadata for a two group comparison in a tidy format. Useful for Type II error and power calculations.
#'
#' @format ## `simulation_data_two_groups_formatted`
#' A list
#' \describe{
#' \item{p_values}{A data frame of p-values from each run of each test}
#' \item{initial_groups}{A nested list with information for each simulation run}
#' }

"simulation_data_two_groups_formatted"

#' Simulation Data for One Group
#'
Expand All @@ -37,15 +25,3 @@

"simulation_data_one_group"

#' Formatted Simulation Data for One Group
#'
#' Simulated p-values and metadata for a two group comparison in a tidy format. Useful for Type I error calculations.
#'
#' @format ## `simulation_data_one_group_formatted`
#' A list
#' \describe{
#' \item{p_values}{A data frame of p-values from each run of each test}
#' \item{initial_groups}{A nested list with information for each simulation run}
#' }

"simulation_data_one_group_formatted"
46 changes: 36 additions & 10 deletions R/mod_plot_distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ mod_plot_distributions_ui <- function(id){
),
box(
width = 9,
shinycssloaders::withSpinner(plotOutput(ns("distribution_plot_results")), type = 2,color.background = "#0275D8"),
tabsetPanel(type = "tabs",
tabPanel("Power Plot",
shinycssloaders::withSpinner(plotOutput(ns("power_plot")),
type = 2,color.background = "#0275D8")),
tabPanel("p-value Plot",
shinycssloaders::withSpinner(plotOutput(ns("distribution_plot_results")),
type = 2,color.background = "#0275D8"))
),
br(),
tabsetPanel(type = "tabs",
tabPanel("Power and Type II Error",
Expand All @@ -53,22 +60,26 @@ mod_plot_distributions_server <- function(id, p_value_table, n){

outlier_percent_removal <- reactive({ (100 - input$remove_outlier_percentage)/100 })
p_val_threshold <- reactive({ input$user_p_val })
p_value_reactive_table <- reactive({ as.data.frame(p_value_table$comparison_results()$p_values) })
p_value_reactive_table <- reactive({ bind_rows(p_value_table$comparison_results()) })


# COMPARISON STATISTICS/GRAPHING
# !!!plot!!!
distribution_plot <- reactive({
p_value_reactive_table() %>%
dplyr::select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
plot_distribution_results(outlier_removal = outlier_percent_removal(),
alpha = p_val_threshold())})

output$distribution_plot_results <- renderPlot({
distribution_plot()
})


# !!!statistics!!!
distribution_statistics <- reactive({p_value_reactive_table() %>%
distribution_statistics <- reactive({
p_value_reactive_table() %>%
select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
calculate_power_t2error(alpha = p_val_threshold(),
n = n(),
power_confidence_int = input$power_confidence_int)
Expand All @@ -77,34 +88,49 @@ mod_plot_distributions_server <- function(id, p_value_table, n){
distribution_statistics() %>%
select(-.data$lower_power_bound, -.data$upper_power_bound,
-.data$lower_t2error_bound, -.data$upper_t2error_bound) %>%
arrange(desc(.data$power)) %>%
rename(`Statistical Test` = .data$test,
"Power (1-\U03B2)" = .data$power,
"Type II Error (\U03B2)" = .data$t2_error) %>%
DT::datatable() %>%
DT::formatRound(c(2,4), 5)
DT::formatRound(c(3,5), 5)
})


# Plot Power
output$power_plot <- renderPlot({
distribution_statistics() %>%
plot_power()
})


# GROUP 1 TYPE 1 ERROR
group1_t1_reactive_table <- reactive({ p_value_table$group1_results()$p_values %>%
as.data.frame() %>%
group1_t1_reactive_table <- reactive({
p_value_table$group1_results() %>%
bind_rows() %>%
dplyr::select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
group_by(.data$sample_size) %>%
calculate_t1_error(t1_error_confidence_int = input$t1_error_group1_confidence_int)
})
output$t1_error_group1 <- DT::renderDataTable({
# browser()
group1_t1_reactive_table() %>%
select(-.data$lower_t1_bound, -.data$upper_t1_bound) %>%
rename(`Statistical Test` = .data$test,
"Type I Error (\U003B1)" = .data$t1_error) %>%
DT::datatable() %>%
DT::formatRound(c(2), 5)
DT::formatRound(c(3), 5)

})



# GROUP 2 TYPE 1 ERROR
group2_t1_reactive_table <- reactive({ p_value_table$group2_results()$p_values %>%
as.data.frame() %>%
group2_t1_reactive_table <- reactive({
p_value_table$group2_results() %>%
bind_rows() %>%
dplyr::select(.data$wilcox:.data$coinasymp, .data$sample_size) %>%
group_by(.data$sample_size) %>%
calculate_t1_error(t1_error_confidence_int = input$t1_error_group2_confidence_int)
})
output$t1_error_group2 <- DT::renderDataTable({
Expand All @@ -113,7 +139,7 @@ mod_plot_distributions_server <- function(id, p_value_table, n){
rename(`Statistical Test` = .data$test,
"Type I Error (\U003B1)" = .data$t1_error) %>%
DT::datatable() %>%
DT::formatRound(c(2), 5)
DT::formatRound(c(3), 5)
})


Expand Down
11 changes: 8 additions & 3 deletions R/mod_sample_size.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
mod_sample_size_ui <- function(id){
ns <- NS(id)
tagList(
shiny::numericInput(ns("sample_n"), "Sample Size",
value = 80, min = 1, max = Inf)
shiny::textInput(ns("sample_n"), "Sample Size", value = 80)
)
}

Expand All @@ -22,7 +21,13 @@ mod_sample_size_server <- function(id){
moduleServer( id, function(input, output, session){
ns <- session$ns

sample_n <- reactive({input$sample_n})
sample_n <- reactive({
# should technically set up checks that only a numeric vector is
# being passed before evaluating the expression (or, that the string
# being passed is comma-separated values with `:` and seq*()
# also being allowed.)
unique(eval(parse(text = paste0("c(", input$sample_n, ")") )))
})

return(sample_n)

Expand Down
27 changes: 15 additions & 12 deletions R/mod_stats_calculations.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,28 +87,31 @@ mod_stats_calculations_server <- function(id, probability_data, sample_prob, ite


output$results_table <- DT::renderDataTable(
DT::datatable(data = as.data.frame(comparison_results()$p_values),
options = list(scrollX = TRUE)
) %>%
DT::formatRound(1:7, digits = 5)
comparison_results() %>%
bind_rows() %>%
dplyr::select(.data$sample_size, .data$wilcox:.data$coinasymp) %>%
DT::datatable(options = list(scrollX = TRUE)) %>%
DT::formatRound(2:8, digits = 5)
)
outputOptions(output, "results_table", suspendWhenHidden = FALSE)

# if not keeping these output tables, use observe({group1_results()}) to
# ensure evaluation
output$group1_pvalues <- DT::renderDataTable(
DT::datatable(data = as.data.frame(group1_results()$p_values),
options = list(scrollX = TRUE)
) %>%
DT::formatRound(1:7, digits = 5)
group1_results() %>%
bind_rows() %>%
dplyr::select(.data$sample_size, .data$wilcox:.data$coinasymp) %>%
DT::datatable(options = list(scrollX = TRUE)) %>%
DT::formatRound(2:8, digits = 5)
)
outputOptions(output, "group1_pvalues", suspendWhenHidden = FALSE)

output$group2_pvalues <- DT::renderDataTable(
DT::datatable(data = as.data.frame(group2_results()$p_values),
options = list(scrollX = TRUE)
) %>%
DT::formatRound(1:7, digits = 5)
group2_results() %>%
bind_rows() %>%
dplyr::select(.data$sample_size, .data$wilcox:.data$coinasymp) %>%
DT::datatable(options = list(scrollX = TRUE)) %>%
DT::formatRound(2:8, digits = 5)
)
outputOptions(output, "group2_pvalues", suspendWhenHidden = FALSE)

Expand Down
43 changes: 32 additions & 11 deletions R/run_simulations.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@
#' @param prob0 Vector of probabilities for control group
#' @param prob1 Vector of probabilities for intervention group
#' @param niter Number of simulation iterations to complete#'
#' @return list with elements `p_values` which is a matrix of p values for tests at each iteration, and `initial_groups` which is the group assignment information for each iteration
#' @return a list of lists; sub-list elements include `p_values` which is a matrix of p values for tests at each iteration, and `initial_groups` which is the group assignment information for each iteration
#'
#' @import assertthat
#'
#' @export
#'
#' @examples
#' run_simulations(sample_size = c(40,50,60),
#' sample_prob = c(0.5,0.5),
#' prob0 = c(0.1,0.2,0.3,0.4),
#' prob1 = c(0.6,0.2,0.1,0.1),
#' niter = 100)
#'
#'
run_simulations <- function(sample_size, sample_prob, prob0, prob1, niter) {

Expand All @@ -28,18 +35,32 @@ run_simulations <- function(sample_size, sample_prob, prob0, prob1, niter) {
"Prop. Odds", "Kruskal-Wallis", "Coin Indep. Test")


initial_groups <- list()
purrr::map(sample_size,
~{
sample_size_nested <- .x
initial_groups <- purrr::map(1:niter, ~assign_groups(sample_size = sample_size_nested,
sample_prob = sample_prob,
prob0 = prob0, prob1 = prob1,
seed = .x) )

p_values <- initial_groups %>%
sapply(.,function(x) ordinal_tests(x[["x"]], x[["y"]])) %>%
t()

initial_groups_formatted <- initial_groups %>%
purrr::map_df(~tibble(y = list(.x[["y"]]), x = list(.x[["x"]]),
n_null = .x[["n_null"]], n_intervene = .x[["n_intervene"]],
sample_size = .x[["sample_size"]], K = .x[["K"]])
) %>%
mutate(run = row_number(), .before = y)

for (i in 1:niter) {
initial_groups[[i]] <- assign_groups(sample_size = sample_size,
sample_prob = sample_prob,
prob0 = prob0, prob1 = prob1,
seed = i)
return( sim_results_table = bind_cols(p_values, initial_groups_formatted) )
},

p_values[i, ] <- ordinal_tests(x = initial_groups[[i]]$x,
y = initial_groups[[i]]$y)
}
.progress = list(caller = environment(),
format = "Running {niter} iterations on {length(sample_size)} sample sizes. Progress: {cli::pb_bar} {cli::pb_percent} {cli::pb_eta}")

return(list(p_values = p_values, initial_groups = initial_groups))
) %>%
purrr::set_names(glue::glue("sample_size_{sample_size}"))

}
Loading

0 comments on commit fda0f39

Please sign in to comment.