Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Investigate and fix rating analysis weights #42

Merged
merged 15 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ManyEcoEvo
Title: Meta-analyse data from 'Many-Analysts' style studies
Version: 1.3.0
Version: 1.5.0
Authors@R: c(person(given = "Elliot",
family = "Gould",
email = "[email protected]",
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export(summarise_variable_counts)
export(validate_predictions)
export(validate_predictions_df_blue_tit)
export(validate_predictions_df_euc)
import(cli)
import(dplyr)
import(ggbeeswarm)
import(ggplot2)
Expand Down Expand Up @@ -120,9 +121,15 @@ importFrom(forcats,fct_relevel)
importFrom(magrittr,"%>%")
importFrom(pointblank,col_vals_not_null)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_chr)
importFrom(purrr,map_dfr)
importFrom(purrr,pluck)
importFrom(purrr,pmap)
importFrom(purrr,possibly)
importFrom(purrr,set_names)
importFrom(rlang,is_na)
importFrom(rlang,is_null)
importFrom(rlang,na_chr)
importFrom(sae,bxcx)
importFrom(tidyr,pivot_longer)
Expand Down
15 changes: 7 additions & 8 deletions R/fit_boxcox_ratings_cat.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,19 @@ fit_boxcox_ratings_cat <- function(.data, outcome, outcome_var, interceptless =
obs_id = 1:n())

if(interceptless == FALSE){

f <- rlang::new_formula(rlang::ensym(outcome),
expr(PublishableAsIs +
(1 | ReviewerId) # + (1 | study_id ) RE omitted due to convergence issues
))
mod <- lme4::lmer(f,
data = data_tbl ,
weights = I(1/pull(data_tbl,{{outcome_var}}))
)

mod <- lme4::lmer(formula = f, data = data_tbl)

}else(#interceptless: for plotting

mod <- lme4::lmer(rlang::new_formula(rlang::ensym(outcome),
expr(-1 + PublishableAsIs + (1 | ReviewerId))), #+ (1 | study_id) #problem with the groups
data = data_tbl #,
# weights = I(1/pull(data_tbl,{{outcome_var}}))
)
expr(-1 + PublishableAsIs + (1 | ReviewerId))),
data = data_tbl)
)

return(mod)
Expand Down
8 changes: 6 additions & 2 deletions R/generate_collinearity_subset.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@
#' - `generate_rating_subsets()`
#'
#' `generate_collinearity_subset()` only creates collinearity subsets based on the full dataset where `exclusion_set == "complete"` and `publishable_subset == "All"` and `expertise_subset == "All"`.
#' @import dplyr
#' @importFrom purrr map
#' @importFrom purrr map2
#' @importFrom purrr pluck
#' @examples
#' ManyEcoEvo %>%
#' prepare_response_variables(estimate_type = "Zr") |>
#' generate_exclusion_subsets(estimate_type = "Zr") |>
#' generate_rating_subsets() |>
#' generate_expertise_subsets(expert_subset) |>
#' generate_expertise_subsets(ManyEcoEvo:::expert_subset) |>
#' generate_collinearity_subset(collinearity_subset = collinearity_subset)
generate_collinearity_subset <- function(ManyEcoEvo, collinearity_subset) {
# Check if the inputs are a dataframe
Expand Down Expand Up @@ -56,7 +60,7 @@ generate_collinearity_subset <- function(ManyEcoEvo, collinearity_subset) {
mutate(diversity_data =
map2(.x = diversity_data,
.y = data,
.f = ~ semi_join(.x, .y) %>% distinct),
.f = ~ semi_join(.x, .y, join_by(id_col, dataset)) %>% distinct),
collinearity_subset = "collinearity_removed")

out <- bind_rows(
Expand Down
4 changes: 1 addition & 3 deletions R/generate_expertise_subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
#' library(ManyEcoEvo)
#' library(tidyverse)
#' library(targets)
#' targets::tar_load(ManyEcoEvo)
#' targets::tar_load(expert_subset)
#' ManyEcoEvo %>%
#' prepare_response_variables(estimate_type = "Zr") |>
#' generate_exclusion_subsets(estimate_type = "Zr") |>
#' generate_rating_subsets() |>
#' generate_expertise_subsets(expert_subset)
#' generate_expertise_subsets(ManyEcoEvo:::expert_subset)
generate_expertise_subsets <- function(ManyEcoEvo, expert_subset) {
#TODO idea, allow ellipses arg in function and pass those expressions to filter.
# that way isn't hardcoded in the function. Repeat for all other generate / exclude map funs
Expand Down
11 changes: 7 additions & 4 deletions R/meta_analyse_datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#'
#' @return A nested dataframe with all columns of object passed to arg `MA_data`, but with additional columns for the results of each analysis: `MA_mod`, `sorensen_glm`, `box_cox_ratings_cont`, `box_cox_ratings_cat`, `box_cox_rating_cat_no_int`, `uni_mixed_effects`
#' @export
#' @importFrom purrr map_chr map2 map possibly pmap
#' @import dplyr
#' @import cli
#' @importFrom rlang na_chr is_null na_chr
#' @family Multi-dataset Wrapper Functions
#'
#' @examples
Expand All @@ -23,8 +27,7 @@
#' # dplyr::filter(dataset == "eucalyptus",
#' # (max(VZr, na.rm = TRUE) == VZr)) TODO, do we need to include now that INF's removed?
meta_analyse_datasets <- function(MA_data){
#example:


poss_fit_metafor_mv <- purrr::possibly(fit_metafor_mv,
otherwise = NA,
quiet = FALSE)
Expand All @@ -34,7 +37,7 @@ meta_analyse_datasets <- function(MA_data){
fit_MA_mv <- function(effects_analysis, Z_colname, VZ_colname, estimate_type){
Zr <- effects_analysis %>% pull({{Z_colname}})
VZr <- effects_analysis %>% pull({{VZ_colname}})
mod <- fit_metafor_mv(estimate = Zr,
mod <- poss_fit_metafor_mv(estimate = Zr,
variance = VZr,
estimate_type = estimate_type,
data = effects_analysis)
Expand All @@ -45,7 +48,7 @@ meta_analyse_datasets <- function(MA_data){
# Must group by cols else multiple "effects_analysis" elements
# get passed to fit_MA_mv()
MA_data <- MA_data %>%
group_by(estimate_type, dataset, exclusion_set, publishable_subset, expertise_subset)
group_by(estimate_type, dataset, exclusion_set, publishable_subset, expertise_subset, collinearity_subset)
} else {
MA_data <- MA_data %>%
group_by(estimate_type, dataset, exclusion_set)
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
5 changes: 1 addition & 4 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ list(tarchetypes::tar_file_read(name = euc_reviews,
tarchetypes::tar_file_read(name = list_of_new_prediction_files,
command = "data-raw/analyst_data/S2/list_of_new_csv_files.csv",
read = readr::read_csv(!!.x)),
tarchetypes::tar_file_read(name = expert_subset,
command = "data-raw/metadata_and_key_data/Good_Statistician_ResponseIds.csv",
read = readr::read_csv(file = !!.x)),
targets::tar_target(name = all_review_data,
command = prepare_review_data(bt_reviews,euc_reviews)),
targets::tar_target(ManyEcoEvo,
Expand All @@ -79,7 +76,7 @@ list(tarchetypes::tar_file_read(name = euc_reviews,
prepare_response_variables(estimate_type = "Zr") |>
generate_exclusion_subsets(estimate_type = "Zr") |>
generate_rating_subsets() |>
generate_expertise_subsets(expert_subset) |>
generate_expertise_subsets(ManyEcoEvo:::expert_subset) |>
generate_collinearity_subset(ManyEcoEvo:::collinearity_subset) |>
compute_MA_inputs(estimate_type = "Zr") |>
generate_outlier_subsets() |> # TODO run before MA_inputs? diversity indices need to be recalculated!!
Expand Down
Loading