From a3edfa44ddde251484ff7de6261e845b383dac76 Mon Sep 17 00:00:00 2001 From: egouldo Date: Mon, 5 Aug 2024 21:24:24 +1000 Subject: [PATCH] #80 Add quarto markdown explaining package data generation And explain implicit exclusions from yi meta-analysis --- data-raw/analysis_datasets/README.qmd | 74 +++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 data-raw/analysis_datasets/README.qmd diff --git a/data-raw/analysis_datasets/README.qmd b/data-raw/analysis_datasets/README.qmd new file mode 100644 index 0000000..34bff21 --- /dev/null +++ b/data-raw/analysis_datasets/README.qmd @@ -0,0 +1,74 @@ +--- +title: "README: `data-raw/analysis_datasets`" +format: gfm +--- + +This directory contains the analysis datasets used by analysis teams to answer the project's research questions. + +- `blue_tit_data_updated_2020-04-18.csv` contains the Evolutionary Ecology or Blue tit dataset, stored at . +- `Euc_data.csv` contains the Ecology and Conservation or Eucalyptus dataset, stored at . + +The two files are downloaded to this directory using the script `osf_load_analyst_datasets.R`, which uses the [`osfr::` package](https://github.com/ropensci/osfr). + +For assistance with setting up authentication to run this script, check out . Alternatively, simply download the files directly from the OSF using the URL's above. The script downloads the files, loads them into R, and then calculates some additional variables, which were variables constructed by analysts, but not present in the supplied analysis datasets. + +This script creates the package data `euc_data` and `blue_tit_data`. + +## A note on additional variable construction in `osf_load_analyst_datasets.R` and potential for implicit exclusion + +Note that for inclusion in the out-of-sample predictions $y_i$ meta-analysis, parameter tables including the mean and SD for each response variable are needed standardise the data. If the variable is not present in either `euc_data` or `blue_tit_data` (i.e. it isn't calculated in `osf_load_analyst_datasets.R`, nor present in the original data files downloaded from the OSF), then analyses using that response variable are excluded from further meta-analysis. See table below for constructed variables that are included/excluded in the out-of-sample predictions meta-analysis: + +```{R} +#| label: tbl-constructed-variables +#| results: asis +#| echo: true +#| message: false +#| code-fold: true + +library(ManyEcoEvo) +library(dplyr) +library(purrr) +library(stringr) +library(tidyr) +library(tibble) + +# Constructed Variables Included in the ManyAnalysts meta-analysis +ManyEcoEvo_constructed_vars <- + tribble(~response_variable_name, + "euc_sdlgs_all", + "euc_sdlgs>50cm", + "euc_sdlgs0_2m", + "small*0.25+medium*1.25+large*2.5", + "euc_sdlgs50cm_2m", + "average.proportion.of.plots.containing.at.least.one.euc.seedling.of.any.size", + "day_14_weight/(day_14_tarsus_length^2)", + "day_14_weight/day_14_tarsus_length", + "day_14_weight*day_14_tarsus_length" + ) + +# Analyst Constructed Variables +all_constructed_vars <- + ManyEcoEvo %>% + pull(data, dataset) %>% + list_rbind(names_to = "dataset") %>% + filter(str_detect(response_variable_type, "constructed")) %>% + distinct(response_variable_name) %>% + drop_na() %>% + arrange() + +by <- join_by(response_variable_name) + +all_constructed_vars %>% + semi_join(ManyEcoEvo_constructed_vars, by) %>% + mutate(included_in_yi = TRUE) %>% + bind_rows( + { + all_constructed_vars %>% + anti_join(ManyEcoEvo_constructed_vars, by) %>% + mutate(included_in_yi = FALSE) + } + ) %>% + knitr::kable(col.names = c("Constructed Variable", + "Included in $y_i$ meta-analysis?"), + format = "markdown") +```