diff --git a/R/bbs_species_list_functions.R b/R/bbs_species_list_functions.R index b3027da..e94042b 100644 --- a/R/bbs_species_list_functions.R +++ b/R/bbs_species_list_functions.R @@ -9,7 +9,8 @@ #' #' is_unidentified <- function(names) { - names[names == "auratus auratus x auratus cafer"] <- "auratus auratus" + names[names == "auratus auratus x auratus cafer"] <- + "auratus auratus" grepl("sp\\.| x |\\/", names) } @@ -23,9 +24,12 @@ is_unidentified <- function(names) { #' @keywords internal find_unidentified_species <- function(species_table) { unidentified_species_table <- species_table - unidentified_species_table$is_unid <- is_unidentified(unidentified_species_table$species) - unidentified_species_table <- unidentified_species_table[unidentified_species_table$is_unid, ] - unidentified_species_table <- unidentified_species_table[, c("AOU", "english_common_name", "genus", "species")] + unidentified_species_table$is_unid <- + is_unidentified(unidentified_species_table$species) + unidentified_species_table <- + unidentified_species_table[unidentified_species_table$is_unid,] + unidentified_species_table <- + unidentified_species_table[, c("AOU", "english_common_name", "genus", "species")] unidentified_species_table } @@ -41,16 +45,26 @@ find_unidentified_species <- function(species_table) { #' @keywords internal find_nontarget_species <- function(species_table) { nontarget_species_complement <- species_table - nontarget_species_complement <- nontarget_species_complement[nontarget_species_complement$AOU > 2880, ] - nontarget_species_complement <- nontarget_species_complement[nontarget_species_complement$AOU < 3650 | nontarget_species_complement$AOU > 3810, ] - nontarget_species_complement <- nontarget_species_complement[nontarget_species_complement$AOU < 3900 | nontarget_species_complement$AOU > 3910, ] - nontarget_species_complement <- nontarget_species_complement[nontarget_species_complement$AOU < 4160 | nontarget_species_complement$AOU > 4210, ] - 
nontarget_species_complement <- nontarget_species_complement[nontarget_species_complement$AOU != 7010, ] + nontarget_species_complement <- + nontarget_species_complement[nontarget_species_complement$AOU > 2880,] + nontarget_species_complement <- + nontarget_species_complement[nontarget_species_complement$AOU < 3650 | + nontarget_species_complement$AOU > 3810,] + nontarget_species_complement <- + nontarget_species_complement[nontarget_species_complement$AOU < 3900 | + nontarget_species_complement$AOU > 3910,] + nontarget_species_complement <- + nontarget_species_complement[nontarget_species_complement$AOU < 4160 | + nontarget_species_complement$AOU > 4210,] + nontarget_species_complement <- + nontarget_species_complement[nontarget_species_complement$AOU != 7010,] nontarget_species_table <- species_table - nontarget_species_table <- nontarget_species_table[!(nontarget_species_table$AOU %in% nontarget_species_complement$AOU), ] - nontarget_species_table <- nontarget_species_table[, c("AOU", "english_common_name", "genus", "species")] + nontarget_species_table <- + nontarget_species_table[!(nontarget_species_table$AOU %in% nontarget_species_complement$AOU),] + nontarget_species_table <- + nontarget_species_table[, c("AOU", "english_common_name", "genus", "species")] nontarget_species_table } diff --git a/R/community_generate.R b/R/community_generate.R index c0ec934..2c62683 100644 --- a/R/community_generate.R +++ b/R/community_generate.R @@ -1,12 +1,17 @@ #' Simulate individual measurements for many populations #' -#' For a community (i.e. a collection of populations of different species, or of the same species at different points in time or locations, etc), simulate individual-level size and metabolic rate measurements. +#' For a community (i.e. a collection of populations of different species, or of +#' the same species at different points in time or locations, etc), simulate +#' individual-level size and metabolic rate measurements. 
#' -#' @param community_data_table dataframe containing at least one of `AOU`, `scientific_name`, or `mean_size` and a column for species abundances -#' @param abundance_column_name character, the name of the column with species abundances. Defaults to "speciestotal". -#' @return a dataframe one row per individual, all columns from `community_data_table`, and additional columns for species attributes. +#' @param community_data_table dataframe containing at least one of `AOU`, +#' `scientific_name`, or `mean_size` and a column for species abundances +#' @param abundance_column_name character, the name of the column with species +#' abundances. Defaults to "speciestotal". +#' @return a dataframe one row per individual, all columns from +#' `community_data_table`, and additional columns for species attributes. #' -#' Specifically: +#' Specifically: #' #' * `AOU`: the AOU, if provided #' * `sim_species_id`: the `sim_species_id` if provided @@ -25,84 +30,112 @@ #' #' demo_community <- community_generate(demo_route_clean) #' head(demo_community) -community_generate <- function(community_data_table, abundance_column_name = "speciestotal") { - colnames(community_data_table) <- tolower(colnames(community_data_table)) - colnames(community_data_table)[which(colnames(community_data_table) == "aou")] <- "AOU" +community_generate <- + function(community_data_table, + abundance_column_name = "speciestotal") { + colnames(community_data_table) <- + tolower(colnames(community_data_table)) + colnames(community_data_table)[which(colnames(community_data_table) == "aou")] <- + "AOU" - community_vars <- colnames(community_data_table) + community_vars <- colnames(community_data_table) - # Check that the necessary variables are provided #### + # Check that the necessary variables are provided #### - contains_AOU <- "AOU" %in% community_vars - contains_scientific_name <- "scientific_name" %in% community_vars - contains_mean <- "mean_size" %in% community_vars - contains_abundance <- 
abundance_column_name %in% community_vars + contains_AOU <- "AOU" %in% community_vars + contains_scientific_name <- "scientific_name" %in% community_vars + contains_mean <- "mean_size" %in% community_vars + contains_abundance <- abundance_column_name %in% community_vars - if (!contains_abundance) { - stop("abundance column is required. If the name is not `speciestotal` specify using the `abundance_column_name` argument") - } + if (!contains_abundance) { + stop( + "abundance column is required. If the name is not `speciestotal` specify using the `abundance_column_name` argument" + ) + } - if (!(contains_AOU | contains_mean | contains_scientific_name)) { - stop("At least one of `AOU`, `scientific_name`, or `mean_size` is required") - } + if (!(contains_AOU | contains_mean | contains_scientific_name)) { + stop("At least one of `AOU`, `scientific_name`, or `mean_size` is required") + } - # Identify ID/grouping columns and columns to pass to sim fxns. #### + # Identify ID/grouping columns and columns to pass to sim fxns. 
#### - community_data_table$rejoining_id <- seq_len(nrow(community_data_table)) - abundance_values <- as.matrix(community_data_table[, abundance_column_name]) - abundance_values <- as.vector(abundance_values[, 1]) - community_data_table$abundance <- abundance_values + community_data_table$rejoining_id <- + seq_len(nrow(community_data_table)) + abundance_values <- + as.matrix(community_data_table[, abundance_column_name]) + abundance_values <- as.vector(abundance_values[, 1]) + community_data_table$abundance <- abundance_values - community_vars_mod <- colnames(community_data_table) + community_vars_mod <- colnames(community_data_table) - possible_sim_vars <- c("abundance", "AOU", "mean_size", "sd_size", "sim_species_id", "scientific_name") + possible_sim_vars <- + c("abundance", + "AOU", + "mean_size", + "sd_size", + "sim_species_id", + "scientific_name") - id_vars <- c(community_vars_mod[which(!(community_vars_mod %in% possible_sim_vars))]) + id_vars <- + c(community_vars_mod[which(!(community_vars_mod %in% possible_sim_vars))]) - sim_vars <- c(community_vars_mod[which(community_vars_mod %in% possible_sim_vars)]) + sim_vars <- + c(community_vars_mod[which(community_vars_mod %in% possible_sim_vars)]) - # For the cols to pass in, add NA columns for any of the variables that the sim fxns can use that aren't included #### - na_vars <- possible_sim_vars[which(!(possible_sim_vars %in% community_vars_mod))] + # For the cols to pass in, add NA columns for any of the variables that the + # sim fxns can use that aren't included #### + na_vars <- + possible_sim_vars[which(!(possible_sim_vars %in% community_vars_mod))] - na_table <- matrix(nrow = nrow(community_data_table), ncol = length(na_vars)) - na_table <- as.data.frame(na_table) - colnames(na_table) <- na_vars + na_table <- + matrix(nrow = nrow(community_data_table), + ncol = length(na_vars)) + na_table <- as.data.frame(na_table) + colnames(na_table) <- na_vars - # Split into 2 tables, one with ID cols and one for the 
cols to pass in. - ids_table <- as.data.frame(community_data_table[, id_vars]) - colnames(ids_table) <- id_vars + # Split into 2 tables, one with ID cols and one for the cols to pass in. + ids_table <- as.data.frame(community_data_table[, id_vars]) + colnames(ids_table) <- id_vars - sim_vars_table <- community_data_table[, c(sim_vars, "rejoining_id")] - sim_vars_table <- cbind(sim_vars_table, na_table) + sim_vars_table <- + community_data_table[, c(sim_vars, "rejoining_id")] + sim_vars_table <- cbind(sim_vars_table, na_table) - pop_generate_rejoining <- function(this_id, sim_vars_table) { - this_row <- sim_vars_table[sim_vars_table$rejoining_id == this_id, ] + pop_generate_rejoining <- function(this_id, sim_vars_table) { + this_row <- sim_vars_table[sim_vars_table$rejoining_id == this_id,] - this_population <- pop_generate( - abundance = this_row$abundance[1], - AOU = this_row$AOU[1], - scientific_name = this_row$scientific_name[1], - mean_size = this_row$mean_size[1], - sd_size = this_row$sd_size[1], - sim_species_id = this_row$sim_species_id[1] - ) + this_population <- pop_generate( + abundance = this_row$abundance[1], + AOU = this_row$AOU[1], + scientific_name = this_row$scientific_name[1], + mean_size = this_row$mean_size[1], + sd_size = this_row$sd_size[1], + sim_species_id = this_row$sim_species_id[1] + ) - this_population$rejoining_id <- this_id + this_population$rejoining_id <- this_id - this_population - } + this_population + } - populations_list <- apply(as.matrix(sim_vars_table$rejoining_id), MARGIN = 1, FUN = pop_generate_rejoining, sim_vars_table = sim_vars_table) + populations_list <- + apply( + as.matrix(sim_vars_table$rejoining_id), + MARGIN = 1, + FUN = pop_generate_rejoining, + sim_vars_table = sim_vars_table + ) - populations <- do.call("rbind", populations_list) + populations <- do.call("rbind", populations_list) - community <- merge(ids_table, populations, by = "rejoining_id") - community <- community[, -which(colnames(community) == 
"rejoining_id")] + community <- merge(ids_table, populations, by = "rejoining_id") + community <- + community[,-which(colnames(community) == "rejoining_id")] - return(community) -} + return(community) + } diff --git a/R/data.R b/R/data.R index 874b059..33c9f22 100644 --- a/R/data.R +++ b/R/data.R @@ -200,7 +200,8 @@ #' Toy data frame of abundances and species names (for vignettes) #' -#' This data table is a toy data frame for the vignettes. It has abundances and scientific names for 5 species to make up a hypothetical community. +#' This data table is a toy data frame for the vignettes. It has abundances and +#' scientific names for 5 species to make up a hypothetical community. #' @format A data frame with 5 rows and 2 variables: #' \describe{ #' \item{scientific_name}{Scientific name} @@ -214,7 +215,8 @@ #' Toy data frame of abundances and species mean sizes (for vignettes) #' -#' This data table is a toy data frame for the vignettes. It has abundances and mean body sizes for 5 species to make up a hypothetical community. +#' This data table is a toy data frame for the vignettes. It has abundances and +#' mean body sizes for 5 species to make up a hypothetical community. #' @format A data frame with 5 rows and 3 variables: #' \describe{ #' \item{mean_size}{Mean mass, in g} diff --git a/R/filter_bbs_survey.R b/R/filter_bbs_survey.R index e746f1c..42826ee 100644 --- a/R/filter_bbs_survey.R +++ b/R/filter_bbs_survey.R @@ -1,10 +1,13 @@ #' Clean raw Breeding Bird Survey survey data #' -#' The raw data for the Breeding Bird Survey includes unidentified species and some species that are not well-sampled by the BBS methods. This function filters a dataframe to remove those species. +#' The raw data for the Breeding Bird Survey includes unidentified species and +#' some species that are not well-sampled by the BBS methods. This function +#' filters a dataframe to remove those species. 
#' #' @param bbs_survey_data data frame with columns for species and AOU #' -#' @return bbs_survey_data with unidentified species, nightbirds, waterbirds, non-targets removed +#' @return bbs_survey_data with unidentified species, nightbirds, waterbirds, +#' non-targets removed #' @export #' @importFrom utils data #' @examples @@ -12,7 +15,8 @@ filter_bbs_survey <- function(bbs_survey_data) { colnames(bbs_survey_data) <- tolower(colnames(bbs_survey_data)) - colnames(bbs_survey_data)[which(colnames(bbs_survey_data) == "aou")] <- "AOU" + colnames(bbs_survey_data)[which(colnames(bbs_survey_data) == "aou")] <- + "AOU" if (!("AOU" %in% colnames(bbs_survey_data))) { @@ -20,8 +24,10 @@ filter_bbs_survey <- function(bbs_survey_data) { } - bbs_survey_data <- bbs_survey_data[!(bbs_survey_data$AOU %in% unidentified_species$AOU), ] - bbs_survey_data <- bbs_survey_data[!(bbs_survey_data$AOU %in% nontarget_species$AOU), ] + bbs_survey_data <- + bbs_survey_data[!(bbs_survey_data$AOU %in% unidentified_species$AOU),] + bbs_survey_data <- + bbs_survey_data[!(bbs_survey_data$AOU %in% nontarget_species$AOU),] bbs_survey_data } diff --git a/R/ind_draw.R b/R/ind_draw.R index d6fbddc..e1ccad2 100644 --- a/R/ind_draw.R +++ b/R/ind_draw.R @@ -1,6 +1,7 @@ #' Draw individuals to make a population. #' -#' This is not a user-facing function; it is the random number generator under-the-hood for [pop_generate]. +#' This is not a user-facing function; it is the random number generator +#' under-the-hood for [pop_generate]. 
#' #' @param species_mean mean body size #' @param species_sd standard deviation of body size @@ -11,29 +12,39 @@ #' @importFrom truncnorm rtruncnorm #' @importFrom stats pnorm #' @keywords internal -ind_draw <- function(species_mean = NA_real_, species_sd = NA_real_, species_abundance = NA_integer_) { - if (is.na(species_mean)) { - stop("`species_mean` must be provided") - } +ind_draw <- + function(species_mean = NA_real_, + species_sd = NA_real_, + species_abundance = NA_integer_) { + if (is.na(species_mean)) { + stop("`species_mean` must be provided") + } - if (is.na(species_sd)) { - stop("`species_sd` must be provided") - } + if (is.na(species_sd)) { + stop("`species_sd` must be provided") + } - if (is.na(species_abundance)) { - stop("`species_abundance` must be provided") - } + if (is.na(species_abundance)) { + stop("`species_abundance` must be provided") + } - if (!is.numeric(species_abundance)) { - stop("`species_abundance` must be numeric") - } + if (!is.numeric(species_abundance)) { + stop("`species_abundance` must be numeric") + } - if (!(round(species_abundance) == species_abundance)) { - stop("`species_abundance` must be a whole number") - } + if (!(round(species_abundance) == species_abundance)) { + stop("`species_abundance` must be a whole number") + } - population <- truncnorm::rtruncnorm(n = species_abundance, a = 1, b = Inf, mean = species_mean, sd = species_sd) + population <- + truncnorm::rtruncnorm( + n = species_abundance, + a = 1, + b = Inf, + mean = species_mean, + sd = species_sd + ) - population -} + population + } diff --git a/R/metabolic_rate.R b/R/metabolic_rate.R index 31d1ef8..279703d 100644 --- a/R/metabolic_rate.R +++ b/R/metabolic_rate.R @@ -1,10 +1,11 @@ #' Estimate individual-level BMR #' -#' Given an individual's body mass (in grams), use allometric scaling (Fristoe 2015) to estimate basal metabolic rate. +#' Given an individual's body mass (in grams), use allometric scaling (Fristoe +#' 2015) to estimate basal metabolic rate. 
#' -#' @references \itemize{ -#' \item{Fristoe, T. S. (2015). Energy use by migrants and residents in North American breeding bird communities. Global Ecology and Biogeography, 24(4), 406–415. https://doi.org/10.1111/geb.12262} -#' } +#' @references \itemize{ \item{Fristoe, T. S. (2015). Energy use by migrants and +#' residents in North American breeding bird communities. Global Ecology and +#' Biogeography, 24(4), 406–415. https://doi.org/10.1111/geb.12262} } #' #' @param mass mass in grams #' @return estimated basal metabolic rate diff --git a/R/pop_generate.R b/R/pop_generate.R index 5f87b6f..b269241 100644 --- a/R/pop_generate.R +++ b/R/pop_generate.R @@ -1,19 +1,24 @@ #' Simulate body masses for a population #' -#' Draws body mass measurements for a population of birds (of all the same species) given the population size and either (1) the species AOU or (2) the mean and potentially standard deviation of body mass for that species. +#' Draws body mass measurements for a population of birds (of all the same +#' species) given the population size and either (1) the species AOU or (2) the +#' mean and potentially standard deviation of body mass for that species. #' -#' `abundance` is required, as well as *one of*: `AOU`, `scientific_name`, or `mean_size`. +#' `abundance` is required, as well as *one of*: `AOU`, `scientific_name`, or +#' `mean_size`. #' #' @param abundance integer number of individuals to draw. *Required*. -#' @param AOU the numeric AOU code used for this species in the Breeding Bird Survey +#' @param AOU the numeric AOU code used for this species in the Breeding Bird +#' Survey #' @param scientific_name as "Genus species" #' @param mean_size numeric, mean body mass (in grams) for this species. #' @param sd_size numeric, standard deviation of body mass for this species. #' @param sim_species_id defaults AOU or 1 #' -#' @return a dataframe with `abundance` rows - one record per individual - and columns for species attributes. 
+#' @return a dataframe with `abundance` rows - one record per individual - and +#' columns for species attributes. #' -#' Specifically: +#' Specifically: #' #' * `AOU`: the AOU, if provided #' * `sim_species_id`: the `sim_species_id` if provided @@ -32,61 +37,72 @@ #' pop_generate(abundance = 5, scientific_name = "Selasphorus calliope") #' pop_generate(abundance = 5, mean_size = 20, sd_size = 3) #' -pop_generate <- function(abundance = NA_integer_, AOU = NA_integer_, scientific_name = NA_character_, mean_size = NA_real_, sd_size = NA_real_, sim_species_id = 1) { - this_species <- species_define( - AOU = AOU, - scientific_name = scientific_name, - mean_size = mean_size, - sd_size = sd_size, - sim_species_id = sim_species_id - ) +pop_generate <- + function(abundance = NA_integer_, + AOU = NA_integer_, + scientific_name = NA_character_, + mean_size = NA_real_, + sd_size = NA_real_, + sim_species_id = 1) { + this_species <- species_define( + AOU = AOU, + scientific_name = scientific_name, + mean_size = mean_size, + sd_size = sd_size, + sim_species_id = sim_species_id + ) - # abundance errors - if (is.na(abundance)) { - stop("`abundance` must be provided") - } + # abundance errors + if (is.na(abundance)) { + stop("`abundance` must be provided") + } - if (!is.numeric(abundance)) { - stop("`abundance` must be numeric") - } + if (!is.numeric(abundance)) { + stop("`abundance` must be numeric") + } - if (!(round(abundance) == abundance)) { - stop("`abundance` must be a whole number") - } + if (!(round(abundance) == abundance)) { + stop("`abundance` must be a whole number") + } - # errors related to size pars + # errors related to size pars - if (is.na(this_species$mean_size)) { - stop("`species_mean` must be provided") - } + if (is.na(this_species$mean_size)) { + stop("`species_mean` must be provided") + } - if (is.na(this_species$sd_size)) { - stop("`species_sd` must be provided") - } + if (is.na(this_species$sd_size)) { + stop("`species_sd` must be provided") + } - # 
print message if the combination of mean and SD is likely (> 1% chance) to produce negative masses + # print message if the combination of mean and SD is likely (> 1% chance) to produce negative masses - if (pnorm(1, this_species$mean_size, this_species$sd_size) > .01) { - message("Very tiny species (a greater than 1% chance of a body mass value less than 1g)!") - } + if (pnorm(1, this_species$mean_size, this_species$sd_size) > .01) { + message("Very tiny species (a greater than 1% chance of a body mass value less than 1g)!") + } - # draw + # draw - this_population <- ind_draw(species_mean = this_species$mean_size, species_sd = this_species$sd_size, species_abundance = abundance) + this_population <- + ind_draw( + species_mean = this_species$mean_size, + species_sd = this_species$sd_size, + species_abundance = abundance + ) - this_population_bmr <- individual_metabolic_rate(this_population) + this_population_bmr <- individual_metabolic_rate(this_population) - population_df <- data.frame( - AOU = this_species$AOU, - sim_species_id = this_species$sim_species_id, - individual_mass = this_population, - individual_bmr = this_population_bmr, - mean_size = this_species$mean_size, - sd_size = this_species$sd_size, - abundance = abundance, - sd_method = this_species$sd_method, - scientific_name = this_species$scientific_name - ) + population_df <- data.frame( + AOU = this_species$AOU, + sim_species_id = this_species$sim_species_id, + individual_mass = this_population, + individual_bmr = this_population_bmr, + mean_size = this_species$mean_size, + sd_size = this_species$sd_size, + abundance = abundance, + sd_method = this_species$sd_method, + scientific_name = this_species$scientific_name + ) - population_df -} + population_df + } diff --git a/R/species_data_functions.R b/R/species_data_functions.R index 7700404..36c0e4f 100644 --- a/R/species_data_functions.R +++ b/R/species_data_functions.R @@ -1,12 +1,21 @@ #' Estimate parameters for scaling of standard deviation with 
mean body size #' -#' Calculates parameters for a (log-log linear) scaling relationship between the mean and standard deviation of a species' mean body size. Given a table of species with known mean and standard deviation body sizes, fits a linear model of the form `log(var(body_size)) ~ log(mean(body_size))` and extracts parameter estimates, which can then be used to estimate the standard deviation of body mass for a species based only on its mean body mass. See also Thibault et al. (2011) for this method applied to the Breeding Bird Survey. -#' -#' @references \itemize{ -#' \item{Thibault, K. M., White, E. P., Hurlbert, A. H., & Ernest, S. K. M. (2011). Multimodality in the individual size distributions of bird communities. Global Ecology and Biogeography, 20(1), 145–153. https://doi.org/10.1111/j.1466-8238.2010.00576.x} -#' } -#' -#' @param raw_size_data dataframe of species' mean and standard deviation body sizes; use the included `raw_masses` data table. +#' Calculates parameters for a (log-log linear) scaling relationship between the +#' mean and standard deviation of a species' mean body size. Given a table of +#' species with known mean and standard deviation body sizes, fits a linear +#' model of the form `log(var(body_size)) ~ log(mean(body_size))` and extracts +#' parameter estimates, which can then be used to estimate the standard +#' deviation of body mass for a species based only on its mean body mass. See +#' also Thibault et al. (2011) for this method applied to the Breeding Bird +#' Survey. +#' +#' @references \itemize{ \item{Thibault, K. M., White, E. P., Hurlbert, A. H., & +#' Ernest, S. K. M. (2011). Multimodality in the individual size distributions +#' of bird communities. Global Ecology and Biogeography, 20(1), 145–153. +#' https://doi.org/10.1111/j.1466-8238.2010.00576.x} } +#' +#' @param raw_size_data dataframe of species' mean and standard deviation body +#' sizes; use the included `raw_masses` data table. 
#' #' @return list of `$slope` and `$intercept` from the linear model fit #' @@ -14,10 +23,10 @@ #' #' @importFrom stats lm var family get_sd_parameters <- function(raw_size_data) { - sp_for_sd <- raw_size_data[!is.na(raw_size_data$sd), ] + sp_for_sd <- raw_size_data[!is.na(raw_size_data$sd),] sp_for_sd$mass <- as.numeric(sp_for_sd$mass) sp_for_sd$sd <- as.numeric(sp_for_sd$sd) - sp_for_sd$var <- sp_for_sd$sd^2 + sp_for_sd$var <- sp_for_sd$sd ^ 2 sp_for_sd$log_m <- log(sp_for_sd$mass) sp_for_sd$log_var <- log(sp_for_sd$var) @@ -27,19 +36,21 @@ get_sd_parameters <- function(raw_size_data) { intercept <- exp(sd_fit$coefficients[[1]]) slope <- sd_fit$coefficient[[2]] - return(list( - intercept = intercept, - slope = slope - )) + return(list(intercept = intercept, + slope = slope)) } -#' Estimate a species' standard deviation of body mass based on its mean body mass +#' Estimate a species' standard deviation of body mass based on its mean body +#' mass #' -#' Using the parameters estimated in `get_sd_parameters`, estimate the standard deviation of body mass for a species based only on its mean body mass. +#' Using the parameters estimated in `get_sd_parameters`, estimate the standard +#' deviation of body mass for a species based only on its mean body mass. #' #' @param sp_mean mean body mass, in grams -#' @param pars list containing `$slope` and `$intercept`, generated by [get_sd_parameters()]. If not provided, estimated by running `get_sd_parameters(raw_masses)`. +#' @param pars list containing `$slope` and `$intercept`, generated by +#' [get_sd_parameters()]. If not provided, estimated by running +#' `get_sd_parameters(raw_masses)`. 
#' @keywords internal #' @return the estimated standard deviation of body mass #' @@ -49,48 +60,53 @@ species_estimate_sd <- function(sp_mean, pars = NULL) { pars <- get_sd_parameters(raw_masses) } - fitted_sd <- sqrt(pars$intercept * (sp_mean^pars$slope)) + fitted_sd <- sqrt(pars$intercept * (sp_mean ^ pars$slope)) return(fitted_sd) } #' Reconcile taxonomic updates between 2008 and 2019 #' -#' Six species have undergone name changes or other minor taxonomic rearrangements between 2008 and 2019, resulting in name mismatches between data from the Breeding Bird Survey (Paradieck et al. 2019) and the CRC Handbook (Dunning 2008). This function resolves those mismatches such that all species in the Breeding Bird Survey are associated with the appropriate size records from the CRC Handbook. +#' Six species have undergone name changes or other minor taxonomic +#' rearrangements between 2008 and 2019, resulting in name mismatches between +#' data from the Breeding Bird Survey (Paradieck et al. 2019) and the CRC +#' Handbook (Dunning 2008). This function resolves those mismatches such that +#' all species in the Breeding Bird Survey are associated with the appropriate +#' size records from the CRC Handbook. #' -#' @param raw_size_data dataframe of species' mean and standard deviation body sizes; use the included `raw_masses` data table. +#' @param raw_size_data dataframe of species' mean and standard deviation body +#' sizes; use the included `raw_masses` data table. #' -#' @return dataframe of species' mean and standard deviation body sizes, with name mismatches resolved. +#' @return dataframe of species' mean and standard deviation body sizes, with +#' name mismatches resolved. #' -#' @references \itemize{ -#' \item{Dunning, J. B. (2008). CRC handbook of avian body masses (2nd ed.). CRC Press.} -#' \item{Pardieck, K. L., Ziolkowski, D. J., Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding Bird Survey Dataset 1966—2018, version 2018.0. U.S. 
Geological Survey. https://doi.org/10.5066/P9HE8XYJ} -#' } +#' @references \itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body +#' masses (2nd ed.). CRC Press.} \item{Pardieck, K. L., Ziolkowski, D. J., +#' Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding +#' Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey. +#' https://doi.org/10.5066/P9HE8XYJ} } #' #' @keywords internal clean_sp_size_data <- function(raw_size_data) { - cols_to_remove <- which(colnames(raw_size_data) %in% c("english_common_name", "sporder", "family")) - sp_clean <- raw_size_data[, -cols_to_remove] + cols_to_remove <- + which(colnames(raw_size_data) %in% c("english_common_name", "sporder", "family")) + sp_clean <- raw_size_data[,-cols_to_remove] sp_clean$mass <- as.numeric(sp_clean$mass) - name_change <- sp_clean[which(sp_clean$not_in_dunning == 1), ] + name_change <- sp_clean[which(sp_clean$not_in_dunning == 1),] - sp_clean <- sp_clean[which(is.na(sp_clean$not_in_dunning)), ] + sp_clean <- sp_clean[which(is.na(sp_clean$not_in_dunning)),] sp_clean$added_flag <- NA_integer_ for (i in seq_len(nrow(name_change))) { if (!is.na(name_change$close_subspecies[i])) { - matched_rows <- sp_clean[ - sp_clean$genus == name_change$close_genus[i] & - sp_clean$species == name_change$close_species[i] & - sp_clean$subspecies == name_change$close_subspecies[i], - ] + matched_rows <- sp_clean[sp_clean$genus == name_change$close_genus[i] & + sp_clean$species == name_change$close_species[i] & + sp_clean$subspecies == name_change$close_subspecies[i],] } else { - matched_rows <- sp_clean[ - sp_clean$genus == name_change$close_genus[i] & - sp_clean$species == name_change$close_species[i], - ] + matched_rows <- sp_clean[sp_clean$genus == name_change$close_genus[i] & + sp_clean$species == name_change$close_species[i],] } sp_to_add <- matched_rows @@ -106,12 +122,20 @@ clean_sp_size_data <- function(raw_size_data) { #' Estimate missing records for standard 
deviation based on mean of body size #' -#' Fill in missing records for the standard deviation of body mass for a species, based on its mean body size and the parameters estimated by the linear model fit by `get_sd_parameters`. +#' Fill in missing records for the standard deviation of body mass for a +#' species, based on its mean body size and the parameters estimated by the +#' linear model fit by `get_sd_parameters`. #' -#' @param clean_size_data dataframe of species' masses and standard deviations; as generated by `clean_sp_size_data` -#' @param sd_pars parameters as list of `$slope`, `$intercept`; as generated by `get_sd_parameters` +#' @param clean_size_data dataframe of species' masses and standard deviations; +#' as generated by `clean_sp_size_data` +#' @param sd_pars parameters as list of `$slope`, `$intercept`; as generated by +#' `get_sd_parameters` #' @keywords internal -#' @return a dataframe of species' `species_id` (which matches the AOU in the Breeding Bird Survey), `mass` mean body mass, `sd` standard deviation body mass, and a new column for `estimated_sd`, a `TRUE`/`FALSE` flag for whether the standard deviation has been estimated using the parameters provided in `sd_pars`. +#' @return a dataframe of species' `species_id` (which matches the AOU in the +#' Breeding Bird Survey), `mass` mean body mass, `sd` standard deviation body +#' mass, and a new column for `estimated_sd`, a `TRUE`/`FALSE` flag for +#' whether the standard deviation has been estimated using the parameters +#' provided in `sd_pars`. 
#' #' add_estimated_sds <- function(clean_size_data, sd_pars) { @@ -120,7 +144,8 @@ add_estimated_sds <- function(clean_size_data, sd_pars) { for (i in seq_len(nrow(clean_size_data))) { if (is.na(clean_size_data$sd[i])) { clean_size_data$estimated_sd[i] <- TRUE - clean_size_data$sd[i] <- species_estimate_sd(clean_size_data$mass[i], pars = sd_pars) + clean_size_data$sd[i] <- + species_estimate_sd(clean_size_data$mass[i], pars = sd_pars) } else { clean_size_data$estimated_sd[i] <- FALSE } @@ -129,17 +154,23 @@ add_estimated_sds <- function(clean_size_data, sd_pars) { return(clean_size_data) } -#' Summarize records of mean and standard deviation of body mass to species-level means +#' Summarize records of mean and standard deviation of body mass to +#' species-level means #' -#' The CRC Handbook (Dunning 2008) often contains multiple records for mean body mass (and standard deviation of body mass) for a species, drawn from different locations or for different sexes. This function summarizes across all records for each species to produce species-level means for the mean and standard deviation of body mass. +#' The CRC Handbook (Dunning 2008) often contains multiple records for mean body +#' mass (and standard deviation of body mass) for a species, drawn from +#' different locations or for different sexes. This function summarizes across +#' all records for each species to produce species-level means for the mean and +#' standard deviation of body mass. 
#' -#' @param sd_dat dataframe of mean and standard deviation of body mass for all records for all species; generated by `add_estimated_sds` +#' @param sd_dat dataframe of mean and standard deviation of body mass for all +#' records for all species; generated by `add_estimated_sds` #' -#' @return `sd_dat` summarized to species-level means for the mean and standard deviation of body mass +#' @return `sd_dat` summarized to species-level means for the mean and standard +#' deviation of body mass #' @keywords internal -#' @references \itemize{ -#' \item{Dunning, J. B. (2008). CRC handbook of avian body masses (2nd ed.). CRC Press.} -#' } +#' @references \itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body +#' masses (2nd ed.). CRC Press.} } #' #' get_sp_mean_size <- function(sd_dat) { @@ -153,34 +184,48 @@ get_sp_mean_size <- function(sd_dat) { for (i in seq_len(nrow(unique_combinations))) { - this_combination <- sd_dat[which(sd_dat$AOU == unique_combinations$AOU[i] & - sd_dat$genus == unique_combinations$genus[i] & - sd_dat$species == unique_combinations$species[i]), ] + this_combination <- + sd_dat[which( + sd_dat$AOU == unique_combinations$AOU[i] & + sd_dat$genus == unique_combinations$genus[i] & + sd_dat$species == unique_combinations$species[i] + ),] unique_combinations$mean_mass[i] <- mean(this_combination$mass) - unique_combinations$mean_sd[i] <- mean(this_combination$sd, na.rm = FALSE) - unique_combinations$contains_estimates[i] <- any(this_combination$estimated_sd) + unique_combinations$mean_sd[i] <- + mean(this_combination$sd, na.rm = FALSE) + unique_combinations$contains_estimates[i] <- + any(this_combination$estimated_sd) } - sp_means <- unique_combinations[!is.na(unique_combinations$AOU), ] - sp_means <- sp_means[order(sp_means$AOU), ] + sp_means <- unique_combinations[!is.na(unique_combinations$AOU),] + sp_means <- sp_means[order(sp_means$AOU),] sp_means } -#' Generate table of species-level means for the mean and standard deviation of body 
mass for species in the Breeding Bird Survey +#' Generate table of species-level means for the mean and standard deviation of +#' body mass for species in the Breeding Bird Survey #' -#' Goes from the `raw_masses` dataframe (included in `bbssize`) of records of species' mean and (where provided) standard deviation of body mass from the CRC Handbook (Dunning 2008) to a table of species-level means for the mean and standard deviation of body mass, incorporating estimates for missing standard deviation records and resolving taxonomic updates between the publication of the CRC Handbook and present releases of the Breeding Bird Survey dataset (Paradieck et al. 2019). +#' Goes from the `raw_masses` dataframe (included in `bbssize`) of records of +#' species' mean and (where provided) standard deviation of body mass from the +#' CRC Handbook (Dunning 2008) to a table of species-level means for the mean +#' and standard deviation of body mass, incorporating estimates for missing +#' standard deviation records and resolving taxonomic updates between the +#' publication of the CRC Handbook and present releases of the Breeding Bird +#' Survey dataset (Pardieck et al. 2019). #' #' @param raw_size_data the `raw_masses` dataframe #' -#' @references \itemize{ -#' \item{Dunning, J. B. (2008). CRC handbook of avian body masses (2nd ed.). CRC Press.} -#' \item{Pardieck, K. L., Ziolkowski, D. J., Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey. https://doi.org/10.5066/P9HE8XYJ} -#' } +#' @references \itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body +#' masses (2nd ed.). CRC Press.} \item{Pardieck, K. L., Ziolkowski, D. J., +#' Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding +#' Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey.
+#' https://doi.org/10.5066/P9HE8XYJ} } #' -#' @return a dataframe of species-level means for mean body size and standard deviation of body size +#' @return a dataframe of species-level means for mean body size and standard +#' deviation of body size #' @keywords internal #' generate_sd_table <- function(raw_size_data) { @@ -191,14 +236,13 @@ generate_sd_table <- function(raw_size_data) { clean_size_dat <- clean_sp_size_data(raw_size_data) # Add estimates for missing standard deviation records - sd_size_dat <- add_estimated_sds( - clean_size_data = clean_size_dat, - sd_pars = fitted_pars - ) + sd_size_dat <- add_estimated_sds(clean_size_data = clean_size_dat, + sd_pars = fitted_pars) # Summarize to species-level means for the mean and standard deviation of body mass sp_mean_size_dat <- get_sp_mean_size(sd_size_dat) - sp_mean_size_dat$scientific_name <- paste(sp_mean_size_dat$genus, sp_mean_size_dat$species, sep = " ") + sp_mean_size_dat$scientific_name <- + paste(sp_mean_size_dat$genus, sp_mean_size_dat$species, sep = " ") sp_mean_size_dat } diff --git a/R/species_define.R b/R/species_define.R index 9d728c7..38fd021 100644 --- a/R/species_define.R +++ b/R/species_define.R @@ -1,14 +1,20 @@ #' Define a species #' -#' Creates a list with taxonomic/identifying information and parameters for mean and standard deviation of body mass. +#' Creates a list with taxonomic/identifying information and parameters for mean +#' and standard deviation of body mass. #' -#' The identifying information used depends on which parameters are provided, with the following order of preference: AOU > scientific name > user provided mean and sd > user provided mean and estimated sd. +#' The identifying information used depends on which parameters are provided, +#' with the following order of preference: AOU > scientific name > user provided +#' mean and sd > user provided mean and estimated sd. 
#' -#' @param AOU the numeric AOU code used for this species in the Breeding Bird Survey +#' @param AOU the numeric AOU code used for this species in the Breeding Bird +#' Survey #' @param scientific_name the species' scientific name, as "Genus species" #' @param mean_size mean body size #' @param sd_size sd of body size -#' @param sim_species_id identifier; if using taxonomic info, defaults to AOU. If not, defaults to 1. Supplying other values can be useful for simulation models. +#' @param sim_species_id identifier; if using taxonomic info, defaults to AOU. +#' If not, defaults to 1. Supplying other values can be useful for simulation +#' models. #' #' @return list with species parameter information #' @export @@ -17,87 +23,133 @@ #' species_define(scientific_name = "Perdix perdix") #' species_define(mean_size = 400, sd_size = 30) #' species_define(mean_size = 400) -species_define <- function(AOU = NA_integer_, scientific_name = NA_character_, mean_size = NA_real_, sd_size = NA_real_, sim_species_id = 1) { - if (!is.na(AOU)) { - # use AOU to get mean, sd, genus, and species - spPars <- species_lookup(AOU = AOU) - thisSpecies <- list(AOU = AOU, scientific_name = spPars$scientific_name, mean_size = spPars$mean_mass, sd_size = spPars$mean_sd, sd_method = "AOU lookup", sim_species_id = AOU) +species_define <- + function(AOU = NA_integer_, + scientific_name = NA_character_, + mean_size = NA_real_, + sd_size = NA_real_, + sim_species_id = 1) { + if (!is.na(AOU)) { + # use AOU to get mean, sd, genus, and species + spPars <- species_lookup(AOU = AOU) + thisSpecies <- + list( + AOU = AOU, + scientific_name = spPars$scientific_name, + mean_size = spPars$mean_mass, + sd_size = spPars$mean_sd, + sd_method = "AOU lookup", + sim_species_id = AOU + ) - # Check that any user-supplied taxonomic info matches the AOU provided - if (!is.na(scientific_name)) { - if (scientific_name != thisSpecies$scientific_name) { - warning("User-provided scientific name does not match scientific 
name associated with this AOU") + # Check that any user-supplied taxonomic info matches the AOU provided + if (!is.na(scientific_name)) { + if (scientific_name != thisSpecies$scientific_name) { + warning( + "User-provided scientific name does not match scientific name associated with this AOU" + ) + } } + + return(thisSpecies) } - return(thisSpecies) - } + # If AOU is not provided (implicit in order) try scientific name - # If AOU is not provided (implicit in order) try scientific name + if (all(!is.na(scientific_name))) { + spPars <- species_lookup(scientific_name = scientific_name) + thisSpecies <- + list( + AOU = spPars$AOU, + scientific_name = spPars$scientific_name, + mean_size = spPars$mean_mass, + sd_size = spPars$mean_sd, + sd_method = "Scientific name lookup", + sim_species_id = spPars$AOU + ) + return(thisSpecies) + } - if (all(!is.na(scientific_name))) { - spPars <- species_lookup(scientific_name = scientific_name) - thisSpecies <- list(AOU = spPars$AOU, scientific_name = spPars$scientific_name, mean_size = spPars$mean_mass, sd_size = spPars$mean_sd, sd_method = "Scientific name lookup", sim_species_id = spPars$AOU) - return(thisSpecies) - } + # If neither of AOU or scientific name is provided (implicit in order) + if (!is.na(mean_size)) { + if (!is.na(sd_size)) { + thisSpecies <- + list( + AOU = NA_integer_, + scientific_name = NA_character_, + mean_size = mean_size, + sd_size = sd_size, + sd_method = "Mean and SD provided", + sim_species_id = sim_species_id + ) + return(thisSpecies) + } - # If neither of AOU or scientific name is provided (implicit in order) - if (!is.na(mean_size)) { - if (!is.na(sd_size)) { - thisSpecies <- list(AOU = NA_integer_, scientific_name = NA_character_, mean_size = mean_size, sd_size = sd_size, sd_method = "Mean and SD provided", sim_species_id = sim_species_id) + this_sd <- species_estimate_sd(mean_size) + thisSpecies <- + list( + AOU = NA_integer_, + scientific_name = NA_character_, + mean_size = mean_size, + sd_size = 
this_sd, + sd_method = "SD estimated from mean", + sim_species_id = sim_species_id + ) return(thisSpecies) } - this_sd <- species_estimate_sd(mean_size) - thisSpecies <- list(AOU = NA_integer_, scientific_name = NA_character_, mean_size = mean_size, sd_size = this_sd, sd_method = "SD estimated from mean", sim_species_id = sim_species_id) - return(thisSpecies) - } + # If insufficient information is provided, throw an error - # If insufficient information is provided, throw an error - - stop("At least one of: AOU, scientific_name, or mean_size must be provided!") -} + stop("At least one of: AOU, scientific_name, or mean_size must be provided!") + } #' Species lookup #' -#' Given either AOU or scientific name, looks up a species' taxonomic information and mean and standard deviation of body size in [sd_table]. +#' Given either AOU or scientific name, looks up a species' taxonomic +#' information and mean and standard deviation of body size in [sd_table]. #' -#' @param AOU the numeric AOU code used for this species in the Breeding Bird Survey +#' @param AOU the numeric AOU code used for this species in the Breeding Bird +#' Survey #' @param scientific_name the species' scientific name, as "Genus species" #' -#' @return data frame with columns AOU, genus, species, mean_mass, mean_sd, contains_estimates, scientific_name +#' @return data frame with columns AOU, genus, species, mean_mass, mean_sd, +#' contains_estimates, scientific_name #' @export #' #' @examples #' species_lookup(AOU = 2881) #' species_lookup(scientific_name = "Selasphorus calliope") -species_lookup <- function(AOU = NA_integer_, scientific_name = NA_character_) { - provided_AOU <- AOU +species_lookup <- + function(AOU = NA_integer_, + scientific_name = NA_character_) { + provided_AOU <- AOU - if (!is.na(provided_AOU)) { - if (!(provided_AOU %in% sd_table$AOU)) { - stop("`AOU` is invalid.") - } + if (!is.na(provided_AOU)) { + if (!(provided_AOU %in% sd_table$AOU)) { + stop("`AOU` is invalid.") + } - 
return(sd_table[sd_table$AOU == provided_AOU, ]) - } else if (is.character(scientific_name)) { - proper_scientific_name <- tolower(scientific_name) - substr(proper_scientific_name, 1, 1) <- toupper(substr(proper_scientific_name, 1, 1)) + return(sd_table[sd_table$AOU == provided_AOU,]) + } else if (is.character(scientific_name)) { + proper_scientific_name <- tolower(scientific_name) + substr(proper_scientific_name, 1, 1) <- + toupper(substr(proper_scientific_name, 1, 1)) - sp_pars <- sd_table[sd_table$scientific_name == proper_scientific_name, ] + sp_pars <- + sd_table[sd_table$scientific_name == proper_scientific_name,] - if (nrow(sp_pars) > 1) { - sp_pars <- sp_pars[1, ] - } + if (nrow(sp_pars) > 1) { + sp_pars <- sp_pars[1,] + } - valid_name <- nrow(sp_pars) == 1 + valid_name <- nrow(sp_pars) == 1 - if (valid_name) { - return(sp_pars) + if (valid_name) { + return(sp_pars) + } else { + stop("Scientific name is invalid.") + } } else { - stop("Scientific name is invalid.") + stop("Either `AOU` or a valid scientific name must be provided.") } - } else { - stop("Either `AOU` or a valid scientific name must be provided.") } -} diff --git a/man/add_estimated_sds.Rd b/man/add_estimated_sds.Rd index d725cd7..9e4e722 100644 --- a/man/add_estimated_sds.Rd +++ b/man/add_estimated_sds.Rd @@ -7,14 +7,22 @@ add_estimated_sds(clean_size_data, sd_pars) } \arguments{ -\item{clean_size_data}{dataframe of species' masses and standard deviations; as generated by \code{clean_sp_size_data}} +\item{clean_size_data}{dataframe of species' masses and standard deviations; +as generated by \code{clean_sp_size_data}} -\item{sd_pars}{parameters as list of \verb{$slope}, \verb{$intercept}; as generated by \code{get_sd_parameters}} +\item{sd_pars}{parameters as list of \verb{$slope}, \verb{$intercept}; as generated by +\code{get_sd_parameters}} } \value{ -a dataframe of species' \code{species_id} (which matches the AOU in the Breeding Bird Survey), \code{mass} mean body mass, \code{sd} standard 
deviation body mass, and a new column for \code{estimated_sd}, a \code{TRUE}/\code{FALSE} flag for whether the standard deviation has been estimated using the parameters provided in \code{sd_pars}. +a dataframe of species' \code{species_id} (which matches the AOU in the +Breeding Bird Survey), \code{mass} mean body mass, \code{sd} standard deviation body +mass, and a new column for \code{estimated_sd}, a \code{TRUE}/\code{FALSE} flag for +whether the standard deviation has been estimated using the parameters +provided in \code{sd_pars}. } \description{ -Fill in missing records for the standard deviation of body mass for a species, based on its mean body size and the parameters estimated by the linear model fit by \code{get_sd_parameters}. +Fill in missing records for the standard deviation of body mass for a +species, based on its mean body size and the parameters estimated by the +linear model fit by \code{get_sd_parameters}. } \keyword{internal} diff --git a/man/clean_sp_size_data.Rd b/man/clean_sp_size_data.Rd index d48ec2e..bae4ab3 100644 --- a/man/clean_sp_size_data.Rd +++ b/man/clean_sp_size_data.Rd @@ -7,18 +7,26 @@ clean_sp_size_data(raw_size_data) } \arguments{ -\item{raw_size_data}{dataframe of species' mean and standard deviation body sizes; use the included \code{raw_masses} data table.} +\item{raw_size_data}{dataframe of species' mean and standard deviation body +sizes; use the included \code{raw_masses} data table.} } \value{ -dataframe of species' mean and standard deviation body sizes, with name mismatches resolved. +dataframe of species' mean and standard deviation body sizes, with +name mismatches resolved. } \description{ -Six species have undergone name changes or other minor taxonomic rearrangements between 2008 and 2019, resulting in name mismatches between data from the Breeding Bird Survey (Paradieck et al. 2019) and the CRC Handbook (Dunning 2008). 
This function resolves those mismatches such that all species in the Breeding Bird Survey are associated with the appropriate size records from the CRC Handbook. +Six species have undergone name changes or other minor taxonomic +rearrangements between 2008 and 2019, resulting in name mismatches between +data from the Breeding Bird Survey (Paradieck et al. 2019) and the CRC +Handbook (Dunning 2008). This function resolves those mismatches such that +all species in the Breeding Bird Survey are associated with the appropriate +size records from the CRC Handbook. } \references{ -\itemize{ -\item{Dunning, J. B. (2008). CRC handbook of avian body masses (2nd ed.). CRC Press.} -\item{Pardieck, K. L., Ziolkowski, D. J., Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey. https://doi.org/10.5066/P9HE8XYJ} -} +\itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body +masses (2nd ed.). CRC Press.} \item{Pardieck, K. L., Ziolkowski, D. J., +Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding +Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey. +https://doi.org/10.5066/P9HE8XYJ} } } \keyword{internal} diff --git a/man/community_generate.Rd b/man/community_generate.Rd index 9577bec..1cdb167 100644 --- a/man/community_generate.Rd +++ b/man/community_generate.Rd @@ -10,12 +10,15 @@ community_generate( ) } \arguments{ -\item{community_data_table}{dataframe containing at least one of \code{AOU}, \code{scientific_name}, or \code{mean_size} and a column for species abundances} +\item{community_data_table}{dataframe containing at least one of \code{AOU}, +\code{scientific_name}, or \code{mean_size} and a column for species abundances} -\item{abundance_column_name}{character, the name of the column with species abundances. Defaults to "speciestotal".} +\item{abundance_column_name}{character, the name of the column with species +abundances. 
Defaults to "speciestotal".} } \value{ -a dataframe one row per individual, all columns from \code{community_data_table}, and additional columns for species attributes. +a dataframe one row per individual, all columns from +\code{community_data_table}, and additional columns for species attributes. Specifically: \itemize{ @@ -33,7 +36,9 @@ Specifically: } } \description{ -For a community (i.e. a collection of populations of different species, or of the same species at different points in time or locations, etc), simulate individual-level size and metabolic rate measurements. +For a community (i.e. a collection of populations of different species, or of +the same species at different points in time or locations, etc), simulate +individual-level size and metabolic rate measurements. } \examples{ diff --git a/man/filter_bbs_survey.Rd b/man/filter_bbs_survey.Rd index ba1b557..76db21f 100644 --- a/man/filter_bbs_survey.Rd +++ b/man/filter_bbs_survey.Rd @@ -10,10 +10,13 @@ filter_bbs_survey(bbs_survey_data) \item{bbs_survey_data}{data frame with columns for species and AOU} } \value{ -bbs_survey_data with unidentified species, nightbirds, waterbirds, non-targets removed +bbs_survey_data with unidentified species, nightbirds, waterbirds, +non-targets removed } \description{ -The raw data for the Breeding Bird Survey includes unidentified species and some species that are not well-sampled by the BBS methods. This function filters a dataframe to remove those species. +The raw data for the Breeding Bird Survey includes unidentified species and +some species that are not well-sampled by the BBS methods. This function +filters a dataframe to remove those species. 
} \examples{ head(filter_bbs_survey(demo_route_raw)) diff --git a/man/generate_sd_table.Rd b/man/generate_sd_table.Rd index f2a50ab..9d2bb3b 100644 --- a/man/generate_sd_table.Rd +++ b/man/generate_sd_table.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/species_data_functions.R \name{generate_sd_table} \alias{generate_sd_table} -\title{Generate table of species-level means for the mean and standard deviation of body mass for species in the Breeding Bird Survey} +\title{Generate table of species-level means for the mean and standard deviation of +body mass for species in the Breeding Bird Survey} \usage{ generate_sd_table(raw_size_data) } @@ -10,15 +11,23 @@ generate_sd_table(raw_size_data) \item{raw_size_data}{the \code{raw_masses} dataframe} } \value{ -a dataframe of species-level means for mean body size and standard deviation of body size +a dataframe of species-level means for mean body size and standard +deviation of body size } \description{ -Goes from the \code{raw_masses} dataframe (included in \code{bbssize}) of records of species' mean and (where provided) standard deviation of body mass from the CRC Handbook (Dunning 2008) to a table of species-level means for the mean and standard deviation of body mass, incorporating estimates for missing standard deviation records and resolving taxonomic updates between the publication of the CRC Handbook and present releases of the Breeding Bird Survey dataset (Paradieck et al. 2019). +Goes from the \code{raw_masses} dataframe (included in \code{bbssize}) of records of +species' mean and (where provided) standard deviation of body mass from the +CRC Handbook (Dunning 2008) to a table of species-level means for the mean +and standard deviation of body mass, incorporating estimates for missing +standard deviation records and resolving taxonomic updates between the +publication of the CRC Handbook and present releases of the Breeding Bird +Survey dataset (Paradieck et al. 2019). 
} \references{ -\itemize{ -\item{Dunning, J. B. (2008). CRC handbook of avian body masses (2nd ed.). CRC Press.} -\item{Pardieck, K. L., Ziolkowski, D. J., Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey. https://doi.org/10.5066/P9HE8XYJ} -} +\itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body +masses (2nd ed.). CRC Press.} \item{Pardieck, K. L., Ziolkowski, D. J., +Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding +Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey. +https://doi.org/10.5066/P9HE8XYJ} } } \keyword{internal} diff --git a/man/get_sd_parameters.Rd b/man/get_sd_parameters.Rd index 38e3670..77c6b22 100644 --- a/man/get_sd_parameters.Rd +++ b/man/get_sd_parameters.Rd @@ -7,17 +7,26 @@ get_sd_parameters(raw_size_data) } \arguments{ -\item{raw_size_data}{dataframe of species' mean and standard deviation body sizes; use the included \code{raw_masses} data table.} +\item{raw_size_data}{dataframe of species' mean and standard deviation body +sizes; use the included \code{raw_masses} data table.} } \value{ list of \verb{$slope} and \verb{$intercept} from the linear model fit } \description{ -Calculates parameters for a (log-log linear) scaling relationship between the mean and standard deviation of a species' mean body size. Given a table of species with known mean and standard deviation body sizes, fits a linear model of the form \code{log(var(body_size)) ~ log(mean(body_size))} and extracts parameter estimates, which can then be used to estimate the standard deviation of body mass for a species based only on its mean body mass. See also Thibault et al. (2011) for this method applied to the Breeding Bird Survey. +Calculates parameters for a (log-log linear) scaling relationship between the +mean and standard deviation of a species' mean body size. 
Given a table of +species with known mean and standard deviation body sizes, fits a linear +model of the form \code{log(var(body_size)) ~ log(mean(body_size))} and extracts +parameter estimates, which can then be used to estimate the standard +deviation of body mass for a species based only on its mean body mass. See +also Thibault et al. (2011) for this method applied to the Breeding Bird +Survey. } \references{ -\itemize{ -\item{Thibault, K. M., White, E. P., Hurlbert, A. H., & Ernest, S. K. M. (2011). Multimodality in the individual size distributions of bird communities. Global Ecology and Biogeography, 20(1), 145–153. https://doi.org/10.1111/j.1466-8238.2010.00576.x} -} +\itemize{ \item{Thibault, K. M., White, E. P., Hurlbert, A. H., & +Ernest, S. K. M. (2011). Multimodality in the individual size distributions +of bird communities. Global Ecology and Biogeography, 20(1), 145–153. +https://doi.org/10.1111/j.1466-8238.2010.00576.x} } } \keyword{internal} diff --git a/man/get_sp_mean_size.Rd b/man/get_sp_mean_size.Rd index e4f016b..59b2b07 100644 --- a/man/get_sp_mean_size.Rd +++ b/man/get_sp_mean_size.Rd @@ -2,22 +2,28 @@ % Please edit documentation in R/species_data_functions.R \name{get_sp_mean_size} \alias{get_sp_mean_size} -\title{Summarize records of mean and standard deviation of body mass to species-level means} +\title{Summarize records of mean and standard deviation of body mass to +species-level means} \usage{ get_sp_mean_size(sd_dat) } \arguments{ -\item{sd_dat}{dataframe of mean and standard deviation of body mass for all records for all species; generated by \code{add_estimated_sds}} +\item{sd_dat}{dataframe of mean and standard deviation of body mass for all +records for all species; generated by \code{add_estimated_sds}} } \value{ -\code{sd_dat} summarized to species-level means for the mean and standard deviation of body mass +\code{sd_dat} summarized to species-level means for the mean and standard +deviation of body mass } \description{ -The 
CRC Handbook (Dunning 2008) often contains multiple records for mean body mass (and standard deviation of body mass) for a species, drawn from different locations or for different sexes. This function summarizes across all records for each species to produce species-level means for the mean and standard deviation of body mass. +The CRC Handbook (Dunning 2008) often contains multiple records for mean body +mass (and standard deviation of body mass) for a species, drawn from +different locations or for different sexes. This function summarizes across +all records for each species to produce species-level means for the mean and +standard deviation of body mass. } \references{ -\itemize{ -\item{Dunning, J. B. (2008). CRC handbook of avian body masses (2nd ed.). CRC Press.} -} +\itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body +masses (2nd ed.). CRC Press.} } } \keyword{internal} diff --git a/man/ind_draw.Rd b/man/ind_draw.Rd index 37f1d60..b1f5d4b 100644 --- a/man/ind_draw.Rd +++ b/man/ind_draw.Rd @@ -21,6 +21,7 @@ ind_draw( vector of individuals' simulated body masses } \description{ -This is not a user-facing function; it is the random number generator under-the-hood for \link{pop_generate}. +This is not a user-facing function; it is the random number generator +under-the-hood for \link{pop_generate}. } \keyword{internal} diff --git a/man/individual_metabolic_rate.Rd b/man/individual_metabolic_rate.Rd index a7f3f9b..f8217c1 100644 --- a/man/individual_metabolic_rate.Rd +++ b/man/individual_metabolic_rate.Rd @@ -13,13 +13,14 @@ individual_metabolic_rate(mass) estimated basal metabolic rate } \description{ -Given an individual's body mass (in grams), use allometric scaling (Fristoe 2015) to estimate basal metabolic rate. +Given an individual's body mass (in grams), use allometric scaling (Fristoe +2015) to estimate basal metabolic rate. } \examples{ individual_metabolic_rate(10) } \references{ -\itemize{ -\item{Fristoe, T. S. (2015). 
Energy use by migrants and residents in North American breeding bird communities. Global Ecology and Biogeography, 24(4), 406–415. https://doi.org/10.1111/geb.12262} -} +\itemize{ \item{Fristoe, T. S. (2015). Energy use by migrants and +residents in North American breeding bird communities. Global Ecology and +Biogeography, 24(4), 406–415. https://doi.org/10.1111/geb.12262} } } diff --git a/man/pop_generate.Rd b/man/pop_generate.Rd index b63b72d..1f3e427 100644 --- a/man/pop_generate.Rd +++ b/man/pop_generate.Rd @@ -16,7 +16,8 @@ pop_generate( \arguments{ \item{abundance}{integer number of individuals to draw. \emph{Required}.} -\item{AOU}{the numeric AOU code used for this species in the Breeding Bird Survey} +\item{AOU}{the numeric AOU code used for this species in the Breeding Bird +Survey} \item{scientific_name}{as "Genus species"} @@ -27,7 +28,8 @@ pop_generate( \item{sim_species_id}{defaults AOU or 1} } \value{ -a dataframe with \code{abundance} rows - one record per individual - and columns for species attributes. +a dataframe with \code{abundance} rows - one record per individual - and +columns for species attributes. Specifically: \itemize{ @@ -43,10 +45,13 @@ Specifically: } } \description{ -Draws body mass measurements for a population of birds (of all the same species) given the population size and either (1) the species AOU or (2) the mean and potentially standard deviation of body mass for that species. +Draws body mass measurements for a population of birds (of all the same +species) given the population size and either (1) the species AOU or (2) the +mean and potentially standard deviation of body mass for that species. } \details{ -\code{abundance} is required, as well as \emph{one of}: \code{AOU}, \code{scientific_name}, or \code{mean_size}. +\code{abundance} is required, as well as \emph{one of}: \code{AOU}, \code{scientific_name}, or +\code{mean_size}. 
} \examples{ diff --git a/man/species_define.Rd b/man/species_define.Rd index 7075f4f..0f83c7e 100644 --- a/man/species_define.Rd +++ b/man/species_define.Rd @@ -13,7 +13,8 @@ species_define( ) } \arguments{ -\item{AOU}{the numeric AOU code used for this species in the Breeding Bird Survey} +\item{AOU}{the numeric AOU code used for this species in the Breeding Bird +Survey} \item{scientific_name}{the species' scientific name, as "Genus species"} @@ -21,16 +22,21 @@ species_define( \item{sd_size}{sd of body size} -\item{sim_species_id}{identifier; if using taxonomic info, defaults to AOU. If not, defaults to 1. Supplying other values can be useful for simulation models.} +\item{sim_species_id}{identifier; if using taxonomic info, defaults to AOU. +If not, defaults to 1. Supplying other values can be useful for simulation +models.} } \value{ list with species parameter information } \description{ -Creates a list with taxonomic/identifying information and parameters for mean and standard deviation of body mass. +Creates a list with taxonomic/identifying information and parameters for mean +and standard deviation of body mass. } \details{ -The identifying information used depends on which parameters are provided, with the following order of preference: AOU > scientific name > user provided mean and sd > user provided mean and estimated sd. +The identifying information used depends on which parameters are provided, +with the following order of preference: AOU > scientific name > user provided +mean and sd > user provided mean and estimated sd. 
} \examples{ species_define(AOU = 2881) diff --git a/man/species_estimate_sd.Rd b/man/species_estimate_sd.Rd index a33a060..a624b44 100644 --- a/man/species_estimate_sd.Rd +++ b/man/species_estimate_sd.Rd @@ -2,19 +2,23 @@ % Please edit documentation in R/species_data_functions.R \name{species_estimate_sd} \alias{species_estimate_sd} -\title{Estimate a species' standard deviation of body mass based on its mean body mass} +\title{Estimate a species' standard deviation of body mass based on its mean body +mass} \usage{ species_estimate_sd(sp_mean, pars = NULL) } \arguments{ \item{sp_mean}{mean body mass, in grams} -\item{pars}{list containing \verb{$slope} and \verb{$intercept}, generated by \code{\link[=get_sd_parameters]{get_sd_parameters()}}. If not provided, estimated by running \code{get_sd_parameters(raw_masses)}.} +\item{pars}{list containing \verb{$slope} and \verb{$intercept}, generated by +\code{\link[=get_sd_parameters]{get_sd_parameters()}}. If not provided, estimated by running +\code{get_sd_parameters(raw_masses)}.} } \value{ the estimated standard deviation of body mass } \description{ -Using the parameters estimated in \code{get_sd_parameters}, estimate the standard deviation of body mass for a species based only on its mean body mass. +Using the parameters estimated in \code{get_sd_parameters}, estimate the standard +deviation of body mass for a species based only on its mean body mass. 
} \keyword{internal} diff --git a/man/species_lookup.Rd b/man/species_lookup.Rd index fbf843b..8867899 100644 --- a/man/species_lookup.Rd +++ b/man/species_lookup.Rd @@ -7,15 +7,18 @@ species_lookup(AOU = NA_integer_, scientific_name = NA_character_) } \arguments{ -\item{AOU}{the numeric AOU code used for this species in the Breeding Bird Survey} +\item{AOU}{the numeric AOU code used for this species in the Breeding Bird +Survey} \item{scientific_name}{the species' scientific name, as "Genus species"} } \value{ -data frame with columns AOU, genus, species, mean_mass, mean_sd, contains_estimates, scientific_name +data frame with columns AOU, genus, species, mean_mass, mean_sd, +contains_estimates, scientific_name } \description{ -Given either AOU or scientific name, looks up a species' taxonomic information and mean and standard deviation of body size in \link{sd_table}. +Given either AOU or scientific name, looks up a species' taxonomic +information and mean and standard deviation of body size in \link{sd_table}. } \examples{ species_lookup(AOU = 2881) diff --git a/man/toy_size_community.Rd b/man/toy_size_community.Rd index 4fe6737..4354a13 100644 --- a/man/toy_size_community.Rd +++ b/man/toy_size_community.Rd @@ -16,6 +16,7 @@ A data frame with 5 rows and 3 variables: toy_size_community } \description{ -This data table is a toy data frame for the vignettes. It has abundances and mean body sizes for 5 species to make up a hypothetical community. +This data table is a toy data frame for the vignettes. It has abundances and +mean body sizes for 5 species to make up a hypothetical community. } \keyword{datasets} diff --git a/man/toy_species_name_community.Rd b/man/toy_species_name_community.Rd index 2e39d82..ba92b25 100644 --- a/man/toy_species_name_community.Rd +++ b/man/toy_species_name_community.Rd @@ -15,6 +15,7 @@ A data frame with 5 rows and 2 variables: toy_species_name_community } \description{ -This data table is a toy data frame for the vignettes. 
It has abundances and scientific names for 5 species to make up a hypothetical community. +This data table is a toy data frame for the vignettes. It has abundances and +scientific names for 5 species to make up a hypothetical community. } \keyword{datasets} diff --git a/review_edits.md b/review_edits.md index 55c971a..3b1a0b1 100644 --- a/review_edits.md +++ b/review_edits.md @@ -331,15 +331,26 @@ I've also added sections right at the beginning of both the README and Getting S # From editor: -* I'd recommend running desc::desc_normalize(): it will order DESCRIPTION fields in a standard way and order dependencies alphabetically. _Done!_ -* You can remove the {remotes} installation instructions and keep the {devtools} ones only as {devtools} calls {remotes} (might call {pak} in the future?) and is the interface for users. _Done!_ -* It'd be nice to add grouping to the reference index https://pkgdown.r-lib.org/reference/build_reference.html#reference-index Given your package's naming scheme, you might want to use the starts_with() helper. _I haven't done this because there are so few functions, it seems to me like it verges on redundant. I'm happy to revisit this!_ -* In the test filenames, why use numbers? If the R files and test files have the same basename, it's easier to navigate between the two in RStudio IDE. https://r-pkgs.org/testing-basics.html#test-organisation _Ah, I learned to use the numbers so that tests fail informatively in order. The tests also don't - all - correspond directly to RScripts. Is this a major barrier? If so, I'm happy to revisit!_ -* In your test files and scripts I see a lot of vertical space, more than one empty lines between some elements. I'd recommend using one empty line only as it increases the amount of code one can see at a time on the screen. It's still nice to organize the code in paragraphs, I am not suggesting to remove all empty lines. 
😁 _I think this has been largely addressed via the re-styling I've done, but please let me know if the issue peresists!_ -* In the README instead of "Package summary" as a header you could use the same text as this issue "birdsize: Estimate avian body size distributions" which is more informative. _Done!_ -* Regarding data yes it might make sense to contact the author of the included dataset? _Done! I've reached out and Dr. Dunning agreed to be listed as a contributor. I've sent a more recent-follow up with the revisions and giving him the opportunity to confirm, and haven't heard back yet._ -* For comments such as https://github.com/diazrenata/birdsize/blob/7469df457989a9016ecc3761b0dd125497a3be51/R/simulate_community.R#L51 if you add 4 hyphens afterwards ----, the comment will appear in the script outline on the right in RStudio IDE (if you use that IDE), helping code navigation. https://blog.r-hub.io/2023/01/26/code-comments-self-explaining-code/#use-comments-for-the-scripts-outline _I've added these to the long community_generate script_. -* why have this "trivial" test file? https://github.com/diazrenata/birdsize/blob/main/tests/testthat/test-01_trivial.R _I use this to test that CI is working, but it's not needed now the package is built. Deleted!_ -* in test code you don't need to write birdsize::: as testthat loads your package code. Example https://github.com/diazrenata/birdsize/blob/7469df457989a9016ecc3761b0dd125497a3be51/tests/testthat/test-02_included_data.R#L13 _These are removed!_ -* why are some expectations commented out? https://github.com/diazrenata/birdsize/blob/7469df457989a9016ecc3761b0dd125497a3be51/tests/testthat/test-07_simulate_community.R#L34 _These were vestigial, removed!_ +* I'd recommend running desc::desc_normalize(): it will order DESCRIPTION fields in a standard way and order dependencies alphabetically. + * *Response*: Done! 
+* You can remove the {remotes} installation instructions and keep the {devtools} ones only as {devtools} calls {remotes} (might call {pak} in the future?) and is the interface for users. + * *Response*: Done! +* It'd be nice to add grouping to the reference index https://pkgdown.r-lib.org/reference/build_reference.html#reference-index Given your package's naming scheme, you might want to use the starts_with() helper. + * *Response*: I haven't done this because there are so few functions, it seems to me like it verges on redundant. I'm happy to revisit this! +* In the test filenames, why use numbers? If the R files and test files have the same basename, it's easier to navigate between the two in RStudio IDE. https://r-pkgs.org/testing-basics.html#test-organisation + * *Response*: Ah, I learned to use the numbers so that tests fail informatively in order. The tests also don't - all - correspond directly to RScripts. Is this a major barrier? If so, I'm happy to revisit! +* In your test files and scripts I see a lot of vertical space, more than one empty lines between some elements. I'd recommend using one empty line only as it increases the amount of code one can see at a time on the screen. It's still nice to organize the code in paragraphs, I am not suggesting to remove all empty lines. 😁 + * *Response*: I think this has been largely addressed via the re-styling I've done, but please let me know if the issue persists! +* In the README instead of "Package summary" as a header you could use the same text as this issue "birdsize: Estimate avian body size distributions" which is more informative. + * *Response*: Done! +* Regarding data yes it might make sense to contact the author of the included dataset? + * *Response*: Done! I've reached out and Dr. Dunning agreed to be listed as a contributor. I've sent a more recent follow-up with the revisions, giving him the opportunity to confirm. I'm still waiting to hear back there, and will let you know.
+* For comments such as https://github.com/diazrenata/birdsize/blob/7469df457989a9016ecc3761b0dd125497a3be51/R/simulate_community.R#L51 if you add 4 hyphens afterwards ----, the comment will appear in the script outline on the right in RStudio IDE (if you use that IDE), helping code navigation. https://blog.r-hub.io/2023/01/26/code-comments-self-explaining-code/#use-comments-for-the-scripts-outline + * *Response*: I've added these to the long community_generate script. +* why have this "trivial" test file? https://github.com/diazrenata/birdsize/blob/main/tests/testthat/test-01_trivial.R + * *Response*: I use this to test that CI is working, but it's not needed now the package is built. Deleted! +* in test code you don't need to write birdsize::: as testthat loads your package code. Example https://github.com/diazrenata/birdsize/blob/7469df457989a9016ecc3761b0dd125497a3be51/tests/testthat/test-02_included_data.R#L13 + * *Response*: These are removed! +* why are some expectations commented out? https://github.com/diazrenata/birdsize/blob/7469df457989a9016ecc3761b0dd125497a3be51/tests/testthat/test-07_simulate_community.R#L34 + * *Response*: These were vestigial, removed!