diff --git a/.gitignore b/.gitignore index b969a87..f6c20f7 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,5 @@ doc Meta acadia_national_park/ shapefiles/ - +joss/ docs/ diff --git a/R/compare_periods.R b/R/compare_periods.R index 631bc30..14542ec 100644 --- a/R/compare_periods.R +++ b/R/compare_periods.R @@ -84,27 +84,29 @@ compare_periods <- function( # Combine into one data frame cdf <- rbind(df1, df2) + # Add periods in case that's helpful + if ( length(unique(reference_period)) == 1 ) { + ref_print = as.character(reference_period[1]) + } else { + ref_print = paste(reference_period[1], reference_period[2], sep = " - ") + } + if ( length(unique(target_period)) == 1 ) { + target_print = as.character(target_period[1]) + } else { + target_print = paste(target_period[1], target_period[2], sep = " - ") + } + cdf["reference_period"] <- ref_print + cdf["target_period"] <- target_print + # Rearrange columns - cdf <- cdf[c("model", "rcp", "variable", "units", "value_reference", "value_target", - "difference" )] + cdf <- cdf[c("model", "rcp", "parameter", "units", "reference_period", "target_period", + "reference_value", "target_value", "difference" )] return(cdf) } - -# convert file reference object temperature from kelvin to celsius if necessary -convert_temperature <- function(file_df, column) { - temperature_rows <- grep("air_temperature", file_df$parameter_long) - temperature_kelvin <- unlist(file_df[[column]][temperature_rows]) - temperature_c <- temperature_kelvin - 273.15 - file_df[[column]][temperature_rows] <- temperature_c - file_df$units[temperature_rows] <- "C" - return(file_df) -} - - -# Get the difference in values for one variable -df_difference <- function(df, variable, agg_fun, target_period, reference_period, month_map) { +# Get the difference in values for one parameter +df_difference <- function(df, parameter, agg_fun, target_period, reference_period, month_map) { # Match the aggregation function string to a function tryCatch({ @@ -116,35 +118,35 @@ df_difference <- function(df, variable, agg_fun, target_period, reference_period # Reference Period df_ref <- df %>% - dplyr::select(.data$rcp, .data$model, .data$year, .data$month, variable) %>% - dplyr::filter(.data$month %in% month_map[[variable]]) %>% + dplyr::select(.data$rcp, .data$model, .data$year, .data$month, parameter) %>% + dplyr::filter(.data$month %in% month_map[[parameter]]) %>% dplyr::filter(.data$year >= reference_period[1], .data$year <= reference_period[2]) %>% dplyr::group_by(.data$rcp, .data$model) %>% - dplyr::summarise("value" = fun(!!as.name(variable))) + dplyr::summarise("value" = fun(!!as.name(parameter))) # Target Period df_tar <- df %>% - dplyr::select(.data$rcp, .data$model, .data$year, .data$month, variable) %>% - dplyr::filter(.data$month %in% month_map[[variable]]) %>% + dplyr::select(.data$rcp, .data$model, .data$year, .data$month, parameter) %>% + dplyr::filter(.data$month %in% month_map[[parameter]]) %>% dplyr::filter(.data$year >= target_period[1], .data$year <= target_period[2]) %>% dplyr::group_by(.data$rcp, .data$model) %>% - dplyr::summarise("value" = fun(!!as.name(variable))) - + dplyr::summarise("value" = fun(!!as.name(parameter))) + # Join - df = dplyr::left_join(df_ref, df_tar, by = c("rcp", "model"), - suffix = c("_reference", "_target")) - + df <- dplyr::left_join(df_ref, df_tar, by = c("rcp", "model")) + names(df) <- c("rcp", "model", "reference_value", "target_value") + # Find the difference in values between target and reference periods - df = df %>% dplyr::mutate(difference = .data$value_target - .data$value_reference) + df = df %>% dplyr::mutate(difference = .data$target_value - .data$reference_value) # Add variable name - df$variable <- variable + df$parameter <- parameter # Add in units arg_ref <- Argument_Reference() - internal_var <- as.character(arg_ref$variables[ variable]) + internal_var <- as.character(arg_ref$variables[parameter]) units = as.character(arg_ref$units[internal_var]) df$units <- units diff --git a/R/cst_df.R b/R/cst_df.R index 7092e0b..c385a41 100644 --- a/R/cst_df.R +++ b/R/cst_df.R @@ -1,4 +1,6 @@ -#' Generate a climate data frame +#' Generate a climate data frame of spatial averages from the file +#' reference object generated by cstdata. Averages represent all +#' grid cells within or touching the area of interest provided to cstdata. #' #' @param file_reference A file reference data frame generated by `cstdata()` #' @param ncores Number of cores to use (int) @@ -19,15 +21,18 @@ #' #' @export cst_df <- function(file_reference, ncores = 1) { + + # Create long form data frame message("Computing spatial averages...") cl <- parallel::makeCluster(ncores) on.exit(parallel::stopCluster(cl)) - df <- pbapply::pbapply(file_reference, - MARGIN = 1, - FUN = r_to_df, + df <- pbapply::pbapply(file_reference, + MARGIN = 1, + FUN = r_to_df, cl = cl) %>% dplyr::bind_rows() - + + # Conver to wide form message("Generating climate data.frame...") wide_df <- split(df, df$date) %>% pbapply::pblapply(FUN = tidyr::pivot_wider, @@ -57,3 +62,4 @@ r_to_df <- function(df_row) { area_name = df_row["area_name"] ) } + diff --git a/man/cst_df.Rd b/man/cst_df.Rd index be0d416..32595ff 100644 --- a/man/cst_df.Rd +++ b/man/cst_df.Rd @@ -2,7 +2,9 @@ % Please edit documentation in R/cst_df.R \name{cst_df} \alias{cst_df} -\title{Generate a climate data frame} +\title{Generate a climate data frame of spatial averages from the file +reference object generated by cstdata. Averages represent all +grid cells within or touching the area of interest provided to cstdata.} \usage{ cst_df(file_reference, ncores = 1) } @@ -15,7 +17,9 @@ cst_df(file_reference, ncores = 1) A tibble of climate data averaged spatially over an area of interest. } \description{ -Generate a climate data frame +Generate a climate data frame of spatial averages from the file +reference object generated by cstdata. Averages represent all +grid cells within or touching the area of interest provided to cstdata. } \examples{ \dontrun{ diff --git a/tests/testthat/test-compare_periods.R b/tests/testthat/test-compare_periods.R index d33cd90..a1cf603 100644 --- a/tests/testthat/test-compare_periods.R +++ b/tests/testthat/test-compare_periods.R @@ -27,16 +27,6 @@ test_that("Months can be formatted from integers", { expect_identical(month_string, c("Jan", "Feb", "Mar")) }) -test_that("Temperature conversion succeeds", { - test_df <- data.frame(parameter_long = "air_temperature", - units = "K", - value = 273.15, - stringsAsFactors = FALSE) - out_df <- convert_temperature(test_df, "value") - expect_equal(out_df$value, 0) - expect_equal(out_df$units, "C") -}) - test_that("Invalid filter params raise errors", { expect_error(compare_periods(df, var1 = "foo", var2 = "bar"), regexp = "The requested variables are not present") @@ -104,4 +94,3 @@ test_that("Providing invalid scenario raises errors.", { scenarios = "rcp9000"), regexp = "The requested scenarios are not present") }) - diff --git a/vignettes/cst-intro.Rmd b/vignettes/cst-intro.Rmd index 5168356..b01673b 100644 --- a/vignettes/cst-intro.Rmd +++ b/vignettes/cst-intro.Rmd @@ -1,7 +1,7 @@ --- title: "Getting started with the Climate Scenarios Toolbox" author: "Travis Williams and Max Joseph" -date: "2020-03-25" +date: "2020-04-06" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{cst} @@ -47,17 +47,56 @@ scale estimates. The cst package uses one downscaled climate model called MACA (Multivariate Adaptive Climate Analog) Version 2 ([details here](http://www.climatologylab.org/maca.html)). + +## Installation + +There are a few steps required for the `cst` package to work: + +1) Install +[rgdal](https://cran.r-project.org/web/packages/rgdal/rgdal.pdf) and +[ncdf4](https://cran.r-project.org/web/packages/ncdf4/ncdf4.pdf), +which are common R packages used for dealing with gridded data sets. Both of +these packages depend on system level libraries who installations depend on +the user's operating system. + +2) Install `cst` using the package `remotes` as follows: + +```r +# install.packages("remotes") +remotes::install_github("earthlab/cst") +``` + +3) Install Python dependencies. To provide a consistent and relatively quick way +to retrieve these datasets from their source, a few Python dependencies are also +required. In `cst` these are accessed automatically through the `reticulate` R +package, and you will not need to know any Python, but you will need to have a +working installation. We recommend using Conda because it is the simplest way to +ensure a functioning installation and it manages dependencies well. First make +sure you have conda installed +([see instructions here](https://docs.conda.io/en/latest/miniconda.html)), then +you can install the python dependencies with: + + + +```r +cst::install_py_deps() +#> Using virtual environment 'cst' ... +#> +#> Installation complete. +reticulate::use_condaenv("cst") +``` + + ### Acquiring and subsetting data within National Park Service boundaries This package was originally written with the National Park Service in mind, so -it has the option to use the name of any park within the NPS. Use the -`cstdata()` function to specify a range of years, a set of models, a set of -parameters, and a set of representative concentration pathways to return. +it has the option to use the name of any park (or monument, preserve, etc.) within +the NPS. Use the `cstdata()` function to specify a range of years, a set of models, +a set of parameters, and a set of representative concentration pathways to return. Leaving these arguments empty will results in a download of all available data for that location. - ```r library(cst) library(tibble) @@ -69,23 +108,14 @@ library(dplyr) library(reticulate) ``` -If you haven't already installed the python dependencies for the package using -conda, first make sure you have conda installed -([see instructions here](https://docs.conda.io/en/latest/miniconda.html)), then -you can install the python dependencies with: - - -```r -cst::install_py_deps() -use_condaenv("cst") -``` - ```r -# choose a project directory to store data +# Choose a project directory to store data +# (Leaving this argument empty will download data to a temporary directory) + proj_dir <- "~" # download data @@ -95,9 +125,20 @@ file_refs <- cstdata(park = "Wind Cave National Park", parameters = c("tasmin", "tasmax", "uas", "vas"), ncores = parallel::detectCores()) #> [1] "Retrieving area of interest boundaries..." +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files #> [1] "Building area of interest grid..." +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files +#> NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files #> [1] "Retrieving climate data for wind_cave_national_park" -#> [1] "Saving local files to /home/mjoseph/wind_cave_national_park" +#> [1] "Saving local files to /home/travis/wind_cave_national_park" ``` The output of `cstdata` is a data.frame, where each row corresponds to one file: @@ -105,10 +146,10 @@ The output of `cstdata` is a data.frame, where each row corresponds to one file: ```r glimpse(file_refs) -#> Observations: 160 -#> Variables: 13 +#> Rows: 160 +#> Columns: 13 #> $ local_file "tasmin_wind_cave_national_park_bcc-csm1-1_r1i1p1_rc… -#> $ local_path "/home/mjoseph/wind_cave_national_park/tasmin_wind_c… +#> $ local_path "/home/travis/wind_cave_national_park/tasmin_wind_ca… #> $ model "bcc-csm1-1", "bcc-csm1-1", "bcc-csm1-1", "bcc-csm1-… #> $ parameter "tasmin", "tasmin", "tasmax", "tasmax", "uas", "uas"… #> $ rcp "rcp45", "rcp85", "rcp45", "rcp85", "rcp45", "rcp85"… @@ -134,13 +175,14 @@ df <- cst_df(file_refs, ncores = parallel::detectCores()) Now, we have a tibble where each row represents a day by model by scenario combination, where the climate parameters of interest are represented as -columns : +columns. Note that, in the output of `cst_df()`, average values represent all 2km by +2km grid cells that touch the Park boundary: ```r glimpse(df) -#> Observations: 891,240 -#> Variables: 9 +#> Rows: 891,240 +#> Columns: 9 #> $ rcp "rcp45", "rcp85", "rcp45", "rcp85", "rcp45", "rcp85", "rcp4… #> $ date 1980-01-01, 1980-01-01, 1980-01-01, 1980-01-01, 1980-01-01… #> $ model "bcc-csm1-1", "bcc-csm1-1", "bcc-csm1-1-m", "bcc-csm1-1-m",… @@ -273,8 +315,8 @@ growing_seasons %>% ## Comparing climate in two time periods -Use the file reference object that is returned from cst as an input to -`compare_periods` to compare climate between a reference and target period. You +Use the tibble object that is returned from `cst_df()` as an input to +`compare_periods()` to compare climate between a reference and target period. You may specify the function with which to aggregate your chosen variable as well as the yearly time period months of the year to include in this calculation. @@ -297,16 +339,18 @@ and reference period. ```r glimpse(comps) -#> Observations: 80 -#> Variables: 7 +#> Rows: 80 +#> Columns: 9 #> Groups: rcp [2] -#> $ model "bcc-csm1-1", "bcc-csm1-1-m", "BNU-ESM", "CanESM2", "… -#> $ rcp "rcp45", "rcp45", "rcp45", "rcp45", "rcp45", "rcp45",… -#> $ variable "tasmin", "tasmin", "tasmin", "tasmin", "tasmin", "ta… -#> $ units "K", "K", "K", "K", "K", "K", "K", "K", "K", "K", "K"… -#> $ value_reference 284.0221, 283.8716, 284.1512, 283.6515, 283.9051, 283… -#> $ value_target 284.9774, 285.0160, 285.6242, 286.2288, 285.4229, 285… -#> $ difference 0.9553126, 1.1443937, 1.4730053, 2.5772584, 1.5177512… +#> $ model "bcc-csm1-1", "bcc-csm1-1-m", "BNU-ESM", "CanESM2", … +#> $ rcp "rcp45", "rcp45", "rcp45", "rcp45", "rcp45", "rcp45"… +#> $ parameter "tasmin", "tasmin", "tasmin", "tasmin", "tasmin", "t… +#> $ units "K", "K", "K", "K", "K", "K", "K", "K", "K", "K", "K… +#> $ reference_period "1980 - 2000", "1980 - 2000", "1980 - 2000", "1980 -… +#> $ target_period "2020 - 2040", "2020 - 2040", "2020 - 2040", "2020 -… +#> $ reference_value 284.0221, 283.8716, 284.1512, 283.6515, 283.9051, 28… +#> $ target_value 284.9774, 285.0160, 285.6242, 286.2288, 285.4229, 28… +#> $ difference 0.9553126, 1.1443937, 1.4730053, 2.5772584, 1.517751… ``` One useful plot shows the difference in the two variables between reference and @@ -314,21 +358,24 @@ target periods: ```r +title <- paste(comps$reference_period, comps$target_period, sep= " vs " )[1] + comps %>% - dplyr::select(parameter, rcp, model, year1, year2, diff_summary) %>% - pivot_wider(names_from = parameter, values_from = diff_summary) %>% + dplyr::select(parameter, rcp, model, reference_period, target_period, difference) %>% + pivot_wider(names_from = parameter, values_from = difference) %>% ggplot(aes(tasmin, tasmax, color = rcp)) + + ggtitle(title) + geom_point() + geom_hline(yintercept = 0, alpha = .2) + geom_vline(xintercept = 0, alpha = .2) + geom_text_repel(aes(label = model), segment.size = .3, size = 3) + xlab("Difference in min. temperature (C)") + ylab("Difference in max. temperature (C)") + - scale_color_manual(values = c("dodgerblue", "red")) -#> Error: Can't subset columns that don't exist. -#> x The column `parameter` doesn't exist. + scale_color_manual(values = c("dodgerblue", "red")) ``` +plot of chunk plot-comps + So, nearly all model runs indicate warming, but the amount of warming varies by model and emissions scenario. @@ -369,7 +416,7 @@ references$scenarios #> [1] "rcp45" "rcp85" ``` -And here are the climate parameters +And here are the climate parameters: ```r @@ -378,6 +425,26 @@ references$parameters #> [9] "huss" "vpd" ``` +Labels for each acronym are also available, for example: + + +```r +references$labels["vpd"] +#> $vpd +#> [1] "Vapor Pressure Deficit" +references$labels["CCSM4"] +#> $CCSM4 +#> [1] "Community Climate System Model 4" + +# For hyphenated acronyms, use them either as objects or with backticks +model <- "IPSL-CM5A-MR" +references$labels[model] +#> $`IPSL-CM5A-MR` +#> [1] "Institut Pierre Simon Laplace (IPSL) - Climate Model 5A - Medium Resolution" +references$labels$`IPSL-CM5A-MR` +#> [1] "Institut Pierre Simon Laplace (IPSL) - Climate Model 5A - Medium Resolution" +``` + Not every model has the same set of parameters available, and the `get_args` method lists model-specific information. @@ -398,7 +465,7 @@ references$get_args("CCSM4") ## Why write the cst package? The amount of data generated by downscaled GCMs can be quite large -(e.g., daily data at a few km spatial resolution). +(e.g., daily data at a few km spatial resolution). The Climate Scenarios Toolbox was developed to help users access and use smaller subsets. diff --git a/vignettes/cst-intro.Rmd.orig b/vignettes/cst-intro.Rmd.orig index a78aeba..f4a1183 100644 --- a/vignettes/cst-intro.Rmd.orig +++ b/vignettes/cst-intro.Rmd.orig @@ -51,16 +51,51 @@ scale estimates. The cst package uses one downscaled climate model called MACA (Multivariate Adaptive Climate Analog) Version 2 ([details here](http://www.climatologylab.org/maca.html)). + +## Installation + +There are a few steps required for the `cst` package to work: + +1) Install +[rgdal](https://cran.r-project.org/web/packages/rgdal/rgdal.pdf) and +[ncdf4](https://cran.r-project.org/web/packages/ncdf4/ncdf4.pdf), +which are common R packages used for dealing with gridded data sets. Both of +these packages depend on system level libraries who installations depend on +the user's operating system. + +2) Install `cst` using the package `remotes` as follows: + +```r +# install.packages("remotes") +remotes::install_github("earthlab/cst") +``` + +3) Install Python dependencies. To provide a consistent and relatively quick way +to retrieve these datasets from their source, a few Python dependencies are also +required. In `cst` these are accessed automatically through the `reticulate` R +package, and you will not need to know any Python, but you will need to have a +working installation. We recommend using Conda because it is the simplest way to +ensure a functioning installation and it manages dependencies well. First make +sure you have conda installed +([see instructions here](https://docs.conda.io/en/latest/miniconda.html)), then +you can install the python dependencies with: + + +```{r} +cst::install_py_deps() +reticulate::use_condaenv("cst") +``` + + ### Acquiring and subsetting data within National Park Service boundaries This package was originally written with the National Park Service in mind, so -it has the option to use the name of any park within the NPS. Use the -`cstdata()` function to specify a range of years, a set of models, a set of -parameters, and a set of representative concentration pathways to return. +it has the option to use the name of any park (or monument, preserve, etc.) within +the NPS. Use the `cstdata()` function to specify a range of years, a set of models, +a set of parameters, and a set of representative concentration pathways to return. Leaving these arguments empty will results in a download of all available data for that location. - ```{r load-deps, message=FALSE, comment = NA} library(cst) library(tibble) @@ -72,23 +107,15 @@ library(dplyr) library(reticulate) ``` -If you haven't already installed the python dependencies for the package using -conda, first make sure you have conda installed -([see instructions here](https://docs.conda.io/en/latest/miniconda.html)), then -you can install the python dependencies with: - -```{r install-py-deps, eval = FALSE} -cst::install_py_deps() -use_condaenv("cst") -``` - ```{r set-seed, echo = FALSE} set.seed(1234) # for ggrepel, which randomly jitters labels ``` ```{r download_1} -# choose a project directory to store data +# Choose a project directory to store data +# (Leaving this argument empty will download data to a temporary directory) + proj_dir <- "~" # download data @@ -114,7 +141,8 @@ df <- cst_df(file_refs, ncores = parallel::detectCores()) Now, we have a tibble where each row represents a day by model by scenario combination, where the climate parameters of interest are represented as -columns : +columns. Note that, in the output of `cst_df()`, average values represent all 2km by +2km grid cells that touch the Park boundary: ```{r print-df} glimpse(df) @@ -216,8 +244,8 @@ growing_seasons %>% ## Comparing climate in two time periods -Use the file reference object that is returned from cst as an input to -`compare_periods` to compare climate between a reference and target period. You +Use the tibble object that is returned from `cst_df()` as an input to +`compare_periods()` to compare climate between a reference and target period. You may specify the function with which to aggregate your chosen variable as well as the yearly time period months of the year to include in this calculation. @@ -243,18 +271,21 @@ glimpse(comps) One useful plot shows the difference in the two variables between reference and target periods: -```{r plot-comps, fig.height = 5, fig.width = 9} +```{r plot-comps, fig.height = 6, fig.width = 9} +title <- paste(comps$reference_period, comps$target_period, sep= " vs " )[1] + comps %>% - dplyr::select(parameter, rcp, model, year1, year2, diff_summary) %>% - pivot_wider(names_from = parameter, values_from = diff_summary) %>% + dplyr::select(parameter, rcp, model, reference_period, target_period, difference) %>% + pivot_wider(names_from = parameter, values_from = difference) %>% ggplot(aes(tasmin, tasmax, color = rcp)) + + ggtitle(title) + geom_point() + geom_hline(yintercept = 0, alpha = .2) + geom_vline(xintercept = 0, alpha = .2) + geom_text_repel(aes(label = model), segment.size = .3, size = 3) + xlab("Difference in min. temperature (C)") + ylab("Difference in max. temperature (C)") + - scale_color_manual(values = c("dodgerblue", "red")) + scale_color_manual(values = c("dodgerblue", "red")) ``` So, nearly all model runs indicate warming, but the amount of warming varies by @@ -288,12 +319,24 @@ Here are the emissions scenarios: references$scenarios ``` -And here are the climate parameters +And here are the climate parameters: ```{r printparams} references$parameters ``` +Labels for each acronym are also available, for example: + +```{r labels} +references$labels["vpd"] +references$labels["CCSM4"] + +# For hyphenated acronyms, use them either as objects or with backticks +model <- "IPSL-CM5A-MR" +references$labels[model] +references$labels$`IPSL-CM5A-MR` +``` + Not every model has the same set of parameters available, and the `get_args` method lists model-specific information. @@ -305,7 +348,7 @@ references$get_args("CCSM4") ## Why write the cst package? The amount of data generated by downscaled GCMs can be quite large -(e.g., daily data at a few km spatial resolution). +(e.g., daily data at a few km spatial resolution). The Climate Scenarios Toolbox was developed to help users access and use smaller subsets. diff --git a/vignettes/plot-comps-1.png b/vignettes/plot-comps-1.png index eeb5e1d..ba121e6 100644 Binary files a/vignettes/plot-comps-1.png and b/vignettes/plot-comps-1.png differ diff --git a/vignettes/plot-grow-season-1.png b/vignettes/plot-grow-season-1.png index 33db5d4..faac3a0 100644 Binary files a/vignettes/plot-grow-season-1.png and b/vignettes/plot-grow-season-1.png differ diff --git a/vignettes/vpd-timeseries-1.png b/vignettes/vpd-timeseries-1.png index bedfa01..84b6e97 100644 Binary files a/vignettes/vpd-timeseries-1.png and b/vignettes/vpd-timeseries-1.png differ