diff --git a/.buildlibrary b/.buildlibrary index 7164f64..05f5701 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '3512075' +ValidationKey: '3540240' AutocreateReadme: yes AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' @@ -8,3 +8,4 @@ AddInReadme: inst/README.md AddLogoReadme: inst/img/logo.png enforceVersionUpdate: no skipCoverage: no +AutocreateCITATION: yes diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index d85a316..54aa78b 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -44,6 +44,13 @@ jobs: [ -f requirements.txt ] && python -m pip install --upgrade pip wheel || true [ -f requirements.txt ] && pip install -r requirements.txt || true + - name: Run pre-commit checks + shell: bash + run: | + python -m pip install pre-commit + python -m pip freeze --local + pre-commit run --show-diff-on-failure --color=always --all-files + - name: Verify validation key shell: Rscript {0} run: lucode2:::validkey(stopIfInvalid = TRUE) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7dcd45b..3edf90a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,4 +25,4 @@ repos: - id: readme-rmd-rendered - id: use-tidy-description ci: - autoupdate_schedule: quarterly + autoupdate_schedule: weekly diff --git a/CITATION.cff b/CITATION.cff index 7893b5a..2480286 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,8 +2,8 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'lpjmlkit: Toolkit for Basic LPJmL Handling' -version: 1.7.5 -date-released: '2024-12-12' +version: 1.7.6 +date-released: '2025-01-27' abstract: A collection of basic functions to facilitate the work with the Dynamic Global Vegetation Model (DGVM) Lund-Potsdam-Jena managed Land (LPJmL) hosted at the Potsdam Institute for Climate Impact Research (PIK). It provides functions for diff --git a/DESCRIPTION b/DESCRIPTION index 750fec6..ad88d5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: lpjmlkit Type: Package Title: Toolkit for Basic LPJmL Handling -Version: 1.7.5 +Version: 1.7.6 Authors@R: c( person("Jannes", "Breier", , "jannesbr@pik-potsdam.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9055-6904")), person("Sebastian","Ostberg", , "ostberg@pik-potsdam.de", role = "aut", comment = c(ORCID = "0000-0002-2368-7015")), @@ -55,4 +55,4 @@ Suggests: sf Config/testthat/edition: 3 VignetteBuilder: knitr -Date: 2024-12-12 +Date: 2025-01-27 diff --git a/R/LPJmLData_transform.R b/R/LPJmLData_transform.R index ba88d9c..e22fe4d 100644 --- a/R/LPJmLData_transform.R +++ b/R/LPJmLData_transform.R @@ -16,8 +16,10 @@ #' @examples #' \dontrun{ #' -#' runoff <- read_io(filename = "runoff.bin.json", -#' subset = list(year = as.character(1991:2000))) +#' runoff <- read_io( +#' filename = "runoff.bin.json", +#' subset = list(year = as.character(1991:2000)) +#' ) #' #' # Transform into space format "lon_lat". This assumes a "grid.bin.json" file #' # is present in the same directory as "runoff.bin.json". @@ -60,40 +62,33 @@ LPJmLData$set( "private", ".transform", function(to) { - # Transform time format if (any(to %in% private$.meta$._dimension_map_$time)) { private$.transform_time(to = "time") - to <- to[!to %in% private$.meta$._dimension_map_$time] + to <- setdiff(to, private$.meta$._dimension_map_$time) } else if (any(to %in% private$.meta$._dimension_map_$year_month_day)) { private$.transform_time(to = "year_month_day") - to <- to[!to %in% private$.meta$._dimension_map_$year_month_day] + to <- setdiff(to, private$.meta$._dimension_map_$year_month_day) } # Transform space format if (any(to %in% private$.meta$._dimension_map_$cell)) { private$.transform_space(to = "cell") - to <- to[!to %in% private$.meta$._dimension_map_$cell] + to <- setdiff(to, private$.meta$._dimension_map_$cell) } else if (any(to %in% private$.meta$._dimension_map_$lon_lat)) { private$.transform_space(to = "lon_lat") - to <- to[!to %in% private$.meta$._dimension_map_$lon_lat] + to <- setdiff(to, private$.meta$._dimension_map_$lon_lat) } if (length(to) > 0) { stop( ifelse(length(to) > 1, "Formats ", "Format "), - paste0(col_var(to), collapse = ", "), + toString(col_var(to)), ifelse(length(to) > 1, " are ", " is "), "not valid. Please choose from available space formats ", - paste0( - col_var(private$.meta$._dimension_map_$space_format), - collapse = ", " - ), + toString(col_var(private$.meta$._dimension_map_$space_format)), " and available time formats ", - paste0( - col_var(private$.meta$._dimension_map_$time_format), - collapse = ", " - ), + toString(col_var(private$.meta$._dimension_map_$time_format)), call. = FALSE ) } @@ -109,7 +104,6 @@ LPJmLData$set( "private", ".transform_space", function(to) { - # If to equals current format return directly if (private$.meta$._space_format_ == to) { return(invisible(self)) @@ -122,13 +116,11 @@ LPJmLData$set( # Extract dimensions other than space dimension(s) from self other_dimnames <- dimnames(self$data) %>% `[<-`(unlist(strsplit(private$.meta$._space_format_, "_")), NULL) - other_dims <- dim(self$data) %>% - `[`(names(other_dimnames)) + other_dims <- dim(self$data)[names(other_dimnames)] # Case 1: Transformation from cell dimension to lon, lat dimensions if (private$.meta$._space_format_ == "cell" && to == "lon_lat") { - private$.grid$transform(to = to) # Matrix with ilon and ilat indices of cells in new array @@ -136,8 +128,10 @@ LPJmLData$set( match(as.integer(dimnames(self)[["cell"]]), private$.grid$data), dim(private$.grid) ) - dimnames(ilonilat) <- list(cell = dimnames(self)[["cell"]], - band = c("lon", "lat")) + dimnames(ilonilat) <- list( + cell = dimnames(self)[["cell"]], + band = c("lon", "lat") + ) # Index matrix to access elements from source data index_source <- expand.grid(sapply(dim(self), seq_len)) # nolint:undesirable_function_linter. @@ -162,12 +156,16 @@ LPJmLData$set( gc(full = TRUE) # Append new space dimension where they have been before - new_dimnames <- append(other_dimnames, - values = dimnames(private$.grid$data)[c("lat", "lon")], # nolint - after = get_predim(self$data, "cell")) - new_dims <- append(other_dims, - values = dim(private$.grid$data)[c("lat", "lon")], - after = get_predim(self$data, "cell")) + new_dimnames <- append( + other_dimnames, + values = dimnames(private$.grid$data)[c("lat", "lon")], # nolint + after = get_predim(self$data, "cell") + ) + new_dims <- append( + other_dims, + values = dim(private$.grid$data)[c("lat", "lon")], + after = get_predim(self$data, "cell") + ) # Replace source space dimension with target space dimensions in dim and # dimnames attribute @@ -190,14 +188,18 @@ LPJmLData$set( # Case 2: Transformation between lon, lat dimensions and cell dimension } else if (private$.meta$._space_format_ == "lon_lat" && to == "cell") { - # Matrix with ilon and ilat indices of cells in new array - ilonilat <- arrayInd(match(sort(private$.grid$data), private$.grid$data), - dim(private$.grid)) + ilonilat <- arrayInd( + match(sort(private$.grid$data), private$.grid$data), + dim(private$.grid) + ) - dimnames(ilonilat) <- list(cell = format(sort(private$.grid$data), - trim = TRUE, scientific = FALSE), - band = c("lon", "lat")) + dimnames(ilonilat) <- list( + cell = format(sort(private$.grid$data), + trim = TRUE, scientific = FALSE + ), + band = c("lon", "lat") + ) # Transform grid to target space format private$.grid$transform(to = to) @@ -236,9 +238,10 @@ LPJmLData$set( rm(index_source, ilonilat) gc(full = TRUE) - target_array <- array(self$data[index_target], dim = new_dims, - dimnames = new_dimnames) - + target_array <- array(self$data[index_target], + dim = new_dims, + dimnames = new_dimnames + ) } else { return(invisible(self)) } @@ -260,8 +263,6 @@ LPJmLData$set( "private", ".transform_time", function(to) { - - # If to equals current format return directly if (private$.meta$._time_format_ == to) { return(invisible(self)) @@ -270,22 +271,56 @@ LPJmLData$set( # Case 1: Transformation from "time" dimension to "year", "month", "day" # dimensions (if available) if (private$.meta$._time_format_ == "time" && to == "year_month_day") { - - # Possible ndays of months - ndays_in_month <- c(31, 30, 28) - # Split time string "year-month-day" into year, month, day integer vector, # reverse it to get it into the right order for array conversion time_dimnames <- split_time_names(self$dimnames()[["time"]]) %>% rev() - # Remove day dimension if all entries fall on last day of month - if (all(time_dimnames[["day"]] %in% ndays_in_month)) { - time_dimnames[["day"]] <- NULL + # Check that time axis is in sequence. Re-sequence if necessary. + valid_timestamps <- create_time_names( + nstep = private$.meta$nstep, + years <- as.integer(time_dimnames[["year"]]), + months = as.integer(time_dimnames[["month"]]), + days = as.integer(time_dimnames[["day"]]) + ) + invalid_timestamps <- setdiff(self$dimnames()[["time"]], valid_timestamps) + if (length(invalid_timestamps) > 0) { + # This may need revising for NetCDF files if monthly or annual values + # are set to a different date. + stop( + length(invalid_timestamps), + " unexpected time stamp(s) in time axis: ", + ifelse( + length(invalid_timestamps) > 5, + paste(toString(invalid_timestamps[seq_len(5)]), "[...]"), + toString(invalid_timestamps) + ) + ) + } + if (!identical(valid_timestamps, self$dimnames()[["time"]])) { + if (!all(valid_timestamps %in% self$dimnames()[["time"]])) { + warning( + "Prior subsetting induced gaps in time dimension that will be", + " filled with NAs during transformation.", + call. = FALSE + ) + } else { + warning( + "Time dimension is out of sequence. Re-sequencing before", + " transformation.", + call. = FALSE + ) + } + # Re-sequence from start to end + private$.subset( + time = intersect(valid_timestamps, self$dimnames()[["time"]]) + ) } - # Remove month dimension if there is only one month in data -> annual - if (length(time_dimnames$month) == 1 && is.null(time_dimnames[["day"]])) { - time_dimnames[["month"]] <- NULL + # Remove sub-dimensions not necessary for monthly or annual data + if (private$.meta$nstep == 1) { + time_dimnames[["day"]] <- time_dimnames[["month"]] <- NULL + } else if (private$.meta$nstep == 12) { + time_dimnames[["day"]] <- NULL } new_time_dim <- "year_month_day" @@ -294,16 +329,46 @@ LPJmLData$set( # (if available) to "time" dimension } else if (private$.meta$._time_format_ == "year_month_day" && to == "time") { # nolint:line_length_linter + # Check if time axis is in sequence and re-sequence if necessary + if ("day" %in% names(self$dimnames()) && + any(diff(as.integer(self$dimnames()$day)) < 1)) { + warning( + "Prior subsetting created day axis out of sequence. Days will be", + " re-sequenced during transformation.", + call. = FALSE + ) + private$.subset( + day = as.character(sort(as.integer(self$dimnames()$day))) + ) + } + if ("month" %in% names(self$dimnames()) && + any(diff(as.integer(self$dimnames()$month)) < 1)) { + warning( + "Prior subsetting created month axis out of sequence. Months will be", + " re-sequenced during transformation.", + call. = FALSE + ) + private$.subset( + month = as.character(sort(as.integer(self$dimnames()$month))) + ) + } + # Convert time dimnames back to time - pre_dimnames <- self$dimnames() %>% + old_dimnames <- self$dimnames() %>% lapply(as.integer) %>% suppressWarnings() time_dimnames <- list( - time = create_time_names(nstep = private$.meta$nstep, - years = pre_dimnames$year, - months = pre_dimnames$month, - days = pre_dimnames$day) + time = create_time_names( + nstep = private$.meta$nstep, + years = old_dimnames$year, + months = { + if (is.null(old_dimnames$month)) NULL else sort(old_dimnames$month) + }, + days = { + if (is.null(old_dimnames$day)) NULL else sort(old_dimnames$day) + } + ) ) new_time_dim <- "time" @@ -316,8 +381,7 @@ LPJmLData$set( # Extract dimensions other than space dimension(s) from self other_dimnames <- dimnames(self$data) %>% `[<-`(unlist(strsplit(private$.meta$._time_format_, "_")), NULL) - other_dims <- dim(self$data) %>% - `[`(names(other_dimnames)) + other_dims <- dim(self$data)[names(other_dimnames)] time_dims <- lapply(time_dimnames, length) @@ -341,7 +405,12 @@ LPJmLData$set( # Create new data array based on disaggregated time dimension self$.__set_data__( - array(self$data, dim = new_dims, dimnames = new_dimnames) + assign_data_dimensions( + x = self, + new_dims = new_dims, + new_dimnames = new_dimnames, + to_format = to + ) ) private$.meta$.__transform_time_format__(new_time_dim) @@ -354,3 +423,160 @@ LPJmLData$set( get_predim <- function(x, dims) { which(names(dim(x)) %in% dims)[1] - 1 } +# Function to get position of first dimension after last space dimension in +# array +get_postdim <- function(x, dims) { + max(which(names(dim(x)) %in% dims)) + 1 +} + +# Function to handle correct data dimensions, especially for daily data +# that comes with months of different length, so NAs are assigned in day +# dimension for day 28/29/30/31 where necessary. If transformed from +# "year_month_day" format, invalid dates are removed again. Also correct +# transformation in case of incomplete time series. +assign_data_dimensions <- function(x, new_dims, new_dimnames, to_format = "time") { # nolint:line_length_linter + old_dimnames <- dimnames(x) + if (x$meta$nstep == 365 || + (to_format == "year_month_day" && + dim(x)["time"] != x$meta$nstep * x$meta$nyear) || + (to_format == "time" && + any( + dim(x)[unlist(strsplit(x$meta$._time_format_, "_"))] != + c(x$meta$nyear, 12, 31), + na.rm = TRUE + ) + ) + ) { + # Only use complex transformation for daily data or incomplete time series. + if (to_format == "year_month_day") { + use_dimnames <- new_dimnames + } else { + use_dimnames <- old_dimnames + } + + # Compute full timestamps including invalid dates for array handling + full_timestamps <- create_time_names( + nstep = x$meta$nstep, + years = as.integer(use_dimnames$year), + months = { + if (is.null(use_dimnames$month)) + NULL + else + sort(as.integer(use_dimnames$month)) + }, + days = { + if (is.null(use_dimnames$day)) + NULL + else + sort(as.integer(use_dimnames$day)) + }, + only_valid = x$meta$nstep != 365 + ) + # Compute valid timestamps to match against full timestamps to filter + # invalids. + valid_timestamps <- create_time_names( + nstep = x$meta$nstep, + years = as.integer(use_dimnames$year), + months = { + if (is.null(use_dimnames$month)) + NULL + else + sort(as.integer(use_dimnames$month)) + }, + days = { + if (is.null(use_dimnames$day)) + NULL + else + sort(as.integer(use_dimnames$day)) + }, + only_valid = TRUE + ) + + if (to_format == "time") { + # Assign NAs to invalid timestamps + full_timestamps[!full_timestamps %in% valid_timestamps] <- NA + + # Expand time axis to cover full array grid + pre_dim <- seq_len( + get_predim(x, unlist(strsplit(x$meta$._time_format_, "_"))) + ) + if (length(pre_dim) > 0 && any(dim(x)[pre_dim] > 1)) { + expanded_time <- rep( + full_timestamps, + each = prod(dim(x)[pre_dim]) + ) + } else { + expanded_time <- full_timestamps + } + if (get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))) <= + length(dim(x)) + ) { + post_dim <- seq( + get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))), + length(dim(x)) + ) + if (any(dim(x)[post_dim] > 1)) { + expanded_time <- rep( + expanded_time, + times = prod(dim(x)[post_dim]) + ) + } + } + # Initialize the new_data array without NAs (filter invalid timestamps) + new_data <- array( + x$data[which(!is.na(expanded_time))], + dim = new_dims, + dimnames = new_dimnames + ) + } else if (to_format == "year_month_day") { + + # Assign NAs to invalid timestamps + index <- which( + !full_timestamps %in% valid_timestamps | + !full_timestamps %in% old_dimnames$time + ) + full_timestamps[index] <- NA + + # Expand time axis to cover full array grid + pre_dim <- seq_len( + get_predim(x, unlist(strsplit(x$meta$._time_format_, "_"))) + ) + expanded_time <- match(full_timestamps, old_dimnames$time) + if (length(pre_dim) > 0 && any(dim(x)[pre_dim] > 1)) { + expanded_time <- rep( + expanded_time, + each = prod(dim(x)[pre_dim]) + ) + } + if (get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))) <= + length(dim(x)) + ) { + post_dim <- seq( + get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))), + length(dim(x)) + ) + if (any(dim(x)[post_dim] > 1)) { + expanded_time <- rep( + expanded_time, + times = prod(dim(x)[post_dim]) + ) + } + } + valid <- which(!is.na(expanded_time)) + expanded_time[valid] <- seq_len(length(valid)) + rm(valid) + + # Assign data only to valid timestamps of new data array + new_data <- array( + x$data[expanded_time], + dim = new_dims, + dimnames = new_dimnames + ) + } + } else { + # Default case + new_data <- array(x$data, dim = new_dims, dimnames = new_dimnames) + } + + return(new_data) +} diff --git a/R/read_io.R b/R/read_io.R index 26c7529..bf1a06b 100644 --- a/R/read_io.R +++ b/R/read_io.R @@ -677,8 +677,51 @@ read_io_data <- function( # Dimension order during reading. Note: Must be 3 dimensions in total, with # "time" being last dimension for code below to work. read_band_order <- c("cell", "band", "time") - # Loop over subset years result_is_init <- FALSE + + # Prepare dimensions and dimension names for array containing one year of data + read_dim <- switch( + default(meta_data$order, "cellyear"), + cellyear = c( + band = unname(default(meta_data$nbands, 1)), + time = unname(default(meta_data$nstep, 1)), + cell = unname(meta_data$ncell) + ), + yearcell = stop("Order yearcell not supported"), + cellindex = stop("Order cellindex not supported"), + cellseq = c( + cell = unname(meta_data$ncell), + band = unname(default(meta_data$nbands, 1)), + time = unname(default(meta_data$nstep, 1)) + ) + ) + + band_names <- default( + meta_data$band_names, seq_len(default(meta_data$nbands, 1)) + ) + + cell_dimnames <- seq( + default(meta_data$firstcell, 0), + length.out = meta_data$ncell + ) + read_dimnames <- switch( + default(meta_data$order, "cellyear"), + cellyear = list( # order 1 + band = band_names, + time = NULL, # Assign dates later + cell = cell_dimnames + ), + yearcell = stop("Order yearcell not supported"), # order 2 + cellindex = stop("Order cellindex not supported"), # order 3 + cellseq = list( # order 4 + cell = cell_dimnames, + band = band_names, + time = NULL # Assign dates later + ) + ) + need_perm <- !identical(names(read_dim), read_band_order) + + # Loop over subset years for (yy in years) { # Compute offset data_offset <- (yy - default(meta_data$firstyear, 1901)) / @@ -703,50 +746,13 @@ read_io_data <- function( # Convert to array. # Note: order of nbands and nstep for "cellyear" (order = 1) is currently # not defined in LPJmL. - dim(year_data) <- switch( - default(meta_data$order, "cellyear"), - cellyear = c( - band = unname(default(meta_data$nbands, 1)), - time = unname(default(meta_data$nstep, 1)), - cell = unname(meta_data$ncell) - ), - yearcell = stop("Order yearcell not supported"), - cellindex = stop("Order cellindex not supported"), - cellseq = c( - cell = unname(meta_data$ncell), - band = unname(default(meta_data$nbands, 1)), - time = unname(default(meta_data$nstep, 1)) - ) - ) - - # Assign dimension names to array. - band_names <- default( - meta_data$band_names, seq_len(default(meta_data$nbands, 1)) - ) + dim(year_data) <- read_dim - cell_dimnames <- seq( - default(meta_data$firstcell, 0), - length.out = meta_data$ncell - ) - - dimnames(year_data) <- switch( - default(meta_data$order, "cellyear"), - cellyear = list( # order 1 - band = band_names, - time = NULL, # Assign dates later - cell = cell_dimnames - ), - yearcell = stop("Order yearcell not supported"), # order 2 - cellindex = stop("Order cellindex not supported"), # order 3 - cellseq = list( # order 4 - cell = cell_dimnames, - band = band_names, - time = NULL # Assign dates later - ) - ) + dimnames(year_data) <- read_dimnames # Convert to read_band_order and apply subsetting along bands or cells - year_data <- aperm(year_data, perm = read_band_order) + if (need_perm) + year_data <- aperm(year_data, perm = read_band_order) # Apply any subsetting along bands or cells index <- which(!names(subset) %in% c("day", "month", "year", "time")) diff --git a/R/time_names.R b/R/time_names.R index 6d617cb..0cbca15 100644 --- a/R/time_names.R +++ b/R/time_names.R @@ -7,15 +7,21 @@ create_time_names <- function( nstep = 365, years = 2000, months = NULL, - days = NULL + days = NULL, + only_valid = TRUE ) { # Number of days per month. - ndays_in_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) %>% - # Subset months if defined - { # nolint - if (is.null(months)) . else .[months] - } + if (only_valid) { + ndays_in_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + } else { + ndays_in_month <- rep(31, 12) + } + + # Subset months if defined + if (!is.null(months)) { + ndays_in_month <- ndays_in_month[months] + } # Days and months in two-digits format (e.g. "01"). dd <- unlist(lapply(ndays_in_month, FUN = seq_len)) %>% @@ -28,6 +34,14 @@ create_time_names <- function( mm <- {if (is.null(months)) seq_len(12) else months} %>% #nolint sprintf("%02d", .) + if (!is.null(days)) { + monthly_day_length <- sapply( # nolint:undesirable_function_linter. + ndays_in_month, + function(m, days) length(which(days %in% seq_len(m))), + days = days + ) + } + if (nstep == 365) { # Daily data: YYYY-MM-DD d_mmdd <- paste( @@ -35,10 +49,8 @@ create_time_names <- function( # Cases of months or days being subsetted or not. if (is.null(days)) { #nolint ndays_in_month - } else if (!is.null(days) && is.null(months)) { - rep(length(days), 12) } else { - rep(length(days), length(months)) + monthly_day_length } } ), dd, sep = "-" @@ -100,15 +112,14 @@ split_time_names <- function(time_names) { time_split <- regmatches( time_names, regexec("([-]?[[:digit:]]+)-([[:digit:]]+)-([[:digit:]]+)", time_names) - ) %>% lapply(function(x) as.character(as.integer(x[-1]))) + ) %>% lapply(function(x) as.integer(x[-1])) # Create corresponding dimnames for disaggregated array by unique entry matrix(unlist(time_split), nrow = length(time_split), byrow = TRUE, - dimnames = list(seq_along(time_split), - c("year", "month", "day"))) %>% - apply(2, unique) %>% + dimnames = list(NULL, c("year", "month", "day"))) %>% + apply(2, function(x) as.character(sort(unique(x)))) %>% as.list() %>% return() } diff --git a/README.md b/README.md index 0735e41..3e32640 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # logo Toolkit for Basic LPJmL Handling -R package **lpjmlkit**, version **1.7.5** +R package **lpjmlkit**, version **1.7.6** [![CRAN status](https://www.r-pkg.org/badges/version/lpjmlkit)](https://cran.r-project.org/package=lpjmlkit) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7773134.svg)](https://doi.org/10.5281/zenodo.7773134) [![R build status](https://github.com/PIK-LPJmL/lpjmlkit/workflows/check/badge.svg)](https://github.com/PIK-LPJmL/lpjmlkit/actions) [![codecov](https://codecov.io/gh/PIK-LPJmL/lpjmlkit/branch/master/graph/badge.svg)](https://app.codecov.io/gh/PIK-LPJmL/lpjmlkit) [![r-universe](https://pik-piam.r-universe.dev/badges/lpjmlkit)](https://pik-piam.r-universe.dev/builds) @@ -76,17 +76,18 @@ In case of questions / problems please contact Jannes Breier , R package version 1.7.5, . +Breier J, Ostberg S, Wirth S, Minoli S, Stenzel F, Hötten D, Müller C (2025). "lpjmlkit: Toolkit for Basic LPJmL Handling." doi:10.5281/zenodo.7773134 , Version: 1.7.6, . A BibTeX entry for LaTeX users is ```latex -@Manual{, +@Misc{, title = {lpjmlkit: Toolkit for Basic LPJmL Handling}, author = {Jannes Breier and Sebastian Ostberg and Stephen Björn Wirth and Sara Minoli and Fabian Stenzel and David Hötten and Christoph Müller}, - year = {2024}, - note = {R package version 1.7.5}, - url = {https://github.com/PIK-LPJmL/lpjmlkit}, doi = {10.5281/zenodo.7773134}, + date = {2025-01-27}, + year = {2025}, + url = {https://github.com/PIK-LPJmL/lpjmlkit}, + note = {Version: 1.7.6}, } ``` diff --git a/man/transform.Rd b/man/transform.Rd index 97baf22..2576296 100644 --- a/man/transform.Rd +++ b/man/transform.Rd @@ -25,8 +25,10 @@ possible. \examples{ \dontrun{ -runoff <- read_io(filename = "runoff.bin.json", - subset = list(year = as.character(1991:2000))) +runoff <- read_io( + filename = "runoff.bin.json", + subset = list(year = as.character(1991:2000)) +) # Transform into space format "lon_lat". This assumes a "grid.bin.json" file # is present in the same directory as "runoff.bin.json". diff --git a/tests/testdata/create_test_arrays.R b/tests/testdata/create_test_arrays.R index eda77dc..2779ea7 100644 --- a/tests/testdata/create_test_arrays.R +++ b/tests/testdata/create_test_arrays.R @@ -8,3 +8,42 @@ saveRDS(output$as_array(), "../testdata/test_array.rds") output <- read_io(filename = "../testdata/output/pft_npp.bin.json") output$transform(to = "lon_lat") saveRDS(output$data, "../testdata/test_array_lonlat.rds") + +# Create temperature data subset +tas_data <- read_io( + file.path( + "/p", "projects", "lpjml", "input", "historical", "ISIMIP3av2", "obsclim", + "GSWP3-W5E5", + "tas_gswp3-w5e5_obsclim_1901-2019.clm.json" + ), + subset = list( + year = as.character(2015:2019), + cell = as.character(10000:10002) + ) +) +tas_header <- tas_data$meta$as_header() +tas_header <- set_header_item(tas_header, version = 4, name = "LPJCLIM") +tas_write_data <- tas_data$data %>% `dim<-`(c(3, 365, 5)) %>% aperm(c(2, 1, 3)) +write_header( + filename = "../testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm", + header = tas_header +) +fp <- file("../testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm", "ab") +if (typeof(get_datatype(tas_header)$type) == "integer") { + writeBin( + as.integer(round(tas_write_data / get_header_item(tas_header, "scalar"))), + con = fp, + size = get_datatype(tas_header)$size, + endian = get_header_item(tas_header, "endian") + ) +} else if (typeof(get_datatype(tas_header)$type) == "double") { + writeBin( + as.double(tas_write_data / get_header_item(tas_header, "scalar")), + con = fp, + size = get_datatype(tas_header)$size, + endian = get_header_item(tas_header, "endian") + ) +} else { + stop("Unsupported datatype ", get_header_item(tas_header, "datatype")) +} +close(fp) diff --git a/tests/testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm b/tests/testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm new file mode 100644 index 0000000..a4f0ec6 Binary files /dev/null and b/tests/testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm differ diff --git a/tests/testthat/test-LPJmLData_subset_transform.R b/tests/testthat/test-LPJmLData_subset_transform.R index fb62bb0..a0ed8ea 100644 --- a/tests/testthat/test-LPJmLData_subset_transform.R +++ b/tests/testthat/test-LPJmLData_subset_transform.R @@ -154,10 +154,18 @@ test_that("test transform (time) method", { aperm(output$data, names(dim(output2))) ) + output$transform("year_month_day") + # remove a month, check that remaining data is not affected + output$subset(month = -11) + output$transform("time") + expect_identical( + subset(output2, time = dimnames(output)$time)$data, + aperm(output$data, names(dim(output2))) + ) }) -# test transform_time method +# test transform_space method test_that("test transform (space) method", { file_name <- "../testdata/output/transp.bin.json" output <- read_io(filename = file_name) @@ -293,3 +301,67 @@ test_that("test transform (space) method", { "Values for coordinate pairs must be supplied as strings" ) }) + +# test transforming daily data into different time formats +test_that("test transform (time) method", { + + file_name <- "../testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm" + tas <- read_io(filename = file_name) + + # transformation to "year_month_day" format + tas_time_transformed <- transform(tas, "year_month_day") + expect_true(all(is.na(tas_time_transformed$data[, 31, 2, , ]))) + + # back transformation to time format + tas_time_transformed$transform("time") + # check against original data + expect_identical(tas$data, tas_time_transformed$data) + + # add grid for spatial transformation + tas$add_grid("../testdata/output/grid.bin.json") + # test NA handling of days in combination with spatial transformation + # includes additional NAs in lat, lon matrix + tas_transformed <- transform(tas, "lat_lon") + + # transformation to "year_month_day" format + tas_transformed$transform("year_month_day") + expect_true(all(is.na(tas_transformed$data[, , 31, 2, , ]))) + + # back transformation to time format + tas_transformed$transform("time") + # back transformation to cell format + tas_transformed$transform("cell") + + # check against original data + expect_identical(tas$data, tas_time_transformed$data) + + # test subsetting daily data with gaps in time and random shuffling + subset_tas <- subset(tas, time = sample(dim(tas)["time"])[-c(10, 200)]) + missing_timestep <- setdiff(dimnames(tas)$time, dimnames(subset_tas)$time) + + # transformation to "year_month_day" format, avoid NA warning + expect_warning( + tas_subset_transformed <- transform(subset_tas, "year_month_day"), + "gaps" + ) + # back transformation to time format + tas_subset_transformed$transform("time") + + # Timesteps removed randomly before transformation have been reintroduced + # containing NAs. + expect_true( + all( + is.na(subset(tas_subset_transformed, time = missing_timestep)$data) + ) + ) + # Non-removed timesteps should be restored to original sequence after + # transformation and be identical with original data. + remaining_time_steps <- setdiff( + dimnames(tas_subset_transformed)$time, + missing_timestep + ) + expect_identical( + subset(tas, time = remaining_time_steps)$data, + subset(tas_subset_transformed, time = remaining_time_steps)$data + ) +}) diff --git a/vignettes/lpjml-runner.Rmd b/vignettes/lpjml-runner.Rmd index 45eb8dc..a11cc52 100644 --- a/vignettes/lpjml-runner.Rmd +++ b/vignettes/lpjml-runner.Rmd @@ -39,8 +39,6 @@ To install LPJmL, read the [LPJmL installation instructions](https://github.com/ operating systems in which the [working environment for LPJmL](https://github.com/PIK-LPJmL/LPJmL/blob/master/INSTALL) is configured!\ -For users on the PIK cluster: Load the `"lpjml"` module or add it to your -`".profile"`.   ## Overview