diff --git a/.buildlibrary b/.buildlibrary
index 7164f64..05f5701 100644
--- a/.buildlibrary
+++ b/.buildlibrary
@@ -1,4 +1,4 @@
-ValidationKey: '3512075'
+ValidationKey: '3540240'
AutocreateReadme: yes
AcceptedWarnings:
- 'Warning: package ''.*'' was built under R version'
@@ -8,3 +8,4 @@ AddInReadme: inst/README.md
AddLogoReadme: inst/img/logo.png
enforceVersionUpdate: no
skipCoverage: no
+AutocreateCITATION: yes
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
index d85a316..54aa78b 100644
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -44,6 +44,13 @@ jobs:
[ -f requirements.txt ] && python -m pip install --upgrade pip wheel || true
[ -f requirements.txt ] && pip install -r requirements.txt || true
+ - name: Run pre-commit checks
+ shell: bash
+ run: |
+ python -m pip install pre-commit
+ python -m pip freeze --local
+ pre-commit run --show-diff-on-failure --color=always --all-files
+
- name: Verify validation key
shell: Rscript {0}
run: lucode2:::validkey(stopIfInvalid = TRUE)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7dcd45b..3edf90a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,4 +25,4 @@ repos:
- id: readme-rmd-rendered
- id: use-tidy-description
ci:
- autoupdate_schedule: quarterly
+ autoupdate_schedule: weekly
diff --git a/CITATION.cff b/CITATION.cff
index 7893b5a..2480286 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -2,8 +2,8 @@ cff-version: 1.2.0
message: If you use this software, please cite it using the metadata from this file.
type: software
title: 'lpjmlkit: Toolkit for Basic LPJmL Handling'
-version: 1.7.5
-date-released: '2024-12-12'
+version: 1.7.6
+date-released: '2025-01-27'
abstract: A collection of basic functions to facilitate the work with the Dynamic
Global Vegetation Model (DGVM) Lund-Potsdam-Jena managed Land (LPJmL) hosted at
the Potsdam Institute for Climate Impact Research (PIK). It provides functions for
diff --git a/DESCRIPTION b/DESCRIPTION
index 750fec6..ad88d5a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: lpjmlkit
Type: Package
Title: Toolkit for Basic LPJmL Handling
-Version: 1.7.5
+Version: 1.7.6
Authors@R: c(
person("Jannes", "Breier", , "jannesbr@pik-potsdam.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9055-6904")),
person("Sebastian","Ostberg", , "ostberg@pik-potsdam.de", role = "aut", comment = c(ORCID = "0000-0002-2368-7015")),
@@ -55,4 +55,4 @@ Suggests:
sf
Config/testthat/edition: 3
VignetteBuilder: knitr
-Date: 2024-12-12
+Date: 2025-01-27
diff --git a/R/LPJmLData_transform.R b/R/LPJmLData_transform.R
index ba88d9c..e22fe4d 100644
--- a/R/LPJmLData_transform.R
+++ b/R/LPJmLData_transform.R
@@ -16,8 +16,10 @@
#' @examples
#' \dontrun{
#'
-#' runoff <- read_io(filename = "runoff.bin.json",
-#' subset = list(year = as.character(1991:2000)))
+#' runoff <- read_io(
+#' filename = "runoff.bin.json",
+#' subset = list(year = as.character(1991:2000))
+#' )
#'
#' # Transform into space format "lon_lat". This assumes a "grid.bin.json" file
#' # is present in the same directory as "runoff.bin.json".
@@ -60,40 +62,33 @@ LPJmLData$set(
"private",
".transform",
function(to) {
-
# Transform time format
if (any(to %in% private$.meta$._dimension_map_$time)) {
private$.transform_time(to = "time")
- to <- to[!to %in% private$.meta$._dimension_map_$time]
+ to <- setdiff(to, private$.meta$._dimension_map_$time)
} else if (any(to %in% private$.meta$._dimension_map_$year_month_day)) {
private$.transform_time(to = "year_month_day")
- to <- to[!to %in% private$.meta$._dimension_map_$year_month_day]
+ to <- setdiff(to, private$.meta$._dimension_map_$year_month_day)
}
# Transform space format
if (any(to %in% private$.meta$._dimension_map_$cell)) {
private$.transform_space(to = "cell")
- to <- to[!to %in% private$.meta$._dimension_map_$cell]
+ to <- setdiff(to, private$.meta$._dimension_map_$cell)
} else if (any(to %in% private$.meta$._dimension_map_$lon_lat)) {
private$.transform_space(to = "lon_lat")
- to <- to[!to %in% private$.meta$._dimension_map_$lon_lat]
+ to <- setdiff(to, private$.meta$._dimension_map_$lon_lat)
}
if (length(to) > 0) {
stop(
ifelse(length(to) > 1, "Formats ", "Format "),
- paste0(col_var(to), collapse = ", "),
+ toString(col_var(to)),
ifelse(length(to) > 1, " are ", " is "),
"not valid. Please choose from available space formats ",
- paste0(
- col_var(private$.meta$._dimension_map_$space_format),
- collapse = ", "
- ),
+ toString(col_var(private$.meta$._dimension_map_$space_format)),
" and available time formats ",
- paste0(
- col_var(private$.meta$._dimension_map_$time_format),
- collapse = ", "
- ),
+ toString(col_var(private$.meta$._dimension_map_$time_format)),
call. = FALSE
)
}
@@ -109,7 +104,6 @@ LPJmLData$set(
"private",
".transform_space",
function(to) {
-
# If to equals current format return directly
if (private$.meta$._space_format_ == to) {
return(invisible(self))
@@ -122,13 +116,11 @@ LPJmLData$set(
# Extract dimensions other than space dimension(s) from self
other_dimnames <- dimnames(self$data) %>%
`[<-`(unlist(strsplit(private$.meta$._space_format_, "_")), NULL)
- other_dims <- dim(self$data) %>%
- `[`(names(other_dimnames))
+ other_dims <- dim(self$data)[names(other_dimnames)]
# Case 1: Transformation from cell dimension to lon, lat dimensions
if (private$.meta$._space_format_ == "cell" &&
to == "lon_lat") {
-
private$.grid$transform(to = to)
# Matrix with ilon and ilat indices of cells in new array
@@ -136,8 +128,10 @@ LPJmLData$set(
match(as.integer(dimnames(self)[["cell"]]), private$.grid$data),
dim(private$.grid)
)
- dimnames(ilonilat) <- list(cell = dimnames(self)[["cell"]],
- band = c("lon", "lat"))
+ dimnames(ilonilat) <- list(
+ cell = dimnames(self)[["cell"]],
+ band = c("lon", "lat")
+ )
# Index matrix to access elements from source data
index_source <- expand.grid(sapply(dim(self), seq_len)) # nolint:undesirable_function_linter.
@@ -162,12 +156,16 @@ LPJmLData$set(
gc(full = TRUE)
# Append new space dimension where they have been before
- new_dimnames <- append(other_dimnames,
- values = dimnames(private$.grid$data)[c("lat", "lon")], # nolint
- after = get_predim(self$data, "cell"))
- new_dims <- append(other_dims,
- values = dim(private$.grid$data)[c("lat", "lon")],
- after = get_predim(self$data, "cell"))
+ new_dimnames <- append(
+ other_dimnames,
+ values = dimnames(private$.grid$data)[c("lat", "lon")], # nolint
+ after = get_predim(self$data, "cell")
+ )
+ new_dims <- append(
+ other_dims,
+ values = dim(private$.grid$data)[c("lat", "lon")],
+ after = get_predim(self$data, "cell")
+ )
# Replace source space dimension with target space dimensions in dim and
# dimnames attribute
@@ -190,14 +188,18 @@ LPJmLData$set(
# Case 2: Transformation between lon, lat dimensions and cell dimension
} else if (private$.meta$._space_format_ == "lon_lat" && to == "cell") {
-
# Matrix with ilon and ilat indices of cells in new array
- ilonilat <- arrayInd(match(sort(private$.grid$data), private$.grid$data),
- dim(private$.grid))
+ ilonilat <- arrayInd(
+ match(sort(private$.grid$data), private$.grid$data),
+ dim(private$.grid)
+ )
- dimnames(ilonilat) <- list(cell = format(sort(private$.grid$data),
- trim = TRUE, scientific = FALSE),
- band = c("lon", "lat"))
+ dimnames(ilonilat) <- list(
+ cell = format(sort(private$.grid$data),
+ trim = TRUE, scientific = FALSE
+ ),
+ band = c("lon", "lat")
+ )
# Transform grid to target space format
private$.grid$transform(to = to)
@@ -236,9 +238,10 @@ LPJmLData$set(
rm(index_source, ilonilat)
gc(full = TRUE)
- target_array <- array(self$data[index_target], dim = new_dims,
- dimnames = new_dimnames)
-
+ target_array <- array(self$data[index_target],
+ dim = new_dims,
+ dimnames = new_dimnames
+ )
} else {
return(invisible(self))
}
@@ -260,8 +263,6 @@ LPJmLData$set(
"private",
".transform_time",
function(to) {
-
-
# If to equals current format return directly
if (private$.meta$._time_format_ == to) {
return(invisible(self))
@@ -270,22 +271,56 @@ LPJmLData$set(
# Case 1: Transformation from "time" dimension to "year", "month", "day"
# dimensions (if available)
if (private$.meta$._time_format_ == "time" && to == "year_month_day") {
-
- # Possible ndays of months
- ndays_in_month <- c(31, 30, 28)
-
# Split time string "year-month-day" into year, month, day integer vector,
# reverse it to get it into the right order for array conversion
time_dimnames <- split_time_names(self$dimnames()[["time"]]) %>% rev()
- # Remove day dimension if all entries fall on last day of month
- if (all(time_dimnames[["day"]] %in% ndays_in_month)) {
- time_dimnames[["day"]] <- NULL
+ # Check that time axis is in sequence. Re-sequence if necessary.
+ valid_timestamps <- create_time_names(
+ nstep = private$.meta$nstep,
+ years <- as.integer(time_dimnames[["year"]]),
+ months = as.integer(time_dimnames[["month"]]),
+ days = as.integer(time_dimnames[["day"]])
+ )
+ invalid_timestamps <- setdiff(self$dimnames()[["time"]], valid_timestamps)
+ if (length(invalid_timestamps) > 0) {
+ # This may need revising for NetCDF files if monthly or annual values
+ # are set to a different date.
+ stop(
+ length(invalid_timestamps),
+ " unexpected time stamp(s) in time axis: ",
+ ifelse(
+ length(invalid_timestamps) > 5,
+ paste(toString(invalid_timestamps[seq_len(5)]), "[...]"),
+ toString(invalid_timestamps)
+ )
+ )
+ }
+ if (!identical(valid_timestamps, self$dimnames()[["time"]])) {
+ if (!all(valid_timestamps %in% self$dimnames()[["time"]])) {
+ warning(
+ "Prior subsetting induced gaps in time dimension that will be",
+ " filled with NAs during transformation.",
+ call. = FALSE
+ )
+ } else {
+ warning(
+ "Time dimension is out of sequence. Re-sequencing before",
+ " transformation.",
+ call. = FALSE
+ )
+ }
+ # Re-sequence from start to end
+ private$.subset(
+ time = intersect(valid_timestamps, self$dimnames()[["time"]])
+ )
}
- # Remove month dimension if there is only one month in data -> annual
- if (length(time_dimnames$month) == 1 && is.null(time_dimnames[["day"]])) {
- time_dimnames[["month"]] <- NULL
+ # Remove sub-dimensions not necessary for monthly or annual data
+ if (private$.meta$nstep == 1) {
+ time_dimnames[["day"]] <- time_dimnames[["month"]] <- NULL
+ } else if (private$.meta$nstep == 12) {
+ time_dimnames[["day"]] <- NULL
}
new_time_dim <- "year_month_day"
@@ -294,16 +329,46 @@ LPJmLData$set(
# (if available) to "time" dimension
} else if (private$.meta$._time_format_ == "year_month_day" && to == "time") { # nolint:line_length_linter
+ # Check if time axis is in sequence and re-sequence if necessary
+ if ("day" %in% names(self$dimnames()) &&
+ any(diff(as.integer(self$dimnames()$day)) < 1)) {
+ warning(
+ "Prior subsetting created day axis out of sequence. Days will be",
+ " re-sequenced during transformation.",
+ call. = FALSE
+ )
+ private$.subset(
+ day = as.character(sort(as.integer(self$dimnames()$day)))
+ )
+ }
+ if ("month" %in% names(self$dimnames()) &&
+ any(diff(as.integer(self$dimnames()$month)) < 1)) {
+ warning(
+ "Prior subsetting created month axis out of sequence. Months will be",
+ " re-sequenced during transformation.",
+ call. = FALSE
+ )
+ private$.subset(
+ month = as.character(sort(as.integer(self$dimnames()$month)))
+ )
+ }
+
# Convert time dimnames back to time
- pre_dimnames <- self$dimnames() %>%
+ old_dimnames <- self$dimnames() %>%
lapply(as.integer) %>%
suppressWarnings()
time_dimnames <- list(
- time = create_time_names(nstep = private$.meta$nstep,
- years = pre_dimnames$year,
- months = pre_dimnames$month,
- days = pre_dimnames$day)
+ time = create_time_names(
+ nstep = private$.meta$nstep,
+ years = old_dimnames$year,
+ months = {
+ if (is.null(old_dimnames$month)) NULL else sort(old_dimnames$month)
+ },
+ days = {
+ if (is.null(old_dimnames$day)) NULL else sort(old_dimnames$day)
+ }
+ )
)
new_time_dim <- "time"
@@ -316,8 +381,7 @@ LPJmLData$set(
# Extract dimensions other than space dimension(s) from self
other_dimnames <- dimnames(self$data) %>%
`[<-`(unlist(strsplit(private$.meta$._time_format_, "_")), NULL)
- other_dims <- dim(self$data) %>%
- `[`(names(other_dimnames))
+ other_dims <- dim(self$data)[names(other_dimnames)]
time_dims <- lapply(time_dimnames, length)
@@ -341,7 +405,12 @@ LPJmLData$set(
# Create new data array based on disaggregated time dimension
self$.__set_data__(
- array(self$data, dim = new_dims, dimnames = new_dimnames)
+ assign_data_dimensions(
+ x = self,
+ new_dims = new_dims,
+ new_dimnames = new_dimnames,
+ to_format = to
+ )
)
private$.meta$.__transform_time_format__(new_time_dim)
@@ -354,3 +423,160 @@ LPJmLData$set(
get_predim <- function(x, dims) {
which(names(dim(x)) %in% dims)[1] - 1
}
+# Function to get position of first dimension after last space dimension in
+# array
+get_postdim <- function(x, dims) {
+ max(which(names(dim(x)) %in% dims)) + 1
+}
+
+# Function to handle correct data dimensions, especially for daily data
+# that comes with months of different length, so NAs are assigned in day
+# dimension for day 28/29/30/31 where necessary. If transformed from
+# "year_month_day" format, invalid dates are removed again. Also correct
+# transformation in case of incomplete time series.
+assign_data_dimensions <- function(x, new_dims, new_dimnames, to_format = "time") { # nolint:line_length_linter
+ old_dimnames <- dimnames(x)
+ if (x$meta$nstep == 365 ||
+ (to_format == "year_month_day" &&
+ dim(x)["time"] != x$meta$nstep * x$meta$nyear) ||
+ (to_format == "time" &&
+ any(
+ dim(x)[unlist(strsplit(x$meta$._time_format_, "_"))] !=
+ c(x$meta$nyear, 12, 31),
+ na.rm = TRUE
+ )
+ )
+ ) {
+ # Only use complex transformation for daily data or incomplete time series.
+ if (to_format == "year_month_day") {
+ use_dimnames <- new_dimnames
+ } else {
+ use_dimnames <- old_dimnames
+ }
+
+ # Compute full timestamps including invalid dates for array handling
+ full_timestamps <- create_time_names(
+ nstep = x$meta$nstep,
+ years = as.integer(use_dimnames$year),
+ months = {
+ if (is.null(use_dimnames$month))
+ NULL
+ else
+ sort(as.integer(use_dimnames$month))
+ },
+ days = {
+ if (is.null(use_dimnames$day))
+ NULL
+ else
+ sort(as.integer(use_dimnames$day))
+ },
+ only_valid = x$meta$nstep != 365
+ )
+ # Compute valid timestamps to match against full timestamps to filter
+ # invalids.
+ valid_timestamps <- create_time_names(
+ nstep = x$meta$nstep,
+ years = as.integer(use_dimnames$year),
+ months = {
+ if (is.null(use_dimnames$month))
+ NULL
+ else
+ sort(as.integer(use_dimnames$month))
+ },
+ days = {
+ if (is.null(use_dimnames$day))
+ NULL
+ else
+ sort(as.integer(use_dimnames$day))
+ },
+ only_valid = TRUE
+ )
+
+ if (to_format == "time") {
+ # Assign NAs to invalid timestamps
+ full_timestamps[!full_timestamps %in% valid_timestamps] <- NA
+
+ # Expand time axis to cover full array grid
+ pre_dim <- seq_len(
+ get_predim(x, unlist(strsplit(x$meta$._time_format_, "_")))
+ )
+ if (length(pre_dim) > 0 && any(dim(x)[pre_dim] > 1)) {
+ expanded_time <- rep(
+ full_timestamps,
+ each = prod(dim(x)[pre_dim])
+ )
+ } else {
+ expanded_time <- full_timestamps
+ }
+ if (get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))) <=
+ length(dim(x))
+ ) {
+ post_dim <- seq(
+ get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))),
+ length(dim(x))
+ )
+ if (any(dim(x)[post_dim] > 1)) {
+ expanded_time <- rep(
+ expanded_time,
+ times = prod(dim(x)[post_dim])
+ )
+ }
+ }
+ # Initialize the new_data array without NAs (filter invalid timestamps)
+ new_data <- array(
+ x$data[which(!is.na(expanded_time))],
+ dim = new_dims,
+ dimnames = new_dimnames
+ )
+ } else if (to_format == "year_month_day") {
+
+ # Assign NAs to invalid timestamps
+ index <- which(
+ !full_timestamps %in% valid_timestamps |
+ !full_timestamps %in% old_dimnames$time
+ )
+ full_timestamps[index] <- NA
+
+ # Expand time axis to cover full array grid
+ pre_dim <- seq_len(
+ get_predim(x, unlist(strsplit(x$meta$._time_format_, "_")))
+ )
+ expanded_time <- match(full_timestamps, old_dimnames$time)
+ if (length(pre_dim) > 0 && any(dim(x)[pre_dim] > 1)) {
+ expanded_time <- rep(
+ expanded_time,
+ each = prod(dim(x)[pre_dim])
+ )
+ }
+ if (get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))) <=
+ length(dim(x))
+ ) {
+ post_dim <- seq(
+ get_postdim(x, unlist(strsplit(x$meta$._time_format_, "_"))),
+ length(dim(x))
+ )
+ if (any(dim(x)[post_dim] > 1)) {
+ expanded_time <- rep(
+ expanded_time,
+ times = prod(dim(x)[post_dim])
+ )
+ }
+ }
+ valid <- which(!is.na(expanded_time))
+ expanded_time[valid] <- seq_len(length(valid))
+ rm(valid)
+
+ # Assign data only to valid timestamps of new data array
+ new_data <- array(
+ x$data[expanded_time],
+ dim = new_dims,
+ dimnames = new_dimnames
+ )
+ }
+ } else {
+ # Default case
+ new_data <- array(x$data, dim = new_dims, dimnames = new_dimnames)
+ }
+
+ return(new_data)
+}
diff --git a/R/read_io.R b/R/read_io.R
index 26c7529..bf1a06b 100644
--- a/R/read_io.R
+++ b/R/read_io.R
@@ -677,8 +677,51 @@ read_io_data <- function(
# Dimension order during reading. Note: Must be 3 dimensions in total, with
# "time" being last dimension for code below to work.
read_band_order <- c("cell", "band", "time")
- # Loop over subset years
result_is_init <- FALSE
+
+ # Prepare dimensions and dimension names for array containing one year of data
+ read_dim <- switch(
+ default(meta_data$order, "cellyear"),
+ cellyear = c(
+ band = unname(default(meta_data$nbands, 1)),
+ time = unname(default(meta_data$nstep, 1)),
+ cell = unname(meta_data$ncell)
+ ),
+ yearcell = stop("Order yearcell not supported"),
+ cellindex = stop("Order cellindex not supported"),
+ cellseq = c(
+ cell = unname(meta_data$ncell),
+ band = unname(default(meta_data$nbands, 1)),
+ time = unname(default(meta_data$nstep, 1))
+ )
+ )
+
+ band_names <- default(
+ meta_data$band_names, seq_len(default(meta_data$nbands, 1))
+ )
+
+ cell_dimnames <- seq(
+ default(meta_data$firstcell, 0),
+ length.out = meta_data$ncell
+ )
+ read_dimnames <- switch(
+ default(meta_data$order, "cellyear"),
+ cellyear = list( # order 1
+ band = band_names,
+ time = NULL, # Assign dates later
+ cell = cell_dimnames
+ ),
+ yearcell = stop("Order yearcell not supported"), # order 2
+ cellindex = stop("Order cellindex not supported"), # order 3
+ cellseq = list( # order 4
+ cell = cell_dimnames,
+ band = band_names,
+ time = NULL # Assign dates later
+ )
+ )
+ need_perm <- !identical(names(read_dim), read_band_order)
+
+ # Loop over subset years
for (yy in years) {
# Compute offset
data_offset <- (yy - default(meta_data$firstyear, 1901)) /
@@ -703,50 +746,13 @@ read_io_data <- function(
# Convert to array.
# Note: order of nbands and nstep for "cellyear" (order = 1) is currently
# not defined in LPJmL.
- dim(year_data) <- switch(
- default(meta_data$order, "cellyear"),
- cellyear = c(
- band = unname(default(meta_data$nbands, 1)),
- time = unname(default(meta_data$nstep, 1)),
- cell = unname(meta_data$ncell)
- ),
- yearcell = stop("Order yearcell not supported"),
- cellindex = stop("Order cellindex not supported"),
- cellseq = c(
- cell = unname(meta_data$ncell),
- band = unname(default(meta_data$nbands, 1)),
- time = unname(default(meta_data$nstep, 1))
- )
- )
-
- # Assign dimension names to array.
- band_names <- default(
- meta_data$band_names, seq_len(default(meta_data$nbands, 1))
- )
+ dim(year_data) <- read_dim
- cell_dimnames <- seq(
- default(meta_data$firstcell, 0),
- length.out = meta_data$ncell
- )
-
- dimnames(year_data) <- switch(
- default(meta_data$order, "cellyear"),
- cellyear = list( # order 1
- band = band_names,
- time = NULL, # Assign dates later
- cell = cell_dimnames
- ),
- yearcell = stop("Order yearcell not supported"), # order 2
- cellindex = stop("Order cellindex not supported"), # order 3
- cellseq = list( # order 4
- cell = cell_dimnames,
- band = band_names,
- time = NULL # Assign dates later
- )
- )
+ dimnames(year_data) <- read_dimnames
# Convert to read_band_order and apply subsetting along bands or cells
- year_data <- aperm(year_data, perm = read_band_order)
+ if (need_perm)
+ year_data <- aperm(year_data, perm = read_band_order)
# Apply any subsetting along bands or cells
index <- which(!names(subset) %in% c("day", "month", "year", "time"))
diff --git a/R/time_names.R b/R/time_names.R
index 6d617cb..0cbca15 100644
--- a/R/time_names.R
+++ b/R/time_names.R
@@ -7,15 +7,21 @@ create_time_names <- function(
nstep = 365,
years = 2000,
months = NULL,
- days = NULL
+ days = NULL,
+ only_valid = TRUE
) {
# Number of days per month.
- ndays_in_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) %>%
- # Subset months if defined
- { # nolint
- if (is.null(months)) . else .[months]
- }
+ if (only_valid) {
+ ndays_in_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
+ } else {
+ ndays_in_month <- rep(31, 12)
+ }
+
+ # Subset months if defined
+ if (!is.null(months)) {
+ ndays_in_month <- ndays_in_month[months]
+ }
# Days and months in two-digits format (e.g. "01").
dd <- unlist(lapply(ndays_in_month, FUN = seq_len)) %>%
@@ -28,6 +34,14 @@ create_time_names <- function(
mm <- {if (is.null(months)) seq_len(12) else months} %>% #nolint
sprintf("%02d", .)
+ if (!is.null(days)) {
+ monthly_day_length <- sapply( # nolint:undesirable_function_linter.
+ ndays_in_month,
+ function(m, days) length(which(days %in% seq_len(m))),
+ days = days
+ )
+ }
+
if (nstep == 365) {
# Daily data: YYYY-MM-DD
d_mmdd <- paste(
@@ -35,10 +49,8 @@ create_time_names <- function(
# Cases of months or days being subsetted or not.
if (is.null(days)) { #nolint
ndays_in_month
- } else if (!is.null(days) && is.null(months)) {
- rep(length(days), 12)
} else {
- rep(length(days), length(months))
+ monthly_day_length
}
}
), dd, sep = "-"
@@ -100,15 +112,14 @@ split_time_names <- function(time_names) {
time_split <- regmatches(
time_names,
regexec("([-]?[[:digit:]]+)-([[:digit:]]+)-([[:digit:]]+)", time_names)
- ) %>% lapply(function(x) as.character(as.integer(x[-1])))
+ ) %>% lapply(function(x) as.integer(x[-1]))
# Create corresponding dimnames for disaggregated array by unique entry
matrix(unlist(time_split),
nrow = length(time_split),
byrow = TRUE,
- dimnames = list(seq_along(time_split),
- c("year", "month", "day"))) %>%
- apply(2, unique) %>%
+ dimnames = list(NULL, c("year", "month", "day"))) %>%
+ apply(2, function(x) as.character(sort(unique(x)))) %>%
as.list() %>%
return()
}
diff --git a/README.md b/README.md
index 0735e41..3e32640 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
#
Toolkit for Basic LPJmL Handling
-R package **lpjmlkit**, version **1.7.5**
+R package **lpjmlkit**, version **1.7.6**
[](https://cran.r-project.org/package=lpjmlkit) [](https://doi.org/10.5281/zenodo.7773134) [](https://github.com/PIK-LPJmL/lpjmlkit/actions) [](https://app.codecov.io/gh/PIK-LPJmL/lpjmlkit) [](https://pik-piam.r-universe.dev/builds)
@@ -76,17 +76,18 @@ In case of questions / problems please contact Jannes Breier , R package version 1.7.5, .
+Breier J, Ostberg S, Wirth S, Minoli S, Stenzel F, Hötten D, Müller C (2025). "lpjmlkit: Toolkit for Basic LPJmL Handling." doi:10.5281/zenodo.7773134 , Version: 1.7.6, .
A BibTeX entry for LaTeX users is
```latex
-@Manual{,
+@Misc{,
title = {lpjmlkit: Toolkit for Basic LPJmL Handling},
author = {Jannes Breier and Sebastian Ostberg and Stephen Björn Wirth and Sara Minoli and Fabian Stenzel and David Hötten and Christoph Müller},
- year = {2024},
- note = {R package version 1.7.5},
- url = {https://github.com/PIK-LPJmL/lpjmlkit},
doi = {10.5281/zenodo.7773134},
+ date = {2025-01-27},
+ year = {2025},
+ url = {https://github.com/PIK-LPJmL/lpjmlkit},
+ note = {Version: 1.7.6},
}
```
diff --git a/man/transform.Rd b/man/transform.Rd
index 97baf22..2576296 100644
--- a/man/transform.Rd
+++ b/man/transform.Rd
@@ -25,8 +25,10 @@ possible.
\examples{
\dontrun{
-runoff <- read_io(filename = "runoff.bin.json",
- subset = list(year = as.character(1991:2000)))
+runoff <- read_io(
+ filename = "runoff.bin.json",
+ subset = list(year = as.character(1991:2000))
+)
# Transform into space format "lon_lat". This assumes a "grid.bin.json" file
# is present in the same directory as "runoff.bin.json".
diff --git a/tests/testdata/create_test_arrays.R b/tests/testdata/create_test_arrays.R
index eda77dc..2779ea7 100644
--- a/tests/testdata/create_test_arrays.R
+++ b/tests/testdata/create_test_arrays.R
@@ -8,3 +8,42 @@ saveRDS(output$as_array(), "../testdata/test_array.rds")
output <- read_io(filename = "../testdata/output/pft_npp.bin.json")
output$transform(to = "lon_lat")
saveRDS(output$data, "../testdata/test_array_lonlat.rds")
+
+# Create temperature data subset
+tas_data <- read_io(
+ file.path(
+ "/p", "projects", "lpjml", "input", "historical", "ISIMIP3av2", "obsclim",
+ "GSWP3-W5E5",
+ "tas_gswp3-w5e5_obsclim_1901-2019.clm.json"
+ ),
+ subset = list(
+ year = as.character(2015:2019),
+ cell = as.character(10000:10002)
+ )
+)
+tas_header <- tas_data$meta$as_header()
+tas_header <- set_header_item(tas_header, version = 4, name = "LPJCLIM")
+tas_write_data <- tas_data$data %>% `dim<-`(c(3, 365, 5)) %>% aperm(c(2, 1, 3))
+write_header(
+ filename = "../testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm",
+ header = tas_header
+)
+fp <- file("../testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm", "ab")
+if (typeof(get_datatype(tas_header)$type) == "integer") {
+ writeBin(
+ as.integer(round(tas_write_data / get_header_item(tas_header, "scalar"))),
+ con = fp,
+ size = get_datatype(tas_header)$size,
+ endian = get_header_item(tas_header, "endian")
+ )
+} else if (typeof(get_datatype(tas_header)$type) == "double") {
+ writeBin(
+ as.double(tas_write_data / get_header_item(tas_header, "scalar")),
+ con = fp,
+ size = get_datatype(tas_header)$size,
+ endian = get_header_item(tas_header, "endian")
+ )
+} else {
+ stop("Unsupported datatype ", get_header_item(tas_header, "datatype"))
+}
+close(fp)
diff --git a/tests/testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm b/tests/testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm
new file mode 100644
index 0000000..a4f0ec6
Binary files /dev/null and b/tests/testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm differ
diff --git a/tests/testthat/test-LPJmLData_subset_transform.R b/tests/testthat/test-LPJmLData_subset_transform.R
index fb62bb0..a0ed8ea 100644
--- a/tests/testthat/test-LPJmLData_subset_transform.R
+++ b/tests/testthat/test-LPJmLData_subset_transform.R
@@ -154,10 +154,18 @@ test_that("test transform (time) method", {
aperm(output$data, names(dim(output2)))
)
+ output$transform("year_month_day")
+ # remove a month, check that remaining data is not affected
+ output$subset(month = -11)
+ output$transform("time")
+ expect_identical(
+ subset(output2, time = dimnames(output)$time)$data,
+ aperm(output$data, names(dim(output2)))
+ )
})
-# test transform_time method
+# test transform_space method
test_that("test transform (space) method", {
file_name <- "../testdata/output/transp.bin.json"
output <- read_io(filename = file_name)
@@ -293,3 +301,67 @@ test_that("test transform (space) method", {
"Values for coordinate pairs must be supplied as strings"
)
})
+
+# test transforming daily data into different time formats
+test_that("test transform (time) method", {
+
+ file_name <- "../testdata/input/tas_gswp3-w5e5_obsclim_2015-2019.clm"
+ tas <- read_io(filename = file_name)
+
+ # transformation to "year_month_day" format
+ tas_time_transformed <- transform(tas, "year_month_day")
+ expect_true(all(is.na(tas_time_transformed$data[, 31, 2, , ])))
+
+ # back transformation to time format
+ tas_time_transformed$transform("time")
+ # check against original data
+ expect_identical(tas$data, tas_time_transformed$data)
+
+ # add grid for spatial transformation
+ tas$add_grid("../testdata/output/grid.bin.json")
+ # test NA handling of days in combination with spatial transformation
+ # includes additional NAs in lat, lon matrix
+ tas_transformed <- transform(tas, "lat_lon")
+
+ # transformation to "year_month_day" format
+ tas_transformed$transform("year_month_day")
+ expect_true(all(is.na(tas_transformed$data[, , 31, 2, , ])))
+
+ # back transformation to time format
+ tas_transformed$transform("time")
+ # back transformation to cell format
+ tas_transformed$transform("cell")
+
+ # check against original data
+ expect_identical(tas$data, tas_time_transformed$data)
+
+ # test subsetting daily data with gaps in time and random shuffling
+ subset_tas <- subset(tas, time = sample(dim(tas)["time"])[-c(10, 200)])
+ missing_timestep <- setdiff(dimnames(tas)$time, dimnames(subset_tas)$time)
+
+ # transformation to "year_month_day" format, avoid NA warning
+ expect_warning(
+ tas_subset_transformed <- transform(subset_tas, "year_month_day"),
+ "gaps"
+ )
+ # back transformation to time format
+ tas_subset_transformed$transform("time")
+
+ # Timesteps removed randomly before transformation have been reintroduced
+ # containing NAs.
+ expect_true(
+ all(
+ is.na(subset(tas_subset_transformed, time = missing_timestep)$data)
+ )
+ )
+ # Non-removed timesteps should be restored to original sequence after
+ # transformation and be identical with original data.
+ remaining_time_steps <- setdiff(
+ dimnames(tas_subset_transformed)$time,
+ missing_timestep
+ )
+ expect_identical(
+ subset(tas, time = remaining_time_steps)$data,
+ subset(tas_subset_transformed, time = remaining_time_steps)$data
+ )
+})
diff --git a/vignettes/lpjml-runner.Rmd b/vignettes/lpjml-runner.Rmd
index 45eb8dc..a11cc52 100644
--- a/vignettes/lpjml-runner.Rmd
+++ b/vignettes/lpjml-runner.Rmd
@@ -39,8 +39,6 @@ To install LPJmL, read the [LPJmL installation instructions](https://github.com/
operating systems in which the
[working environment for LPJmL](https://github.com/PIK-LPJmL/LPJmL/blob/master/INSTALL)
is configured!\
-For users on the PIK cluster: Load the `"lpjml"` module or add it to your
-`".profile"`.
## Overview