diff --git a/.buildlibrary b/.buildlibrary index dc1c4d6..c62ef26 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '3177440' +ValidationKey: '3381470' AutocreateReadme: yes AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' diff --git a/CITATION.cff b/CITATION.cff index 3f11812..3e30a34 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,8 +2,8 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'lpjmlkit: Toolkit for Basic LPJmL Handling' -version: 1.6.0 -date-released: '2024-05-16' +version: 1.7.0 +date-released: '2024-06-17' abstract: A collection of basic functions to facilitate the work with the Dynamic Global Vegetation Model (DGVM) Lund-Potsdam-Jena managed Land (LPJmL) hosted at the Potsdam Institute for Climate Impact Research (PIK). It provides functions for diff --git a/DESCRIPTION b/DESCRIPTION index 7bee223..164643b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: lpjmlkit Type: Package Title: Toolkit for Basic LPJmL Handling -Version: 1.6.0 +Version: 1.7.0 Authors@R: c( person("Jannes", "Breier", , "jannesbr@pik-potsdam.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9055-6904")), person("Sebastian","Ostberg", , "ostberg@pik-potsdam.de", role = "aut", comment = c(ORCID = "0000-0002-2368-7015")), @@ -54,4 +54,4 @@ Suggests: sf Config/testthat/edition: 3 VignetteBuilder: knitr -Date: 2024-05-16 +Date: 2024-06-17 diff --git a/R/run_lpjml.R b/R/run_lpjml.R index 0cd8c4e..582ba0e 100644 --- a/R/run_lpjml.R +++ b/R/run_lpjml.R @@ -19,6 +19,10 @@ #' written, including output, restart and configuration files. If `NULL`, #' `model_path` is used. See also [write_config] #' +#' @param run_cmd Character string defining the command used to execute lpjml +#' (see details). Defaults to "srun --propagate" (compute nodes of old cluster +#' at PIK). Change to "mpirun" for HPC2024 at PIK. 
+#' #' @param parallel_cores Integer defining the number of available CPU #' cores/nodes for parallelization. Defaults to `1` (no parallelization). #' Please note that parallelization is only supported for SLURM jobs and not @@ -37,6 +41,8 @@ #' @return See `x`, extended by columns `"type"`, `"job_id"` and `"status"`. #' #' @details +#' +#' \bold{x}: #' A \link[tibble]{tibble} for `x` that has been generated by #' [`write_config()`] and can look like the following examples can #' supplied: @@ -62,6 +68,18 @@ #' Also be aware that the order of the supplied config files is important #' (e.g. make sure the spin-up run is run before the transient one). #' +#' \bold{run_cmd}: +#' The `run_cmd` argument is used to define the command to execute LPJmL. This +#' is needed because the LPJmL executable can not directly be used on all +#' machines. Which command has to be used depends on the software installed. +#' Further information on this can be found in the INSTALL file of LPJmL. +#' To determine the correct command, check the lpj_submit.sh file in the bin +#' directory of LPJmL. Using PIK infrastructure the command is \code{srun} for +#' the hpc2015 and \code{mpirun} for the hpc2024. To facilitate usage on the +#' interactive (login) nodes, no command is needed for hpc2015. For the hpc2024 +#' the command remains \code{mpirun} (in these cases \code{run_lpjml} adjusts +#' \code{run_cmd} accordingly). 
+#' #' @examples #' #' \dontrun{ @@ -149,6 +167,7 @@ run_lpjml <- function(x, model_path = ".", sim_path = NULL, + run_cmd = "srun --propagate", parallel_cores = 1, write_stdout = FALSE, raise_error = TRUE, @@ -160,6 +179,10 @@ run_lpjml <- function(x, if (!dir.exists(model_path)) { stop("Folder of model_path \"", model_path, "\" does not exist.") } + # Check if run_cmd is valid + if (!grepl("mpirun|srun", run_cmd) && run_cmd != "") { + stop("Please provide a supported run command (\"mpirun\", \"srun\" or \"\")") + } sim_path <- deprecate_arg(new_arg = sim_path, deprec_arg = output_path, @@ -191,11 +214,11 @@ run_lpjml <- function(x, if (parallel_cores == 1) { do_sequential( - sim_names, model_path, sim_path, write_stdout, raise_error + sim_names, model_path, sim_path, run_cmd, write_stdout, raise_error ) } else if (parallel_cores > 1 && Sys.getenv("SLURM_JOB_ID") != "") { do_parallel( - sim_names, model_path, sim_path, parallel_cores, raise_error + sim_names, model_path, sim_path, run_cmd, parallel_cores, raise_error ) } else { stop( @@ -209,11 +232,11 @@ run_lpjml <- function(x, } else { if (parallel_cores == 1) { do_sequential( - x$sim_name, model_path, sim_path, write_stdout, raise_error + x$sim_name, model_path, sim_path, run_cmd, write_stdout, raise_error ) } else if (parallel_cores > 1 && Sys.getenv("SLURM_JOB_ID") != "") { do_parallel( - x$sim_name, model_path, sim_path, parallel_cores, raise_error + x$sim_name, model_path, sim_path, run_cmd, parallel_cores, raise_error ) } else { stop( @@ -233,6 +256,7 @@ run_lpjml <- function(x, do_run <- function(sim_name, model_path, sim_path, + run_cmd, write_stdout, raise_error) { @@ -248,8 +272,8 @@ do_run <- function(sim_name, # When running inside a slurm job it ensures to propagate ressources inner_command <- paste0(ifelse(Sys.getenv("SLURM_JOB_ID") == "", - "", - "srun --propagate "), + ifelse(grepl("mpirun", run_cmd), run_cmd, ""), + run_cmd), model_path, "/bin/lpjml ", # nolint:absolute_path_linter. 
sim_path, @@ -289,9 +313,9 @@ do_run <- function(sim_name, echo = !testthat::is_testing(), cleanup_tree = TRUE, spinner = ifelse(write_stdout && - Sys.getenv("SLURM_JOB_ID") == "", - TRUE, - FALSE), + Sys.getenv("SLURM_JOB_ID") == "", + TRUE, + FALSE), error_on_status = raise_error, wd = sim_path) } @@ -302,6 +326,7 @@ do_run <- function(sim_name, do_sequential <- function(sim_names, model_path, sim_path, + run_cmd, write_stdout, raise_error) { @@ -311,14 +336,19 @@ do_sequential <- function(sim_names, tryCatch({ # Check if slurm is available - if (is_slurm_available() && Sys.getenv("SLURM_JOB_ID") == "") { + if (is_slurm_available() && Sys.getenv("SLURM_JOB_ID") == "" && !grepl("mpirun", run_cmd)) { mpi_var <- Sys.getenv("I_MPI_DAPL_UD_PROVIDER") Sys.unsetenv("I_MPI_DAPL_UD_PROVIDER")# nolint:undesirable_function_linter. } else { mpi_var <- NULL + # If not specified by the user set number of processes to 1 to run lpjml + # in interactive mode + if (run_cmd == "mpirun") { + run_cmd <- paste0(run_cmd, " -np 1 ") + } } for (sim_name in sim_names) { - do_run(sim_name, model_path, sim_path, write_stdout, raise_error) + do_run(sim_name, model_path, sim_path, run_cmd, write_stdout, raise_error) } }, finally = { # Check if slurm is available @@ -333,6 +363,7 @@ do_sequential <- function(sim_names, do_parallel <- function(sim_names, model_path, sim_path, + run_cmd, parallel_cores, raise_error) { @@ -346,16 +377,23 @@ do_parallel <- function(sim_names, # Parallel foreach sim_name. job_details <- foreach::foreach(sim_name = sim_names, # nolint:object_usage_linter. 
- .errorhandling = "stop" + .errorhandling = "stop" ) %dopar% { # Write single call tryCatch({ + + if (is_slurm_available() && Sys.getenv("SLURM_JOB_ID") != "") { + mpi_lib <- Sys.getenv("I_MPI_PMI_LIBRARY") + Sys.setenv(I_MPI_PMI_LIBRARY = "/p/system/slurm/lib/libpmi.so") # nolint + } else { + mpi_lib <- NULL + } do_run( - sim_name, model_path, sim_path, write_stdout = TRUE, raise_error + sim_name, model_path, sim_path, run_cmd, write_stdout = TRUE, raise_error ) - # Stop when error occures + # Stop when error occures }, error = function(e) { # Check if error is returned @@ -379,6 +417,10 @@ do_parallel <- function(sim_names, " parallelization" ) } + }, finally = { + if (!is.null(mpi_lib)) { + Sys.setenv(I_MPI_PMI_LIBRARY = mpi_lib) # nolint:undesirable_function_linter. + } }) } diff --git a/R/submit_lpjml.R b/R/submit_lpjml.R index d61326f..91e414c 100644 --- a/R/submit_lpjml.R +++ b/R/submit_lpjml.R @@ -20,7 +20,7 @@ #' `model_path` is used. See also [write_config] #' #' @param group Character string defining the user group for which the job is -#' submitted. Defaults to `"lpjml"`. +#' submitted. #' #' @param sclass Character string defining the job classification. Available #' options at PIK: `c("short", "medium", "long", "priority", "standby", "io")` @@ -185,7 +185,7 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. model_path, sim_path = NULL, - group = "lpjml", + group = "", sclass = "short", ntasks = 256, wtime = "", @@ -254,7 +254,7 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. # to this function by mapply call slurm_param <- ( x[slurm_args[slurm_args %in% colnames(x)]][ - sim_idx, ] + sim_idx, ] ) mapply( # nolint:undesirable_function_linter. @@ -306,7 +306,7 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. # this function by mapply call slurm_param <- ( x[slurm_args[slurm_args %in% colnames(x)]][ - sim_idx, ] + sim_idx, ] ) mapply( # nolint:undesirable_function_linter. 
@@ -375,28 +375,28 @@ submit_run <- function(sim_name, timestamp <- format(Sys.time(), "%Y%m%d_%H%M") stdout <- paste0(sim_path, - "/output/", - sim_name, - "/", - "outfile_", - timestamp, - ".out") + "/output/", + sim_name, + "/", + "outfile_", + timestamp, + ".out") stderr <- paste0(sim_path, - "/output/", - sim_name, - "/", - "errfile_", - timestamp, - ".err") + "/output/", + sim_name, + "/", + "errfile_", + timestamp, + ".err") output_config <- paste0(sim_path, - "/output/", - sim_name, - "/", - "config_", - timestamp, - ".json") + "/output/", + sim_name, + "/", + "config_", + timestamp, + ".json") if (is.list(slurm_options) && length(slurm_options) > 0) { further_slurm_options <- paste0( @@ -409,7 +409,9 @@ submit_run <- function(sim_name, inner_command <- paste0(model_path, "/bin/lpjsubmit", # nolint:absolute_path_linter. " -nocheck", " -class ", sclass, - " -group ", group, + ifelse(group != "", + paste0(" -group ", group), + ""), ifelse(wtime != "", paste0(" -wtime ", wtime), ""), diff --git a/README.md b/README.md index 3360bc7..b98e3fc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Toolkit for Basic LPJmL Handling -R package **lpjmlkit**, version **1.6.0** +R package **lpjmlkit**, version **1.7.0** [![CRAN status](https://www.r-pkg.org/badges/version/lpjmlkit)](https://cran.r-project.org/package=lpjmlkit) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7773134.svg)](https://doi.org/10.5281/zenodo.7773134) [![R build status](https://github.com/PIK-LPJmL/lpjmlkit/workflows/check/badge.svg)](https://github.com/PIK-LPJmL/lpjmlkit/actions) [![codecov](https://codecov.io/gh/PIK-LPJmL/lpjmlkit/branch/master/graph/badge.svg)](https://app.codecov.io/gh/PIK-LPJmL/lpjmlkit) [![r-universe](https://pik-piam.r-universe.dev/badges/lpjmlkit)](https://pik-piam.r-universe.dev/builds) @@ -76,7 +76,7 @@ In case of questions / problems please contact Jannes Breier . +Breier J, Ostberg S, Wirth S, Minoli S, Stenzel F, Müller C (2024). 
_lpjmlkit: Toolkit for Basic LPJmL Handling_. doi: 10.5281/zenodo.7773134 (URL: https://doi.org/10.5281/zenodo.7773134), R package version 1.7.0, . A BibTeX entry for LaTeX users is @@ -85,7 +85,7 @@ A BibTeX entry for LaTeX users is title = {lpjmlkit: Toolkit for Basic LPJmL Handling}, author = {Jannes Breier and Sebastian Ostberg and Stephen Björn Wirth and Sara Minoli and Fabian Stenzel and Christoph Müller}, year = {2024}, - note = {R package version 1.6.0}, + note = {R package version 1.7.0}, doi = {10.5281/zenodo.7773134}, url = {https://github.com/PIK-LPJmL/lpjmlkit}, } diff --git a/man/run_lpjml.Rd b/man/run_lpjml.Rd index f0b4817..4d7fba6 100644 --- a/man/run_lpjml.Rd +++ b/man/run_lpjml.Rd @@ -8,6 +8,7 @@ run_lpjml( x, model_path = ".", sim_path = NULL, + run_cmd = "srun --propagate", parallel_cores = 1, write_stdout = FALSE, raise_error = TRUE, @@ -29,6 +30,10 @@ multiple generated configuration file(s).} written, including output, restart and configuration files. If \code{NULL}, \code{model_path} is used. See also \link{write_config}} +\item{run_cmd}{Character string defining the command used to execute lpjml +(see details). Defaults to "srun --propagate" (compute nodes of old cluster +at PIK). Change to "mpirun" for HPC2024 at PIK.} + \item{parallel_cores}{Integer defining the number of available CPU cores/nodes for parallelization. Defaults to \code{1} (no parallelization). Please note that parallelization is only supported for SLURM jobs and not @@ -54,6 +59,7 @@ that can be used as an input (see \code{x}). It contains the details to run sing or multiple (dependent/subsequent) model runs. } \details{ +\bold{x}: A \link[tibble]{tibble} for \code{x} that has been generated by \code{\link[=write_config]{write_config()}} and can look like the following examples can supplied:\tabular{l}{ @@ -77,6 +83,18 @@ As a shortcut it is also possible to provide the config file character string vector directly as the \code{x} argument to \code{run_lpjml}. 
\cr Also be aware that the order of the supplied config files is important (e.g. make sure the spin-up run is run before the transient one). + +\bold{run_cmd}: +The \code{run_cmd} argument is used to define the command to execute LPJmL. This +is needed because the LPJmL executable can not directly be used on all +machines. Which command has to be used depends on the software installed. +Further information on this can be found in the INSTALL file of LPJmL. +To determine the correct command, check the lpj_submit.sh file in the bin +directory of LPJmL. Using PIK infrastructure the command is \code{srun} for +the hpc2015 and \code{mpirun} for the hpc2024. To facilitate usage on the +interactive (login) nodes, no command is needed for hpc2015. For the hpc2024 +the command remains \code{mpirun} (in these cases \code{run_lpjml} adjusts +\code{run_cmd} accordingly). } \examples{ diff --git a/man/submit_lpjml.Rd b/man/submit_lpjml.Rd index 9ceb758..3e6aa69 100644 --- a/man/submit_lpjml.Rd +++ b/man/submit_lpjml.Rd @@ -8,7 +8,7 @@ submit_lpjml( x, model_path, sim_path = NULL, - group = "lpjml", + group = "", sclass = "short", ntasks = 256, wtime = "", @@ -35,7 +35,7 @@ written, including output, restart and configuration files. If \code{NULL}, \code{model_path} is used. See also \link{write_config}} \item{group}{Character string defining the user group for which the job is -submitted. Defaults to \code{"lpjml"}.} +submitted.} \item{sclass}{Character string defining the job classification. 
Available options at PIK: \code{c("short", "medium", "long", "priority", "standby", "io")} diff --git a/tests/testthat/test-submit_run_lpjml.R b/tests/testthat/test-submit_run_lpjml.R index d9d36ed..2c7cf62 100644 --- a/tests/testthat/test-submit_run_lpjml.R +++ b/tests/testthat/test-submit_run_lpjml.R @@ -21,7 +21,7 @@ test_that("check submit_lpjml with tibble", { test_params$status <- "not submitted" expect_true(all(unlist(as.list(test_submit)) == unlist(as.list(test_params)), - na.rm = TRUE)) + na.rm = TRUE)) # Check submit_run functionality test_submit <- submit_lpjml( @@ -31,7 +31,7 @@ test_that("check submit_lpjml with tibble", { test_params$status <- "failed" expect_true(all(unlist(as.list(test_submit)) == unlist(as.list(test_params)), - na.rm = TRUE)) + na.rm = TRUE)) }) @@ -86,7 +86,16 @@ test_that("raise run_lpjml errors", { "Folder of model_path" ) - # Check if system command failes + # Check if unsupported run_cmd fails + expect_error( + run_lpjml( + "./config_scen1_spinup.json", + run_cmd = "invalid_command" + ), + "run command" + ) + + # Check if system command fails expect_error( run_lpjml( "./config_scen1_spinup.json"