From cd8841c5fd73cf757147af23a715cf0b9636f29a Mon Sep 17 00:00:00 2001 From: "Rees, Nathan {MDBL~Welwyn}" Date: Tue, 5 Mar 2024 18:58:56 +0100 Subject: [PATCH 1/5] process_cut updated so that the patient_cut_v, date_cut_m and no_cut_v arguments have a default value of NULL --- NEWS.md | 4 +++- R/process_cut.R | 36 ++++++++++++++++++------------------ man/process_cut.Rd | 6 +++--- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/NEWS.md b/NEWS.md index e147e9ac..2d7a0112 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,10 +6,12 @@ ## Updates of Existing Functions - Update to `impute_dcutdtc()`, `date_cut()` and `special_dm_cut()` functions to allow for datacut date to be null. In this case, all records for this patient -will be kept/left unchanged. (#179, #188, #189, #190) +will be kept/left unchanged. (#179, #189, #190) - Warning added to `process_cut` if expected dataset `dm` is missing (#172) - Warning added to `create_dcut` if cut date being passed as `NULL`, and not valid date or `NA`/`""` (#181) +- `process_cut` updated so that the `patient_cut_v`, `date_cut_m` and `no_cut_v` +arguments have a default value of `NULL` (#188) ## Breaking Changes - Added dependency on `admiraldev` >= 0.3.0 (#173) diff --git a/R/process_cut.R b/R/process_cut.R index 2fe59186..124a89da 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -52,36 +52,36 @@ #' ) #' process_cut <- function(source_sdtm_data, - patient_cut_v = vector(), - date_cut_m = matrix(nrow = 0, ncol = 2), - no_cut_v = vector(), + patient_cut_v = NULL, + date_cut_m = NULL, + no_cut_v = NULL, dataset_cut, cut_var, special_dm = TRUE) { # Assertions for input parameters ----------------------------------------------- assert_that(is.list(source_sdtm_data), - msg = "source_sdtm_data must be a list" + msg = "source_sdtm_data must be of class list" ) assert_that(all(unlist(lapply(source_sdtm_data, is.data.frame))), - msg = "All elements of the list source_sdtm_data must be a dataframe" + msg = "All elements of source_sdtm_data must be a dataframe" ) - assert_that(all(is.vector(patient_cut_v), patient_cut_v != ""), - msg = "patient_cut_v must be a vector. \n -Note: If you do not wish to use a patient cut on any SDTMv domains, then please leave -patient_cut_v empty, in which case a default value of vector() will be used." + assert_that(all(is.vector(patient_cut_v) | is.null(patient_cut_v), patient_cut_v != ""), + msg = "patient_cut_v must be a vector or NULL. \n +Note: If no SDTMv domains use a patient cut, then please leave patient_cut_v +empty, in which case a default value of NULL will be used." ) - assert_that(all(is.matrix(date_cut_m), date_cut_m != ""), - msg = "date_cut_m must be a matrix \n -Note: If you do not wish to use a date cut on any SDTMv domains, then please leave -date_cut_m empty, in which case a default value of matrix(nrow=0, ncol=2) will be used." + assert_that(all(is.matrix(date_cut_m) | is.null(date_cut_m), date_cut_m != ""), + msg = "date_cut_m must be a matrix or NULL. \n +Note: If no SDTMv domains use a date cut, then please leave date_cut_m +empty, in which case a default value of NULL will be used." ) - assert_that(ncol(date_cut_m) == 2, - msg = "date_cut_m must be a matrix with two columns" + assert_that(any(ncol(date_cut_m) == 2, is.null(date_cut_m)), + msg = "date_cut_m must be a matrix with two columns or NULL." ) - assert_that(all(is.vector(no_cut_v), no_cut_v != ""), - msg = "no_cut_v must be a vector. \n + assert_that(all(is.vector(no_cut_v) | is.null(no_cut_v), no_cut_v != ""), + msg = "no_cut_v must be a vector or NULL. \n Note: If you do not wish to leave any SDTMv domains uncut, then please leave -no_cut_v empty, in which case a default value of vector() will be used." +no_cut_v empty, in which case a default value of NULL will be used." ) cut_var <- assert_symbol(enexpr(cut_var)) assert_data_frame(dataset_cut, diff --git a/man/process_cut.Rd b/man/process_cut.Rd index bcb9a233..fe8a1ef3 100644 --- a/man/process_cut.Rd +++ b/man/process_cut.Rd @@ -6,9 +6,9 @@ \usage{ process_cut( source_sdtm_data, - patient_cut_v = vector(), - date_cut_m = matrix(nrow = 0, ncol = 2), - no_cut_v = vector(), + patient_cut_v = NULL, + date_cut_m = NULL, + no_cut_v = NULL, dataset_cut, cut_var, special_dm = TRUE From 67d339116f1b247b594be7fd90e2bf06547bb05b Mon Sep 17 00:00:00 2001 From: "Rees, Nathan {MDBL~Welwyn}" Date: Tue, 5 Mar 2024 19:34:22 +0100 Subject: [PATCH 2/5] Removed indentation_linter=NULL from the .lintr file --- .lintr | 1 - 1 file changed, 1 deletion(-) diff --git a/.lintr b/.lintr index b78e223d..b5d74abe 100644 --- a/.lintr +++ b/.lintr @@ -2,7 +2,6 @@ linters: linters_with_defaults( line_length_linter(100), object_usage_linter=NULL, infix_spaces_linter=NULL, - indentation_linter=NULL, cyclocomp_linter(complexity_limit = 22) ) exclusions: list( From bb056a2770540617216672e87b6b9e362699dee0 Mon Sep 17 00:00:00 2001 From: "Rees, Nathan {MDBL~Welwyn}" Date: Thu, 14 Mar 2024 17:41:12 +0100 Subject: [PATCH 3/5] Added more detailed error message when incorrect datasets are fed into process_cut --- NEWS.md | 2 ++ R/process_cut.R | 60 +++++++++++++++++-------------- tests/testthat/test-process_cut.R | 12 +++---- 3 files changed, 39 insertions(+), 35 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2d7a0112..3fd79196 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ will be kept/left unchanged. (#179, #189, #190) and not valid date or `NA`/`""` (#181) - `process_cut` updated so that the `patient_cut_v`, `date_cut_m` and `no_cut_v` arguments have a default value of `NULL` (#188) +- `process_cut` updated to have more detailed error messages when incorrect datasets +are fed in (#180) ## Breaking Changes - Added dependency on `admiraldev` >= 0.3.0 (#173) diff --git a/R/process_cut.R b/R/process_cut.R index 124a89da..7682f957 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -62,67 +62,73 @@ process_cut <- function(source_sdtm_data, assert_that(is.list(source_sdtm_data), msg = "source_sdtm_data must be of class list" ) + assert_that(all(unlist(lapply(source_sdtm_data, is.data.frame))), msg = "All elements of source_sdtm_data must be a dataframe" ) + assert_that(all(is.vector(patient_cut_v) | is.null(patient_cut_v), patient_cut_v != ""), msg = "patient_cut_v must be a vector or NULL. \n Note: If no SDTMv domains use a patient cut, then please leave patient_cut_v empty, in which case a default value of NULL will be used." ) + assert_that(all(is.matrix(date_cut_m) | is.null(date_cut_m), date_cut_m != ""), msg = "date_cut_m must be a matrix or NULL. \n Note: If no SDTMv domains use a date cut, then please leave date_cut_m empty, in which case a default value of NULL will be used." ) + assert_that(any(ncol(date_cut_m) == 2, is.null(date_cut_m)), msg = "date_cut_m must be a matrix with two columns or NULL." ) + assert_that(all(is.vector(no_cut_v) | is.null(no_cut_v), no_cut_v != ""), msg = "no_cut_v must be a vector or NULL. \n Note: If you do not wish to leave any SDTMv domains uncut, then please leave no_cut_v empty, in which case a default value of NULL will be used." ) + cut_var <- assert_symbol(enexpr(cut_var)) assert_data_frame(dataset_cut, required_vars = exprs(USUBJID, !!cut_var) ) + assert_that(is.logical(special_dm), msg = "special_dm must be either TRUE or FALSE" ) + if (special_dm) { assert_that("dm" %in% names(source_sdtm_data), - msg = "dataset `dm` is missing but special_dm processing expects this" + msg = "dataset `dm` is missing from source_sdtm_data but special_dm processing expects this" ) - assert_that( - setequal(names(source_sdtm_data), c( - patient_cut_v, date_cut_m[, 1], no_cut_v, - "dm" - )), - msg = "Inconsistency between input SDTMv datasets and the SDTMv datasets -listed under each cut approach. Please check for the two likely issues below... \n -1) There are input SDTMv datasets where no cut method has been defined. -2) A cut method has been defined for a SDTMv dataset that does not exist in the -source SDTMv data." + cut_inputs <- c(patient_cut_v, date_cut_m[, 1], no_cut_v, "dm") + } else { + cut_inputs <- c(patient_cut_v, date_cut_m[, 1], no_cut_v) + } + sdtm_inputs <- names(source_sdtm_data) + for (i in seq_len(length(sdtm_inputs))) { + error_msg1 <- paste0(sdtm_inputs[i], " exists more than once in source_sdtm_data") + assert_that(!(sdtm_inputs[i] %in% sdtm_inputs[-i]), + msg = error_msg1 ) - assert_that( - length(unique(c(patient_cut_v, date_cut_m[, 1], no_cut_v, "dm"))) - == length(c(patient_cut_v, date_cut_m[, 1], no_cut_v, "dm")), - msg = "The number of SDTMv datasets in the source data does not match the -number of SDTMv datasets in which a cut approach has been defined." + + error_msg2 <- paste0(sdtm_inputs[i], " exists in source_sdtm_data but no cut +method has been assigned") + assert_that(sdtm_inputs[i] %in% cut_inputs, + msg = error_msg2 ) - } else { - assert_that(setequal(names(source_sdtm_data), c(patient_cut_v, date_cut_m[, 1], no_cut_v)), - msg = "Inconsistency between input SDTMv datasets and the SDTMv datasets -listed under each cut approach. Please check for the two likely issues below... \n -1) There are input SDTMv datasets where no cut method has been defined. -2) A cut method has been defined for a SDTMv dataset that does not exist in the source SDTMv data." + } + for (i in seq_len(length(cut_inputs))) { + error_msg1 <- paste0("Multiple cut types have been assigned for ", cut_inputs[i]) + assert_that(!(cut_inputs[i] %in% cut_inputs[-i]), + msg = error_msg1 ) - assert_that( - length(unique(c(patient_cut_v, date_cut_m[, 1], no_cut_v))) - == length(c(patient_cut_v, date_cut_m[, 1], no_cut_v)), - msg = "The number of SDTMv datasets in the source data does not match the -number of SDTMv datasets in which a cut approach has been defined." + + error_msg2 <- paste0("Cut types have been assigned for ", cut_inputs[i], + " which does not exist in source_sdtm_data") + assert_that(cut_inputs[i] %in% sdtm_inputs, + msg = error_msg2 ) } diff --git a/tests/testthat/test-process_cut.R b/tests/testthat/test-process_cut.R index 6dc702a1..58ccaa43 100644 --- a/tests/testthat/test-process_cut.R +++ b/tests/testthat/test-process_cut.R @@ -101,8 +101,7 @@ test_that("Test that process_cut() errors when a source SDTM dataset is not cut_var = DCUTDTM, special_dm = TRUE ), - regexp = "Inconsistency between input SDTMv datasets and the SDTMv datasets -listed under each cut approach." + regexp = "sc exists in source_sdtm_data but no cut\nmethod has been assigned" ) }) @@ -124,8 +123,7 @@ test_that("Test that process_cut() errors when an input list includes a source cut_var = DCUTDTM, special_dm = TRUE ), - regexp = "Inconsistency between input SDTMv datasets and the SDTMv datasets -listed under each cut approach." + regexp = "Cut types have been assigned for vs which does not exist in source_sdtm_data" ) }) @@ -147,8 +145,7 @@ test_that("Test that process_cut() errors when a source SDTMv dataset is cut_var = DCUTDTM, special_dm = TRUE ), - regexp = "The number of SDTMv datasets in the source data does not match the -number of SDTMv datasets in which a cut approach has been defined." + regexp = "Multiple cut types have been assigned for ae" ) }) @@ -170,8 +167,7 @@ test_that("Test that process_cut() errors when special_dm = TRUE and dm is also cut_var = DCUTDTM, special_dm = TRUE ), - regexp = "The number of SDTMv datasets in the source data does not match the -number of SDTMv datasets in which a cut approach has been defined." + regexp = "Multiple cut types have been assigned for dm" ) }) From f5cc2c819855748bfff1a5020cd644047f7aa576 Mon Sep 17 00:00:00 2001 From: Nathan Rees Date: Wed, 3 Apr 2024 13:27:45 +0000 Subject: [PATCH 4/5] Updated to account for when more than one domain is mis-matching. --- R/process_cut.R | 68 ++++++++++++++++++++----------- tests/testthat/test-process_cut.R | 2 +- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/R/process_cut.R b/R/process_cut.R index 7682f957..58fe6fef 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -98,39 +98,59 @@ no_cut_v empty, in which case a default value of NULL will be used." msg = "special_dm must be either TRUE or FALSE" ) + sdtm_inputs <- names(source_sdtm_data) + cut_inputs <- c(patient_cut_v, date_cut_m[, 1], no_cut_v) + if (special_dm) { assert_that("dm" %in% names(source_sdtm_data), msg = "dataset `dm` is missing from source_sdtm_data but special_dm processing expects this" ) - cut_inputs <- c(patient_cut_v, date_cut_m[, 1], no_cut_v, "dm") - } else { - cut_inputs <- c(patient_cut_v, date_cut_m[, 1], no_cut_v) + cut_inputs <- append(cut_inputs, "dm") } - sdtm_inputs <- names(source_sdtm_data) - for (i in seq_len(length(sdtm_inputs))) { - error_msg1 <- paste0(sdtm_inputs[i], " exists more than once in source_sdtm_data") - assert_that(!(sdtm_inputs[i] %in% sdtm_inputs[-i]), - msg = error_msg1 - ) - error_msg2 <- paste0(sdtm_inputs[i], " exists in source_sdtm_data but no cut -method has been assigned") - assert_that(sdtm_inputs[i] %in% cut_inputs, - msg = error_msg2 - ) + sdtm_inputs_dups <- c() + no_cut_method <- c() + for (i in seq_len(length(sdtm_inputs))) { + if ((sdtm_inputs[i] %in% sdtm_inputs[-i]) && !(sdtm_inputs[i] %in% sdtm_inputs_dups)) { + sdtm_inputs_dups <- append(sdtm_inputs_dups, sdtm_inputs[i]) + } + if (!(sdtm_inputs[i] %in% cut_inputs) && !(sdtm_inputs[i] %in% no_cut_method)) { + no_cut_method <- append(no_cut_method, sdtm_inputs[i]) + } } - for (i in seq_len(length(cut_inputs))) { - error_msg1 <- paste0("Multiple cut types have been assigned for ", cut_inputs[i]) - assert_that(!(cut_inputs[i] %in% cut_inputs[-i]), - msg = error_msg1 - ) + error_msg1 <- paste0(paste(sdtm_inputs_dups, collapse = " & "), + " exists more than once in source_sdtm_data") + assert_that(is.null(sdtm_inputs_dups), + msg = error_msg1 + ) + error_msg2 <- paste0(paste(no_cut_method, collapse = " & "), + " exists in source_sdtm_data but no cut method has been assigned") + assert_that(is.null(no_cut_method), + msg = error_msg2 + ) - error_msg2 <- paste0("Cut types have been assigned for ", cut_inputs[i], - " which does not exist in source_sdtm_data") - assert_that(cut_inputs[i] %in% sdtm_inputs, - msg = error_msg2 - ) + cut_inputs_dups <- c() + no_sdtm <- c() + for (i in seq_len(length(cut_inputs))) { + if ((cut_inputs[i] %in% cut_inputs[-i]) && !(cut_inputs[i] %in% cut_inputs_dups)) { + cut_inputs_dups <- append(cut_inputs_dups, cut_inputs[i]) + } + if (!(cut_inputs[i] %in% sdtm_inputs) && !(cut_inputs[i] %in% no_sdtm)) { + no_sdtm <- append(no_sdtm, cut_inputs[i]) + } } + error_msg3 <- paste0("Multiple cut types have been assigned for ", + paste(cut_inputs_dups, collapse = " & ")) + assert_that(is.null(cut_inputs_dups), + msg = error_msg3 + ) + error_msg4 <- paste0( + "Cut types have been assigned for ", paste(no_sdtm, collapse = " & "), + " which does not exist in source_sdtm_data" + ) + assert_that(is.null(no_sdtm), + msg = error_msg4 + ) # Conduct Patient-Level Cut ------------------------------------------------------ diff --git a/tests/testthat/test-process_cut.R b/tests/testthat/test-process_cut.R index 58ccaa43..5925c909 100644 --- a/tests/testthat/test-process_cut.R +++ b/tests/testthat/test-process_cut.R @@ -101,7 +101,7 @@ test_that("Test that process_cut() errors when a source SDTM dataset is not cut_var = DCUTDTM, special_dm = TRUE ), - regexp = "sc exists in source_sdtm_data but no cut\nmethod has been assigned" + regexp = "sc exists in source_sdtm_data but no cut method has been assigned" ) }) From a62b11541e55168ea9df4e214e5a464c71644f81 Mon Sep 17 00:00:00 2001 From: "Rees, Nathan {MDBL~Welwyn}" Date: Thu, 16 May 2024 15:06:56 +0200 Subject: [PATCH 5/5] Styled process_cut.R --- R/process_cut.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/process_cut.R b/R/process_cut.R index 58fe6fef..f8590f8c 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -118,13 +118,17 @@ no_cut_v empty, in which case a default value of NULL will be used." no_cut_method <- append(no_cut_method, sdtm_inputs[i]) } } - error_msg1 <- paste0(paste(sdtm_inputs_dups, collapse = " & "), - " exists more than once in source_sdtm_data") + error_msg1 <- paste0( + paste(sdtm_inputs_dups, collapse = " & "), + " exists more than once in source_sdtm_data" + ) assert_that(is.null(sdtm_inputs_dups), msg = error_msg1 ) - error_msg2 <- paste0(paste(no_cut_method, collapse = " & "), - " exists in source_sdtm_data but no cut method has been assigned") + error_msg2 <- paste0( + paste(no_cut_method, collapse = " & "), + " exists in source_sdtm_data but no cut method has been assigned" + ) assert_that(is.null(no_cut_method), msg = error_msg2 ) @@ -139,8 +143,10 @@ no_cut_v empty, in which case a default value of NULL will be used." no_sdtm <- append(no_sdtm, cut_inputs[i]) } } - error_msg3 <- paste0("Multiple cut types have been assigned for ", - paste(cut_inputs_dups, collapse = " & ")) + error_msg3 <- paste0( + "Multiple cut types have been assigned for ", + paste(cut_inputs_dups, collapse = " & ") + ) assert_that(is.null(cut_inputs_dups), msg = error_msg3 )