From af076d22dead89820ecd3f0342fa3be24f839c6c Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Dec 2024 16:01:14 +0100 Subject: [PATCH] Check column names of dataframes? (#567) * Check column names of dataframes? Fixes #276 * Update R/utils.R Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> * revise wording --------- Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- DESCRIPTION | 2 +- R/adjust.R | 8 ++------ R/utils.R | 23 +++++++++++++++++++++++ tests/testthat/test-adjust.R | 10 ++++++++++ 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dffcdfc3a..3935073b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.13.0.17 +Version: 0.13.0.18 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/R/adjust.R b/R/adjust.R index 5d50b16d0..9cd5805e9 100644 --- a/R/adjust.R +++ b/R/adjust.R @@ -75,12 +75,8 @@ adjust <- function(data, ignore_case = FALSE, regex = FALSE, verbose = FALSE) { - if (!all(colnames(data) == make.names(colnames(data), unique = TRUE))) { - insight::format_warning( - "Bad column names (e.g., with spaces) have been detected which might create issues in many functions.", - "Please fix it (you can run `names(mydata) <- make.names(names(mydata))` for a quick fix)." - ) - } + # make sure column names are syntactically valid + .check_dataframe_names(data, action = "error") # check for formula notation, convert to character vector if (inherits(effect, "formula")) { diff --git a/R/utils.R b/R/utils.R index 25275fee5..97f6d922b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -47,6 +47,29 @@ } +#' Checks dataframes for syntactically valid column names +#' Argument "action" can be "warning", "error", or "message". 
+#'
+#' @keywords internal
+#' @noRd
+.check_dataframe_names <- function(x, action = "warning", verbose = TRUE) {
+  # names that differ from their `make.names()` form, or are `NA`, are invalid
+  invalid <- is.na(colnames(x)) | colnames(x) != make.names(colnames(x), unique = TRUE)
+  # `verbose = FALSE` suppresses the alert for every `action`, including "error"
+  if (verbose && any(invalid)) {
+    insight::format_alert(
+      "Bad column names (e.g., with spaces) have been detected which might create issues in many functions.",
+      paste0(
+        "We recommend to rename following columns: ",
+        text_concatenate(colnames(x)[invalid], enclose = "`")
+      ),
+      "You can run `names(mydata) <- make.names(names(mydata))` or use `janitor::clean_names()` for a quick fix.", # nolint
+      type = action
+    )
+  }
+}
+
+
 #' Fuzzy grep, matches pattern that are close, but not identical
 #' @examples
 #' colnames(iris)
diff --git a/tests/testthat/test-adjust.R b/tests/testthat/test-adjust.R
index 44e5e8dc2..c5a3364ee 100644
--- a/tests/testthat/test-adjust.R
+++ b/tests/testthat/test-adjust.R
@@ -28,3 +28,13 @@ test_that("adjust regex", {
     adjust(mtcars, select = "mpg")
   )
 })
+
+# invalid column names ------------------------
+test_that("adjust, invalid column names", {
+  data(iris)
+  colnames(iris)[1] <- "I am"
+  expect_error(
+    adjust(iris[c("I am", "Species")], multilevel = FALSE, bayesian = FALSE),
+    regexp = "Bad column names"
+  )
+})