From af076d22dead89820ecd3f0342fa3be24f839c6c Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 13 Dec 2024 16:01:14 +0100
Subject: [PATCH] Check column names of dataframes? (#567)

* Check column names of dataframes?
Fixes #276

* Update R/utils.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>

* revise wording

---------

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 DESCRIPTION                  |  2 +-
 R/adjust.R                   |  8 ++------
 R/utils.R                    | 23 +++++++++++++++++++++++
 tests/testthat/test-adjust.R | 10 ++++++++++
 4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index dffcdfc3a..3935073b0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.13.0.17
+Version: 0.13.0.18
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531")),
diff --git a/R/adjust.R b/R/adjust.R
index 5d50b16d0..9cd5805e9 100644
--- a/R/adjust.R
+++ b/R/adjust.R
@@ -75,12 +75,8 @@ adjust <- function(data,
                    ignore_case = FALSE,
                    regex = FALSE,
                    verbose = FALSE) {
-  if (!all(colnames(data) == make.names(colnames(data), unique = TRUE))) {
-    insight::format_warning(
-      "Bad column names (e.g., with spaces) have been detected which might create issues in many functions.",
-      "Please fix it (you can run `names(mydata) <- make.names(names(mydata))` for a quick fix)."
-    )
-  }
+  # make sure column names are syntactically valid
+  .check_dataframe_names(data, action = "error")
 
   # check for formula notation, convert to character vector
   if (inherits(effect, "formula")) {
diff --git a/R/utils.R b/R/utils.R
index 25275fee5..97f6d922b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -47,6 +47,29 @@
 }
 
 
+#' Checks dataframes for syntactically valid column names
+#'
+#' Signals via `insight::format_alert()`; `action` ("warning", "error" or
+#' "message") is passed as `type`. Nothing is signalled if `verbose = FALSE`.
+#' @keywords internal
+#' @noRd
+.check_dataframe_names <- function(x, action = "warning", verbose = TRUE) {
+  valid_names <- make.names(colnames(x), unique = TRUE)
+  if (verbose && !all(colnames(x) == valid_names)) {
+    insight::format_alert(
+      "Bad column names (e.g., with spaces) have been detected which might create issues in many functions.",
+      paste0(
+        "We recommend to rename following columns: ",
+        text_concatenate(colnames(x)[colnames(x) != valid_names], enclose = "`")
+      ),
+      # suggest `unique = TRUE`, else duplicated names would still fail this check
+      "You can run `names(mydata) <- make.names(names(mydata), unique = TRUE)` or use `janitor::clean_names()` for a quick fix.", # nolint
+      type = action
+    )
+  }
+}
+
+
 #' Fuzzy grep, matches pattern that are close, but not identical
 #' @examples
 #' colnames(iris)
diff --git a/tests/testthat/test-adjust.R b/tests/testthat/test-adjust.R
index 44e5e8dc2..c5a3364ee 100644
--- a/tests/testthat/test-adjust.R
+++ b/tests/testthat/test-adjust.R
@@ -28,3 +28,13 @@ test_that("adjust regex", {
     adjust(mtcars, select = "mpg")
   )
 })
+
+# invalid column names ------------------------
+test_that("adjust, invalid column names", {
+  data(iris)
+  colnames(iris)[1] <- "I am"
+  expect_error(
+    adjust(iris[c("I am", "Species")], multilevel = FALSE, bayesian = FALSE),
+    regexp = "Bad column names"
+  )
+})