Fix level column name (#356)

* Fix level column name * fix * add tests * fix * version * fix * comment * fix * fix * fix * fix
easystats · Jan 19, 2025 · 87c72f9 · 87c72f9
1 parent 7d75e9c
commit 87c72f9
Show file tree

Hide file tree

Showing 14 changed files with 201 additions and 156 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: modelbased
 Title: Estimation of Model-Based Predictions, Contrasts and Means
-Version: 0.8.9.41
+Version: 0.8.9.42
 Authors@R:
     c(person(given = "Dominique",
              family = "Makowski",

diff --git a/R/format.R b/R/format.R
@@ -180,6 +180,10 @@ format.marginaleffects_contrasts <- function(x, model, p_adjust, comparison, ...
       lapply(x$Parameter, .split_at_minus_outside_parentheses)
     ))
 
+    # we *could* stop here and simply rename the split columns, but then
+    # we cannot filter by `by` - thus, we go on, extract all single levels,
+    # combine only relevant levels and then we're able to filter...
+
     # When we filter contrasts, e.g. `contrast = c("vs", "am='1'")` or
     # `contrast = c("vs", "am"), by = "gear='5'"`, we get no contrasts if one
     # of the focal terms only has one unique value in the data grid. Thus,
@@ -244,56 +248,39 @@ format.marginaleffects_contrasts <- function(x, model, p_adjust, comparison, ...
         by <- NULL
       }
 
-      # when we just have one term for comparison, we unite levels and use a
-      # dash / minus as separator char. The column name is the name of the
-      # contrast term.
+      # if we have more than one contrast term, we unite the levels from
+      # all contrast terms that belong to one "contrast group", separated
+      # by comma, and each the two "new contrast groups" go into separate
+      # columns named "Level1" and "Level2". For one contrast term, we just
+      # need to rename the columns
       if (length(contrast) == 1) {
-        params <- datawizard::data_unite(
+        # rename columns
+        params <- datawizard::data_rename(
           params,
-          new_column = contrast,
           select = paste0(contrast, 1:2),
-          separator = " - ",
+          replacement = c("Level1", "Level2"),
           verbose = FALSE
         )
       } else {
-        # if we have more than one contrast term, we unite the levels from
-        # all contrast terms that belong to one "contrast group", separated
-        # by comma, and each the two "new contrast groups" go into separate
-        # columns named "Level 1" and "Level 2".
+        # prepare all contrast terms
         for (i in 1:2) {
           contrast_names <- paste0(contrast, i)
-          # since we combine levels from different factors, we have to make
-          # sure levels are unique across different terms. If not, paste
-          # variable names to levels. We first find the intersection of all
-          # levels from all current contrast terms
-          multiple_levels <- Reduce(
-            function(i, j) intersect(i, j),
-            lapply(params[contrast_names], unique),
-            accumulate = FALSE
-          )
-          # if we find any intersections, we have identical labels for different
-          # terms in one "contrast group" - we thus add the variable name to the
-          # levels, to avoid identical levels without knowing to which factor
-          # it belongs
-          if (length(multiple_levels)) {
-            for (cn in contrast_names) {
-              params[[cn]] <- paste(gsub(".{1}$", "", cn), params[[cn]])
-            }
-          }
+          params <- .fix_duplicated_contrastlevels(params, contrast_names)
           # finally, unite levels back into single column
           params <- datawizard::data_unite(
             params,
-            new_column = paste("Level", i),
+            new_column = paste0("Level", i),
             select = contrast_names,
             separator = ", ",
             verbose = FALSE
           )
         }
-        # we need to update these variables, because these are the new column
-        # names for contrasts and focal terms
-        contrast <- focal_terms <- c("Level 1", "Level 2")
       }
 
+      # we need to update these variables, because these are the new column
+      # names for contrasts and focal terms
+      contrast <- focal_terms <- c("Level1", "Level2")
+
       # ------------------------------------------------------------------
       # old code for the display was just:
       #
@@ -323,7 +310,12 @@ format.marginaleffects_contrasts <- function(x, model, p_adjust, comparison, ...
         # column name back, so we can properly merge all variables in
         # "contrast" and "by" to the original data
         by_columns <- paste0(by, 1)
-        params <- datawizard::data_rename(params, select = by_columns, replacement = by, verbose = FALSE)
+        params <- datawizard::data_rename(
+          params,
+          select = by_columns,
+          replacement = by,
+          verbose = FALSE
+        )
 
         # filter original data and new params by "by"
         x <- x[keep_rows, ]
@@ -351,6 +343,30 @@ format.marginaleffects_contrasts <- function(x, model, p_adjust, comparison, ...
 # -----------------------------------------------------------------------------
 
 
+# since we combine levels from different factors, we have to make
+# sure levels are unique across different terms. If not, paste
+# variable names to levels. We first find the intersection of all
+# levels from all current contrast terms
+
+.fix_duplicated_contrastlevels <- function(params, contrast_names) {
+  multiple_levels <- Reduce(
+    function(i, j) intersect(i, j),
+    lapply(params[contrast_names], unique),
+    accumulate = FALSE
+  )
+  # if we find any intersections, we have identical labels for different
+  # terms in one "contrast group" - we thus add the variable name to the
+  # levels, to avoid identical levels without knowing to which factor
+  # it belongs
+  if (length(multiple_levels)) {
+    for (cn in contrast_names) {
+      params[[cn]] <- paste(gsub(".{1}$", "", cn), params[[cn]])
+    }
+  }
+  params
+}
+
+
 # This function renames columns to have a consistent naming scheme,
 # and relocates columns to get a standardized column order across all
 # outputs from {marginaleffects}

diff --git a/README.Rmd b/README.Rmd
@@ -141,7 +141,7 @@ Check-out [**this vignette**](https://easystats.github.io/modelbased/articles/es
 model <- lm(Sepal.Width ~ Species, data = iris)
 
 # 2. Estimate marginal contrasts
-contrasts <- estimate_contrasts(model, contrast = "Species", backend = "emmeans")
+contrasts <- estimate_contrasts(model, contrast = "Species")
 contrasts
 ```
 

diff --git a/README.md b/README.md
@@ -179,15 +179,15 @@ for a detailed walkthrough on *contrast analysis*.
 model <- lm(Sepal.Width ~ Species, data = iris)
 
 # 2. Estimate marginal contrasts
-contrasts <- estimate_contrasts(model, contrast = "Species", backend = "emmeans")
+contrasts <- estimate_contrasts(model, contrast = "Species")
 contrasts
 ## Marginal Contrasts Analysis
 ## 
-## Level1     |     Level2 | Difference |         95% CI |   SE | t(147) |      p
+## Level1     | Level2     | Difference |   SE |         95% CI | t(147) |      p
 ## ------------------------------------------------------------------------------
-## setosa     | versicolor |       0.66 | [ 0.52,  0.79] | 0.07 |   9.69 | < .001
-## setosa     |  virginica |       0.45 | [ 0.32,  0.59] | 0.07 |   6.68 | < .001
-## versicolor |  virginica |      -0.20 | [-0.34, -0.07] | 0.07 |  -3.00 | 0.003 
+## setosa     | versicolor |       0.66 | 0.07 | [ 0.52,  0.79] |   9.69 | < .001
+## setosa     | virginica  |       0.45 | 0.07 | [ 0.32,  0.59] |   6.68 | < .001
+## versicolor | virginica  |      -0.20 | 0.07 | [-0.34, -0.07] |  -3.00 |  0.003
 ## 
 ## Variable predicted: Sepal.Width
 ## Predictors contrasted: Species
@@ -218,17 +218,17 @@ difference <- estimate_contrasts(
 print(difference, table_width = Inf)
 ## Marginal Contrasts Analysis
 ## 
-## Species                | Petal.Length | Difference |   SE |        95% CI | t(144) |      p
-## -------------------------------------------------------------------------------------------
-## setosa - versicolor    |         1.00 |       1.70 | 0.34 | [ 1.02, 2.37] |   4.97 | < .001
-## setosa - virginica     |         1.00 |       1.34 | 0.40 | [ 0.56, 2.13] |   3.38 | < .001
-## versicolor - virginica |         1.00 |      -0.36 | 0.49 | [-1.33, 0.61] |  -0.73 | 0.468 
-## setosa - versicolor    |         3.95 |       1.74 | 0.65 | [ 0.45, 3.03] |   2.67 | 0.008 
-## setosa - virginica     |         3.95 |       1.79 | 0.66 | [ 0.48, 3.11] |   2.70 | 0.008 
-## versicolor - virginica |         3.95 |       0.06 | 0.15 | [-0.24, 0.35] |   0.37 | 0.710 
-## setosa - versicolor    |         6.90 |       1.78 | 1.44 | [-1.06, 4.62] |   1.24 | 0.218 
-## setosa - virginica     |         6.90 |       2.25 | 1.42 | [-0.56, 5.06] |   1.58 | 0.116 
-## versicolor - virginica |         6.90 |       0.47 | 0.28 | [-0.09, 1.03] |   1.65 | 0.101 
+## Level1     | Level2     | Petal.Length | Difference |   SE |        95% CI | t(144) |      p
+## --------------------------------------------------------------------------------------------
+## setosa     | versicolor | 1.00         |       1.70 | 0.34 | [ 1.02, 2.37] |   4.97 | < .001
+## setosa     | virginica  | 1.00         |       1.34 | 0.40 | [ 0.56, 2.13] |   3.38 | < .001
+## setosa     | versicolor | 3.95         |       1.74 | 0.65 | [ 0.45, 3.03] |   2.67 |  0.008
+## setosa     | virginica  | 3.95         |       1.79 | 0.66 | [ 0.48, 3.11] |   2.70 |  0.008
+## setosa     | versicolor | 6.90         |       1.78 | 1.44 | [-1.06, 4.62] |   1.24 |  0.218
+## setosa     | virginica  | 6.90         |       2.25 | 1.42 | [-0.56, 5.06] |   1.58 |  0.116
+## versicolor | virginica  | 1.00         |      -0.36 | 0.49 | [-1.33, 0.61] |  -0.73 |  0.468
+## versicolor | virginica  | 3.95         |       0.06 | 0.15 | [-0.24, 0.35] |   0.37 |  0.710
+## versicolor | virginica  | 6.90         |       0.47 | 0.28 | [-0.09, 1.03] |   1.65 |  0.101
 ## 
 ## Variable predicted: Sepal.Width
 ## Predictors contrasted: Species
@@ -381,14 +381,14 @@ model <- lmer(mpg ~ drat + (1 + drat | cyl), data = mtcars)
 
 random <- estimate_grouplevel(model)
 random
-## Group | Level |   Parameter | Coefficient |   SE |         95% CI
+## Group | Level | Parameter   | Coefficient |   SE |         95% CI
 ## -----------------------------------------------------------------
-## cyl   |     4 | (Intercept) |       -3.45 | 0.56 | [-4.55, -2.36]
-## cyl   |     4 |        drat |        2.24 | 0.36 | [ 1.53,  2.95]
-## cyl   |     6 | (Intercept) |        0.13 | 0.84 | [-1.52,  1.78]
-## cyl   |     6 |        drat |       -0.09 | 0.54 | [-1.15,  0.98]
-## cyl   |     8 | (Intercept) |        3.32 | 0.73 | [ 1.89,  4.74]
-## cyl   |     8 |        drat |       -2.15 | 0.47 | [-3.07, -1.23]
+## cyl   | 4     | (Intercept) |       -3.45 | 0.56 | [-4.55, -2.36]
+## cyl   | 4     | drat        |        2.24 | 0.36 | [ 1.53,  2.95]
+## cyl   | 6     | (Intercept) |        0.13 | 0.84 | [-1.52,  1.78]
+## cyl   | 6     | drat        |       -0.09 | 0.54 | [-1.15,  0.98]
+## cyl   | 8     | (Intercept) |        3.32 | 0.73 | [ 1.89,  4.74]
+## cyl   | 8     | drat        |       -2.15 | 0.47 | [-3.07, -1.23]
 
 plot(random) +
   geom_hline(yintercept = 0, linetype = "dashed") +

diff --git a/man/figures/unnamed-chunk-3-1.png b/man/figures/unnamed-chunk-3-1.png