From 1a3d82aa617898f5e969c36baa3c4dbcdbe0f4db Mon Sep 17 00:00:00 2001 From: hornik Date: Mon, 17 Feb 2025 18:39:27 +0000 Subject: [PATCH] Have interaction(drop = TRUE) compute used levels directly instead of dropping unused levels from all possible levels (PR#18276). git-svn-id: https://svn.r-project.org/R/trunk@87735 00db46b3-68df-0310-9c12-caf00c1e9a41 --- src/library/base/R/interaction.R | 57 +++++++++++++++++++------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/library/base/R/interaction.R b/src/library/base/R/interaction.R index 41dd89df93..a46b467633 100644 --- a/src/library/base/R/interaction.R +++ b/src/library/base/R/interaction.R @@ -1,7 +1,7 @@ # File src/library/base/R/interaction.R # Part of the R package, https://www.R-project.org # -# Copyright (C) 1995-2013 The R Core Team +# Copyright (C) 1995-2025 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,39 +29,50 @@ interaction <- function(..., drop = FALSE, sep = ".", lex.order = FALSE) narg <- length(args) } for(i in narg:1L) { - f <- as.factor(args[[i]])[, drop = drop] - l <- levels(f) - if1 <- as.integer(f) - 1L + x <- as.factor(args[[i]])[, drop = drop] + ax <- as.integer(x) - 1L + lx <- levels(x) if(i == narg) { - ans <- if1 - lvs <- l + ay <- ax + ly <- lx } else { + nx <- length(lx) + ny <- length(ly) if(lex.order) { - ll <- length(lvs) - ans <- ans + ll * if1 - lvs <- paste(rep(l, each = ll), rep(lvs, length(l)), sep=sep) + ay <- ay + ny * ax + if(drop) { + az <- sort(unique(ay)) + ly <- paste(lx[az %/% ny + 1L], ly[az %% ny + 1L], + sep = sep) + ay <- match(ay, az) - 1L + } else { + ly <- paste(rep(lx, each = ny), rep(ly, nx), + sep = sep) + } } else { - ans <- ans * length(l) + if1 - lvs <- paste(rep(l, length(lvs)), - rep(lvs, each = length(l)), sep=sep) + ay <- ay * nx + ax + if(drop) { + az <- sort(unique(ay)) + ly <- paste(lx[az %% nx + 1L], ly[az %/% nx + 1L], + sep = sep) + ay <- match(ay, az) - 1L + } else { + ly <- paste(rep(lx, ny), rep(ly, each = nx), + sep = sep) + } } - while(j <- anyDuplicated(lvs)) { + while(j <- anyDuplicated(ly)) { ## If levels at positions i and j > i are the same, we ## need to drop the one at j, change the code for that ## level to the code for level i, and decrease all codes ## beyond the code for level j by one. - i <- match(lvs[j], lvs) - lvs <- lvs[-j] + i <- match(ly[j], ly) + ly <- ly[-j] j <- j - 1L - ans[ans == j] <- i - 1L - ans[ans > j] <- ans[ans > j] - 1L - } - if(drop) { - olvs <- lvs - lvs <- lvs[sort(unique(ans+1L))] - ans <- match(olvs[ans+1L], lvs) - 1L + ay[ay == j] <- i - 1L + ay[ay > j] <- ay[ay > j] - 1L } } } - structure(as.integer(ans+1L), levels=lvs, class = "factor") + structure(as.integer(ay + 1L), levels = ly, class = "factor") }