Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalizing verbose messages in bmerge.R #6728

Merged
merged 13 commits into from
Jan 30, 2025
Merged
45 changes: 23 additions & 22 deletions R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ mergeType = function(x) {
ans
}

cast_with_atts = function(x, as.f) {
ans = as.f(x)
if (!is.null(attributes(x))) attributes(ans) = attributes(x)
cast_with_attrs = function(x, cast_fun) {
ans = cast_fun(x)
# do not copy attributes when coercing factor (to character)
if (!is.factor(x) && !is.null(attributes(x))) attributes(ans) = attributes(x)
ans
}

coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg=NULL) {
if (!is.null(verbose_msg)) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL)
set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type))))
coerce_col = function(dt, col, from_type, to_type, from_name, to_name, from_detail="", to_detail="", verbose) {
if (verbose) catf(
"Coercing %s column %s%s to type %s to match type of %s%s.\n",
from_type, from_name, from_detail, to_type, to_name, to_detail
)
cast_fun = switch(to_type, integer64 = bit64::as.integer64, match.fun(paste0("as.", to_type)))
set(dt, j=col, value=cast_with_attrs(dt[[col]], cast_fun))
}

bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbose)
Expand Down Expand Up @@ -68,9 +73,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
next
} else {
if (x_merge_type=="character") {
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
set(i, j=icol, value=val<-as.character(i[[icol]]))
set(callersi, j=icol, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
set(callersi, j=icol, value=i[[icol]]) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
next
} else if (i_merge_type=="character") {
if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
Expand All @@ -89,13 +93,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
}
cfl = c("character", "logical", "factor")
if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) {
msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL
if (anyNA(i[[icol]]) && allNA(i[[icol]])) {
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, msg)
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, from_detail=gettext(" (all-NA)"), verbose=verbose)
next
}
if (anyNA(x[[xcol]]) && allNA(x[[xcol]])) {
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, msg)
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
next
}
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
Expand All @@ -104,8 +107,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
nm = c(iname, xname)
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which has integer64 representation, e.g. no fractions)" else "", nm[2L])
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
} else {
# just integer and double left
Expand All @@ -126,28 +129,26 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
}
}
if (coerce_x) {
msg = if (verbose) gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") else NULL
coerce_col(i, icol, "double", "integer", iname, xname, msg)
from_detail = gettext(" (which contains no fractions)")
coerce_col(i, icol, "double", "integer", iname, xname, from_detail, verbose=verbose)
set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too.
if (length(ic_idx)>1L) {
xc_idx = xcols[ic_idx]
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg)
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, from_detail, verbose=verbose)
}
}
}
}
if (!coerce_x) {
msg = if (verbose) gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") else NULL
coerce_col(x, xcol, "integer", "double", xname, iname, msg)
coerce_col(x, xcol, "integer", "double", xname, iname, to_detail=gettext(" (which contains fractions)"), verbose=verbose)
}
} else {
msg = if (verbose) gettext("Coercing %s column %s to type %s for join to match type of %s.\n") else NULL
coerce_col(i, icol, "integer", "double", iname, xname, msg)
coerce_col(i, icol, "integer", "double", iname, xname, from_detail=gettext(" (for join)"), verbose=verbose)
if (length(ic_idx)>1L) {
xc_idx = xcols[ic_idx]
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "integer")]) {
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg)
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, verbose=verbose)
}
}
}
Expand Down
1 change: 1 addition & 0 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
datatable.rbindlist.check = NULL,
datatable.integer64 = "integer64",
digits = 7L, # ensure printing rounds to the expected number of digits in all sessions, #5285
useFancyQuotes = FALSE, # ensure ASCII ' and " are always produced from base
warn = 0L, # ensure signals are emitted as they are in the code, #5285
warnPartialMatchArgs = base::getRversion()>="3.6.0", # ensure we don't rely on partial argument matching in internal code, #3664; >=3.6.0 for #3865
warnPartialMatchAttr = TRUE,
Expand Down
23 changes: 10 additions & 13 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,6 @@ TZnotUTC = !identical(tt,"") && !is_utc(tt)
# (3) function factory for matching messages exactly by substituting anything between delimiters [delim, fmt=TRUE]
# (4) function factory for matching messages exactly by substituting a generic string [fmt=string]
get_msg = function(e, delim, fmt=FALSE) {
ufq = options(useFancyQuotes = FALSE) # otherwise we get angled quotes, hard to match robustly
on.exit(options(ufq))

condition = tryCatch({e; NULL}, error=identity, warning=identity)
if (is.null(condition)) return(condition)
msg = condition$message
Expand Down Expand Up @@ -11105,7 +11102,7 @@ test(1743.123, fread("a,b\n1+3i,2015-01-01", colClasses=c(NA,"IDate")), data.tab
test(1743.13, lapply(fread("a,b\n09/05/98,2015-01-01", colClasses = "Date"), class), y=list(a="character", b=c("IDate", "Date")), warning=base_messages$ambiguous_date_fmt)

## Just invalid
test(1743.14, options = c(useFancyQuotes = FALSE),
test(1743.14,
sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"),
warning=base_messages$missing_coerce_method)
test(1743.15, sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning="the column has been left as type .*character")
Expand Down Expand Up @@ -15165,13 +15162,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE],
test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE], # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int)
data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA),
i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions")
output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.")
test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coercing integer column i.int to type double for join to match type of x.doubleInt")
output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt")
test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coercing integer column i.int to type double for join to match type of x.realDouble")
output="Coercing integer column i.int .for join. to type double to match type of x.realDouble")
cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
Expand Down Expand Up @@ -15206,15 +15203,15 @@ if (test_bit64) {
dt1 = data.table(a=1, b=NA_character_)
dt2 = data.table(a=2L, b=NA)
test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L),
output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a")
output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a")
test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()),
output=msg)
test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b")
output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b")
test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
output=msg)
test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA),
output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a")
output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a")
test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()),
output=msg)

Expand Down Expand Up @@ -15641,7 +15638,7 @@ i = data.table(date = dbl_date, key = 'date')
test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date")
test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
output="Coercing integer column i.date to type double for join to match type of x.date")
output="Coercing integer column i.date .for join. to type double to match type of x.date")

# complex values in grouping, #3639
set.seed(42)
Expand Down Expand Up @@ -20688,8 +20685,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%')
# fix coercing integer/double for joins on multiple columns, #6602
x = data.table(a=1L)
y = data.table(c=1L, d=1)
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
x = data.table(a=1)
y = data.table(c=1, d=1L)
test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")
Expand Down
Loading