Skip to content

Commit

Permalink
introduced the latest changes
Browse files Browse the repository at this point in the history
  • Loading branch information
venom1204 committed Jan 17, 2025
1 parent 2a1c392 commit 771fbc0
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 56 deletions.
63 changes: 32 additions & 31 deletions R/merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,42 +11,44 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
by = key(x)
}
}
x0 = length(x)==0L
y0 = length(y)==0L
if (x0 || y0) {
if (x0 && y0)
x0 = length(x) == 0L
y0 = length(y) == 0L

if (x0 || y0) {

Check warning on line 17 in R/merge.R

View workflow job for this annotation

GitHub Actions / lint-r

file=R/merge.R,line=17,col=18,[trailing_whitespace_linter] Remove trailing whitespace.
if (x0 && y0) {
warningf("Neither of the input data.tables to join have columns.")
else if (x0)
warningf("Input data.table x has no columns.")
else
warningf("Input data.table y has no columns.")
} else if (x0) {
warningf("Input data.table '%s' has no columns.", "x")
} else if (y0) {
warningf("Input data.table '%s' has no columns.", "y")
}
}
check_duplicate_names(x)
check_duplicate_names(y)

nm_x = names(x)
nm_y = names(y)
check_duplicate_names(x)
check_duplicate_names(y)

nm_x = names(x)
nm_y = names(y)


## set up 'by'/'by.x'/'by.y'
if ( (!is.null(by.x) || !is.null(by.y)) && length(by.x)!=length(by.y) )
stopf("by.x and by.y must be of same length.")
stopf("'by.x' and 'by.y' must be of same length.")
if (!missing(by) && !missing(by.x))
warningf("Supplied both by and by.x/by.y. by argument will be ignored.")
warningf("Supplied both by and 'by.x/by.y.' by argument will be ignored.")
if (!is.null(by.x)) {
if (length(by.x)==0L || !is.character(by.x) || !is.character(by.y))
stopf("A non-empty vector of column names is required for by.x and by.y.")
stopf("A non-empty vector of column names is required for 'by.x' and 'by.y'.")
if (!all(by.x %chin% nm_x)) {
missing_in_x <- setdiff(by.x, nm_x)
stopf("The following columns listed in by.x are missing from x: %s",
toString(missing_in_x))
}
if (!all(by.y %chin% nm_y)) {
missing_in_y <- setdiff(by.y, nm_y)
stopf("The following columns listed in by.y are missing from y: %s",
toString(missing_in_y))
}
by = by.x
names(by) = by.y
missing_in_x <- setdiff(by.x, nm_x)
stopf("The following columns listed in 'by.x' are missing from x: %s", brackify(missing_in_x))
}
if (!all(by.y %chin% nm_y)) {
missing_in_y <- setdiff(by.y, nm_y)
stopf("The following columns listed in 'by.y' are missing from y: %s", brackify(missing_in_y))
}
by = by.x
names(by) = by.y
} else {
if (is.null(by))
by = intersect(key(x), key(y))
Expand All @@ -59,14 +61,13 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
missing_in_x <- setdiff(by, nm_x)
missing_in_y <- setdiff(by, nm_y)
if (length(missing_in_x) > 0 || length(missing_in_y) > 0) {
stopf("The following columns are missing:%s%s",
if (length(missing_in_x) > 0) sprintf(" - From x: %s", toString(missing_in_x)) else "",
if (length(missing_in_y) > 0) sprintf(" - From y: %s", toString(missing_in_y)) else "")
stopf(gettextf("The following columns are missing:\n%s%s",
if (length(missing_in_x) > 0) gettextf(" - From x: %s\n", brackify(missing_in_x)) else "",
if (length(missing_in_y) > 0) gettextf(" - From y: %s\n", brackify(missing_in_y)) else ""))
}
by = unname(by)
by.x = by.y = by
}

}
# warn about unused arguments #2587
if (length(list(...))) {
ell = as.list(substitute(list(...)))[-1L]
Expand Down
45 changes: 20 additions & 25 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -8563,26 +8563,21 @@ test(1600.2, names(DT1[DT2, .(id1=id1, val=val, bla=sum(z1, na.rm=TRUE)), on="id

# warn when merging an empty data.table #597
DT0 = data.table(NULL)
DT1 = data.table(a=1)

# Test 1601.1: Merge DT1 with itself on column 'a'
test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a"))

# Test 1601.2: Merge DT1 with DT0 on column 'a'
test(1601.2, merge(DT1, DT0, by="a"),
warning="Input data.table y has no columns.",
error="The following columns are missing: - From y: a")

# Test 1601.3: Merge DT0 with DT1 on column 'a'
test(1601.3, merge(DT0, DT1, by="a"),
warning="Input data.table x has no columns.",
error="The following columns are missing: - From x: a")

# Test 1601.4: Merge DT0 with DT0 on column 'a'
test(1601.4, merge(DT0, DT0, by="a"),
warning="Neither of the input data.tables to join have columns.",
error="The following columns are missing: - From x: a - From y: a")

DT1 = data.table(a = 1)

test(1601.1, merge(DT1, DT1, by = "a"), data.table(a = 1, key = "a"))
test(1601.2,
merge(DT1, DT0, by = "a"),
warning = "Input data.table 'y' has no columns.",
error = "The following columns are missing:\n - From y: [a]")
test(1601.3,
merge(DT0, DT1, by = "a"),
warning = "Input data.table 'x' has no columns.",
error = "The following columns are missing:\n - From x: [a]")
test(1601.4,
merge(DT0, DT0, by = "a"),
warning = "Neither of the input data.tables to join have columns.",
error = "The following columns are missing:\n - From x: [a]\n - From y: [a]")
# fix for #1549
d1 <- data.table(v1=1:2,x=x)
d2 <- data.table(v1=3:4)
Expand Down Expand Up @@ -13552,18 +13547,18 @@ test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'),
warning = 'Supplied both.*argument will be ignored')
test(1962.018,
merge(DT1, DT2, by.x = 'z', by.y = 'a'),
error = 'The following columns listed in by.x are missing from x: z')
error = "The following columns listed in 'by.x' are missing from x: [z]")
test(1962.019,
merge(DT1, DT2, by.x = 'a', by.y = 'z'),
error = 'The following columns listed in by.y are missing from y: z')
error = "The following columns listed in 'by.y' are missing from y: [z]")
test(1962.0201, merge(DT1, DT2, by=character(0L)), ans) # was error before PR#5183
test(1962.0202, merge(DT1, DT2, by=NULL), ans) # test explicit NULL too as missing() could be used inside merge()
test(1962.021, {
if (!"z" %in% colnames(DT1) || !"z" %in% colnames(DT2)) {
stop("The columns listed in `by` are missing from either x or y: z")
stop("The columns listed in `by` are missing from either x or y: [z]")
}
merge(DT1, DT2, by = 'z')
}, error = 'The columns listed in `by` are missing from either x or y: z')
}, error = 'The columns listed in `by` are missing from either x or y: [z]')

## frank.R
x = c(1, 1, 2, 5, 4, 3, 4, NA, 6)
Expand Down Expand Up @@ -18024,7 +18019,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x
test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5)))
test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.")
test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L),
merge(DT, y, by="k2"), warning=c("Supplied both by and by.x/by.y. by argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
merge(DT, y, by="k2"), warning=c("Supplied both by and 'by.x/by.y.' by argument will be ignored.", "Passed 1 unknown and unnamed arguments."))

# weighted.mean GForce optimized, #3977
old = options(datatable.optimize=1L)
Expand Down

0 comments on commit 771fbc0

Please sign in to comment.