Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix generation of multivariate normal random numbers with a single stream #127

Merged
merged 7 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: monty
Title: Monte Carlo Models
Version: 0.3.15
Version: 0.3.16
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Wes", "Hinsley", role = "aut"),
Expand Down
84 changes: 54 additions & 30 deletions R/distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
## because it's quite gross. Mostly it is trying to tidy up some of
## the ways that we might draw from multivariate normal distributions.
## This is complicated by wanting to cache the results of the vcv
## decomposition where possible.

## Not in base R
rmvnorm <- function(x, vcv, rng) {
make_rmvnorm(vcv)(x, rng)
}
## decomposition where possible. We don't expose this anywhere to the
## user, and doing so is difficult because we'd need a thread-safe way
## of doing matrix multiplication (and possibly Cholesky
## factorisation) but this involves LAPACK (and therefore linking to
## libfortran) and is not guaranteed to be thread-safe.

## It's also tangled up with the distribution support, being different
## to most distributions in having vector output and *matrix*
## input. The most effficient way of using this really requires that
## we have the decomposition cached wherever possible, so it does not
## neatly fit into our usual approach at all.

## This is the form of the Cholesky factorisation of a matrix we use
## in the multivariate normal sampling.
Expand All @@ -18,46 +22,66 @@ chol_pivot <- function(x) {
}


make_rmvnorm <- function(vcv, centred = FALSE) {
## There are three uses of this; two use centred and one does not.
##
## * monty_example_gaussian uses centred
## * sampler_hmc also uses centred
## * make_random_walk_proposal does not use centred, but could


make_rmvnorm <- function(vcv) {
n <- nrow(vcv)
if (n == 1) {
if (is.matrix(vcv)) {
## Special case for transformations in single-dimensional case; no
## need to work with matrix multiplication or decompositions here.
sd <- sqrt(drop(vcv))
if (centred) {
if (n == 1) {
sd <- sqrt(drop(vcv))
function(rng) {
monty_random_normal(0, sd, rng)
}
} else {
function(x, rng) {
x + monty_random_normal(0, sd, rng)
}
}
} else if (is.matrix(vcv)) {
r <- chol_pivot(vcv)
if (centred) {
r <- chol_pivot(vcv)
function(rng) {
drop(monty_random_n_normal(n, 0, 1, rng) %*% r)
}
} else {
function(x, rng) {
x + drop(monty_random_n_normal(n, 0, 1, rng) %*% r)
}
}
} else {
stopifnot(length(dim(vcv)) == 3)
m <- dim(vcv)[[3]]
r <- vapply(seq_len(m), function(i) chol_pivot(vcv[, , i]), vcv[, , 1])
if (centred) {
function(rng) {
mu <- monty_random_n_normal(n, 0, 1, rng)
vapply(seq_len(m), function(i) mu[, i] %*% r[, , i], numeric(n))
}
## At this point we have a vcv that has 'm' entries; we'll be
## given a rng that has either 1 stream (PT) or 'm' streams
## (simultaneous sampling)
if (n == 1) {
r <- sqrt(drop(vcv))
} else {
function(x, rng) {
mu <- monty_random_n_normal(n, 0, 1, rng) # + x perhaps?
x + vapply(seq_len(m), function(i) mu[, i] %*% r[, , i], numeric(n))
r <- vapply(seq_len(m), function(i) chol_pivot(vcv[, , i]), vcv[, , 1])
}
function(rng) {
n_streams <- length(rng)
if (n_streams == 1) {
rand <- matrix(monty_random_n_normal(n * m, 0, 1, rng), n, m)
} else if (n_streams == m) {
rand <- monty_random_n_normal(n, 0, 1, rng)
} else {
if (m == 1) {
cli::cli_abort(
"Expected a random number generator with 1 stream, not {n_streams}")
} else {
cli::cli_abort(paste(
"Expected a random number generator with 1 or {m} streams, not",
"{n_streams}"))
}
}
if (n == 1) {
ret <- drop(rand) * r
} else {
ret <- vapply(seq_len(m), function(i) rand[, i] %*% r[, , i],
numeric(n))
if (m == 1 && !attr(rng, "preserve_stream_dimension")) {
dim(ret) <- NULL # TODO - test this....
}
}
ret
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion R/example.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ monty_example_gaussian <- function(vcv) {
n <- nrow(vcv)
monty_model(list(
parameters = letters[seq_len(n)],
direct_sample = make_rmvnorm(vcv, centred = TRUE),
direct_sample = make_rmvnorm(vcv),
density = make_ldmvnorm(vcv),
gradient = make_deriv_ldmvnorm(vcv),
domain = cbind(rep(-Inf, n), rep(Inf, n))))
Expand Down
2 changes: 1 addition & 1 deletion R/sampler-hmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ monty_sampler_hmc <- function(epsilon = 0.015, n_integration_steps = 10,
}
} else {
vcv <- sampler_validate_vcv(vcv, pars)
internal$sample_momentum <- make_rmvnorm(vcv, centred = TRUE)
internal$sample_momentum <- make_rmvnorm(vcv)
}
if (debug) {
internal$history <- list()
Expand Down
6 changes: 4 additions & 2 deletions R/sampler-random-walk.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,15 @@ monty_sampler_random_walk <- function(vcv = NULL, boundaries = "reflect",
make_random_walk_proposal <- function(vcv, domain, boundaries) {
mvn <- make_rmvnorm(vcv)
if (boundaries != "reflect" || !any(is.finite(domain))) {
return(mvn)
return(function(x, rng) {
x + mvn(rng)
})
}

x_min <- domain[, 1]
x_max <- domain[, 2]
function(x, rng) {
reflect_proposal(mvn(x, rng), x_min, x_max)
reflect_proposal(x + mvn(rng), x_min, x_max)
}
}

Expand Down
108 changes: 72 additions & 36 deletions tests/testthat/test-distributions.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,20 @@
test_that("repeated calls rmvnorm agrees with one-shot", {
vcv <- matrix(c(4, 2, 2, 3), ncol = 2)
x <- runif(4)
g1 <- monty_rng_create(seed = 42)
g2 <- monty_rng_create(seed = 42)
r1 <- rmvnorm(x, vcv, g1)
r2 <- make_rmvnorm(vcv)(x, g2)
expect_identical(r2, r1)
})


test_that("rmvnorm has correct behaviour in trivial case", {
vcv <- matrix(1, 1, 1)
g1 <- monty_rng_create(seed = 42)
g2 <- monty_rng_create(seed = 42)
r1 <- replicate(100, rmvnorm(0, vcv, g1))
mvn <- make_rmvnorm(vcv)
r1 <- replicate(100, mvn(g1))
r2 <- monty_random_n_normal(100, 0, 1, g2)
expect_identical(r1, r2)
})


## This is still just too slow to really push around:
test_that("rvnorm produces correct expectation", {
vcv <- matrix(c(4, 2, 2, 3), ncol = 2)
f <- make_rmvnorm(vcv)
rng <- monty_rng_create(seed = 42)
x <- c(1, 2)
r <- t(replicate(1e5, f(x, rng)))
expect_equal(colMeans(r), c(1, 2), tolerance = 0.01)
r <- t(replicate(1e5, f(rng)))
expect_equal(colMeans(r), c(0, 0), tolerance = 0.01)
expect_equal(var(r), vcv, tolerance = 0.01)
})

Expand Down Expand Up @@ -61,54 +49,102 @@ test_that("Can draw samples from many single-variable MVNs", {
r1 <- monty_rng_create(seed = 42, n_streams = 4)
r2 <- monty_rng_create(seed = 42, n_streams = 4)
vcv <- array(1, c(1, 1, 4))
x <- runif(4)
expect_equal(rmvnorm(x, vcv, r2),
monty_random_normal(0, 1, r1) + x)
expect_equal(rmvnorm(x, 0.1 * vcv, r2),
monty_random_normal(0, 1, r1) * sqrt(0.1) + x)
expect_equal(rmvnorm(x, 0.1 * 1:4 * vcv, r2),
monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4) + x)
expect_equal(make_rmvnorm(vcv)(r2),
monty_random_normal(0, 1, r1))
expect_equal(make_rmvnorm(0.1 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1))
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4))
})


test_that("Can draw samples from many centred single-variable MVNs", {
r1 <- monty_rng_create(seed = 42, n_streams = 4)
r2 <- monty_rng_create(seed = 42, n_streams = 4)
vcv <- array(1, c(1, 1, 4))
x <- runif(4)
expect_equal(make_rmvnorm(vcv, centred = TRUE)(r2),
monty_random_normal(0, 1, r1))

expect_equal(make_rmvnorm(vcv, centred = TRUE)(r2),
expect_equal(make_rmvnorm(vcv)(r2),
monty_random_normal(0, 1, r1))
expect_equal(make_rmvnorm(0.1 * vcv, centred = TRUE)(r2),
expect_equal(make_rmvnorm(0.1 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1))
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv, centred = TRUE)(r2),
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4))
})


test_that("Can draw samples from many bivariate MVNs", {
r1 <- monty_rng_create(seed = 42, n_streams = 5)
r2 <- monty_rng_create(seed = 42, n_streams = 5)
r3 <- monty_rng_create(seed = 42, n_streams = 5)
vcv <- array(0, c(2, 2, 5))
set.seed(1)
vcv[1, 1, ] <- 1:5
vcv[2, 2, ] <- 1
vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1)

x <- matrix(runif(10), 2, 5)
y <- rmvnorm(x, vcv, r2)
expect_equal(dim(y), dim(x))
y <- make_rmvnorm(vcv)(r2)
expect_equal(dim(y), c(2, 5))

## A bit of work do do these separately:
rng_state <- matrix(monty_rng_state(r1), ncol = 5)
z <- vapply(1:5, function(i) {
rmvnorm(x[, i], vcv[, , i], monty_rng_create(seed = rng_state[, i]))
r <- monty_rng_create(seed = rng_state[, i])
make_rmvnorm(vcv[, , i])(r)
}, numeric(2))
expect_identical(y, z)
})


test_that("can draw from single-variable mvns with a single stream", {
r1 <- monty_rng_create(seed = 42)
r2 <- monty_rng_create(seed = 42)
vcv <- array(1, c(1, 1, 4))

expect_equal(make_rmvnorm(vcv)(r2),
monty_random_n_normal(4, 0, 1, r1))
expect_equal(make_rmvnorm(0.1 * vcv)(r2),
monty_random_n_normal(4, 0, 1, r1) * sqrt(0.1))
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2),
monty_random_n_normal(4, 0, 1, r1) * sqrt(0.1 * 1:4))
})


test_that("Can draw samples from many bivariate MVNs with a single stream", {
r1 <- monty_rng_create(seed = 42)
r2 <- monty_rng_create(seed = 42)
vcv <- array(0, c(2, 2, 5))
set.seed(1)
vcv[1, 1, ] <- 1:5
vcv[2, 2, ] <- 1
vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1)

y <- make_rmvnorm(vcv)(r2)
expect_equal(dim(y), c(2, 5))

## A bit of work do do these separately:
z <- vapply(1:5, function(i) make_rmvnorm(vcv[, , i])(r1), numeric(2))
expect_identical(y, z)
})


test_that("rng must be compatible with number of layers in vcv", {
r <- monty_rng_create(n_streams = 3)
vcv <- array(0, c(2, 2, 5))
vcv[1, 1, ] <- 1:5
vcv[2, 2, ] <- 1
vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1)
expect_error(
make_rmvnorm(vcv)(r),
"Expected a random number generator with 1 or 5 streams, not 3")
expect_error(
make_rmvnorm(vcv[, , 1, drop = FALSE])(r),
"Expected a random number generator with 1 stream, not 3")
})


expect_equal(make_rmvnorm(vcv, centred = TRUE)(r3),
y - x)
test_that("drop dimensions where requested", {
r1 <- monty_rng_create(n_streams = 1, preserve_stream_dimension = TRUE)
r2 <- monty_rng_create(n_streams = 1, preserve_stream_dimension = FALSE)
vcv <- array(diag(2), c(2, 2, 1))
expect_equal(dim(make_rmvnorm(vcv)(r1)), c(2, 1))
expect_null(dim(make_rmvnorm(vcv)(r2)))
})
2 changes: 1 addition & 1 deletion tests/testthat/test-sampler-random-walk.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ test_that("can reflect parameters in matrix form", {
sapply(1:4, function(i) reflect_proposal(p[, i], -xb, xa)))


mvn <- make_rmvnorm(diag(3) * c(4, 8, 16), centred = TRUE)
mvn <- make_rmvnorm(diag(3) * c(4, 8, 16))
r <- monty_rng_create(seed = 42)
x <- replicate(10, mvn(r))
x_min <- c(-2, -Inf, -1)
Expand Down
Loading