Skip to content

Commit

Permalink
Merge pull request #127 from mrc-ide/mrc-6102
Browse files Browse the repository at this point in the history
Fix generation of multivariate normal random numbers with a single stream
  • Loading branch information
weshinsley authored Dec 12, 2024
2 parents e724328 + a97a9ab commit 6c40a24
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 72 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: monty
Title: Monte Carlo Models
Version: 0.3.15
Version: 0.3.16
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Wes", "Hinsley", role = "aut"),
Expand Down
84 changes: 54 additions & 30 deletions R/distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
## because it's quite gross. Mostly it is trying to tidy up some of
## the ways that we might draw from multivariate normal distributions.
## This is complicated by wanting to cache the results of the vcv
## decomposition where possible.

## Not in base R
rmvnorm <- function(x, vcv, rng) {
make_rmvnorm(vcv)(x, rng)
}
## decomposition where possible. We don't expose this anywhere to the
## user, and doing so is difficult because we'd need a thread-safe way
## of doing matrix multiplication (and possibly Cholesky
## factorisation) but this involves LAPACK (and therefore linking to
## libfortran) and is not guaranteed to be thread-safe.

## It's also tangled up with the distribution support, being different
## to most distributions in having vector output and *matrix*
## input. The most effficient way of using this really requires that
## we have the decomposition cached wherever possible, so it does not
## neatly fit into our usual approach at all.

## This is the form of the Cholesky factorisation of a matrix we use
## in the multivariate normal sampling.
Expand All @@ -18,46 +22,66 @@ chol_pivot <- function(x) {
}


make_rmvnorm <- function(vcv, centred = FALSE) {
## There are three uses of this; two use centred and one does not.
##
## * monty_example_gaussian uses centred
## * sampler_hmc also uses centred
## * make_random_walk_proposal does not use centred, but could


make_rmvnorm <- function(vcv) {
n <- nrow(vcv)
if (n == 1) {
if (is.matrix(vcv)) {
## Special case for transformations in single-dimensional case; no
## need to work with matrix multiplication or decompositions here.
sd <- sqrt(drop(vcv))
if (centred) {
if (n == 1) {
sd <- sqrt(drop(vcv))
function(rng) {
monty_random_normal(0, sd, rng)
}
} else {
function(x, rng) {
x + monty_random_normal(0, sd, rng)
}
}
} else if (is.matrix(vcv)) {
r <- chol_pivot(vcv)
if (centred) {
r <- chol_pivot(vcv)
function(rng) {
drop(monty_random_n_normal(n, 0, 1, rng) %*% r)
}
} else {
function(x, rng) {
x + drop(monty_random_n_normal(n, 0, 1, rng) %*% r)
}
}
} else {
stopifnot(length(dim(vcv)) == 3)
m <- dim(vcv)[[3]]
r <- vapply(seq_len(m), function(i) chol_pivot(vcv[, , i]), vcv[, , 1])
if (centred) {
function(rng) {
mu <- monty_random_n_normal(n, 0, 1, rng)
vapply(seq_len(m), function(i) mu[, i] %*% r[, , i], numeric(n))
}
## At this point we have a vcv that has 'm' entries; we'll be
## given a rng that has either 1 stream (PT) or 'm' streams
## (simultaneous sampling)
if (n == 1) {
r <- sqrt(drop(vcv))
} else {
function(x, rng) {
mu <- monty_random_n_normal(n, 0, 1, rng) # + x perhaps?
x + vapply(seq_len(m), function(i) mu[, i] %*% r[, , i], numeric(n))
r <- vapply(seq_len(m), function(i) chol_pivot(vcv[, , i]), vcv[, , 1])
}
function(rng) {
n_streams <- length(rng)
if (n_streams == 1) {
rand <- matrix(monty_random_n_normal(n * m, 0, 1, rng), n, m)
} else if (n_streams == m) {
rand <- monty_random_n_normal(n, 0, 1, rng)
} else {
if (m == 1) {
cli::cli_abort(
"Expected a random number generator with 1 stream, not {n_streams}")
} else {
cli::cli_abort(paste(
"Expected a random number generator with 1 or {m} streams, not",
"{n_streams}"))
}
}
if (n == 1) {
ret <- drop(rand) * r
} else {
ret <- vapply(seq_len(m), function(i) rand[, i] %*% r[, , i],
numeric(n))
if (m == 1 && !attr(rng, "preserve_stream_dimension")) {
dim(ret) <- NULL # TODO - test this....
}
}
ret
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion R/example.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ monty_example_gaussian <- function(vcv) {
n <- nrow(vcv)
monty_model(list(
parameters = letters[seq_len(n)],
direct_sample = make_rmvnorm(vcv, centred = TRUE),
direct_sample = make_rmvnorm(vcv),
density = make_ldmvnorm(vcv),
gradient = make_deriv_ldmvnorm(vcv),
domain = cbind(rep(-Inf, n), rep(Inf, n))))
Expand Down
2 changes: 1 addition & 1 deletion R/sampler-hmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ monty_sampler_hmc <- function(epsilon = 0.015, n_integration_steps = 10,
}
} else {
vcv <- sampler_validate_vcv(vcv, pars)
internal$sample_momentum <- make_rmvnorm(vcv, centred = TRUE)
internal$sample_momentum <- make_rmvnorm(vcv)
}
if (debug) {
internal$history <- list()
Expand Down
6 changes: 4 additions & 2 deletions R/sampler-random-walk.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,15 @@ monty_sampler_random_walk <- function(vcv = NULL, boundaries = "reflect",
make_random_walk_proposal <- function(vcv, domain, boundaries) {
mvn <- make_rmvnorm(vcv)
if (boundaries != "reflect" || !any(is.finite(domain))) {
return(mvn)
return(function(x, rng) {
x + mvn(rng)
})
}

x_min <- domain[, 1]
x_max <- domain[, 2]
function(x, rng) {
reflect_proposal(mvn(x, rng), x_min, x_max)
reflect_proposal(x + mvn(rng), x_min, x_max)
}
}

Expand Down
108 changes: 72 additions & 36 deletions tests/testthat/test-distributions.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,20 @@
test_that("repeated calls rmvnorm agrees with one-shot", {
vcv <- matrix(c(4, 2, 2, 3), ncol = 2)
x <- runif(4)
g1 <- monty_rng_create(seed = 42)
g2 <- monty_rng_create(seed = 42)
r1 <- rmvnorm(x, vcv, g1)
r2 <- make_rmvnorm(vcv)(x, g2)
expect_identical(r2, r1)
})


test_that("rmvnorm has correct behaviour in trivial case", {
vcv <- matrix(1, 1, 1)
g1 <- monty_rng_create(seed = 42)
g2 <- monty_rng_create(seed = 42)
r1 <- replicate(100, rmvnorm(0, vcv, g1))
mvn <- make_rmvnorm(vcv)
r1 <- replicate(100, mvn(g1))
r2 <- monty_random_n_normal(100, 0, 1, g2)
expect_identical(r1, r2)
})


## This is still just too slow to really push around:
test_that("rvnorm produces correct expectation", {
vcv <- matrix(c(4, 2, 2, 3), ncol = 2)
f <- make_rmvnorm(vcv)
rng <- monty_rng_create(seed = 42)
x <- c(1, 2)
r <- t(replicate(1e5, f(x, rng)))
expect_equal(colMeans(r), c(1, 2), tolerance = 0.01)
r <- t(replicate(1e5, f(rng)))
expect_equal(colMeans(r), c(0, 0), tolerance = 0.01)
expect_equal(var(r), vcv, tolerance = 0.01)
})

Expand Down Expand Up @@ -61,54 +49,102 @@ test_that("Can draw samples from many single-variable MVNs", {
r1 <- monty_rng_create(seed = 42, n_streams = 4)
r2 <- monty_rng_create(seed = 42, n_streams = 4)
vcv <- array(1, c(1, 1, 4))
x <- runif(4)
expect_equal(rmvnorm(x, vcv, r2),
monty_random_normal(0, 1, r1) + x)
expect_equal(rmvnorm(x, 0.1 * vcv, r2),
monty_random_normal(0, 1, r1) * sqrt(0.1) + x)
expect_equal(rmvnorm(x, 0.1 * 1:4 * vcv, r2),
monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4) + x)
expect_equal(make_rmvnorm(vcv)(r2),
monty_random_normal(0, 1, r1))
expect_equal(make_rmvnorm(0.1 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1))
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4))
})


test_that("Can draw samples from many centred single-variable MVNs", {
r1 <- monty_rng_create(seed = 42, n_streams = 4)
r2 <- monty_rng_create(seed = 42, n_streams = 4)
vcv <- array(1, c(1, 1, 4))
x <- runif(4)
expect_equal(make_rmvnorm(vcv, centred = TRUE)(r2),
monty_random_normal(0, 1, r1))

expect_equal(make_rmvnorm(vcv, centred = TRUE)(r2),
expect_equal(make_rmvnorm(vcv)(r2),
monty_random_normal(0, 1, r1))
expect_equal(make_rmvnorm(0.1 * vcv, centred = TRUE)(r2),
expect_equal(make_rmvnorm(0.1 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1))
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv, centred = TRUE)(r2),
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2),
monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4))
})


test_that("Can draw samples from many bivariate MVNs", {
r1 <- monty_rng_create(seed = 42, n_streams = 5)
r2 <- monty_rng_create(seed = 42, n_streams = 5)
r3 <- monty_rng_create(seed = 42, n_streams = 5)
vcv <- array(0, c(2, 2, 5))
set.seed(1)
vcv[1, 1, ] <- 1:5
vcv[2, 2, ] <- 1
vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1)

x <- matrix(runif(10), 2, 5)
y <- rmvnorm(x, vcv, r2)
expect_equal(dim(y), dim(x))
y <- make_rmvnorm(vcv)(r2)
expect_equal(dim(y), c(2, 5))

## A bit of work do do these separately:
rng_state <- matrix(monty_rng_state(r1), ncol = 5)
z <- vapply(1:5, function(i) {
rmvnorm(x[, i], vcv[, , i], monty_rng_create(seed = rng_state[, i]))
r <- monty_rng_create(seed = rng_state[, i])
make_rmvnorm(vcv[, , i])(r)
}, numeric(2))
expect_identical(y, z)
})


test_that("can draw from single-variable mvns with a single stream", {
r1 <- monty_rng_create(seed = 42)
r2 <- monty_rng_create(seed = 42)
vcv <- array(1, c(1, 1, 4))

expect_equal(make_rmvnorm(vcv)(r2),
monty_random_n_normal(4, 0, 1, r1))
expect_equal(make_rmvnorm(0.1 * vcv)(r2),
monty_random_n_normal(4, 0, 1, r1) * sqrt(0.1))
expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2),
monty_random_n_normal(4, 0, 1, r1) * sqrt(0.1 * 1:4))
})


test_that("Can draw samples from many bivariate MVNs with a single stream", {
r1 <- monty_rng_create(seed = 42)
r2 <- monty_rng_create(seed = 42)
vcv <- array(0, c(2, 2, 5))
set.seed(1)
vcv[1, 1, ] <- 1:5
vcv[2, 2, ] <- 1
vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1)

y <- make_rmvnorm(vcv)(r2)
expect_equal(dim(y), c(2, 5))

## A bit of work do do these separately:
z <- vapply(1:5, function(i) make_rmvnorm(vcv[, , i])(r1), numeric(2))
expect_identical(y, z)
})


test_that("rng must be compatible with number of layers in vcv", {
r <- monty_rng_create(n_streams = 3)
vcv <- array(0, c(2, 2, 5))
vcv[1, 1, ] <- 1:5
vcv[2, 2, ] <- 1
vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1)
expect_error(
make_rmvnorm(vcv)(r),
"Expected a random number generator with 1 or 5 streams, not 3")
expect_error(
make_rmvnorm(vcv[, , 1, drop = FALSE])(r),
"Expected a random number generator with 1 stream, not 3")
})


expect_equal(make_rmvnorm(vcv, centred = TRUE)(r3),
y - x)
test_that("drop dimensions where requested", {
r1 <- monty_rng_create(n_streams = 1, preserve_stream_dimension = TRUE)
r2 <- monty_rng_create(n_streams = 1, preserve_stream_dimension = FALSE)
vcv <- array(diag(2), c(2, 2, 1))
expect_equal(dim(make_rmvnorm(vcv)(r1)), c(2, 1))
expect_null(dim(make_rmvnorm(vcv)(r2)))
})
2 changes: 1 addition & 1 deletion tests/testthat/test-sampler-random-walk.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ test_that("can reflect parameters in matrix form", {
sapply(1:4, function(i) reflect_proposal(p[, i], -xb, xa)))


mvn <- make_rmvnorm(diag(3) * c(4, 8, 16), centred = TRUE)
mvn <- make_rmvnorm(diag(3) * c(4, 8, 16))
r <- monty_rng_create(seed = 42)
x <- replicate(10, mvn(r))
x_min <- c(-2, -Inf, -1)
Expand Down

0 comments on commit 6c40a24

Please sign in to comment.