diff --git a/DESCRIPTION b/DESCRIPTION index 45dc3f1e..1de9d49a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: monty Title: Monte Carlo Models -Version: 0.3.15 +Version: 0.3.16 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Wes", "Hinsley", role = "aut"), diff --git a/R/distributions.R b/R/distributions.R index 358f0001..edf36384 100644 --- a/R/distributions.R +++ b/R/distributions.R @@ -2,13 +2,17 @@ ## because it's quite gross. Mostly it is trying to tidy up some of ## the ways that we might draw from multivariate normal distributions. ## This is complicated by wanting to cache the results of the vcv -## decomposition where possible. - -## Not in base R -rmvnorm <- function(x, vcv, rng) { - make_rmvnorm(vcv)(x, rng) -} +## decomposition where possible. We don't expose this anywhere to the +## user, and doing so is difficult because we'd need a thread-safe way +## of doing matrix multiplication (and possibly Cholesky +## factorisation) but this involves LAPACK (and therefore linking to +## libfortran) and is not guaranteed to be thread-safe. +## It's also tangled up with the distribution support, being different +## to most distributions in having vector output and *matrix* +## input. The most effficient way of using this really requires that +## we have the decomposition cached wherever possible, so it does not +## neatly fit into our usual approach at all. ## This is the form of the Cholesky factorisation of a matrix we use ## in the multivariate normal sampling. @@ -18,46 +22,66 @@ chol_pivot <- function(x) { } -make_rmvnorm <- function(vcv, centred = FALSE) { +## There are three uses of this; two use centred and one does not. +## +## * monty_example_gaussian uses centred +## * sampler_hmc also uses centred +## * make_random_walk_proposal does not use centred, but could + + +make_rmvnorm <- function(vcv) { n <- nrow(vcv) - if (n == 1) { + if (is.matrix(vcv)) { ## Special case for transformations in single-dimensional case; no ## need to work with matrix multiplication or decompositions here. - sd <- sqrt(drop(vcv)) - if (centred) { + if (n == 1) { + sd <- sqrt(drop(vcv)) function(rng) { monty_random_normal(0, sd, rng) } } else { - function(x, rng) { - x + monty_random_normal(0, sd, rng) - } - } - } else if (is.matrix(vcv)) { - r <- chol_pivot(vcv) - if (centred) { + r <- chol_pivot(vcv) function(rng) { drop(monty_random_n_normal(n, 0, 1, rng) %*% r) } - } else { - function(x, rng) { - x + drop(monty_random_n_normal(n, 0, 1, rng) %*% r) - } } } else { stopifnot(length(dim(vcv)) == 3) m <- dim(vcv)[[3]] - r <- vapply(seq_len(m), function(i) chol_pivot(vcv[, , i]), vcv[, , 1]) - if (centred) { - function(rng) { - mu <- monty_random_n_normal(n, 0, 1, rng) - vapply(seq_len(m), function(i) mu[, i] %*% r[, , i], numeric(n)) - } + ## At this point we have a vcv that has 'm' entries; we'll be + ## given a rng that has either 1 stream (PT) or 'm' streams + ## (simultaneous sampling) + if (n == 1) { + r <- sqrt(drop(vcv)) } else { - function(x, rng) { - mu <- monty_random_n_normal(n, 0, 1, rng) # + x perhaps? - x + vapply(seq_len(m), function(i) mu[, i] %*% r[, , i], numeric(n)) + r <- vapply(seq_len(m), function(i) chol_pivot(vcv[, , i]), vcv[, , 1]) + } + function(rng) { + n_streams <- length(rng) + if (n_streams == 1) { + rand <- matrix(monty_random_n_normal(n * m, 0, 1, rng), n, m) + } else if (n_streams == m) { + rand <- monty_random_n_normal(n, 0, 1, rng) + } else { + if (m == 1) { + cli::cli_abort( + "Expected a random number generator with 1 stream, not {n_streams}") + } else { + cli::cli_abort(paste( + "Expected a random number generator with 1 or {m} streams, not", + "{n_streams}")) + } + } + if (n == 1) { + ret <- drop(rand) * r + } else { + ret <- vapply(seq_len(m), function(i) rand[, i] %*% r[, , i], + numeric(n)) + if (m == 1 && !attr(rng, "preserve_stream_dimension")) { + dim(ret) <- NULL # TODO - test this.... + } } + ret } } } diff --git a/R/example.R b/R/example.R index 6530d1c3..ae68c7ac 100644 --- a/R/example.R +++ b/R/example.R @@ -92,7 +92,7 @@ monty_example_gaussian <- function(vcv) { n <- nrow(vcv) monty_model(list( parameters = letters[seq_len(n)], - direct_sample = make_rmvnorm(vcv, centred = TRUE), + direct_sample = make_rmvnorm(vcv), density = make_ldmvnorm(vcv), gradient = make_deriv_ldmvnorm(vcv), domain = cbind(rep(-Inf, n), rep(Inf, n)))) diff --git a/R/sampler-hmc.R b/R/sampler-hmc.R index ccf50b15..ac8fed9b 100644 --- a/R/sampler-hmc.R +++ b/R/sampler-hmc.R @@ -43,7 +43,7 @@ monty_sampler_hmc <- function(epsilon = 0.015, n_integration_steps = 10, } } else { vcv <- sampler_validate_vcv(vcv, pars) - internal$sample_momentum <- make_rmvnorm(vcv, centred = TRUE) + internal$sample_momentum <- make_rmvnorm(vcv) } if (debug) { internal$history <- list() diff --git a/R/sampler-random-walk.R b/R/sampler-random-walk.R index a3641eb2..c7e534e6 100644 --- a/R/sampler-random-walk.R +++ b/R/sampler-random-walk.R @@ -124,13 +124,15 @@ monty_sampler_random_walk <- function(vcv = NULL, boundaries = "reflect", make_random_walk_proposal <- function(vcv, domain, boundaries) { mvn <- make_rmvnorm(vcv) if (boundaries != "reflect" || !any(is.finite(domain))) { - return(mvn) + return(function(x, rng) { + x + mvn(rng) + }) } x_min <- domain[, 1] x_max <- domain[, 2] function(x, rng) { - reflect_proposal(mvn(x, rng), x_min, x_max) + reflect_proposal(x + mvn(rng), x_min, x_max) } } diff --git a/tests/testthat/test-distributions.R b/tests/testthat/test-distributions.R index 8ad810c1..c04b3b81 100644 --- a/tests/testthat/test-distributions.R +++ b/tests/testthat/test-distributions.R @@ -1,32 +1,20 @@ -test_that("repeated calls rmvnorm agrees with one-shot", { - vcv <- matrix(c(4, 2, 2, 3), ncol = 2) - x <- runif(4) - g1 <- monty_rng_create(seed = 42) - g2 <- monty_rng_create(seed = 42) - r1 <- rmvnorm(x, vcv, g1) - r2 <- make_rmvnorm(vcv)(x, g2) - expect_identical(r2, r1) -}) - - test_that("rmvnorm has correct behaviour in trivial case", { vcv <- matrix(1, 1, 1) g1 <- monty_rng_create(seed = 42) g2 <- monty_rng_create(seed = 42) - r1 <- replicate(100, rmvnorm(0, vcv, g1)) + mvn <- make_rmvnorm(vcv) + r1 <- replicate(100, mvn(g1)) r2 <- monty_random_n_normal(100, 0, 1, g2) expect_identical(r1, r2) }) -## This is still just too slow to really push around: test_that("rvnorm produces correct expectation", { vcv <- matrix(c(4, 2, 2, 3), ncol = 2) f <- make_rmvnorm(vcv) rng <- monty_rng_create(seed = 42) - x <- c(1, 2) - r <- t(replicate(1e5, f(x, rng))) - expect_equal(colMeans(r), c(1, 2), tolerance = 0.01) + r <- t(replicate(1e5, f(rng))) + expect_equal(colMeans(r), c(0, 0), tolerance = 0.01) expect_equal(var(r), vcv, tolerance = 0.01) }) @@ -61,13 +49,12 @@ test_that("Can draw samples from many single-variable MVNs", { r1 <- monty_rng_create(seed = 42, n_streams = 4) r2 <- monty_rng_create(seed = 42, n_streams = 4) vcv <- array(1, c(1, 1, 4)) - x <- runif(4) - expect_equal(rmvnorm(x, vcv, r2), - monty_random_normal(0, 1, r1) + x) - expect_equal(rmvnorm(x, 0.1 * vcv, r2), - monty_random_normal(0, 1, r1) * sqrt(0.1) + x) - expect_equal(rmvnorm(x, 0.1 * 1:4 * vcv, r2), - monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4) + x) + expect_equal(make_rmvnorm(vcv)(r2), + monty_random_normal(0, 1, r1)) + expect_equal(make_rmvnorm(0.1 * vcv)(r2), + monty_random_normal(0, 1, r1) * sqrt(0.1)) + expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2), + monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4)) }) @@ -75,15 +62,12 @@ test_that("Can draw samples from many centred single-variable MVNs", { r1 <- monty_rng_create(seed = 42, n_streams = 4) r2 <- monty_rng_create(seed = 42, n_streams = 4) vcv <- array(1, c(1, 1, 4)) - x <- runif(4) - expect_equal(make_rmvnorm(vcv, centred = TRUE)(r2), - monty_random_normal(0, 1, r1)) - expect_equal(make_rmvnorm(vcv, centred = TRUE)(r2), + expect_equal(make_rmvnorm(vcv)(r2), monty_random_normal(0, 1, r1)) - expect_equal(make_rmvnorm(0.1 * vcv, centred = TRUE)(r2), + expect_equal(make_rmvnorm(0.1 * vcv)(r2), monty_random_normal(0, 1, r1) * sqrt(0.1)) - expect_equal(make_rmvnorm(0.1 * 1:4 * vcv, centred = TRUE)(r2), + expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2), monty_random_normal(0, 1, r1) * sqrt(0.1 * 1:4)) }) @@ -91,24 +75,76 @@ test_that("Can draw samples from many centred single-variable MVNs", { test_that("Can draw samples from many bivariate MVNs", { r1 <- monty_rng_create(seed = 42, n_streams = 5) r2 <- monty_rng_create(seed = 42, n_streams = 5) - r3 <- monty_rng_create(seed = 42, n_streams = 5) vcv <- array(0, c(2, 2, 5)) set.seed(1) vcv[1, 1, ] <- 1:5 vcv[2, 2, ] <- 1 vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1) - x <- matrix(runif(10), 2, 5) - y <- rmvnorm(x, vcv, r2) - expect_equal(dim(y), dim(x)) + y <- make_rmvnorm(vcv)(r2) + expect_equal(dim(y), c(2, 5)) ## A bit of work do do these separately: rng_state <- matrix(monty_rng_state(r1), ncol = 5) z <- vapply(1:5, function(i) { - rmvnorm(x[, i], vcv[, , i], monty_rng_create(seed = rng_state[, i])) + r <- monty_rng_create(seed = rng_state[, i]) + make_rmvnorm(vcv[, , i])(r) }, numeric(2)) expect_identical(y, z) +}) + + +test_that("can draw from single-variable mvns with a single stream", { + r1 <- monty_rng_create(seed = 42) + r2 <- monty_rng_create(seed = 42) + vcv <- array(1, c(1, 1, 4)) + + expect_equal(make_rmvnorm(vcv)(r2), + monty_random_n_normal(4, 0, 1, r1)) + expect_equal(make_rmvnorm(0.1 * vcv)(r2), + monty_random_n_normal(4, 0, 1, r1) * sqrt(0.1)) + expect_equal(make_rmvnorm(0.1 * 1:4 * vcv)(r2), + monty_random_n_normal(4, 0, 1, r1) * sqrt(0.1 * 1:4)) +}) + + +test_that("Can draw samples from many bivariate MVNs with a single stream", { + r1 <- monty_rng_create(seed = 42) + r2 <- monty_rng_create(seed = 42) + vcv <- array(0, c(2, 2, 5)) + set.seed(1) + vcv[1, 1, ] <- 1:5 + vcv[2, 2, ] <- 1 + vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1) + + y <- make_rmvnorm(vcv)(r2) + expect_equal(dim(y), c(2, 5)) + + ## A bit of work do do these separately: + z <- vapply(1:5, function(i) make_rmvnorm(vcv[, , i])(r1), numeric(2)) + expect_identical(y, z) +}) + + +test_that("rng must be compatible with number of layers in vcv", { + r <- monty_rng_create(n_streams = 3) + vcv <- array(0, c(2, 2, 5)) + vcv[1, 1, ] <- 1:5 + vcv[2, 2, ] <- 1 + vcv[1, 2, ] <- vcv[2, 1, ] <- rnorm(5, 0, 0.1) + expect_error( + make_rmvnorm(vcv)(r), + "Expected a random number generator with 1 or 5 streams, not 3") + expect_error( + make_rmvnorm(vcv[, , 1, drop = FALSE])(r), + "Expected a random number generator with 1 stream, not 3") +}) + - expect_equal(make_rmvnorm(vcv, centred = TRUE)(r3), - y - x) +test_that("drop dimensions where requested", { + r1 <- monty_rng_create(n_streams = 1, preserve_stream_dimension = TRUE) + r2 <- monty_rng_create(n_streams = 1, preserve_stream_dimension = FALSE) + vcv <- array(diag(2), c(2, 2, 1)) + expect_equal(dim(make_rmvnorm(vcv)(r1)), c(2, 1)) + expect_null(dim(make_rmvnorm(vcv)(r2))) }) diff --git a/tests/testthat/test-sampler-random-walk.R b/tests/testthat/test-sampler-random-walk.R index 4165b448..1dc8d6e0 100644 --- a/tests/testthat/test-sampler-random-walk.R +++ b/tests/testthat/test-sampler-random-walk.R @@ -193,7 +193,7 @@ test_that("can reflect parameters in matrix form", { sapply(1:4, function(i) reflect_proposal(p[, i], -xb, xa))) - mvn <- make_rmvnorm(diag(3) * c(4, 8, 16), centred = TRUE) + mvn <- make_rmvnorm(diag(3) * c(4, 8, 16)) r <- monty_rng_create(seed = 42) x <- replicate(10, mvn(r)) x_min <- c(-2, -Inf, -1)