Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up dev_beta_binom() by adding a scalar version of the log-likelihood function, a vectorized optimization routine, and memoization. #69

Merged
merged 22 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a386e0e
Add binomial distribution to README
nehill197 Aug 8, 2024
3ca7c00
Optimize log-likelihood function for beta-binomial deviance calculation
nehill197 Aug 8, 2024
8632d1d
Styler
nehill197 Aug 8, 2024
0a51424
Add memoized function to repeated portion of log-likelihood
nehill197 Aug 12, 2024
df94a1b
- Use vectorized optimization function to speed up deviance calculati…
nehill197 Aug 12, 2024
dfd1406
Improved optimization has slightly different values - update tests ac…
nehill197 Aug 12, 2024
ecfeebc
Improved optimization has slightly different values - update tests ac…
nehill197 Aug 12, 2024
3455738
Comment out original R optimize function (not used)
nehill197 Aug 13, 2024
6a0284b
Add tests for memoized function and two cases missing test coverage i…
nehill197 Aug 13, 2024
e789cc4
Move `optimize_R()` function to the scripts folder for reference
nehill197 Aug 13, 2024
17386a4
Remove link
nehill197 Aug 13, 2024
5930dad
Merge branch 'main' into f-optimize-betabin
nehill197 Sep 17, 2024
4aef63e
Update documentation
nehill197 Sep 17, 2024
87170e0
up-to-date with main branch
nehill197 Jan 6, 2025
1bd7cd8
add ProjectId
nehill197 Jan 6, 2025
f27f8ab
Roxygen to 7.3.2.9000 and update associated documentation
nehill197 Jan 6, 2025
90ffbf7
fledge: Bump version to 0.8.0
nehill197 Jan 6, 2025
5b60d94
Update NEWS file
nehill197 Jan 6, 2025
931c4cc
style
joethorley Jan 7, 2025
80c177f
tidy description and rearrange pkgdown
joethorley Jan 7, 2025
0ffe002
add development mode auto for dev website
joethorley Jan 7, 2025
d00b9ac
move and update installation instructions
joethorley Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Suggests:
ggplot2,
knitr,
MASS,
memoise,
rlang,
rmarkdown,
scales,
Expand Down
119 changes: 95 additions & 24 deletions R/dev.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,59 @@
# Function to optimize a vector of parameter values at the same time
parallel_optimize <- function(f, interval, N, tol = .Machine$double.eps^0.5) {
if (N == 0) {
return(numeric(0))
}

lower <- rep(interval[1], N)
upper <- rep(interval[2], N)
threshold <- (upper - lower) * tol

# Golden ratio
gr <- (sqrt(5) - 1) / 2

# Initial points
x1 <- lower + (1 - gr) * (upper - lower)
x2 <- lower + gr * (upper - lower)
f1 <- f(x1)
f2 <- f(x2)
stopifnot(!anyNA(f1), !anyNA(f2))

# Iteratively narrow the search interval
while (all((abs(upper - lower) > threshold) | (abs(f2 - f1) > pmax(abs(f1) * tol, 1e-150)))) {
pos_smaller <- f1 < f2
idx_smaller <- which(pos_smaller)
idx_larger <- which(!pos_smaller)

upper[idx_smaller] <- x2[idx_smaller]
x2[idx_smaller] <- x1[idx_smaller]
f2[idx_smaller] <- f1[idx_smaller]
x1[idx_smaller] <- lower[idx_smaller] + (1 - gr) * (upper[idx_smaller] - lower[idx_smaller])

lower[idx_larger] <- x1[idx_larger]
x1[idx_larger] <- x2[idx_larger]
f1[idx_larger] <- f2[idx_larger]
x2[idx_larger] <- lower[idx_larger] + gr * (upper[idx_larger] - lower[idx_larger])

x_new <- ifelse(pos_smaller, x1, x2)
f_new <- f(x_new)
f1[idx_smaller] <- f_new[idx_smaller]
f2[idx_larger] <- f_new[idx_larger]
}

x_inside <- x1
x_inside[idx_larger] <- x2[idx_larger]
f_inside <- f1
f_inside[idx_larger] <- f2[idx_larger]

x_inside <- ifelse(pos_smaller, x1, x2)
f_inside <- ifelse(pos_smaller, f1, f2)

x_outside <- ifelse(pos_smaller, lower, upper)
f_outside <- f(x_outside)

return(ifelse(f_inside < f_outside, x_inside, x_outside))
}

#' Beta-Binomial Deviances
#'
#' This parameterization of the beta-binomial distribution uses an expected
Expand All @@ -21,44 +77,59 @@
#' @examples
#' dev_beta_binom(c(0, 1, 2), 10, 0.5, 0.1)
dev_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0, res = FALSE) {
opt_beta_binom <- function(prob, x, size = size, theta = theta) {
-log_lik_beta_binom(x = x, size = size, prob = prob, theta = theta)
}
force(x)
args_not_na <- !is.na(x + size + theta)
if (length(size) == 1) {
size <- rep(size, length(x))
size_vec <- size
size_rep <- rep(size, length(x))
} else {
size_vec <- size[args_not_na]
size_rep <- size
}
if (length(theta) == 1) {
theta <- rep(theta, length(x))
}
opt_p <- rep(NA, length(x))
bol <- !is.na(x) & !is.na(size) & !is.na(theta)
for (i in seq_along(x)) {
if (bol[i] && size[i] < x[i]) {
opt_p[i] <- 1
} else if (bol[i] && !is.na(bol[i])) {
opt_p[i] <- stats::optimize(
opt_beta_binom,
interval = c(0, 1),
x = x[i],
size = size[i],
theta = theta[i],
tol = 1e-8
)$minimum
}
theta_vec <- theta
theta_rep <- rep(theta, length(x))
} else {
theta_vec <- theta[args_not_na]
theta_rep <- theta
}
opt_p <- rep(NA, length(args_not_na))
opt_p[args_not_na] <- parallel_optimize(
f = make_opt_beta_binom(x[args_not_na], size_vec, theta_vec),
interval = c(0, 1),
N = sum(args_not_na)
)
dev1 <- log_lik_beta_binom(x = x, size = size, prob = opt_p, theta = theta)
dev2 <- log_lik_beta_binom(x = x, size = size, prob = prob, theta = theta)
dev <- dev1 - dev2
dev[dev < 0 & dev > -1e-7] <- 0
dev <- dev * 2
use_binom <- (!is.na(theta) & theta == 0) |
(!is.na(x) & !is.na(size) & x == 0 & size == 0)
use_binom <- (!is.na(theta_rep) & theta_rep == 0) |
(!is.na(x) & !is.na(size_rep) & x == 0 & size_rep == 0)
dev_binom <- dev_binom(x = x, size = size, prob = prob, res = FALSE)
dev[use_binom] <- dev_binom[use_binom]
if (vld_false(res)) {
return(dev)
}
dev_res(x, ifelse(use_binom, size * prob, size * opt_p), dev)
dev_res(x, size_rep * prob, dev)
}

# Objective function for optimization.
make_opt_beta_binom <- function(x, size, theta) {
force(x)
force(size)
force(theta)
function(prob) {
out <- -log_lik_beta_binom(
x = x,
prob = prob,
size = size,
theta = theta,
memoize = TRUE
)
out[is.nan(out)] <- Inf
out
}
}

#' Bernoulli Deviances
Expand Down
82 changes: 65 additions & 17 deletions R/log-lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,83 @@
#'
#' @inheritParams params
#' @param x A non-negative whole numeric vector of values.
#' @param memoize Whether or not to memoize the function.
#'
#' @return An numeric vector of the corresponding log-likelihoods.
#' @family log_lik_dist
#' @export
#'
#' @examples
#' log_lik_beta_binom(c(0, 1, 2), 3, 0.5, 0)
log_lik_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0) {
log_lik_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0, memoize = FALSE) {
alpha <- prob * 2 * (1 / theta)
beta <- (1 - prob) * 2 * (1 / theta)
lbeta_binom <- lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1) +

# Memoise use case:
# Posterior_predictive_check calls this function repeatedly with
# x and size unchanged; memoize it to reduce repeated calls
# when length(x) is large enough to outweigh the overhead required by memoize.
# For length(x) < 800, memoize is slower
# See detail here https://github.com/poissonconsulting/extras/issues/63
if (memoize && length(x) >= 800) {
lgamma_size_x <- lgamma_size_x(size, x)
} else {
lgamma_size_x <- lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
}
lbeta_binom <- lgamma_size_x +
lgamma(x + alpha) + lgamma(size - x + beta) - lgamma(size + alpha + beta) +
lgamma(alpha + beta) - lgamma(alpha) - lgamma(beta)
bol <- !is.na(x + size + prob + theta)
lbeta_binom[bol & ((x == 0 & prob == 0) | (x == size & prob == 1))] <- 0
lbeta_binom[bol & x != 0 & prob == 0] <- -Inf
lbeta_binom[bol & x != size & prob == 1] <- -Inf
lbeta_binom[bol & x > size] <- -Inf
bol_theta <- !is.na(theta)
lbeta_binom[bol_theta & theta < 0] <- NaN
use_binom <- bol_theta & theta == 0
if (any(use_binom)) {
lbinom <- log_lik_binom(x, size = size, prob = prob)
lbeta_binom[use_binom] <- lbinom[use_binom]
}
if (length(bol) == 0) {
lbeta_binom <- numeric(0)

args_na <- is.na(x + size + prob + theta)
length_args_na <- length(args_na)
if (length_args_na == 1) {
if (args_na) {
return(NA_real_)
}
if (prob == 0) {
if (x == 0) {
lbeta_binom <- 0
} else {
lbeta_binom <- -Inf
}
} else if (prob == 1) {
if (x == size) {
lbeta_binom <- 0
} else {
lbeta_binom <- -Inf
}
}
if (x > size) {
lbeta_binom <- -Inf
}
if (theta == 0) {
lbeta_binom <- log_lik_binom(x = x, size = size, prob = prob)
} else if (theta < 0) {
lbeta_binom <- NaN
}
lbeta_binom
} else if (length_args_na == 0) {
numeric(0)
} else {
args_not_na <- !args_na
lbeta_binom[args_not_na & ((x == 0 & prob == 0) | (x == size & prob == 1))] <- 0
lbeta_binom[args_not_na & x != 0 & prob == 0] <- -Inf
lbeta_binom[args_not_na & x != size & prob == 1] <- -Inf
lbeta_binom[args_not_na & x > size] <- -Inf
theta_not_na <- !is.na(theta)
lbeta_binom[theta_not_na & theta < 0] <- NaN
use_binom <- theta_not_na & theta == 0
if (any(use_binom)) {
lbinom <- log_lik_binom(x, size = size, prob = prob)
lbeta_binom[use_binom] <- lbinom[use_binom]
}
lbeta_binom
}
lbeta_binom
}

# Function to memoize (called repeatedly for non-changing values of size and x)
lgamma_size_x <- function(size, x) {
lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
}

#' Bernoulli Log-Likelihood
Expand Down
5 changes: 5 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.onLoad <- function(libname, pkgname) {
if (requireNamespace("memoise", quietly = TRUE)) {
lgamma_size_x <<- memoise::memoise(lgamma_size_x)
}
}
1 change: 1 addition & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ svalue(rnorm(100, mean = 3))
Implemented distributions with functions to draw random samples, calculate log-likelihoods, and calculate deviance residuals for include:

- Bernoulli
- Binomial
- Beta-binomial
- Gamma
- Gamma-Poisson
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Implemented distributions with functions to draw random samples,
calculate log-likelihoods, and calculate deviance residuals for include:

- Bernoulli
- Binomial
- Beta-binomial
- Gamma
- Gamma-Poisson
Expand Down
4 changes: 3 additions & 1 deletion man/log_lik_beta_binom.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-chk-index.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ test_that("chk_index", {
)
})

test_that("chk_index errors with x = NA",{
test_that("chk_index errors with x = NA", {
expect_snapshot(
error = TRUE,
chk_index(NA)
)
})

test_that("chk_index errors with empty x",{
test_that("chk_index errors with empty x", {
expect_snapshot(
error = TRUE,
chk_index(integer(0))
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test-dev.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ test_that("beta_binom known values", {
expect_equal(dev_beta_binom(1, 1, 0, 0.5), Inf)
expect_equal(dev_beta_binom(1, 1, 0.5, 0), 1.38629436111989)
expect_identical(dev_beta_binom(1, 1, 0.5, 0), dev_binom(1, 1, 0.5))
expect_equal(dev_beta_binom(1, 1, 0.5, 1), 1.38629430121326)
expect_equal(dev_beta_binom(1, 1, 0.5, 0.5), 1.38629430121326)
expect_equal(dev_beta_binom(1, 1, 0.5, 1), 1.38629436111989)
expect_equal(dev_beta_binom(1, 1, 0.5, 0.5), 1.38629436111989)
expect_equal(dev_beta_binom(1, 2, 0.2, 0), 0.892574205256839)
expect_equal(dev_beta_binom(1, 2, 0.2, 1), 0.892574205256839)
expect_equal(dev_beta_binom(1, 2, 0.2, 0.5), 0.892574205256839)
Expand All @@ -39,8 +39,8 @@ test_that("beta_binom known values", {
expect_equal(dev_beta_binom(0, 2, 0.5, 0.1), 2.67954869096997)
expect_equal(dev_beta_binom(0, 2, 0.5, 0.5), 2.40794560865187)
expect_equal(dev_beta_binom(0, 2, 0.1), 0.421442062631305)
expect_equal(dev_beta_binom(0, 2, 0.1, 0.1), 0.410887933834857)
expect_equal(dev_beta_binom(0, 2, 0.1, 0.5), 0.377484235738089)
expect_equal(dev_beta_binom(0, 2, 0.1, 0.1), 0.410887948429604)
expect_equal(dev_beta_binom(0, 2, 0.1, 0.5), 0.377484249193745)
expect_equal(dev_beta_binom(1, 2, 1, 10), Inf)
expect_equal(dev_beta_binom(2, 2, 1, 10), 0)
expect_equal(dev_beta_binom(0, 2, 0.5, 10), 1.56031711509915)
Expand All @@ -49,7 +49,7 @@ test_that("beta_binom known values", {

test_that("beta_binom vectorized", {
expect_equal(dev_beta_binom(0:3, 5, 0, 0), dev_binom(0:3, 5, 0))
expect_equal(dev_beta_binom(c(0, 1, 3, 0), 3, 0.5, 0.5), c(3.21887580642895, 0.179750127270295, 3.2188756770985, 3.21887580642895))
expect_equal(dev_beta_binom(c(0, 1, 3, 0), 3, 0.5, 0.5), c(3.2188758248682, 0.179750127270293, 3.2188758248682, 3.2188758248682))
expect_equal(dev_beta_binom(0:3, 0:3, rep(1, 4), 0), rep(0, 4))
expect_equal(dev_beta_binom(0:3, 1:4, seq(0, 1, length.out = 4), 0:3), c(0, 0.235566071312767, 0.076961041136129, Inf))
})
Expand Down
46 changes: 46 additions & 0 deletions tests/testthat/test-log-lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,56 @@ test_that("beta_binom known values", {
expect_equal(log_lik_beta_binom(1, 2, 0.2, 1), -1.54489939129653)
expect_equal(log_lik_beta_binom(2, 2, 0.2, 10), -1.75253875607477)
expect_equal(log_lik_beta_binom(1, 2, 0.5), -0.693147180559945)
expect_equal(log_lik_beta_binom(10, 2, 0.5, 0.1), -Inf)
expect_equal(log_lik_beta_binom(10, 2, 0.5, -0.1), NaN)
expect_equal(log_lik_beta_binom(1, 2, 0.5), log_lik_binom(1, 2, 0.5))
expect_equal(log_lik_beta_binom(1, 2, 0.2), dbinom(1, 2, 0.2, log = TRUE))
})

test_that("beta binomial deviance function is memoized", {
expect_true(memoise::is.memoized(lgamma_size_x))
})

test_that("lgamma_size_x produces expected outputs", {
size <- 100
x <- 1
expect_equal(
lgamma_size_x(size, x),
lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
)
size <- 100
x <- 1:100
expect_equal(
lgamma_size_x(size, x),
lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
)
size <- 201:300
x <- 1:100
expect_equal(
lgamma_size_x(size, x),
lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
)
})

test_that("beta_binom memoized function gives same outputs as non-memoized function", {
expect_equal(
log_lik_beta_binom(1:100, 200, 0.5, 0.1, memoize = FALSE),
log_lik_beta_binom(1:100, 200, 0.5, 0.1, memoize = TRUE)
)
expect_equal(
log_lik_beta_binom(1:100, 200, 0.1, 0, memoize = FALSE),
log_lik_beta_binom(1:100, 200, 0.1, 0, memoize = TRUE)
)
withr::with_seed(
101, {
x <- ran_beta_binom(1e7, 10, 0.5, 0.1)
}
)
time_no_mem <- system.time(log_lik_beta_binom(x, 10, 0.5, 0.2, memoize = FALSE))[3]
time_mem <- system.time(log_lik_beta_binom(x, 10, 0.5, 0.2, memoize = TRUE))[3]
expect_true(time_mem < time_no_mem)
})

test_that("beta_binom vectorized", {
expect_equal(log_lik_beta_binom(0:3, 2, 0.5, 0), c(-1.38629436111989, -0.693147180559945, -1.38629436111989, -Inf))
expect_equal(log_lik_beta_binom(0:3, 2, 0.5, 0), log_lik_binom(0:3, 2, 0.5))
Expand Down
Loading
Loading