Merge pull request #69 from poissonconsulting/f-optimize-betabin
Speed up `dev_beta_binom()` by adding a scalar version of the log-likelihood function, a vectorized optimization routine, and memoization.
nehill197 authored Jan 7, 2025
2 parents 1b2c2aa + d00b9ac commit a529131
Showing 20 changed files with 333 additions and 101 deletions.
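To exercise the new code path end to end, here is a minimal, illustrative sketch (not part of the commit) that benchmarks `dev_beta_binom()` on a vector long enough to trigger the memoised branch; it assumes the `bench` and `memoise` packages are installed, and the simulated data are arbitrary:

```r
library(extras)

set.seed(42)
# 1000 observations is past the 800-row threshold at which the memoised
# lgamma term is used during the internal optimization.
x <- rbinom(1000, size = 20, prob = 0.4)

bench::mark(
  dev_beta_binom(x, size = 20, prob = 0.4, theta = 0.1, res = TRUE)
)
```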
10 changes: 6 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: extras
Title: Helper Functions for Bayesian Analyses
Version: 0.7.3.9002
Version: 0.8.0
Authors@R: c(
person("Nicole", "Hill", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7623-2153")),
@@ -32,6 +32,7 @@ Suggests:
ggplot2,
hms,
knitr,
memoise,
rlang,
rmarkdown,
scales,
@@ -41,10 +42,11 @@ Suggests:
tidyr,
viridis,
withr
VignetteBuilder:
knitr
Config/Needs/website: poissonconsulting/poissontemplate
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/Needs/website: poissonconsulting/poissontemplate
RoxygenNote: 7.3.2.9000
6 changes: 6 additions & 0 deletions NEWS.md
@@ -1,5 +1,11 @@
<!-- NEWS.md is maintained by https://fledge.cynkra.com, contributors should not edit this file -->

# extras 0.8.0

- Added a scalar case to `log_lik_beta_binom()` to improve speed for scalar inputs.
- Added memoization (used when the memoise package is installed and the data have at least 800 rows) to speed up repeated function calls.
- Used a vectorized optimization routine to speed up the optimization required for the deviance calculation.

# extras 0.7.3.9002

- Remove dependency on MASS package so minimum R version can be brought down to 4.0.0 from 4.3.0.
119 changes: 95 additions & 24 deletions R/dev.R
@@ -1,3 +1,59 @@
# Minimize a vectorized objective over a vector of parameter values simultaneously using golden-section search
parallel_optimize <- function(f, interval, N, tol = .Machine$double.eps^0.5) {
if (N == 0) {
return(numeric(0))
}

lower <- rep(interval[1], N)
upper <- rep(interval[2], N)
threshold <- (upper - lower) * tol

# Golden ratio
gr <- (sqrt(5) - 1) / 2

# Initial points
x1 <- lower + (1 - gr) * (upper - lower)
x2 <- lower + gr * (upper - lower)
f1 <- f(x1)
f2 <- f(x2)
stopifnot(!anyNA(f1), !anyNA(f2))

# Iteratively narrow the search interval
while (all((abs(upper - lower) > threshold) | (abs(f2 - f1) > pmax(abs(f1) * tol, 1e-150)))) {
pos_smaller <- f1 < f2
idx_smaller <- which(pos_smaller)
idx_larger <- which(!pos_smaller)

upper[idx_smaller] <- x2[idx_smaller]
x2[idx_smaller] <- x1[idx_smaller]
f2[idx_smaller] <- f1[idx_smaller]
x1[idx_smaller] <- lower[idx_smaller] + (1 - gr) * (upper[idx_smaller] - lower[idx_smaller])

lower[idx_larger] <- x1[idx_larger]
x1[idx_larger] <- x2[idx_larger]
f1[idx_larger] <- f2[idx_larger]
x2[idx_larger] <- lower[idx_larger] + gr * (upper[idx_larger] - lower[idx_larger])

x_new <- ifelse(pos_smaller, x1, x2)
f_new <- f(x_new)
f1[idx_smaller] <- f_new[idx_smaller]
f2[idx_larger] <- f_new[idx_larger]
}

x_inside <- ifelse(pos_smaller, x1, x2)
f_inside <- ifelse(pos_smaller, f1, f2)

x_outside <- ifelse(pos_smaller, lower, upper)
f_outside <- f(x_outside)

return(ifelse(f_inside < f_outside, x_inside, x_outside))
}
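A quick sanity check for `parallel_optimize()` (illustrative only, not part of the commit; run inside the package source, e.g. after `devtools::load_all()`, since the helper is not exported) is to minimize a vectorized quadratic with known per-element minima and compare against element-wise `stats::optimize()`; `target` is a made-up test vector:

```r
target <- c(0.2, 0.5, 0.9)

# Element i of the objective is minimized at target[i], so the result
# should be close to target.
parallel_optimize(function(p) (p - target)^2, interval = c(0, 1), N = length(target))

# Reference: one scalar optimization per element.
vapply(
  target,
  function(t) stats::optimize(function(p) (p - t)^2, c(0, 1))$minimum,
  numeric(1)
)
```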

#' Beta-Binomial Deviances
#'
#' This parameterization of the beta-binomial distribution uses an expected
@@ -21,44 +77,59 @@
#' @examples
#' dev_beta_binom(c(0, 1, 2), 10, 0.5, 0.1)
dev_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0, res = FALSE) {
opt_beta_binom <- function(prob, x, size = size, theta = theta) {
-log_lik_beta_binom(x = x, size = size, prob = prob, theta = theta)
}
force(x)
args_not_na <- !is.na(x + size + theta)
if (length(size) == 1) {
size <- rep(size, length(x))
size_vec <- size
size_rep <- rep(size, length(x))
} else {
size_vec <- size[args_not_na]
size_rep <- size
}
if (length(theta) == 1) {
theta <- rep(theta, length(x))
}
opt_p <- rep(NA, length(x))
bol <- !is.na(x) & !is.na(size) & !is.na(theta)
for (i in seq_along(x)) {
if (bol[i] && size[i] < x[i]) {
opt_p[i] <- 1
} else if (bol[i] && !is.na(bol[i])) {
opt_p[i] <- stats::optimize(
opt_beta_binom,
interval = c(0, 1),
x = x[i],
size = size[i],
theta = theta[i],
tol = 1e-8
)$minimum
}
theta_vec <- theta
theta_rep <- rep(theta, length(x))
} else {
theta_vec <- theta[args_not_na]
theta_rep <- theta
}
opt_p <- rep(NA, length(args_not_na))
opt_p[args_not_na] <- parallel_optimize(
f = make_opt_beta_binom(x[args_not_na], size_vec, theta_vec),
interval = c(0, 1),
N = sum(args_not_na)
)
dev1 <- log_lik_beta_binom(x = x, size = size, prob = opt_p, theta = theta)
dev2 <- log_lik_beta_binom(x = x, size = size, prob = prob, theta = theta)
dev <- dev1 - dev2
dev[dev < 0 & dev > -1e-7] <- 0
dev <- dev * 2
use_binom <- (!is.na(theta) & theta == 0) |
(!is.na(x) & !is.na(size) & x == 0 & size == 0)
use_binom <- (!is.na(theta_rep) & theta_rep == 0) |
(!is.na(x) & !is.na(size_rep) & x == 0 & size_rep == 0)
dev_binom <- dev_binom(x = x, size = size, prob = prob, res = FALSE)
dev[use_binom] <- dev_binom[use_binom]
if (vld_false(res)) {
return(dev)
}
dev_res(x, ifelse(use_binom, size * prob, size * opt_p), dev)
dev_res(x, size_rep * prob, dev)
}
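A property worth noting (an illustrative check, not part of the commit): when `theta = 0` the `use_binom` branch above copies the deviances from `dev_binom()`, so the beta-binomial deviances collapse to the binomial ones:

```r
x <- c(0, 1, 2)

# Both calls should agree exactly because the theta == 0 elements are
# assigned directly from dev_binom() inside dev_beta_binom().
all.equal(
  dev_beta_binom(x, size = 10, prob = 0.5, theta = 0),
  dev_binom(x, size = 10, prob = 0.5)
)
```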

# Factory that builds the objective function (negative log-likelihood as a function of prob) for the optimization.
make_opt_beta_binom <- function(x, size, theta) {
force(x)
force(size)
force(theta)
function(prob) {
out <- -log_lik_beta_binom(
x = x,
prob = prob,
size = size,
theta = theta,
memoize = TRUE
)
out[is.nan(out)] <- Inf
out
}
}
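For orientation, an illustrative sketch (not part of the commit; the helpers are internal, so run it inside the package source) of how the factory and the vectorized optimizer fit together, mirroring the call inside `dev_beta_binom()`; the data are made up:

```r
x <- c(1, 4, 9)

# Objective: negative beta-binomial log-likelihood as a function of prob,
# with x, size and theta fixed by the closure.
obj <- make_opt_beta_binom(x = x, size = 10, theta = 0.2)

# One golden-section search per observation, all narrowed simultaneously.
parallel_optimize(obj, interval = c(0, 1), N = length(x))
```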

#' Bernoulli Deviances
82 changes: 65 additions & 17 deletions R/log-lik.R
@@ -12,35 +12,83 @@
#'
#' @inheritParams params
#' @param x A non-negative whole numeric vector of values.
#' @param memoize Whether to memoize part of the log-likelihood calculation to speed up repeated calls (effective when the memoise package is installed and `length(x) >= 800`).
#'
#' @return A numeric vector of the corresponding log-likelihoods.
#' @family log_lik_dist
#' @export
#'
#' @examples
#' log_lik_beta_binom(c(0, 1, 2), 3, 0.5, 0)
log_lik_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0) {
log_lik_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0, memoize = FALSE) {
alpha <- prob * 2 * (1 / theta)
beta <- (1 - prob) * 2 * (1 / theta)
lbeta_binom <- lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1) +

# Memoise use case:
# Posterior_predictive_check calls this function repeatedly with
# x and size unchanged; memoising lgamma_size_x() avoids recomputing it
# when length(x) is large enough to outweigh the memoisation overhead.
# For length(x) < 800, memoisation is slower.
# See details at https://github.com/poissonconsulting/extras/issues/63
if (memoize && length(x) >= 800) {
lgamma_size_x <- lgamma_size_x(size, x)
} else {
lgamma_size_x <- lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
}
lbeta_binom <- lgamma_size_x +
lgamma(x + alpha) + lgamma(size - x + beta) - lgamma(size + alpha + beta) +
lgamma(alpha + beta) - lgamma(alpha) - lgamma(beta)
bol <- !is.na(x + size + prob + theta)
lbeta_binom[bol & ((x == 0 & prob == 0) | (x == size & prob == 1))] <- 0
lbeta_binom[bol & x != 0 & prob == 0] <- -Inf
lbeta_binom[bol & x != size & prob == 1] <- -Inf
lbeta_binom[bol & x > size] <- -Inf
bol_theta <- !is.na(theta)
lbeta_binom[bol_theta & theta < 0] <- NaN
use_binom <- bol_theta & theta == 0
if (any(use_binom)) {
lbinom <- log_lik_binom(x, size = size, prob = prob)
lbeta_binom[use_binom] <- lbinom[use_binom]
}
if (length(bol) == 0) {
lbeta_binom <- numeric(0)

args_na <- is.na(x + size + prob + theta)
length_args_na <- length(args_na)
if (length_args_na == 1) {
if (args_na) {
return(NA_real_)
}
if (prob == 0) {
if (x == 0) {
lbeta_binom <- 0
} else {
lbeta_binom <- -Inf
}
} else if (prob == 1) {
if (x == size) {
lbeta_binom <- 0
} else {
lbeta_binom <- -Inf
}
}
if (x > size) {
lbeta_binom <- -Inf
}
if (theta == 0) {
lbeta_binom <- log_lik_binom(x = x, size = size, prob = prob)
} else if (theta < 0) {
lbeta_binom <- NaN
}
lbeta_binom
} else if (length_args_na == 0) {
numeric(0)
} else {
args_not_na <- !args_na
lbeta_binom[args_not_na & ((x == 0 & prob == 0) | (x == size & prob == 1))] <- 0
lbeta_binom[args_not_na & x != 0 & prob == 0] <- -Inf
lbeta_binom[args_not_na & x != size & prob == 1] <- -Inf
lbeta_binom[args_not_na & x > size] <- -Inf
theta_not_na <- !is.na(theta)
lbeta_binom[theta_not_na & theta < 0] <- NaN
use_binom <- theta_not_na & theta == 0
if (any(use_binom)) {
lbinom <- log_lik_binom(x, size = size, prob = prob)
lbeta_binom[use_binom] <- lbinom[use_binom]
}
lbeta_binom
}
lbeta_binom
}

# Helper that is memoised in .onLoad() because it is called repeatedly with unchanged values of size and x
lgamma_size_x <- function(size, x) {
lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
}
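The gain from memoisation can be seen with a rough timing sketch (illustrative, not part of the commit; assumes the suggested memoise package is installed). Repeated calls with unchanged `x` and `size`, as in posterior predictive checks, hit the cache; the simulated data and iteration count are arbitrary:

```r
x <- rbinom(1000, size = 50, prob = 0.3)

# With memoize = TRUE and length(x) >= 800 the lgamma_size_x() term is cached
# after the first call, so later iterations skip that computation.
system.time(
  for (i in 1:200) log_lik_beta_binom(x, size = 50, prob = 0.3, theta = 0.1, memoize = TRUE)
)
system.time(
  for (i in 1:200) log_lik_beta_binom(x, size = 50, prob = 0.3, theta = 0.1, memoize = FALSE)
)
```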

#' Bernoulli Log-Likelihood
5 changes: 5 additions & 0 deletions R/zzz.R
@@ -0,0 +1,5 @@
.onLoad <- function(libname, pkgname) {
if (requireNamespace("memoise", quietly = TRUE)) {
lgamma_size_x <<- memoise::memoise(lgamma_size_x)
}
}
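As a quick check (a sketch, not part of the commit) that the conditional memoisation took effect at load time, the rebound internal helper can be inspected:

```r
# TRUE when memoise was available at load time and .onLoad() replaced the
# helper with its memoised wrapper; FALSE otherwise.
requireNamespace("memoise", quietly = TRUE) &&
  memoise::is.memoised(extras:::lgamma_size_x)
```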
47 changes: 33 additions & 14 deletions README.Rmd
@@ -13,6 +13,7 @@ knitr::opts_chunk$set(
)
```


# extras <a href="https://poissonconsulting.github.io/extras/"><img src="man/figures/logo.png" align="right" height="138" alt="extras website" /></a>

<!-- badges: start -->
@@ -32,19 +33,6 @@ calculate deviance residuals as well as R translations of some
BUGS (Bayesian Using Gibbs Sampling), JAGS (Just Another Gibbs
Sampler), STAN and TMB (Template Model Builder) functions.

## Installation

<!-- To install the latest release from [CRAN](https://cran.r-project.org) -->
```{r, eval=FALSE, echo=FALSE}
install.packages("extras")
```

To install the developmental version from [GitHub](https://github.com/poissonconsulting/extras)
```{r, eval=FALSE}
# install.packages("pak")
pak::pak("poissonconsulting/extras")
```

## Demonstration

### Summarise MCMC Samples
@@ -67,6 +55,7 @@ svalue(rnorm(100, mean = 3))
Implemented distributions, with functions to draw random samples, calculate log-likelihoods, and calculate deviance residuals, include:

- Bernoulli
- Binomial
- Beta-binomial
- Gamma
- Gamma-Poisson
@@ -106,6 +95,37 @@ numericise(
)
```

## Information

For more information see the [Get Started](https://poissonconsulting.github.io/chk/articles/chk.html) vignette.

## Installation

### Release

To install the release version from [CRAN](https://CRAN.R-project.org/package=extras).
```r
install.packages("extras")
```

The website for the release version is at <https://poissonconsulting.github.io/extras/>.

### Development

To install the development version from [GitHub](https://github.com/poissonconsulting/extras)
```r
# install.packages("remotes")
remotes::install_github("poissonconsulting/extras")
```

or from [r-universe](https://poissonconsulting.r-universe.dev/extras).
```r
install.packages("extras", repos = c("https://poissonconsulting.r-universe.dev", "https://cloud.r-project.org"))
```

## References

Greenland, S. 2019. Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values. The American Statistician 73(sup1): 106–114.
@@ -120,4 +140,3 @@ Please report any [issues](https://github.com/poissonconsulting/extras/issues).

Please note that the extras project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms.
