Merge pull request #69 from poissonconsulting/f-optimize-betabin
Speed up `dev_beta_binom()` by adding a scalar version of the log-likelihood function, a vectorized optimization routine, and memoization.
nehill197 authored Jan 7, 2025
2 parents 1b2c2aa + d00b9ac commit a529131
Showing 20 changed files with 333 additions and 101 deletions.
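To exercise the new code path end to end, here is a minimal, illustrative sketch (not part of the commit) that benchmarks `dev_beta_binom()` on a vector long enough to trigger the memoised branch; it assumes the `bench` and `memoise` packages are installed, and the simulated data are arbitrary:

```r
library(extras)

set.seed(42)
# 1000 observations is past the 800-row threshold at which the memoised
# lgamma term is used during the internal optimization.
x <- rbinom(1000, size = 20, prob = 0.4)

bench::mark(
  dev_beta_binom(x, size = 20, prob = 0.4, theta = 0.1, res = TRUE)
)
```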
10 changes: 6 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: extras
Title: Helper Functions for Bayesian Analyses
Version: 0.7.3.9002
Version: 0.8.0
Authors@R: c(
person("Nicole", "Hill", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7623-2153")),
@@ -32,6 +32,7 @@ Suggests:
ggplot2,
hms,
knitr,
memoise,
rlang,
rmarkdown,
scales,
@@ -41,10 +42,11 @@ Suggests:
tidyr,
viridis,
withr
VignetteBuilder:
knitr
Config/Needs/website: poissonconsulting/poissontemplate
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/Needs/website: poissonconsulting/poissontemplate
RoxygenNote: 7.3.2.9000
6 changes: 6 additions & 0 deletions NEWS.md
@@ -1,5 +1,11 @@
<!-- NEWS.md is maintained by https://fledge.cynkra.com, contributors should not edit this file -->

# extras 0.8.0

- Added a scalar case to `log_lik_beta_binom()` to improve speed for scalar inputs.
- Added memoization (used when the memoise package is installed and the data have at least 800 rows) to speed up repeated function calls.
- Used a vectorized optimization routine to speed up the optimization required for the deviance calculation.

# extras 0.7.3.9002

- Remove dependency on MASS package so minimum R version can be brought down to 4.0.0 from 4.3.0.
119 changes: 95 additions & 24 deletions R/dev.R
@@ -1,3 +1,59 @@
# Minimize a vectorized objective over a vector of parameter values simultaneously using golden-section search
parallel_optimize <- function(f, interval, N, tol = .Machine$double.eps^0.5) {
if (N == 0) {
return(numeric(0))
}

lower <- rep(interval[1], N)
upper <- rep(interval[2], N)
threshold <- (upper - lower) * tol

# Golden ratio
gr <- (sqrt(5) - 1) / 2

# Initial points
x1 <- lower + (1 - gr) * (upper - lower)
x2 <- lower + gr * (upper - lower)
f1 <- f(x1)
f2 <- f(x2)
stopifnot(!anyNA(f1), !anyNA(f2))

# Iteratively narrow the search interval
while (all((abs(upper - lower) > threshold) | (abs(f2 - f1) > pmax(abs(f1) * tol, 1e-150)))) {
pos_smaller <- f1 < f2
idx_smaller <- which(pos_smaller)
idx_larger <- which(!pos_smaller)

upper[idx_smaller] <- x2[idx_smaller]
x2[idx_smaller] <- x1[idx_smaller]
f2[idx_smaller] <- f1[idx_smaller]
x1[idx_smaller] <- lower[idx_smaller] + (1 - gr) * (upper[idx_smaller] - lower[idx_smaller])

lower[idx_larger] <- x1[idx_larger]
x1[idx_larger] <- x2[idx_larger]
f1[idx_larger] <- f2[idx_larger]
x2[idx_larger] <- lower[idx_larger] + gr * (upper[idx_larger] - lower[idx_larger])

x_new <- ifelse(pos_smaller, x1, x2)
f_new <- f(x_new)
f1[idx_smaller] <- f_new[idx_smaller]
f2[idx_larger] <- f_new[idx_larger]
}

x_inside <- ifelse(pos_smaller, x1, x2)
f_inside <- ifelse(pos_smaller, f1, f2)

x_outside <- ifelse(pos_smaller, lower, upper)
f_outside <- f(x_outside)

return(ifelse(f_inside < f_outside, x_inside, x_outside))
}
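A quick sanity check for `parallel_optimize()` (illustrative only, not part of the commit; run inside the package source, e.g. after `devtools::load_all()`, since the helper is not exported) is to minimize a vectorized quadratic with known per-element minima and compare against element-wise `stats::optimize()`; `target` is a made-up test vector:

```r
target <- c(0.2, 0.5, 0.9)

# Element i of the objective is minimized at target[i], so the result
# should be close to target.
parallel_optimize(function(p) (p - target)^2, interval = c(0, 1), N = length(target))

# Reference: one scalar optimization per element.
vapply(
  target,
  function(t) stats::optimize(function(p) (p - t)^2, c(0, 1))$minimum,
  numeric(1)
)
```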

#' Beta-Binomial Deviances
#'
#' This parameterization of the beta-binomial distribution uses an expected
@@ -21,44 +77,59 @@
#' @examples
#' dev_beta_binom(c(0, 1, 2), 10, 0.5, 0.1)
dev_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0, res = FALSE) {
opt_beta_binom <- function(prob, x, size = size, theta = theta) {
-log_lik_beta_binom(x = x, size = size, prob = prob, theta = theta)
}
force(x)
args_not_na <- !is.na(x + size + theta)
if (length(size) == 1) {
size <- rep(size, length(x))
size_vec <- size
size_rep <- rep(size, length(x))
} else {
size_vec <- size[args_not_na]
size_rep <- size
}
if (length(theta) == 1) {
theta <- rep(theta, length(x))
}
opt_p <- rep(NA, length(x))
bol <- !is.na(x) & !is.na(size) & !is.na(theta)
for (i in seq_along(x)) {
if (bol[i] && size[i] < x[i]) {
opt_p[i] <- 1
} else if (bol[i] && !is.na(bol[i])) {
opt_p[i] <- stats::optimize(
opt_beta_binom,
interval = c(0, 1),
x = x[i],
size = size[i],
theta = theta[i],
tol = 1e-8
)$minimum
}
theta_vec <- theta
theta_rep <- rep(theta, length(x))
} else {
theta_vec <- theta[args_not_na]
theta_rep <- theta
}
opt_p <- rep(NA, length(args_not_na))
opt_p[args_not_na] <- parallel_optimize(
f = make_opt_beta_binom(x[args_not_na], size_vec, theta_vec),
interval = c(0, 1),
N = sum(args_not_na)
)
dev1 <- log_lik_beta_binom(x = x, size = size, prob = opt_p, theta = theta)
dev2 <- log_lik_beta_binom(x = x, size = size, prob = prob, theta = theta)
dev <- dev1 - dev2
dev[dev < 0 & dev > -1e-7] <- 0
dev <- dev * 2
use_binom <- (!is.na(theta) & theta == 0) |
(!is.na(x) & !is.na(size) & x == 0 & size == 0)
use_binom <- (!is.na(theta_rep) & theta_rep == 0) |
(!is.na(x) & !is.na(size_rep) & x == 0 & size_rep == 0)
dev_binom <- dev_binom(x = x, size = size, prob = prob, res = FALSE)
dev[use_binom] <- dev_binom[use_binom]
if (vld_false(res)) {
return(dev)
}
dev_res(x, ifelse(use_binom, size * prob, size * opt_p), dev)
dev_res(x, size_rep * prob, dev)
}
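A property worth noting (an illustrative check, not part of the commit): when `theta = 0` the `use_binom` branch above copies the deviances from `dev_binom()`, so the beta-binomial deviances collapse to the binomial ones:

```r
x <- c(0, 1, 2)

# Both calls should agree exactly because the theta == 0 elements are
# assigned directly from dev_binom() inside dev_beta_binom().
all.equal(
  dev_beta_binom(x, size = 10, prob = 0.5, theta = 0),
  dev_binom(x, size = 10, prob = 0.5)
)
```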

# Factory that builds the objective function (negative log-likelihood as a function of prob) for the optimization.
make_opt_beta_binom <- function(x, size, theta) {
force(x)
force(size)
force(theta)
function(prob) {
out <- -log_lik_beta_binom(
x = x,
prob = prob,
size = size,
theta = theta,
memoize = TRUE
)
out[is.nan(out)] <- Inf
out
}
}
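For orientation, an illustrative sketch (not part of the commit; the helpers are internal, so run it inside the package source) of how the factory and the vectorized optimizer fit together, mirroring the call inside `dev_beta_binom()`; the data are made up:

```r
x <- c(1, 4, 9)

# Objective: negative beta-binomial log-likelihood as a function of prob,
# with x, size and theta fixed by the closure.
obj <- make_opt_beta_binom(x = x, size = 10, theta = 0.2)

# One golden-section search per observation, all narrowed simultaneously.
parallel_optimize(obj, interval = c(0, 1), N = length(x))
```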

#' Bernoulli Deviances
82 changes: 65 additions & 17 deletions R/log-lik.R
@@ -12,35 +12,83 @@
#'
#' @inheritParams params
#' @param x A non-negative whole numeric vector of values.
#' @param memoize Whether to memoize part of the log-likelihood calculation to speed up repeated calls (effective when the memoise package is installed and `length(x) >= 800`).
#'
#' @return A numeric vector of the corresponding log-likelihoods.
#' @family log_lik_dist
#' @export
#'
#' @examples
#' log_lik_beta_binom(c(0, 1, 2), 3, 0.5, 0)
log_lik_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0) {
log_lik_beta_binom <- function(x, size = 1, prob = 0.5, theta = 0, memoize = FALSE) {
alpha <- prob * 2 * (1 / theta)
beta <- (1 - prob) * 2 * (1 / theta)
lbeta_binom <- lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1) +

# Memoise use case:
# Posterior_predictive_check calls this function repeatedly with
# x and size unchanged; memoising lgamma_size_x() avoids recomputing it
# when length(x) is large enough to outweigh the memoisation overhead.
# For length(x) < 800, memoisation is slower.
# See details at https://github.com/poissonconsulting/extras/issues/63
if (memoize && length(x) >= 800) {
lgamma_size_x <- lgamma_size_x(size, x)
} else {
lgamma_size_x <- lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
}
lbeta_binom <- lgamma_size_x +
lgamma(x + alpha) + lgamma(size - x + beta) - lgamma(size + alpha + beta) +
lgamma(alpha + beta) - lgamma(alpha) - lgamma(beta)
bol <- !is.na(x + size + prob + theta)
lbeta_binom[bol & ((x == 0 & prob == 0) | (x == size & prob == 1))] <- 0
lbeta_binom[bol & x != 0 & prob == 0] <- -Inf
lbeta_binom[bol & x != size & prob == 1] <- -Inf
lbeta_binom[bol & x > size] <- -Inf
bol_theta <- !is.na(theta)
lbeta_binom[bol_theta & theta < 0] <- NaN
use_binom <- bol_theta & theta == 0
if (any(use_binom)) {
lbinom <- log_lik_binom(x, size = size, prob = prob)
lbeta_binom[use_binom] <- lbinom[use_binom]
}
if (length(bol) == 0) {
lbeta_binom <- numeric(0)

args_na <- is.na(x + size + prob + theta)
length_args_na <- length(args_na)
if (length_args_na == 1) {
if (args_na) {
return(NA_real_)
}
if (prob == 0) {
if (x == 0) {
lbeta_binom <- 0
} else {
lbeta_binom <- -Inf
}
} else if (prob == 1) {
if (x == size) {
lbeta_binom <- 0
} else {
lbeta_binom <- -Inf
}
}
if (x > size) {
lbeta_binom <- -Inf
}
if (theta == 0) {
lbeta_binom <- log_lik_binom(x = x, size = size, prob = prob)
} else if (theta < 0) {
lbeta_binom <- NaN
}
lbeta_binom
} else if (length_args_na == 0) {
numeric(0)
} else {
args_not_na <- !args_na
lbeta_binom[args_not_na & ((x == 0 & prob == 0) | (x == size & prob == 1))] <- 0
lbeta_binom[args_not_na & x != 0 & prob == 0] <- -Inf
lbeta_binom[args_not_na & x != size & prob == 1] <- -Inf
lbeta_binom[args_not_na & x > size] <- -Inf
theta_not_na <- !is.na(theta)
lbeta_binom[theta_not_na & theta < 0] <- NaN
use_binom <- theta_not_na & theta == 0
if (any(use_binom)) {
lbinom <- log_lik_binom(x, size = size, prob = prob)
lbeta_binom[use_binom] <- lbinom[use_binom]
}
lbeta_binom
}
lbeta_binom
}

# Helper that is memoised in .onLoad() because it is called repeatedly with unchanged values of size and x
lgamma_size_x <- function(size, x) {
lgamma(size + 1) - lgamma(x + 1) - lgamma(size - x + 1)
}
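The gain from memoisation can be seen with a rough timing sketch (illustrative, not part of the commit; assumes the suggested memoise package is installed). Repeated calls with unchanged `x` and `size`, as in posterior predictive checks, hit the cache; the simulated data and iteration count are arbitrary:

```r
x <- rbinom(1000, size = 50, prob = 0.3)

# With memoize = TRUE and length(x) >= 800 the lgamma_size_x() term is cached
# after the first call, so later iterations skip that computation.
system.time(
  for (i in 1:200) log_lik_beta_binom(x, size = 50, prob = 0.3, theta = 0.1, memoize = TRUE)
)
system.time(
  for (i in 1:200) log_lik_beta_binom(x, size = 50, prob = 0.3, theta = 0.1, memoize = FALSE)
)
```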

#' Bernoulli Log-Likelihood
5 changes: 5 additions & 0 deletions R/zzz.R
@@ -0,0 +1,5 @@
.onLoad <- function(libname, pkgname) {
if (requireNamespace("memoise", quietly = TRUE)) {
lgamma_size_x <<- memoise::memoise(lgamma_size_x)
}
}
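As a quick check (a sketch, not part of the commit) that the conditional memoisation took effect at load time, the rebound internal helper can be inspected:

```r
# TRUE when memoise was available at load time and .onLoad() replaced the
# helper with its memoised wrapper; FALSE otherwise.
requireNamespace("memoise", quietly = TRUE) &&
  memoise::is.memoised(extras:::lgamma_size_x)
```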
47 changes: 33 additions & 14 deletions README.Rmd
@@ -13,6 +13,7 @@ knitr::opts_chunk$set(
)
```


# extras <a href="https://poissonconsulting.github.io/extras/"><img src="man/figures/logo.png" align="right" height="138" alt="extras website" /></a>

<!-- badges: start -->
@@ -32,19 +33,6 @@ calculate deviance residuals as well as R translations of some
BUGS (Bayesian Using Gibbs Sampling), JAGS (Just Another Gibbs
Sampler), STAN and TMB (Template Model Builder) functions.

## Installation

<!-- To install the latest release from [CRAN](https://cran.r-project.org) -->
```{r, eval=FALSE, echo=FALSE}
install.packages("extras")
```

To install the developmental version from [GitHub](https://github.com/poissonconsulting/extras)
```{r, eval=FALSE}
# install.packages("pak")
pak::pak("poissonconsulting/extras")
```

## Demonstration

### Summarise MCMC Samples
@@ -67,6 +55,7 @@ svalue(rnorm(100, mean = 3))
Implemented distributions, with functions to draw random samples, calculate log-likelihoods, and calculate deviance residuals, include:

- Bernoulli
- Binomial
- Beta-binomial
- Gamma
- Gamma-Poisson
@@ -106,6 +95,37 @@ numericise(
)
```

## Information

For more information see the [Get Started](https://poissonconsulting.github.io/chk/articles/chk.html) vignette.

## Installation

### Release

To install the release version from [CRAN](https://CRAN.R-project.org/package=extras).
```r
install.packages("extras")
```

The website for the release version is at <https://poissonconsulting.github.io/extras/>.

### Development

To install the development version from [GitHub](https://github.com/poissonconsulting/extras)
```r
# install.packages("remotes")
remotes::install_github("poissonconsulting/extras")
```

or from [r-universe](https://poissonconsulting.r-universe.dev/extras).
```r
install.packages("extras", repos = c("https://poissonconsulting.r-universe.dev", "https://cloud.r-project.org"))
```

## References

Greenland, S. 2019. Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values. The American Statistician 73(sup1): 106–114.
@@ -120,4 +140,3 @@ Please report any [issues](https://github.com/poissonconsulting/extras/issues).

Please note that the extras project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms.
