Skip to content

Commit

Permalink
- calc_wqis now calculates 95% bootstrap confidence intervals by re…
Browse files Browse the repository at this point in the history
…sampling with

replacement on rows for 10^3 replicates
  • Loading branch information
joethorley committed Feb 11, 2015
1 parent e868279 commit 2950263
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 57 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Package: wqbc
Type: Package
Title: Water Quality Thresholds and Index Calculation for British Columbia
Version: 0.0.0.9005
Date: 2015-01-31
Version: 0.0.0.9006
Date: 2015-02-10
Authors@R: as.person(c(
"Joe Thorley <[email protected]> [aut, cre]",
"Colin Millar <[email protected]> [aut]",
"Andy Teucher [aut]",
"Wendy Wang [ctb]",
"Andy Teucher [ctb]",
"Stephanie Hazlitt [ctb]",
"Robyn Irvine [ctb]"))
Description: Calculates water quality thresholds and water quality indices
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# wqbc 0.0.0.9006

- `calc_wqis` now calculates 95% bootstrap confidence intervals by resampling with
replacement on rows for 10^3 replicates

# wqbc 0.0.0.9005

- Released new version number as wqdata now dependent
Expand Down
83 changes: 56 additions & 27 deletions R/calc-wqi.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,46 @@ bootstrap_wqis_column <- function (x, R) {
boot::boot(data = x, statistic = resample_wqi_column, R = R, strata = x$Variable)
}

boot_wqis <- function (x, ci, cesi_code) {
resample_wqi_tidy <- function (x, i, nt = nt, nv = nv) {
x <- x[i,,drop = FALSE]
wqif_excursion_variable(x = x$Excursion, v = x$Variable, nt = nt, nv = nv)["WQI"]
}

bootstrap_wqis_tidy <- function (x, R) {
stopifnot(!any(is.na(x$Excursion)))

nt <- nrow(x)
nv <- length(unique(x$Variable))

x <- x[x$Variable %in% x$Variable[x$Excursion != 0],,drop = FALSE]

if(!nrow(x))
return (c(100, 100))

x$Variable <- factor(x$Variable)

boot::boot(data = x, statistic = resample_wqi_tidy, R = R,
strata = x$Variable, nt = nt, nv = nv)
}

lower_upper <- function (x) {
round(quantile(x$t, c(0.025, 0.975)), 1)
}

boot_wqis <- function (x) {
ci <- "row"
cesi_code <- FALSE

stopifnot(is.flag(cesi_code) & noNA(cesi_code))
stopifnot(is.string(ci) & noNA(ci))
stopifnot(ci %in% c("row", "column", "tidy"))

R <- 10^3

if(ci == "tidy") {
boot <- bootstrap_wqis_tidy(x, R = R)
return (lower_upper(boot))
}

if (cesi_code) {
x <- dplyr::mutate_(x, Excursion = ~Excursion + 1)
Expand All @@ -90,19 +129,16 @@ boot_wqis <- function (x, ci, cesi_code) {
x <- dplyr::select_(x, ~-Date)
x <- as.matrix(x)

R <- 10^3

if(cesi_code) {
stopifnot(ci %in% c("row", "column"))
boot <- BootCICol(x, Column = (ci == "column"), Par = 4, CL = 0.95, R = R)
names(boot) <- NULL
} else {

if(ci == "row") {
boot <- boot::boot(x, statistic = wqi_matrix, R = R)
} else {
} else # ci == column
boot <- bootstrap_wqis_column(x, R = R)
}
boot <- round(quantile(boot$t, c(0.025, 0.975)),1)
boot <- lower_upper(boot)
}
boot
}
Expand All @@ -117,7 +153,7 @@ fourtimesfour <- function (x) {
nrow(x) >= 4
}

calc_wqi_by <- function (x, ci, cesi_code, messages) {
calc_wqi_by <- function (x, messages) {

x$Excursion <- get_excursions(x$Value, x$LowerLimit, x$UpperLimit)
check_excursions(x)
Expand All @@ -127,11 +163,7 @@ calc_wqi_by <- function (x, ci, cesi_code, messages) {
nv <- length(unique(x$Variable))
wqi <- wqif_excursion_variable(x = x$Excursion, v = x$Variable, nt = nt, nv = nv)

if(ci == "none") {
boot <- rep(NA_real_,2)
} else {
boot <- boot_wqis(x, ci = ci, cesi_code = cesi_code)
}
boot <- boot_wqis(x)

wqi <- data.frame(WQI = round(wqi["WQI"], 1), Lower = boot[1], Upper = boot[2],
Category = categorize_wqi(wqi["WQI"]),
Expand Down Expand Up @@ -172,31 +204,28 @@ set_detection_limits <- function (x, messages) {
#' and it will not be possible to calculate the WQI. In this case \code{calc_wqi}
#' throws an informative error. Finally it is important to note that in order
#' for the WQI to be calculated the data set must include four variables each
#' with non-missing values on at least four separate days.
#' with non-missing values on at least four separate days. In addition
#' to calculating the WQI the function also generates
#' 95% Lower and Upper bootstrap confidence intervals about the WQI. The confidence
#' intervals are generated by first spreading the data into wide format where each
#' row represents one date and then resampling the rows with replacement to
#' generate 1,000 replicates. The confidence intervals are the 2.5% and 97.5%
#' percentiles of the replicate WQIs.
#'
#' @param x A data.frame to calculate the WQI for.
#' @param by An optional character vector of the columns in x to calculate the WQI by.
#' @param ci A string indicating whether to calculate 95% bootstrap confidence
#' intervals by "row" or "column" or "none". Currently the confidence intervals are
#' considered unreliable and should not be used.
#' @param cesi_code A temporary flag indicating whether to use wqbc or cesi code to calculate the
#' confidence intervals. This argument should be removed prior to release of the package.
#' @param messages A flag indicating whether to print messages.
#' @examples
#' data(ccme)
#' calc_wqi(ccme, messages = TRUE)
#' calc_wqi(ccme, by = "Date", messages = TRUE)
#' @seealso \code{\link{calc_limits}} and \code{\link{wqbc}}
#' @export
calc_wqi <- function (x, by = NULL, ci = "row", cesi_code = FALSE,
messages = getOption("wqbc.messages", default = TRUE)) {
calc_wqi <- function (x, by = NULL,
messages = getOption("wqbc.messages", default = TRUE)) {
assert_that(is.data.frame(x))
assert_that(is.null(by) || (is.character(by) && noNA(by)))
assert_that(is.flag(messages) && noNA(messages))
assert_that(is.string(ci))

ci <- tolower(ci)
if(!ci %in% c("row", "column", "none")) stop("ci must be \"row\", \"column\", or \"none\"")

check_rows(x)
if(!any(c("LowerLimit", "UpperLimit") %in% colnames(x)))
Expand Down Expand Up @@ -239,9 +268,9 @@ calc_wqi <- function (x, by = NULL, ci = "row", cesi_code = FALSE,
check_rows(x)

if(is.null(by)) {
x <- calc_wqi_by(x, ci = ci, cesi_code = cesi_code, messages = messages)
x <- calc_wqi_by(x, messages = messages)
} else {
x <- plyr::ddply(x, .variables = by, .fun = calc_wqi_by, ci = ci, cesi_code = cesi_code,
x <- plyr::ddply(x, .variables = by, .fun = calc_wqi_by,
messages = messages)
}
if(messages) message("Calculated water quality indices.")
Expand Down
19 changes: 9 additions & 10 deletions man/calc_wqi.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,14 @@
\alias{calc_wqi}
\title{Calculate Water Quality Index (WQI)}
\usage{
calc_wqi(x, by = NULL, ci = "row", cesi_code = FALSE,
messages = getOption("wqbc.messages", default = TRUE))
calc_wqi(x, by = NULL, messages = getOption("wqbc.messages", default =
TRUE))
}
\arguments{
\item{x}{A data.frame to calculate the WQI for.}

\item{by}{An optional character vector of the columns in x to calculate the WQI by.}

\item{ci}{A string indicating whether to calculate 95% bootstrap confidence
intervals by "row" or "column" or "none". Currently the confidence intervals are
considered unreliable and should not be used.}

\item{cesi_code}{A temporary flag indicating whether to use wqbc or cesi code to calculate the
confidence intervals. This argument should be removed prior to release of the package.}

\item{messages}{A flag indicating whether to print messages.}
}
\description{
Expand All @@ -36,7 +29,13 @@ the variable has lower limits because otherwise the excursion will be infinity
and it will not be possible to calculate the WQI. In this case \code{calc_wqi}
throws an informative error. Finally it is important to note that in order
for the WQI to be calculated the data set must include four variables each
with non-missing values on at least four separate days.
with non-missing values on at least four separate days. In addition
to calculating the WQI the function also generates
95% Lower and Upper bootstrap confidence intervals about the WQI. The confidence
intervals are generated by first spreading the data into wide format where each
row represents one date and then resampling the rows with replacement to
generate 1,000 replicates. The confidence intervals are the 2.5% and 97.5%
percentiles of the replicate WQIs.
}
\examples{
data(ccme)
Expand Down
18 changes: 1 addition & 17 deletions tests/testthat/test-calc-wqi.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ test_that("ccme", {

data(ccme)

x <- calc_wqi(ccme, ci = "column")
x <- calc_wqi(ccme)

expect_is(x, "data.frame")
expect_equal(nrow(x), 1)
Expand Down Expand Up @@ -84,19 +84,3 @@ test_that("calc_wqi zero values", {
ccme$DetectionLimit <- 1
expect_is(calc_wqi(ccme), "data.frame")
})

test_that("calc_wqi boot", {

opts <- options()
on.exit(options(opts))
options(wqbc.messages = FALSE)

data(ccme)

calc_wqi(ccme, ci = "column", cesi_code = FALSE)
calc_wqi(ccme, ci = "row", cesi_code = FALSE)
calc_wqi(ccme, ci = "column", cesi_code = TRUE)
calc_wqi(ccme, ci = "row", cesi_code = TRUE)
})


0 comments on commit 2950263

Please sign in to comment.