Skip to content

Commit

Permalink
Merge pull request #169 from poissonconsulting/summarize_bug
Browse files Browse the repository at this point in the history
Summarize bug
Talked with Ayla - decided to not spend budget at this time further editing. Will take Karly's suggestion into an issue for future addition if desired after EnMoDS comes online.
  • Loading branch information
HeatherGranger authored Feb 7, 2023
2 parents 6bf94da + c69ce87 commit d6172fc
Show file tree
Hide file tree
Showing 14 changed files with 129 additions and 74 deletions.
22 changes: 13 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ Type: Package
Title: Tidy Water Quality Data and Calculate Thresholds for British Columbia
Version: 0.3.1.9003
Authors@R: c(
person("Joe", "Thorley", , "[email protected]", c("aut", "ctr"), comment = c(ORCID = "0000-0002-7683-4592")),
person("Colin", "Millar", , "[email protected]", c("aut", "ctr")),
person("Andy", "Teucher", , "[email protected]", c("aut", "cre")),
person("Sebastian", "Dalgarno", , "[email protected]", "ctb", comment = c(ORCID = "0000-0002-3658-4517")),
person("Wendy", "Wang", role = c("ctb", "ctr")),
person("Stephanie", "Hazlitt", , "[email protected]", "ctb"),
person("Robyn", "Irvine", role = c("ctb", "ctr")),
person("Province of British Columbia", role = "cph")
person("Joe", "Thorley", , "[email protected]", role = c("aut", "ctr"),
comment = c(ORCID = "0000-0002-7683-4592")),
person("Colin", "Millar", , "[email protected]", role = c("aut", "ctr")),
person("Andy", "Teucher", , "[email protected]", role = c("aut", "cre")),
person("Sebastian", "Dalgarno", , "[email protected]", role = "ctb",
comment = c(ORCID = "0000-0002-3658-4517")),
person("Wendy", "Wang", role = c("ctb", "ctr")),
person("Stephanie", "Hazlitt", , "[email protected]", role = "ctb"),
person("Robyn", "Irvine", role = c("ctb", "ctr")),
person("Province of British Columbia", role = "cph"),
person("Ayla", "Pearson", , "[email protected]", role = "ctb",
comment = c(ORCID = "0000-0001-7388-1222"))
)
Description: Tidies water quality data and calculates water quality thresholds
for British Columbia.
Expand Down Expand Up @@ -51,7 +55,7 @@ Remotes:
bcgov/rems,
bcgov/canwqdata
VignetteBuilder: knitr
RoxygenNote: 7.1.1
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
Encoding: UTF-8
RdMacros: lifecycle
10 changes: 5 additions & 5 deletions R/calc-limits.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ abs_days_diff <- function(x, y) {

assign_30day_periods <- function(x, dates) {
if (!is.null(dates)) dates <- sort(unique(dates))
y <- unique(dplyr::select(x, .data$Date))
y <- unique(dplyr::select(x, "Date"))
y <- dplyr::arrange(y, .data$Date)
y$Period <- NA

Expand Down Expand Up @@ -224,9 +224,9 @@ calc_limits_by <- function(x, term, dates, limits, messages) {
}

if (!is.null(x$DetectionLimit)) {
x <- dplyr::select(x, .data$Date, .data$Variable, .data$Value, .data$UpperLimit, .data$DetectionLimit, .data$Units)
x <- dplyr::select(x, "Date", "Variable", "Value", "UpperLimit", "DetectionLimit", "Units")
} else {
x <- dplyr::select(x, .data$Date, .data$Variable, .data$Value, .data$UpperLimit, .data$Units)
x <- dplyr::select(x, "Date", "Variable", "Value", "UpperLimit", "Units")
}
x
}
Expand Down Expand Up @@ -292,10 +292,10 @@ calc_limits <- function(x, by = NULL, term = "long", dates = NULL, keep_limits =
messages = getOption("wqbc.messages", default = TRUE),
use = "Freshwater Life") {
chk_data(x)
chkor(chk_null(by), check_values(by, ""))
chk_null_or(by, vld = vld_character)
chk_string(term)
chk_subset(term, c("long", "short", "long-daily"))
chkor(chk_null(dates), check_values(dates, Sys.Date()))
chk_null_or(dates, vld = vld_date)
chk_flag(keep_limits)
chk_flag(delete_outliers)
chk_flag(estimate_variables)
Expand Down
2 changes: 1 addition & 1 deletion R/clean-wqdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ clean_wqdata <- function(x, by = NULL, max_cv = Inf,
FUN = mean) {

chk_data(x)
chkor(chk_null(by), check_values(by, ""))
chk_null_or(by, vld = vld_character)
chk_number(max_cv)
check_values(messages, TRUE)

Expand Down
6 changes: 4 additions & 2 deletions R/codes.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ wqbc_codes <- function(compress = FALSE) {
#' @seealso \code{\link{expand_ems_codes}}
#' @export
compress_ems_codes <- function(x) {
chkor(chk_character(x), chk_s3_class(x, "factor"))

chkor_vld(vld_character(x), vld_s3_class(x, "factor"))

x <- as.character(x)
x <- gsub("[_]", "-", x)
Expand All @@ -52,7 +53,8 @@ compress_ems_codes <- function(x) {
#' @seealso \code{\link{compress_ems_codes}}
#' @export
expand_ems_codes <- function(x) {
chkor(chk_character(x), chk_s3_class(x, "factor"))

chkor_vld(vld_character(x), vld_s3_class(x, "factor"))

x <- as.character(x)
x <- gsub("[-]", "_", x)
Expand Down
4 changes: 2 additions & 2 deletions R/estimate-variable-values.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ estimate_variable_values_by <- function(x, messages) {
}

# remove working columns
x %<>% dplyr::select(-.data$yday, -.data$day)
x %<>% dplyr::select(-"yday", -"day")
}
x
}

estimate_variable_values <- function(data, by = NULL, variables = estimated_variables(),
messages = getOption("wqbc.messages", default = TRUE)) {
check_data(data, values = list(Date = Sys.Date(), Variable = "", Value = c(1, NA), Units = ""))
chkor(chk_null(by), check_values(by, ""))
chk_null_or(by, vld = vld_character)
check_values(variables, "")
if (!all(variables %in% estimated_variables())) error("Unrecognized variables")
chk_flag(messages)
Expand Down
16 changes: 8 additions & 8 deletions R/lookup.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ lookup_variables <- function(
return(wqbc_codes()$Variable)
}

chkor(chk_character(codes), chk_s3_class(codes, "factor"))
chkor_vld(vld_character(codes), vld_s3_class(codes, "factor"))
codes <- as.character(codes)
codes <- compress_ems_codes(codes)
d <- dplyr::left_join(data.frame(Code = codes, stringsAsFactors = FALSE),
Expand Down Expand Up @@ -124,11 +124,11 @@ setup_codes <- function() {
codes <- wqbc_codes()
codes$Date <- as.Date("2000-01-01")
codes$Value <- 1
dplyr::select(codes, .data$Date, .data$Variable, .data$Value, .data$Units)
dplyr::select(codes, "Date", "Variable", "Value", "Units")
}

tidyup_limits <- function(x) {
x <- dplyr::select(x, .data$Variable, .data$UpperLimit, .data$Units)
x <- dplyr::select(x, "Variable", "UpperLimit", "Units")
x$Variable <- factor(x$Variable, levels = lookup_variables())
x$Units <- factor(x$Units, levels = lookup_units())
x <- dplyr::arrange(x, .data$Variable)
Expand All @@ -139,7 +139,7 @@ add_missing_limits <- function(x, term) {
limits <- wqbc_limits()
limits <- dplyr::filter(limits, tolower(.data$Term) == tolower(term))
limits <- dplyr::filter(limits, !.data$Variable %in% x$Variable)
limits <- dplyr::select(limits, .data$Variable, .data$Units)
limits <- dplyr::select(limits, "Variable", "Units")
if (!nrow(limits)) {
return(x)
}
Expand Down Expand Up @@ -169,10 +169,10 @@ add_missing_limits <- function(x, term) {
lookup_limits <- function(ph = NULL, hardness = NULL, chloride = NULL,
methyl_mercury = NULL, term = "long",
use = "Freshwater Life") {
chkor(chk_null(ph), check_values(ph, 1))
chkor(chk_null(hardness), check_values(hardness, 1))
chkor(chk_null(chloride), check_values(chloride, 1))
chkor(chk_null(methyl_mercury), check_values(methyl_mercury, 1))
chk_null_or(ph, vld = vld_double)
chk_null_or(hardness, vld = vld_double)
chk_null_or(chloride, vld = vld_double)
chk_null_or(methyl_mercury, vld = vld_double)
chk_string(term)

term <- tolower(term)
Expand Down
11 changes: 5 additions & 6 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ plot_timeseries_by <- function(data, title = NULL, y0, size, messages) {
data$Detected %<>% factor(levels = c(TRUE, FALSE))
data$Outlier %<>% factor(levels = c(TRUE, FALSE))

gp <- ggplot2::ggplot(data, ggplot2::aes_string(x = "Date", y = "Value"))
gp <- ggplot2::ggplot(data, ggplot2::aes(x = .data$Date, y = .data$Value))

if (!is.null(title)) gp <- gp + ggplot2::ggtitle(title)

if (any(!is.na(data$Outlier))) {
if (any(!is.na(data$Detected))) {
gp <- gp + ggplot2::geom_point(ggplot2::aes_string(color = "Outlier", alpha = "Detected"), size = size)
gp <- gp + ggplot2::geom_point(ggplot2::aes(color = .data$Outlier, alpha = .data$Detected), size = size)
} else {
gp <- gp + ggplot2::geom_point(ggplot2::aes_string(color = "Outlier"), size = size)
gp <- gp + ggplot2::geom_point(ggplot2::aes(color = .data$Outlier), size = size)
}
} else {
if (any(!is.na(data$Detected))) {
gp <- gp + ggplot2::geom_point(ggplot2::aes_string(alpha = "Detected"), size = size)
gp <- gp + ggplot2::geom_point(ggplot2::aes(alpha = .data$Detected), size = size)
} else {
gp <- gp + ggplot2::geom_point(size = size)
}
Expand Down Expand Up @@ -69,8 +69,7 @@ plot_timeseries_fun <- function(data, by, y0, size, messages) {
#' plot_timeseries(ccme, by = "Variable")
plot_timeseries <- function(data, by = NULL, y0 = TRUE, size = 1,
messages = getOption("wqbc.messages", default = TRUE)) {
chkor(chk_null(by), check_values(by, ""))

chk_null_or(by, vld = vld_character)
chk_flag(y0)
chk_flag(messages)

Expand Down
5 changes: 3 additions & 2 deletions R/substitute.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ wqbc_substitute <- function(org, mod = org, sub, sub_mod = sub, messages) {
#' @export
substitute_units <- function(
x, messages = getOption("wqbc.messages", default = TRUE)) {
chkor(chk_character(x), chk_s3_class(x, "factor"))

chkor_vld(vld_character(x), vld_s3_class(x, "factor"))
check_values(messages, TRUE)

x <- as.character(x)
Expand Down Expand Up @@ -140,7 +141,7 @@ substitute_units <- function(
substitute_variables <- function(
x, strict = TRUE, messages = getOption("wqbc.messages", default = TRUE)) {

chkor(chk_character(x), chk_s3_class(x, "factor"))
chkor_vld(vld_character(x), vld_s3_class(x, "factor"))
check_values(strict, TRUE)
check_values(messages, TRUE)

Expand Down
48 changes: 31 additions & 17 deletions R/summarise-wqdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ summarise_wqdata_by <- function(x, censored, na.rm, conf_level, quan_range) {
if(any(is.na(x$Value))) {
return(summarise_missing_values(x, censored))
}

# get min and max before censored values altered
min <- min(x$Value)
max <- max(x$Value)
Expand All @@ -78,23 +77,38 @@ summarise_wqdata_by <- function(x, censored, na.rm, conf_level, quan_range) {
return(summarise_zero_values(x, censored))
}
ml <- with(x, cenmle(Value, Censored, dist = "lognormal", conf.int = conf_level))
est <- try(mean(ml), silent = TRUE)

est <- mean(ml)
quantiles <- quantile(ml, c((1-quan_range)/2, quan_range + (1-quan_range)/2))

tibble::tibble(
n = nrow(x),
ncen = sum(x$Censored),
min = min,
max = max,
mean = est[["mean"]],
median = median(ml),
lowerQ = quantiles[[1]],
upperQ = quantiles[[2]],
sd = sd(ml),
se = est[["se"]],
lowerCL = est[[3]],
upperCL = est[[4]])
if (!is_try_error(est)) {
quantiles <- quantile(ml, c((1-quan_range)/2, quan_range + (1-quan_range)/2))
tibble::tibble(
n = nrow(x),
ncen = sum(x$Censored),
min = min,
max = max,
mean = est[["mean"]],
median = median(ml),
lowerQ = quantiles[[1]],
upperQ = quantiles[[2]],
sd = sd(ml),
se = est[["se"]],
lowerCL = est[[3]],
upperCL = est[[4]])
} else {
tibble::tibble(
n = nrow(x),
ncen = sum(x$Censored),
min = min,
max = max,
mean = NA_real_,
median = NA_real_,
lowerQ = NA_real_,
upperQ = NA_real_,
sd = NA_real_,
se = NA_real_,
lowerCL = NA_real_,
upperCL = NA_real_)
}
}

summarise_wqdata_norows <- function(x, by) {
Expand Down
18 changes: 9 additions & 9 deletions R/test-trends.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,19 @@ test_trends <- function(data, breaks = NULL, FUN = "median", messages = getOptio
Value = c(1, NA)))

# keep only relevant columns
data %<>% dplyr::select(.data$Station, .data$Date, .data$Variable, .data$Value, .data$Units)
data %<>% dplyr::select("Station", "Date", "Variable", "Value", "Units")

# nest for analysis
data %<>% tidyr::nest(Data = c(.data$Date, .data$Value))
data %<>% tidyr::nest(Data = c("Date", "Value"))

# fit trends
data %<>% dplyr::mutate(Trend = purrr::map(.data$Data, do_test_trends,
breaks = breaks, FUN = FUN
))

# unnest and return
data %<>% tidyr::unnest(.data$Trend)
data %<>% dplyr::select(-.data$Data)
data %<>% tidyr::unnest("Trend")
data %<>% dplyr::select(-"Data")
tibble::as_tibble(data)
}

Expand Down Expand Up @@ -173,21 +173,21 @@ summarise_for_trends <- function(data, breaks = NULL, FUN = "median",
Value = c(1, NA)))

# keep only relevant columns
data %<>% dplyr::select(.data$Station, .data$Date, .data$Variable, .data$Value, .data$Units)
data %<>% dplyr::select("Station", "Date", "Variable", "Value", "Units")

# nest for analysis
data %<>% tidyr::nest(Data = c(.data$Date, .data$Value))
data %<>% tidyr::nest(Data = c("Date", "Value"))

# summarise
data %<>% dplyr::mutate(Summary = purrr::map(.data$Data, do_summarise_for_trends,
breaks = breaks, FUN = FUN
))

# unnest
data %<>% tidyr::unnest(.data$Summary)
data %<>% dplyr::select(-.data$Data)
data %<>% tidyr::unnest("Summary")
data %<>% dplyr::select(-"Data")

# gather and return
gather_cols <- setdiff(names(data), c("Station", "Variable", "Units", "Year"))
data %>% tidyr::pivot_longer(gather_cols, names_to = "Month", values_to = "Value")
data %>% tidyr::pivot_longer(tidyr::all_of(gather_cols), names_to = "Month", values_to = "Value")
}
2 changes: 2 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,5 @@ is_color <- function(x) {
}
vapply(x, fun, TRUE)
}

is_try_error <- function(x) inherits(x, "try-error")
3 changes: 1 addition & 2 deletions man/wqbc-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d6172fc

Please sign in to comment.