Merge pull request #169 from poissonconsulting/summarize_bug

Summarize bug Talked with Ayla - decided to not spend budget at this time further editing. Will take Karly's suggestion into an issue for future addition if desired after EnMoDS comes online.
bcgov · Feb 7, 2023 · d6172fc · d6172fc
2 parents 6bf94da + c69ce87
commit d6172fc
Show file tree

Hide file tree

Showing 14 changed files with 129 additions and 74 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,14 +3,18 @@ Type: Package
 Title: Tidy Water Quality Data and Calculate Thresholds for British Columbia
 Version: 0.3.1.9003
 Authors@R: c(
-    person("Joe", "Thorley", , "[email protected]", c("aut", "ctr"), comment = c(ORCID = "0000-0002-7683-4592")),
-	  person("Colin", "Millar", , "[email protected]", c("aut", "ctr")),
-	  person("Andy", "Teucher", , "[email protected]", c("aut", "cre")),
-    person("Sebastian", "Dalgarno", , "[email protected]", "ctb", comment = c(ORCID = "0000-0002-3658-4517")),
-	  person("Wendy", "Wang", role = c("ctb", "ctr")),
-	  person("Stephanie", "Hazlitt", , "[email protected]", "ctb"),
-	  person("Robyn", "Irvine", role = c("ctb", "ctr")),
-	  person("Province of British Columbia", role = "cph")
+    person("Joe", "Thorley", , "[email protected]", role = c("aut", "ctr"),
+           comment = c(ORCID = "0000-0002-7683-4592")),
+    person("Colin", "Millar", , "[email protected]", role = c("aut", "ctr")),
+    person("Andy", "Teucher", , "[email protected]", role = c("aut", "cre")),
+    person("Sebastian", "Dalgarno", , "[email protected]", role = "ctb",
+           comment = c(ORCID = "0000-0002-3658-4517")),
+    person("Wendy", "Wang", role = c("ctb", "ctr")),
+    person("Stephanie", "Hazlitt", , "[email protected]", role = "ctb"),
+    person("Robyn", "Irvine", role = c("ctb", "ctr")),
+    person("Province of British Columbia", role = "cph"),
+    person("Ayla", "Pearson", , "[email protected]", role = "ctb",
+           comment = c(ORCID = "0000-0001-7388-1222"))
   )
 Description: Tidies water quality data and calculates water quality thresholds 
     for British Columbia.
@@ -51,7 +55,7 @@ Remotes:
     bcgov/rems,
     bcgov/canwqdata
 VignetteBuilder: knitr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.3
 Roxygen: list(markdown = TRUE)
 Encoding: UTF-8
 RdMacros: lifecycle
diff --git a/R/calc-limits.R b/R/calc-limits.R
@@ -132,7 +132,7 @@ abs_days_diff <- function(x, y) {
 
 assign_30day_periods <- function(x, dates) {
   if (!is.null(dates)) dates <- sort(unique(dates))
-  y <- unique(dplyr::select(x, .data$Date))
+  y <- unique(dplyr::select(x, "Date"))
   y <- dplyr::arrange(y, .data$Date)
   y$Period <- NA
 
@@ -224,9 +224,9 @@ calc_limits_by <- function(x, term, dates, limits, messages) {
   }
 
   if (!is.null(x$DetectionLimit)) {
-    x <- dplyr::select(x, .data$Date, .data$Variable, .data$Value, .data$UpperLimit, .data$DetectionLimit, .data$Units)
+    x <- dplyr::select(x, "Date", "Variable", "Value", "UpperLimit", "DetectionLimit", "Units")
   } else {
-    x <- dplyr::select(x, .data$Date, .data$Variable, .data$Value, .data$UpperLimit, .data$Units)
+    x <- dplyr::select(x, "Date", "Variable", "Value", "UpperLimit", "Units")
   }
   x
 }
@@ -292,10 +292,10 @@ calc_limits <- function(x, by = NULL, term = "long", dates = NULL, keep_limits =
                         messages = getOption("wqbc.messages", default = TRUE),
                         use = "Freshwater Life") {
   chk_data(x)
-  chkor(chk_null(by), check_values(by, ""))
+  chk_null_or(by, vld = vld_character)
   chk_string(term)
   chk_subset(term, c("long", "short", "long-daily"))
-  chkor(chk_null(dates), check_values(dates, Sys.Date()))
+  chk_null_or(dates, vld = vld_date)
   chk_flag(keep_limits)
   chk_flag(delete_outliers)
   chk_flag(estimate_variables)

diff --git a/R/clean-wqdata.R b/R/clean-wqdata.R
@@ -109,7 +109,7 @@ clean_wqdata <- function(x, by = NULL, max_cv = Inf,
                          FUN = mean) {
 
   chk_data(x)
-  chkor(chk_null(by), check_values(by, ""))
+  chk_null_or(by, vld = vld_character)
   chk_number(max_cv)
   check_values(messages, TRUE)
 

diff --git a/R/codes.R b/R/codes.R
@@ -33,7 +33,8 @@ wqbc_codes <- function(compress = FALSE) {
 #' @seealso \code{\link{expand_ems_codes}}
 #' @export
 compress_ems_codes <- function(x) {
-  chkor(chk_character(x), chk_s3_class(x, "factor"))
+
+  chkor_vld(vld_character(x), vld_s3_class(x, "factor"))
 
   x <- as.character(x)
   x <- gsub("[_]", "-", x)
@@ -52,7 +53,8 @@ compress_ems_codes <- function(x) {
 #' @seealso \code{\link{compress_ems_codes}}
 #' @export
 expand_ems_codes <- function(x) {
-  chkor(chk_character(x), chk_s3_class(x, "factor"))
+
+  chkor_vld(vld_character(x), vld_s3_class(x, "factor"))
 
   x <- as.character(x)
   x <- gsub("[-]", "_", x)

diff --git a/R/estimate-variable-values.R b/R/estimate-variable-values.R
@@ -66,15 +66,15 @@ estimate_variable_values_by <- function(x, messages) {
     }
 
     # remove working columns
-    x %<>% dplyr::select(-.data$yday, -.data$day)
+    x %<>% dplyr::select(-"yday", -"day")
   }
   x
 }
 
 estimate_variable_values <- function(data, by = NULL, variables = estimated_variables(),
                                      messages = getOption("wqbc.messages", default = TRUE)) {
   check_data(data, values = list(Date = Sys.Date(), Variable = "", Value = c(1, NA), Units = ""))
-  chkor(chk_null(by), check_values(by, ""))
+  chk_null_or(by, vld = vld_character)
   check_values(variables, "")
   if (!all(variables %in% estimated_variables())) error("Unrecognized variables")
   chk_flag(messages)

diff --git a/R/lookup.R b/R/lookup.R
@@ -92,7 +92,7 @@ lookup_variables <- function(
     return(wqbc_codes()$Variable)
   }
 
-  chkor(chk_character(codes), chk_s3_class(codes, "factor"))
+  chkor_vld(vld_character(codes), vld_s3_class(codes, "factor"))
   codes <- as.character(codes)
   codes <- compress_ems_codes(codes)
   d <- dplyr::left_join(data.frame(Code = codes, stringsAsFactors = FALSE),
@@ -124,11 +124,11 @@ setup_codes <- function() {
   codes <- wqbc_codes()
   codes$Date <- as.Date("2000-01-01")
   codes$Value <- 1
-  dplyr::select(codes, .data$Date, .data$Variable, .data$Value, .data$Units)
+  dplyr::select(codes, "Date", "Variable", "Value", "Units")
 }
 
 tidyup_limits <- function(x) {
-  x <- dplyr::select(x, .data$Variable, .data$UpperLimit, .data$Units)
+  x <- dplyr::select(x, "Variable", "UpperLimit", "Units")
   x$Variable <- factor(x$Variable, levels = lookup_variables())
   x$Units <- factor(x$Units, levels = lookup_units())
   x <- dplyr::arrange(x, .data$Variable)
@@ -139,7 +139,7 @@ add_missing_limits <- function(x, term) {
   limits <- wqbc_limits()
   limits <- dplyr::filter(limits, tolower(.data$Term) == tolower(term))
   limits <- dplyr::filter(limits, !.data$Variable %in% x$Variable)
-  limits <- dplyr::select(limits, .data$Variable, .data$Units)
+  limits <- dplyr::select(limits, "Variable", "Units")
   if (!nrow(limits)) {
     return(x)
   }
@@ -169,10 +169,10 @@ add_missing_limits <- function(x, term) {
 lookup_limits <- function(ph = NULL, hardness = NULL, chloride = NULL,
                           methyl_mercury =  NULL, term = "long",
                           use = "Freshwater Life") {
-  chkor(chk_null(ph), check_values(ph, 1))
-  chkor(chk_null(hardness), check_values(hardness, 1))
-  chkor(chk_null(chloride), check_values(chloride, 1))
-  chkor(chk_null(methyl_mercury), check_values(methyl_mercury, 1))
+  chk_null_or(ph, vld = vld_double)
+  chk_null_or(hardness, vld = vld_double)
+  chk_null_or(chloride, vld = vld_double)
+  chk_null_or(methyl_mercury, vld = vld_double)
   chk_string(term)
 
   term <- tolower(term)

diff --git a/R/plot.R b/R/plot.R
@@ -18,19 +18,19 @@ plot_timeseries_by <- function(data, title = NULL, y0, size, messages) {
   data$Detected %<>% factor(levels = c(TRUE, FALSE))
   data$Outlier %<>% factor(levels = c(TRUE, FALSE))
 
-  gp <- ggplot2::ggplot(data, ggplot2::aes_string(x = "Date", y = "Value"))
+  gp <- ggplot2::ggplot(data, ggplot2::aes(x = .data$Date, y = .data$Value))
 
   if (!is.null(title)) gp <- gp + ggplot2::ggtitle(title)
 
   if (any(!is.na(data$Outlier))) {
     if (any(!is.na(data$Detected))) {
-      gp <- gp + ggplot2::geom_point(ggplot2::aes_string(color = "Outlier", alpha = "Detected"), size = size)
+      gp <- gp + ggplot2::geom_point(ggplot2::aes(color = .data$Outlier, alpha = .data$Detected), size = size)
     } else {
-      gp <- gp + ggplot2::geom_point(ggplot2::aes_string(color = "Outlier"), size = size)
+      gp <- gp + ggplot2::geom_point(ggplot2::aes(color = .data$Outlier), size = size)
     }
   } else {
     if (any(!is.na(data$Detected))) {
-      gp <- gp + ggplot2::geom_point(ggplot2::aes_string(alpha = "Detected"), size = size)
+      gp <- gp + ggplot2::geom_point(ggplot2::aes(alpha = .data$Detected), size = size)
     } else {
       gp <- gp + ggplot2::geom_point(size = size)
     }
@@ -69,8 +69,7 @@ plot_timeseries_fun <- function(data, by, y0, size, messages) {
 #' plot_timeseries(ccme, by = "Variable")
 plot_timeseries <- function(data, by = NULL, y0 = TRUE, size = 1,
                             messages = getOption("wqbc.messages", default = TRUE)) {
-  chkor(chk_null(by), check_values(by, ""))
-
+  chk_null_or(by, vld = vld_character)
   chk_flag(y0)
   chk_flag(messages)
 

diff --git a/R/substitute.R b/R/substitute.R
@@ -96,7 +96,8 @@ wqbc_substitute <- function(org, mod = org, sub, sub_mod = sub, messages) {
 #' @export
 substitute_units <- function(
   x, messages = getOption("wqbc.messages", default = TRUE)) {
-  chkor(chk_character(x), chk_s3_class(x, "factor"))
+
+  chkor_vld(vld_character(x), vld_s3_class(x, "factor"))
   check_values(messages, TRUE)
 
   x <- as.character(x)
@@ -140,7 +141,7 @@ substitute_units <- function(
 substitute_variables <- function(
   x, strict = TRUE, messages = getOption("wqbc.messages", default = TRUE)) {
 
-  chkor(chk_character(x), chk_s3_class(x, "factor"))
+  chkor_vld(vld_character(x), vld_s3_class(x, "factor"))
   check_values(strict, TRUE)
   check_values(messages, TRUE)
 

diff --git a/R/summarise-wqdata.R b/R/summarise-wqdata.R
@@ -63,7 +63,6 @@ summarise_wqdata_by <- function(x, censored, na.rm, conf_level, quan_range) {
   if(any(is.na(x$Value))) {
     return(summarise_missing_values(x, censored))
   }
-
   # get min and max before censored values altered
   min <- min(x$Value)
   max <- max(x$Value)
@@ -78,23 +77,38 @@ summarise_wqdata_by <- function(x, censored, na.rm, conf_level, quan_range) {
     return(summarise_zero_values(x, censored))
   }
   ml <- with(x, cenmle(Value, Censored, dist = "lognormal", conf.int = conf_level))
+  est <- try(mean(ml), silent = TRUE)
 
-  est <- mean(ml)
-  quantiles <- quantile(ml,  c((1-quan_range)/2, quan_range + (1-quan_range)/2))
-
-  tibble::tibble(
-    n = nrow(x),
-    ncen = sum(x$Censored),
-    min = min,
-    max = max,
-    mean = est[["mean"]],
-    median = median(ml),
-    lowerQ = quantiles[[1]],
-    upperQ = quantiles[[2]],
-    sd = sd(ml),
-    se = est[["se"]],
-    lowerCL = est[[3]],
-    upperCL = est[[4]])
+  if (!is_try_error(est)) {
+    quantiles <- quantile(ml,  c((1-quan_range)/2, quan_range + (1-quan_range)/2))
+    tibble::tibble(
+      n = nrow(x),
+      ncen = sum(x$Censored),
+      min = min,
+      max = max,
+      mean = est[["mean"]],
+      median = median(ml),
+      lowerQ = quantiles[[1]],
+      upperQ = quantiles[[2]],
+      sd = sd(ml),
+      se = est[["se"]],
+      lowerCL = est[[3]],
+      upperCL = est[[4]])
+  } else {
+    tibble::tibble(
+      n = nrow(x),
+      ncen = sum(x$Censored),
+      min = min,
+      max = max,
+      mean = NA_real_,
+      median = NA_real_,
+      lowerQ = NA_real_,
+      upperQ = NA_real_,
+      sd = NA_real_,
+      se = NA_real_,
+      lowerCL = NA_real_,
+      upperCL = NA_real_)
+  }
 }
 
 summarise_wqdata_norows <- function(x, by) {

diff --git a/R/test-trends.R b/R/test-trends.R
@@ -91,19 +91,19 @@ test_trends <- function(data, breaks = NULL, FUN = "median", messages = getOptio
     Value = c(1, NA)))
 
   # keep only relevant columns
-  data %<>% dplyr::select(.data$Station, .data$Date, .data$Variable, .data$Value, .data$Units)
+  data %<>% dplyr::select("Station", "Date", "Variable", "Value", "Units")
 
   # nest for analysis
-  data %<>% tidyr::nest(Data = c(.data$Date, .data$Value))
+  data %<>% tidyr::nest(Data = c("Date", "Value"))
 
   # fit trends
   data %<>% dplyr::mutate(Trend = purrr::map(.data$Data, do_test_trends,
     breaks = breaks, FUN = FUN
   ))
 
   # unnest and return
-  data %<>% tidyr::unnest(.data$Trend)
-  data %<>% dplyr::select(-.data$Data)
+  data %<>% tidyr::unnest("Trend")
+  data %<>% dplyr::select(-"Data")
   tibble::as_tibble(data)
 }
 
@@ -173,21 +173,21 @@ summarise_for_trends <- function(data, breaks = NULL, FUN = "median",
     Value = c(1, NA)))
 
   # keep only relevant columns
-  data %<>% dplyr::select(.data$Station, .data$Date, .data$Variable, .data$Value, .data$Units)
+  data %<>% dplyr::select("Station", "Date", "Variable", "Value", "Units")
 
   # nest for analysis
-  data %<>% tidyr::nest(Data = c(.data$Date, .data$Value))
+  data %<>% tidyr::nest(Data = c("Date", "Value"))
 
   # summarise
   data %<>% dplyr::mutate(Summary = purrr::map(.data$Data, do_summarise_for_trends,
     breaks = breaks, FUN = FUN
   ))
 
   # unnest
-  data %<>% tidyr::unnest(.data$Summary)
-  data %<>% dplyr::select(-.data$Data)
+  data %<>% tidyr::unnest("Summary")
+  data %<>% dplyr::select(-"Data")
 
   # gather and return
   gather_cols <- setdiff(names(data), c("Station", "Variable", "Units", "Year"))
-  data %>% tidyr::pivot_longer(gather_cols, names_to = "Month", values_to = "Value")
+  data %>% tidyr::pivot_longer(tidyr::all_of(gather_cols), names_to = "Month", values_to = "Value")
 }
diff --git a/R/utils.R b/R/utils.R
@@ -129,3 +129,5 @@ is_color <- function(x) {
   }
   vapply(x, fun, TRUE)
 }
+
+is_try_error <- function(x) inherits(x, "try-error")
diff --git a/man/wqbc-package.Rd b/man/wqbc-package.Rd
-Original file line number
+Diff line change
@@ Expand Up / @@ -129,3 +129,5 @@ is_color <- function(x) { @@
       }
       vapply(x, fun, TRUE)
     }
+    is_try_error <- function(x) inherits(x, "try-error")