diff --git a/DESCRIPTION b/DESCRIPTION index b56c8c0..2906e60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ Imports: forcats, magrittr, ncdf4, - ncmeta (>= 0.2.0), + ncmeta (>= 0.3.6), purrr, RNetCDF (>= 1.9-1), rlang, diff --git a/R/hyper_array.R b/R/hyper_array.R index 7a5760c..7271504 100644 --- a/R/hyper_array.R +++ b/R/hyper_array.R @@ -18,7 +18,7 @@ #' By default all variables in the active grid are returned, use `select_var` to #' specify one or more desired variables. #' -#' The transforms are stored as a list of tables in an attribute `transforms``, +#' The transforms are stored as a list of tables in an attribute `transforms`, #' access these with [hyper_transforms()]. #' @param x NetCDF file, connection object, or [tidync] object #' @param drop collapse degenerate dimensions, defaults to `TRUE` @@ -85,8 +85,8 @@ hyper_array.tidync <- function(x, select_var = NULL, ..., ## hack to get the order of the indices of the dimension ordhack <- 1 + as.integer(unlist(strsplit(gsub("D", "", dplyr::filter(x$grid, .data$grid == active(x)) %>% - # dplyr::slice(1L) %>% THERE'S ONLY EVER ONE ACTIVE GRID - dplyr::pull(.data$grid)), ","))) + # dplyr::slice(1L) %>% THERE'S ONLY EVER ONE ACTIVE GRID + dplyr::pull(.data$grid)), ","))) dimension <- x[["dimension"]] %>% dplyr::slice(ordhack) ## ensure dimension is in order of the dims in these vars axis <- x[["axis"]] %>% dplyr::filter(variable %in% varname) @@ -104,7 +104,7 @@ hyper_array.tidync <- function(x, select_var = NULL, ..., if (length(select_var) < 1) stop("no select_var variables available") if (!isTRUE(getOption("tidync.silent"))) { warning(sprintf("some select_var variables not found, and ignored:\n %s", - paste(bad, collapse = ","))) + paste(bad, collapse = ","))) } } ## todo, make this quosic? @@ -112,22 +112,17 @@ hyper_array.tidync <- function(x, select_var = NULL, ..., } #browser() - opt <- getOption("tidync.large.data.check") - if (!isTRUE(opt)) { - opt <- FALSE - } - if (opt && (prod(dimension[["count"]]) * length(varnames)) * 4 > 1e9 && - interactive() && !force) { - message("please confirm data extraction, Y(es) to proceed ... use 'force = TRUE' to avoid size check\n ( see '?hyper_array')") - - mess <- sprintf("pretty big extraction, (%i*%i values [%s]*%i)", - as.integer(prod( COUNT)), length(varnames), - paste( COUNT, collapse = ", "), - length(varnames)) - yes <- utils::askYesNo(mess) - if (!yes) { - stop("extraction cancelled by user", call. = FALSE) -## return(invisible(NULL)) + if (interactive() && !force && prod(COUNT) * length(varnames) * 4 > 1e9) { + opt <- getOption("tidync.large.data.check") + if (!isTRUE(opt)) opt <- FALSE + if (opt) { + message("please confirm data extraction, Y(es) to proceed ... use 'force = TRUE' to avoid size check\n (see '?hyper_array')") + mess <- sprintf("pretty big extraction, (%1$.0f*%2$i values [%3$s]*%2$i)", + prod(COUNT), length(varnames), paste(COUNT, collapse = ", ")) + yes <- utils::askYesNo(mess) + if (!yes) { + stop("extraction cancelled by user", call. = FALSE) + } } } @@ -137,7 +132,7 @@ hyper_array.tidync <- function(x, select_var = NULL, ..., datalist <- lapply(varnames, function(vara) { ncdf4::ncvar_get(con, vara, start = START, count = COUNT, raw_datavals = raw_datavals, collapse_degen = FALSE) - }) + }) ## Get dimension names from the transforms. Use "timestamp" instead of "time" transforms <- active_axis_transforms(x) @@ -177,17 +172,15 @@ hyper_array.tidync <- function(x, select_var = NULL, ..., ## Drop any degenerate dimensions, if requested and needed if (drop && any(lengths(dn) == 1)) datalist <- lapply(datalist, drop) - structure(datalist, names = varnames, - transforms = transforms, + structure(datalist, names = varnames, transforms = transforms, source = x$source, class = "tidync_data") } #' @name hyper_array #' @export -hyper_array.character <- function(x, select_var = NULL, ..., +hyper_array.character <- function(x, select_var = NULL, ..., raw_datavals = FALSE, force = FALSE, drop = TRUE) { tidync(x) %>% - hyper_filter(...) %>% - hyper_array(select_var = select_var, - raw_datavals = raw_datavals, drop = drop) + hyper_filter(...) %>% + hyper_array(select_var = select_var, raw_datavals = raw_datavals, drop = drop) } diff --git a/R/hyper_tibble.R b/R/hyper_tibble.R index 0730fa4..2c4be2f 100644 --- a/R/hyper_tibble.R +++ b/R/hyper_tibble.R @@ -59,12 +59,20 @@ hyper_tibble.tidync<- function(x, ..., na.rm = TRUE, force = FALSE) { out <- tibble::as_tibble(lapply(slabs, as.vector)) prod_dims <- 1 - dn <- dimnames(slabs[[1]]) - nm <- names(dn) + trans <- attr(slabs, "transforms") - for (i in seq_along(nm)) { - out[[nm[i]]] <- rep(dn[[i]], each = prod_dims, length.out = total_prod) - prod_dims <- prod_dims * length(dn[[i]]) + for (i in seq_along(trans)) { + nm <- names(trans)[i] + nr <- sum(trans[[i]]$selected) + + out[[nm]] <- if ("timestamp" %in% colnames(trans[[i]])) + rep(dplyr::filter(trans[[nm]], .data$selected)[["timestamp"]], + each = prod_dims, length.out = total_prod) + else + rep(dplyr::filter(trans[[nm]], .data$selected)[[nm]], + each = prod_dims, length.out = total_prod) + + prod_dims <- prod_dims * nr } if (na.rm) out <- dplyr::filter(out, !all_na) out diff --git a/R/hyper_transforms.R b/R/hyper_transforms.R index bb3ebe4..c7b5ded 100644 --- a/R/hyper_transforms.R +++ b/R/hyper_transforms.R @@ -52,29 +52,27 @@ active_axis_transforms <- function(x, ...) { #' @export hyper_transforms.default <- function(x, all = FALSE, ...) { if (!all) return(active_axis_transforms(x, ...)) - grid <- x$grid - axis <- x$axis - dimension <- x$dimension - source <- x$source - ## ignore activation, just do all - #active_x <- active(x) - dims <- axis %>% - # dplyr::filter(.data$grid == active_x) %>% - #dplyr::inner_join(axis, "variable") %>% - dplyr::inner_join(dimension, c("dimension" = "id")) %>% + + dims <- x$axis %>% + dplyr::inner_join(x$dimension, c("dimension" = "id")) %>% + dplyr::inner_join(x$extended, c("name", "dimension")) %>% dplyr::distinct(.data$name, .data$dimension, .keep_all = TRUE) %>% - dplyr::select(.data$name, .data$dimension, .data$length, .data$coord_dim) + dplyr::select(.data$name, .data$dimension, .data$length, .data$coord_dim, .data$time) transforms <- vector("list", nrow(dims)) names(transforms) <- dims$name - all_atts <- mutate(x$attribute, low_name = tolower(.data$name)) - for (i in seq_along(transforms)) { ll <- list(value = ifelse(rep(dims$coord_dim[i], dims$length[i]), - nc_get(source$source, dims$name[i]), seq_len(dims$length[i]))) + nc_get(x$source$source, dims$name[i]), seq_len(dims$length[i]))) axis <- tibble::as_tibble(ll) - names(axis) <- dims$name[i] + names(axis) <- dims$name[i] + + ## Add timestamp for any "time" dimension by taking the CFtime + ## instance from the extended attributes + ## tidync/issues/54 + if (!is.na(dims$time[i])) + axis$timestamp <- CFtime::CFtimestamp(dims$time[i][[1]]) ## axis might have a column called "i" ## tidync/issues/74 @@ -82,22 +80,6 @@ hyper_transforms.default <- function(x, all = FALSE, ...) { dim_name <- dims$name[i] dim_coord <- dims$coord_dim[i] - ## Add timestamp for any "time" dimension. Since not all files have a - ## "calendar" attribute or "axis == "T"", just try to create a CFtime - ## instance from the "units" attribute and a "calendar" if present - ## tidync/issues/54 - dim_atts <- all_atts %>% dplyr::filter(.data$variable == dim_name) - units <- unlist(dim_atts$value[which(dim_atts$low_name == "units")]) - if (!(is.null(units))) { - cal_idx <- which(dim_atts$low_name == "calendar") - if (length(cal_idx) == 0) calendar <- "standard" - else calendar <- unlist(dim_atts$value[cal_idx]) - try({ - cft <- CFtime::CFtime(units, calendar, axis[[1]]) - axis$timestamp = CFtime::as_timestamp(cft) - }, silent = TRUE) - } - axis <- mutate(axis, index = row_number(), id = id_value, @@ -105,12 +87,7 @@ hyper_transforms.default <- function(x, all = FALSE, ...) { coord_dim = dim_coord, selected = TRUE) - transforms[[i]] <- axis - } - - transforms - } diff --git a/R/tidync.R b/R/tidync.R index 78d767e..d140ed9 100644 --- a/R/tidync.R +++ b/R/tidync.R @@ -137,6 +137,7 @@ tidync.character <- function(x, what, ...) { grid = meta$grid, dimension = meta$dimension, variable = variable, + extended = meta$extended, attribute = meta$attribute) out$transforms <- hyper_transforms(out, all = TRUE) @@ -226,10 +227,9 @@ first_numeric_var <- function(x) { #' argo %>% hyper_filter(N_LEVELS = index > 300) print.tidync <- function(x, ...) { ushapes <- dplyr::distinct(x$grid, .data$grid) %>% - dplyr::arrange(desc(nchar(.data$grid))) + dplyr::arrange(desc(nchar(.data$grid))) nshapes <- nrow(ushapes) - cat(sprintf("\nData Source (%i): %s ...\n", - nrow(x$source), + cat(sprintf("\nData Source (%i): %s ...\n", nrow(x$source), paste(utils::head(basename(x$source$source), 2), collapse = ", "))) cat(sprintf("\nGrids (%i) <dimension family> : <associated variables> \n\n", nshapes)) @@ -241,13 +241,12 @@ print.tidync <- function(x, ...) { return(invisible(NULL)) } active_sh <- active(x) - nms <- if(nrow(ushapes) > 0) nchar(ushapes$grid) else 0 + nms <- if(nrow(ushapes) > 0) nchar(ushapes$grid) else 0 longest <- sprintf("[%%i] %%%is", -max(nms)) - if (utils::packageVersion("tidyr") > "0.8.3" ) { + if (utils::packageVersion("tidyr") > "0.8.3") vargrids <- tidyr::unnest(x$grid, cols = c(.data$variables)) - } else { + else vargrids <- tidyr::unnest(x$grid) - } # Warning message: # In dplyr::inner_join(., x$axis, "variable") : @@ -289,7 +288,6 @@ print.tidync <- function(x, ...) { nms <- names(x$transforms) ## handle case where value is character for (i in seq_along(x$transforms)) { - if (!is.numeric(x$transforms[[nms[i]]][[nms[i]]])) { x$transforms[[nms[i]]][[nms[i]]] <- NA_integer_ } @@ -305,7 +303,6 @@ print.tidync <- function(x, ...) { filter_ranges <- do.call(rbind, filter_ranges) ranges <- do.call(rbind, ranges) - idxnm <- match(names(x$transforms), dims$name) dims$dmin <- dims$dmax <- dims$min <- dims$max <- NA_real_ @@ -328,10 +325,9 @@ print.tidync <- function(x, ...) { dplyr::filter(.data$active) %>% dplyr::mutate(id = NULL, active = NULL), n = Inf) dimension_other <- format(alldims %>% dplyr::filter(!.data$active) %>% - dplyr::select(.data$dim, .data$name, - .data$length, .data$min, .data$max, - - .data$unlim, .data$coord_dim), n = Inf) + dplyr::select(.data$dim, .data$name, .data$length, + .data$min, .data$max, .data$unlim, + .data$coord_dim), n = Inf) } @@ -354,5 +350,3 @@ print.tidync <- function(x, ...) { } invisible(NULL) } - - diff --git a/man/hyper_array.Rd b/man/hyper_array.Rd index 492fa32..c815abb 100644 --- a/man/hyper_array.Rd +++ b/man/hyper_array.Rd @@ -78,7 +78,7 @@ string. By default all variables in the active grid are returned, use \code{select_var} to specify one or more desired variables. -The transforms are stored as a list of tables in an attribute `transforms``, +The transforms are stored as a list of tables in an attribute \code{transforms}, access these with \code{\link[=hyper_transforms]{hyper_transforms()}}. } \examples{