Skip to content

Commit

Permalink
Merge pull request #129 from pvanlaake/main
Browse files Browse the repository at this point in the history
Fixing character lat/long columns in hypertibble() #128
  • Loading branch information
mdsumner authored Oct 15, 2024
2 parents d676143 + 056a1c8 commit 66ee83c
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 85 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Imports:
forcats,
magrittr,
ncdf4,
ncmeta (>= 0.2.0),
ncmeta (>= 0.3.6),
purrr,
RNetCDF (>= 1.9-1),
rlang,
Expand Down
47 changes: 20 additions & 27 deletions R/hyper_array.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' By default all variables in the active grid are returned, use `select_var` to
#' specify one or more desired variables.
#'
#' The transforms are stored as a list of tables in an attribute `transforms``,
#' The transforms are stored as a list of tables in an attribute `transforms`,
#' access these with [hyper_transforms()].
#' @param x NetCDF file, connection object, or [tidync] object
#' @param drop collapse degenerate dimensions, defaults to `TRUE`
Expand Down Expand Up @@ -85,8 +85,8 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
## hack to get the order of the indices of the dimension
ordhack <- 1 + as.integer(unlist(strsplit(gsub("D", "",
dplyr::filter(x$grid, .data$grid == active(x)) %>%
# dplyr::slice(1L) %>% THERE'S ONLY EVER ONE ACTIVE GRID
dplyr::pull(.data$grid)), ",")))
# dplyr::slice(1L) %>% THERE'S ONLY EVER ONE ACTIVE GRID
dplyr::pull(.data$grid)), ",")))
dimension <- x[["dimension"]] %>% dplyr::slice(ordhack)
## ensure dimension is in order of the dims in these vars
axis <- x[["axis"]] %>% dplyr::filter(variable %in% varname)
Expand All @@ -104,30 +104,25 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
if (length(select_var) < 1) stop("no select_var variables available")
if (!isTRUE(getOption("tidync.silent"))) {
warning(sprintf("some select_var variables not found, and ignored:\n %s",
paste(bad, collapse = ",")))
paste(bad, collapse = ",")))
}
}
## todo, make this quosic?
varnames <- select_var
}

#browser()
opt <- getOption("tidync.large.data.check")
if (!isTRUE(opt)) {
opt <- FALSE
}
if (opt && (prod(dimension[["count"]]) * length(varnames)) * 4 > 1e9 &&
interactive() && !force) {
message("please confirm data extraction, Y(es) to proceed ... use 'force = TRUE' to avoid size check\n ( see '?hyper_array')")

mess <- sprintf("pretty big extraction, (%i*%i values [%s]*%i)",
as.integer(prod( COUNT)), length(varnames),
paste( COUNT, collapse = ", "),
length(varnames))
yes <- utils::askYesNo(mess)
if (!yes) {
stop("extraction cancelled by user", call. = FALSE)
## return(invisible(NULL))
if (interactive() && !force && prod(COUNT) * length(varnames) * 4 > 1e9) {
opt <- getOption("tidync.large.data.check")
if (!isTRUE(opt)) opt <- FALSE
if (opt) {
message("please confirm data extraction, Y(es) to proceed ... use 'force = TRUE' to avoid size check\n (see '?hyper_array')")
mess <- sprintf("pretty big extraction, (%1$.0f*%2$i values [%3$s]*%2$i)",
prod(COUNT), length(varnames), paste(COUNT, collapse = ", "))
yes <- utils::askYesNo(mess)
if (!yes) {
stop("extraction cancelled by user", call. = FALSE)
}
}
}

Expand All @@ -137,7 +132,7 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
datalist <- lapply(varnames, function(vara) {
ncdf4::ncvar_get(con, vara, start = START, count = COUNT,
raw_datavals = raw_datavals, collapse_degen = FALSE)
})
})

## Get dimension names from the transforms. Use "timestamp" instead of "time"
transforms <- active_axis_transforms(x)
Expand Down Expand Up @@ -177,17 +172,15 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
## Drop any degenerate dimensions, if requested and needed
if (drop && any(lengths(dn) == 1)) datalist <- lapply(datalist, drop)

structure(datalist, names = varnames,
transforms = transforms,
structure(datalist, names = varnames, transforms = transforms,
source = x$source, class = "tidync_data")
}

#' @name hyper_array
#' @export
hyper_array.character <- function(x, select_var = NULL, ...,
hyper_array.character <- function(x, select_var = NULL, ...,
raw_datavals = FALSE, force = FALSE, drop = TRUE) {
tidync(x) %>%
hyper_filter(...) %>%
hyper_array(select_var = select_var,
raw_datavals = raw_datavals, drop = drop)
hyper_filter(...) %>%
hyper_array(select_var = select_var, raw_datavals = raw_datavals, drop = drop)
}
18 changes: 13 additions & 5 deletions R/hyper_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,20 @@ hyper_tibble.tidync<- function(x, ..., na.rm = TRUE, force = FALSE) {
out <- tibble::as_tibble(lapply(slabs, as.vector))

prod_dims <- 1
dn <- dimnames(slabs[[1]])
nm <- names(dn)
trans <- attr(slabs, "transforms")

for (i in seq_along(nm)) {
out[[nm[i]]] <- rep(dn[[i]], each = prod_dims, length.out = total_prod)
prod_dims <- prod_dims * length(dn[[i]])
for (i in seq_along(trans)) {
nm <- names(trans)[i]
nr <- sum(trans[[i]]$selected)

out[[nm]] <- if ("timestamp" %in% colnames(trans[[i]]))
rep(dplyr::filter(trans[[nm]], .data$selected)[["timestamp"]],
each = prod_dims, length.out = total_prod)
else
rep(dplyr::filter(trans[[nm]], .data$selected)[[nm]],
each = prod_dims, length.out = total_prod)

prod_dims <- prod_dims * nr
}
if (na.rm) out <- dplyr::filter(out, !all_na)
out
Expand Down
49 changes: 13 additions & 36 deletions R/hyper_transforms.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,65 +52,42 @@ active_axis_transforms <- function(x, ...) {
#' @export
hyper_transforms.default <- function(x, all = FALSE, ...) {
if (!all) return(active_axis_transforms(x, ...))
grid <- x$grid
axis <- x$axis
dimension <- x$dimension
source <- x$source
## ignore activation, just do all
#active_x <- active(x)
dims <- axis %>%
# dplyr::filter(.data$grid == active_x) %>%
#dplyr::inner_join(axis, "variable") %>%
dplyr::inner_join(dimension, c("dimension" = "id")) %>%

dims <- x$axis %>%
dplyr::inner_join(x$dimension, c("dimension" = "id")) %>%
dplyr::inner_join(x$extended, c("name", "dimension")) %>%
dplyr::distinct(.data$name, .data$dimension, .keep_all = TRUE) %>%
dplyr::select(.data$name, .data$dimension, .data$length, .data$coord_dim)
dplyr::select(.data$name, .data$dimension, .data$length, .data$coord_dim, .data$time)

transforms <- vector("list", nrow(dims))
names(transforms) <- dims$name

all_atts <- mutate(x$attribute, low_name = tolower(.data$name))

for (i in seq_along(transforms)) {
ll <- list(value = ifelse(rep(dims$coord_dim[i], dims$length[i]),
nc_get(source$source, dims$name[i]), seq_len(dims$length[i])))
nc_get(x$source$source, dims$name[i]), seq_len(dims$length[i])))
axis <- tibble::as_tibble(ll)
names(axis) <- dims$name[i]
names(axis) <- dims$name[i]

## Add timestamp for any "time" dimension by taking the CFtime
## instance from the extended attributes
## tidync/issues/54
if (!is.na(dims$time[i]))
axis$timestamp <- CFtime::CFtimestamp(dims$time[i][[1]])

## axis might have a column called "i"
## tidync/issues/74
id_value <- dims$dimension[i]
dim_name <- dims$name[i]
dim_coord <- dims$coord_dim[i]

## Add timestamp for any "time" dimension. Since not all files have a
## "calendar" attribute or "axis == "T"", just try to create a CFtime
## instance from the "units" attribute and a "calendar" if present
## tidync/issues/54
dim_atts <- all_atts %>% dplyr::filter(.data$variable == dim_name)
units <- unlist(dim_atts$value[which(dim_atts$low_name == "units")])
if (!(is.null(units))) {
cal_idx <- which(dim_atts$low_name == "calendar")
if (length(cal_idx) == 0) calendar <- "standard"
else calendar <- unlist(dim_atts$value[cal_idx])
try({
cft <- CFtime::CFtime(units, calendar, axis[[1]])
axis$timestamp = CFtime::as_timestamp(cft)
}, silent = TRUE)
}

axis <- mutate(axis,
index = row_number(),
id = id_value,
name = dim_name,
coord_dim = dim_coord,
selected = TRUE)


transforms[[i]] <- axis

}


transforms

}
24 changes: 9 additions & 15 deletions R/tidync.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ tidync.character <- function(x, what, ...) {
grid = meta$grid,
dimension = meta$dimension,
variable = variable,
extended = meta$extended,
attribute = meta$attribute)
out$transforms <- hyper_transforms(out, all = TRUE)

Expand Down Expand Up @@ -226,10 +227,9 @@ first_numeric_var <- function(x) {
#' argo %>% hyper_filter(N_LEVELS = index > 300)
print.tidync <- function(x, ...) {
ushapes <- dplyr::distinct(x$grid, .data$grid) %>%
dplyr::arrange(desc(nchar(.data$grid)))
dplyr::arrange(desc(nchar(.data$grid)))
nshapes <- nrow(ushapes)
cat(sprintf("\nData Source (%i): %s ...\n",
nrow(x$source),
cat(sprintf("\nData Source (%i): %s ...\n", nrow(x$source),
paste(utils::head(basename(x$source$source), 2), collapse = ", ")))
cat(sprintf("\nGrids (%i) <dimension family> : <associated variables> \n\n",
nshapes))
Expand All @@ -241,13 +241,12 @@ print.tidync <- function(x, ...) {
return(invisible(NULL))
}
active_sh <- active(x)
nms <- if(nrow(ushapes) > 0) nchar(ushapes$grid) else 0
nms <- if(nrow(ushapes) > 0) nchar(ushapes$grid) else 0
longest <- sprintf("[%%i] %%%is", -max(nms))
if (utils::packageVersion("tidyr") > "0.8.3" ) {
if (utils::packageVersion("tidyr") > "0.8.3")
vargrids <- tidyr::unnest(x$grid, cols = c(.data$variables))
} else {
else
vargrids <- tidyr::unnest(x$grid)
}

# Warning message:
# In dplyr::inner_join(., x$axis, "variable") :
Expand Down Expand Up @@ -289,7 +288,6 @@ print.tidync <- function(x, ...) {
nms <- names(x$transforms)
## handle case where value is character
for (i in seq_along(x$transforms)) {

if (!is.numeric(x$transforms[[nms[i]]][[nms[i]]])) {
x$transforms[[nms[i]]][[nms[i]]] <- NA_integer_
}
Expand All @@ -305,7 +303,6 @@ print.tidync <- function(x, ...) {

filter_ranges <- do.call(rbind, filter_ranges)
ranges <- do.call(rbind, ranges)


idxnm <- match(names(x$transforms), dims$name)
dims$dmin <- dims$dmax <- dims$min <- dims$max <- NA_real_
Expand All @@ -328,10 +325,9 @@ print.tidync <- function(x, ...) {
dplyr::filter(.data$active) %>%
dplyr::mutate(id = NULL, active = NULL), n = Inf)
dimension_other <- format(alldims %>% dplyr::filter(!.data$active) %>%
dplyr::select(.data$dim, .data$name,
.data$length, .data$min, .data$max,

.data$unlim, .data$coord_dim), n = Inf)
dplyr::select(.data$dim, .data$name, .data$length,
.data$min, .data$max, .data$unlim,
.data$coord_dim), n = Inf)

}

Expand All @@ -354,5 +350,3 @@ print.tidync <- function(x, ...) {
}
invisible(NULL)
}


2 changes: 1 addition & 1 deletion man/hyper_array.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 66ee83c

Please sign in to comment.